Skip to content
GitLab
Explore
Sign in
Primary navigation
Search or go to…
Project
NIRS_Workflow
Manage
Activity
Members
Labels
Plan
Issues
Issue boards
Milestones
Wiki
Code
Merge requests
Repository
Branches
Commits
Tags
Repository graph
Compare revisions
Snippets
Build
Pipelines
Jobs
Pipeline schedules
Artifacts
Deploy
Releases
Package Registry
Model registry
Operate
Environments
Terraform modules
Monitor
Incidents
Analyze
Value stream analytics
Contributor analytics
CI/CD analytics
Repository analytics
Model experiments
Help
Help
Support
GitLab documentation
Compare GitLab plans
Community forum
Contribute to GitLab
Provide feedback
Terms and privacy
Keyboard shortcuts
?
Snippets
Groups
Projects
Show more breadcrumbs
CEFE
PACE
NIRS_Workflow
Commits
7c9546c2
Commit
7c9546c2
authored
8 months ago
by
BARTHES Nicolas
Browse files
Options
Downloads
Patches
Plain Diff
LWPLSR subprocess documentation
parent
996d9c7f
No related branches found
No related tags found
No related merge requests found
Changes
3
Hide whitespace changes
Inline
Side-by-side
Showing
3 changed files
.gitignore
+1
-0
1 addition, 0 deletions
.gitignore
src/Class_Mod/LWPLSR_.py
+17
-22
17 additions, 22 deletions
src/Class_Mod/LWPLSR_.py
src/Class_Mod/LWPLSR_Call.py
+9
-2
9 additions, 2 deletions
src/Class_Mod/LWPLSR_Call.py
with
27 additions
and
24 deletions
.gitignore
+
1
−
0
View file @
7c9546c2
...
...
@@ -7,3 +7,4 @@ src/Report/*.zip
src/Report/figures/*.pdf
src/Report/figures/*.png
config/config.json
data/params/Preprocessing.json
\ No newline at end of file
This diff is collapsed.
Click to expand it.
src/Class_Mod/LWPLSR_.py
+
17
−
22
View file @
7c9546c2
...
...
@@ -7,33 +7,28 @@ class LWPLSR:
Returns:
self.scores (DataFrame): various metrics and scores
self.predicted_results_on_train (DataFrame):
self.predicted_results_on_test (DataFrame):
self.predicted_results (Dictionary): Dict containing all predicted results (train, test, cross-validation)
self.mod (Julia model): the prepared model
"""
def
__init__
(
self
,
dataset
):
"""
Initiate the LWPLSR and prepare data for Julia computing.
"""
# self.x_train, self.y_train, self.x_test, self.y_test = [dataset[i] for i in range(len(dataset))]
# get train / test data from dataset
self
.
x_train
,
self
.
y_train
,
self
.
x_test
,
self
.
y_test
=
[
dataset
[
i
]
for
i
in
range
(
4
)]
# calculate number of KFolds and get CV data from dataset
self
.
nb_fold
=
int
((
len
(
dataset
)
-
4
)
/
4
)
for
i
in
range
(
self
.
nb_fold
):
setattr
(
self
,
"
xtr_fold
"
+
str
(
i
+
1
),
dataset
[
i
+
7
])
setattr
(
self
,
"
ytr_fold
"
+
str
(
i
+
1
),
dataset
[
i
+
13
])
setattr
(
self
,
"
xte_fold
"
+
str
(
i
+
1
),
dataset
[
i
+
4
])
# setattr(self, "yte_fold"+str(i+1), dataset[i+10])
setattr
(
jl
,
"
xtr_fold
"
+
str
(
i
+
1
),
dataset
[
i
+
7
])
setattr
(
jl
,
"
ytr_fold
"
+
str
(
i
+
1
),
dataset
[
i
+
13
])
setattr
(
jl
,
"
xte_fold
"
+
str
(
i
+
1
),
dataset
[
i
+
4
])
# setattr(jl, "yte_fold"+str(i+1), dataset[i+10])
# prepare to send dataframes to julia and Jchemo
# prepare to send dataframes to julia and Jchemo
(with the jl. prefix)
jl
.
x_train
,
jl
.
y_train
,
jl
.
x_test
,
jl
.
y_test
=
self
.
x_train
,
self
.
y_train
,
self
.
x_test
,
self
.
y_test
# initialize vars from the class
y_shape
=
self
.
y_test
.
shape
self
.
predicted_results_on_test
=
pd
.
DataFrame
self
.
predicted_results_on_train
=
pd
.
DataFrame
self
.
pred_test
=
np
.
zeros
(
shape
=
(
y_shape
[
0
],
1
))
self
.
pred_train
=
np
.
zeros
(
shape
=
(
y_shape
[
0
],
1
))
self
.
mod
=
""
...
...
@@ -52,7 +47,7 @@ class LWPLSR:
Returns:
self.mod (Julia model): the prepared model
"""
# launch Julia Jchemo lwplsr
# launch Julia Jchemo lwplsr
and convert DataFrames from Python Pandas DataFrame to Julia DataFrame
jl
.
seval
(
"""
using DataFrames
using Pandas
...
...
@@ -63,7 +58,7 @@ class LWPLSR:
y_test |> Pandas.DataFrame |> DataFrames.DataFrame
"""
)
print
(
'
LWPLSR - tuning
'
)
# set tuning parameters
# set tuning parameters
to test
jl
.
seval
(
"""
nlvdis = [5; 10; 15] ; metric = [:eucl; :mah]
h = [1; 2; 6; Inf] ; k = [30; 80; 200]
...
...
@@ -83,21 +78,22 @@ class LWPLSR:
ncal = ntrain - nval
"""
)
# Create LWPLSR model and tune
# Create LWPLSR model and tune
with GridScore
jl
.
seval
(
"""
mod = Jchemo.model(Jchemo.lwplsr)
res = gridscore(mod, Xcal, ycal, Xval, yval; score = Jchemo.rmsep, pars, nlv, verbose = false)
u = findall(res.y1 .== minimum(res.y1))[1] #best parameters combination
"""
)
# save best lwplsr parameters
self
.
best_lwplsr_params
=
{
'
nlvdis
'
:
jl
.
res
.
nlvdis
[
jl
.
u
],
'
metric
'
:
str
(
jl
.
res
.
metric
[
jl
.
u
]),
'
h
'
:
jl
.
res
.
h
[
jl
.
u
],
'
k
'
:
jl
.
res
.
k
[
jl
.
u
],
'
nlv
'
:
jl
.
res
.
nlv
[
jl
.
u
]}
print
(
'
best lwplsr params
'
+
str
(
self
.
best_lwplsr_params
))
print
(
'
LWPLSR - best params ok
'
)
# calculate LWPLSR model with best parameters
# run LWPLSR model with best parameters
jl
.
seval
(
"""
mod = Jchemo.model(Jchemo.lwplsr; nlvdis = res.nlvdis[u], metric = res.metric[u], h = res.h[u], k = res.k[u], nlv = res.nlv[u])
# Fit model
Jchemo.fit!(mod, x_train, y_train)
"""
)
# save Julia Jchemo model
self
.
mod
=
jl
.
mod
def
Jchemo_lwplsr_predict
(
self
):
...
...
@@ -128,14 +124,13 @@ class LWPLSR:
print
(
'
LWPLSR - end
'
)
def
Jchemo_lwplsr_cv
(
self
):
"""
Send data to Julia to predict with lwplsr.
"""
Send
Cross-Validation
data to Julia to
fit &
predict with lwplsr.
Args:
self.
mod (Julia model): the prepared model
self.
best_lwplsr_params: the best parameters to use (from tuning) for CV
self.xtr_fold1 (DataFrame):
self.ytr_fold1 (DataFrame):
self.xte_fold1 (DataFrame):
self.yte_fold1 (DataFrame):
Returns:
self.pred_cv (Julia DataFrame): predicted values on x_train with Cross-Validation
...
...
@@ -144,7 +139,7 @@ class LWPLSR:
jl
.
Xtr
=
getattr
(
self
,
"
xtr_fold
"
+
str
(
i
+
1
))
jl
.
Ytr
=
getattr
(
self
,
"
ytr_fold
"
+
str
(
i
+
1
))
jl
.
Xte
=
getattr
(
self
,
"
xte_fold
"
+
str
(
i
+
1
))
#
jl.Yte = getattr(self, "yte_fold"+str(i+1))
#
convert Python Pandas DataFrame to Julia DataFrame
jl
.
seval
(
"""
using DataFrames
using Pandas
...
...
@@ -153,6 +148,7 @@ class LWPLSR:
Ytr |> Pandas.DataFrame |> DataFrames.DataFrame
Xte |> Pandas.DataFrame |> DataFrames.DataFrame
"""
)
# set lwplsr parameters as the best one from tuning
jl
.
nlvdis
=
int
(
self
.
best_lwplsr_params
[
'
nlvdis
'
])
jl
.
metric
=
self
.
best_lwplsr_params
[
'
metric
'
]
jl
.
h
=
self
.
best_lwplsr_params
[
'
h
'
]
...
...
@@ -169,15 +165,14 @@ class LWPLSR:
res = Jchemo.predict(mod_cv, Xte)
res.pred
"""
)
# save predicted values for each KFold in the predicted_results dictionary
self
.
predicted_results
[
"
CV
"
+
str
(
i
+
1
)]
=
pd
.
DataFrame
(
pred_cv
)
@property
def
pred_data_
(
self
):
# convert predicted data from x_test to Pandas DataFrame
self
.
predicted_results_on_test
=
pd
.
DataFrame
(
self
.
pred_test
)
self
.
predicted_results_on_train
=
pd
.
DataFrame
(
self
.
pred_train
)
self
.
predicted_results
[
"
pred_data_train
"
]
=
self
.
predicted_results_on_train
self
.
predicted_results
[
"
pred_data_test
"
]
=
self
.
predicted_results_on_test
self
.
predicted_results
[
"
pred_data_train
"
]
=
pd
.
DataFrame
(
self
.
pred_train
)
self
.
predicted_results
[
"
pred_data_test
"
]
=
pd
.
DataFrame
(
self
.
pred_test
)
return
self
.
predicted_results
@property
...
...
This diff is collapsed.
Click to expand it.
src/Class_Mod/LWPLSR_Call.py
+
9
−
2
View file @
7c9546c2
...
...
@@ -7,35 +7,42 @@ import os
# loading the lwplsr_inputs.json
temp_path
=
Path
(
"
temp/
"
)
data_to_work_with
=
[
'
x_train_np
'
,
'
y_train_np
'
,
'
x_test_np
'
,
'
y_test_np
'
]
# check data for cross-validation depending on KFold number
temp_files_list
=
os
.
listdir
(
temp_path
)
nb_fold
=
0
for
i
in
temp_files_list
:
if
'
fold
'
in
i
:
# add CV file name to data_to_work_with
data_to_work_with
.
append
(
str
(
i
)[:
-
4
])
# and count the number of KFold
nb_fold
+=
1
# Import data from csv files in the temp/ folder
dataset
=
[]
for
i
in
data_to_work_with
:
dataset
.
append
(
np
.
genfromtxt
(
temp_path
/
str
(
i
+
"
.csv
"
),
delimiter
=
'
,
'
))
print
(
'
CSV imported
'
)
# launch LWPLSR Class from LWPLSR_.py in Class_Mod
print
(
'
start model creation
'
)
Reg
=
LWPLSR
(
dataset
)
print
(
'
model created.
\n
now fit
'
)
LWPLSR
.
Jchemo_lwplsr_fit
(
Reg
)
print
(
'
now predict
'
)
LWPLSR
.
Jchemo_lwplsr_predict
(
Reg
)
print
(
'
now CV
'
)
LWPLSR
.
Jchemo_lwplsr_cv
(
Reg
)
# Export results in a json file to bring data back to 2-model_creation.py and streamlit interface
print
(
'
export to json
'
)
pred
=
[
'
pred_data_train
'
,
'
pred_data_test
'
]
# add KFold results to predicted data
for
i
in
range
(
int
(
nb_fold
/
4
)):
pred
.
append
(
"
CV
"
+
str
(
i
+
1
))
json_export
=
{}
for
i
in
pred
:
json_export
[
i
]
=
Reg
.
pred_data_
[
i
].
to_dict
()
# add the lwplsr global model to the json
json_export
[
'
model
'
]
=
str
(
Reg
.
model_
)
# add the best parameters for the lwplsr obtained from GridScore tuning
json_export
[
'
best_lwplsr_params
'
]
=
Reg
.
best_lwplsr_params_
with
open
(
temp_path
/
"
lwplsr_outputs.json
"
,
"
w+
"
)
as
outfile
:
json
.
dump
(
json_export
,
outfile
)
This diff is collapsed.
Click to expand it.
Preview
0%
Loading
Try again
or
attach a new file
.
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Save comment
Cancel
Please
register
or
sign in
to comment