Commit 7c9546c2 authored by BARTHES Nicolas

LWPLSR subprocess documentation

parent 996d9c7f
@@ -7,3 +7,4 @@ src/Report/*.zip
src/Report/figures/*.pdf
src/Report/figures/*.png
config/config.json
data/params/Preprocessing.json
\ No newline at end of file
@@ -7,33 +7,28 @@ class LWPLSR:
Returns:
self.scores (DataFrame): various metrics and scores
self.predicted_results_on_train (DataFrame): predicted values on x_train
self.predicted_results_on_test (DataFrame): predicted values on x_test
self.predicted_results (Dictionary): Dict containing all predicted results (train, test, cross-validation)
self.mod (Julia model): the prepared model
"""
def __init__(self, dataset):
"""Initiate the LWPLSR and prepare data for Julia computing."""
# self.x_train, self.y_train, self.x_test, self.y_test = [dataset[i] for i in range(len(dataset))]
# get train / test data from dataset
self.x_train, self.y_train, self.x_test, self.y_test = [dataset[i] for i in range(4)]
# calculate number of KFolds and get CV data from dataset
self.nb_fold = int((len(dataset)-4)/4)
for i in range(self.nb_fold):
setattr(self, "xtr_fold"+str(i+1), dataset[i+7])
setattr(self, "ytr_fold"+str(i+1), dataset[i+13])
setattr(self, "xte_fold"+str(i+1), dataset[i+4])
# setattr(self, "yte_fold"+str(i+1), dataset[i+10])
setattr(jl, "xtr_fold"+str(i+1), dataset[i+7])
setattr(jl, "ytr_fold"+str(i+1), dataset[i+13])
setattr(jl, "xte_fold"+str(i+1), dataset[i+4])
# setattr(jl, "yte_fold"+str(i+1), dataset[i+10])
# prepare to send dataframes to julia and Jchemo
# prepare to send dataframes to julia and Jchemo (with the jl. prefix)
jl.x_train, jl.y_train, jl.x_test, jl.y_test = self.x_train, self.y_train, self.x_test, self.y_test
# initialize vars from the class
y_shape = self.y_test.shape
# placeholders, overwritten once predictions are computed
self.predicted_results_on_test = pd.DataFrame()
self.predicted_results_on_train = pd.DataFrame()
self.pred_test = np.zeros(shape=(y_shape[0], 1))
self.pred_train = np.zeros(shape=(self.y_train.shape[0], 1))
self.mod = ""
@@ -52,7 +47,7 @@ class LWPLSR:
Returns:
self.mod (Julia model): the prepared model
"""
# launch Julia Jchemo lwplsr
# launch Julia Jchemo lwplsr and convert DataFrames from Python Pandas DataFrame to Julia DataFrame
jl.seval("""
using DataFrames
using Pandas
@@ -63,7 +58,7 @@ class LWPLSR:
y_test |> Pandas.DataFrame |> DataFrames.DataFrame
""")
print('LWPLSR - tuning')
# set tuning parameters
# set tuning parameters to test
jl.seval("""
nlvdis = [5; 10; 15] ; metric = [:eucl; :mah]
h = [1; 2; 6; Inf] ; k = [30; 80; 200]
@@ -83,21 +78,22 @@ class LWPLSR:
ncal = ntrain - nval
""")
# Create LWPLSR model and tune
# Create LWPLSR model and tune with GridScore
jl.seval("""
mod = Jchemo.model(Jchemo.lwplsr)
res = gridscore(mod, Xcal, ycal, Xval, yval; score = Jchemo.rmsep, pars, nlv, verbose = false)
u = findall(res.y1 .== minimum(res.y1))[1] #best parameters combination
""")
# save best lwplsr parameters
self.best_lwplsr_params = {'nlvdis' : jl.res.nlvdis[jl.u], 'metric' : str(jl.res.metric[jl.u]), 'h' : jl.res.h[jl.u], 'k' : jl.res.k[jl.u], 'nlv' : jl.res.nlv[jl.u]}
print('best lwplsr params ' + str(self.best_lwplsr_params))
print('LWPLSR - best params ok')
# calculate LWPLSR model with best parameters
# run LWPLSR model with best parameters
jl.seval("""
mod = Jchemo.model(Jchemo.lwplsr; nlvdis = res.nlvdis[u], metric = res.metric[u], h = res.h[u], k = res.k[u], nlv = res.nlv[u])
# Fit model
Jchemo.fit!(mod, x_train, y_train)
""")
# save Julia Jchemo model
self.mod = jl.mod
def Jchemo_lwplsr_predict(self):
@@ -128,14 +124,13 @@ class LWPLSR:
print('LWPLSR - end')
def Jchemo_lwplsr_cv(self):
"""Send data to Julia to predict with lwplsr.
"""Send Cross-Validation data to Julia to fit & predict with lwplsr.
Args:
self.mod (Julia model): the prepared model
self.best_lwplsr_params: the best parameters to use (from tuning) for CV
self.xtr_fold1 (DataFrame): X training data for fold 1 (one attribute per fold)
self.ytr_fold1 (DataFrame): y training data for fold 1
self.xte_fold1 (DataFrame): X test data for fold 1
self.yte_fold1 (DataFrame): y test data for fold 1
Returns:
self.pred_cv (Julia DataFrame): predicted values on x_train with Cross-Validation
@@ -144,7 +139,7 @@ class LWPLSR:
jl.Xtr = getattr(self, "xtr_fold"+str(i+1))
jl.Ytr = getattr(self, "ytr_fold"+str(i+1))
jl.Xte = getattr(self, "xte_fold"+str(i+1))
# jl.Yte = getattr(self, "yte_fold"+str(i+1))
# convert Python Pandas DataFrame to Julia DataFrame
jl.seval("""
using DataFrames
using Pandas
@@ -153,6 +148,7 @@ class LWPLSR:
Ytr |> Pandas.DataFrame |> DataFrames.DataFrame
Xte |> Pandas.DataFrame |> DataFrames.DataFrame
""")
# set lwplsr parameters as the best one from tuning
jl.nlvdis = int(self.best_lwplsr_params['nlvdis'])
jl.metric = self.best_lwplsr_params['metric']
jl.h = self.best_lwplsr_params['h']
@@ -169,15 +165,14 @@ class LWPLSR:
res = Jchemo.predict(mod_cv, Xte)
res.pred
""")
# save predicted values for each KFold in the predicted_results dictionary
self.predicted_results["CV" + str(i+1)] = pd.DataFrame(pred_cv)
@property
def pred_data_(self):
# convert predicted data from x_test to Pandas DataFrame
self.predicted_results_on_test = pd.DataFrame(self.pred_test)
self.predicted_results_on_train = pd.DataFrame(self.pred_train)
self.predicted_results["pred_data_train"] = self.predicted_results_on_train
self.predicted_results["pred_data_test"] = self.predicted_results_on_test
self.predicted_results["pred_data_train"] = pd.DataFrame(self.pred_train)
self.predicted_results["pred_data_test"] = pd.DataFrame(self.pred_test)
return self.predicted_results
@property
......
@@ -7,35 +7,42 @@ import os
# load the data prepared for the LWPLSR subprocess (CSV files in temp/)
temp_path = Path("temp/")
data_to_work_with = ['x_train_np', 'y_train_np', 'x_test_np', 'y_test_np']
# check data for cross-validation depending on KFold number
temp_files_list = os.listdir(temp_path)
nb_fold = 0
for i in temp_files_list:
if 'fold' in i:
# add CV file name to data_to_work_with
data_to_work_with.append(str(i)[:-4])
# and count the number of KFold
nb_fold += 1
# Import data from csv files in the temp/ folder
dataset = []
for i in data_to_work_with:
dataset.append(np.genfromtxt(temp_path / str(i + ".csv"), delimiter=','))
print('CSV imported')
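# NOTE (inferred from the file names above): the calling process is expected to have written
# one CSV per array into temp/ before spawning this script, e.g. with
#   np.savetxt(temp_path / "x_train_np.csv", x_train, delimiter=",")
# plus xtr_fold<k>.csv / ytr_fold<k>.csv / xte_fold<k>.csv / yte_fold<k>.csv for each CV fold,
# which is what the 'fold' counting loop above relies on.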
# launch LWPLSR Class from LWPLSR_.py in Class_Mod
print('start model creation')
Reg = LWPLSR(dataset)
print('model created. \nnow fit')
Reg.Jchemo_lwplsr_fit()
print('now predict')
Reg.Jchemo_lwplsr_predict()
print('now CV')
Reg.Jchemo_lwplsr_cv()
# Export results in a json file to bring data back to 2-model_creation.py and streamlit interface
print('export to json')
pred = ['pred_data_train', 'pred_data_test']
# add KFold results to predicted data
for i in range(int(nb_fold/4)):
pred.append("CV" + str(i+1))
json_export = {}
for i in pred:
json_export[i] = Reg.pred_data_[i].to_dict()
# add the lwplsr global model to the json
json_export['model'] = str(Reg.model_)
# add the best parameters for the lwplsr obtained from GridScore tuning
json_export['best_lwplsr_params'] = Reg.best_lwplsr_params_
with open(temp_path / "lwplsr_outputs.json", "w+") as outfile:
json.dump(json_export, outfile)
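The comments above say the JSON output is read back by 2-model_creation.py and the streamlit interface; that caller is not part of this diff. A hedged sketch of what the round trip could look like on the calling side (the worker script name and the use of from_dict are assumptions, not taken from this commit):

import json
import subprocess
from pathlib import Path

import pandas as pd

temp_path = Path("temp/")
# 1. the caller is assumed to have written the temp/*.csv files described above
# 2. run the LWPLSR worker as a separate Python process (script name is hypothetical)
subprocess.run(["python", "LWPLSR_call.py"], check=True)
# 3. read the predictions and tuning results back from the JSON written by the worker
with open(temp_path / "lwplsr_outputs.json", "r") as f:
    outputs = json.load(f)
pred_train = pd.DataFrame.from_dict(outputs["pred_data_train"])  # mirrors the .to_dict() export
pred_test = pd.DataFrame.from_dict(outputs["pred_data_test"])
best_params = outputs["best_lwplsr_params"]                      # GridScore-selected lwplsr parameters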