Skip to content
Snippets Groups Projects
Commit 9fdcc282 authored by Nicolas Barthes
Browse files

LWPLSR CV can export results as a json back to streamlit

parent ec96eafc
No related branches found
No related tags found
No related merge requests found
...@@ -16,15 +16,17 @@ class LWPLSR: ...@@ -16,15 +16,17 @@ class LWPLSR:
# self.x_train, self.y_train, self.x_test, self.y_test = [dataset[i] for i in range(len(dataset))] # self.x_train, self.y_train, self.x_test, self.y_test = [dataset[i] for i in range(len(dataset))]
self.x_train, self.y_train, self.x_test, self.y_test = [dataset[i] for i in range(4)] self.x_train, self.y_train, self.x_test, self.y_test = [dataset[i] for i in range(4)]
nb_fold = int((len(dataset)-4)/4) self.nb_fold = int((len(dataset)-4)/4)
for i in range(nb_fold): for i in range(self.nb_fold):
vars()["self.xtr_fold"+str(i+1)] = dataset[i+7] setattr(self, "xtr_fold"+str(i+1), dataset[i+7])
vars()["self.ytr_fold"+str(i+1)] = dataset[i+13] setattr(self, "ytr_fold"+str(i+1), dataset[i+13])
vars()["self.xte_fold"+str(i+1)] = dataset[i+4] setattr(self, "xte_fold"+str(i+1), dataset[i+4])
vars()["self.yte_fold"+str(i+1)] = dataset[i+10] # setattr(self, "yte_fold"+str(i+1), dataset[i+10])
setattr(jl, "xtr_fold"+str(i+1), dataset[i+7])
setattr(jl, "ytr_fold"+str(i+1), dataset[i+13])
setattr(jl, "xte_fold"+str(i+1), dataset[i+4])
# setattr(jl, "yte_fold"+str(i+1), dataset[i+10])
print(self.xtr_fold1)
# prepare to send dataframes to julia and Jchemo # prepare to send dataframes to julia and Jchemo
jl.x_train, jl.y_train, jl.x_test, jl.y_test = self.x_train, self.y_train, self.x_test, self.y_test jl.x_train, jl.y_train, jl.x_test, jl.y_test = self.x_train, self.y_train, self.x_test, self.y_test
...@@ -32,20 +34,20 @@ class LWPLSR: ...@@ -32,20 +34,20 @@ class LWPLSR:
y_shape = self.y_test.shape y_shape = self.y_test.shape
self.predicted_results_on_test = pd.DataFrame self.predicted_results_on_test = pd.DataFrame
self.predicted_results_on_train = pd.DataFrame self.predicted_results_on_train = pd.DataFrame
self.predicted_results_on_cv = pd.DataFrame
self.pred_test = np.zeros(shape=(y_shape[0], 1)) self.pred_test = np.zeros(shape=(y_shape[0], 1))
self.pred_train = np.zeros(shape=(y_shape[0], 1)) self.pred_train = np.zeros(shape=(y_shape[0], 1))
self.mod = "" self.mod = ""
self.best_lwplsr_params = np.zeros(shape=(5, 1)) self.best_lwplsr_params = np.zeros(shape=(5, 1))
self.predicted_results = {}
def Jchemo_lwplsr_fit(self): def Jchemo_lwplsr_fit(self):
"""Send data to Julia to fit lwplsr. """Send data to Julia to fit lwplsr.
Args: Args:
self.jl.x_train (DataFrame): self.x_train (DataFrame):
self.jl.y_train (DataFrame): self.y_train (DataFrame):
self.jl.x_test (DataFrame): self.x_test (DataFrame):
self.jl.y_test (DataFrame): self.y_test (DataFrame):
Returns: Returns:
self.mod (Julia model): the prepared model self.mod (Julia model): the prepared model
...@@ -88,7 +90,7 @@ class LWPLSR: ...@@ -88,7 +90,7 @@ class LWPLSR:
u = findall(res.y1 .== minimum(res.y1))[1] #best parameters combination u = findall(res.y1 .== minimum(res.y1))[1] #best parameters combination
""") """)
self.best_lwplsr_params = {'nlvdis' : jl.res.nlvdis[jl.u], 'metric' : str(jl.res.metric[jl.u]), 'h' : jl.res.h[jl.u], 'k' : jl.res.k[jl.u], 'nlv' : jl.res.nlv[jl.u]} self.best_lwplsr_params = {'nlvdis' : jl.res.nlvdis[jl.u], 'metric' : str(jl.res.metric[jl.u]), 'h' : jl.res.h[jl.u], 'k' : jl.res.k[jl.u], 'nlv' : jl.res.nlv[jl.u]}
print('best lwplsr params' + str(self.best_lwplsr_params)) print('best lwplsr params ' + str(self.best_lwplsr_params))
print('LWPLSR - best params ok') print('LWPLSR - best params ok')
# calculate LWPLSR model with best parameters # calculate LWPLSR model with best parameters
jl.seval(""" jl.seval("""
...@@ -103,15 +105,14 @@ class LWPLSR: ...@@ -103,15 +105,14 @@ class LWPLSR:
Args: Args:
self.mod (Julia model): the prepared model self.mod (Julia model): the prepared model
self.jl.x_train (DataFrame): self.x_train (DataFrame):
self.jl.y_train (DataFrame): self.y_train (DataFrame):
self.jl.x_test (DataFrame): self.x_test (DataFrame):
self.jl.y_test (DataFrame): self.y_test (DataFrame):
Returns: Returns:
self.pred_test (Julia DataFrame): predicted values on x_test self.pred_test (Julia DataFrame): predicted values on x_test
self.pred_train (Julia DataFrame): predicted values on x_train self.pred_train (Julia DataFrame): predicted values on x_train
self.pred_cv (Julia DataFrame): predicted values on x_train with Cross-Validation
""" """
# Predictions on x_test and store in self.pred # Predictions on x_test and store in self.pred
self.pred_test = jl.seval(""" self.pred_test = jl.seval("""
...@@ -126,13 +127,58 @@ class LWPLSR: ...@@ -126,13 +127,58 @@ class LWPLSR:
""") """)
print('LWPLSR - end') print('LWPLSR - end')
def Jchemo_lwplsr_cv(self):
    """Fit and predict an LWPLSR model on every cross-validation fold in Julia.

    For each fold ``i`` (1-based, up to ``self.nb_fold``) a new Jchemo LWPLSR
    model is created with the parameters held in ``self.best_lwplsr_params``,
    fitted on the fold's training data and used to predict the fold's test
    data.

    Args:
        self.nb_fold (int): number of folds to process
        self.best_lwplsr_params (dict): must provide the keys
            'nlvdis', 'metric', 'h', 'k' and 'nlv'
        self.xtr_fold<i>: training predictors of fold i
        self.ytr_fold<i>: training responses of fold i
        self.xte_fold<i>: test predictors of fold i

    Returns:
        None. The predictions of fold i are stored as a pandas DataFrame in
        ``self.predicted_results["CV<i>"]``.
    """
    folds = range(self.nb_fold)
    if not folds:
        # Nothing to cross-validate: leave Julia and self untouched.
        return

    # Package loading does not depend on the fold, so do it only once.
    jl.seval("""
        using DataFrames
        using Pandas
        using Jchemo
    """)

    # The tuned parameters are the same for every fold: send them to Julia
    # once instead of on each iteration.
    best_params = self.best_lwplsr_params
    jl.nlvdis = int(best_params['nlvdis'])
    jl.metric = best_params['metric']
    jl.h = best_params['h']
    jl.k = int(best_params['k'])
    jl.nlv = int(best_params['nlv'])

    for i in folds:
        fold = i + 1
        # Expose the data of the current fold as Julia globals.
        jl.Xtr = getattr(self, f"xtr_fold{fold}")
        jl.Ytr = getattr(self, f"ytr_fold{fold}")
        jl.Xte = getattr(self, f"xte_fold{fold}")
        # jl.Yte = getattr(self, f"yte_fold{fold}")
        # NOTE(review): the results of these conversion pipelines are not
        # assigned back, so Xtr/Ytr/Xte appear to stay as they were sent from
        # Python -- confirm whether `Xtr = Xtr |> ...` was intended.
        jl.seval("""
            Xtr |> Pandas.DataFrame |> DataFrames.DataFrame
            Ytr |> Pandas.DataFrame |> DataFrames.DataFrame
            Xte |> Pandas.DataFrame |> DataFrames.DataFrame
        """)
        jl.seval("""
            println("LWPLSR - start CV mod")
            mod_cv = Jchemo.model(Jchemo.lwplsr; nlvdis = nlvdis, metric = Symbol(metric), h = h, k = k, nlv = nlv)
            # Fit model
            Jchemo.fit!(mod_cv, Xtr, Ytr)
        """)
        # The last Julia expression (res.pred) is what seval hands back.
        pred_cv = jl.seval("""
            println("LWPLSR - start CV predict")
            res = Jchemo.predict(mod_cv, Xte)
            res.pred
        """)
        self.predicted_results[f"CV{fold}"] = pd.DataFrame(pred_cv)
@property @property
def pred_data_(self): def pred_data_(self):
# convert predicted data from x_test to Pandas DataFrame # convert predicted data from x_test to Pandas DataFrame
self.predicted_results_on_test = pd.DataFrame(self.pred_test) self.predicted_results_on_test = pd.DataFrame(self.pred_test)
self.predicted_results_on_train = pd.DataFrame(self.pred_train) self.predicted_results_on_train = pd.DataFrame(self.pred_train)
return self.predicted_results_on_train, self.predicted_results_on_test self.predicted_results["pred_data_train"] = self.predicted_results_on_train
self.predicted_results["pred_data_test"] = self.predicted_results_on_test
return self.predicted_results
@property @property
def model_(self): def model_(self):
......
...@@ -8,9 +8,11 @@ import os ...@@ -8,9 +8,11 @@ import os
temp_path = Path("temp/") temp_path = Path("temp/")
data_to_work_with = ['x_train_np', 'y_train_np', 'x_test_np', 'y_test_np'] data_to_work_with = ['x_train_np', 'y_train_np', 'x_test_np', 'y_test_np']
temp_files_list = os.listdir(temp_path) temp_files_list = os.listdir(temp_path)
nb_fold = 0
for i in temp_files_list: for i in temp_files_list:
if 'fold' in i: if 'fold' in i:
data_to_work_with.append(str(i)[:-4]) data_to_work_with.append(str(i)[:-4])
nb_fold += 1
dataset = [] dataset = []
for i in data_to_work_with: for i in data_to_work_with:
dataset.append(np.genfromtxt(temp_path / str(i + ".csv"), delimiter=',')) dataset.append(np.genfromtxt(temp_path / str(i + ".csv"), delimiter=','))
...@@ -23,14 +25,16 @@ print('now predict') ...@@ -23,14 +25,16 @@ print('now predict')
LWPLSR.Jchemo_lwplsr_predict(Reg) LWPLSR.Jchemo_lwplsr_predict(Reg)
print('now CV') print('now CV')
LWPLSR.Jchemo_lwplsr_cv(Reg)
print('export to json') print('export to json')
pred = ['pred_data_train', 'pred_data_test'] pred = ['pred_data_train', 'pred_data_test']
for i in range(int(nb_fold/4)):
pred.append("CV" + str(i+1))
json_export = {} json_export = {}
for i in pred: for i in pred:
json_export[i] = Reg.pred_data_[pred.index(i)].to_dict() json_export[i] = Reg.pred_data_[i].to_dict()
json_export['model'] = str(Reg.model_) json_export['model'] = str(Reg.model_)
json_export['best_lwplsr_params'] = Reg.best_lwplsr_params_ json_export['best_lwplsr_params'] = Reg.best_lwplsr_params_
with open(temp_path / "lwplsr_outputs.json", "w+") as outfile: with open(temp_path / "lwplsr_outputs.json", "w+") as outfile:
......
File added
File added
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment