diff --git a/src/Class_Mod/LWPLSR_.py b/src/Class_Mod/LWPLSR_.py index 877ca504ebe13bf7b39ad942cba3e6ad80c3f097..d16d187497284ba00860aa75ee9a6fd977954254 100644 --- a/src/Class_Mod/LWPLSR_.py +++ b/src/Class_Mod/LWPLSR_.py @@ -1,29 +1,31 @@ from juliacall import Main as jl import numpy as np import pandas as pd +from sklearn.model_selection import KFold class LWPLSR: + """The lwpls regression model from Jchemo (M. Lesnoff) + + Returns: + self.scores (DataFrame): various metrics and scores + self.predicted_results_on_train (DataFrame): + self.predicted_results_on_cv (DataFrame): + self.predicted_results_on_test (DataFrame): + self.mod (Julia model): the prepared model """ - The lwpls regression model from Jchemo (M. Lesnoff) - """ - def __init__(self, x_train, y_train, x_test, y_test): + def __init__(self, x_train, y_train, x_test, y_test, x_train_cv1, y_train_cv1, x_test_cv1, y_test_cv1, x_train_cv2, y_train_cv2, x_test_cv2, y_test_cv2, x_train_cv3, y_train_cv3, x_test_cv3, y_test_cv3): """Initiate the LWPLSR and prepare data for Julia computing.""" + self.x_train, self.y_train, self.x_test, self.y_test = x_train, y_train, x_test, y_test + self.x_train_cv1, self.y_train_cv1, self.x_test_cv1, self.y_test_cv1 = x_train_cv1, y_train_cv1, x_test_cv1, y_test_cv1 + self.x_train_cv2, self.y_train_cv2, self.x_test_cv2, self.y_test_cv2 = x_train_cv2, y_train_cv2, x_test_cv2, y_test_cv2 + self.x_train_cv3, self.y_train_cv3, self.x_test_cv3, self.y_test_cv3 = x_train_cv3, y_train_cv3, x_test_cv3, y_test_cv3 # prepare to send dataframes to julia and Jchemo jl.x_train, jl.y_train, jl.x_test, jl.y_test = self.x_train, self.y_train, self.x_test, self.y_test - # Pre-treatment of x_train and x_test - # jl.seval(""" - # # using DataFrames - # # using Pandas - # using Jchemo - # mod1 = Jchemo.model(snv; centr = true, scal = true) - # mod2 = Jchemo.model(savgol; npoint = 15, deriv = 1, degree = 2) - # mod = Jchemo.pip(mod1, mod2) - # Jchemo.fit!(mod, x_train) - # x_train = Jchemo.transf(mod1, x_train) - # Jchemo.fit!(mod, x_test) - # x_test = Jchemo.transf(mod1, x_test) - # """) + jl.x_train_cv1, jl.y_train_cv1, jl.x_test_cv1, jl.y_test_cv1 = self.x_train_cv1, self.y_train_cv1, self.x_test_cv1, self.y_test_cv1 + jl.x_train_cv2, jl.y_train_cv2, jl.x_test_cv2, jl.y_test_cv2 = self.x_train_cv2, self.y_train_cv2, self.x_test_cv2, self.y_test_cv2 + jl.x_train_cv3, jl.y_train_cv3, jl.x_test_cv3, jl.y_test_cv3= self.x_train_cv3, self.y_train_cv3, self.x_test_cv3, self.y_test_cv3 + # optimize lwplsr parameters with Jchemo # jl.seval(""" # ntrain = nro(x_train) # segm = segmkf(ntrain, 4; rep = 5) @@ -36,17 +38,22 @@ class LWPLSR: # initialize vars from the class y_shape = y_test.shape - self.scores = pd.DataFrame + y_shape_cv1 = y_test_cv1.shape + y_shape_cv2 = y_test_cv2.shape + y_shape_cv3 = y_test_cv3.shape + # self.scores = pd.DataFrame self.predicted_results_on_test = pd.DataFrame self.predicted_results_on_train = pd.DataFrame self.predicted_results_on_cv = pd.DataFrame self.pred_test = np.zeros(shape=(y_shape[0], 1)) self.pred_train = np.zeros(shape=(y_shape[0], 1)) - self.pred_cv = np.zeros(shape=(y_shape[0], 1)) + self.pred_cv1 = np.zeros(shape=(y_shape_cv1[0], 1)) + self.pred_cv2 = np.zeros(shape=(y_shape_cv2[0], 1)) + self.pred_cv3 = np.zeros(shape=(y_shape_cv3[0], 1)) self.mod = "" def Jchemo_lwplsr_fit(self): - """Send data to Julia to compute lwplsr. + """Send data to Julia to fit lwplsr. Args: self.jl.x_train (DataFrame): @@ -55,8 +62,7 @@ class LWPLSR: self.jl.y_test (DataFrame): Returns: - self.scores (DataFrame): various metrics and scores - self.predicted_results_on_test (DataFrame): + self.mod (Julia model): the prepared model """ # launch Julia Jchemo lwplsr jl.seval(""" @@ -67,6 +73,18 @@ class LWPLSR: y_train |> Pandas.DataFrame |> DataFrames.DataFrame x_test |> Pandas.DataFrame |> DataFrames.DataFrame y_test |> Pandas.DataFrame |> DataFrames.DataFrame + x_train_cv1 |> Pandas.DataFrame |> DataFrames.DataFrame + y_train_cv1 |> Pandas.DataFrame |> DataFrames.DataFrame + x_test_cv1 |> Pandas.DataFrame |> DataFrames.DataFrame + y_test_cv1 |> Pandas.DataFrame |> DataFrames.DataFrame + x_train_cv2 |> Pandas.DataFrame |> DataFrames.DataFrame + y_train_cv2 |> Pandas.DataFrame |> DataFrames.DataFrame + x_test_cv2 |> Pandas.DataFrame |> DataFrames.DataFrame + y_test_cv2 |> Pandas.DataFrame |> DataFrames.DataFrame + x_train_cv3 |> Pandas.DataFrame |> DataFrames.DataFrame + y_train_cv3 |> Pandas.DataFrame |> DataFrames.DataFrame + x_test_cv3 |> Pandas.DataFrame |> DataFrames.DataFrame + y_test_cv3 |> Pandas.DataFrame |> DataFrames.DataFrame """) # Create LWPLSR model and fit jl.seval(""" @@ -76,6 +94,18 @@ class LWPLSR: # Fit model Jchemo.fit!(mod, x_train, y_train) """) + # CV model and fit + jl.seval(""" + nlvdis = 5 ; metric = :mah + h = 1 ; k = 200 ; nlv = 15 #; scal = true + mod_cv1 = mod + mod_cv2 = mod + mod_cv3 = mod + # Fit model + Jchemo.fit!(mod_cv1, x_train_cv1, y_train_cv1) + Jchemo.fit!(mod_cv2, x_train_cv2, y_train_cv2) + Jchemo.fit!(mod_cv3, x_train_cv3, y_train_cv3) + """) # jl.seval(""" # mod = Jchemo.model(Jchemo.lwplsr) @@ -87,77 +117,107 @@ class LWPLSR: self.mod = jl.mod def Jchemo_lwplsr_predict(self): + """Send data to Julia to predict with lwplsr. + + Args: + self.mod (Julia model): the prepared model + self.jl.x_train (DataFrame): + self.jl.y_train (DataFrame): + self.jl.x_test (DataFrame): + self.jl.y_test (DataFrame): + + Returns: + self.pred_test (Julia DataFrame): predicted values on x_test + self.pred_train (Julia DataFrame): predicted values on x_train + self.pred_cv (Julia DataFrame): predicted values on x_train with Cross-Validation + """ # Predictions on x_test and store in self.pred self.pred_test = jl.seval(""" + println("start test predict") res = Jchemo.predict(mod, x_test) res.pred """) self.pred_train = jl.seval(""" + println("start train predict") res = Jchemo.predict(mod, x_train) res.pred """) - self.pred_cv = self.pred_train - + self.pred_cv1 = jl.seval(""" + println("start test_cv1 predict") + res = Jchemo.predict(mod_cv1, x_test_cv1) + res.pred + """) + self.pred_cv2 = jl.seval(""" + println("start test_cv2 predict") + res = Jchemo.predict(mod_cv2, x_test_cv2) + res.pred + """) + self.pred_cv3 = jl.seval(""" + println("start test_cv3 predict") + res = Jchemo.predict(mod_cv3, x_test_cv3) + res.pred + """) @property def pred_data_(self): # convert predicted data from x_test to Pandas DataFrame self.predicted_results_on_test = pd.DataFrame(self.pred_test) self.predicted_results_on_train = pd.DataFrame(self.pred_train) - # self.predicted_results_on_cv = pd.DataFrame(self.pred_cv) - self.predicted_results_on_cv = pd.DataFrame(self.pred_train) - return self.predicted_results_on_train, self.predicted_results_on_cv, self.predicted_results_on_test + self.predicted_results_on_cv1 = pd.DataFrame(self.pred_cv1) + self.predicted_results_on_cv2 = pd.DataFrame(self.pred_cv2) + self.predicted_results_on_cv3 = pd.DataFrame(self.pred_cv3) + return self.predicted_results_on_train, self.predicted_results_on_cv1, self.predicted_results_on_cv2, self.predicted_results_on_cv3, self.predicted_results_on_test @property def model_(self): return self.mod - @property - def metrics_(self): - jl.pred_test = self.pred_test - jl.seval(""" - using Jchemo - """) - scorermsep_test = jl.seval(""" - first(Jchemo.rmsep(pred_test, y_test)) - """) - scoremr2_test = jl.seval(""" - first(Jchemo.r2(pred_test, y_test)) - """) - scorerpd_test = jl.seval(""" - first(Jchemo.rpd(pred_test, y_test)) - """) - scoremsep_test = jl.seval(""" - first(Jchemo.sep(pred_test, y_test)) - """) - jl.pred_train = self.pred_train - scorermsep_train = jl.seval(""" - first(Jchemo.rmsep(pred_train, y_train)) - """) - scoremr2_train = jl.seval(""" - first(Jchemo.r2(pred_train, y_train)) - """) - scorerpd_train = jl.seval(""" - first(Jchemo.rpd(pred_train, y_train)) - """) - scoremsep_train = jl.seval(""" - first(Jchemo.sep(pred_train, y_train)) - """) - jl.pred_cv = self.pred_cv - scorermsep_cv = jl.seval(""" - first(Jchemo.rmsep(pred_cv, y_train)) - """) - scoremr2_cv = jl.seval(""" - first(Jchemo.r2(pred_cv, y_train)) - """) - scorerpd_cv = jl.seval(""" - first(Jchemo.rpd(pred_cv, y_train)) - """) - scoremsep_cv = jl.seval(""" - first(Jchemo.sep(pred_cv, y_train)) - """) - - - self.scores = pd.DataFrame([[scoremr2_test, scorermsep_test, scoremsep_test, scorerpd_test]], columns=['r2', 'rmsep', 'msep', 'rpd'], index=['test']) - self.scores = pd.concat([self.scores, pd.DataFrame([[scoremr2_train, scorermsep_train, scoremsep_train, scorerpd_train]], columns=['r2', 'rmsep', 'msep', 'rpd'], index = ["train"]), pd.DataFrame([[scoremr2_cv, scorermsep_cv, scoremsep_cv, scorerpd_cv]], columns=['r2', 'rmsep', 'msep', 'rpd'], index = ["cv"])])# - return self.scores + # @property + # def metrics_(self): + # jl.pred_test = self.pred_test + # jl.seval(""" + # using Jchemo + # """) + # scorermsep_test = jl.seval(""" + # first(Jchemo.rmsep(pred_test, y_test)) + # """) + # scoremr2_test = jl.seval(""" + # first(Jchemo.r2(pred_test, y_test)) + # """) + # scorerpd_test = jl.seval(""" + # first(Jchemo.rpd(pred_test, y_test)) + # """) + # scoremsep_test = jl.seval(""" + # first(Jchemo.sep(pred_test, y_test)) + # """) + # jl.pred_train = self.pred_train + # scorermsep_train = jl.seval(""" + # first(Jchemo.rmsep(pred_train, y_train)) + # """) + # scoremr2_train = jl.seval(""" + # first(Jchemo.r2(pred_train, y_train)) + # """) + # scorerpd_train = jl.seval(""" + # first(Jchemo.rpd(pred_train, y_train)) + # """) + # scoremsep_train = jl.seval(""" + # first(Jchemo.sep(pred_train, y_train)) + # """) + # jl.pred_cv = self.pred_cv + # scorermsep_cv = jl.seval(""" + # first(Jchemo.rmsep(pred_cv, y_train)) + # """) + # scoremr2_cv = jl.seval(""" + # first(Jchemo.r2(pred_cv, y_train)) + # """) + # scorerpd_cv = jl.seval(""" + # first(Jchemo.rpd(pred_cv, y_train)) + # """) + # scoremsep_cv = jl.seval(""" + # first(Jchemo.sep(pred_cv, y_train)) + # """) + # + # + # self.scores = pd.DataFrame([[scoremr2_test, scorermsep_test, scoremsep_test, scorerpd_test]], columns=['r2', 'rmsep', 'msep', 'rpd'], index=['test']) + # self.scores = pd.concat([self.scores, pd.DataFrame([[scoremr2_train, scorermsep_train, scoremsep_train, scorerpd_train]], columns=['r2', 'rmsep', 'msep', 'rpd'], index = ["train"]), pd.DataFrame([[scoremr2_cv, scorermsep_cv, scoremsep_cv, scorerpd_cv]], columns=['r2', 'rmsep', 'msep', 'rpd'], index = ["cv"])])# + # return self.scores diff --git a/src/Class_Mod/LWPLSR_Call.py b/src/Class_Mod/LWPLSR_Call.py index 47bf8c82ef890af6db341d1f359504064d15e421..3d8523b655c09df10e65f14637491e1687179e54 100644 --- a/src/Class_Mod/LWPLSR_Call.py +++ b/src/Class_Mod/LWPLSR_Call.py @@ -5,23 +5,24 @@ from LWPLSR_ import LWPLSR # loading the lwplsr_inputs.json temp_path = Path("temp/") -for i in ['x_train_np', 'y_train_np', 'x_test_np', 'y_test_np']: +for i in ['x_train_np', 'y_train_np', 'x_test_np', 'y_test_np']:#,'x_train_np_cv1', 'y_train_np_cv1', 'x_test_np_cv1', 'y_test_np_cv1', 'x_train_np_cv2', 'y_train_np_cv2', 'x_test_np_cv2', 'y_test_np_cv2', 'x_train_np_cv3', 'y_train_np_cv3', 'x_test_np_cv3', 'y_test_np_cv3',]: globals()[i] = np.genfromtxt(temp_path / str(i + ".csv"), delimiter=',') print('CSV imported') print('start model creation') -Reg = LWPLSR(x_train_np, y_train_np, x_test_np, y_test_np) +Reg = LWPLSR(x_train_np, y_train_np, x_test_np, y_test_np, x_train_np_cv1, y_train_np_cv1, x_test_np_cv1, y_test_np_cv1, x_train_np_cv2, y_train_np_cv2, x_test_np_cv2, y_test_np_cv2, x_train_np_cv3, y_train_np_cv3, x_test_np_cv3, y_test_np_cv3) print('model created. \n now fit') LWPLSR.Jchemo_lwplsr_fit(Reg) print('now predict') LWPLSR.Jchemo_lwplsr_predict(Reg) json_export = {} -data_to_export = ['model', 'pred_data', 'metrics'] json_export['pred_data_train'] = Reg.pred_data_[0].to_dict() -json_export['pred_data_cv'] = Reg.pred_data_[1].to_dict() -json_export['pred_data_test'] = Reg.pred_data_[2].to_dict() -json_export['metrics'] = Reg.metrics_.to_dict() +json_export['pred_data_cv1'] = Reg.pred_data_[1].to_dict() +json_export['pred_data_cv2'] = Reg.pred_data_[2].to_dict() +json_export['pred_data_cv3'] = Reg.pred_data_[3].to_dict() +json_export['pred_data_test'] = Reg.pred_data_[4].to_dict() json_export['model'] = str(Reg.model_) +# json_export['metrics'] = Reg.metrics_.to_dict() with open(temp_path / "lwplsr_outputs.json", "w+") as outfile: json.dump(json_export, outfile) print(Reg.metrics_) \ No newline at end of file diff --git a/src/pages/2-model_creation.py b/src/pages/2-model_creation.py index cde267b0c88d018f5ac8c0c6049ef380bdd83b99..2556511d54dbe58dbbe768ea857a347182c62cfa 100644 --- a/src/pages/2-model_creation.py +++ b/src/pages/2-model_creation.py @@ -140,8 +140,9 @@ if not spectra.empty and not y.empty: reg_model = Reg.model_ #M2.dataframe(Pin.pred_data_) elif regression_algo == reg_algo[2]: - data_to_work_with = ['x_train_np', 'y_train_np', 'x_test_np', 'y_test_np'] + data_to_work_with = ['x_train_np', 'y_train_np', 'x_test_np', 'y_test_np']#,'x_train_np_cv1', 'y_train_np_cv1', 'x_test_np_cv1', 'y_test_np_cv1', 'x_train_np_cv2', 'y_train_np_cv2', 'x_test_np_cv2', 'y_test_np_cv2', 'x_train_np_cv3', 'y_train_np_cv3', 'x_test_np_cv3', 'y_test_np_cv3',] x_train_np, y_train_np, x_test_np, y_test_np = X_train.to_numpy(), y_train.to_numpy(), X_test.to_numpy(), y_test.to_numpy() + # x_train_np_cv1, y_train_np_cv1, x_test_np_cv1, y_test_np_cv1, x_train_np_cv2, y_train_np_cv2, x_test_np_cv2, y_test_np_cv2, x_train_np_cv3, y_train_np_cv3, x_test_np_cv3, y_test_np_cv3, = X_train_cv1.to_numpy(), y_train_cv1.to_numpy(), X_test_cv1.to_numpy(), y_test_cv1.to_numpy(), X_train_cv2.to_numpy(), y_train_cv2.to_numpy(), X_test_cv2.to_numpy(), y_test_cv2.to_numpy(), X_train_cv3.to_numpy(), y_train_cv3.to_numpy(), X_test_cv3.to_numpy(), y_test_cv3.to_numpy() temp_path = Path('temp/') for i in data_to_work_with: np.savetxt(temp_path / str(i + ".csv"), vars()[i], delimiter=",") import subprocess @@ -151,13 +152,17 @@ if not spectra.empty and not y.empty: Reg_json = json.load(outfile) for i in data_to_work_with: os.unlink(temp_path / str(i + ".csv")) os.unlink(temp_path / "lwplsr_outputs.json") - Reg = type('obj', (object,), {'metrics_' : pd.json_normalize(Reg_json['metrics']), 'pred_data_' : [pd.json_normalize(Reg_json['pred_data_train']), pd.json_normalize(Reg_json['pred_data_cv']),pd.json_normalize(Reg_json['pred_data_test'])]}) + Reg = type('obj', (object,), {'model' : pd.json_normalize(Reg_json['model']), 'pred_data_' : [pd.json_normalize(Reg_json['pred_data_train']), pd.json_normalize(Reg_json['pred_data_cv1']), pd.json_normalize(Reg_json['pred_data_cv2']), pd.json_normalize(Reg_json['pred_data_cv3']), pd.json_normalize(Reg_json['pred_data_test'])]}) Reg.pred_data_[0] = Reg.pred_data_[0].T.reset_index().drop(columns = ['index']) Reg.pred_data_[0].index = list(y_train.index) - Reg.pred_data_[1] = Reg.pred_data_[1].T.reset_index().drop(columns = ['index']) - Reg.pred_data_[1].index = list(y_train.index) - Reg.pred_data_[2] = Reg.pred_data_[2].T.reset_index().drop(columns = ['index']) - Reg.pred_data_[2].index = list(y_test.index) + # Reg.pred_data_[1] = Reg.pred_data_[1].T.reset_index().drop(columns = ['index']) + # Reg.pred_data_[1].index = list(y_train_cv1.index) + # Reg.pred_data_[2] = Reg.pred_data_[2].T.reset_index().drop(columns = ['index']) + # Reg.pred_data_[2].index = list(y_train_cv2.index) + # Reg.pred_data_[3] = Reg.pred_data_[3].T.reset_index().drop(columns = ['index']) + # Reg.pred_data_[3].index = list(y_train_cv3.index) + Reg.pred_data_[4] = Reg.pred_data_[4].T.reset_index().drop(columns = ['index']) + Reg.pred_data_[4].index = list(y_test.index) elif regression_algo == reg_algo[3]: s = M1.number_input(label='Enter the maximum number of intervals', min_value=1, max_value=6, value=3)