diff --git a/src/Class_Mod/LWPLSR_.py b/src/Class_Mod/LWPLSR_.py index d16d187497284ba00860aa75ee9a6fd977954254..2e3d40b734429ef97fa7eb5cff1950279716c564 100644 --- a/src/Class_Mod/LWPLSR_.py +++ b/src/Class_Mod/LWPLSR_.py @@ -1,7 +1,6 @@ from juliacall import Main as jl import numpy as np import pandas as pd -from sklearn.model_selection import KFold class LWPLSR: """The lwpls regression model from Jchemo (M. Lesnoff) @@ -9,48 +8,26 @@ class LWPLSR: Returns: self.scores (DataFrame): various metrics and scores self.predicted_results_on_train (DataFrame): - self.predicted_results_on_cv (DataFrame): self.predicted_results_on_test (DataFrame): self.mod (Julia model): the prepared model """ - def __init__(self, x_train, y_train, x_test, y_test, x_train_cv1, y_train_cv1, x_test_cv1, y_test_cv1, x_train_cv2, y_train_cv2, x_test_cv2, y_test_cv2, x_train_cv3, y_train_cv3, x_test_cv3, y_test_cv3): + def __init__(self, dataset): """Initiate the LWPLSR and prepare data for Julia computing.""" - self.x_train, self.y_train, self.x_test, self.y_test = x_train, y_train, x_test, y_test - self.x_train_cv1, self.y_train_cv1, self.x_test_cv1, self.y_test_cv1 = x_train_cv1, y_train_cv1, x_test_cv1, y_test_cv1 - self.x_train_cv2, self.y_train_cv2, self.x_test_cv2, self.y_test_cv2 = x_train_cv2, y_train_cv2, x_test_cv2, y_test_cv2 - self.x_train_cv3, self.y_train_cv3, self.x_test_cv3, self.y_test_cv3 = x_train_cv3, y_train_cv3, x_test_cv3, y_test_cv3 + self.x_train, self.y_train, self.x_test, self.y_test = [dataset[i] for i in range(len(dataset))] + # prepare to send dataframes to julia and Jchemo jl.x_train, jl.y_train, jl.x_test, jl.y_test = self.x_train, self.y_train, self.x_test, self.y_test - jl.x_train_cv1, jl.y_train_cv1, jl.x_test_cv1, jl.y_test_cv1 = self.x_train_cv1, self.y_train_cv1, self.x_test_cv1, self.y_test_cv1 - jl.x_train_cv2, jl.y_train_cv2, jl.x_test_cv2, jl.y_test_cv2 = self.x_train_cv2, self.y_train_cv2, self.x_test_cv2, self.y_test_cv2 - jl.x_train_cv3, jl.y_train_cv3, jl.x_test_cv3, jl.y_test_cv3= self.x_train_cv3, self.y_train_cv3, self.x_test_cv3, self.y_test_cv3 - # optimize lwplsr parameters with Jchemo - # jl.seval(""" - # ntrain = nro(x_train) - # segm = segmkf(ntrain, 4; rep = 5) - # nlvdis = [5; 10; 15] ; metric = [:mah] - # h = [1; 2; 6; Inf] ; k = [10; 30; 100] - # nlv = 0:15 - # pars = mpar(nlvdis = nlvdis, metric = metric, h = h, k = k) - # println(pars) - # """) # initialize vars from the class - y_shape = y_test.shape - y_shape_cv1 = y_test_cv1.shape - y_shape_cv2 = y_test_cv2.shape - y_shape_cv3 = y_test_cv3.shape - # self.scores = pd.DataFrame + y_shape = self.y_test.shape self.predicted_results_on_test = pd.DataFrame self.predicted_results_on_train = pd.DataFrame self.predicted_results_on_cv = pd.DataFrame self.pred_test = np.zeros(shape=(y_shape[0], 1)) self.pred_train = np.zeros(shape=(y_shape[0], 1)) - self.pred_cv1 = np.zeros(shape=(y_shape_cv1[0], 1)) - self.pred_cv2 = np.zeros(shape=(y_shape_cv2[0], 1)) - self.pred_cv3 = np.zeros(shape=(y_shape_cv3[0], 1)) self.mod = "" + self.best_lwplsr_params = np.zeros(shape=(5, 1)) def Jchemo_lwplsr_fit(self): """Send data to Julia to fit lwplsr. @@ -73,47 +50,43 @@ class LWPLSR: y_train |> Pandas.DataFrame |> DataFrames.DataFrame x_test |> Pandas.DataFrame |> DataFrames.DataFrame y_test |> Pandas.DataFrame |> DataFrames.DataFrame - x_train_cv1 |> Pandas.DataFrame |> DataFrames.DataFrame - y_train_cv1 |> Pandas.DataFrame |> DataFrames.DataFrame - x_test_cv1 |> Pandas.DataFrame |> DataFrames.DataFrame - y_test_cv1 |> Pandas.DataFrame |> DataFrames.DataFrame - x_train_cv2 |> Pandas.DataFrame |> DataFrames.DataFrame - y_train_cv2 |> Pandas.DataFrame |> DataFrames.DataFrame - x_test_cv2 |> Pandas.DataFrame |> DataFrames.DataFrame - y_test_cv2 |> Pandas.DataFrame |> DataFrames.DataFrame - x_train_cv3 |> Pandas.DataFrame |> DataFrames.DataFrame - y_train_cv3 |> Pandas.DataFrame |> DataFrames.DataFrame - x_test_cv3 |> Pandas.DataFrame |> DataFrames.DataFrame - y_test_cv3 |> Pandas.DataFrame |> DataFrames.DataFrame """) - # Create LWPLSR model and fit + print('LWPLSR - tuning') + # set tuning parameters jl.seval(""" - nlvdis = 5 ; metric = :mah - h = 1 ; k = 200 ; nlv = 15 #; scal = true - mod = Jchemo.model(Jchemo.lwplsr; nlvdis, metric, h, k, nlv) - # Fit model - Jchemo.fit!(mod, x_train, y_train) + nlvdis = [5; 10; 15] ; metric = [:eucl; :mah] + h = [1; 2; 6; Inf] ; k = [30; 80; 200] + nlv = 5:15 + pars = Jchemo.mpar(nlvdis = nlvdis, metric = metric, h = h, k = k) """) - # CV model and fit + # split Train data into Cal/Val for tuning jl.seval(""" - nlvdis = 5 ; metric = :mah - h = 1 ; k = 200 ; nlv = 15 #; scal = true - mod_cv1 = mod - mod_cv2 = mod - mod_cv3 = mod - # Fit model - Jchemo.fit!(mod_cv1, x_train_cv1, y_train_cv1) - Jchemo.fit!(mod_cv2, x_train_cv2, y_train_cv2) - Jchemo.fit!(mod_cv3, x_train_cv3, y_train_cv3) + pct = .3 + ntrain = Jchemo.nro(x_train) + nval = Int(round(pct * ntrain)) + s = Jchemo.samprand(ntrain, nval) + Xcal = x_train[s.train, :] + ycal = y_train[s.train] + Xval = x_train[s.test, :] + yval = y_train[s.test] + ncal = ntrain - nval """) - # jl.seval(""" - # mod = Jchemo.model(Jchemo.lwplsr) - # res = Jchemo.gridcv(mod, x_train, y_train; segm, score = Jchemo.rmsep, pars, nlv, verbose = true).res - # # u = findall(res.y1 .== minimum(res.y1))[1] - # # mod = Jchemo.model(lwplsr; nlvdis = res.nlvdis[u], metric = res.metric[u], h = res.h[u], k = res.k[u], nlv = res.nlv[u]) ; - # # Jchemo.fit!(mod, x_train, y_train) - # """) + # Create LWPLSR model and tune + jl.seval(""" + mod = Jchemo.model(Jchemo.lwplsr) + res = gridscore(mod, Xcal, ycal, Xval, yval; score = Jchemo.rmsep, pars, nlv, verbose = false) + u = findall(res.y1 .== minimum(res.y1))[1] #best parameters combination + """) + self.best_lwplsr_params = {'nlvdis' : jl.res.nlvdis[jl.u], 'metric' : str(jl.res.metric[jl.u]), 'h' : jl.res.h[jl.u], 'k' : jl.res.k[jl.u], 'nlv' : jl.res.nlv[jl.u]} + print('best lwplsr params' + str(self.best_lwplsr_params)) + print('LWPLSR - best params ok') + # calculate LWPLSR model with best parameters + jl.seval(""" + mod = Jchemo.model(Jchemo.lwplsr; nlvdis = res.nlvdis[u], metric = res.metric[u], h = res.h[u], k = res.k[u], nlv = res.nlv[u]) + # Fit model + Jchemo.fit!(mod, x_train, y_train) + """) self.mod = jl.mod def Jchemo_lwplsr_predict(self): @@ -133,91 +106,29 @@ class LWPLSR: """ # Predictions on x_test and store in self.pred self.pred_test = jl.seval(""" - println("start test predict") + println("LWPLSR - start test predict") res = Jchemo.predict(mod, x_test) res.pred """) self.pred_train = jl.seval(""" - println("start train predict") + println("LWPLSR - start train predict") res = Jchemo.predict(mod, x_train) res.pred """) - self.pred_cv1 = jl.seval(""" - println("start test_cv1 predict") - res = Jchemo.predict(mod_cv1, x_test_cv1) - res.pred - """) - self.pred_cv2 = jl.seval(""" - println("start test_cv2 predict") - res = Jchemo.predict(mod_cv2, x_test_cv2) - res.pred - """) - self.pred_cv3 = jl.seval(""" - println("start test_cv3 predict") - res = Jchemo.predict(mod_cv3, x_test_cv3) - res.pred - """) + print('LWPLSR - end') + @property def pred_data_(self): # convert predicted data from x_test to Pandas DataFrame self.predicted_results_on_test = pd.DataFrame(self.pred_test) self.predicted_results_on_train = pd.DataFrame(self.pred_train) - self.predicted_results_on_cv1 = pd.DataFrame(self.pred_cv1) - self.predicted_results_on_cv2 = pd.DataFrame(self.pred_cv2) - self.predicted_results_on_cv3 = pd.DataFrame(self.pred_cv3) - return self.predicted_results_on_train, self.predicted_results_on_cv1, self.predicted_results_on_cv2, self.predicted_results_on_cv3, self.predicted_results_on_test + return self.predicted_results_on_train, self.predicted_results_on_test @property def model_(self): return self.mod - # @property - # def metrics_(self): - # jl.pred_test = self.pred_test - # jl.seval(""" - # using Jchemo - # """) - # scorermsep_test = jl.seval(""" - # first(Jchemo.rmsep(pred_test, y_test)) - # """) - # scoremr2_test = jl.seval(""" - # first(Jchemo.r2(pred_test, y_test)) - # """) - # scorerpd_test = jl.seval(""" - # first(Jchemo.rpd(pred_test, y_test)) - # """) - # scoremsep_test = jl.seval(""" - # first(Jchemo.sep(pred_test, y_test)) - # """) - # jl.pred_train = self.pred_train - # scorermsep_train = jl.seval(""" - # first(Jchemo.rmsep(pred_train, y_train)) - # """) - # scoremr2_train = jl.seval(""" - # first(Jchemo.r2(pred_train, y_train)) - # """) - # scorerpd_train = jl.seval(""" - # first(Jchemo.rpd(pred_train, y_train)) - # """) - # scoremsep_train = jl.seval(""" - # first(Jchemo.sep(pred_train, y_train)) - # """) - # jl.pred_cv = self.pred_cv - # scorermsep_cv = jl.seval(""" - # first(Jchemo.rmsep(pred_cv, y_train)) - # """) - # scoremr2_cv = jl.seval(""" - # first(Jchemo.r2(pred_cv, y_train)) - # """) - # scorerpd_cv = jl.seval(""" - # first(Jchemo.rpd(pred_cv, y_train)) - # """) - # scoremsep_cv = jl.seval(""" - # first(Jchemo.sep(pred_cv, y_train)) - # """) - # - # - # self.scores = pd.DataFrame([[scoremr2_test, scorermsep_test, scoremsep_test, scorerpd_test]], columns=['r2', 'rmsep', 'msep', 'rpd'], index=['test']) - # self.scores = pd.concat([self.scores, pd.DataFrame([[scoremr2_train, scorermsep_train, scoremsep_train, scorerpd_train]], columns=['r2', 'rmsep', 'msep', 'rpd'], index = ["train"]), pd.DataFrame([[scoremr2_cv, scorermsep_cv, scoremsep_cv, scorerpd_cv]], columns=['r2', 'rmsep', 'msep', 'rpd'], index = ["cv"])])# - # return self.scores + @property + def best_lwplsr_params_(self): + return self.best_lwplsr_params diff --git a/src/Class_Mod/LWPLSR_Call.py b/src/Class_Mod/LWPLSR_Call.py index 9ecb74a33b9f5f24cc0a699f8679fc7805fe8be6..f8445d4b2930400cf0820fffab5804572beba60b 100644 --- a/src/Class_Mod/LWPLSR_Call.py +++ b/src/Class_Mod/LWPLSR_Call.py @@ -5,27 +5,24 @@ from LWPLSR_ import LWPLSR # loading the lwplsr_inputs.json temp_path = Path("temp/") -for i in ['x_train_np', 'y_train_np', 'x_test_np', 'y_test_np','x_train_np_cv1', 'y_train_np_cv1', 'x_test_np_cv1', 'y_test_np_cv1', 'x_train_np_cv2', 'y_train_np_cv2', 'x_test_np_cv2', 'y_test_np_cv2', 'x_train_np_cv3', 'y_train_np_cv3', 'x_test_np_cv3', 'y_test_np_cv3',]: - globals()[i] = np.genfromtxt(temp_path / str(i + ".csv"), delimiter=',') +data_to_work_with = ['x_train_np', 'y_train_np', 'x_test_np', 'y_test_np'] +dataset = [] +for i in data_to_work_with: + dataset.append(np.genfromtxt(temp_path / str(i + ".csv"), delimiter=',')) print('CSV imported') print('start model creation') -Reg = LWPLSR(x_train_np, y_train_np, x_test_np, y_test_np, x_train_np_cv1, y_train_np_cv1, x_test_np_cv1, y_test_np_cv1, x_train_np_cv2, y_train_np_cv2, x_test_np_cv2, y_test_np_cv2, x_train_np_cv3, y_train_np_cv3, x_test_np_cv3, y_test_np_cv3) -print('model created. \n now fit') +Reg = LWPLSR(dataset) +print('model created. \nnow fit') LWPLSR.Jchemo_lwplsr_fit(Reg) print('now predict') LWPLSR.Jchemo_lwplsr_predict(Reg) -pred = ['pred_data_train', 'pred_data_cv1', 'pred_data_cv2', 'pred_data_cv3', 'pred_data_test'] +print('export to json') +pred = ['pred_data_train', 'pred_data_test'] json_export = {} for i in pred: json_export[i] = Reg.pred_data_[pred.index(i)].to_dict() -# json_export['pred_data_train'] = Reg.pred_data_[0].to_dict() -# json_export['pred_data_cv1'] = Reg.pred_data_[1].to_dict() -# json_export['pred_data_cv2'] = Reg.pred_data_[2].to_dict() -# json_export['pred_data_cv3'] = Reg.pred_data_[3].to_dict() -# json_export['pred_data_test'] = Reg.pred_data_[4].to_dict() json_export['model'] = str(Reg.model_) -# json_export['metrics'] = Reg.metrics_.to_dict() +json_export['best_lwplsr_params'] = Reg.best_lwplsr_params_ with open(temp_path / "lwplsr_outputs.json", "w+") as outfile: json.dump(json_export, outfile) -print(Reg.metrics_) \ No newline at end of file diff --git a/src/Report/figures/.gitkeep b/src/Report/figures/.gitkeep deleted file mode 100644 index e69de29bb2d1d6434b8b29ae775ad8c2e48c5391..0000000000000000000000000000000000000000 diff --git a/src/pages/1-samples_selection.py b/src/pages/1-samples_selection.py index 8c9d6056ac06c2d6d463302a0b276bdd3db0fef4..59dabc1e5e120dc4a74a31aadaffcd25fb7ebd57 100644 --- a/src/pages/1-samples_selection.py +++ b/src/pages/1-samples_selection.py @@ -28,7 +28,7 @@ selec_strategy = ['center','random'] if st.session_state["interface"] == 'simple': st.write(':red[Automated Simple Interface]') - hide_pages("Predictions") + # hide_pages("Predictions") if 37 not in st.session_state: default_reduction_option = 1 else: diff --git a/src/pages/2-model_creation.py b/src/pages/2-model_creation.py index c5e24976f12c61db947aebacf3ffba0623ab310c..bf4dd10f84148e940b7ed3c1bd26cc1941c2591b 100644 --- a/src/pages/2-model_creation.py +++ b/src/pages/2-model_creation.py @@ -167,24 +167,25 @@ if not spectra.empty and not y.empty: reg_model = Reg.model_ #M2.dataframe(Pin.pred_data_) elif regression_algo == reg_algo[2]: - data_to_work_with = ['x_train_np', 'y_train_np', 'x_test_np', 'y_test_np']#,'x_train_np_cv1', 'y_train_np_cv1', 'x_test_np_cv1', 'y_test_np_cv1', 'x_train_np_cv2', 'y_train_np_cv2', 'x_test_np_cv2', 'y_test_np_cv2', 'x_train_np_cv3', 'y_train_np_cv3', 'x_test_np_cv3', 'y_test_np_cv3',] + # export data to csv for Julia + data_to_work_with = ['x_train_np', 'y_train_np', 'x_test_np', 'y_test_np'] x_train_np, y_train_np, x_test_np, y_test_np = X_train.to_numpy(), y_train.to_numpy(), X_test.to_numpy(), y_test.to_numpy() - # x_train_np_cv1, y_train_np_cv1, x_test_np_cv1, y_test_np_cv1, x_train_np_cv2, y_train_np_cv2, x_test_np_cv2, y_test_np_cv2, x_train_np_cv3, y_train_np_cv3, x_test_np_cv3, y_test_np_cv3, = X_train_cv1.to_numpy(), y_train_cv1.to_numpy(), X_test_cv1.to_numpy(), y_test_cv1.to_numpy(), X_train_cv2.to_numpy(), y_train_cv2.to_numpy(), X_test_cv2.to_numpy(), y_test_cv2.to_numpy(), X_train_cv3.to_numpy(), y_train_cv3.to_numpy(), X_test_cv3.to_numpy(), y_test_cv3.to_numpy() temp_path = Path('temp/') for i in data_to_work_with: np.savetxt(temp_path / str(i + ".csv"), vars()[i], delimiter=",") + # run Julia Jchemo import subprocess subprocess_path = Path("Class_Mod/") subprocess.run([f"{sys.executable}", subprocess_path / "LWPLSR_Call.py"]) + # retrieve json results from Julia JChemo with open(temp_path / "lwplsr_outputs.json", "r") as outfile: - Reg_json = json.load(outfile) - for i in data_to_work_with: os.unlink(temp_path / str(i + ".csv")) + Reg_json = json.load(outfile) + for i in data_to_work_with: os.unlink(temp_path / str(i + ".csv")) os.unlink(temp_path / "lwplsr_outputs.json") - # Reg = type('obj', (object,), {'model' : pd.json_normalize(Reg_json['model']), 'pred_data_' : [pd.json_normalize(Reg_json['pred_data_train']), pd.json_normalize(Reg_json['pred_data_cv1']), pd.json_normalize(Reg_json['pred_data_cv2']), pd.json_normalize(Reg_json['pred_data_cv3']), pd.json_normalize(Reg_json['pred_data_test'])]}) - pred = ['pred_data_train', 'pred_data_cv1', 'pred_data_cv2', 'pred_data_cv3', 'pred_data_test'] - Reg = type('obj', (object,), {'model' : pd.json_normalize(Reg_json['model']), 'pred_data_' : [pd.json_normalize(Reg_json[i]) for i in pred]}) + pred = ['pred_data_train', 'pred_data_test'] + Reg = type('obj', (object,), {'model' : Reg_json['model'], 'best_lwplsr_params' : Reg_json['best_lwplsr_params'], 'pred_data_' : [pd.json_normalize(Reg_json[i]) for i in pred]}) for i in range(len(pred)): Reg.pred_data_[i] = Reg.pred_data_[i].T.reset_index().drop(columns = ['index']) - if i != 4: + if i != 1: # if not pred_data_test Reg.pred_data_[i].index = list(y_train.index) else: Reg.pred_data_[i].index = list(y_test.index)