From 65155b6f9f0e4d43bdc55fec6358f4aef10f7afe Mon Sep 17 00:00:00 2001 From: barthes <nicolas.barthes@cefe.cnrs.fr> Date: Tue, 30 Apr 2024 13:04:24 +0200 Subject: [PATCH] now working Julia LWPLSR model with temp files --- .gitignore | 3 +- requirements.txt | 2 +- src/Class_Mod/LWPLSR_.py | 150 +++++++++++++++++++++------------- src/Class_Mod/LWPLSR_Call.py | 27 ++++++ src/Class_Mod/__init__.py | 2 +- src/pages/2-model_creation.py | 36 ++++---- 6 files changed, 145 insertions(+), 75 deletions(-) create mode 100644 src/Class_Mod/LWPLSR_Call.py diff --git a/.gitignore b/.gitignore index c8c9b25..a025df3 100644 --- a/.gitignore +++ b/.gitignore @@ -6,4 +6,5 @@ src/data/predictions/* src/data/sample_selections/* src/Report/*.pdf src/Report/*.tex -src/Report/figures/ \ No newline at end of file +src/Report/figures/ +src/temp/* \ No newline at end of file diff --git a/requirements.txt b/requirements.txt index 042ceb5..414efc3 100644 --- a/requirements.txt +++ b/requirements.txt @@ -15,4 +15,4 @@ umap-learn>=0.5.5 jcamp>=1.2.2 mkdocs>=1.5.3 mkdocs-material>=9.5.18 -mkdocstrings[python]>=0.24.3 \ No newline at end of file +mkdocstrings[python]>=0.24.3 diff --git a/src/Class_Mod/LWPLSR_.py b/src/Class_Mod/LWPLSR_.py index 5e86c90..877ca50 100644 --- a/src/Class_Mod/LWPLSR_.py +++ b/src/Class_Mod/LWPLSR_.py @@ -1,5 +1,6 @@ -from Packages import * -from Class_Mod.Miscellaneous import * +from juliacall import Main as jl +import numpy as np +import pandas as pd class LWPLSR: """ @@ -11,36 +12,40 @@ class LWPLSR: # prepare to send dataframes to julia and Jchemo jl.x_train, jl.y_train, jl.x_test, jl.y_test = self.x_train, self.y_train, self.x_test, self.y_test # Pre-treatment of x_train and x_test - jl.seval(""" - # using DataFrames - # using Pandas - using Jchemo - mod1 = Jchemo.model(snv; centr = true, scal = true) - mod2 = Jchemo.model(savgol; npoint = 15, deriv = 1, degree = 2) - mod = Jchemo.pip(mod1, mod2) - Jchemo.fit!(mod, x_train) - x_train = Jchemo.transf(mod1, x_train) - Jchemo.fit!(mod, x_test) - x_test = Jchemo.transf(mod1, x_test) - """) - jl.seval(""" - ntrain = nro(x_train) - segm = segmkf(ntrain, 4; rep = 5) - nlvdis = [5; 10; 15] ; metric = [:mah] - h = [1; 2; 6; Inf] ; k = [10; 30; 100] - nlv = 0:15 - pars = mpar(nlvdis = nlvdis, metric = metric, h = h, k = k) - println(pars) - """) + # jl.seval(""" + # # using DataFrames + # # using Pandas + # using Jchemo + # mod1 = Jchemo.model(snv; centr = true, scal = true) + # mod2 = Jchemo.model(savgol; npoint = 15, deriv = 1, degree = 2) + # mod = Jchemo.pip(mod1, mod2) + # Jchemo.fit!(mod, x_train) + # x_train = Jchemo.transf(mod1, x_train) + # Jchemo.fit!(mod, x_test) + # x_test = Jchemo.transf(mod1, x_test) + # """) + # jl.seval(""" + # ntrain = nro(x_train) + # segm = segmkf(ntrain, 4; rep = 5) + # nlvdis = [5; 10; 15] ; metric = [:mah] + # h = [1; 2; 6; Inf] ; k = [10; 30; 100] + # nlv = 0:15 + # pars = mpar(nlvdis = nlvdis, metric = metric, h = h, k = k) + # println(pars) + # """) # initialize vars from the class y_shape = y_test.shape self.scores = pd.DataFrame self.predicted_results_on_test = pd.DataFrame - self.pred = np.zeros(shape=(y_shape[0], 1)) + self.predicted_results_on_train = pd.DataFrame + self.predicted_results_on_cv = pd.DataFrame + self.pred_test = np.zeros(shape=(y_shape[0], 1)) + self.pred_train = np.zeros(shape=(y_shape[0], 1)) + self.pred_cv = np.zeros(shape=(y_shape[0], 1)) self.mod = "" - def Jchemo_lwplsr(self): + def Jchemo_lwplsr_fit(self): """Send data to Julia to compute lwplsr. Args: @@ -64,35 +69,44 @@ class LWPLSR: y_test |> Pandas.DataFrame |> DataFrames.DataFrame """) # Create LWPLSR model and fit - # jl.seval(""" - # nlvdis = 5 ; metric = :mah - # h = 1 ; k = 200 ; nlv = 15 #; scal = true - # mod = Jchemo.model(Jchemo.lwplsr; nlvdis, metric, h, k, nlv) - # # Fit model - # Jchemo.fit!(mod, x_train, y_train) - # """) - jl.seval(""" - mod = Jchemo.model(Jchemo.lwplsr) - res = Jchemo.gridcv(mod, x_train, y_train; segm, score = Jchemo.rmsep, pars, nlv, verbose = true).res - # u = findall(res.y1 .== minimum(res.y1))[1] - # mod = Jchemo.model(lwplsr; nlvdis = res.nlvdis[u], metric = res.metric[u], h = res.h[u], k = res.k[u], nlv = res.nlv[u]) ; - # Jchemo.fit!(mod, x_train, y_train) + nlvdis = 5 ; metric = :mah + h = 1 ; k = 200 ; nlv = 15 #; scal = true + mod = Jchemo.model(Jchemo.lwplsr; nlvdis, metric, h, k, nlv) + # Fit model + Jchemo.fit!(mod, x_train, y_train) """) + + # jl.seval(""" + # mod = Jchemo.model(Jchemo.lwplsr) + # res = Jchemo.gridcv(mod, x_train, y_train; segm, score = Jchemo.rmsep, pars, nlv, verbose = true).res + # # u = findall(res.y1 .== minimum(res.y1))[1] + # # mod = Jchemo.model(lwplsr; nlvdis = res.nlvdis[u], metric = res.metric[u], h = res.h[u], k = res.k[u], nlv = res.nlv[u]) ; + # # Jchemo.fit!(mod, x_train, y_train) + # """) self.mod = jl.mod def Jchemo_lwplsr_predict(self): # Predictions on x_test and store in self.pred - self.pred = jl.seval(""" + self.pred_test = jl.seval(""" res = Jchemo.predict(mod, x_test) res.pred """) - # convert predicted data from x_test to Pandas DataFrame - self.predicted_results_on_test = pd.DataFrame(self.pred) + self.pred_train = jl.seval(""" + res = Jchemo.predict(mod, x_train) + res.pred + """) + self.pred_cv = self.pred_train + @property def pred_data_(self): - return self.predicted_results_on_test, self.predicted_results_on_test, self.predicted_results_on_test + # convert predicted data from x_test to Pandas DataFrame + self.predicted_results_on_test = pd.DataFrame(self.pred_test) + self.predicted_results_on_train = pd.DataFrame(self.pred_train) + # self.predicted_results_on_cv = pd.DataFrame(self.pred_cv) + self.predicted_results_on_cv = pd.DataFrame(self.pred_train) + return self.predicted_results_on_train, self.predicted_results_on_cv, self.predicted_results_on_test @property def model_(self): @@ -100,24 +114,50 @@ class LWPLSR: @property def metrics_(self): - jl.pred = self.pred - st.dataframe(self.pred) - st.dataframe(self.predicted_results_on_test) - st.write('starting metrics') + jl.pred_test = self.pred_test jl.seval(""" using Jchemo """) - scorermsep = jl.seval(""" - first(Jchemo.rmsep(pred, y_test)) + scorermsep_test = jl.seval(""" + first(Jchemo.rmsep(pred_test, y_test)) + """) + scoremr2_test = jl.seval(""" + first(Jchemo.r2(pred_test, y_test)) + """) + scorerpd_test = jl.seval(""" + first(Jchemo.rpd(pred_test, y_test)) + """) + scoremsep_test = jl.seval(""" + first(Jchemo.sep(pred_test, y_test)) """) - scoremr2 = jl.seval(""" - first(Jchemo.r2(pred, y_test)) + jl.pred_train = self.pred_train + scorermsep_train = jl.seval(""" + first(Jchemo.rmsep(pred_train, y_train)) """) - scorerpd = jl.seval(""" - first(Jchemo.rpd(pred, y_test)) + scoremr2_train = jl.seval(""" + first(Jchemo.r2(pred_train, y_train)) """) - scoremsep = jl.seval(""" - first(Jchemo.sep(pred, y_test)) + scorerpd_train = jl.seval(""" + first(Jchemo.rpd(pred_train, y_train)) """) - self.scores = pd.DataFrame([[scoremr2, scorermsep, scoremsep, scorerpd]], columns=['r2', 'rmsep', 'msep', 'rpd'], index=['scores']) + scoremsep_train = jl.seval(""" + first(Jchemo.sep(pred_train, y_train)) + """) + jl.pred_cv = self.pred_cv + scorermsep_cv = jl.seval(""" + first(Jchemo.rmsep(pred_cv, y_train)) + """) + scoremr2_cv = jl.seval(""" + first(Jchemo.r2(pred_cv, y_train)) + """) + scorerpd_cv = jl.seval(""" + first(Jchemo.rpd(pred_cv, y_train)) + """) + scoremsep_cv = jl.seval(""" + first(Jchemo.sep(pred_cv, y_train)) + """) + + + self.scores = pd.DataFrame([[scoremr2_test, scorermsep_test, scoremsep_test, scorerpd_test]], columns=['r2', 'rmsep', 'msep', 'rpd'], index=['test']) + self.scores = pd.concat([self.scores, pd.DataFrame([[scoremr2_train, scorermsep_train, scoremsep_train, scorerpd_train]], columns=['r2', 'rmsep', 'msep', 'rpd'], index = ["train"]), pd.DataFrame([[scoremr2_cv, scorermsep_cv, scoremsep_cv, scorerpd_cv]], columns=['r2', 'rmsep', 'msep', 'rpd'], index = ["cv"])])# return self.scores diff --git a/src/Class_Mod/LWPLSR_Call.py b/src/Class_Mod/LWPLSR_Call.py new file mode 100644 index 0000000..47bf8c8 --- /dev/null +++ b/src/Class_Mod/LWPLSR_Call.py @@ -0,0 +1,27 @@ +import numpy as np +from pathlib import Path +import json +from LWPLSR_ import LWPLSR + +# loading the lwplsr_inputs.json +temp_path = Path("temp/") +for i in ['x_train_np', 'y_train_np', 'x_test_np', 'y_test_np']: + globals()[i] = np.genfromtxt(temp_path / str(i + ".csv"), delimiter=',') +print('CSV imported') +print('start model creation') +Reg = LWPLSR(x_train_np, y_train_np, x_test_np, y_test_np) +print('model created. \n now fit') +LWPLSR.Jchemo_lwplsr_fit(Reg) +print('now predict') +LWPLSR.Jchemo_lwplsr_predict(Reg) + +json_export = {} +data_to_export = ['model', 'pred_data', 'metrics'] +json_export['pred_data_train'] = Reg.pred_data_[0].to_dict() +json_export['pred_data_cv'] = Reg.pred_data_[1].to_dict() +json_export['pred_data_test'] = Reg.pred_data_[2].to_dict() +json_export['metrics'] = Reg.metrics_.to_dict() +json_export['model'] = str(Reg.model_) +with open(temp_path / "lwplsr_outputs.json", "w+") as outfile: + json.dump(json_export, outfile) +print(Reg.metrics_) \ No newline at end of file diff --git a/src/Class_Mod/__init__.py b/src/Class_Mod/__init__.py index 540a093..155f625 100644 --- a/src/Class_Mod/__init__.py +++ b/src/Class_Mod/__init__.py @@ -14,4 +14,4 @@ from .HDBSCAN_Clustering import Hdbscan from .SK_PLSR_ import PlsR from .PLSR_Preprocess import PlsProcess from .NMF_ import Nmf -from .Ap import AP \ No newline at end of file +from .Ap import AP diff --git a/src/pages/2-model_creation.py b/src/pages/2-model_creation.py index 986d249..efc2022 100644 --- a/src/pages/2-model_creation.py +++ b/src/pages/2-model_creation.py @@ -1,20 +1,11 @@ +import streamlit from Packages import * st.set_page_config(page_title="NIRS Utils", page_icon=":goat:", layout="wide") from Modules import * from Class_Mod.DATA_HANDLING import * - -# HTML pour le bandeau "CEFE - CNRS" -# bandeau_html = """ -# <div style="width: 100%; background-color: #4682B4; padding: 10px; margin-bottom: 10px;"> -# <h1 style="text-align: center; color: white;">CEFE - CNRS / UM</h1> -# </div> -# """ -# # Injecter le code HTML du bandeau -# st.markdown(bandeau_html, unsafe_allow_html=True) add_header() - st.session_state["interface"] = st.session_state.get('interface') if st.session_state["interface"] == 'simple': hide_pages("Predictions") @@ -134,13 +125,24 @@ if not spectra.empty and not y.empty: reg_model = Reg.model_ #M2.dataframe(Pin.pred_data_) elif regression_algo == reg_algo[2]: - x_train, y_train, x_test, y_test = X_train.to_numpy(), y_train.to_numpy(), X_test.to_numpy(), y_test.to_numpy() - Reg = LWPLSR(x_train, y_train, x_test, y_test) - LWPLSR.Jchemo_lwplsr(Reg) - reg_model = Reg.model_ - # LWPLSR.Jchemo_lwplsr_predict(Reg) - # st.dataframe(Reg.pred_data_) - # st.dataframe(Reg.metrics_) + data_to_work_with = ['x_train_np', 'y_train_np', 'x_test_np', 'y_test_np'] + x_train_np, y_train_np, x_test_np, y_test_np = X_train.to_numpy(), y_train.to_numpy(), X_test.to_numpy(), y_test.to_numpy() + temp_path = Path('temp/') + for i in data_to_work_with: np.savetxt(temp_path / str(i + ".csv"), vars()[i], delimiter=",") + import subprocess + subprocess_path = Path("Class_Mod/") + subprocess.run([f"{sys.executable}", subprocess_path / "LWPLSR_Call.py"]) + with open(temp_path / "lwplsr_outputs.json", "r") as outfile: + Reg_json = json.load(outfile) + for i in data_to_work_with: os.unlink(temp_path / str(i + ".csv")) + os.unlink(temp_path / "lwplsr_outputs.json") + Reg = type('obj', (object,), {'metrics_' : pd.json_normalize(Reg_json['metrics']), 'pred_data_' : [pd.json_normalize(Reg_json['pred_data_train']), pd.json_normalize(Reg_json['pred_data_cv']),pd.json_normalize(Reg_json['pred_data_test'])]}) + Reg.pred_data_[0] = Reg.pred_data_[0].T.reset_index().drop(columns = ['index']) + Reg.pred_data_[0].index = list(y_train.index) + Reg.pred_data_[1] = Reg.pred_data_[1].T.reset_index().drop(columns = ['index']) + Reg.pred_data_[1].index = list(y_train.index) + Reg.pred_data_[2] = Reg.pred_data_[2].T.reset_index().drop(columns = ['index']) + Reg.pred_data_[2].index = list(y_test.index) elif regression_algo == reg_algo[3]: s = M1.number_input(label='Enter the maximum number of intervals', min_value=1, max_value=6, value=3) -- GitLab