diff --git a/src/Class_Mod/LWPLSR_.py b/src/Class_Mod/LWPLSR_.py index 2e3d40b734429ef97fa7eb5cff1950279716c564..b11eb6ed01e50e72ad918d8dde59caf8d04f274f 100644 --- a/src/Class_Mod/LWPLSR_.py +++ b/src/Class_Mod/LWPLSR_.py @@ -14,8 +14,17 @@ class LWPLSR: def __init__(self, dataset): """Initiate the LWPLSR and prepare data for Julia computing.""" - self.x_train, self.y_train, self.x_test, self.y_test = [dataset[i] for i in range(len(dataset))] + # self.x_train, self.y_train, self.x_test, self.y_test = [dataset[i] for i in range(len(dataset))] + self.x_train, self.y_train, self.x_test, self.y_test = [dataset[i] for i in range(4)] + nb_fold = int((len(dataset)-4)/4) + for i in range(nb_fold): + vars()["self.xtr_fold"+str(i+1)] = dataset[i+7] + vars()["self.ytr_fold"+str(i+1)] = dataset[i+13] + vars()["self.xte_fold"+str(i+1)] = dataset[i+4] + vars()["self.yte_fold"+str(i+1)] = dataset[i+10] + + print(self.xtr_fold1) # prepare to send dataframes to julia and Jchemo jl.x_train, jl.y_train, jl.x_test, jl.y_test = self.x_train, self.y_train, self.x_test, self.y_test diff --git a/src/Class_Mod/LWPLSR_Call.py b/src/Class_Mod/LWPLSR_Call.py index f8445d4b2930400cf0820fffab5804572beba60b..46a65e44ddb13a4a4e8097d35d58178065a3925b 100644 --- a/src/Class_Mod/LWPLSR_Call.py +++ b/src/Class_Mod/LWPLSR_Call.py @@ -2,10 +2,15 @@ import numpy as np from pathlib import Path import json from LWPLSR_ import LWPLSR +import os # loading the lwplsr_inputs.json temp_path = Path("temp/") data_to_work_with = ['x_train_np', 'y_train_np', 'x_test_np', 'y_test_np'] +temp_files_list = os.listdir(temp_path) +for i in temp_files_list: + if 'fold' in i: + data_to_work_with.append(str(i)[:-4]) dataset = [] for i in data_to_work_with: dataset.append(np.genfromtxt(temp_path / str(i + ".csv"), delimiter=',')) @@ -17,6 +22,10 @@ LWPLSR.Jchemo_lwplsr_fit(Reg) print('now predict') LWPLSR.Jchemo_lwplsr_predict(Reg) +print('now CV') + + + print('export to json') pred = ['pred_data_train', 'pred_data_test'] json_export = {} diff --git a/src/Modules.py b/src/Modules.py index c399a6ec5019f5daba315b26bf8330ede04c53c3..8699e5616f258a6b0fad449942a497c1edf9aa87 100644 --- a/src/Modules.py +++ b/src/Modules.py @@ -10,10 +10,3 @@ from style.header import add_header, add_sidebar from config.config import pdflatex_path local_css(css_file / "style.css") -# path = os.path.dirname(os.path.abspath(__file__)).replace('\\','/') -# d1 = path.find('/') -# css_file = path[:d1]+'/style' -# st.session_state["interface"] = st.session_state.get('interface') -# if st.session_state["interface"] == 'simple': -# hide_pages("Predictions") -# local_css(css_file +"/style.css") diff --git a/src/pages/2-model_creation.py b/src/pages/2-model_creation.py index bf4dd10f84148e940b7ed3c1bd26cc1941c2591b..f4b43c2938e041b53d182bd871f51085b1ee2a8a 100644 --- a/src/pages/2-model_creation.py +++ b/src/pages/2-model_creation.py @@ -1,4 +1,5 @@ # import streamlit +import pandas as pd from Packages import * st.set_page_config(page_title="NIRS Utils", page_icon=":goat:", layout="wide") from Modules import * @@ -123,7 +124,7 @@ if not spectra.empty and not y.empty: colnames = spectra.columns else: colnames = np.arange(spectra.shape[1]) - + #rd_seed = M1.slider("Customize Train-test split", min_value=1, max_value=100, value=42, format="%i") # Split data into training and test sets using the kennard_stone method and correlation metric, 25% of data is used for testing @@ -132,9 +133,9 @@ if not spectra.empty and not y.empty: # Assign data to training and test sets X_train, y_train = pd.DataFrame(spectra.iloc[train_index,:]), y.iloc[train_index] X_test, y_test = pd.DataFrame(spectra.iloc[test_index,:]), y.iloc[test_index] - - #### insight on loaded data + + #### insight on loaded data fig, ax1 = plt.subplots( figsize = (12,3)) spectra.T.plot(legend=False, ax = ax1, linestyle = '--') ax1.set_ylabel('Signal intensity') @@ -167,29 +168,54 @@ if not spectra.empty and not y.empty: reg_model = Reg.model_ #M2.dataframe(Pin.pred_data_) elif regression_algo == reg_algo[2]: - # export data to csv for Julia + info = M1.info('Starting LWPLSR model creation... Please wait a few minutes.') + # export data to csv for Julia train/test data_to_work_with = ['x_train_np', 'y_train_np', 'x_test_np', 'y_test_np'] x_train_np, y_train_np, x_test_np, y_test_np = X_train.to_numpy(), y_train.to_numpy(), X_test.to_numpy(), y_test.to_numpy() + # Cross-Validation calculation + nb_folds = 3 + st.write('KFold = ' + str(nb_folds)) + folds = KF_CV.CV(x_train_np, y_train_np, nb_folds) + d = {} + for i in range(nb_folds): + d["xtr_fold{0}".format(i+1)], d["ytr_fold{0}".format(i+1)], d["xte_fold{0}".format(i+1)], d["yte_fold{0}".format(i+1)] = np.delete(x_train_np, folds[list(folds)[i]], axis=0), np.delete(y_train_np, folds[list(folds)[i]], axis=0), x_train_np[folds[list(folds)[i]]], y_train_np[folds[list(folds)[i]]] + data_to_work_with.append("xtr_fold{0}".format(i+1)) + data_to_work_with.append("ytr_fold{0}".format(i+1)) + data_to_work_with.append("xte_fold{0}".format(i+1)) + data_to_work_with.append("yte_fold{0}".format(i+1)) temp_path = Path('temp/') - for i in data_to_work_with: np.savetxt(temp_path / str(i + ".csv"), vars()[i], delimiter=",") + for i in data_to_work_with: + if 'fold' in i: + j = d[i] + else: + j = globals()[i] + np.savetxt(temp_path / str(i + ".csv"), j, delimiter=",") # run Julia Jchemo import subprocess subprocess_path = Path("Class_Mod/") subprocess.run([f"{sys.executable}", subprocess_path / "LWPLSR_Call.py"]) # retrieve json results from Julia JChemo - with open(temp_path / "lwplsr_outputs.json", "r") as outfile: - Reg_json = json.load(outfile) - for i in data_to_work_with: os.unlink(temp_path / str(i + ".csv")) - os.unlink(temp_path / "lwplsr_outputs.json") - pred = ['pred_data_train', 'pred_data_test'] - Reg = type('obj', (object,), {'model' : Reg_json['model'], 'best_lwplsr_params' : Reg_json['best_lwplsr_params'], 'pred_data_' : [pd.json_normalize(Reg_json[i]) for i in pred]}) - for i in range(len(pred)): - Reg.pred_data_[i] = Reg.pred_data_[i].T.reset_index().drop(columns = ['index']) - if i != 1: # if not pred_data_test - Reg.pred_data_[i].index = list(y_train.index) - else: - Reg.pred_data_[i].index = list(y_test.index) - + try: + with open(temp_path / "lwplsr_outputs.json", "r") as outfile: + Reg_json = json.load(outfile) + for i in data_to_work_with: os.unlink(temp_path / str(i + ".csv")) + os.unlink(temp_path / "lwplsr_outputs.json") + pred = ['pred_data_train', 'pred_data_test'] + Reg = type('obj', (object,), {'model' : Reg_json['model'], 'best_lwplsr_params' : Reg_json['best_lwplsr_params'], 'pred_data_' : [pd.json_normalize(Reg_json[i]) for i in pred]}) + for i in range(len(pred)): + Reg.pred_data_[i] = Reg.pred_data_[i].T.reset_index().drop(columns = ['index']) + if i != 1: # if not pred_data_test + Reg.pred_data_[i].index = list(y_train.index) + else: + Reg.pred_data_[i].index = list(y_test.index) + Reg.CV_results_ = pd.DataFrame() + Reg.cv_data_ = pd.DataFrame() + info.empty() + M1.success('Model created!') + except FileNotFoundError as e: + info.empty() + M1.warning('- ERROR during model creation -') + Reg = None elif regression_algo == reg_algo[3]: s = M1.number_input(label='Enter the maximum number of intervals', min_value=1, max_value=6, value=3) it = M1.number_input(label='Enter the number of iterations', min_value=2, max_value=10, value=3) @@ -218,7 +244,7 @@ if not spectra.empty and not y.empty: ################# Model analysis ############ - if regression_algo in reg_algo[1:]: + if regression_algo in reg_algo[1:] and Reg is not None: #M2.write('-- Pretreated data (train) visualization and important spectral regions in the model -- ') fig, (ax1, ax2) = plt.subplots(2,1, figsize = (12, 6)) @@ -368,7 +394,7 @@ with st.container(): if not spectra.empty and not y.empty: - if regression_algo in reg_algo[1:]: + if regression_algo in reg_algo[1:] and Reg is not None: fig, (ax1, ax2) = plt.subplots(2,1, figsize = (12, 4), sharex=True) ax1.plot(colnames, np.mean(X_train, axis = 0), color = 'black', label = 'Average spectrum (Raw)') ax2.plot(colnames, np.mean(Reg.pretreated_spectra_ , axis = 0), color = 'black', label = 'Average spectrum (pretreated)')