From a9eda16eaf3f64cae88c1665b5c5cb276aa493ea Mon Sep 17 00:00:00 2001
From: DIANE <abderrahim.diane@cefe.cnrs.fr>
Date: Mon, 19 Aug 2024 17:14:24 +0200
Subject: [PATCH] Handle review issues: expose nfolds in the regression models, apply interval selection before CV in TPE-iPLS, and rework data loading, caching, and results download in the model-creation page

---
 src/Class_Mod/RegModels.py       |  55 ++--
 src/pages/1-samples_selection.py |   4 +-
 src/pages/2-model_creation.py    | 457 +++++++++++++++++--------------
 3 files changed, 272 insertions(+), 244 deletions(-)

diff --git a/src/Class_Mod/RegModels.py b/src/Class_Mod/RegModels.py
index 0063bcc..01fda38 100644
--- a/src/Class_Mod/RegModels.py
+++ b/src/Class_Mod/RegModels.py
@@ -93,8 +93,8 @@ class Regmodel(object):
   
 ########################################### PLSR   #########################################
 class Plsr(Regmodel):
-    def __init__(self, train, test, n_iter = 10):
-        super().__init__(train, test, n_iter, add_hyperparams = {'n_components': hp.randint('n_components', 2,20)})
+    def __init__(self, train, test, n_iter = 10, nfolds = 3):
+        super().__init__(train, test, n_iter, nfolds = nfolds, add_hyperparams = {'n_components': hp.randint('n_components', 1,20)})
         ### parameters in common
         
     def objective(self, params):
@@ -114,14 +114,9 @@ class Plsr(Regmodel):
         params['deriv'], params['polyorder'], params['window_length']  = a, b, c
         x2 = [savgol_filter(x1[i], polyorder=params['polyorder'], deriv=params['deriv'], window_length = params['window_length']) for i in range(2)]
 
-        Model = PLSRegression(scale = False, n_components = params['n_components'])
-        # self._cv_df = KF_CV().process(model = Model, x = x2[0], y = self._ytrain, n_folds = self._nfolds)
-        # self._cv_df['Average'] = self._cv_df.mean(axis = 1)
-        # self._cv_df['S'] = self._cv_df.std(axis = 1)
-        # self._cv_df['CV(%)'] = self._cv_df['S'] * 100 / self._cv_df['Average']
-        # self._cv_df = self._cv_df.T.round(2)
+        model = PLSRegression(scale = False, n_components = params['n_components'])
         folds = KF_CV().CV(x = x2[0], y = np.array(self._ytrain), n_folds = self._nfolds)
-        yp = KF_CV().cross_val_predictor(model = Model, folds = folds, x = x2[0], y = np.array(self._ytrain))
+        yp = KF_CV().cross_val_predictor(model = model, folds = folds, x = x2[0], y = np.array(self._ytrain))
         self._cv_df = KF_CV().metrics_cv(y = np.array(self._ytrain), ypcv = yp, folds =folds)[1]
                 
         score = self._cv_df.loc["cv",'rmse']
@@ -147,15 +142,15 @@ class Plsr(Regmodel):
 
     ############################################ iplsr #########################################
 class TpeIpls(Regmodel):
-    def __init__(self, train, test, n_iter = 10, n_intervall = 5):
+    def __init__(self, train, test, n_iter = 10, n_intervall = 5, nfolds = 3):
         self.n_intervall = n_intervall
         self.n_arrets = self.n_intervall*2
         
         
-        r = {'n_components': hp.randint('n_components', 2,10)}
+        r = {'n_components': hp.randint('n_components', 1,20)}
         r.update({f'v{i}': hp.randint(f'v{i}', 0, train[0].shape[1]) for i in range(1,self.n_arrets+1)})
 
-        super().__init__(train, test, n_iter, add_hyperparams = r)
+        super().__init__(train, test, n_iter, add_hyperparams = r, nfolds = nfolds)
         
         ### parameters in common
         
@@ -166,7 +161,7 @@ class TpeIpls(Regmodel):
         arrays = [np.arange(self.idx[2*i],self.idx[2*i+1]+1) for i in range(self.n_intervall)]
         id = np.unique(np.concatenate(arrays, axis=0), axis=0)
 
-        # ## Preprocessing
+        ### Preprocessing
         x0 = [self._xc, self._xt]
         x1 = [eval(str(params['normalization'])+"(x0[i])") for i in range(2)]
 
@@ -180,35 +175,35 @@ class TpeIpls(Regmodel):
 
         params['deriv'], params['polyorder'], params['window_length']  = a, b, c
         x2 = [savgol_filter(x1[i], polyorder=params['polyorder'], deriv=params['deriv'], window_length = params['window_length']) for i in range(2)]
-        # print(x2)
         
-        # ## Modelling
-        folds = KF_CV().CV(x = x2[0], y = np.array(self._ytrain), n_folds = self._nfolds)
+        
+        prepared_data = [x2[i][:,id] for i in range(2)]
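+        # keep only the wavelength columns selected by the intervals (indices in 'id'), for both train and test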
+
+        
+        ### Modelling
+        folds = KF_CV().CV(x = prepared_data[0], y = np.array(self._ytrain), n_folds = self._nfolds)
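+        # 'folds' maps each fold label to the row indices held out for validation in that fold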
         try:
-            
-            Model = PLSRegression(scale = False, n_components = params['n_components'])
-            yp = KF_CV().cross_val_predictor(model = Model, folds = folds, x = x2[0], y = np.array(self._ytrain))
+            model = PLSRegression(scale = False, n_components = params['n_components'])
+            yp = KF_CV().cross_val_predictor(model = model, folds = folds, x = prepared_data[0], y = np.array(self._ytrain))
             self._cv_df = KF_CV().metrics_cv(y = np.array(self._ytrain), ypcv = yp, folds =folds)[1]
         except ValueError as ve:
-            Model = PLSRegression(scale = False, n_components = 1)
             params["n_components"] = 1
-            yp = KF_CV().cross_val_predictor(model = Model, folds = folds, x = x2[0], y = np.array(self._ytrain))
+            model = PLSRegression(scale = False, n_components = params["n_components"])
+            yp = KF_CV().cross_val_predictor(model = model, folds = folds, x = prepared_data[0], y = np.array(self._ytrain))
             self._cv_df = KF_CV().metrics_cv(y = np.array(self._ytrain), ypcv = yp, folds =folds)[1]
-        # self._cv_df['Average'] = self._cv_df.mean(axis = 1)
-        # self._cv_df['S'] = self._cv_df.std(axis = 1)
-        # self._cv_df['CV(%)'] = self._cv_df['S'] * 100 / self._cv_df['Average']
-        # self._cv_df = self._cv_df.T.round(2)
+
+
         score = self._cv_df.loc['cv','rmse']
         
-        Model = PLSRegression(scale = False, n_components = params['n_components'])
-        Model.fit(x2[0][:,id], self._ytrain)
+        Model = PLSRegression(scale = False, n_components = model.n_components)
+        Model.fit(prepared_data[0], self._ytrain)
 
         if self.SCORE > score:
             self.SCORE = score
             self._ycv = KF_CV().meas_pred_eq(y = np.array(self._ytrain), ypcv=yp, folds=folds)
             
-            self._yc = Model.predict(x2[0][:,id])
-            self._yt = Model.predict(x2[1][:,id])
+            self._yc = Model.predict(prepared_data[0])
+            self._yt = Model.predict(prepared_data[1])
             self._model = Model
             for key,value in params.items():
                 try: params[key] =  int(value)
@@ -231,4 +226,4 @@ class TpeIpls(Regmodel):
 class Pcr(Regmodel):
-    def __init__(self, train, test, n_iter = 10, n_val = 5):
-        super.__init__()
-        {f'pc{i}': hp.randint(f'pc{i+1}', 0, train[0].shape[1]) for i in range(self.n_val)}
\ No newline at end of file
+    def __init__(self, train, test, n_iter = 10, n_val = 5, nfolds = 3):
+        # follow the Plsr/TpeIpls pattern: build the hyperparameter space, then delegate to Regmodel
+        r = {f'pc{i}': hp.randint(f'pc{i}', 0, train[0].shape[1]) for i in range(1, n_val + 1)}
+        super().__init__(train, test, n_iter, nfolds = nfolds, add_hyperparams = r)
diff --git a/src/pages/1-samples_selection.py b/src/pages/1-samples_selection.py
index 1aed061..19e2ab6 100644
--- a/src/pages/1-samples_selection.py
+++ b/src/pages/1-samples_selection.py
@@ -658,6 +658,6 @@ if not sam.empty:
         zipname = json.load(f)
     if os.path.split(recent_file)[1] == os.path.split(zipname)[1]:
         with open("./temp/"+zipname, "rb") as fp:
-                st.write('Download the Analysis Results')
+                st.subheader('Download the Analysis Results')
                 st.download_button('Download', data = fp, file_name=zipname, mime="application/zip",
-                                args=None, kwargs=None,type="primary",use_container_width=True)
+                                args=None, kwargs=None,type="primary",use_container_width=True)
\ No newline at end of file
diff --git a/src/pages/2-model_creation.py b/src/pages/2-model_creation.py
index a9f995a..ad94b1f 100644
--- a/src/pages/2-model_creation.py
+++ b/src/pages/2-model_creation.py
@@ -1,5 +1,5 @@
 from Packages import *
-st.set_page_config(page_title="NIRS Utils", page_icon=":goat:", layout="wide")
+st.set_page_config(page_title = "NIRS Utils", page_icon = ":goat:", layout = "wide")
 from Modules import *
 from Class_Mod.DATA_HANDLING import *
 add_header()
@@ -33,6 +33,7 @@ def delete_dir():
     
 def increment():
     st.session_state.counter += 1
+
 # ####################################  Methods ##############################################
 class lw:
     def __init__(self, Reg_json, pred):
@@ -40,28 +41,12 @@ class lw:
         self.best_hyperparams_ = Reg_json['best_lwplsr_params']
         self.pred_data_ = [pd.json_normalize(Reg_json[i]) for i in pred]
 
-# @st.cache_data
-# # def tpeipls_(change, n_intervall, n_iter):
-#     Reg = TpeIpls(train = [X_train, y_train], test=[X_test, y_test], n_intervall = n_intervall, n_iter=n_iter)
-#     # time.sleep(1)
-#     # reg_model = Reg.model_
-#     # global intervalls
-#     # intervalls = Reg.selected_features_.T
-#     # intervalls_with_cols = Reg.selected_features_.T
-#     # for i in range(intervalls.shape[0]):
-#     #     for j in range(intervalls.shape[1]):
-#     #         intervalls_with_cols.iloc[i,j] = spectra.columns[intervalls.iloc[i,j]]
-#     # rega = Reg.selected_features_
-#     return Reg #, reg_model, intervalls, intervalls_with_cols, rega
-def auto_execute(func):
-    func()
-    return func
 
 # ####################################### page preamble #######################################
 st.title("Calibration Model Development") # page title
 st.markdown("Create a predictive model, then use it for predicting your target variable (chemical data) from NIRS spectra")
 M0, M00 = st.columns([1, .4])
-M0.image("./images/model_creation.png", use_column_width=True) # graphical abstract
+M0.image("./images/model_creation.png", use_column_width = True) # graphical abstract
 
 
 
@@ -69,7 +54,7 @@ M0.image("./images/model_creation.png", use_column_width=True) # graphical abstr
 
 ################################################################# Begin : I- Data loading and preparation ######################################
 files_format = ['csv', 'dx'] # Supported files format
-file = M00.radio('Select files format:', options = files_format,horizontal=True) # Select a file format
+file = M00.radio('Select files format:', options = files_format, horizontal = True) # Select a file format
 spectra = pd.DataFrame() # preallocate the spectral data block
 y = pd.DataFrame() # preallocate the target(s) data block
 match file:
@@ -77,12 +62,12 @@ match file:
     case 'csv':
         with M00:
             # Load X-block data
-            xcal_csv = st.file_uploader("Select NIRS Data", type="csv", help=" :mushroom: select a csv matrix with samples as rows and lambdas as columns")
+            xcal_csv = st.file_uploader("Select NIRS Data", type = "csv", help = " :mushroom: select a csv matrix with samples as rows and lambdas as columns")
             if xcal_csv:
                 sepx = st.radio("Select separator (X file) - _detected_: " + str(find_delimiter('data/'+xcal_csv.name)),
-                                        options=[";", ","], index=[";", ","].index(str(find_delimiter('data/'+xcal_csv.name))), key=0,horizontal=True)
+                                        options = [";", ","], index = [";", ","].index(str(find_delimiter('data/'+xcal_csv.name))), key = 0, horizontal = True)
                 hdrx = st.radio("samples name (X file)? - _detected_: " + str(find_col_index('data/'+xcal_csv.name)),
-                                        options=["no", "yes"], index=["no", "yes"].index(str(find_col_index('data/'+xcal_csv.name))), key=1,horizontal=True)
+                                        options = ["no", "yes"], index = ["no", "yes"].index(str(find_col_index('data/'+xcal_csv.name))), key = 1, horizontal = True)
                 match hdrx:
                     case "yes":
                         col = 0
@@ -92,12 +77,12 @@ match file:
                 st.warning('Insert your spectral data file here!')
             
             # Load Y-block data
-            ycal_csv = st.file_uploader("Select corresponding Chemical Data", type="csv", help=" :mushroom: select a csv matrix with samples as rows and chemical values as a column")
+            ycal_csv = st.file_uploader("Select corresponding Chemical Data", type = "csv", help = " :mushroom: select a csv matrix with samples as rows and chemical values as a column")
             if ycal_csv:
                 sepy = st.radio("Select separator (Y file) - _detected_: " + str(find_delimiter('data/'+ycal_csv.name)),
-                                options=[";", ","], index=[";", ","].index(str(find_delimiter('data/'+ycal_csv.name))), key=2,horizontal=True)
+                                options = [";", ","], index = [";", ","].index(str(find_delimiter('data/'+ycal_csv.name))), key = 2, horizontal = True)
                 hdry = st.radio("samples name (Y file)? - _detected_: " + str(find_col_index('data/'+ycal_csv.name)),
-                                options=["no", "yes"], index=["no", "yes"].index(str(find_col_index('data/'+ycal_csv.name))), key=3,horizontal=True)
+                                options = ["no", "yes"], index = ["no", "yes"].index(str(find_col_index('data/'+ycal_csv.name))), key = 3, horizontal = True)
                 
                 match hdry:
                     case "yes":
@@ -121,50 +106,59 @@ match file:
                 @st.cache_data
                 def csv_loader(change):
                     file_name = str(xcal_csv.name) +' and '+ str(ycal_csv.name)
-                    xfile = pd.read_csv(xcal_csv, decimal='.', sep=sepx, index_col=col, header=0)
-                    yfile =  pd.read_csv(ycal_csv, decimal='.', sep=sepy, index_col=col)
-                    xfile.to_csv("./Report/datasets/"+xcal_csv.name,sep = ';', encoding='utf-8', mode='a')
-                    yfile.to_csv("./Report/datasets/"+ycal_csv.name,sep = ';', encoding='utf-8', mode='a')
+                    xfile = pd.read_csv(xcal_csv, decimal = '.', sep = sepx, index_col = col, header = 0)
+                    yfile =  pd.read_csv(ycal_csv, decimal = '.', sep = sepy, index_col = col)
+                    xfile.to_csv("./Report/datasets/"+xcal_csv.name, sep = ';', encoding = 'utf-8', mode = 'a')
+                    yfile.to_csv("./Report/datasets/"+ycal_csv.name, sep = ';', encoding = 'utf-8', mode = 'a')
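+                    # note: mode = 'a' appends, so re-running the loader keeps adding rows to the report copies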
                     return xfile, yfile, file_name
-                xfile, yfile, file_name = csv_loader(change =xy_hash)
+                
+                xfile, yfile, file_name = csv_loader(change = xy_hash)
 
 
                 if yfile.shape[1]>0 and xfile.shape[1]>0 :
 
                     # prepare x data
-                    spectra, meta_data = col_cat(xfile)
+                    try:
+                        spectra, meta_data = col_cat(xfile)
+                    except Exception:
+                        st.error('Error: The format of the X-file does not correspond to the expected dialect settings. To read the file correctly, please adjust the separator parameters.')
                     spectra = pd.DataFrame(spectra).astype(float)
                     
                     # prepare y data
-                    chem_data, idx = col_cat(yfile)
-                    if chem_data.shape[1]>1:
-                        yname = M00.selectbox('Select target', options=chem_data.columns)
-                        y = chem_data.loc[:,yname]
-                    else:
-                        y = chem_data.iloc[:,0]
-                    
+                    try:
+                        chem_data, idx = col_cat(yfile)
+                    except Exception:
+                        st.error('Error: The format of the Y-file does not correspond to the expected dialect settings. To read the file correctly, please adjust the separator parameters.')
+
+                    if 'chem_data' in globals():
+                        if chem_data.shape[1]>1:
+                            yname = M00.selectbox('Select target', options = chem_data.columns)
+                            y = chem_data.loc[:, yname]
+                        else:
+                            y = chem_data.iloc[:, 0]
+                        
                     ### warning
                     if spectra.shape[0] != y.shape[0]:
-                        st.warning('X and Y have different sample size')
-                        y = pd.DataFrame
-                        spectra = pd.DataFrame
+                        st.error('Error: X and Y have different sample sizes')
+                        y = pd.DataFrame()
+                        spectra = pd.DataFrame()
 
                 else:
-                    st.error('Error: The data has not been loaded successfully, please consider tuning the decimal and separator !')
+                    st.error('Error: The data has not been loaded successfully, please consider tuning the dialect settings !')
     
     # Load .dx file
     case 'dx':
         with M00:
-            data_file = st.file_uploader("Select Data", type=".dx", help=" :mushroom: select a dx file")
+            data_file = st.file_uploader("Select Data", type = ".dx", help = " :mushroom: select a dx file")
             if data_file:
                 file_name = str(data_file.name)
                 ## creating the temp file
-                with NamedTemporaryFile(delete=False, suffix=".dx") as tmp:
+                with NamedTemporaryFile(delete = False, suffix = ".dx") as tmp:
                         tmp.write(data_file.read())
                         tmp_path = tmp.name
                         with open(tmp.name, 'r') as dd:
                             dxdata = dd.read()
-                            xy_hash = str(dxdata)
+                            xy_hash = hash_data(str(dxdata))
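+                            # hash the raw .dx text once; the same key drives the cached visualize() and model-creation calls below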
                         with open('Report/datasets/'+data_file.name, 'w') as dd:
                             dd.write(dxdata)
                 ## load and parse the temp dx file
@@ -173,18 +167,18 @@ match file:
                     chem_data, spectra, meta_data, meta_data_st = read_dx(file =  tmp_path)    
                     os.unlink(tmp_path)
                     return chem_data, spectra, meta_data, meta_data_st
-                chem_data, spectra, meta_data, meta_data_st = dx_loader(change = hash_data(xy_hash))
+                chem_data, spectra, meta_data, meta_data_st = dx_loader(change = dxdata)
                 
                 if not spectra.empty:
-                    st.success("The data have been loaded successfully", icon="✅")
+                    st.success("The data have been loaded successfully", icon = "✅")
                 if chem_data.shape[1]>0:
                     
-                    yname = st.selectbox('Select target', options=chem_data.columns)
-                    measured = chem_data.loc[:,yname] > 0
-                    y = chem_data.loc[:,yname].loc[measured]
+                    yname = st.selectbox('Select target', options = chem_data.columns)
+                    measured = chem_data.loc[:, yname] > 0
+                    y = chem_data.loc[:, yname].loc[measured]
                     spectra = spectra.loc[measured]
                 else:
-                    st.warning('Warning: your file includes no target variables to model !', icon="⚠️")
+                    st.warning('Warning: your file includes no target variables to model !', icon = "⚠️")
 
 
             else :
@@ -198,19 +192,18 @@ match file:
 
 
 ################################################### BEGIN : visualize and split the data ####################################################
-st.header("I - Data visualization", divider='blue')
+st.header("I - Data visualization", divider = 'blue')
 if not spectra.empty and not y.empty:
     @st.cache_data
     def visualize(change):
-        
-        if np.array(spectra.columns).dtype.kind in ['i','f']:
+        if np.array(spectra.columns).dtype.kind in ['i', 'f']:
             colnames = spectra.columns
         else:
             colnames = np.arange(spectra.shape[1])
 
 
         # Split data into training and test sets using the kennard_stone method and correlation metric, 25% of data is used for testing
-        train_index, test_index = train_test_split_idx(spectra, y = y, method="kennard_stone", metric="correlation", test_size=0.25, random_state=42)
+        train_index, test_index = train_test_split_idx(spectra, y = y, method = "kennard_stone", metric = "correlation", test_size = 0.25, random_state = 42)
 
         # Assign data to training and test sets
         X_train, y_train = pd.DataFrame(spectra.iloc[train_index,:]), y.iloc[train_index]
@@ -219,30 +212,32 @@ if not spectra.empty and not y.empty:
 
         #### insight on loaded data
         # M0, M000 = st.columns([1, .4])
-        fig1, ax1 = plt.subplots( figsize = (12,3))
-        spectra.T.plot(legend=False, ax = ax1, linestyle = '-', linewidth = 0.6)
+        fig1, ax1 = plt.subplots(figsize = (12, 3))
+        spectra.T.plot(legend = False, ax = ax1, linestyle = '-', linewidth = 0.6)
         ax1.set_ylabel('Signal intensity')
         ax1.margins(0)
         plt.tight_layout()
 
         fig2, ax2 = plt.subplots(figsize = (12,3))
-        sns.histplot(y, color="deeppink", kde = True,label="y",ax = ax2, fill=True)
-        sns.histplot(y_train, color="blue", kde = True,label="y (train)",ax = ax2, fill=True)
-        sns.histplot(y_test, color="green", kde = True,label="y (test)",ax = ax2, fill=True)
+        sns.histplot(y, color = "deeppink", kde = True, label = "y", ax = ax2, fill = True)
+        sns.histplot(y_train, color = "blue", kde = True, label = "y (train)", ax = ax2, fill = True)
+        sns.histplot(y_test, color = "green", kde = True, label = "y (test)", ax = ax2, fill = True)
         ax2.set_xlabel('y')
         plt.legend()
         plt.tight_layout()
-        stats=pd.DataFrame([desc_stats(y_train), desc_stats(y_test), desc_stats(y)], index =['train', 'test', 'total'] ).round(2)
+        stats = pd.DataFrame([desc_stats(y_train), desc_stats(y_test), desc_stats(y)], index = ['train', 'test', 'total']).round(2)
+
+        fig1.savefig("./Report/figures/spectra_plot.png")
+        fig2.savefig("./Report/figures/histogram.png")
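+        # saving inside the cached function means the figures are written once per dataset hash, not on every Streamlit rerun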
         
-        return X_train, X_test,y_train, y_test, colnames, train_index, test_index, stats, fig1, fig2
-    X_train, X_test,y_train, y_test, colnames, train_index, test_index, stats, fig1, fig2= visualize(change = hash_data(y+np.median(spectra)))
+        return X_train, X_test, y_train, y_test, colnames, train_index, test_index, stats, fig1, fig2
+    X_train, X_test, y_train, y_test, colnames, train_index, test_index, stats, fig1, fig2 = visualize(change = xy_hash)
     
     M0, M000 = st.columns([1, .4])
     with M0:
         st.pyplot(fig1) ######## Loaded graph
         st.pyplot(fig2)
-        fig1.savefig("./Report/figures/spectra_plot.png")
-        fig2.savefig("./Report/figures/Histogram.png")
+
     with M000:
         st.write('Loaded data summary')
         st.write(stats)
@@ -254,25 +249,25 @@ if not spectra.empty and not y.empty:
 
 
 
-##########################################################   BEGIN : Create Model    ####################################################
+    ###################################################     BEGIN : Create Model     ####################################################
 regression_algo = None # initialize the selected regression algorithm
 Reg = None  # initialize the regression model object
-intervalls_with_cols = pd.DataFrame()
 
-st.header("II - Model creation", divider='blue')
+st.header("II - Model creation", divider = 'blue')
 if not (spectra.empty and y.empty):
-    M10, M20, M30, M40, M50 = st.columns([1,1,1,1,1])
+    M10, M20, M30, M40, M50 = st.columns([1, 1, 1, 1, 1])
 
     # select type of supervised modelling problem
     modes = ['regression', 'classification']
-    mode =M10.radio("Analysis Methods", options=modes)
+mode = M10.radio("Analysis Methods", options = modes)
     match mode:
         case "regression":
-            reg_algo = ["","PLS", "LW-PLS", "TPE-iPLS"]
-            regression_algo = M20.selectbox("Choose the regression algorithm", options= reg_algo, key = "regression_algo", format_func=lambda x: x if x else "<Select>")
+            reg_algo = ["", "PLS", "LW-PLS", "TPE-iPLS"]
+            regression_algo = M20.selectbox("Choose the regression algorithm", options = reg_algo, key = "regression_algo", format_func = lambda x: x if x else "<Select>")
         case 'classification':
-            reg_algo = ["","PLS", "LW-PLS", "TPE-iPLS"]
-            regression_algo = M20.selectbox("Choose the classification algorithm", options= reg_algo, key = 12, format_func=lambda x: x if x else "<Select>")
+            reg_algo = ["", "PLS", "LW-PLS", "TPE-iPLS"]
+            regression_algo = M20.selectbox("Choose the classification algorithm", options = reg_algo, key = 12, format_func = lambda x: x if x else "<Select>")
     
 
 #     # Training set preparation for cross-validation(CV)
@@ -282,121 +277,131 @@ if not (spectra.empty and y.empty):
     
     # Model creation-M20 columns
     with M20:
-        if regression_algo:
-            info = st.info('The model is being created. This may take a few minutes.')
-            if regression_algo == 'TPE-iPLS':# if model type is ipls then ask for the number of iterations and intervalls
-                s = st.number_input(label='Enter the maximum number of intervals', min_value=1, max_value=6)
-                it = st.number_input(label='Enter the number of iterations', min_value=2, max_value=500, value=2)
-
-        if regression_algo: # if a regression method is selected then create the model
-            @st.cache_data
-            def RequestingModelCreation(change, regression_algo):
-                match regression_algo:
-                    case 'PLS':
-                        Reg = Plsr(train = [X_train, y_train], test = [X_test, y_test], n_iter=5)
-                        reg_model = Reg.model_
-                        rega = Reg.selected_features_
-                    case 'LW-PLS':
-                        # export data to csv for Julia train/test
-                        global x_train_np, y_train_np, x_test_np, y_test_np
-                        data_to_work_with = ['x_train_np', 'y_train_np', 'x_test_np', 'y_test_np']
-                        x_train_np, y_train_np, x_test_np, y_test_np = X_train.to_numpy(), y_train.to_numpy(), X_test.to_numpy(), y_test.to_numpy()
-                        # Cross-Validation calculation
-
-                        d = {}
+        @st.cache_data
+        def RequestingModelCreation(xydata, change, regression_algo, s, it):
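+            # 'xydata' and 'change' are unused in the body; they only key the st.cache_data cache (dataset hash and re-model counter)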
+            match regression_algo:
+                case 'PLS':
+                    Reg = Plsr(train = [X_train, y_train], test = [X_test, y_test], n_iter = 50, nfolds = nb_folds)
+                    reg_model = Reg.model_
+                    rega = Reg.selected_features_
+
+                case 'LW-PLS':
+                    # export data to csv for Julia train/test
+                    global x_train_np, y_train_np, x_test_np, y_test_np
+                    data_to_work_with = ['x_train_np', 'y_train_np', 'x_test_np', 'y_test_np']
+                    x_train_np, y_train_np, x_test_np, y_test_np = X_train.to_numpy(), y_train.to_numpy(), X_test.to_numpy(), y_test.to_numpy()
+                    # Cross-Validation calculation
+                    d = {}
+                    for i in range(nb_folds):
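+                        # np.delete drops the fold's rows to form the training split; direct indexing keeps them as the held-out split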
+                        d["xtr_fold{0}".format(i+1)] = np.delete(x_train_np, folds[list(folds)[i]], axis = 0)
+                        d["ytr_fold{0}".format(i+1)] = np.delete(y_train_np, folds[list(folds)[i]], axis = 0)
+                        d["xte_fold{0}".format(i+1)] = x_train_np[folds[list(folds)[i]]]
+                        d["yte_fold{0}".format(i+1)] = y_train_np[folds[list(folds)[i]]]
+                        data_to_work_with.append("xtr_fold{0}".format(i+1))
+                        data_to_work_with.append("ytr_fold{0}".format(i+1))
+                        data_to_work_with.append("xte_fold{0}".format(i+1))
+                        data_to_work_with.append("yte_fold{0}".format(i+1))
+                    # check best pre-treatment with a global PLSR model
+                    preReg = Plsr(train = [X_train, y_train], test = [X_test, y_test], n_iter = 20)
+                    temp_path = Path('temp/')
+                    with open(temp_path / "lwplsr_preTreatments.json", "w+") as outfile:
+                        json.dump(preReg.best_hyperparams_, outfile)
+                    # export Xtrain, Xtest, Ytrain, Ytest and all CV folds to temp folder as csv files
+                    for i in data_to_work_with:
+                        if 'fold' in i:
+                            j = d[i]
+                        else:
+                            j = globals()[i]
+                        np.savetxt(temp_path / str(i + ".csv"), j, delimiter = ",")
+                    # run Julia Jchemo as subprocess
+                    import subprocess
+                    subprocess_path = Path("Class_Mod/")
+                    subprocess.run([f"{sys.executable}", subprocess_path / "LWPLSR_Call.py"])
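+                    # subprocess.run blocks until the Julia script finishes and has written its JSON outputs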
+                    # retrieve json results from Julia JChemo
+                    try:
+                        with open(temp_path / "lwplsr_outputs.json", "r") as outfile:
+                            Reg_json = json.load(outfile)
+                            # delete csv files
+                            for i in data_to_work_with: os.unlink(temp_path / str(i + ".csv"))
+                        # delete json file after import
+                        os.unlink(temp_path / "lwplsr_outputs.json")
+                        os.unlink(temp_path / "lwplsr_preTreatments.json")
+                        # format result data into Reg object
+                        pred = ['pred_data_train', 'pred_data_test'] # keys of the dict
                         for i in range(nb_folds):
-                            d["xtr_fold{0}".format(i+1)], d["ytr_fold{0}".format(i+1)], d["xte_fold{0}".format(i+1)], d["yte_fold{0}".format(i+1)] = np.delete(x_train_np, folds[list(folds)[i]], axis=0), np.delete(y_train_np, folds[list(folds)[i]], axis=0), x_train_np[folds[list(folds)[i]]], y_train_np[folds[list(folds)[i]]]
-                            data_to_work_with.append("xtr_fold{0}".format(i+1))
-                            data_to_work_with.append("ytr_fold{0}".format(i+1))
-                            data_to_work_with.append("xte_fold{0}".format(i+1))
-                            data_to_work_with.append("yte_fold{0}".format(i+1))
-                        # check best pre-treatment with a global PLSR model
-                        preReg = Plsr(train = [X_train, y_train], test = [X_test, y_test], n_iter=20)
-                        temp_path = Path('temp/')
-                        with open(temp_path / "lwplsr_preTreatments.json", "w+") as outfile:
-                            json.dump(preReg.best_hyperparams_, outfile)
-                        # export Xtrain, Xtest, Ytrain, Ytest and all CV folds to temp folder as csv files
-                        for i in data_to_work_with:
-                            if 'fold' in i:
-                                j = d[i]
-                            else:
-                                j = globals()[i]
-                                # st.write(j)
-                            np.savetxt(temp_path / str(i + ".csv"), j, delimiter=",")
-                        # run Julia Jchemo as subprocess
-                        import subprocess
-                        subprocess_path = Path("Class_Mod/")
-                        subprocess.run([f"{sys.executable}", subprocess_path / "LWPLSR_Call.py"])
-                        # retrieve json results from Julia JChemo
-                        try:
-                            with open(temp_path / "lwplsr_outputs.json", "r") as outfile:
-                                Reg_json = json.load(outfile)
-                                # delete csv files
-                                for i in data_to_work_with: os.unlink(temp_path / str(i + ".csv"))
-                            # delete json file after import
-                            os.unlink(temp_path / "lwplsr_outputs.json")
-                            os.unlink(temp_path / "lwplsr_preTreatments.json")
-                            # format result data into Reg object
-                            pred = ['pred_data_train', 'pred_data_test']### keys of the dict
-                            for i in range(nb_folds):
-                                pred.append("CV" + str(i+1)) ### add cv folds keys to pred
-                            # global Reg
-                            # Reg = type('obj', (object,), {'model_' : Reg_json['model'], 'best_hyperparams_' : Reg_json['best_lwplsr_params'],
-                            #                             'pred_data_' : [pd.json_normalize(Reg_json[i]) for i in pred]})
-                            # global Reg
-                            Reg = lw(Reg_json=Reg_json, pred = pred)
-                            reg_model = Reg.model_
-                            Reg.CV_results_ = pd.DataFrame()
-                            Reg.cv_data_ = {'YpredCV' : {}, 'idxCV' : {}}
-                            # set indexes to Reg.pred_data (train, test, folds idx)
-                            for i in range(len(pred)):
-                                Reg.pred_data_[i] = Reg.pred_data_[i].T.reset_index().drop(columns = ['index'])
-                                if i == 0: # data_train
-                                    # Reg.pred_data_[i] = np.array(Reg.pred_data_[i])
-                                    Reg.pred_data_[i].index = list(y_train.index)
-                                    Reg.pred_data_[i] = Reg.pred_data_[i].iloc[:,0]
-                                elif i == 1: # data_test
-                                    # Reg.pred_data_[i] = np.array(Reg.pred_data_[i])
-                                    Reg.pred_data_[i].index = list(y_test.index)
-                                    Reg.pred_data_[i] = Reg.pred_data_[i].iloc[:,0]
-                                else:
-                                    # CVi
-                                    Reg.pred_data_[i].index = folds[list(folds)[i-2]]
-                                    # Reg.CV_results_ = pd.concat([Reg.CV_results_, Reg.pred_data_[i]])
-                                    Reg.cv_data_['YpredCV']['Fold' + str(i-1)] = np.array(Reg.pred_data_[i]).reshape(-1)
-                                    Reg.cv_data_['idxCV']['Fold' + str(i-1)] = np.array(folds[list(folds)[i-2]]).reshape(-1)
-
-                            Reg.CV_results_= KF_CV.metrics_cv(y = y_train, ypcv = Reg.cv_data_['YpredCV'], folds = folds)[1]
-                            #### cross validation results print
-                            Reg.best_hyperparams_print = Reg.best_hyperparams_
-                            ## plots
-                            Reg.cv_data_ = KF_CV().meas_pred_eq(y = np.array(y_train), ypcv= Reg.cv_data_['YpredCV'], folds=folds)
-                            Reg.pretreated_spectra_ = preReg.pretreated_spectra_
+                            pred.append("CV" + str(i+1)) ### add cv folds keys to pred
                             
-                            Reg.best_hyperparams_print = {**preReg.best_hyperparams_, **Reg.best_hyperparams_}
-                            Reg.best_hyperparams_ = {**preReg.best_hyperparams_, **Reg.best_hyperparams_}
-
-                            Reg.__hash__ = hash_data(Reg.best_hyperparams_print)
-                        except FileNotFoundError as e:
-                            Reg = None
-                            for i in data_to_work_with: os.unlink(temp_path / str(i + ".csv"))
-                            Reg.__hash__ = 0
-                    case 'TPE-iPLS':
-                        Reg = TpeIpls(train = [X_train, y_train], test=[X_test, y_test], n_intervall = s, n_iter=it)
+                        Reg = lw(Reg_json = Reg_json, pred = pred)
                         reg_model = Reg.model_
+                        Reg.CV_results_ = pd.DataFrame()
+                        Reg.cv_data_ = {'YpredCV' : {}, 'idxCV' : {}}
+                        # set indexes to Reg.pred_data (train, test, folds idx)
+                        for i in range(len(pred)):
+                            Reg.pred_data_[i] = Reg.pred_data_[i].T.reset_index().drop(columns = ['index'])
+                            if i == 0: # data_train
+                                # Reg.pred_data_[i] = np.array(Reg.pred_data_[i])
+                                Reg.pred_data_[i].index = list(y_train.index)
+                                Reg.pred_data_[i] = Reg.pred_data_[i].iloc[:,0]
+                            elif i == 1: # data_test
+                                # Reg.pred_data_[i] = np.array(Reg.pred_data_[i])
+                                Reg.pred_data_[i].index = list(y_test.index)
+                                Reg.pred_data_[i] = Reg.pred_data_[i].iloc[:,0]
+                            else:
+                                # CVi
+                                Reg.pred_data_[i].index = folds[list(folds)[i-2]]
+                                # Reg.CV_results_ = pd.concat([Reg.CV_results_, Reg.pred_data_[i]])
+                                Reg.cv_data_['YpredCV']['Fold' + str(i-1)] = np.array(Reg.pred_data_[i]).reshape(-1)
+                                Reg.cv_data_['idxCV']['Fold' + str(i-1)] = np.array(folds[list(folds)[i-2]]).reshape(-1)
+
+                        Reg.CV_results_ = KF_CV().metrics_cv(y = y_train, ypcv = Reg.cv_data_['YpredCV'], folds = folds)[1]
+                        #### cross validation results print
+                        Reg.best_hyperparams_print = Reg.best_hyperparams_
+                        ## plots
+                        Reg.cv_data_ = KF_CV().meas_pred_eq(y = np.array(y_train), ypcv = Reg.cv_data_['YpredCV'], folds = folds)
+                        Reg.pretreated_spectra_ = preReg.pretreated_spectra_
                         
-                        intervalls = Reg.selected_features_.T
-                        intervalls_with_cols = Reg.selected_features_.T
-                        
-                        for i in range(intervalls.shape[0]):
-                            for j in range(intervalls.shape[1]):
-                                intervalls_with_cols.iloc[i,j] = spectra.columns[intervalls.iloc[i,j]]
-                        rega = Reg.selected_features_
-
-                        st.session_state.intervalls = Reg.selected_features_.T
-                        st.session_state.intervalls_with_cols =intervalls_with_cols
-                return Reg
-            Reg = RequestingModelCreation(change =st.session_state.counter, regression_algo = regression_algo)
+                        Reg.best_hyperparams_print = {**preReg.best_hyperparams_, **Reg.best_hyperparams_}
+                        Reg.best_hyperparams_ = {**preReg.best_hyperparams_, **Reg.best_hyperparams_}
+
+                        Reg.__hash__ = hash_data(Reg.best_hyperparams_print)
+                    except FileNotFoundError:
+                        Reg = None
+                        for i in data_to_work_with: os.unlink(temp_path / str(i + ".csv"))
+
+                case 'TPE-iPLS':
+                    Reg = TpeIpls(train = [X_train, y_train], test = [X_test, y_test], n_intervall = s, n_iter = it, nfolds = nb_folds)
+                    reg_model = Reg.model_
+                    
+                    global intervalls, intervalls_with_cols
+                    intervalls = Reg.selected_features_.T
+                    intervalls_with_cols = Reg.selected_features_.T
+                    
+                    for i in range(intervalls.shape[0]):
+                        for j in range(intervalls.shape[1]):
+                            intervalls_with_cols.iloc[i,j] = spectra.columns[intervalls.iloc[i,j]]
+                    rega = Reg.selected_features_
+
+                    st.session_state.intervalls = Reg.selected_features_.T
+                    st.session_state.intervalls_with_cols = intervalls_with_cols
+            return Reg
+
+        if regression_algo:
+            info = st.info('The model is being created. This may take a few minutes.')
+            if regression_algo == 'TPE-iPLS': # for ipls, also ask for the number of intervals and iterations
+                s = st.number_input(label = 'Enter the maximum number of intervals', min_value = 1, max_value = 6)
+                it = st.number_input(label = 'Enter the number of iterations', min_value = 2, max_value = 500, value = 2)
+            else:
+                s, it = None, None
+
+            Reg = RequestingModelCreation(xydata = hash_data(xy_hash), change = st.session_state.counter, regression_algo = regression_algo, s = s, it = it)
         else:
             st.warning('Choose a modelling algorithm from the dropdown list !')
                 
@@ -409,13 +414,13 @@ if not (spectra.empty and y.empty):
         
         if regression_algo:
             if regression_algo == 'TPE-iPLS':
-                intervalls = st.session_state.intervalls
-                intervalls_with_cols = st.session_state.intervalls_with_cols
+                if 'intervalls' in st.session_state and 'intervalls_with_cols' in st.session_state:
+                    intervalls = st.session_state.intervalls
+                    intervalls_with_cols = st.session_state.intervalls_with_cols
 
 
 
 if Reg:
-
     if st.button('re-model the data', key=4, help=None, type="primary", use_container_width=True):# remodel feature for re-tuning the model
         increment()
 
@@ -435,9 +440,9 @@ if Reg:
         # Show the model performance table
         st.write("-- Model performance --")
         if regression_algo != reg_algo[2]:
-            model_per=pd.DataFrame(metrics(c = [y_train, yc], t = [y_test, yt], method='regression').scores_)
+            model_per = pd.DataFrame(metrics(c = [y_train, yc], t = [y_test, yt], method = 'regression').scores_)
         else:
-            model_per=pd.DataFrame(metrics(c = [y_train, yc], t = [y_test, yt], method='regression').scores_)    
+            model_per = pd.DataFrame(metrics(c = [y_train, yc], t = [y_test, yt], method = 'regression').scores_)    
         st.dataframe(model_per)
 
 
@@ -453,7 +458,7 @@ if Reg:
         plt.tight_layout()
 
         for i in range(2):
-            eval(f'ax{i+1}').grid(color='grey', linestyle=':', linewidth=0.2)
+            eval(f'ax{i+1}').grid(color = 'grey', linestyle = ':', linewidth = 0.2)
             eval(f'ax{i+1}').margins(x = 0)
             eval(f'ax{i+1}').legend(loc = 'upper right')
             eval(f'ax{i+1}').set_ylabel('Intensity')
@@ -461,14 +466,14 @@ if Reg:
                 a = change
                 for j in range(s):
                     if np.array(spectra.columns).dtype.kind in ['i','f']:
-                        min, max = intervalls_with_cols['from'][j], intervalls_with_cols['to'][j]
+                        xmin, xmax = intervalls_with_cols.iloc[j, 0], intervalls_with_cols.iloc[j, 1]
                     else:
-                        min, max = intervalls['from'][j], intervalls['to'][j]
+                        xmin, xmax = intervalls.iloc[j, 0], intervalls.iloc[j, 1]

-                    eval(f'ax{i+1}').axvspan(min, max, color='#00ff00', alpha=0.5, lw=0)
+                    eval(f'ax{i+1}').axvspan(xmin, xmax, color = '#00ff00', alpha = 0.5, lw = 0)
 
         if regression_algo == 'PLS':
-            ax1.scatter(colnames[np.array(Reg.sel_ratio_.index)], np.mean(X_train, axis = 0)[np.array(Reg.sel_ratio_.index)],
+            ax1.scatter(colnames[np.array(Reg.sel_ratio_.index)], np.mean(X_train, axis = 0).iloc[np.array(Reg.sel_ratio_.index)],
                             color = '#7ab0c7', label = 'Important variables')
             ax2.scatter(colnames[Reg.sel_ratio_.index], np.mean(Reg.pretreated_spectra_, axis = 0)[np.array(Reg.sel_ratio_.index)],
                             color = '#7ab0c7', label = 'Important variables')
@@ -476,14 +481,15 @@ if Reg:
             ax2.legend()
         return fig
 
-    fig = prep_important(change = st.session_state.counter, regression_algo = regression_algo, model_hash = str(Reg.__hash__))
+    if Reg:
+        fig = prep_important(change = st.session_state.counter, regression_algo = regression_algo, model_hash = str(Reg.__hash__))
     
     with M2:## Visualize raw,preprocessed spectra, and selected intervalls(in case of ipls) 
-        if not intervalls_with_cols.empty:
-            st.write('-- Important Spectral regions used for model creation --')
-            st.table(intervalls_with_cols)
+        if regression_algo == 'TPE-iPLS':
+            st.write('-- Important Spectral regions used for model creation --')
+            st.table(intervalls_with_cols)
         st.write('-- Visualization of the spectral regions used for model creation --')
-        fig.savefig("./Report/figures/Variable_importance.png")
+        fig.savefig("./Report/figures/variable_importance.png")
         st.pyplot(fig)
 
 
@@ -493,23 +499,22 @@ if Reg:
                         6: "Six",7: "Seven",8: "Eight",9: "Nine",10: "Ten"}
         st.header(f" {numbers_dict[nb_folds]}-Fold Cross-Validation results")
 
-        cv1, cv2 = st.columns([2,2])
+        cv1, cv2 = st.columns([2, 2])
         with cv2:
+            cv_results = pd.DataFrame(Reg.CV_results_).round(4) # CV table
             st.write('-- Cross-Validation Summary--')
-            st.write(Reg.CV_results_.style.map(lambda _: "background-color: #cecece;", subset=(Reg.CV_results_.index.drop(['sd', 'mean', 'cv']), slice(None))))
-            # st.write(Reg.CV_results_)
-            cv_results=pd.DataFrame(Reg.CV_results_)# CV table
+            st.write(cv_results.astype(str).style.map(lambda _: "background-color: #cecece;", subset = (cv_results.index.drop(['sd', 'mean', 'cv']), slice(None))))
 
             st.write('-- Out-of-Fold Predictions Visualization (All in one) --')
-            fig1 = px.scatter(Reg.cv_data_[0], x ='Measured', y = 'Predicted' , trendline='ols', color='Folds', symbol="Folds",
+            fig1 = px.scatter(Reg.cv_data_[0], x = 'Measured', y = 'Predicted', trendline = 'ols', color = 'Folds', symbol = 'Folds',
                     color_discrete_sequence=px.colors.qualitative.G10)
-            fig1.add_shape(type='line', x0 = .95 * min(Reg.cv_data_[0].loc[:,'Measured']), x1 = 1.05 * max(Reg.cv_data_[0].loc[:,'Measured']),
-                            y0 = .95 * min(Reg.cv_data_[0].loc[:,'Measured']), y1 = 1.05 * max(Reg.cv_data_[0].loc[:,'Measured']), line = dict(color='black', dash = "dash"))
-            fig1.update_traces(marker_size=7, showlegend=False)
-            st.plotly_chart(fig1, use_container_width=True)
-            fig0 = px.scatter(Reg.cv_data_[0], x ='Measured', y = 'Predicted' , trendline='ols', color='Folds', symbol="Folds", facet_col = 'Folds',facet_col_wrap=1,
-                    color_discrete_sequence=px.colors.qualitative.G10, text='index', width=800, height=1000)
-            fig0.update_traces(marker_size=8, showlegend=False)
+            fig1.add_shape(type = 'line', x0 = .95 * min(Reg.cv_data_[0].loc[:,'Measured']), x1 = 1.05 * max(Reg.cv_data_[0].loc[:,'Measured']),
+                            y0 = .95 * min(Reg.cv_data_[0].loc[:,'Measured']), y1 = 1.05 * max(Reg.cv_data_[0].loc[:,'Measured']), line = dict(color = 'black', dash = "dash"))
+            fig1.update_traces(marker_size = 7, showlegend = False)
+            st.plotly_chart(fig1, use_container_width = True)
+            fig0 = px.scatter(Reg.cv_data_[0], x = 'Measured', y = 'Predicted', trendline = 'ols', color = 'Folds', symbol = "Folds", facet_col = 'Folds', facet_col_wrap = 1,
+                    color_discrete_sequence = px.colors.qualitative.G10, text = 'index', width = 800, height = 1000)
+            fig0.update_traces(marker_size = 8, showlegend = False)
             fig0.write_image("./Report/figures/meas_vs_pred_cv_onebyone.png")
 
         with cv1:
@@ -539,7 +544,7 @@ if Reg:
     # reg plot and residuals plot
     if regression_algo != reg_algo[2]:
         regression_plot = reg_plot([y_train, y_test],[yc, yt], train_idx = train_index, test_idx = test_index)
-        residual_plot = resid_plot([y_train, y_test], [yc, yt], train_idx=train_index, test_idx=test_index)
+        residual_plot = resid_plot([y_train, y_test], [yc, yt], train_idx = train_index, test_idx = test_index)
     else:
         regression_plot = reg_plot([y_train, y_test],[yc, yt], train_idx = train_index, test_idx = test_index)
         residual_plot = resid_plot([y_train, y_test], [yc, yt], train_idx=train_index, test_idx=test_index)
@@ -556,8 +561,36 @@ if Reg:
         residual_plot.savefig('./Report/figures/residuals_plot.png')
 
 ###################################################      END : Model Diagnosis   #######################################################
-st.write('Download the Analysis Results')
+
+###################################################    BEGIN : Download results    ####################################################
+date_time = datetime.datetime.now().strftime('_%y_%m_%d_%H_%M_')
+
+if Reg:
+    @st.cache_data
+    def download_res(file, sam):
+        zipname = f'results{date_time}subset_selection_{file.name.split(".")[0]}.zip' # name of the zipfile
+        with open('./temp/fname.json', 'w') as f: # dump the zip name and save it as a .json file
+            json.dump(zipname, f)
+        shutil.make_archive(base_name = zipname.split('.')[0], format = "zip", root_dir = "./Report", base_dir = "figures") # create a zip containing the figures and the report
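+        # root_dir/base_dir: only the figures/ subfolder of ./Report ends up inside the archive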
+
 
 
 
 
+st.subheader('Download the Analysis Results')
+# st.download_button('Download', data = fp, file_name = zipname, mime ="application/zip",
+#                 args = None, kwargs = None,type = "primary",use_container_width = True)
-- 
GitLab