diff --git a/src/Class_Mod/DATA_HANDLING.py b/src/Class_Mod/DATA_HANDLING.py
index 892c0c0854533b346a4e2363a61408c4d114a4ae..17e4dcb44161db3710fec2ccf94ad8363e35cfbc 100644
--- a/src/Class_Mod/DATA_HANDLING.py
+++ b/src/Class_Mod/DATA_HANDLING.py
@@ -80,6 +80,7 @@ def No_transformation(X):
 ######################################## Cross val split ############################
 class KF_CV:
     ### method for generating test sets index
+    ### CV returns a dict of test-set indices, one 1d numpy array per fold
     @staticmethod
     def CV(x, y, n_folds:int):
         test_folds = {}
@@ -90,30 +91,45 @@ class KF_CV:
         for _, i_test in kf.split(x, y):
             d.append(i_test)
             test_folds[folds_name[i]] = d[i]
-        return test_folds
+        return test_folds ## returns a dict whose keys are the fold names and whose values are 1d numpy arrays of test-set indices

     ### Cross validate the model and return the predictions and samples index
     @staticmethod
-    def cross_val_predictor(model, x, y, n_folds:int):
+    def cross_val_predictor(model, folds, x, y):
+        """ model: the object to be cross-validated,
+        folds: a dict whose keys are the fold names and whose values are 1d numpy arrays of test-set indices (from the CV method),
+        x and y: the data used for CV"""
         x = np.array(x)
         y = np.array(y)
         yp = {}
-        folds = KF_CV.CV(x=x, y=y, n_folds=n_folds)### Test index
         key = list(folds.keys())
+        n_folds = len(folds.keys())

         for i in range(n_folds):
             model.fit(np.delete(x, folds[key[i]], axis=0), np.delete(y, folds[key[i]], axis=0))
             yp[key[i]] = model.predict(x[folds[key[i]]]) #### predictions/fold
-
-
+        return yp # returns a dict whose keys are the fold names and whose values are the predicted y values for that fold

+    @staticmethod
+    def meas_pred_eq(y, ypcv, folds):
+        """ y: the target variable,
+        ypcv: a dict whose keys are the fold names and whose values are 1d numpy arrays of predictions per fold (from the cross_val_predictor method),
+        folds: a dict whose keys are the fold names and whose values are 1d numpy arrays of test-set indices (from the CV method)
+
+        returns:
+        two dataframes:
+        - an n x 4 dataframe containing measured values, predicted values, the OLS regression equation, and the sample index (n is the total number of samples)
+        - a 2 x k dataframe containing the OLS regression coefficients (k is the number of folds)
+        """
         cvcv = {}
         coeff = {}
+        y = np.array(y)
         for i, Fname in enumerate(folds.keys()):
             r = pd.DataFrame()
-            r['Predicted'] = yp[Fname]
+            r['Predicted'] = ypcv[Fname]
             r['Measured'] = y[folds[Fname]]
-            ols = LinearRegression().fit(pd.DataFrame(y[folds[Fname]]),yp[Fname].reshape(-1,1))
+            ols = LinearRegression().fit(pd.DataFrame(y[folds[Fname]]), ypcv[Fname].reshape(-1,1))
             r.index = folds[Fname]
             r['Folds'] = [f'{Fname} (Predicted = {np.round(ols.intercept_[0], 2)} + {np.round(ols.coef_[0][0],2)} x Measured'] * r.shape[0]
             cvcv[i] = r
@@ -123,37 +139,47 @@
         data['index'] = [data.index[i][1] for i in range(data.shape[0])]
         data.index = data['index']
         coeff = pd.DataFrame(coeff, index = ['Slope', 'Intercept'])
-        return yp, folds, data, coeff
-
-    ### compute metrics for each fold
+        return data, coeff ## returns the measured vs predicted values from cross-validation and the regression coefficients

+    ### compute metrics for each fold, plus their mean, sd, and cv across folds
     @staticmethod
-    def process(model, x, y, n_folds:int):
-        f, idx,_ , _ = KF_CV.cross_val_predictor(model, x=x,y=y, n_folds=n_folds)
+    def metrics_cv(y, ypcv, folds):
+        y = np.array(y)
         e = {}
-        for i in idx.keys():
-            e[i] = metrics().reg_(y.iloc[idx[i]],f[i])
+        for i in folds.keys():
+            e[i] = metrics().reg_(y[folds[i]], ypcv[i])
         r = pd.DataFrame(e)
-        return r
+        r_print = r.copy()
+        r_print['mean'] = r.mean(axis = 1)
+        r_print['sd'] = r.std(axis = 1)
+        r_print['cv'] = 100*r.std(axis = 1)/r.mean(axis = 1)
+        return r.T, r_print.T

-    ### bias and variance
+    ### compute metrics for each fold
     @staticmethod
-    def cv_scores(model, x, y, n_folds:int):
-        x = KF_CV.process(model, x, y, n_folds)
-        mean = x.mean(axis = 1)
-        sd = x.std(axis = 1)
-        rsd = sd*100/mean
-        data = pd.concat([mean, sd, rsd], axis = 1).round(2)
-        data.columns = ['mean', 'sd', 'cv(%)']
-        return data
+    def cv_scores(y, ypcv, folds):
+        """ Takes as input the y vector, the dict of predicted values per fold (from the cross_val_predictor method), and the dict of test-set indices per fold (from the CV method),
+        and returns two dataframes: the first contains the metrics scores per fold, the second is the same but with additional mean, sd, and cv rows
+        """
+        y = np.array(y)
+        e = {}
+        for i in folds.keys():
+            e[i] = metrics().reg_(y[folds[i]], ypcv[i])
+        r = pd.DataFrame(e)
+        r_print = r.copy()
+        r_print['mean'] = r.mean(axis = 1)
+        r_print['sd'] = r.std(axis = 1)
+        r_print['cv'] = 100*r.std(axis = 1)/r.mean(axis = 1)
+        return r.T, r_print.T

-    ### Return ycv
-    @staticmethod
-    def ycv(model, x, y, n_folds:int):
-        ycv = np.zeros(y.shape[0])
-        f, idx,_,_ = KF_CV.cross_val_predictor(model, x,y, n_folds)
-        for i in f.keys():
-            ycv[idx[i]] = f[i]
-        return ycv
+
+    # ### Return ycv
+    # @staticmethod
+    # def ycv(model, x, y, n_folds:int):
+    #     ycv = np.zeros(y.shape[0])
+    #     f, idx,_,_ = KF_CV.cross_val_predictor(model, x,y, n_folds)
+    #     for i in f.keys():
+    #         ycv[idx[i]] = f[i]
+    #     return ycv


 ### Selectivity ratio
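The refactor above splits the old process/cv_scores pipeline into explicit steps that all share one folds dict. A minimal sketch of the new call chain, assuming dummy random data and that numpy, scikit-learn, and this module's own metrics class are importable:

    import numpy as np
    from sklearn.cross_decomposition import PLSRegression

    x = np.random.rand(60, 100)          # 60 spectra x 100 wavelengths (dummy data)
    y = np.random.rand(60)

    folds = KF_CV.CV(x, y, n_folds=3)    # e.g. {'Fold1': idx_array, 'Fold2': ..., 'Fold3': ...}
    yp = KF_CV.cross_val_predictor(PLSRegression(n_components=5), folds, x, y)
    per_fold, summary = KF_CV.cv_scores(y, yp, folds)  # metrics per fold + mean/sd/cv rows
    data, coeff = KF_CV.meas_pred_eq(y, yp, folds)     # per-sample table + per-fold OLS slope/intercept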
diff --git a/src/Class_Mod/Hash.py b/src/Class_Mod/Hash.py
new file mode 100644
index 0000000000000000000000000000000000000000..8a96cbd6009e356e4b3011f35778de27d3e58a9c
--- /dev/null
+++ b/src/Class_Mod/Hash.py
@@ -0,0 +1,26 @@
+from Packages import *
+
+def create_hash(spectra):
+    # hash the stringified spectra with the md5 hash function
+    hash_func = hashlib.md5()
+    spectra = str(spectra)
+    encoded_spectra = spectra.encode()
+    hash_func.update(encoded_spectra)
+    hash = hash_func.hexdigest()
+    return hash
+
+def check_hash(hash):
+    # path to the hash file and to the grep/echo binaries bundled for Windows
+    subprocess_path = Path("src/data/hash/")
+    # grep for the hash in the hash file; grep -c exits with 0 when the hash is found
+    nb_hash = subprocess.run([subprocess_path / 'grep.exe', '-c', hash, subprocess_path / "hash.txt"], shell=True)
+    # if the hash is already present
+    if nb_hash.returncode == 0:
+        return 'existing hash'
+    # if the hash is not present, append it to the file with echo
+    else:
+        add_hash = subprocess.run(['echo', str(hash) + '>>', subprocess_path / "hash.txt"], shell=True)
+        if add_hash.returncode == 0:
+            return 'hash added'
+        else:
+            return 'error while adding the new hash'
\ No newline at end of file
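check_hash leans on a bundled grep.exe and shell redirection, which is Windows-specific and fragile to quoting. For comparison, a portable sketch of the same look-up-then-append contract in plain Python (a hypothetical helper, not the module's code; same return strings and the same hash.txt location):

    from pathlib import Path

    def check_hash_py(hash_value: str) -> str:
        # look the hash up in hash.txt; append it when it is not there yet
        hash_file = Path("src/data/hash/hash.txt")
        try:
            lines = hash_file.read_text().splitlines() if hash_file.exists() else []
            if hash_value in lines:
                return 'existing hash'
            with hash_file.open('a') as f:
                f.write(hash_value + '\n')
            return 'hash added'
        except OSError:
            return 'error while adding the new hash'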
diff --git a/src/Class_Mod/LWPLSR_.py b/src/Class_Mod/LWPLSR_.py
index a7bd37980855274ec8c93ec5f0e188116188385f..da661d6be6fea9c1627cfc0e8df17da25dc7e627 100644
--- a/src/Class_Mod/LWPLSR_.py
+++ b/src/Class_Mod/LWPLSR_.py
@@ -7,33 +7,28 @@ class LWPLSR:

     Returns:
         self.scores (DataFrame): various metrics and scores
-        self.predicted_results_on_train (DataFrame):
-        self.predicted_results_on_test (DataFrame):
+        self.predicted_results (Dictionary): Dict containing all predicted results (train, test, cross-validation)
         self.mod (Julia model): the prepared model
     """
     def __init__(self, dataset):
         """Initiate the LWPLSR and prepare data for Julia computing."""
-
-        # self.x_train, self.y_train, self.x_test, self.y_test = [dataset[i] for i in range(len(dataset))]
+        # get train / test data from dataset
         self.x_train, self.y_train, self.x_test, self.y_test = [dataset[i] for i in range(4)]
+        # calculate the number of KFolds and get the CV data from dataset
        self.nb_fold = int((len(dataset)-4)/4)
         for i in range(self.nb_fold):
             setattr(self, "xtr_fold"+str(i+1), dataset[i+7])
             setattr(self, "ytr_fold"+str(i+1), dataset[i+13])
             setattr(self, "xte_fold"+str(i+1), dataset[i+4])
-            # setattr(self, "yte_fold"+str(i+1), dataset[i+10])
             setattr(jl, "xtr_fold"+str(i+1), dataset[i+7])
             setattr(jl, "ytr_fold"+str(i+1), dataset[i+13])
             setattr(jl, "xte_fold"+str(i+1), dataset[i+4])
-            # setattr(jl, "yte_fold"+str(i+1), dataset[i+10])

-        # prepare to send dataframes to julia and Jchemo
+        # prepare to send dataframes to julia and Jchemo (with the jl. prefix)
         jl.x_train, jl.y_train, jl.x_test, jl.y_test = self.x_train, self.y_train, self.x_test, self.y_test

         # initialize vars from the class
         y_shape = self.y_test.shape
-        self.predicted_results_on_test = pd.DataFrame
-        self.predicted_results_on_train = pd.DataFrame
         self.pred_test = np.zeros(shape=(y_shape[0], 1))
         self.pred_train = np.zeros(shape=(y_shape[0], 1))
         self.mod = ""
@@ -52,7 +47,7 @@ class LWPLSR:
         Returns:
             self.mod (Julia model): the prepared model
         """
-        # launch Julia Jchemo lwplsr
+        # launch Julia Jchemo lwplsr and convert the DataFrames from Python Pandas DataFrame to Julia DataFrame
         jl.seval("""
         using DataFrames
         using Pandas
@@ -63,7 +58,7 @@ class LWPLSR:
         y_test |> Pandas.DataFrame |> DataFrames.DataFrame
         """)
         print('LWPLSR - tuning')
-        # set tuning parameters
+        # set tuning parameters to test
         jl.seval("""
         nlvdis = [5; 10; 15] ; metric = [:eucl; :mah]
         h = [1; 2; 6; Inf] ; k = [30; 80; 200]
@@ -83,21 +78,22 @@ class LWPLSR:
         ncal = ntrain - nval
         """)
-        # Create LWPLSR model and tune
+        # Create LWPLSR model and tune with GridScore
         jl.seval("""
         mod = Jchemo.model(Jchemo.lwplsr)
         res = gridscore(mod, Xcal, ycal, Xval, yval; score = Jchemo.rmsep, pars, nlv, verbose = false)
         u = findall(res.y1 .== minimum(res.y1))[1] #best parameters combination
         """)
+        # save best lwplsr parameters
         self.best_lwplsr_params = {'nlvdis' : jl.res.nlvdis[jl.u], 'metric' : str(jl.res.metric[jl.u]), 'h' : jl.res.h[jl.u], 'k' : jl.res.k[jl.u], 'nlv' : jl.res.nlv[jl.u]}
         print('best lwplsr params ' + str(self.best_lwplsr_params))
-        print('LWPLSR - best params ok')
-        # calculate LWPLSR model with best parameters
+        # run LWPLSR model with best parameters
         jl.seval("""
         mod = Jchemo.model(Jchemo.lwplsr; nlvdis = res.nlvdis[u], metric = res.metric[u], h = res.h[u], k = res.k[u], nlv = res.nlv[u])
         # Fit model
         Jchemo.fit!(mod, x_train, y_train)
         """)
+        # save Julia Jchemo model
         self.mod = jl.mod

     def Jchemo_lwplsr_predict(self):
@@ -128,14 +124,13 @@ class LWPLSR:
         print('LWPLSR - end')

     def Jchemo_lwplsr_cv(self):
-        """Send data to Julia to predict with lwplsr.
+        """Send Cross-Validation data to Julia to fit & predict with lwplsr.

         Args:
-            self.mod (Julia model): the prepared model
+            self.best_lwplsr_params: the best parameters (from tuning) to use for CV
             self.xtr_fold1 (DataFrame):
             self.ytr_fold1 (DataFrame):
             self.xte_fold1 (DataFrame):
-            self.yte_fold1 (DataFrame):

         Returns:
             self.pred_cv (Julia DataFrame): predicted values on x_train with Cross-Validation
         """
@@ -144,7 +139,7 @@ class LWPLSR:
             jl.Xtr = getattr(self, "xtr_fold"+str(i+1))
             jl.Ytr = getattr(self, "ytr_fold"+str(i+1))
             jl.Xte = getattr(self, "xte_fold"+str(i+1))
-            # jl.Yte = getattr(self, "yte_fold"+str(i+1))
+            # convert the Python Pandas DataFrames to Julia DataFrames
             jl.seval("""
             using DataFrames
             using Pandas
@@ -153,6 +148,7 @@ class LWPLSR:
             Ytr |> Pandas.DataFrame |> DataFrames.DataFrame
             Xte |> Pandas.DataFrame |> DataFrames.DataFrame
             """)
+            # set the lwplsr parameters to the best ones from tuning
             jl.nlvdis = int(self.best_lwplsr_params['nlvdis'])
             jl.metric = self.best_lwplsr_params['metric']
             jl.h = self.best_lwplsr_params['h']
@@ -169,15 +165,14 @@ class LWPLSR:
                 res = Jchemo.predict(mod_cv, Xte)
                 res.pred
             """)
+            # save the predicted values of each KFold in the predicted_results dictionary
             self.predicted_results["CV" + str(i+1)] = pd.DataFrame(pred_cv)

     @property
     def pred_data_(self):
         # convert predicted data from x_test to Pandas DataFrame
-        self.predicted_results_on_test = pd.DataFrame(self.pred_test)
-        self.predicted_results_on_train = pd.DataFrame(self.pred_train)
-        self.predicted_results["pred_data_train"] = self.predicted_results_on_train
-        self.predicted_results["pred_data_test"] = self.predicted_results_on_test
+        self.predicted_results["pred_data_train"] = pd.DataFrame(self.pred_train)
+        self.predicted_results["pred_data_test"] = pd.DataFrame(self.pred_test)
         return self.predicted_results

     @property
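Since __init__ addresses dataset purely by position, here is an illustrative map of the layout it expects for nb_fold = 3 (len(dataset) = 4 + 4*3 = 16). The indices come from the constructor above; the names are placeholders, and the ordering assumes the fold files enumerate alphabetically as the offsets imply:

    # layout of `dataset` as consumed by LWPLSR.__init__ for nb_fold = 3
    layout = [
        'x_train', 'y_train', 'x_test', 'y_test',   # 0..3   train/test split
        'xte_fold1', 'xte_fold2', 'xte_fold3',      # 4..6   dataset[i+4]  -> xte_fold{i+1}
        'xtr_fold1', 'xtr_fold2', 'xtr_fold3',      # 7..9   dataset[i+7]  -> xtr_fold{i+1}
        'yte_fold1', 'yte_fold2', 'yte_fold3',      # 10..12 dataset[i+10], currently unused (commented out)
        'ytr_fold1', 'ytr_fold2', 'ytr_fold3',      # 13..15 dataset[i+13] -> ytr_fold{i+1}
    ]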
diff --git a/src/Class_Mod/LWPLSR_Call.py b/src/Class_Mod/LWPLSR_Call.py
index 49c674cdae90369fad0a5db1757cc722f7d17b4a..007009125fbab59c62d4a587ab62e5cc065b54fd 100644
--- a/src/Class_Mod/LWPLSR_Call.py
+++ b/src/Class_Mod/LWPLSR_Call.py
@@ -7,35 +7,42 @@ import os
 # loading the lwplsr_inputs.json
 temp_path = Path("temp/")
 data_to_work_with = ['x_train_np', 'y_train_np', 'x_test_np', 'y_test_np']
+# check for cross-validation data, depending on the KFold number
 temp_files_list = os.listdir(temp_path)
 nb_fold = 0
 for i in temp_files_list:
     if 'fold' in i:
+        # add the CV file name to data_to_work_with
         data_to_work_with.append(str(i)[:-4])
+        # and count the number of fold files (4 files per fold)
         nb_fold += 1
+# import the data from the csv files in the temp/ folder
 dataset = []
 for i in data_to_work_with:
     dataset.append(np.genfromtxt(temp_path / str(i + ".csv"), delimiter=','))
 print('CSV imported')
+# launch the LWPLSR class from LWPLSR_.py in Class_Mod
 print('start model creation')
 Reg = LWPLSR(dataset)
 print('model created. \nnow fit')
 LWPLSR.Jchemo_lwplsr_fit(Reg)
 print('now predict')
 LWPLSR.Jchemo_lwplsr_predict(Reg)
-
 print('now CV')
 LWPLSR.Jchemo_lwplsr_cv(Reg)
-
+# export the results in a json file to bring the data back to 2-model_creation.py and the streamlit interface
 print('export to json')
 pred = ['pred_data_train', 'pred_data_test']
+# add the KFold results to the predicted data
 for i in range(int(nb_fold/4)):
     pred.append("CV" + str(i+1))
 json_export = {}
 for i in pred:
     json_export[i] = Reg.pred_data_[i].to_dict()
+# add the lwplsr global model to the json
 json_export['model'] = str(Reg.model_)
+# add the best parameters for the lwplsr obtained from GridScore tuning
 json_export['best_lwplsr_params'] = Reg.best_lwplsr_params_
 with open(temp_path / "lwplsr_outputs.json", "w+") as outfile:
     json.dump(json_export, outfile)
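For reference, a sketch of the shape of temp/lwplsr_outputs.json after this script runs with 3 folds. The keys come from the export loop above; the numeric values, the nested dict orientation (pandas .to_dict() output), and the model string are purely illustrative:

    lwplsr_outputs = {
        "pred_data_train": {"0": {"0": 12.3, "1": 11.8}},   # Reg.pred_data_['pred_data_train'].to_dict()
        "pred_data_test":  {"0": {"0": 12.1}},
        "CV1": {"0": {"0": 12.6}},                          # one entry per KFold
        "CV2": {"0": {"0": 11.9}},
        "CV3": {"0": {"0": 12.4}},
        "model": "<str(Reg.model_)>",
        "best_lwplsr_params": {"nlvdis": 5, "metric": "eucl", "h": 2.0, "k": 80, "nlv": 10},
    }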
diff --git a/src/Class_Mod/RegModels.py b/src/Class_Mod/RegModels.py
index 056253d9148483608628553d00b195f6808b57f4..ce07a07e6bf541d8e078dfe12846d96d4868e28a 100644
--- a/src/Class_Mod/RegModels.py
+++ b/src/Class_Mod/RegModels.py
@@ -115,19 +115,23 @@ class Plsr(Regmodel):
         x2 = [savgol_filter(x1[i], polyorder=params['polyorder'], deriv=params['deriv'], window_length = params['window_length']) for i in range(2)]

         Model = PLSRegression(scale = False, n_components = params['n_components'])
-        self._cv_df = KF_CV().process(model = Model, x = x2[0], y = self._ytrain, n_folds = self._nfolds)
-        self._cv_df['Average'] = self._cv_df.mean(axis = 1)
-        self._cv_df['S'] = self._cv_df.std(axis = 1)
-        self._cv_df['CV(%)'] = self._cv_df['S'] * 100 / self._cv_df['Average']
-        self._cv_df = self._cv_df.T.round(2)
-        score = self._cv_df.loc["CV(%)",'rmse']
+        # self._cv_df = KF_CV().process(model = Model, x = x2[0], y = self._ytrain, n_folds = self._nfolds)
+        # self._cv_df['Average'] = self._cv_df.mean(axis = 1)
+        # self._cv_df['S'] = self._cv_df.std(axis = 1)
+        # self._cv_df['CV(%)'] = self._cv_df['S'] * 100 / self._cv_df['Average']
+        # self._cv_df = self._cv_df.T.round(2)
+        folds = KF_CV().CV(x = x2[0], y = np.array(self._ytrain), n_folds = self._nfolds)
+        yp = KF_CV().cross_val_predictor(model = Model, folds = folds, x = x2[0], y = np.array(self._ytrain))
+        self._cv_df = KF_CV().metrics_cv(y = np.array(self._ytrain), ypcv = yp, folds = folds)[1]
+
+        score = self._cv_df.loc["cv",'rmse']

         Model = PLSRegression(scale = False, n_components = params['n_components'])
         Model.fit(x2[0], self._ytrain)

         if self.SCORE > score:
             self.SCORE = score
-            self._ycv = KF_CV().cross_val_predictor(model = Model, x = x2[0], y = self._ytrain, n_folds = self._nfolds)
+            self._ycv = KF_CV().meas_pred_eq(y = np.array(self._ytrain), ypcv = yp, folds = folds)
             self._yc = Model.predict(x2[0])
             self._yt = Model.predict(x2[1])
             self._model = Model
@@ -179,26 +183,29 @@ class TpeIpls(Regmodel):
             # print(x2)

         #     ## Modelling
+        folds = KF_CV().CV(x = x2[0], y = np.array(self._ytrain), n_folds = self._nfolds)
         try:
             Model = PLSRegression(scale = False, n_components = params['n_components'])
-            self._cv_df = KF_CV().process(model = Model, x = x2[0][:,id], y = self._ytrain, n_folds = self._nfolds)
+            yp = KF_CV().cross_val_predictor(model = Model, folds = folds, x = x2[0][:,id], y = np.array(self._ytrain))
+            self._cv_df = KF_CV().metrics_cv(y = np.array(self._ytrain), ypcv = yp, folds = folds)[1]
         except ValueError as ve:
             params["n_components"] = 1
             Model = PLSRegression(scale = False, n_components = params['n_components'])
-            self._cv_df = KF_CV().process(model = Model, x = x2[0][:,id], y = self._ytrain, n_folds = self._nfolds)
-
-        self._cv_df['Average'] = self._cv_df.mean(axis = 1)
-        self._cv_df['S'] = self._cv_df.std(axis = 1)
-        self._cv_df['CV(%)'] = self._cv_df['S'] * 100 / self._cv_df['Average']
-        self._cv_df = self._cv_df.T.round(2)
-        score = self._cv_df.loc['CV(%)','rmse']
+            yp = KF_CV().cross_val_predictor(model = Model, folds = folds, x = x2[0][:,id], y = np.array(self._ytrain))
+            self._cv_df = KF_CV().metrics_cv(y = np.array(self._ytrain), ypcv = yp, folds = folds)[1]
+        # self._cv_df['Average'] = self._cv_df.mean(axis = 1)
+        # self._cv_df['S'] = self._cv_df.std(axis = 1)
+        # self._cv_df['CV(%)'] = self._cv_df['S'] * 100 / self._cv_df['Average']
+        # self._cv_df = self._cv_df.T.round(2)
+        score = self._cv_df.loc['cv','rmse']

         Model = PLSRegression(scale = False, n_components = params['n_components'])
         Model.fit(x2[0][:,id], self._ytrain)

         if self.SCORE > score:
             self.SCORE = score
-            self._ycv = KF_CV().cross_val_predictor(model = Model, x = x2[0][:,id], y = self._ytrain, n_folds = self._nfolds)
+            self._ycv = KF_CV().meas_pred_eq(y = np.array(self._ytrain), ypcv = yp, folds = folds)
+
             self._yc = Model.predict(x2[0][:,id])
             self._yt = Model.predict(x2[1][:,id])
             self._model = Model
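Both objectives above read their score with self._cv_df.loc['cv', 'rmse'], which relies on the orientation of the second dataframe returned by KF_CV.metrics_cv: metrics as columns, folds plus the added 'mean'/'sd'/'cv' summary rows as the index. A toy illustration of that shape with hypothetical numbers and plain pandas:

    import pandas as pd

    # per-fold metric scores, as metrics().reg_ would produce them
    e = {'Fold1': {'rmse': 0.52, 'r2': 0.91},
         'Fold2': {'rmse': 0.48, 'r2': 0.93},
         'Fold3': {'rmse': 0.55, 'r2': 0.90}}
    r = pd.DataFrame(e)                 # rows: metrics, columns: folds
    r_print = r.copy()
    r_print['mean'] = r.mean(axis=1)
    r_print['sd'] = r.std(axis=1)
    r_print['cv'] = 100 * r.std(axis=1) / r.mean(axis=1)
    print(r_print.T.loc['cv', 'rmse'])  # the relative sd of rmse, used as `score`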
diff --git a/src/pages/2-model_creation.py b/src/pages/2-model_creation.py
index a56a13e8b7fc389a407cca1bc63ffc2254bb5453..12d2c3c395cc120d0d09cbbd35446ac156fae990 100644
--- a/src/pages/2-model_creation.py
+++ b/src/pages/2-model_creation.py
@@ -38,18 +38,11 @@ M9 = st.container()
 M9.write("-- Save the model --")

 ##############################################################################################
-reg_algo = ["","Full-PLSR", "Locally Weighted PLSR", "Interval-PLSR"]
-regression_algo = None
-
 #######################################        ###########################################
 files_format = ['.csv', '.dx']
 file = M00.radio('Select files format:', options = files_format)
-
-### Data
 spectra = pd.DataFrame()
 y = pd.DataFrame()
-
-
 # load .csv file
 if file == files_format[0]:
     xcal_csv = M00.file_uploader("Select NIRS Data", type="csv", help=" :mushroom: select a csv matrix with samples as rows and lambdas as columns")
@@ -60,6 +53,8 @@ if file == files_format[0]:
                                 options=["no", "yes"], index=["no", "yes"].index(str(find_col_index('data/'+xcal_csv.name))), key=1)
         if hdrx == "yes": col = 0
         else: col = False
+    else:
+        M00.warning('Insert your spectral data file here!')

     ycal_csv = M00.file_uploader("Select corresponding Chemical Data", type="csv", help=" :mushroom: select a csv matrix with samples as rows and chemical values as a column")
     if ycal_csv:
@@ -67,6 +62,8 @@ if file == files_format[0]:
         hdry = M00.radio("samples name (Y file)?: ", options=["no", "yes"], key=3)
         if hdry == "yes": col = 0
         else: col = False
+    else:
+        M00.warning('Insert your target data file here!')

     if xcal_csv and ycal_csv:
         file_name = str(xcal_csv.name) +' and '+ str(ycal_csv.name)
@@ -93,17 +90,14 @@ if file == files_format[0]:
             spectra = pd.DataFrame

     else:
-        M1.warning('Tune decimal and separator parameters')
-
-
-
-
-
+        M00.error('Error: the data has not been loaded successfully, please consider tuning the decimal and separator parameters!')

 ## Load .dx file
 elif file == files_format[1]:
     data_file = M00.file_uploader("Select Data", type=".dx", help=" :mushroom: select a dx file")
-    if data_file:
+    if not data_file:
+        M00.warning('Load your file here!')
+    else:
         file_name = str(data_file.name)
         with NamedTemporaryFile(delete=False, suffix=".dx") as tmp:
             tmp.write(data_file.read())
@@ -116,7 +110,7 @@ elif file == files_format[1]:
             y = chem_data.loc[:,yname].loc[measured]
             spectra = spectra.loc[measured]
         else:
-            M00.warning('Warning: Chemical data are not included in your file !', icon="⚠️")
+            M00.warning('Warning: your file includes no target variable to model!', icon="⚠️")
     os.unlink(tmp_path)

 ### split the data
@@ -157,27 +151,36 @@ if not spectra.empty and not y.empty:

     M0.write('Loaded data summary')
-    M0.write(pd.DataFrame([desc_stats(y_train),desc_stats(y_test),desc_stats(y)], index =['Train', 'Test', 'Total'] ).round(2))
-    stats=pd.DataFrame([desc_stats(y_train),desc_stats(y_test),desc_stats(y)], index =['Train', 'Test', 'Total'] ).round(2)
+    M0.write(pd.DataFrame([desc_stats(y_train),desc_stats(y_test),desc_stats(y)], index =['train', 'test', 'total'] ).round(2))
+    stats = pd.DataFrame([desc_stats(y_train),desc_stats(y_test),desc_stats(y)], index =['train', 'test', 'total'] ).round(2)
     ####################################### Insight into the loaded data

-    #######################################
+    ####################################### Model creation ###################################################
+    reg_algo = ["","Full-PLSR", "Locally Weighted PLSR", "Interval-PLSR"]
+    regression_algo = None
+    Reg = None
     regression_algo = M1.selectbox("Choose the algorithm for regression", options= reg_algo, key = 12, placeholder ="Choose an option")
+    # split the train data into nb_folds for cross-validation
+    nb_folds = 3
+    folds = KF_CV.CV(X_train, y_train, nb_folds)
+
+    if not regression_algo:
+        M1.warning('Choose a modelling algorithm from the dropdown list!')
     if regression_algo == reg_algo[1]:
         # Train model with model function from application_functions.py
         Reg = Plsr(train = [X_train, y_train], test = [X_test, y_test], n_iter=1)
         reg_model = Reg.model_
         #M2.dataframe(Pin.pred_data_)
+
     elif regression_algo == reg_algo[2]:
         info = M1.info('Starting LWPLSR model creation... Please wait a few minutes.')
         # export data to csv for Julia train/test
         data_to_work_with = ['x_train_np', 'y_train_np', 'x_test_np', 'y_test_np']
         x_train_np, y_train_np, x_test_np, y_test_np = X_train.to_numpy(), y_train.to_numpy(), X_test.to_numpy(), y_test.to_numpy()
         # Cross-Validation calculation
-        nb_folds = 3
-        st.write('KFold for Cross-Validation = ' + str(nb_folds))
-        # split train data into nb_folds
-        folds = KF_CV.CV(x_train_np, y_train_np, nb_folds)
+
+        st.write('KFold for Cross-Validation = ' + str(nb_folds))
         d = {}
         for i in range(nb_folds):
             d["xtr_fold{0}".format(i+1)], d["ytr_fold{0}".format(i+1)], d["xte_fold{0}".format(i+1)], d["yte_fold{0}".format(i+1)] = np.delete(x_train_np, folds[list(folds)[i]], axis=0), np.delete(y_train_np, folds[list(folds)[i]], axis=0), x_train_np[folds[list(folds)[i]]], y_train_np[folds[list(folds)[i]]]
@@ -203,39 +206,60 @@
             Reg_json = json.load(outfile)
             # delete csv files
             for i in data_to_work_with: os.unlink(temp_path / str(i + ".csv"))
-            # delete json file after import
+            # delete the json file after import
             os.unlink(temp_path / "lwplsr_outputs.json")
             # format result data into Reg object
-            pred = ['pred_data_train', 'pred_data_test']
+            pred = ['pred_data_train', 'pred_data_test'] ### keys of the dict
             for i in range(nb_folds):
-                pred.append("CV" + str(i+1))
-            Reg = type('obj', (object,), {'model' : Reg_json['model'], 'best_lwplsr_params' : Reg_json['best_lwplsr_params'], 'pred_data_' : [pd.json_normalize(Reg_json[i]) for i in pred]})
+                pred.append("CV" + str(i+1)) ### add the cv folds keys to pred
+
+            Reg = type('obj', (object,), {'model_' : Reg_json['model'], 'best_hyperparams_' : Reg_json['best_lwplsr_params'],
+                                          'pred_data_' : [pd.json_normalize(Reg_json[i]) for i in pred]})
+
+            Reg.CV_results_ = pd.DataFrame()
             Reg.cv_data_ = {'YpredCV' : {}, 'idxCV' : {}}
             # set indexes to Reg.pred_data (train, test, folds idx)
             for i in range(len(pred)):
                 Reg.pred_data_[i] = Reg.pred_data_[i].T.reset_index().drop(columns = ['index'])
                 if i == 0: # data_train
+                    # Reg.pred_data_[i] = np.array(Reg.pred_data_[i])
                     Reg.pred_data_[i].index = list(y_train.index)
+                    Reg.pred_data_[i] = Reg.pred_data_[i].iloc[:,0]
                 elif i == 1: # data_test
+                    # Reg.pred_data_[i] = np.array(Reg.pred_data_[i])
                     Reg.pred_data_[i].index = list(y_test.index)
-                else: # CVi
+                    Reg.pred_data_[i] = Reg.pred_data_[i].iloc[:,0]
+                else:
+                    # CVi
                     Reg.pred_data_[i].index = folds[list(folds)[i-2]]
-                    Reg.CV_results_ = pd.concat([Reg.CV_results_, Reg.pred_data_[i]])
-                    Reg.cv_data_['YpredCV']['Fold' + str(i-1)] = Reg.pred_data_[i]
-                    Reg.cv_data_['idxCV']['Fold' + str(i-1)] = folds[list(folds)[i-2]]
-            Reg.CV_results_.sort_index(inplace = True)
-            Reg.CV_results_.columns = ['Ypredicted_CV']
-            # if you want to display Reg.cv_data_ containing by fold YpredCV and idxCV
-            # cv2.json(Reg.cv_data_)
-            # Display end of modeling message on the interface
-            info.empty()
+                    # Reg.CV_results_ = pd.concat([Reg.CV_results_, Reg.pred_data_[i]])
+                    Reg.cv_data_['YpredCV']['Fold' + str(i-1)] = np.array(Reg.pred_data_[i]).reshape(-1)
+                    Reg.cv_data_['idxCV']['Fold' + str(i-1)] = np.array(folds[list(folds)[i-2]]).reshape(-1)
+                    # Reg.cv_data_['idxCV'] and folds contain the same data
+
+            Reg.CV_results_ = KF_CV.metrics_cv(y = y_train, ypcv = Reg.cv_data_['YpredCV'], folds = folds)[1]
+            #### cross-validation results print
+            Reg.best_hyperparams_print = Reg.best_hyperparams_
+            ## plots
+            Reg.cv_data_ = KF_CV().meas_pred_eq(y = np.array(y_train), ypcv = Reg.cv_data_['YpredCV'], folds = folds)
+            # st.write(Reg.cv_data_)
+            # Reg.CV_results_.sort_index(inplace = True)
+            # Reg.CV_results_.columns = ['Ypredicted_CV']
+            # if you want to display Reg.cv_data_ containing, by fold, YpredCV and idxCV
+            # cv2.json(Reg.cv_data_)
+            # Display end of modeling message on the interface
+            # info.empty()
             M1.success('Model created!')
         except FileNotFoundError as e:
             # Display error message on the interface if modeling is wrong
             info.empty()
             M1.warning('- ERROR during model creation -')
             Reg = None
+
     elif regression_algo == reg_algo[3]:
         s = M1.number_input(label='Enter the maximum number of intervals', min_value=1, max_value=6, value=3)
         it = M1.number_input(label='Enter the number of iterations', min_value=2, max_value=10, value=3)
@@ -263,7 +287,8 @@



-        ################# Model analysis ############
+# ##############################################################################################################
+# ################# Model analysis ############
 if regression_algo in reg_algo[1:] and Reg is not None:
     #M2.write('-- Pretreated data (train) visualization and important spectral regions in the model -- ')
@@ -311,19 +336,20 @@
         cv_results=pd.DataFrame(Reg.CV_results_)

         cv2.write('-- Out-of-Fold Predictions Visualization (All in one) --')
-        fig1 = px.scatter(Reg.cv_data_[2], x ='Measured', y = 'Predicted' , trendline='ols', color='Folds', symbol="Folds",
+        fig1 = px.scatter(Reg.cv_data_[0], x ='Measured', y = 'Predicted' , trendline='ols', color='Folds', symbol="Folds",
                           color_discrete_sequence=px.colors.qualitative.G10)
-        fig1.add_shape(type='line', x0 = .95 * min(Reg.cv_data_[2].loc[:,'Measured']), x1 = 1.05 * max(Reg.cv_data_[2].loc[:,'Measured']), y0 = .95 * min(Reg.cv_data_[2].loc[:,'Measured']), y1 = 1.05 * max(Reg.cv_data_[2].loc[:,'Measured']), line = dict(color='black', dash = "dash"))
+        fig1.add_shape(type='line', x0 = .95 * min(Reg.cv_data_[0].loc[:,'Measured']), x1 = 1.05 * max(Reg.cv_data_[0].loc[:,'Measured']),
+                       y0 = .95 * min(Reg.cv_data_[0].loc[:,'Measured']), y1 = 1.05 * max(Reg.cv_data_[0].loc[:,'Measured']), line = dict(color='black', dash = "dash"))
         fig1.update_traces(marker_size=7, showlegend=False)
         cv2.plotly_chart(fig1, use_container_width=True)

-        fig0 = px.scatter(Reg.cv_data_[2], x ='Measured', y = 'Predicted' , trendline='ols', color='Folds', symbol="Folds", facet_col = 'Folds',facet_col_wrap=1,
+        fig0 = px.scatter(Reg.cv_data_[0], x ='Measured', y = 'Predicted' , trendline='ols', color='Folds', symbol="Folds", facet_col = 'Folds',facet_col_wrap=1,
                           color_discrete_sequence=px.colors.qualitative.G10, text='index', width=800, height=1000)
         fig0.update_traces(marker_size=8, showlegend=False)
-        fig0.write_image("./Report/figures/Allinone.png")
+        fig0.write_image("./Report/figures/meas_vs_pred_cv_onebyone.png")
         cv1.write('-- Out-of-Fold Predictions Visualization (Separate plots) --')
         cv1.plotly_chart(fig0, use_container_width=True)
-        fig1.write_image("./Report/figures/Predictions_V.png")
+        fig1.write_image("./Report/figures/meas_vs_pred_cv_all.png")


         yc = Reg.pred_data_[0]
@@ -337,10 +363,12 @@
             json.dump(Reg.best_hyperparams_, outfile)

-##########
+# ##########
     M1.write("-- Model performance --")
-    M1.dataframe(metrics(c = [y_train, yc], t = [y_test, yt], method='regression').scores_)
-
+    if regression_algo != "Locally Weighted PLSR":
+        M1.dataframe(metrics(c = [y_train, yc], t = [y_test, yt], method='regression').scores_)
+    else:
+        M1.dataframe(metrics(t = [y_test, yt], method='regression').scores_)
     model_per=pd.DataFrame(metrics(c = [y_train, yc], t = [y_test, yt], method='regression').scores_)
     #from st_circular_progress import CircularProgress
     #my_circular_progress = CircularProgress(label = 'Performance',value = 50, key = 'my performance',
@@ -348,26 +376,34 @@
     #my_circular_progress.st_circular_progress()
     #my_circular_progress.update_value(progress=20)

-    a = reg_plot([y_train, y_test],[yc, yt], train_idx = train_index, test_idx = test_index)
+    if regression_algo != "Locally Weighted PLSR":
+        a = reg_plot([y_train, y_test],[yc, yt], train_idx = train_index, test_idx = test_index)
+    else:
+        a = reg_plot([y_train, y_test],[yc, yt], train_idx = train_index, test_idx = test_index)

     M7.pyplot(a)
-    plt.savefig('./Report/figures/Predictedvs.png')
+    plt.savefig('./Report/figures/measured_vs_predicted.png')

     prep_para = Reg.best_hyperparams_
-    prep_para.pop('n_components')
-
-    for i in ['deriv','polyorder']:
-        if Reg.best_hyperparams_[i] == 0:
-            prep_para[i] = '0'
-        elif Reg.best_hyperparams_[i] == 1:
-            prep_para[i] = '1st'
-        elif Reg.best_hyperparams_[i] > 1:
-            prep_para[i] = f"{Reg.best_hyperparams_[i]}nd"
+    if regression_algo != "Locally Weighted PLSR":
+        prep_para.pop('n_components')
+        for i in ['deriv','polyorder']:
+            if Reg.best_hyperparams_[i] == 0:
+                prep_para[i] = '0'
+            elif Reg.best_hyperparams_[i] == 1:
+                prep_para[i] = '1st'
+            elif Reg.best_hyperparams_[i] > 1:
+                prep_para[i] = f"{Reg.best_hyperparams_[i]}nd"

-    residual_plot = resid_plot([y_train, y_test], [yc, yt], train_idx=train_index, test_idx=test_index)
+    if regression_algo != "Locally Weighted PLSR":
+        residual_plot = resid_plot([y_train, y_test], [yc, yt], train_idx=train_index, test_idx=test_index)
+    else:
+        residual_plot = resid_plot([y_train, y_test], [yt, yt], train_idx=train_index, test_idx=test_index)

     M8.pyplot(residual_plot)
-    plt.savefig('./Report/figures/residual_plot.png')
-
-    rega = Reg.selected_features_ ##### ADD FEATURES IMPORTANCE PLOT
+    plt.savefig('./Report/figures/residuals_plot.png')
+
+    if regression_algo != "Locally Weighted PLSR":
+        rega = Reg.selected_features_ ##### ADD FEATURES IMPORTANCE PLOT

     #model_export = M1.selectbox("Choose way to export", options=["pickle", "joblib"], key=20)
     model_name = M9.text_input('Give it a name')
@@ -413,7 +449,8 @@ if not spectra.empty and not y.empty and regression_algo:
     if regression_algo in reg_algo[1:] and Reg is not None:
         fig, (ax1, ax2) = plt.subplots(2,1, figsize = (12, 4), sharex=True)
         ax1.plot(colnames, np.mean(X_train, axis = 0), color = 'black', label = 'Average spectrum (Raw)')
-        ax2.plot(colnames, np.mean(Reg.pretreated_spectra_ , axis = 0), color = 'black', label = 'Average spectrum (pretreated)')
+        if regression_algo != "Locally Weighted PLSR":
+            ax2.plot(colnames, np.mean(Reg.pretreated_spectra_ , axis = 0), color = 'black', label = 'Average spectrum (pretreated)')
         ax2.set_xlabel('Wavelenghts')
         plt.tight_layout()

@@ -443,16 +480,19 @@ if not spectra.empty and not y.empty and regression_algo:
         M2.pyplot(fig)

 ## Load .dx file
+if Reg is not None:
+    with st.container():
+        if st.button("Download the report"):
+            if regression_algo == reg_algo[1]:
+                latex_report = report.report('Predictive model development', file_name, stats, list(Reg.best_hyperparams_.values()), regression_algo, model_per, cv_results)
+                report.compile_latex()
+            if regression_algo is None:
+                st.warning('Data processing has not been performed or finished yet!', icon = "⚠️")
+            else:
+                pass
-
-with st.container():
-    if st.button("Download the report"):
-        if regression_algo == reg_algo[1]:
-            latex_report = report.report('Predictive model development', file_name, stats, list(Reg.best_hyperparams_.values()), regression_algo, model_per, cv_results)
-            report.compile_latex()
-        if regression_algo is None:
-            st.warning('Data processing has not been performed or finished yet!', icon = "⚠️")
         else:
             pass
-    else:
-        pass
+
\ No newline at end of file