From cdf5cba66beb63481ec44d0105699633c279decf Mon Sep 17 00:00:00 2001
From: Nicolas Barthes <nicolas.barthes@cnrs.fr>
Date: Fri, 13 Sep 2024 16:37:20 +0200
Subject: [PATCH] starting prediction for LWPLSR models (all CSV exporters ready
 to use with LWPLSR_Call)

---
 src/pages/3-prediction.py | 92 ++++++++++++++++-----------------------
 1 file changed, 37 insertions(+), 55 deletions(-)

diff --git a/src/pages/3-prediction.py b/src/pages/3-prediction.py
index 5455083..410c5a4 100644
--- a/src/pages/3-prediction.py
+++ b/src/pages/3-prediction.py
@@ -151,29 +151,28 @@ with c2:
                         pred_data = spectra
                 os.unlink(tmp_path)
 
-
 # Load parameters
 st.subheader("I - Spectral data preprocessing & visualization", divider='blue')
 # try:
 if not pred_data.empty:# Load the model with joblib
     @st.cache_data
-    def preprocess_spectra(change):
+    def preprocess_spectra(data, change):
         # M4.write(ProcessLookupError)
         
         if system_data['spec-preprocessing']['normalization'] == 'Snv':
-            x1 = Snv(pred_data)
+            x1 = Snv(data)
             norm = 'Standard Normal Variate'
         else:
             norm = 'No Normalization was applied'
-            x1 = pred_data
+            x1 = data
         x2 = savgol_filter(x1,
                             window_length = int(system_data['spec-preprocessing']['SavGol(polyorder,window_length,deriv)'][1]),
                             polyorder = int(system_data['spec-preprocessing']['SavGol(polyorder,window_length,deriv)'][0]),
                             deriv = int(system_data['spec-preprocessing']['SavGol(polyorder,window_length,deriv)'][2]),
                                 delta=1.0, axis=-1, mode="interp", cval=0.0)
-        preprocessed = DataFrame(x2, index = pred_data.index, columns = pred_data.columns)
+        preprocessed = DataFrame(x2, index = data.index, columns = data.columns)
         return norm, preprocessed
-    norm, preprocessed = preprocess_spectra(change= hash_)
+    norm, preprocessed = preprocess_spectra(pred_data, change= hash_)
 
                         # ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ #
     # @st.cache_data
@@ -247,60 +246,43 @@ if not pred_data.empty:# Load the model with joblib
                             st.error(f'''Error: Length mismatch: the number of samples indices is {len(rownames)}, while the model produced 
                                             {len(model.predict(preprocesseddf))} values. correct the "indexes column in csv?" parameter''')
                     case 'LW-PLS':
-                        # export data to csv for Julia train/test
-                        train_idx, test_idx =  system_data['data']['training_data_idx'], system_data['data']['testing_data_idx']
-                        spectra = system_data['data']['raw-spectra']
-                        y = system_data['data']['target']
-                        X_train, y_train, X_test, y_test = spectra.iloc[train_idx,:], y.iloc[train_idx], spectra.iloc[test_idx,:], y.iloc[test_idx]
-                        nb_folds = 3
-                        folds = KF_CV.CV(X_train, y_train, nb_folds)
-                        #['raw-spectra', 'target', 'training_data_idx', 'testing_data_idx']
-                        data_to_work_with = ['x_train_np', 'y_train_np', 'x_test_np', 'y_test_np', 'x_pred']
-                        x_train_np, y_train_np, x_test_np, y_test_np = X_train.to_numpy(), y_train.to_numpy(), X_test.to_numpy(), y_test.to_numpy()
-                        x_pred = pred_data.to_numpy()
-                        # Cross-Validation calculation
-                        d = {}
-                        for i in range(nb_folds):
-                            d["xtr_fold{0}".format(i+1)], d["ytr_fold{0}".format(i+1)], d["xte_fold{0}".format(i+1)], d["yte_fold{0}".format(i+1)] = np.delete(x_train_np, folds[list(folds)[i]], axis=0), np.delete(y_train_np, folds[list(folds)[i]], axis=0), x_train_np[folds[list(folds)[i]]], y_train_np[folds[list(folds)[i]]]
-                            data_to_work_with.append("xtr_fold{0}".format(i+1))
-                            data_to_work_with.append("ytr_fold{0}".format(i+1))
-                            data_to_work_with.append("xte_fold{0}".format(i+1))
-                            data_to_work_with.append("yte_fold{0}".format(i+1))
-                        # check best pre-treatment with a global PLSR model
-                        preReg = Plsr(train = [X_train, y_train], test = [X_test, y_test], n_iter=20)
                         temp_path = Path('temp/')
-                        with open(temp_path / "lwplsr_preTreatments.json", "w+") as outfile:
-                            json.dump(preReg.best_hyperparams_, outfile)
-                        # export Xtrain, Xtest, Ytrain, Ytest and all CV folds to temp folder as csv files
+                        # export data to csv for Julia train/pred
+                        st.write(system_data['data'])
+                        # spectra = system_data['data']['raw-spectra'] # without pretreatments
+                        spectra = preprocess_spectra(system_data['data']['raw-spectra'], change= hash_)
+                        # with pretreatments
+                        x_pred = preprocessed
+                        y = system_data['data']['target']
+                        data_to_work_with = ['spectra', 'y', 'x_pred']
+                        spectra_np, y_np, x_pred_np = spectra.to_numpy(), y.to_numpy(), x_pred.to_numpy()
+                        # export spectra, y, x_pred to temp folder as csv files
                         for i in data_to_work_with:
-                            if 'fold' in i:
-                                j = d[i]
-                            else:
-                                j = globals()[i]
-                                # st.write(j)
+                            j = globals()[i]
+                            # st.write(j)
                             np.savetxt(temp_path / str(i + ".csv"), j, delimiter=",")
-                        # run Julia Jchemo as subprocess
+                        # # run Julia Jchemo as subprocess
                         import subprocess
                         subprocess_path = Path("utils/")
-                        subprocess.run([f"{sys.executable}", subprocess_path / "LWPLSR_Call.py"])
-                        # retrieve json results from Julia JChemo
-                        try:
-                            with open(temp_path / "lwplsr_outputs.json", "r") as outfile:
-                                Reg_json = json.load(outfile)
-                                # delete csv files
-                                for i in data_to_work_with: os.unlink(temp_path / str(i + ".csv"))
-                            # delete json file after import
-                            os.unlink(temp_path / "lwplsr_outputs.json")
-                            os.unlink(temp_path / "lwplsr_preTreatments.json")
-                            # format result data into Reg object
-                            pred = ['pred_data_train', 'pred_data_test']### keys of the dict
-                            for i in range(nb_folds):
-                                pred.append("CV" + str(i+1)) ### add cv folds keys to pred
-                        except FileNotFoundError as e:
-                            Reg = None
-                            for i in data_to_work_with: os.unlink(temp_path / str(i + ".csv"))
-                        
-                        st.write(Reg_json)
+                        # subprocess.run([f"{sys.executable}", subprocess_path / "LWPLSR_Call.py"])
+                        # # retrieve json results from Julia JChemo
+                        # try:
+                        #     with open(temp_path / "lwplsr_outputs.json", "r") as outfile:
+                        #         Reg_json = json.load(outfile)
+                        #         # delete csv files
+                        #         for i in data_to_work_with: os.unlink(temp_path / str(i + ".csv"))
+                        #     # delete json file after import
+                        #     os.unlink(temp_path / "lwplsr_outputs.json")
+                        #     os.unlink(temp_path / "lwplsr_preTreatments.json")
+                        #     # format result data into Reg object
+                        #     pred = ['pred_data_train', 'pred_data_test']### keys of the dict
+                        #     for i in range(nb_folds):
+                        #         pred.append("CV" + str(i+1)) ### add cv folds keys to pred
+                        # except FileNotFoundError as e:
+                        #     Reg = None
+                        #     for i in data_to_work_with: os.unlink(temp_path / str(i + ".csv"))
+                        #
+                        # st.write(Reg_json)
                     
 
             ################################### results display ###################################
-- 
GitLab