From e06c7cadfcaf4dfa715e85b3e62d7ba87d8b53e9 Mon Sep 17 00:00:00 2001
From: DIANE <abderrahim.diane@cefe.cnrs.fr>
Date: Tue, 21 May 2024 16:16:38 +0200
Subject: [PATCH] Model creation:

- CSS: style the layout columns of the model creation page
- Fix VarSel error when no spectral segment is selected
- Add fitted regression equations to the regplot and residual plot legends
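
A minimal, self-contained sketch of the legend-equation idea (illustrative data
and variable names, not the app's own), for reference:

    import numpy as np
    from sklearn.linear_model import LinearRegression
    import matplotlib.pyplot as plt

    measured = np.array([1.0, 2.0, 3.0, 4.0, 5.0])
    predicted = np.array([1.1, 1.9, 3.2, 3.8, 5.1])

    # Fit Predicted = a0 + a1 x Measured and embed the equation in the legend
    M = LinearRegression().fit(measured.reshape(-1, 1), predicted.reshape(-1, 1))
    a1 = np.round(M.coef_[0][0], 2)
    a0 = np.round(M.intercept_[0], 2)

    fig, ax = plt.subplots()
    ax.scatter(measured, predicted, label=f'Calib (Predicted = {a0} + {a1} x Measured)')
    ax.legend()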
---
 src/Class_Mod/Miscellaneous.py   | 47 ++++++++++++++------
 src/Class_Mod/PLSR_Preprocess.py |  2 +-
 src/Class_Mod/VarSel.py          |  7 +--
 src/Packages.py                  |  1 +
 src/pages/2-model_creation.py    | 73 +++++++++++++++++++-------------
 5 files changed, 82 insertions(+), 48 deletions(-)

diff --git a/src/Class_Mod/Miscellaneous.py b/src/Class_Mod/Miscellaneous.py
index a4e934e..c597b53 100644
--- a/src/Class_Mod/Miscellaneous.py
+++ b/src/Class_Mod/Miscellaneous.py
@@ -22,13 +22,25 @@ def prediction(NIRS_csv, qsep, qhdr, model):
 
 @st.cache_data
 def reg_plot( meas, pred, train_idx, test_idx):
+    # Fit Predicted = a0 + a1 x Measured for the calibration (i=0) and test (i=1) sets
+    a0 = np.ones(2)
+    a1 = np.ones(2)
+    for i in range(len(meas)):
+        meas[i] = np.array(meas[i]).reshape(-1, 1)
+        pred[i] = np.array(pred[i]).reshape(-1, 1)
+
+        M = LinearRegression()
+        M.fit(meas[i], pred[i])
+        a1[i] = np.round(M.coef_[0][0], 2)
+        a0[i] = np.round(M.intercept_[0], 2)
+
     ec = np.subtract(np.array(meas[0]).reshape(-1), np.array(pred[0]).reshape(-1))
     et = np.subtract(np.array(meas[1]).reshape(-1), np.array(pred[1]).reshape(-1))
 
     fig, ax = plt.subplots(figsize = (12,4))
-    sns.regplot(x = meas[0] , y = pred[0], color='blue', label = 'Calib')
-    sns.regplot(x = meas[1], y = pred[1], color='green', label = 'Test')
-    plt.plot([np.min(meas[0])-0.05, np.max([meas[0]])+0.05], [np.min(meas[0])-0.05, np.max([meas[0]])+0.05], color = 'black')
+    sns.regplot(x = meas[0] , y = pred[0], color='blue', label = f'Calib (Predicted = {a0[0]} + {a1[0]} x Measured)')
+    sns.regplot(x = meas[1], y = pred[1], color='green', label = f'Test (Predicted = {a0[1]} + {a1[1]} x Measured)')
+    plt.plot([np.min(meas[0]) - 0.05, np.max(meas[0]) + 0.05], [np.min(meas[0]) - 0.05, np.max(meas[0]) + 0.05], color = 'black')
 
     for i, txt  in enumerate(train_idx):
         #plt.annotate(txt ,(np.array(meas[0]).reshape(-1)[i],ec[i]))
@@ -46,29 +58,36 @@ def reg_plot( meas, pred, train_idx, test_idx):
 
 @st.cache_data
 def resid_plot( meas, pred, train_idx, test_idx):
-    
-    ec = np.subtract(np.array(meas[0]).reshape(-1), np.array(pred[0]).reshape(-1))
-    et = np.subtract(np.array(meas[1]).reshape(-1), np.array(pred[1]).reshape(-1))
+    # Fit Residual = a0 + a1 x Measured for the calibration (i=0) and test (i=1) sets
+    a0 = np.ones(2)
+    a1 = np.ones(2)
+    e = [np.subtract(meas[0], pred[0]), np.subtract(meas[1], pred[1])]
+
+    for i in range(len(meas)):
+        M = LinearRegression()
+        M.fit(np.array(meas[i]).reshape(-1, 1), np.array(e[i]).reshape(-1, 1))
+        a1[i] = np.round(M.coef_[0][0], 2)
+        a0[i] = np.round(M.intercept_[0], 2)
     
 
     fig, ax = plt.subplots(figsize = (12,4))
-    sns.scatterplot(x = meas[0], y = ec, color='blue', label = 'Calib')
-    sns.scatterplot(x = meas[1], y = et, color='green', label = 'Test')
+    sns.scatterplot(x = meas[0], y = e[0], color='blue', label = f'Calib (Residual = {a0[0]} + {a1[0]} x Measured)')
+    sns.scatterplot(x = meas[1], y = e[1], color='green', label = f'Test (Residual = {a0[1]} + {a1[1]} x Measured)')
     plt.axhline(y= 0, c ='black', linestyle = ':')
-    lim = np.max(abs(np.concatenate([ec, et], axis = 0)))*1.1
+    lim = np.max(abs(np.concatenate([e[0], e[1]], axis = 0)))*1.1
     plt.ylim(- lim, lim )    
     
 
-
+    # Flatten residuals so the element-wise outlier checks below yield scalars
+    for i in range(2):
+        e[i] = np.array(e[i]).reshape(-1)
 
     for i, txt  in enumerate(train_idx):
         #plt.annotate(txt ,(np.array(meas[0]).reshape(-1)[i],ec[i]))
-        if np.abs(ec[i])> np.mean(ec)+ 3*np.std(ec):
-            plt.annotate(txt ,(np.array(meas[0]).reshape(-1)[i],ec[i]))
+        if np.abs(e[0][i])> np.mean(e[0])+ 3*np.std(e[0]):
+            plt.annotate(txt ,(np.array(meas[0]).reshape(-1)[i],e[0][i]))
 
     for i, txt  in enumerate(test_idx):
-        if np.abs(et[i])> np.mean(et)+ 3*np.std(et):
-            plt.annotate(txt ,(np.array(meas[1]).reshape(-1)[i],et[i]))
+        if np.abs(e[1][i])> np.mean(e[1])+ 3*np.std(e[1]):
+            plt.annotate(txt ,(np.array(meas[1]).reshape(-1)[i],e[1][i]))
     ax.set_xlabel(f'{ train_idx.shape}')
     ax.set_ylabel('Residuals')
     ax.set_xlabel('Measured values')
diff --git a/src/Class_Mod/PLSR_Preprocess.py b/src/Class_Mod/PLSR_Preprocess.py
index 7904ef6..aeb0066 100644
--- a/src/Class_Mod/PLSR_Preprocess.py
+++ b/src/Class_Mod/PLSR_Preprocess.py
@@ -60,7 +60,7 @@ class PlsProcess:
         rmset = np.sqrt(mean_squared_error(self.y_test, yt))
         
 
-        score = rmsecv/rmsec * np.round(rmset/rmsecv) * rmsecv*100/self.y_train.mean() * rmset*1000/self.y_test.mean()
+        score = rmsecv/rmsec*np.round(rmset/rmsecv)*rmsecv*100/self.y_train.mean()*rmset*1000/self.y_test.mean()
         if score < PlsProcess.SCORE-0.5 :
             PlsProcess.SCORE = score
             self.nlv = params['n_components']
diff --git a/src/Class_Mod/VarSel.py b/src/Class_Mod/VarSel.py
index 76fb576..a2d5363 100644
--- a/src/Class_Mod/VarSel.py
+++ b/src/Class_Mod/VarSel.py
@@ -117,9 +117,10 @@ class TpeIpls:
                            verbose=0)
 
         ban = {}
-        for i in range(len(self.segments)):
-            ban[f'band{i+1}'] = [self.segments[i][0], self.segments[i][self.segments[i].shape[0]-1]]
-        
+        if self.segments:  # guard against an empty segment list
+            for i in range(len(self.segments)):
+                ban[f'band{i+1}'] = [self.segments[i][0], self.segments[i][-1]]
+
         self.bands = pd.DataFrame(ban).T
         self.bands.columns = ['from', 'to']
 
diff --git a/src/Packages.py b/src/Packages.py
index c367f3d..b090edf 100644
--- a/src/Packages.py
+++ b/src/Packages.py
@@ -41,6 +41,7 @@ from sklearn.pipeline import Pipeline, FeatureUnion
 from sklearn.compose import TransformedTargetRegressor
 from sklearn.metrics import mean_absolute_error, mean_squared_error, mean_absolute_percentage_error, r2_score
 from sklearn.cross_decomposition import PLSRegression
+from sklearn.linear_model import LinearRegression
 ## Images and plots
 from PIL import Image
 import plotly.express as px
diff --git a/src/pages/2-model_creation.py b/src/pages/2-model_creation.py
index 44f97e7..e1c9a86 100644
--- a/src/pages/2-model_creation.py
+++ b/src/pages/2-model_creation.py
@@ -10,7 +10,29 @@ add_header()
 st.session_state["interface"] = st.session_state.get('interface')
 if st.session_state["interface"] == 'simple':
     hide_pages("Predictions")
-
+st.markdown(
+    """
+    <style>
+        div[data-testid="column"]:nth-of-type(1)
+        {
+            border:2px solid black;border-radius: 50px;padding: 15px;
+        } 
+
+        div[data-testid="column"]:nth-of-type(2)
+        {
+            border:2px solid black;border-radius: 50px;padding: 15px;
+            text-align: left;
+        } 
+
+        div[data-testid="column"]:nth-of-type(3)
+        {
+            border:2px solid black;border-radius: 50px;padding: 15px;
+            text-align: left;
+        } 
+
+    </style>
+    """,unsafe_allow_html=True
+)
 
     ####################################### page Design #######################################
 st.title("Calibration Model Development")
@@ -18,23 +40,20 @@ st.markdown("Create a predictive model, then use it for predicting your target v
 st.header("I - Data visualization", divider='blue')
 M0, M00 = st.columns([1, .4])
 st.header("II - Model creation", divider='blue')
-
-M1, M2, M3 = st.columns([2,3,2])
-M4, M5 = st.columns([6,2])
-st.header("Model Diagnosis", divider='blue')
-
+M1, M2, M3 = st.columns([2,2,2])
+st.header("III - Model Diagnosis", divider='blue')
 M7, M8 = st.columns([2,2])
 M7.write('Predicted vs Measured values')
 M8.write('Residuals plot')
-M9, M10 = st.columns([2,2])
+M9 = st.container()
 M9.write("-- Save the model --")
             ######################################################################
 
-reg_algo = ["","Full-PLSR", "Locally Weighted PLSR", "Interval-PLSR", "Full-PLSR-sklearn", "PrePLStester"]
+reg_algo = ["","Full-PLSR", "Locally Weighted PLSR", "Interval-PLSR"]
       #######################################        ###########################################
 
 files_format = ['.csv', '.dx']
-file = M00.radio('select files format:', options = files_format)
+file = M00.radio('Select the file format:', options = files_format)
 
 ### Data
 spectra = pd.DataFrame
@@ -147,7 +166,8 @@ if not spectra.empty and not y.empty:
     regression_algo = M1.selectbox("Choose the algorithm for regression", options=reg_algo, key = 12)
     if regression_algo == reg_algo[1]:
         # Train model with model function from application_functions.py
-        Reg = PinardPlsr(x_train = X_train, x_test = X_test,y_train = y_train, y_test = y_test)
+        Reg = PlsProcess(x_train = X_train, x_test = X_test, y_train = y_train, y_test = y_test, scale = False, Kfold=3)
+        Reg.tune(n_iter=500)
         reg_model = Reg.model_
         #M2.dataframe(Pin.pred_data_)
     elif regression_algo == reg_algo[2]:
@@ -185,38 +205,31 @@ if not spectra.empty and not y.empty:
         M1.progress(100, text = "The model has successfully been  created!")            
         time.sleep(1)
         reg_model = Reg.model_
-        M1.write('-- Table of selected wavelengths --')
+        M3.write('-- Spectral regions used for model creation --')
         wls = rega[0]
-        M1.table(wls)
+        M3.table(wls)
         fig, ax = plt.subplots(figsize = (12, 6))
         X_train.mean().plot(ax = ax)
         for i in range(s):
             colnames = np.array(y)
-            num = {'u', # unsigned integer
-                    'i', # signed integer
-                    'f', # floats
-                    'c'} # co:
+            num = {'u', 'i', 'f', 'c'}  # numpy dtype kinds: unsigned int, signed int, float, complex
             if np.array(X_train.columns).dtype.kind in num:
-                plt.plot(X_train.columns, X_train.mean())
-                ax.axvspan(X_train.columns[rega[0]['from'][i]], X_train.columns[rega[0]['to'][i]], color='#80ff00', alpha=0.5, lw=0)
+                plt.plot(X_train.columns, X_train.mean(), color = 'black')
+                ax.axvspan(X_train.columns[rega[0]['from'][i]], X_train.columns[rega[0]['to'][i]], color='#2a52be', alpha=0.5, lw=0)
                 plt.tight_layout()
                 plt.margins(x = 0)
             else:
                 plt.plot(np.arange(X_train.shape[1]), X_train.mean())
-                ax.axvspan(rega[0]['from'][i], rega[0]['to'][i], color='#80ff00', alpha=0.5, lw=0)
+                ax.axvspan(rega[0]['from'][i], rega[0]['to'][i], color='#2a52be', alpha=0.5, lw=0)
                 plt.tight_layout()
                 plt.margins(x = 0)
-        M1.pyplot(fig)
         
-    elif regression_algo == reg_algo[4]:
-        Reg = PlsR(x_train = X_train, x_test = X_test, y_train = y_train, y_test = y_test)
-        reg_model = Reg.model_
-    
-    
-    elif regression_algo == reg_algo[5]:
-        Reg = PlsProcess(x_train = X_train, x_test = X_test, y_train = y_train, y_test = y_test, scale = False, Kfold=3)
-        Reg.tune(n_iter=500)
-        reg_model = Reg.model_
+        M3.write('-- Visualization of the spectral regions used for model creation --')
+        M3.pyplot(fig)
+        
+    # elif regression_algo == reg_algo[4]:
+    #     Reg = PlsR(x_train = X_train, x_test = X_test, y_train = y_train, y_test = y_test)
+    #     reg_model = Reg.model_
 
         ################# Model analysis ############
     if regression_algo in reg_algo[1:]:
@@ -229,7 +242,7 @@ if not spectra.empty and not y.empty:
         with open("data/params/Preprocessing.json", "w") as outfile: 
             json.dump(Reg.best_hyperparams, outfile)      
 
-        M2.write("-- Performance metrics --")
+        M2.write("-- Model performance --")
         M2.dataframe(metrics(c = [y_train, yc], t = [y_test, yt], method='regression').scores_)
         #from st_circular_progress import CircularProgress
         #my_circular_progress = CircularProgress(label = 'Performance',value = 50, key = 'my performance',
-- 
GitLab