From e06c7cadfcaf4dfa715e85b3e62d7ba87d8b53e9 Mon Sep 17 00:00:00 2001 From: DIANE <abderrahim.diane@cefe.cnrs.fr> Date: Tue, 21 May 2024 16:16:38 +0200 Subject: [PATCH] Model creation: - CSS - Valsel error - Add equations to regplot and resid plots --- src/Class_Mod/Miscellaneous.py | 47 ++++++++++++++------ src/Class_Mod/PLSR_Preprocess.py | 2 +- src/Class_Mod/VarSel.py | 7 +-- src/Packages.py | 1 + src/pages/2-model_creation.py | 73 +++++++++++++++++++------------- 5 files changed, 82 insertions(+), 48 deletions(-) diff --git a/src/Class_Mod/Miscellaneous.py b/src/Class_Mod/Miscellaneous.py index a4e934e..c597b53 100644 --- a/src/Class_Mod/Miscellaneous.py +++ b/src/Class_Mod/Miscellaneous.py @@ -22,13 +22,25 @@ def prediction(NIRS_csv, qsep, qhdr, model): @st.cache_data def reg_plot( meas, pred, train_idx, test_idx): + a0 = np.ones(2) + a1 = np.ones(2) + + for i in range(len(meas)): + meas[i] = np.array(meas[i]).reshape(-1, 1) + pred[i] = np.array(pred[i]).reshape(-1, 1) + + M = LinearRegression() + M.fit(meas[i], pred[i]) + a1[i] = np.round(M.coef_[0][0],2) + a0[i] = np.round(M.intercept_[0],2) + ec = np.subtract(np.array(meas[0]).reshape(-1), np.array(pred[0]).reshape(-1)) et = np.subtract(np.array(meas[1]).reshape(-1), np.array(pred[1]).reshape(-1)) fig, ax = plt.subplots(figsize = (12,4)) - sns.regplot(x = meas[0] , y = pred[0], color='blue', label = 'Calib') - sns.regplot(x = meas[1], y = pred[1], color='green', label = 'Test') - plt.plot([np.min(meas[0])-0.05, np.max([meas[0]])+0.05], [np.min(meas[0])-0.05, np.max([meas[0]])+0.05], color = 'black') + sns.regplot(x = meas[0] , y = pred[0], color='blue', label = f'Calib (Predicted = {a0[0]} + {a1[0]} x Measured)') + sns.regplot(x = meas[1], y = pred[1], color='green', label = f'Test (Predicted = {a0[1]} + {a1[1]} x Measured)') + plt.plot([np.min(meas[0]) - 0.05, np.max([meas[0]]) + 0.05], [np.min(meas[0]) - 0.05, np.max([meas[0]]) + 0.05], color = 'black') for i, txt in enumerate(train_idx): #plt.annotate(txt ,(np.array(meas[0]).reshape(-1)[i],ec[i])) @@ -46,29 +58,36 @@ def reg_plot( meas, pred, train_idx, test_idx): @st.cache_data def resid_plot( meas, pred, train_idx, test_idx): - - ec = np.subtract(np.array(meas[0]).reshape(-1), np.array(pred[0]).reshape(-1)) - et = np.subtract(np.array(meas[1]).reshape(-1), np.array(pred[1]).reshape(-1)) + a0 = np.ones(2) + a1 = np.ones(2) + e = [np.subtract(meas[0] ,pred[0]), np.subtract(meas[1], pred[1])] + + for i in range(len(meas)): + M = LinearRegression() + M.fit( np.array(meas[i]).reshape(-1,1), np.array(e[i]).reshape(-1,1)) + a1[i] = np.round(M.coef_[0],2) + a0[i] = np.round(M.intercept_,2) fig, ax = plt.subplots(figsize = (12,4)) - sns.scatterplot(x = meas[0], y = ec, color='blue', label = 'Calib') - sns.scatterplot(x = meas[1], y = et, color='green', label = 'Test') + sns.scatterplot(x = meas[0], y = e[0], color='blue', label = f'Calib (Residual = {a0[0]} + {a1[0]} * Measured)') + sns.scatterplot(x = meas[1], y = e[1], color='green', label = f'Test (Residual = {a0[1]} + {a1[1]} * Measured)') plt.axhline(y= 0, c ='black', linestyle = ':') - lim = np.max(abs(np.concatenate([ec, et], axis = 0)))*1.1 + lim = np.max(abs(np.concatenate([e[0], e[1]], axis = 0)))*1.1 plt.ylim(- lim, lim ) - + for i in range(2): + e[i] = np.array(e[i]).reshape(-1,1) for i, txt in enumerate(train_idx): #plt.annotate(txt ,(np.array(meas[0]).reshape(-1)[i],ec[i])) - if np.abs(ec[i])> np.mean(ec)+ 3*np.std(ec): - plt.annotate(txt ,(np.array(meas[0]).reshape(-1)[i],ec[i])) + if np.abs(e[0][i])> np.mean(e[0])+ 3*np.std(e[0]): + plt.annotate(txt ,(np.array(meas[0]).reshape(-1)[i],e[0][i])) for i, txt in enumerate(test_idx): - if np.abs(et[i])> np.mean(et)+ 3*np.std(et): - plt.annotate(txt ,(np.array(meas[1]).reshape(-1)[i],et[i])) + if np.abs(e[1][i])> np.mean(e[1])+ 3*np.std(e[1]): + plt.annotate(txt ,(np.array(meas[1]).reshape(-1)[i],e[1][i])) ax.set_xlabel(f'{ train_idx.shape}') ax.set_ylabel('Residuals') ax.set_xlabel('Measured values') diff --git a/src/Class_Mod/PLSR_Preprocess.py b/src/Class_Mod/PLSR_Preprocess.py index 7904ef6..aeb0066 100644 --- a/src/Class_Mod/PLSR_Preprocess.py +++ b/src/Class_Mod/PLSR_Preprocess.py @@ -60,7 +60,7 @@ class PlsProcess: rmset = np.sqrt(mean_squared_error(self.y_test, yt)) - score = rmsecv/rmsec * np.round(rmset/rmsecv) * rmsecv*100/self.y_train.mean() * rmset*1000/self.y_test.mean() + score = rmsecv/rmsec*np.round(rmset/rmsecv)*rmsecv*100/self.y_train.mean()*rmset*1000/self.y_test.mean() if score < PlsProcess.SCORE-0.5 : PlsProcess.SCORE = score self.nlv = params['n_components'] diff --git a/src/Class_Mod/VarSel.py b/src/Class_Mod/VarSel.py index 76fb576..a2d5363 100644 --- a/src/Class_Mod/VarSel.py +++ b/src/Class_Mod/VarSel.py @@ -117,9 +117,10 @@ class TpeIpls: verbose=0) ban = {} - for i in range(len(self.segments)): - ban[f'band{i+1}'] = [self.segments[i][0], self.segments[i][self.segments[i].shape[0]-1]] - + if self.segments:####### test + for i in range(len(self.segments)): + ban[f'band{i+1}'] = [self.segments[i][0], self.segments[i][self.segments[i].shape[0]-1]] + self.bands = pd.DataFrame(ban).T self.bands.columns = ['from', 'to'] diff --git a/src/Packages.py b/src/Packages.py index c367f3d..b090edf 100644 --- a/src/Packages.py +++ b/src/Packages.py @@ -41,6 +41,7 @@ from sklearn.pipeline import Pipeline, FeatureUnion from sklearn.compose import TransformedTargetRegressor from sklearn.metrics import mean_absolute_error, mean_squared_error, mean_absolute_percentage_error, r2_score from sklearn.cross_decomposition import PLSRegression +from sklearn.linear_model import LinearRegression ## Images and plots from PIL import Image import plotly.express as px diff --git a/src/pages/2-model_creation.py b/src/pages/2-model_creation.py index 44f97e7..e1c9a86 100644 --- a/src/pages/2-model_creation.py +++ b/src/pages/2-model_creation.py @@ -10,7 +10,29 @@ add_header() st.session_state["interface"] = st.session_state.get('interface') if st.session_state["interface"] == 'simple': hide_pages("Predictions") - +st.markdown( + """ + <style> + div[data-testid="column"]:nth-of-type(1) + { + border:2px solid black;border-radius: 50px;padding: 15px; + } + + div[data-testid="column"]:nth-of-type(2) + { + border:2px solid black;border-radius: 50px;padding: 15px; + text-align: left; + } + + div[data-testid="column"]:nth-of-type(3) + { + border:2px solid black;border-radius: 50px;padding: 15px; + text-align: left; + } + + </style> + """,unsafe_allow_html=True +) ####################################### page Design ####################################### st.title("Calibration Model Development") @@ -18,23 +40,20 @@ st.markdown("Create a predictive model, then use it for predicting your target v st.header("I - Data visualization", divider='blue') M0, M00 = st.columns([1, .4]) st.header("II - Model creation", divider='blue') - -M1, M2, M3 = st.columns([2,3,2]) -M4, M5 = st.columns([6,2]) -st.header("Model Diagnosis", divider='blue') - +M1, M2, M3 = st.columns([2,2,2]) +st.header("III - Model Diagnosis", divider='blue') M7, M8 = st.columns([2,2]) M7.write('Predicted vs Measured values') M8.write('Residuals plot') -M9, M10 = st.columns([2,2]) +M9 = st.container() M9.write("-- Save the model --") ###################################################################### -reg_algo = ["","Full-PLSR", "Locally Weighted PLSR", "Interval-PLSR", "Full-PLSR-sklearn", "PrePLStester"] +reg_algo = ["","Full-PLSR", "Locally Weighted PLSR", "Interval-PLSR"] ####################################### ########################################### files_format = ['.csv', '.dx'] -file = M00.radio('select files format:', options = files_format) +file = M00.radio('Select files format:', options = files_format) ### Data spectra = pd.DataFrame @@ -147,7 +166,8 @@ if not spectra.empty and not y.empty: regression_algo = M1.selectbox("Choose the algorithm for regression", options=reg_algo, key = 12) if regression_algo == reg_algo[1]: # Train model with model function from application_functions.py - Reg = PinardPlsr(x_train = X_train, x_test = X_test,y_train = y_train, y_test = y_test) + Reg = PlsProcess(x_train = X_train, x_test = X_test, y_train = y_train, y_test = y_test, scale = False, Kfold=3) + Reg.tune(n_iter=500) reg_model = Reg.model_ #M2.dataframe(Pin.pred_data_) elif regression_algo == reg_algo[2]: @@ -185,38 +205,31 @@ if not spectra.empty and not y.empty: M1.progress(100, text = "The model has successfully been created!") time.sleep(1) reg_model = Reg.model_ - M1.write('-- Table of selected wavelengths --') + M3.write('-- Spectral regions used for model creation --') wls = rega[0] - M1.table(wls) + M3.table(wls) fig, ax = plt.subplots(figsize = (12, 6)) X_train.mean().plot(ax = ax) for i in range(s): colnames = np.array(y) - num = {'u', # unsigned integer - 'i', # signed integer - 'f', # floats - 'c'} # co: + num = {'u','i','f','c'} if np.array(X_train.columns).dtype.kind in num: - plt.plot(X_train.columns, X_train.mean()) - ax.axvspan(X_train.columns[rega[0]['from'][i]], X_train.columns[rega[0]['to'][i]], color='#80ff00', alpha=0.5, lw=0) + plt.plot(X_train.columns, X_train.mean(), color = 'black') + ax.axvspan(X_train.columns[rega[0]['from'][i]], X_train.columns[rega[0]['to'][i]], color='#2a52be', alpha=0.5, lw=0) plt.tight_layout() plt.margins(x = 0) else: plt.plot(np.arange(X_train.shape[1]), X_train.mean()) - ax.axvspan(rega[0]['from'][i], rega[0]['to'][i], color='#80ff00', alpha=0.5, lw=0) + ax.axvspan(rega[0]['from'][i], rega[0]['to'][i], color='#2a52be', alpha=0.5, lw=0) plt.tight_layout() plt.margins(x = 0) - M1.pyplot(fig) - elif regression_algo == reg_algo[4]: - Reg = PlsR(x_train = X_train, x_test = X_test, y_train = y_train, y_test = y_test) - reg_model = Reg.model_ - - - elif regression_algo == reg_algo[5]: - Reg = PlsProcess(x_train = X_train, x_test = X_test, y_train = y_train, y_test = y_test, scale = False, Kfold=3) - Reg.tune(n_iter=500) - reg_model = Reg.model_ + M3.write('-- Visualization of the spectral regions used for model creation -- ') + M3.pyplot(fig) + + # elif regression_algo == reg_algo[4]: + # Reg = PlsR(x_train = X_train, x_test = X_test, y_train = y_train, y_test = y_test) + # reg_model = Reg.model_ ################# Model analysis ############ if regression_algo in reg_algo[1:]: @@ -229,7 +242,7 @@ if not spectra.empty and not y.empty: with open("data/params/Preprocessing.json", "w") as outfile: json.dump(Reg.best_hyperparams, outfile) - M2.write("-- Performance metrics --") + M2.write("-- Model performance --") M2.dataframe(metrics(c = [y_train, yc], t = [y_test, yt], method='regression').scores_) #from st_circular_progress import CircularProgress #my_circular_progress = CircularProgress(label = 'Performance',value = 50, key = 'my performance', -- GitLab