diff --git a/src/Class_Mod/RegModels.py b/src/Class_Mod/RegModels.py index fb56b487c0312e842ab76404d3319e2a9d705a91..ef87faf6385192ec006467e30d66e983e90ad0c1 100644 --- a/src/Class_Mod/RegModels.py +++ b/src/Class_Mod/RegModels.py @@ -12,8 +12,8 @@ class Regmodel(object): self._yc, self._ycv, self._yt = None, None, None self._cv_df = pd.DataFrame self._nfolds = nfolds - self.bands = pd.DataFrame() - self.important_features = pd.DataFrame() + self._selected_bands = pd.DataFrame(index = ['from', 'to']) + self.important_features = None self._hyper_params = {'polyorder': hp.choice('polyorder', [0, 1, 2]), 'deriv': hp.choice('deriv', [0, 1, 2]), 'window_length': hp.choice('window_length', [15, 21, 27, 33]), @@ -38,6 +38,10 @@ class Regmodel(object): def test_data_(self): return [self._xt, self._ytest] + @property + def pretreated_spectra_(self): + return self.pretreated + @property def get_params_(self): return self._hyper_params @@ -46,8 +50,19 @@ class Regmodel(object): pass @property - def best_hyperparams(self): - return self._best + def best_hyperparams_(self): + return self._best + @property + def best_hyperparams_print(self): + if self._best['scatter'] == 'Snv': + a = 'Standard Normal Variate (SNV)' + + elif self._best['scatter'] == 'No_transformation': + a = " No transformation was performed" + + SG = f'- Savitzky-Golay derivative parameters (Window_length:{self._best['window_length']}; polynomial order: {self._best['polyorder']}; Derivative order : {self._best['deriv']})' + Norm = f'- Spectral Normalization: {a}' + return SG+"\n"+Norm @property def model_(self): @@ -67,6 +82,9 @@ class Regmodel(object): @property def important_features_(self): return self.important_features + @property + def selected_features_(self): + return self._selected_bands ########################################### ######################################### class Plsr(Regmodel): @@ -108,7 +126,7 @@ class Plsr(Regmodel): self._yt = Model.predict(x2[1]) self._model = Model self._best = params - self.x2 = x2[0] + self.pretreated = pd.DataFrame(x2[0]) return score @@ -117,13 +135,13 @@ class TpeIpls(Regmodel): def __init__(self, train: [pd.DataFrame, pd.DataFrame], test: [pd.DataFrame, pd.DataFrame], n_iter = 10, n_intervall = 5): self.n_intervall = n_intervall self.n_arrets = self.n_intervall*2 - self.bands = pd.DataFrame() - self.bands.index = ['from', 'to'] + r = {'n_components': hp.randint('n_components', 2,20)} r.update({f'v{i}': hp.randint(f'v{i}', 0, train[0].shape[1]) for i in range(1,self.n_arrets+1)}) super().__init__(train, test, n_iter, add_hyperparams = r) + ### parameters in common def objective(self, params): @@ -148,10 +166,16 @@ class TpeIpls(Regmodel): params['deriv'], params['polyorder'], params['window_length'] = a, b, c x2 = [savgol_filter(x1[i], polyorder=params['polyorder'], deriv=params['deriv'], window_length = params['window_length']) for i in range(2)] # print(x2) - + # ## Modelling - Model = PLSRegression(scale = False, n_components = params['n_components']) - self._cv_df = KF_CV().process(model = Model, x = x2[0][:,id], y = self._ytrain, n_folds = self._nfolds) + try: + Model = PLSRegression(scale = False, n_components = params['n_components']) + self._cv_df = KF_CV().process(model = Model, x = x2[0][:,id], y = self._ytrain, n_folds = self._nfolds) + except ValueError as ve: + params["n_components"] = 1 + Model = PLSRegression(scale = False, n_components = params['n_components']) + self._cv_df = KF_CV().process(model = Model, x = x2[0][:,id], y = self._ytrain, n_folds = self._nfolds) + self._cv_df['Average'] = self._cv_df.mean(axis = 1) self._cv_df['S'] = self._cv_df.std(axis = 1) self._cv_df['CV(%)'] = self._cv_df['S'] * 100 / self._cv_df['Average'] @@ -168,12 +192,12 @@ class TpeIpls(Regmodel): self._yt = Model.predict(x2[1][:,id]) self._model = Model self._best = params - self.x2 = x2[0][:,id] + self.pretreated = pd.DataFrame(x2[0]) self.segments = arrays for i in range(len(self.segments)): - self.bands[f'band{i+1}'] = [self.segments[i][0], self.segments[i][self.segments[i].shape[0]-1]] - self.bands.index = ['from','to'] + self._selected_bands[f'band{i+1}'] = [self.segments[i][0], self.segments[i][self.segments[i].shape[0]-1]] + self._selected_bands.index = ['from','to'] return score diff --git a/src/pages/1-samples_selection.py b/src/pages/1-samples_selection.py index 6a047b5aff69df1e098f232ec1533a5355d9c310..c1b3e0e1204fbca46a4966e1615265a522474af7 100644 --- a/src/pages/1-samples_selection.py +++ b/src/pages/1-samples_selection.py @@ -15,6 +15,9 @@ tcr=pd.DataFrame() sam=pd.DataFrame() sam1=pd.DataFrame() +# path = os.path.dirname(os.path.abspath(__file__)).replace('\\','/') +# css_file = path[:path.find('/pages')]+'/style' +# local_css(css_file +"/style_model.css") local_css(css_file / "style_model.css") st.session_state["interface"] = st.session_state.get('interface') diff --git a/src/pages/2-model_creation.py b/src/pages/2-model_creation.py index 45b37a0707955c4bde5b76f82471ac03be7e29d6..729db1838827b31e25cd0e03c70335ada93250a6 100644 --- a/src/pages/2-model_creation.py +++ b/src/pages/2-model_creation.py @@ -5,8 +5,14 @@ from Modules import * from Class_Mod.DATA_HANDLING import * from pandas.api.types import is_float_dtype from Class_Mod.Miscellaneous import desc_stats +from plotly.subplots import make_subplots +import plotly.graph_objects as go +from matplotlib.cm import ScalarMappable add_header() +import matplotlib.pyplot as plt, mpld3 +import streamlit.components.v1 as components + st.session_state["interface"] = st.session_state.get('interface') if st.session_state["interface"] == 'simple': hide_pages("Predictions") @@ -22,8 +28,8 @@ st.markdown("Create a predictive model, then use it for predicting your target v st.header("I - Data visualization", divider='blue') M0, M00 = st.columns([1, .4]) st.header("II - Model creation", divider='blue') -M1, M2, M3 = st.columns([2,2,2]) -st.header("Cross_Validation") +M1, M2 = st.columns([2 ,4]) +st.header("Cross-Validation") cv1, cv2 = st.columns([2,2]) cv3 = st.container() @@ -182,7 +188,7 @@ if not spectra.empty and not y.empty: elif regression_algo == reg_algo[3]: s = M1.number_input(label='Enter the maximum number of intervals', min_value=1, max_value=6, value=3) - it = M1.number_input(label='Enter the number of iterations', min_value=50, max_value=1000, value=100) + it = M1.number_input(label='Enter the number of iterations', min_value=2, max_value=10, value=3) progress_text = "The model is being created. Please wait." Reg = TpeIpls(train = [X_train, y_train], test=[X_test, y_test], n_intervall = s, n_iter=it) @@ -191,35 +197,100 @@ if not spectra.empty and not y.empty: M1.progress(100, text = "The model has successfully been created!") time.sleep(1) reg_model = Reg.model_ - M3.write('-- Spectral regions used for model creation --') - intervalls = Reg.bands.T - M3.table(intervalls) - fig, ax = plt.subplots(figsize = (12, 6)) - X_train.mean().plot(ax = ax) - for i in range(s): - colnames = np.array(y) - num = {'u','i','f','c'} - if np.array(X_train.columns).dtype.kind in num: - plt.plot(X_train.columns, X_train.mean(), color = 'black') - ax.axvspan(X_train.columns[intervalls['from'][i]], X_train.columns[intervalls['to'][i]], - color='#2a52be', alpha=0.5, lw=0) - plt.tight_layout() - plt.margins(x = 0) - else: - plt.plot(np.arange(X_train.shape[1]), X_train.mean(), color = 'black') - ax.axvspan(intervalls['from'][i], intervalls['to'][i], color='#2a52be', alpha=0.5, lw=0) - plt.tight_layout() - plt.margins(x = 0) - - M3.write('-- Visualization of the spectral regions used for model creation -- ') - M3.pyplot(fig) + # M3.write('-- Spectral regions used for model creation --') + # intervalls = Reg.bands.T + # M3.table(intervalls) + # fig, ax = plt.subplots(figsize = (12, 6)) + # X_train.mean().plot(ax = ax) + # for i in range(s): + # colnames = np.array(y) + # num = {'u','i','f','c'} + # if np.array(X_train.columns).dtype.kind in num: + # plt.plot(X_train.columns, X_train.mean(), color = 'black') + # ax.axvspan(X_train.columns[intervalls['from'][i]], X_train.columns[intervalls['to'][i]], + # color='#2a52be', alpha=0.5, lw=0) + # plt.tight_layout() + # plt.margins(x = 0) + # else: + # plt.plot(np.arange(X_train.shape[1]), X_train.mean(), color = 'black') + # ax.axvspan(intervalls['from'][i], intervalls['to'][i], color='#2a52be', alpha=0.5, lw=0) + # plt.tight_layout() + # plt.margins(x = 0) + # M3.write('-- Visualization of the spectral regions used for model creation -- ') + # M3.pyplot(fig) + M2.write('-- Spectral regions used for model creation --') + intervalls = Reg.selected_features_.T + M2.table(intervalls) + # elif regression_algo == reg_algo[4]: # Reg = PlsR(x_train = X_train, x_test = X_test, y_train = y_train, y_test = y_test) # reg_model = Reg.model_ ################# Model analysis ############ if regression_algo in reg_algo[1:]: + M2.write('-- Pretreated data (train) visualization and important spectral regions in the model -- ') + + fig, (ax1, ax2) = plt.subplots(2,1, figsize = (12, 6)) + fig = make_subplots(rows=3, cols=1, shared_xaxes=True, vertical_spacing=0.02) + # fig.append_trace(go.Scatter(x=[3, 4, 5], + # y=[1000, 1100, 1200],), row=1, col=1) + + # fig.append_trace(go.Scatter(x=[2, 3, 4], + # y=[100, 110, 120],), row=2, col=1) + + # fig.append_trace(go.Scatter(x=[0, 1, 2], + # y=[10, 11, 12]), row=3, col=1) + + # fig.update_layout(height=600, width=600, title_text="Stacked Subplots") + # a = Reg.pretreated_spectra_ + # r = pd.concat([y_train, a], axis = 1) + # rr = r.melt("x") + # rr.columns = ['y values', 'x_axis', 'y_axis'] + # fig = px.scatter(rr, x = 'x_axis', y = 'y_axis', color_continuous_scale=px.colors.sequential.Viridis, color = 'y values') + # M3.plotly_chart(fig) + + + from matplotlib.colors import Normalize + color_variable = y_train + norm = Normalize(vmin=color_variable.min(), vmax= color_variable.max()) + cmap = plt.get_cmap('viridis') + colors = cmap(norm(color_variable.values)) + fig, ax = plt.subplots(figsize = (10,3)) + + for i in range(Reg.pretreated_spectra_.shape[0]): + ax.plot(Reg.pretreated_spectra_.columns, Reg.pretreated_spectra_.iloc[i,:], color = colors[i]) + sm = ScalarMappable(norm = norm, cmap = cmap) + cbar = plt.colorbar(sm, ax = ax) + # cbar.set_label('Target range') + plt.tight_layout() + htmlfig = mpld3.fig_to_html(fig) + with M2: + st.components.v1.html(htmlfig, height=600) + + + + # X_train.mean().plot(ax = ax2) + # for i in range(s): + # colnames = np.array(y) + # num = {'u','i','f','c'} + # if np.array(X_train.columns).dtype.kind in num: + # plt.plot(X_train.columns, X_train.mean(), color = 'black') + # ax2.axvspan(X_train.columns[intervalls['from'][i]], X_train.columns[intervalls['to'][i]], + # color='#2a52be', alpha=0.5, lw=0) + # plt.tight_layout() + # plt.margins(x = 0) + # else: + # plt.plot(np.arange(X_train.shape[1]), X_train.mean(), color = 'black') + # ax2.axvspan(intervalls['from'][i], intervalls['to'][i], color='#2a52be', alpha=0.5, lw=0) + # plt.tight_layout() + # plt.margins(x = 0) + + # pd.DataFrame(Reg.pretreated_spectra_).plot(ax = ax1) + # M3.pyplot(fig) + + + ############ cv2.write('-- Cross-Validation Summary--') cv2.write(Reg.CV_results_) cv99=pd.DataFrame(Reg.CV_results_) @@ -229,12 +300,9 @@ if not spectra.empty and not y.empty: color_discrete_sequence=px.colors.qualitative.G10) fig1.add_shape(type='line', x0 = .95 * min(Reg.cv_data_[2].loc[:,'Measured']), x1 = 1.05 * max(Reg.cv_data_[2].loc[:,'Measured']), y0 = .95 * min(Reg.cv_data_[2].loc[:,'Measured']), y1 = 1.05 * max(Reg.cv_data_[2].loc[:,'Measured']), line = dict(color='black', dash = "dash")) fig1.update_traces(marker_size=7, showlegend=False) - fig1.write_image("./Report/figures/Allinone.png") - cv2.plotly_chart(fig1) - - fig0 = px.scatter(Reg.cv_data_[2], x='Measured', y='Predicted', trendline='ols', color='Folds', symbol="Folds", facet_col='Folds', facet_col_wrap=1, - color_discrete_sequence=px.colors.qualitative.G10, text='index', width=800, height=1000) + fig0 = px.scatter(Reg.cv_data_[2], x ='Measured', y = 'Predicted' , trendline='ols', color='Folds', symbol="Folds", facet_col = 'Folds',facet_col_wrap=1, + color_discrete_sequence=px.colors.qualitative.G10, text='index', width=800, height=1000) fig0.update_traces(marker_size=8, showlegend=False) cv1.write('-- Visualisation des prédictions hors échantillon (Graphiques séparés) --') @@ -246,13 +314,13 @@ if not spectra.empty and not y.empty: yt = Reg.pred_data_[1] #if - M2.write('-- Spectral preprocessing info --') - M2.write(Reg.best_hyperparams) - json_sp = pd.DataFrame([Reg.best_hyperparams]) - - # with open("data/params/Preprocessing.json", "w") as outfile: - # json.dump(Reg.best_hyperparams, outfile) + M1.write('-- Spectral preprocessing info --') + M1.write(Reg.best_hyperparams_print) + with open("data/params/Preprocessing.json", "w") as outfile: + json.dump(Reg.best_hyperparams_, outfile) + M1.write("-- Model performance --") + M1.dataframe(metrics(c = [y_train, yc], t = [y_test, yt], method='regression').scores_) M2.write("-- Model performance --") M2.dataframe(metrics(c = [y_train, yc], t = [y_test, yt], method='regression').scores_) model_per=pd.DataFrame(metrics(c = [y_train, yc], t = [y_test, yt], method='regression').scores_)