diff --git a/src/Class_Mod/Miscellaneous.py b/src/Class_Mod/Miscellaneous.py index 5ff007a69bebc8a0bc2629a7e07a9cee4013b72c..fed7bc2058882217d63fbd24e5571d0a8b8b54b7 100644 --- a/src/Class_Mod/Miscellaneous.py +++ b/src/Class_Mod/Miscellaneous.py @@ -92,15 +92,17 @@ def download_results(data, export_name): @st.cache_resource def plot_spectra(df): + fig, ax = plt.subplots(figsize = (30,7)) if isinstance(df.columns[0], str): - m = 0 + df.T.plot(legend=False, ax = ax, color = 'blue') + min = 0 else: - m = np.min(df.columns) + min = np.max(df.columns) + df.T.plot(legend=False, ax = ax, color = 'blue').invert_xaxis() - fig, ax = plt.subplots(figsize = (30,7)) - df.T.plot(legend=False, ax = ax, color = 'blue') + plt.annotate(text = f'The total number of spectra is {df.shape[0]}', xy =(min, np.max(df)), size=20, color = 'black', backgroundcolor='red') ax.set_xlabel('Wavelength/Wavenumber', fontsize=18) ax.set_ylabel('Signal intensity', fontsize=18) plt.margins(x = 0) - plt.annotate(text = f'The total number of spectra is {df.shape[0]}', xy =(m, np.max(df)), size=20, color = 'black', backgroundcolor='red') + return fig diff --git a/src/Class_Mod/PLSR_Preprocess.py b/src/Class_Mod/PLSR_Preprocess.py index ff18620068f08342aab30348697974b9f28bddef..72aa9841093da02e3bafc84371f9cbd3ce4e12f5 100644 --- a/src/Class_Mod/PLSR_Preprocess.py +++ b/src/Class_Mod/PLSR_Preprocess.py @@ -86,7 +86,11 @@ class PlsProcess: @property def best_hyperparams(self): - return self.best + self.b = {'Scatter':self.best['scatter'], 'Saitzky-Golay derivative parameters':{'polyorder':self.best['polyorder'], + 'deriv':self.best['deriv'], + 'window_length':self.best['window_length']}} + return self.b + @property def model_(self): return self.model diff --git a/src/pages/1-samples_selection.py b/src/pages/1-samples_selection.py index 3e497f7f0ebe15f9af81bfc228cee3bf8d907e70..a0ffac8779ddd55d009465444fd660a62d60ea71 100644 --- a/src/pages/1-samples_selection.py +++ b/src/pages/1-samples_selection.py @@ -82,7 +82,6 @@ scores, loadings, pc = st.columns([2, 3, 0.5]) influence, hotelling, qexp = st.columns([2, 2, 1]) st.header('Selected samples for chemical analysis') selected_s, selected_samples_metd = st.columns([3, 3]) -selected_s.write('Samples scores') dim_red_methods=['', 'PCA','UMAP', 'NMF'] # List of dimensionality reduction algos cluster_methods = ['', 'Kmeans','HDBSCAN', 'AP'] # List of clustering algos @@ -101,7 +100,7 @@ if not spectra.empty: if dim_red_method == dim_red_methods[1]: - dr_model = LinearPCA(xc, Ncomp=5) + dr_model = LinearPCA(xc, Ncomp=8) elif dim_red_method == dim_red_methods[2]: if not meta_data.empty: filter = meta_data.columns[1:] @@ -152,6 +151,7 @@ if labels: # list samples at clusters centers - Use sklearn.metrics.pairwise_distances_argmin if you want more than 1 sample per cluster closest, _ = pairwise_distances_argmin_min(clu_centers, tcr) selected_samples_idx = list(closest) + elif selection == selec_strategy[1]: selection_number = scores.number_input('How many samples per cluster?', min_value = 1, step=1, value = 3) for i in np.unique(labels): @@ -161,9 +161,7 @@ if labels: km2 = KMeans(n_clusters = selection_number) km2.fit(tcr.iloc[C,:]) clos, _ = pairwise_distances_argmin_min(km2.cluster_centers_, tcr.iloc[C,:]) - selected_samples_idx2 = list(clos) - selected_samples_idx.extend(tcr.iloc[C,:].index[selected_samples_idx2]) - # selected_samples_idx.extend(tcr.iloc[C,:].sample(n=selection_number).index.to_list()) + selected_samples_idx.extend(list(clos)) else: selected_samples_idx.extend(tcr.iloc[C,:].index.to_list()) # list indexes of selected samples for colored plot @@ -171,9 +169,11 @@ if labels: if labels: if selected_samples_idx: sam = pd.DataFrame({'cluster':np.array(labels)[selected_samples_idx], - 'index': spectra.index[selected_samples_idx]}) + 'index': spectra.index[selected_samples_idx]}, index = selected_samples_idx) + selected_s.write(sam) + if not meta_data.empty: selected_samples_metd.write('Corresponding meta-data') meta = meta_data.iloc[selected_samples_idx,:] diff --git a/src/pages/2-model_creation.py b/src/pages/2-model_creation.py index 996302ad78e801c5b6a606064699e0ea878f3a9b..cfed211cd23bdeb57c997ec2eb27fa521bfdcd57 100644 --- a/src/pages/2-model_creation.py +++ b/src/pages/2-model_creation.py @@ -160,7 +160,7 @@ if not spectra.empty and not y.empty: elif regression_algo == reg_algo[5]: Reg = PlsProcess(x_train = X_train, x_test = X_test, y_train = y_train, y_test = y_test, scale = False, Kfold=3) - Reg.tune(n_iter=100) + Reg.tune(n_iter=500) reg_model = Reg.model_ ################# Model analysis ############ @@ -169,7 +169,9 @@ if not spectra.empty and not y.empty: ycv = Reg.pred_data_[1] yt = Reg.pred_data_[2] - + + M2.write('-- Spectral preprocessing info --') + M2.write(Reg.best_hyperparams) M2.write("-- Performance metrics --") M2.dataframe(Reg.metrics_)