diff --git a/src/Class_Mod/DATA_HANDLING.py b/src/Class_Mod/DATA_HANDLING.py index ddce505fe8da9a984e8b5f8de6a74bfad1287708..892c0c0854533b346a4e2363a61408c4d114a4ae 100644 --- a/src/Class_Mod/DATA_HANDLING.py +++ b/src/Class_Mod/DATA_HANDLING.py @@ -154,3 +154,23 @@ class KF_CV: for i in f.keys(): ycv[idx[i]] = f[i] return ycv + + +### Selectivity ratio +def sel_ratio(model, x ): + from scipy.stats import f + + x = pd.DataFrame(x) + wtp = model.coef_.T/ np.linalg.norm(model.coef_.T) + ttp = np.array(x @ wtp) + ptp = np.array(x.T) @ np.array(ttp)/(ttp.T @ ttp) + qexpi = np.linalg.norm(ttp @ ptp.T, axis = 0)**2 + e = np.array(x-x.mean()) - ttp @ ptp.T + qres = np.linalg.norm(e, axis = 0)**2 + sr = pd.DataFrame(qexpi/qres, index = x.columns, columns = ['sr']) + + fcr = f.ppf(0.05, sr.shape[0]-2, sr.shape[0]-3) + c = sr > fcr + sr.index = np.arange(x.shape[1]) + SR = sr.iloc[c.to_numpy(),:] + return SR \ No newline at end of file diff --git a/src/Class_Mod/RegModels.py b/src/Class_Mod/RegModels.py index 18ce604d7737a8bffa14426eafd1bfe1160946b0..49813dbdd93dc5ca4a2a08d0c413a202fb3cf2cb 100644 --- a/src/Class_Mod/RegModels.py +++ b/src/Class_Mod/RegModels.py @@ -1,5 +1,5 @@ from Packages import * -from Class_Mod import metrics, Snv, No_transformation, KF_CV +from Class_Mod import metrics, Snv, No_transformation, KF_CV, sel_ratio class Regmodel(object): @@ -10,14 +10,15 @@ class Regmodel(object): self._nc, self._nt, self._p = train[0].shape[0], test[0].shape[0], train[0].shape[1] self._model, self._best = None, None self._yc, self._ycv, self._yt = None, None, None - self._cv_df = pd.DataFrame + self._cv_df = pd.DataFrame() + self._sel_ratio = pd.DataFrame() self._nfolds = nfolds self._selected_bands = pd.DataFrame(index = ['from', 'to']) self.important_features = None self._hyper_params = {'polyorder': hp.choice('polyorder', [0, 1, 2]), 'deriv': hp.choice('deriv', [0, 1, 2]), 'window_length': hp.choice('window_length', [15, 21, 27, 33]), - 'scatter': hp.choice('scatter', ['Snv', 'No_transformation'])} + 'normalization': hp.choice('normalization', ['Snv', 'No_transformation'])} if add_hyperparams is not None: self._hyper_params.update(add_hyperparams) self._best = None @@ -54,10 +55,10 @@ class Regmodel(object): return self._best @property def best_hyperparams_print(self): - if self._best['scatter'] == 'Snv': + if self._best['normalization'] == 'Snv': a = 'Standard Normal Variate (SNV)' - elif self._best['scatter'] == 'No_transformation': + elif self._best['normalization'] == 'No_transformation': a = " No transformation was performed" SG = f'- Savitzky-Golay derivative parameters \:(Window_length:{self._best['window_length']}; polynomial order: {self._best['polyorder']}; Derivative order : {self._best['deriv']})' @@ -85,6 +86,10 @@ class Regmodel(object): @property def selected_features_(self): return self._selected_bands + + @property + def sel_ratio_(self): + return self._sel_ratio ########################################### ######################################### class Plsr(Regmodel): @@ -93,9 +98,10 @@ class Plsr(Regmodel): ### parameters in common def objective(self, params): + params['n_components'] = int(params['n_components']) x0 = [self._xc, self._xt] - x1 = [eval(str(params['scatter'])+"(x0[i])") for i in range(2)] + x1 = [eval(str(params['normalization'])+"(x0[i])") for i in range(2)] a, b, c = params['deriv'], params['polyorder'], params['window_length'] if a > b or b > c: @@ -125,8 +131,13 @@ class Plsr(Regmodel): self._yc = Model.predict(x2[0]) self._yt = Model.predict(x2[1]) self._model = Model + for key,value in params.items(): + try: params[key] = int(value) + except (TypeError, ValueError): params[key] = value + self._best = params self.pretreated = pd.DataFrame(x2[0]) + self._sel_ratio = sel_ratio(Model, x2[0]) return score @@ -153,7 +164,7 @@ class TpeIpls(Regmodel): # ## Preprocessing x0 = [self._xc, self._xt] - x1 = [eval(str(params['scatter'])+"(x0[i])") for i in range(2)] + x1 = [eval(str(params['normalization'])+"(x0[i])") for i in range(2)] a, b, c = params['deriv'], params['polyorder'], params['window_length'] if a > b or b > c: @@ -191,7 +202,11 @@ class TpeIpls(Regmodel): self._yc = Model.predict(x2[0][:,id]) self._yt = Model.predict(x2[1][:,id]) self._model = Model + for key,value in params.items(): + try: params[key] = int(value) + except (TypeError, ValueError): params[key] = value self._best = params + self.pretreated = pd.DataFrame(x2[0]) self.segments = arrays diff --git a/src/Report/figures/.gitkeep b/src/Report/figures/.gitkeep new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/src/Report/report.py b/src/Report/report.py index 5d58bf8c2ae8ff1cd258573861840ec195c19ce6..3d1e2d2362cbcdf268003c243d2939167d9debe1 100644 --- a/src/Report/report.py +++ b/src/Report/report.py @@ -2,8 +2,6 @@ import subprocess from pathlib import Path import os import pandas as pd -from config.config import pdflatex_path -import zipfile def report(*args): to_report=[] @@ -67,7 +65,7 @@ def report(*args): """ if 'model' in to_report: - latex_report += r"""\subsection{Data Visualization} + latex_report += r"""\subsection*{Data Visualization} Here we have a sub-heading. There is no blank line after the sub-heading. You can have one level of subheadings but not a third i.e. you cannot have Section 1.1.1 as a subheading. \begin{center} @@ -110,8 +108,7 @@ def report(*args): """ latex_report += r""" \begin{center} - Model performance - + """ + df1.to_latex(escape=True) + r""" \end{center} """ latex_report += r""" @@ -425,35 +422,25 @@ def report(*args): # latex_report = report('sample', 'predict',) def compile_latex(): - # path to pdflatex imported from config/config.py - + # path to pdflatex + # pdflatex_path = Path("C:/Users/maimouni/AppData/Local/Programs/MiKTeX/miktex/bin/x64/") + from config.config import pdflatex_path filename_path = Path("Report/") filename = 'report.tex' # run pdflatex with bibtex compilation (2nd run) for i in range(4): + print(i) if i == 1: proc = subprocess.Popen([pdflatex_path / 'bibtex.exe', filename[:-4]], cwd = filename_path) proc.communicate() else: proc = subprocess.Popen([pdflatex_path / 'pdflatex.exe', filename], cwd = filename_path) proc.communicate() - os.rename(filename_path / 'report.pdf', filename_path / 'figures' / 'report.pdf') # remove pdflatex compilation files - extensions = ['.log', '.aux', '.bbl', '.blg', '.out', '.tex'] - for ext in extensions: - os.unlink(str(filename_path / filename[:-4]) + ext) + extensions = ['.log', '.aux', '.bbl', '.blg', '.out'] + #for ext in extensions: + #os.unlink(str(filename_path / filename[:-4]) + ext) # open the report - # proc = subprocess.Popen([str(filename[:-4]) + '.pdf'], cwd = filename_path / 'figures', shell=True) - # proc.communicate() - - folder_path = Path('Report/figures') - zip_path = Path('Report/') - - with zipfile.ZipFile(zip_path / 'archive_zipfile.zip', 'w', compression=zipfile.ZIP_DEFLATED, compresslevel=9) as zf: - for root, dirs, files in os.walk(folder_path): - for file in files: - file_path = os.path.join(root, file) - arcname = os.path.relpath(file_path, folder_path) - zf.write(file_path, arcname) - + proc = subprocess.Popen([str(filename[:-4]) + '.pdf'], cwd = filename_path, shell=True) + proc.communicate() # compile_latex() diff --git a/src/data/models/model_sd_2024_06_07__created_on_Xcal_and_Ycal_data_.pkl b/src/data/models/model_sd_2024_06_07__created_on_Xcal_and_Ycal_data_.pkl new file mode 100644 index 0000000000000000000000000000000000000000..0fa8d65d2256b43f3449286ef19eb701292ac298 Binary files /dev/null and b/src/data/models/model_sd_2024_06_07__created_on_Xcal_and_Ycal_data_.pkl differ diff --git a/src/data/models/model_sd_2024_06_07__on_Xcal_and_Ycal_data_Wavelengths_index.csv b/src/data/models/model_sd_2024_06_07__on_Xcal_and_Ycal_data_Wavelengths_index.csv new file mode 100644 index 0000000000000000000000000000000000000000..08a80dea3691c167a81a8d99332d02772d01cf99 --- /dev/null +++ b/src/data/models/model_sd_2024_06_07__on_Xcal_and_Ycal_data_Wavelengths_index.csv @@ -0,0 +1,4 @@ +;from;to +band1;353;409 +band2;501;882 +band3;1727;2020 diff --git a/src/data/params/Preprocessing.json b/src/data/params/Preprocessing.json new file mode 100644 index 0000000000000000000000000000000000000000..0d814e812cf256fe6987cd6d0eb048d02afc555e --- /dev/null +++ b/src/data/params/Preprocessing.json @@ -0,0 +1 @@ +{"deriv": 0, "n_components": 18, "normalization": "No_transformation", "polyorder": 0, "v1": 752, "v2": 735, "v3": 125, "v4": 1198, "v5": 461, "v6": 522, "window_length": 1} \ No newline at end of file diff --git a/src/pages/1-samples_selection.py b/src/pages/1-samples_selection.py index 12f560e5fa84d0ad60efb1ea0c5c1c7247c02cf5..0b2aac3ad40232791c3527a47055543346edf488 100644 --- a/src/pages/1-samples_selection.py +++ b/src/pages/1-samples_selection.py @@ -509,8 +509,7 @@ Ac_Km = ['Spectra_Plot.png', 'Elbow.png', 'graphe_loadings.png', 'plot_axe1_axe2 # Streamlit container with st.container(): - header3, = st.columns(1) - if header3.button("Export report"): + if st.button("Download report"): if test == '.csv': if dim_red_method == dim_red_methods[1] and clus_method == cluster_methods[1]: latex_report = report.report(sam, tcr, Nb_ech, nb_clu, 'sample', Ac_Km, 'csv', 'kmeans') diff --git a/src/pages/2-model_creation.py b/src/pages/2-model_creation.py index 96dd22a5c8cdf95ad810e7e74e15a7511b0d9f64..8c31f316fb407aa9c3dacce3bb285f9f158783ee 100644 --- a/src/pages/2-model_creation.py +++ b/src/pages/2-model_creation.py @@ -52,8 +52,9 @@ files_format = ['.csv', '.dx'] file = M00.radio('Select files format:', options = files_format) ### Data -spectra = pd.DataFrame -y = pd.DataFrame +spectra = pd.DataFrame() +y = pd.DataFrame() + # load .csv file if file == files_format[0]: @@ -124,6 +125,10 @@ elif file == files_format[1]: ### split the data if not spectra.empty and not y.empty: + if np.array(spectra.columns).dtype.kind in ['i','f']: + colnames = spectra.columns + else: + colnames = np.arange(spectra.shape[1]) #rd_seed = M1.slider("Customize Train-test split", min_value=1, max_value=100, value=42, format="%i") @@ -201,39 +206,25 @@ if not spectra.empty and not y.empty: M1.progress(100, text = "The model has successfully been created!") time.sleep(1) reg_model = Reg.model_ - # M3.write('-- Spectral regions used for model creation --') - # intervalls = Reg.bands.T - # M3.table(intervalls) - # fig, ax = plt.subplots(figsize = (12, 6)) - # X_train.mean().plot(ax = ax) - # for i in range(s): - # colnames = np.array(y) - # num = {'u','i','f','c'} - # if np.array(X_train.columns).dtype.kind in num: - # plt.plot(X_train.columns, X_train.mean(), color = 'black') - # ax.axvspan(X_train.columns[intervalls['from'][i]], X_train.columns[intervalls['to'][i]], - # color='#2a52be', alpha=0.5, lw=0) - # plt.tight_layout() - # plt.margins(x = 0) - # else: - # plt.plot(np.arange(X_train.shape[1]), X_train.mean(), color = 'black') - # ax.axvspan(intervalls['from'][i], intervalls['to'][i], color='#2a52be', alpha=0.5, lw=0) - # plt.tight_layout() - # plt.margins(x = 0) - - # M3.write('-- Visualization of the spectral regions used for model creation -- ') - # M3.pyplot(fig) - M2.write('-- Spectral regions used for model creation --') - intervalls = Reg.selected_features_.T - M2.table(intervalls) + M2.write('-- Important Spectral regions used for model creation --') + intervalls = Reg.selected_features_.T + intervalls_with_cols = Reg.selected_features_.T + for i in range(intervalls.shape[0]): + for j in range(intervalls.shape[1]): + intervalls_with_cols.iloc[i,j] = spectra.columns[intervalls.iloc[i,j]] + M2.table(intervalls_with_cols) + # elif regression_algo == reg_algo[4]: # Reg = PlsR(x_train = X_train, x_test = X_test, y_train = y_train, y_test = y_test) # reg_model = Reg.model_ + + + ################# Model analysis ############ if regression_algo in reg_algo[1:]: - M2.write('-- Pretreated data (train) visualization and important spectral regions in the model -- ') + #M2.write('-- Pretreated data (train) visualization and important spectral regions in the model -- ') fig, (ax1, ax2) = plt.subplots(2,1, figsize = (12, 6)) fig = make_subplots(rows=3, cols=1, shared_xaxes=True, vertical_spacing=0.02) @@ -253,43 +244,24 @@ if not spectra.empty and not y.empty: # rr.columns = ['y values', 'x_axis', 'y_axis'] # fig = px.scatter(rr, x = 'x_axis', y = 'y_axis', color_continuous_scale=px.colors.sequential.Viridis, color = 'y values') # M3.plotly_chart(fig) - - color_variable = y_train - norm = Normalize(vmin=color_variable.min(), vmax= color_variable.max()) - cmap = plt.get_cmap('viridis') - colors = cmap(norm(color_variable.values)) - fig, ax = plt.subplots(figsize = (10,3)) - - for i in range(Reg.pretreated_spectra_.shape[0]): - ax.plot(Reg.pretreated_spectra_.columns, Reg.pretreated_spectra_.iloc[i,:], color = colors[i]) - sm = ScalarMappable(norm = norm, cmap = cmap) - cbar = plt.colorbar(sm, ax = ax) - # cbar.set_label('Target range') - plt.tight_layout() - htmlfig = mpld3.fig_to_html(fig) - with M2: - st.components.v1.html(htmlfig, height=600) - - # X_train.mean().plot(ax = ax2) - # for i in range(s): - # colnames = np.array(y) - # num = {'u','i','f','c'} - # if np.array(X_train.columns).dtype.kind in num: - # plt.plot(X_train.columns, X_train.mean(), color = 'black') - # ax2.axvspan(X_train.columns[intervalls['from'][i]], X_train.columns[intervalls['to'][i]], - # color='#2a52be', alpha=0.5, lw=0) - # plt.tight_layout() - # plt.margins(x = 0) - # else: - # plt.plot(np.arange(X_train.shape[1]), X_train.mean(), color = 'black') - # ax2.axvspan(intervalls['from'][i], intervalls['to'][i], color='#2a52be', alpha=0.5, lw=0) - # plt.tight_layout() - # plt.margins(x = 0) - - # pd.DataFrame(Reg.pretreated_spectra_).plot(ax = ax1) - # M3.pyplot(fig) + # from matplotlib.colors import Normalize + # color_variable = y_train + # norm = Normalize(vmin=color_variable.min(), vmax= color_variable.max()) + # cmap = plt.get_cmap('viridis') + # colors = cmap(norm(color_variable.values)) + # fig, ax = plt.subplots(figsize = (10,3)) + + # for i in range(Reg.pretreated_spectra_.shape[0]): + # ax.plot(Reg.pretreated_spectra_.columns, Reg.pretreated_spectra_.iloc[i,:], color = colors[i]) + # sm = ScalarMappable(norm = norm, cmap = cmap) + # cbar = plt.colorbar(sm, ax = ax) + # # cbar.set_label('Target range') + # plt.tight_layout() + # htmlfig = mpld3.fig_to_html(fig) + # with M2: + # st.components.v1.html(htmlfig, height=600) ############ @@ -312,7 +284,7 @@ if not spectra.empty and not y.empty: cv1.plotly_chart(fig0, use_container_width=True) fig0.write_image("./Report/figures/Predictions_V.png") - + yc = Reg.pred_data_[0] yt = Reg.pred_data_[1] @@ -321,9 +293,11 @@ if not spectra.empty and not y.empty: M1.write(Reg.best_hyperparams_print) a_Test=Reg.best_hyperparams_print + + with open("data/params/Preprocessing.json", "w") as outfile: + json.dump(Reg.best_hyperparams_, outfile) + - # with open("data/params/Preprocessing.json", "w") as outfile: - # json.dump(Reg.best_hyperparams_, outfile) ########## M1.write("-- Model performance --") M1.dataframe(metrics(c = [y_train, yc], t = [y_test, yt], method='regression').scores_) @@ -344,7 +318,7 @@ if not spectra.empty and not y.empty: M8.pyplot(residual_plot) plt.savefig('./Report/figures/residual_plot.png') - rega = Reg.important_features_ ##### ADD FEATURES IMPORTANCE PLOT + rega = Reg.selected_features_ ##### ADD FEATURES IMPORTANCE PLOT #model_export = M1.selectbox("Choose way to export", options=["pickle", "joblib"], key=20) model_name = M9.text_input('Give it a name') @@ -357,8 +331,7 @@ if not spectra.empty and not y.empty: '_and_' + ycal_csv.name[:ycal_csv.name.find(".")] + '_data_' + '.pkl','wb') as f: joblib.dump(reg_model, f) if regression_algo == reg_algo[3]: - rega[1].sort() - pd.DataFrame(rega[1]).to_csv(path + model_name + date_time + '_on_' + xcal_csv.name[:xcal_csv.name.find(".")] + Reg.selected_features_.T.to_csv(path + model_name + date_time + '_on_' + xcal_csv.name[:xcal_csv.name.find(".")] + '_and_' + ycal_csv.name[:ycal_csv.name.find(".")] + '_data_'+'Wavelengths_index.csv', sep = ';') elif file == files_format[1]: @@ -366,13 +339,8 @@ if not spectra.empty and not y.empty: with open(path + model_name + '_on_'+ data_file.name[:data_file.name.find(".")] + '_data_' + '.pkl','wb') as f: joblib.dump(reg_model, f) if regression_algo == reg_algo[3]: - rega[1].sort() - pd.DataFrame(rega[1]).to_csv(path +data_file.name[:data_file.name.find(".")]+ model_name + date_time+ '_on_' + '_data_'+'Wavelengths_index.csv', sep = ';') + Reg.selected_features_.T.to_csv(path +data_file.name[:data_file.name.find(".")]+ model_name + date_time+ '_on_' + '_data_'+'Wavelengths_index.csv', sep = ';') st.write('Model Exported ') - - if regression_algo == reg_algo[3]: - st.write('Model Exported') - # create a report with information on the model ## see https://stackoverflow.com/a/59578663 @@ -401,4 +369,39 @@ with st.container(): pass else: - pass \ No newline at end of file + pass + + +if not spectra.empty and not y.empty: + if regression_algo in reg_algo[1:]: + fig, (ax1, ax2) = plt.subplots(2,1, figsize = (12, 4), sharex=True) + ax1.plot(colnames, np.mean(X_train, axis = 0), color = 'black', label = 'Average spectrum (Raw)') + ax2.plot(colnames, np.mean(Reg.pretreated_spectra_ , axis = 0), color = 'black', label = 'Average spectrum (pretreated)') + + + ax2.set_xlabel('Wavelenghts') + plt.tight_layout() + + for i in range(2): + eval(f'ax{i+1}').grid(color='grey', linestyle=':', linewidth=0.2) + eval(f'ax{i+1}').margins(x = 0) + eval(f'ax{i+1}').legend(loc = 'upper right') + eval(f'ax{i+1}').set_ylabel('Intensity') + if regression_algo == reg_algo[3]: + for j in range(s): + if np.array(spectra.columns).dtype.kind in ['i','f']: + min, max = intervalls_with_cols['from'][j], intervalls_with_cols['to'][j] + else: + min, max = intervalls['from'][j], intervalls['to'][j] + + eval(f'ax{i+1}').axvspan(min, max, color='#00ff00', alpha=0.5, lw=0) + if regression_algo == reg_algo[1]: + ax1.scatter(colnames[np.array(Reg.sel_ratio_.index)], np.mean(X_train, axis = 0).ravel()[np.array(Reg.sel_ratio_.index)], + color = 'red', label = 'Important variables') + ax2.scatter(colnames[Reg.sel_ratio_.index], np.mean(Reg.pretreated_spectra_, axis = 0).ravel()[np.array(Reg.sel_ratio_.index)], + color = 'red', label = 'Important variables') + ax1.legend() + ax2.legend() + + M2.write('-- Visualization of the spectral regions used for model creation -- ') + M2.pyplot(fig) \ No newline at end of file diff --git a/src/pages/3-prediction.py b/src/pages/3-prediction.py index 2f515438326e5483d76a429e84cfbdbcbbacbb6c..b79dafdfe07ab2e0afc7894fc66d46598d4ccca2 100644 --- a/src/pages/3-prediction.py +++ b/src/pages/3-prediction.py @@ -17,18 +17,21 @@ local_css(css_file / "style_model.css") st.header("Data loading", divider='blue') -model_column1, space1, file_column1= st.columns([2, 1, 1]) +M1, M2= st.columns([2, 1]) + +st.header('Data preprocessing', divider='blue') +M3, M4= st.columns([2, 1]) + st.header("Prediction making", divider='blue') -model_column2, space2, file_column2= st.columns([2, 1, 1]) -_, space3, _ = st.columns([1, 3, 1]) +M5, M6 = st.columns([2, 0.01]) files_format = ['.csv', '.dx'] -file = file_column1.file_uploader("Select NIRS Data to predict", type = files_format, help=" :mushroom: select a csv matrix with samples as rows and lambdas as columns") +file = M2.file_uploader("Select NIRS Data to predict", type = files_format, help=" :mushroom: select a csv matrix with samples as rows and lambdas as columns") export_folder = './data/predictions/' export_name = 'Predictions_of_' reg_algo = ["Interval-PLS"] -pred_data = pd.DataFrame +pred_data = pd.DataFrame() loaded_model = None @@ -38,8 +41,8 @@ if file: if test == files_format[0]: # - qsep = file_column1.selectbox("Select csv separator - _detected_: " + str(find_delimiter('data/'+file.name)), options=[";", ","], index=[";", ","].index(str(find_delimiter('data/'+file.name))), key=2) - qhdr = file_column1.selectbox("indexes column in csv? - _detected_: " + str(find_col_index('data/'+file.name)), options=["no", "yes"], index=["no", "yes"].index(str(find_col_index('data/'+file.name))), key=3) + qsep = M2.selectbox("Select csv separator - _detected_: " + str(find_delimiter('data/'+file.name)), options=[";", ","], index=[";", ","].index(str(find_delimiter('data/'+file.name))), key=2) + qhdr = M2.selectbox("indexes column in csv? - _detected_: " + str(find_col_index('data/'+file.name)), options=["no", "yes"], index=["no", "yes"].index(str(find_col_index('data/'+file.name))), key=3) if qhdr == 'yes': col = 0 else: @@ -51,55 +54,61 @@ if file: tmp.write(file.read()) tmp_path = tmp.name chem_data, spectra, meta_data, _ = read_dx(file = tmp_path) - file_column1.success("The data have been loaded successfully", icon="✅") + M2.success("The data have been loaded successfully", icon="✅") if chem_data.to_numpy().shape[1]>0: - yname = file_column1.selectbox('Select target', options=chem_data.columns) + yname = M2.selectbox('Select target', options=chem_data.columns) measured = chem_data.loc[:,yname] == 0 y = chem_data.loc[:,yname].loc[measured] pred_data = spectra.loc[measured] else: pred_data = spectra - os.unlink(tmp_path) # Load parameters if not pred_data.empty:# Load the model with joblib - model_column1.write('Raw spectra') + M1.write('Raw spectra') fig = plot_spectra(pred_data, xunits = 'lab', yunits = "meta_data.loc[:,'yunits'][0]") - model_column1.pyplot(fig) + M1.pyplot(fig) + ### preprocessing preprocessed = pd.DataFrame if not pred_data.empty: - params = file_column1.file_uploader("Load preprocessings params", type = '.json', help=" .json file") + params = M4.file_uploader("Load preprocessings params", type = '.json', help=" .json file") if params: prep = json.load(params) + # M4.write(ProcessLookupError) - if prep['Scatter'] == 'SNV': + if prep['normalization'] == 'Snv': x1 = Snv(pred_data) + norm = 'Standard Normal Variate' else: + norm = 'No Normalization was applied' x1 = pred_data x2 = savgol_filter(x1, - window_length = prep["Saitzky-Golay derivative parameters"]["window_length"], - polyorder = prep["Saitzky-Golay derivative parameters"]["polyorder"], - deriv=prep["Saitzky-Golay derivative parameters"]["deriv"], + window_length = prep["window_length"], + polyorder = prep["polyorder"], + deriv=prep["deriv"], delta=1.0, axis=-1, mode="interp", cval=0.0) preprocessed = pd.DataFrame(x2, index = pred_data.index, columns = pred_data.columns) - + +################################################################################################ ## plot preprocessed spectra if not preprocessed.empty: - model_column1.write('Preprocessed spectra') + M3.write('Preprocessed spectra') fig2 = plot_spectra(preprocessed, xunits = 'lab', yunits = "meta_data.loc[:,'yunits'][0]") - model_column1.pyplot(fig2) - + M3.pyplot(fig2) + SG = f'- Savitzky-Golay derivative parameters \:(Window_length:{prep['window_length']}; polynomial order: {prep['polyorder']}; Derivative order : {prep['deriv']})' + Norm = f'- Spectral Normalization \: {norm}' + M4.write('The spectra were preprocessed using:\n'+SG+"\n"+Norm) ################### Predictions making ########################## if not pred_data.empty:# Load the model with joblib #dir = os.listdir('data/models/')[1:] dir = os.listdir('data/models/') dir.insert(0,'') - model_name = model_column2.selectbox("Select your model from the dropdown list:", options = dir, key = 21) + model_name = M5.selectbox("Select your model from the dropdown list:", options = dir, key = 21) if model_name and model_name !='': export_name += '_with_' + model_name[:model_name.find('.')] @@ -107,16 +116,18 @@ if not pred_data.empty:# Load the model with joblib loaded_model = joblib.load(f) if loaded_model: - model_column2.success("The model has been loaded successfully", icon="✅") - s = model_column2.checkbox('the model is of ipls type?') + M5.success("The model has been loaded successfully", icon="✅") + s = M5.checkbox('the model is of ipls type?') if s: - index = model_column2.file_uploader("select wavelengths index file", type="csv") + index = M5.file_uploader("select wavelengths index file", type="csv") if index: - idx = pd.read_csv(index, sep=';', index_col=0).iloc[:,0].to_numpy() - + intervalls = pd.read_csv(index, sep=';', index_col=0).to_numpy() + idx = [] + for i in range(intervalls.shape[0]): + idx.extend(np.arange(intervalls[i,0], intervalls[i,1]+1)) if loaded_model: - if model_column2.button('Predict'): + if M5.button('Predict'): if s: result = loaded_model.predict(preprocessed.iloc[:,idx]) else: @@ -124,19 +135,14 @@ if loaded_model: result = loaded_model.predict(x2) result = pd.DataFrame(result, index = pred_data.index) - st.write('Predicted values') - st.dataframe(result.T) ############################# + M5.write('Predicted values distribution') # Creating histogram - fig, axs = plt.subplots(1, 1, - figsize =(12, 6), + fig, axs = plt.subplots(1, 1, figsize =(15, 3), tight_layout = True) # Add x, y gridlines - axs.grid( color ='grey', - linestyle ='-.', linewidth = 0.5, - alpha = 0.6) - plt.title('Predicted values distribution') + axs.grid( color ='grey', linestyle ='-.', linewidth = 0.5, alpha = 0.6) # Remove axes splines for s in ['top', 'bottom', 'left', 'right']: axs.spines[s].set_visible(False) @@ -156,13 +162,13 @@ if loaded_model: color = plt.cm.viridis(norm(thisfrac)) thispatch.set_facecolor(color) - space3.pyplot(fig) + M5.pyplot(fig) + M6.write('Predicted values table') + M6.dataframe(result.T) ################################## result.to_csv(export_folder + export_name + '.csv', sep = ';') # export to local drive - Download download_results(export_folder + export_name + '.csv', export_name + '.csv') # create a report with information on the prediction - ## see https://stackoverflow.com/a/59578663 - - + ## see https://stackoverflow.com/a/59578663 \ No newline at end of file