diff --git a/src/Class_Mod/Regression_metrics.py b/src/Class_Mod/Regression_metrics.py index 785cd6f0c9180ef10b5170af770983e5804ef95f..5ec8cacb36eb7eecd515311aa3ec0f2f4120be24 100644 --- a/src/Class_Mod/Regression_metrics.py +++ b/src/Class_Mod/Regression_metrics.py @@ -2,19 +2,13 @@ from Packages import * class metrics: def __init__(self, meas, pred): if isinstance(meas, pd.DataFrame) or isinstance(meas, pd.Series): - self.meas = meas.to_numpy() - else : - self.meas = meas.ravel() - - if isinstance(pred, pd.DataFrame): - self.pred = pred.to_numpy().ravel() - else : - self.pred = pred.ravel() + self.meas = np.array(meas).reshape(-1) + self.pred = np.array(pred).reshape(-1) @property def evaluate_(self): xbar = np.mean(self.meas) # the average of measured values - e = np.subtract(self.meas.ravel(), self.pred.ravel()) + e = np.subtract(self.meas, self.pred) e2 = e**2# the squared error # Sum of squared: @@ -28,7 +22,7 @@ class metrics: # Compute statistical metrics metr = pd.DataFrame() - metr['r'] = [np.corrcoef(self.meas.ravel(), self.pred)[0,1]] + metr['r'] = [np.corrcoef(self.meas, self.pred)[0,1]] metr['r2'] = [1-ssr/sst] metr['rmse'] = [np.sqrt(np.mean(e2))] metr['mae'] = [np.mean(np.abs(e2))] diff --git a/src/Class_Mod/SK_PLSR_.py b/src/Class_Mod/SK_PLSR_.py index 5442971490ac476a2b1665ee112d1830e7243be2..7b9de1cbf2f1e776d91b97d700ab58b2893ec079 100644 --- a/src/Class_Mod/SK_PLSR_.py +++ b/src/Class_Mod/SK_PLSR_.py @@ -1,4 +1,10 @@ -from Packages import * +from Packages import * +from Class_Mod.Miscellaneous import * +from Class_Mod.Regression_metrics import metrics + + + + class PlsR: def __init__(self, x_train, y_train, x_test, y_test): self.x_train = x_train @@ -6,12 +12,53 @@ class PlsR: self.y_train = y_train self.y_test = y_test - def fit_(self): - nlv = 20 - rmse = [] - for i in range(nlv): - m = PLSRegression(n_components= 20) + self.trained = PLSRegression(n_components= self._optimize(), scale = False) + self.trained.fit(self.x_train, self.y_train) + + self.yc = pd.DataFrame(self.trained.predict(self.x_train)) # make predictions on test data and assign to Y_preds variable + self.ycv = pd.DataFrame(cross_val_predict(self.trained, self.x_train, self.y_train, cv = 3)) # make predictions on test data and assign to Y_preds variable + self.yt = pd.DataFrame(self.trained.predict(self.x_test)) # make predictions on test data and assign to Y_preds variable + + + def _optimize(self): + nlv = 21 + rmse = np.ones(21) + rmse[0] = 0.002 + lv = {} + ratio = [] + for i in range(1,nlv): + m = PLSRegression(n_components= i, scale = False) ycv = cross_val_predict(m, self.x_train, self.y_train, cv = 5) - rmse.append(mean_squared_error(self.y_train, ycv)) - print(rmse) + rmse[i] = mean_squared_error(self.y_train, ycv) + + ratio.append(((rmse[i-1]-rmse[i])/rmse[i-1])*100) + return np.argmax(ratio)+1 + + ################################################################################################################ + + + ################################################################################################################ + + @property + def model_(self): + return self.trained + + @property + def metrics_(self): + metc = metrics(self.y_train, self.yc) + metc = metc.evaluate_ + + metcv = metrics(self.y_train, self.ycv) + metcv = metcv.evaluate_ + + mett = metrics( self.y_test, self.yt) + mett = mett.evaluate_ + + met = pd.concat([metc, metcv, mett], axis = 0) + met.index = ['calib','cv','test'] + return met + @property + def pred_data_(self): + + return self.yc, self.ycv, self.yt \ No newline at end of file diff --git a/src/Class_Mod/__init__.py b/src/Class_Mod/__init__.py index a3a0e22f34ebf253c4ffda9782a091f818f2cbd6..a2182b4dc2a93586c062c3c7049927103f2def42 100644 --- a/src/Class_Mod/__init__.py +++ b/src/Class_Mod/__init__.py @@ -11,4 +11,4 @@ from .VarSel import TpeIpls from .Miscellaneous import resid_plot, reg_plot from .DxReader import DxRead, read_dx from .HDBSCAN_Clustering import Hdbscan - +from .SK_PLSR_ import PlsR \ No newline at end of file diff --git a/src/Modules.py b/src/Modules.py index 067b954347564d671ab959111b79457eecd4e007..5de6b6f793e062505b1ac79483954c92a487bda5 100644 --- a/src/Modules.py +++ b/src/Modules.py @@ -1,4 +1,5 @@ -from Class_Mod import LinearPCA, Umap, find_col_index, PinardPlsr, LWPLSR, list_files, metrics, TpeIpls, reg_plot, resid_plot, Sk_Kmeans, DxRead, Hdbscan, read_dx +from Class_Mod import PlsR, LinearPCA, Umap, find_col_index, PinardPlsr +from Class_Mod import LWPLSR, list_files, metrics, TpeIpls, reg_plot, resid_plot, Sk_Kmeans, DxRead, Hdbscan, read_dx # find_col_index from Class_Mod.Miscellaneous import prediction, download_results, plot_spectra diff --git a/src/Packages.py b/src/Packages.py index d2abd429cfe69cdcbd6a031290edc930c928337f..c7bf15dcdb6ef8ba2320d02ee4477c6c00f4ad5b 100644 --- a/src/Packages.py +++ b/src/Packages.py @@ -5,12 +5,14 @@ import csv import re import jcamp import random +import datetime import numpy as np import pandas as pd from os import listdir from os.path import isfile, join from sklearn.preprocessing import StandardScaler, MinMaxScaler, LabelEncoder import time +from scipy.stats import skew, kurtosis ### Exploratory data analysis-Dimensionality reduction from umap.umap_ import UMAP diff --git a/src/app.py b/src/app.py index 4f9e2b7ac1b0a888d0650e930e3c1ad5c2eb4b30..9b63fe162aa26ebce48e20c0d918e79b41b0c779 100644 --- a/src/app.py +++ b/src/app.py @@ -19,10 +19,13 @@ st.markdown(bandeau_html, unsafe_allow_html=True) # # TOC menu on the left show_pages( [Page("app.py", "Home"), + Page("pages\\4-inputs.py", "Inputs"), + Page("pages\\1-samples_selection.py", "Samples Selection"), Page("pages\\2-model_creation.py", "Models Creation & Predictions"), + ] -) +) # hide_pages("Samples Selection") # hide_pages("Models Creation") # hide_pages("Predictions") @@ -37,13 +40,16 @@ with st.sidebar: elif st.session_state['interface'] == 'advanced': show_pages( [Page("app.py", "Home"), + Page("pages\\4-inputs.py", "Inputs"), Page("pages\\1-samples_selection.py", "Samples Selection"), Page("pages\\2-model_creation.py", "Models Creation"), Page("pages\\3-prediction.py", "Predictions"), + + ] ) st.page_link('pages\\2-model_creation.py') - st.page_link('pages\\3-prediction.py') + st.page_link('pages\\3-prediction.py') # Page header @@ -65,7 +71,9 @@ with st.container(): unsafe_allow_html=True ) - header1, header2, header3 = st.columns(3) + header1, header2, header3,header4 = st.columns(4) + if header3.button("Inputs"): + st.switch_page('pages\\4-inputs.py') if header1.button("Samples Selection"): st.switch_page('pages\\1-samples_selection.py') if header2.button("Model Creation"): diff --git a/src/pages/1-samples_selection.py b/src/pages/1-samples_selection.py index c9f3315d89c2a1059f688bb1ecea69e448b45d85..fd13d437fd63f4e729c7d0d58180aca9af6db37b 100644 --- a/src/pages/1-samples_selection.py +++ b/src/pages/1-samples_selection.py @@ -42,9 +42,9 @@ if data_file: if test== '.csv': with col1: # Select list for CSV delimiter - psep = st.selectbox("Select csv separator - _detected_: " + str(find_delimiter('data/'+data_file.name)), options=[";", ","], index=[";", ","].index(str(find_delimiter('data/'+data_file.name))), key=9) + psep = st.radio("Select csv separator - _detected_: " + str(find_delimiter('data/'+data_file.name)), options=[";", ","], index=[";", ","].index(str(find_delimiter('data/'+data_file.name))), key=9) # Select list for CSV header True / False - phdr = st.selectbox("indexes column in csv? - _detected_: " + str(find_col_index('data/'+data_file.name)), options=["no", "yes"], index=["no", "yes"].index(str(find_col_index('data/'+data_file.name))), key=31) + phdr = st.radio("indexes column in csv? - _detected_: " + str(find_col_index('data/'+data_file.name)), options=["no", "yes"], index=["no", "yes"].index(str(find_col_index('data/'+data_file.name))), key=31) if phdr == 'yes': col = 0 else: @@ -226,30 +226,31 @@ if not t.empty: import plotly.express as px - num_clusters = len(np.unique(labels)) + if labels: + num_clusters = len(np.unique(labels)) - custom_color_palette = px.colors.qualitative.Plotly[:num_clusters] - color_discrete_sequence=custom_color_palette + custom_color_palette = px.colors.qualitative.Plotly[:num_clusters] + color_discrete_sequence=custom_color_palette - # Créer et exporter le graphique Axe1-Axe2 en PNG - fig_axe1_axe2 = px.scatter(tcr, x=axis1, y=axis2, color=labels if list(labels) else None, color_discrete_sequence=custom_color_palette) - fig_axe1_axe2.update_layout(title='Axe1-Axe2') - fig_axe1_axe2.update_traces(marker=dict(size=4)) - fig_axe1_axe2.write_image("plot_axe1_axe2.png") + # Créer et exporter le graphique Axe1-Axe2 en PNG + fig_axe1_axe2 = px.scatter(tcr, x=axis1, y=axis2, color=labels if list(labels) else None, color_discrete_sequence=custom_color_palette) + fig_axe1_axe2.update_layout(title='Axe1-Axe2') + fig_axe1_axe2.update_traces(marker=dict(size=4)) + fig_axe1_axe2.write_image("./Report/Figures/plot_axe1_axe2.png") - # Créer et exporter le graphique Axe1-Axe3 en PNG - fig_axe1_axe3 = px.scatter(tcr, x=axis1, y=axis3, color=labels if list(labels) else None, color_discrete_sequence=custom_color_palette) - fig_axe1_axe3.update_layout(title='Axe1-Axe3') - fig_axe1_axe3.update_traces(marker=dict(size=4)) - fig_axe1_axe3.write_image("plot_axe1_axe3.png") + # Créer et exporter le graphique Axe1-Axe3 en PNG + fig_axe1_axe3 = px.scatter(tcr, x=axis1, y=axis3, color=labels if list(labels) else None, color_discrete_sequence=custom_color_palette) + fig_axe1_axe3.update_layout(title='Axe1-Axe3') + fig_axe1_axe3.update_traces(marker=dict(size=4)) + fig_axe1_axe3.write_image("./Report/Figures/plot_axe1_axe3.png") - # Créer et exporter le graphique Axe2-Axe3 en PNG - fig_axe2_axe3 = px.scatter(tcr, x=axis2, y=axis3, color=labels if list(labels) else None, color_discrete_sequence=custom_color_palette) - fig_axe2_axe3.update_layout(title='Axe2-Axe3') - fig_axe2_axe3.update_traces(marker=dict(size=4)) - fig_axe2_axe3.write_image("plot_axe2_axe3.png") + # Créer et exporter le graphique Axe2-Axe3 en PNG + fig_axe2_axe3 = px.scatter(tcr, x=axis2, y=axis3, color=labels if list(labels) else None, color_discrete_sequence=custom_color_palette) + fig_axe2_axe3.update_layout(title='Axe2-Axe3') + fig_axe2_axe3.update_traces(marker=dict(size=4)) + fig_axe2_axe3.write_image("./Report/Figures/plot_axe2_axe3.png") if not spectra.empty: diff --git a/src/pages/2-model_creation.py b/src/pages/2-model_creation.py index 114161c9623660d1cd961d91c57b59787743b573..2b08fda6f3b1851fe04172044adc76e300c6f3a6 100644 --- a/src/pages/2-model_creation.py +++ b/src/pages/2-model_creation.py @@ -2,6 +2,8 @@ from Packages import * st.set_page_config(page_title="NIRS Utils", page_icon=":goat:", layout="wide") from Modules import * from Class_Mod.DATA_HANDLING import * + + # HTML pour le bandeau "CEFE - CNRS" bandeau_html = """ <div style="width: 100%; background-color: #4682B4; padding: 10px; margin-bottom: 10px;"> @@ -19,7 +21,7 @@ if st.session_state["interface"] == 'simple': def nn(x): return x is not None ######################################################################################## -reg_algo = ["","Full-PLSR", "Locally Weighted PLSR", "Interval-PLSR"] +reg_algo = ["","Full-PLSR", "Locally Weighted PLSR", "Interval-PLSR", "Full-PLSR-sklearn"] # page Design st.header("Calibration Model Development", divider='blue') @@ -47,26 +49,40 @@ y = pd.DataFrame # load .csv file if file == files_format[0]: xcal_csv = M3.file_uploader("Select NIRS Data", type="csv", help=" :mushroom: select a csv matrix with samples as rows and lambdas as columns") + if xcal_csv: + sepx = M3.radio("Select separator (X file) - _detected_: " + str(find_delimiter('data/'+xcal_csv.name)), + options=[";", ","], index=[";", ","].index(str(find_delimiter('data/'+xcal_csv.name))), key=0) + hdrx = M3.radio("samples name (X file)? - _detected_: " + str(find_col_index('data/'+xcal_csv.name)), + options=["no", "yes"], index=["no", "yes"].index(str(find_col_index('data/'+xcal_csv.name))), key=1) + if hdrx == "yes": col = 0 + else: col = False + ycal_csv = M3.file_uploader("Select corresponding Chemical Data", type="csv", help=" :mushroom: select a csv matrix with samples as rows and chemical values as a column") + + + if ycal_csv: + sepy = M3.radio("separator (Y file): ", options=[";", ","], key=2) + hdry = M3.radio("samples name (Y file)?: ", options=["no", "yes"], key=3) + if hdry == "yes": col = 0 + else: col = False + if xcal_csv and ycal_csv: - - # Select list for CSV delimiter - sep = M3.radio("Select csv separator - _detected_: " + str(find_delimiter('data/'+xcal_csv.name)), - options=[";", ","], index=[";", ","].index(str(find_delimiter('data/'+xcal_csv.name))), key=0) - # Select list for CSV header True / False - hdr = M3.radio("indexes column in csv? - _detected_: " + str(find_col_index('data/'+xcal_csv.name)), - options=["no", "yes"], index=["no", "yes"].index(str(find_col_index('data/'+xcal_csv.name))), key=1) - ############### - if hdr == 'yes': - col = 0 + spectra, meta_data = col_cat(pd.read_csv(xcal_csv, decimal='.', sep=sepx, index_col=col, header=0)) + y, _ = col_cat(pd.read_csv(ycal_csv, decimal='.', sep=sepy, index_col=col)) + + y = pd.DataFrame(y).astype(float).iloc[:,0] + spectra = pd.DataFrame(spectra).astype(float) + st.write(meta_data) + + if spectra.shape[0] == y.shape[0]: + pass else: - col = False - ############### - spectra, y = utils.load_csv(xcal_csv, ycal_csv, autoremove_na=True, sep=sep, x_hdr=0, y_hdr=0, x_index_col=col, y_index_col=col) - spectra = pd.DataFrame(spectra) - y = pd.DataFrame(y).iloc[:,0] - + M3.warning('The number of samples is different in X and Y') + y = pd.DataFrame + spectra = pd.DataFrame + + ## Load .dx file @@ -94,7 +110,22 @@ if not spectra.empty and not y.empty: # Assign data to training and test sets X_train, y_train = pd.DataFrame(spectra.iloc[train_index,:]), y.iloc[train_index] X_test, y_test = pd.DataFrame(spectra.iloc[test_index,:]), y.iloc[test_index] - M2.write("ADD HERE A TABLE WITH SHAPE OF THE DATA: SAMPLES NUMBER AND WAVELENGTHS OR CHEMICAL VALUES NUMBER FOR TRAIN AND TEST") + + sk = lambda x: skew(x, axis=0, bias=True) + ku = lambda x:kurtosis(x, axis=0, bias=True) + cv = lambda x: x.std()*100/x.mean() + + M2.write('Loaded data summary') + M2.write(f'The loaded spectra consist of {spectra.shape[1]} wavelengths') + datainf = pd.DataFrame() + datainf['N samples'] = [X_train.shape[0], X_test.shape[0], spectra.shape[0] ] + datainf['Mean'] = [y_train.mean(), y_test.mean(), y.mean()] + datainf['SD'] = [y_train.std(), y_test.std(), y.std()] + datainf['CV(%)'] = [cv(y_train), cv(y_test), cv(y)] + datainf['Skewness'] = [sk(y_train), sk(y_test), sk(y)] + datainf['Kurtosis'] = [ku(y_train), ku(y_test), ku(y)] + datainf.index = ['Train', 'Test', 'Total'] + M2.write(datainf.round(3)) ####################################### regression_algo = M1.selectbox("Choose the algorithm for regression", options=reg_algo, key = 12) @@ -121,6 +152,10 @@ if not spectra.empty and not y.empty: M2.write('-- Table of selected wavelengths --') M2.table(rega[0]) + elif regression_algo == reg_algo[4]: + Reg = PlsR(x_train = X_train, x_test = X_test, y_train = y_train, y_test = y_test) + reg_model = Reg.model_ + ################# Model analysis ############ if regression_algo in reg_algo[1:]: yc = Reg.pred_data_[0] @@ -137,24 +172,27 @@ if not spectra.empty and not y.empty: #model_export = M1.selectbox("Choose way to export", options=["pickle", "joblib"], key=20) model_name = M9.text_input('Give it a name') + date_time = datetime.datetime.strftime(datetime.date.today(), '_%Y_%m_%d_') if M9.button('Export Model'): path = 'data/models/model_' if file == files_format[0]: #export_package = __import__(model_export) - with open(path + model_name + '_on_' + xcal_csv.name + '_and_' + ycal_csv.name + '_data_' + '.pkl','wb') as f: + with open(path + model_name + date_time + '_created_on_' + xcal_csv.name[:xcal_csv.name.find(".")] +""+ + '_and_' + ycal_csv.name[:ycal_csv.name.find(".")] + '_data_' + '.pkl','wb') as f: joblib.dump(reg_model, f) if regression_algo == reg_algo[3]: rega[1].sort() - pd.DataFrame(rega[1]).to_csv(path + model_name + '_on_' + xcal_csv.name + '_and_' + ycal_csv.name + '_data_'+'Wavelengths_index.csv', sep = ';') + pd.DataFrame(rega[1]).to_csv(path + model_name + date_time + '_on_' + xcal_csv.name[:xcal_csv.name.find(".")] + + '_and_' + ycal_csv.name[:ycal_csv.name.find(".")] + '_data_'+'Wavelengths_index.csv', sep = ';') elif file == files_format[1]: #export_package = __import__(model_export) - with open(path + model_name + '_on_' + '_data_' + '.pkl','wb') as f: + with open(path + model_name + '_on_'+ data_file.name[:data_file.name.find(".")] + '_data_' + '.pkl','wb') as f: joblib.dump(reg_model, f) if regression_algo == reg_algo[3]: rega[1].sort() - pd.DataFrame(rega[1]).to_csv(path + model_name + '_on_' + '_data_'+'Wavelengths_index.csv', sep = ';') - st.write('Model Exported') + pd.DataFrame(rega[1]).to_csv(path +data_file.name[:data_file.name.find(".")]+ model_name + date_time+ '_on_' + '_data_'+'Wavelengths_index.csv', sep = ';') + st.write('Model Exported ') if regression_algo == reg_algo[3]: st.write('Model Exported') diff --git a/src/pages/4-inputs.py b/src/pages/4-inputs.py new file mode 100644 index 0000000000000000000000000000000000000000..1480b8768e5b07ceb26d67c8c43c4d95c38787ac --- /dev/null +++ b/src/pages/4-inputs.py @@ -0,0 +1,100 @@ +import streamlit as st + + +# HTML for the banner "CEFE - CNRS" +bandeau_html = """ +<div style="width: 100%; background-color: #4682B4; padding: 10px; margin-bottom: 10px;"> + <h1 style="text-align: center; color: white;">CEFE - CNRS</h1> +</div> +""" + + +# Inject the HTML code for the banner +st.markdown(bandeau_html, unsafe_allow_html=True) + + +# Initialize session state +if 'form_submitted' not in st.session_state: + st.session_state['form_submitted'] = False + + +# Page header +with st.container(): + # Center the buttons + st.markdown( + """ + <style> + .stButton>button { + display: block; + margin: 0 auto; + width: 200px; + height: 50px; + font-size: 16px; + } + </style> + """, + unsafe_allow_html=True + ) + + + # Text input fields + with st.form(key='my_form'): + st.markdown("#### Fill in your details: ####") + + + col1, col3,col2 = st.columns((2,0.5,2)) + + + with col1: + meta_contact_name = st.text_input('First and Last name :', 'Life of Brian') + meta_project = st.text_input('Project name :', 'Life of Brian') + meta_sample_species = st.text_input('If relevant, sample species :', 'Life of Brian') + meta_sample_category_options = ["Soil", "Plant", "Animal", "Other"] + meta_sample_category = st.radio("Sample category description :", meta_sample_category_options) + meta_sample_pretreatment_options = ["Powder", "Pastile", "Liquid"] + meta_sample_pretreatment = st.radio("Type of sample pre-treatment :", meta_sample_pretreatment_options) + + + with col2: + meta_contact_email = st.text_input('Email :', 'Example@cefe.cnrs.fr') + meta_machine_ID = st.text_input('NIRS ID :', 'Life of Brian') + meta_sample_sub_category_options = ["Green leave", "Leaf litter", "Litter", "Humus", "Soil", "Animal part", "Animal Powder", "Fungal sample", "Other"] + meta_sample_sub_category = st.radio("Sample category description :", meta_sample_sub_category_options) + meta_sample_humidity_options = ["Dry", "Fresh", "Wet"] + meta_sample_humidity = st.radio("Humidity state of the sample:", meta_sample_humidity_options) + meta_scan_place_options = ["Pace", "Other"] + meta_scan_place = st.radio("If relevant, sample species :", meta_scan_place_options) + + + submitted = st.form_submit_button(label='Send') + + + if submitted: + # Check if email is valid and not null + if '@' not in meta_contact_email or meta_contact_email == '': + st.warning("Please enter a valid email address.") + else: + # Save the form data here + st.session_state['form_submitted'] = True + st.success('Form sent successfully!') + + + # Afficher les boutons seulement si le formulaire a été soumis + if st.session_state['form_submitted']: + # Buttons + with st.container(): + header3, header4 = st.columns(2) + if header3.button("Samples Selection"): + st.session_state['current_page'] = 'pages\\1-samples_selection.py' + st.switch_page('pages\\1-samples_selection.py') + if header4.button("Model Creation"): + st.session_state['current_page'] = 'pages\\2-model_creation.py' + st.switch_page('pages\\2-model_creation.py') + + + + +# Bouton de retour à la page d'accueil +st.markdown('<div style="text-align: left;"><a href="/"> <img src="house.jpg" alt="Home" style="width:50px;height:50px;"> </a></div>', unsafe_allow_html=True) + + diff --git a/src/plot_axe1_axe2.png b/src/plot_axe1_axe2.png new file mode 100644 index 0000000000000000000000000000000000000000..8d466994394313fcf101e4163fc95a2fbef329b4 Binary files /dev/null and b/src/plot_axe1_axe2.png differ diff --git a/src/plot_axe1_axe3.png b/src/plot_axe1_axe3.png new file mode 100644 index 0000000000000000000000000000000000000000..9ba83fbfdd2e3470319c5ff9597824e66a8d11e5 Binary files /dev/null and b/src/plot_axe1_axe3.png differ diff --git a/src/plot_axe2_axe3.png b/src/plot_axe2_axe3.png new file mode 100644 index 0000000000000000000000000000000000000000..168fd6c0f1d50b2810d3e695de65f9d104c4b901 Binary files /dev/null and b/src/plot_axe2_axe3.png differ