diff --git a/Class_Mod/KMEANS_.py b/Class_Mod/KMEANS_.py index 526a43597155183de2241e0fd0b850f8b4af13ad..60d77ae9f702093083095064c2b647326faa6c90 100644 --- a/Class_Mod/KMEANS_.py +++ b/Class_Mod/KMEANS_.py @@ -21,4 +21,5 @@ class Sk_Kmeans: model.fit(self.x) yp = model.predict(self.x)+1 clu = [f'cluster#{i}' for i in yp] - return self.x, clu \ No newline at end of file + + return self.x, clu, model.cluster_centers_ \ No newline at end of file diff --git a/Packages.py b/Packages.py index ff2ca80bdf1a795c23fa63408377a4eee1e0ca56..37544d56e9c13526021796bd1b95af0eaf00908a 100644 --- a/Packages.py +++ b/Packages.py @@ -64,6 +64,8 @@ import joblib from hyperopt import fmin, hp, tpe, Trials, space_eval, STATUS_OK, anneal - - +import plotly.graph_objects as go +import plotly.express as px +import plotly.io as pio +import streamlit as st st.set_option('deprecation.showPyplotGlobalUse', False) diff --git a/Report/figures/Elbow.png b/Report/figures/Elbow.png new file mode 100644 index 0000000000000000000000000000000000000000..4a62ba58b74e10993b39ce30252dd3de420af610 Binary files /dev/null and b/Report/figures/Elbow.png differ diff --git a/Report/figures/Spectra_Plot.png b/Report/figures/Spectra_Plot.png new file mode 100644 index 0000000000000000000000000000000000000000..78c9ac838409d3bef67b875908a85eddf2a01064 Binary files /dev/null and b/Report/figures/Spectra_Plot.png differ diff --git a/Report/figures/fig_regression.png b/Report/figures/fig_regression.png deleted file mode 100644 index de5d60b9696d1a3c672ea54039724dc3532987be..0000000000000000000000000000000000000000 Binary files a/Report/figures/fig_regression.png and /dev/null differ diff --git a/Report/figures/graphe_hotelling.png b/Report/figures/graphe_hotelling.png new file mode 100644 index 0000000000000000000000000000000000000000..a110c8625e8ff172824e140e305b5df5b24dc43f Binary files /dev/null and b/Report/figures/graphe_hotelling.png differ diff --git a/Report/figures/graphe_influence.png b/Report/figures/graphe_influence.png new file mode 100644 index 0000000000000000000000000000000000000000..ea5f0ba7cfe827e1b6f9545c6136102fdc40975c Binary files /dev/null and b/Report/figures/graphe_influence.png differ diff --git a/Report/figures/graphe_loadings.png b/Report/figures/graphe_loadings.png new file mode 100644 index 0000000000000000000000000000000000000000..a3cf864e29b0925d7632492744dbf535d1c213ab Binary files /dev/null and b/Report/figures/graphe_loadings.png differ diff --git a/Report/figures/scores_plot_2d_axis1_axis2.png b/Report/figures/scores_plot_2d_axis1_axis2.png new file mode 100644 index 0000000000000000000000000000000000000000..e565098674220f3e4404ad252314996b97e8256b Binary files /dev/null and b/Report/figures/scores_plot_2d_axis1_axis2.png differ diff --git a/Report/figures/scores_plot_2d_axis1_axis3.png b/Report/figures/scores_plot_2d_axis1_axis3.png new file mode 100644 index 0000000000000000000000000000000000000000..6c913afb4787b56a4f7408acc9d46001859dde4b Binary files /dev/null and b/Report/figures/scores_plot_2d_axis1_axis3.png differ diff --git a/Report/figures/scores_plot_2d_axis2_axis3.png b/Report/figures/scores_plot_2d_axis2_axis3.png new file mode 100644 index 0000000000000000000000000000000000000000..35c9d44c1a74ccc468aefd57b812d5ca339ca1e3 Binary files /dev/null and b/Report/figures/scores_plot_2d_axis2_axis3.png differ diff --git a/app.py b/app.py index 027393b8079257a75cae2893f29cd52e15db1c8a..3ae15c08841d69be3ba72b75a3737d00269f65e6 100644 --- a/app.py +++ b/app.py @@ -6,6 +6,16 @@ from Modules import * from Class_Mod.DATA_HANDLING import * +# HTML pour le bandeau "CEFE - CNRS" +bandeau_html = """ +<div style="width: 100%; background-color: #4682B4; padding: 10px; margin-bottom: 10px;"> + <h1 style="text-align: center; color: white;">CEFE - CNRS</h1> +</div> +""" + +# Injecter le code HTML du bandeau +st.markdown(bandeau_html, unsafe_allow_html=True) + # # TOC menu on the left show_pages( [Page("app.py", "Home"), @@ -18,6 +28,7 @@ hide_pages("Samples Selection") hide_pages("Models Creation") hide_pages("Predictions") + with st.sidebar: interface = st.selectbox(label="Interface", options=['simple', 'advanced'], key='interface') st.page_link('pages\\1-samples_selection.py') @@ -35,17 +46,35 @@ with st.sidebar: st.page_link('pages\\2-model_creation.py') st.page_link('pages\\3-prediction.py') + # Page header with st.container(): - st.subheader("Plateforme d'Analyses Chimiques pour l'Ecologie-PACE :goat:") - st.title("NIRS Utils") - st.write("Samples selection (PCA, [UMAP](https://umap-learn.readthedocs.io/en/latest/how_umap_works.html), ...), Predictive Modelling ([Pinard](https://github.com/GBeurier/pinard), [LWPLSR](https://doi.org/10.1002/cem.3209), ...), and Predictions using your data (CSV or DX files) and/or PACE NIRS Database.") - #st.image(img_general) - st.markdown("### We could add documentation here ###") + + # Centrer les boutons + st.markdown( + """ + <style> + .stButton>button { + display: block; + margin: 0 auto; + width: 200px; + height: 50px; + font-size: 16px; + } + </style> + """, + unsafe_allow_html=True + ) + header1, header2, header3 = st.columns(3) if header1.button("Samples Selection"): st.switch_page('pages\\1-samples_selection.py') if header2.button("Model Creation"): st.switch_page('pages\\2-model_creation.py') if header3.button("Predictions"): - st.switch_page('pages\\3-prediction.py') \ No newline at end of file + st.switch_page('pages\\3-prediction.py') + st.subheader("Plateforme d'Analyses Chimiques pour l'Ecologie-PACE :goat:") + st.title("NIRS Utils") + st.write("Samples selection (PCA, [UMAP](https://umap-learn.readthedocs.io/en/latest/how_umap_works.html), ...), Predictive Modelling ([Pinard](https://github.com/GBeurier/pinard), [LWPLSR](https://doi.org/10.1002/cem.3209), ...), and Predictions using your data (CSV or DX files) and/or PACE NIRS Database.") + #st.image(img_general) + st.markdown("### We could add documentation here ###") \ No newline at end of file diff --git a/graphe.png b/graphe.png new file mode 100644 index 0000000000000000000000000000000000000000..3a7ad7924e3754459d2b9ab43f1515df6d201b19 Binary files /dev/null and b/graphe.png differ diff --git a/pages/1-samples_selection.py b/pages/1-samples_selection.py index f97708c18fc87a7fbbd355456f03daaabc3ce002..e0654390e6c46cbf3d7f703464ecb1457fedde46 100644 --- a/pages/1-samples_selection.py +++ b/pages/1-samples_selection.py @@ -3,6 +3,19 @@ st.set_page_config(page_title="NIRS Utils", page_icon=":goat:", layout="wide") from Modules import * from Class_Mod.DATA_HANDLING import * + + +# HTML pour le bandeau "CEFE - CNRS" +bandeau_html = """ +<div style="width: 100%; background-color: #4682B4; padding: 10px; margin-bottom: 10px;"> + <h1 style="text-align: center; color: white;">CEFE - CNRS</h1> +</div> +""" + + +# Injecter le code HTML du bandeau +st.markdown(bandeau_html, unsafe_allow_html=True) + st.session_state["interface"] = st.session_state.get('interface') if st.session_state["interface"] == 'simple': hide_pages("Predictions") @@ -55,19 +68,21 @@ if data_file: st.success("The data have been loaded successfully", icon="✅") os.unlink(tmp_path) - ## Visualize spectra if not spectra.empty: with col2: fig = plot_spectra(spectra) st.pyplot(fig) - + fig.savefig("./Report/figures/Spectra_Plot.png") ############################## Exploratory data analysis ############################### container2 = st.container(border=True) container2.header("Exploratory Data Analysis-Multivariable Data Analysis", divider='blue') scores, loadings, pc = st.columns([2, 3, 0.5]) influence, hotelling, qexp = st.columns([2, 2, 1]) +st.header('Selected samples for chemical analysis') +selected_s, selected_samples_metd = st.columns([3, 3]) +selected_s.write('Samples scores') dim_red_methods=['', 'PCA','UMAP', 'NMF'] # List of dimensionality reduction algos cluster_methods = ['', 'Kmeans','HDBSCAN', 'AP'] # List of clustering algos @@ -111,8 +126,11 @@ if not t.empty: ncluster = scores.number_input(min_value=2, max_value=30, value=3, label = 'Select the desired number of clusters') cl_model = Sk_Kmeans(tcr, max_clusters = 30) fig2 = px.scatter(cl_model.inertia_.T, y = 'inertia') - scores.plotly_chart(fig2, use_container_width=True) - data, labels = cl_model.fit_optimal(nclusters = ncluster) + scores.plotly_chart(fig2,use_container_width=True) + img = pio.to_image(fig2, format="png") + with open("./Report/figures/Elbow.png", "wb") as f: + f.write(img) + data, labels, clu_centers = cl_model.fit_optimal(nclusters = ncluster) elif clus_method == cluster_methods[2]: optimized_hdbscan = Hdbscan(dr_model.scores_raw_) @@ -120,14 +138,58 @@ if not t.empty: ##### Plots -## Scores + +##################################################################################################### +selec_strategy = ['center','random'] +samples_df_chem = pd.DataFrame +selected_samples = [] +selected_samples_idx = [] + +if labels: + selection = scores.radio('Select samples selection strategy:', options = selec_strategy) +#################### selection strategy to be corrected + if selection == selec_strategy[0]: + # list samples at clusters centers - Use sklearn.metrics.pairwise_distances_argmin if you want more than 1 sample per cluster + closest, _ = pairwise_distances_argmin_min(clu_centers, tcr) + selected_samples_idx = list(closest) + elif selection == selec_strategy[1]: + selection_number = scores.number_input('How many samples per cluster?', min_value = 1, step=1, value = 3) + for i in np.unique(labels): + C = np.where(np.array(labels) ==i)[0] + if C.shape[0] >= selection_number: + #scores.write(list(tcr.index)[labels== i]) + km2 = KMeans(n_clusters = selection_number) + km2.fit(tcr.iloc[C,:]) + clos, _ = pairwise_distances_argmin_min(km2.cluster_centers_, tcr.iloc[C,:]) + selected_samples_idx2 = list(clos) + selected_samples_idx.extend(tcr.iloc[C,:].index[selected_samples_idx2]) + # selected_samples_idx.extend(tcr.iloc[C,:].sample(n=selection_number).index.to_list()) + else: + selected_samples_idx.extend(tcr.iloc[C,:].index.to_list()) + # list indexes of selected samples for colored plot + +if labels: + if selected_samples_idx: + sam = pd.DataFrame({'cluster':np.array(labels)[selected_samples_idx], + 'index': spectra.index[selected_samples_idx]}) + selected_s.write(sam) + + if not meta_data.empty: + selected_samples_metd.write('Corresponding meta-data') + meta = meta_data.iloc[selected_samples_idx,:] + meta['cluster'] = np.array(labels)[selected_samples_idx] + meta['index'] = spectra.index[selected_samples_idx] + selected_samples_metd.write(meta) + + + ## Scores if not t.empty: with scores: st.write('Scores plot') # scores plot with clustering if list(labels) and meta_data.empty: fig = px.scatter_3d(tcr, x=axis1, y=axis2, z = axis3, color = labels) - + # scores plot with metadata elif len(list(labels)) == 0 and not meta_data.empty: filter = meta_data.columns[1:] @@ -156,29 +218,73 @@ if not t.empty: else: fig = px.scatter_3d(tcr, x=axis1, y=axis2, z = axis3) fig.update_traces(marker=dict(size=4)) + + if selected_samples_idx: + tt = tcr.iloc[selected_samples_idx,:] + fig.add_scatter3d(x = tt.loc[:,axis1], y = tt.loc[:,axis2], + z = tt.loc[:,axis3], mode ='markers', marker = dict(size = 7, color = 'black'), + name = 'selected samples') st.plotly_chart(fig, use_container_width=True) +## Export en 2d Axe1..Axe3 +if not t.empty: + if dim_red_method == dim_red_methods[1]: + + # nombre de clusters + num_clusters = len(np.unique(labels)) + + # Une couleur par cluster + custom_color_palette = px.colors.qualitative.Plotly[:num_clusters] + + # Graphique pour les dimensions (axis1, axis2) + fig_2d_axis1_axis2 = px.scatter(t, x=axis1, y=axis2, color=labels, color_discrete_sequence=custom_color_palette) + img_2d_axis1_axis2 = pio.to_image(fig_2d_axis1_axis2, format="png") + with open("./Report/figures/scores_plot_2d_axis1_axis2.png", "wb") as f: + f.write(img_2d_axis1_axis2) + + # Graphique pour les dimensions (axis1, axis3) + fig_2d_axis1_axis3 = px.scatter(t, x=axis1, y=axis3, color=labels, color_discrete_sequence=custom_color_palette) + img_2d_axis1_axis3 = pio.to_image(fig_2d_axis1_axis3, format="png") + with open("./Report/figures/scores_plot_2d_axis1_axis3.png", "wb") as f: + f.write(img_2d_axis1_axis3) + + # Graphique pour les dimensions (axis2, axis3) + fig_2d_axis2_axis3 = px.scatter(t, x=axis2, y=axis3, color=labels, color_discrete_sequence=custom_color_palette) + img_2d_axis2_axis3 = pio.to_image(fig_2d_axis2_axis3, format="png") + with open("./Report/figures/scores_plot_2d_axis2_axis3.png", "wb") as f: + f.write(img_2d_axis2_axis3) if not spectra.empty: if dim_red_method == dim_red_methods[1]: + with loadings: st.write('Loadings plot') p = dr_model.loadings_ - pp = pd.concat([p, pd.DataFrame(np.arange(p.shape[0]), index=p.index, columns=['wl'])], axis =1) + pp = pd.concat([p, pd.DataFrame(np.arange(p.shape[0]), index=p.index, columns=['wl'])], axis=1) df1 = pp.melt(id_vars="wl") - fig = px.line(df1, x = 'wl', y = 'value', color='variable') - fig.update_layout(legend=dict(x=1, y=0,font=dict(family="Courier", size=12, color="black"), - bordercolor="Black", borderwidth=2)) - st.plotly_chart(fig, use_container_width = True) + fig = px.line(df1, x='wl', y='value', color='variable', color_discrete_sequence=px.colors.qualitative.Plotly) + fig.update_layout(legend=dict(x=1, y=0, font=dict(family="Courier", size=12, color="black"), + bordercolor="black", borderwidth=2)) + st.plotly_chart(fig, use_container_width=True) + + # Export du graphique + img = pio.to_image(fig, format="png") + with open("./Report/figures/graphe_loadings.png", "wb") as f: + f.write(img) with influence: st.write('Influence plot') ax1 = st.selectbox("Component", options=dr_model.scores_.columns, index=3) leverage = dr_model.leverage_ residuals = dr_model.residuals_ - fig = px.scatter(x=leverage[ax1], y=residuals[ax1], color = leverage[ax1]*residuals[ax1]).update_layout(xaxis_title="Leverage",yaxis_title="Residuals") - st.plotly_chart(fig, use_container_width=True) + fig = px.scatter(x=leverage[ax1], y=residuals[ax1], color=leverage[ax1]*residuals[ax1], color_continuous_scale='Blues') + fig.update_layout(xaxis_title="Leverage", yaxis_title="Residuals") + st.plotly_chart(fig) + img = pio.to_image(fig, format="png") + with open("./Report/figures/graphe_influence.png", "wb") as f: + f.write(img) + with hotelling: st.write('T²-Hotelling vs Q residuals plot') @@ -188,6 +294,7 @@ if not spectra.empty: hotelling = dr_model.hotelling_ fig = px.scatter(t, x=hotelling[ax2], y=residuals[ax2]).update_layout(xaxis_title="T²",yaxis_title="Residuals") st.plotly_chart(fig, use_container_width=True) + fig.write_image("./Report/figures/graphe_hotelling.png", format="png") if dim_red_method == dim_red_methods[2] and clus_method == cluster_methods[2]: # UMAP clustered by HDBSCAN with loadings: # Display some clustering metrics @@ -197,3 +304,12 @@ if not spectra.empty: st.write('Optimal number of clusters = ' + str(len(clusters_number))) st.write('DBCV score (-1 to 1 - higher is better) = ' + str(round(hdbscan_score,3))) st.write('Unclassified samples: ' + str(len(t[labels==-1])) + ' on ' + str(len(t)) + ' samples (' + str(round(len(t[labels==-1])/len(t)*100, 1)) + '%).') + + + + + + + + + diff --git a/pages/2-model_creation.py b/pages/2-model_creation.py index 3a4dea1c8eb1f2edf4aed8979ea0696b5cb27851..50f07cdae1e7f51c466148e255bb7a8fb6ba9a1d 100644 --- a/pages/2-model_creation.py +++ b/pages/2-model_creation.py @@ -2,7 +2,15 @@ from Packages import * st.set_page_config(page_title="NIRS Utils", page_icon=":goat:", layout="wide") from Modules import * from Class_Mod.DATA_HANDLING import * - +# HTML pour le bandeau "CEFE - CNRS" +bandeau_html = """ +<div style="width: 100%; background-color: #4682B4; padding: 10px; margin-bottom: 10px;"> + <h1 style="text-align: center; color: white;">CEFE - CNRS</h1> +</div> +""" + +# Injecter le code HTML du bandeau +st.markdown(bandeau_html, unsafe_allow_html=True) st.session_state["interface"] = st.session_state.get('interface') if st.session_state["interface"] == 'simple': diff --git a/pages/3-prediction.py b/pages/3-prediction.py index e2acfc13702b1944a36fb8341797f9912853c354..a3eccd090b2fe97e090c23cf52beb5092bf61ce4 100644 --- a/pages/3-prediction.py +++ b/pages/3-prediction.py @@ -2,7 +2,15 @@ from Packages import * st.set_page_config(page_title="NIRS Utils", page_icon=":goat:", layout="wide") from Modules import * from Class_Mod.DATA_HANDLING import * - +# HTML pour le bandeau "CEFE - CNRS" +bandeau_html = """ +<div style="width: 100%; background-color: #4682B4; padding: 10px; margin-bottom: 10px;"> + <h1 style="text-align: center; color: white;">CEFE - CNRS</h1> +</div> +""" + +# Injecter le code HTML du bandeau +st.markdown(bandeau_html, unsafe_allow_html=True) st.session_state["interface"] = st.session_state.get('interface')