# To launch the app:
#   streamlit run .\app.py
import streamlit as st
import time
from PIL import Image

# Help on Streamlit input widgets:
#   https://docs.streamlit.io/library/api-reference/widgets
# Emoji codes: https://www.webfx.com/tools/emoji-cheat-sheet/

# Page title / layout. Kept ahead of the remaining imports, as in the original
# file, so that it stays the first Streamlit command executed by the script.
st.set_page_config(page_title="NIRS Utils", page_icon=":goat:", layout="wide")

import numpy as np
import pandas as pd
import plotly.express as px
from sklearn.cluster import KMeans as km
from sklearn.metrics import pairwise_distances_argmin_min
from application_functions import pca_maker, model, predict, find_delimiter

# Separators offered in every "csv separator" select box.
CSV_SEPARATORS = [";", ","]

# st.session_state key remembering that "Import" was clicked. A Streamlit
# button is only True during the single rerun that follows the click, so
# without this flag every widget nested under the button would vanish as soon
# as the user interacts with one of them.
IMPORT_STATE_KEY = "sselect_imported"


def _separator_index(detected):
    """Return the position of *detected* in CSV_SEPARATORS.

    Falls back to 0 (the first separator) when the detected delimiter is not
    one of the offered options, instead of raising ValueError.
    """
    try:
        return CSV_SEPARATORS.index(str(detected))
    except ValueError:
        return 0


def _render_kmeans_selection(pca_data, pca_1, pca_2, scatter_column, settings_column):
    """Render the K-Means controls, plots, sample table and export button.

    Args:
        pca_data: DataFrame returned by ``pca_maker`` (one row per sample).
        pca_1: Name of the column used as x axis / first clustering feature.
        pca_2: Name of the column used as y axis / second clustering feature.
        scatter_column: Streamlit column receiving plots and tables.
        settings_column: Streamlit column receiving the input widgets.
    """
    coords = pca_data.loc[:, [pca_1, pca_2]]
    n_samples = len(coords)
    if n_samples < 2:
        # K-Means needs at least as many samples as clusters (minimum is 2).
        scatter_column.write("_Not enough samples for K-Means clustering_")
        return

    # --- Choose the number of clusters (elbow plot of the inertia) ---
    cluster_max = settings_column.slider(
        "Max clusters (K-Means)", min_value=2, max_value=100, value=50, format="%i"
    )
    # KMeans cannot fit more clusters than there are samples.
    k_upper = min(cluster_max, n_samples)
    # Inclusive upper bound so the selected maximum is itself evaluated.
    clusters_sample = np.arange(2, k_upper + 1)
    wcss_samples = [
        km(n_clusters=int(k), init='k-means++', random_state=42).fit(coords).inertia_
        for k in clusters_sample
    ]
    settings_column.plotly_chart(
        px.line(
            x=clusters_sample,
            y=wcss_samples,
            title="K-Means clusters number selection",
            width=200,
        )
    )

    # --- Draw the clustering ---
    if k_upper > 2:
        nb_select = settings_column.slider(
            "Choose cluster number (K-Means)",
            min_value=2,
            max_value=k_upper,
            # The default must lie inside [min_value, max_value].
            value=min(5, k_upper),
            format="%i",
        )
    else:
        # Only one possible value: a slider with min == max is pointless.
        nb_select = 2
    kmeans_samples = km(n_clusters=nb_select, random_state=42)
    kmeans_samples.fit(coords)
    labels = kmeans_samples.labels_
    scatter_column.plotly_chart(
        px.scatter(
            data_frame=pca_data,
            x=pca_1,
            y=pca_2,
            template="simple_white",
            height=800,
            color=labels,
            hover_name=pca_data.index,
            title="PCA projection with K-Means Clusters",
        )
    )

    # --- Choose between cluster-centered samples and random samples ---
    selection = settings_column.select_slider(
        'Centered samples or random ones', options=['center', 'random']
    )
    export = []
    scatter_column.write("Selected samples for chemical analysis:")
    if selection == 'center':
        # List samples at clusters centers - Use
        # sklearn.metrics.pairwise_distances_argmin if you want more than
        # 1 sample per cluster.
        closest, _ = pairwise_distances_argmin_min(
            kmeans_samples.cluster_centers_, coords
        )
        # Positional lookup: stays correct even if index labels repeat.
        central = coords.iloc[closest]
        scatter_column.dataframe(central, use_container_width=True)
        export.append(central.index)
    elif selection == 'random':
        selection_number = settings_column.number_input(
            'How many samples per cluster?', min_value=1, step=1, value=3
        )
        for cluster_id in np.unique(labels):
            members = pca_data.loc[labels == cluster_id]
            if len(members) >= selection_number:
                export.append(members.sample(n=selection_number).index)
            else:
                # Not enough samples in this cluster: keep the whole cluster.
                export.append(members.index)
        # NOTE(review): the original indentation was lost; this summary table
        # is assumed to belong to the random branch (the center branch already
        # displays its own table) - confirm.
        scatter_column.write(pd.DataFrame(export).T)

    if scatter_column.button('Export'):
        pd.DataFrame(export).T.to_csv('data/Samples_for_Chemical_Analysis.csv')


# Open images. Forward slashes work on every platform and avoid the invalid
# "\s", "\g" and "\p" escape sequences of the previous Windows-style literals.
img_sselect = Image.open("images/sselect.JPG")
img_general = Image.open("images/general.JPG")
img_predict = Image.open("images/predict.JPG")

# Sidebar: anchors to the three sections of the page.
with st.sidebar:
    st.markdown("[Sample Selection](#sample-selection)")
    st.markdown("[Model Creation](#create-a-model)")
    st.markdown("[Prediction](#predict)")

# Page header.
with st.container():
    st.subheader("Plateforme d'Analyses Chimiques pour l'Ecologie :goat:")
    st.title("NIRS Utils")
    st.write("Sample selections, Modelisations & Predictions using [Pinard](https://github.com/GBeurier/pinard) and PACE NIRS Database.")
    st.image(img_general)
    st.write("---")

# Section 1: sample selection (PCA projection + K-Means clustering).
with st.container():
    st.header("Sample Selection")
    st.image(img_sselect)
    st.write("Sample selection using PCA and K-Means algorythms")

    scatter_column, settings_column = st.columns((4, 1))
    scatter_column.write("**Multi-Dimensional Analysis**")
    settings_column.write("**Settings**")
    sselectx_csv = settings_column.file_uploader(
        "Select NIRS Data",
        type="csv",
        help=" :mushroom: select a csv matrix with samples as rows and lambdas as columns",
        key=5,
    )

    if sselectx_csv is not None:
        # NOTE(review): the delimiter is detected from 'data/<uploaded name>',
        # i.e. a file on disk, not from the uploaded content - confirm the
        # uploaded file is always expected to exist in data/.
        detected_sep = str(find_delimiter('data/' + sselectx_csv.name))
        psep = settings_column.selectbox(
            "Select csv separator - _detected_: " + detected_sep,
            options=CSV_SEPARATORS,
            index=_separator_index(detected_sep),
            key=9,
        )
        phdr = settings_column.selectbox(
            "indexes column in csv?", options=["no", "yes"], key=31
        )
        # index_col=False tells pandas not to use the first column as index.
        col = 0 if phdr == 'yes' else False

        if settings_column.button('Import'):
            st.session_state[IMPORT_STATE_KEY] = True

        if st.session_state.get(IMPORT_STATE_KEY, False):
            # Rewind in case the buffer was already consumed by a previous run.
            sselectx_csv.seek(0)
            data_import = pd.read_csv(sselectx_csv, sep=psep, index_col=col)
            pca_data, cat_cols, pca_cols = pca_maker(data_import)

            pca_1 = settings_column.selectbox(
                "First Principle Component", options=pca_cols, index=0
            )
            pca_2 = settings_column.selectbox(
                "Second Principle Component",
                options=pca_cols,
                # Default to the second component when there is one.
                index=1 if len(pca_cols) > 1 else 0,
            )
            categorical_variable = settings_column.selectbox(
                "Variable Select", options=cat_cols
            )
            categorical_variable_2 = settings_column.selectbox(
                "Second Variable Select (hover data)", options=cat_cols
            )

            if cat_cols[0] == "no categories":
                scatter_column.plotly_chart(
                    px.scatter(
                        data_frame=pca_data,
                        x=pca_1,
                        y=pca_2,
                        template="simple_white",
                        height=800,
                        hover_name=pca_data.index,
                        title="PCA plot of sample spectra",
                    )
                )
            else:
                scatter_column.plotly_chart(
                    px.scatter(
                        data_frame=pca_data,
                        x=pca_1,
                        y=pca_2,
                        template="simple_white",
                        height=800,
                        color=categorical_variable,
                        hover_data=[categorical_variable_2],
                        hover_name=pca_data.index,
                        title="PCA plot of sample spectra",
                    )
                )

            _render_kmeans_selection(
                pca_data, pca_1, pca_2, scatter_column, settings_column
            )
    else:
        # No file (or file removed): forget any previous import.
        st.session_state[IMPORT_STATE_KEY] = False
        scatter_column.write("_Please Choose a file_")
    st.write("---")

# Section 2: model creation from calibration spectra and chemical values.
with st.container():
    st.header("Create a model")
    st.image(img_predict)
    st.write("Create a model to then predict chemical values from NIRS spectra")
    xcal_csv = st.file_uploader(
        "Select NIRS Data",
        type="csv",
        help=" :mushroom: select a csv matrix with samples as rows and lambdas as columns",
    )
    ycal_csv = st.file_uploader(
        "Select corresponding Chemical Data",
        type="csv",
        help=" :mushroom: select a csv matrix with samples as rows and chemical values as a column",
    )
    if xcal_csv is not None and ycal_csv is not None:
        detected_cal_sep = str(find_delimiter('data/' + xcal_csv.name))
        sep = st.selectbox(
            "Select csv separator - CSV Detected separator: " + detected_cal_sep,
            options=CSV_SEPARATORS,
            index=_separator_index(detected_cal_sep),
            key=0,
        )
        hdr = st.selectbox("column indexes in csv?", options=["yes", "no"], key=1)
        rd_seed = st.slider(
            "Choose seed", min_value=1, max_value=1212, value=42, format="%i"
        )
        trained_model = model(xcal_csv, ycal_csv, sep, hdr, rd_seed)
    st.write("---")

# Section 3: prediction on new spectra.
with st.container():
    st.header("Predict")
    st.write("---")
    st.write("Predict chemical values from NIRS")
    NIRS_csv = st.file_uploader(
        "Select NIRS Data to predict",
        type="csv",
        help=" :mushroom: select a csv matrix with samples as rows and lambdas as columns",
    )
    psep = st.selectbox("Select csv separator", options=CSV_SEPARATORS, key=2)
    phdr = st.selectbox("indexes column in csv?", options=["yes", "no"], key=3)
    # NOTE(review): predict is invoked without arguments by on_click; the
    # values selected above are not passed to it - confirm this is intended.
    st.button("Predict", on_click=predict)