Newer
Older
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
# To launch the app:
# streamlit run .\app.py
import streamlit as st
import time
from PIL import Image
# Streamlit widget reference: https://docs.streamlit.io/library/api-reference/widgets
# Page configuration: sets the page title, the page icon and the wide layout.
# NOTE(review): the Streamlit docs describe set_page_config as having to be the
# first Streamlit command of a page -- presumably why it sits above the
# remaining imports; confirm before moving it.
# Emoji shortcodes: https://www.webfx.com/tools/emoji-cheat-sheet/
st.set_page_config(page_title="NIRS Utils", page_icon=":goat:", layout="wide")
import numpy as np
import pandas as pd
import plotly.express as px
from sklearn.cluster import KMeans as km
from sklearn.metrics import pairwise_distances_argmin_min
from application_functions import pca_maker, model, predict, find_delimiter
# from scipy.spatial.distance import pdist, squareform
# Open the banner images displayed in the page sections.
# Forward slashes are used instead of backslashes: the previous literals
# contained the invalid escape sequences "\s", "\g" and "\p" (which recent
# Python versions warn about) and only resolved on Windows. Forward slashes
# are accepted on every platform, Windows included.
img_sselect = Image.open("images/sselect.JPG")
img_general = Image.open("images/general.JPG")
img_predict = Image.open("images/predict.JPG")
# Sidebar navigation: one markdown anchor link per page section.
with st.sidebar:
    for nav_link in (
        "[Sample Selection](#sample-selection)",
        "[Model Creation](#create-a-model)",
        "[Prediction](#predict)",
    ):
        st.markdown(nav_link)
# Page banner: subheader, title, short description and the general
# illustration, followed by a horizontal rule separating it from the
# sections below.
banner = st.container()
banner.subheader("Plateforme d'Analyses Chimiques pour l'Ecologie :goat:")
banner.title("NIRS Utils")
banner.write("Sample selections, Modelisations & Predictions using [Pinard](https://github.com/GBeurier/pinard) and PACE NIRS Database.")
banner.image(img_general)
st.write("---")
# --- Sample selection section -------------------------------------------------
# Workflow: upload a spectra CSV -> project it with pca_maker -> cluster the
# two chosen components with K-Means -> list one sample per cluster centre (or
# a random draw per cluster) -> optionally export that list to CSV.
with st.container():
    st.header("Sample Selection")
    st.image(img_sselect)
    st.write("Sample selection using PCA and K-Means algorythms")
    scatter_column, settings_column = st.columns((4, 1))
    scatter_column.write("**Multi-Dimensional Analysis**")
    settings_column.write("**Settings**")
    sselectx_csv = settings_column.file_uploader(
        "Select NIRS Data",
        type="csv",
        help=" :mushroom: select a csv matrix with samples as rows and lambdas as columns",
        key=5,
    )
    if sselectx_csv is not None:
        # NOTE(review): find_delimiter receives a path under data/ built from
        # the uploaded file's name, not the uploaded buffer itself -- confirm
        # the helper copes with files that are not present in that folder.
        separators = [";", ","]
        detected_sep = str(find_delimiter('data/' + sselectx_csv.name))
        psep = settings_column.selectbox(
            "Select csv separator - _detected_: " + detected_sep,
            options=separators,
            # Fall back to the first option rather than raising ValueError
            # when the detected delimiter is not one of the offered choices.
            index=separators.index(detected_sep) if detected_sep in separators else 0,
            key=9,
        )
        phdr = settings_column.selectbox("indexes column in csv?", options=["no", "yes"], key=31)
        # index_col=0 uses the first CSV column as index; False disables it.
        col = 0 if phdr == 'yes' else False

        # A Streamlit button is True only during the rerun triggered by its
        # click. Remember which file was imported in the session state so the
        # analysis below survives the reruns caused by the other widgets
        # (component selectors, sliders, Export button).
        import_button = settings_column.button('Import')
        if import_button:
            st.session_state["sselect_imported"] = sselectx_csv.name
        if st.session_state.get("sselect_imported") == sselectx_csv.name:
            # The data is parsed again on every rerun: rewind the buffer first
            # in case a previous run already consumed it.
            sselectx_csv.seek(0)
            data_import = pd.read_csv(sselectx_csv, sep=psep, index_col=col)
            pca_data, cat_cols, pca_cols = pca_maker(data_import)
            pca_1 = settings_column.selectbox("First Principle Component", options=pca_cols, index=0)
            pca_2 = settings_column.selectbox("Second Principle Component", options=pca_cols, index=1)
            categorical_variable = settings_column.selectbox("Variable Select", options=cat_cols)
            categorical_variable_2 = settings_column.selectbox("Second Variable Select (hover data)", options=cat_cols)

            # Plain scatter when pca_maker reports the "no categories"
            # placeholder, otherwise colour/hover by the chosen variables.
            if cat_cols[0] == "no categories":
                scatter_column.plotly_chart(px.scatter(
                    data_frame=pca_data, x=pca_1, y=pca_2, template="simple_white", height=800,
                    hover_name=pca_data.index, title="PCA plot of sample spectra"))
            else:
                scatter_column.plotly_chart(px.scatter(
                    data_frame=pca_data, x=pca_1, y=pca_2, template="simple_white", height=800,
                    color=categorical_variable, hover_data=[categorical_variable_2],
                    hover_name=pca_data.index, title="PCA plot of sample spectra"))

            # The two selected components are the only features used below.
            projection = pca_data.loc[:, [pca_1, pca_2]]

            # K-Means: inertia curve used to pick the number of clusters.
            cluster_max = settings_column.slider("Max clusters (K-Means)", min_value=2, max_value=100, value=50, format="%i")
            # K-Means cannot build more clusters than there are samples, so
            # the range is capped by the sample count.
            clusters_sample = np.arange(2, min(cluster_max, len(pca_data) + 1))
            wcss_samples = [
                km(n_clusters=int(k), init='k-means++', random_state=42).fit(projection).inertia_
                for k in clusters_sample
            ]
            settings_column.plotly_chart(px.line(x=clusters_sample, y=wcss_samples, title="K-Means clusters number selection", width=200))

            # K-Means: final clustering with the user-selected cluster count.
            nb_select = settings_column.slider(
                "Choose cluster number (K-Means)",
                min_value=2,
                max_value=cluster_max,
                # The default must not exceed the slider maximum.
                value=min(5, cluster_max),
                format="%i",
            )
            kmeans_samples = km(n_clusters=nb_select, random_state=42)
            kmeans_samples.fit(projection)
            plot = scatter_column.plotly_chart(px.scatter(
                data_frame=pca_data, x=pca_1, y=pca_2, template="simple_white", height=800,
                color=kmeans_samples.labels_, hover_name=pca_data.index,
                title="PCA projection with K-Means Clusters"))

            # Choose between the sample closest to each cluster centre and a
            # random draw inside each cluster.
            selection = settings_column.select_slider('Centered samples or random ones', options=['center', 'random'])
            export = []
            scatter_column.write("Selected samples for chemical analysis:")
            if selection == 'center':
                # One sample per cluster: the point nearest to each centre.
                # (sklearn.metrics.pairwise_distances_argmin is the starting
                # point if more than one sample per cluster is ever needed.)
                closest, _ = pairwise_distances_argmin_min(kmeans_samples.cluster_centers_, projection)
                center_samples = pca_data.loc[pca_data.index[closest], [pca_1, pca_2]]
                scatter_column.dataframe(center_samples, use_container_width=True)
                export.append(center_samples.index)
            elif selection == 'random':
                # At least one sample per cluster: zero or negative counts
                # would make DataFrame.sample fail or return nothing.
                selection_number = settings_column.number_input('How many samples per cluster?', min_value=1, step=1, value=3)
                for i in np.unique(kmeans_samples.labels_):
                    cluster_members = pca_data.loc[pca_data.index[kmeans_samples.labels_ == i]]
                    if len(cluster_members) >= selection_number:
                        # Fixed seed: the draw must be identical on the rerun
                        # triggered by the Export button, otherwise the
                        # exported samples differ from the displayed ones.
                        export.append(cluster_members.sample(n=selection_number, random_state=42).index)
                    else:
                        # Not enough samples in this cluster: keep them all.
                        export.append(cluster_members.index)
                scatter_column.write(pd.DataFrame(export).T)
            if scatter_column.button('Export'):
                pd.DataFrame(export).T.to_csv('data/Samples_for_Chemical_Analysis.csv')
    else:
        scatter_column.write("_Please Choose a file_")
st.write("---")
# --- Model creation section ----------------------------------------------------
# Upload calibration spectra and the matching chemical values, pick the CSV
# options and a random seed, then hand everything to model().
with st.container():
    st.header("Create a model")
    st.image(img_predict)
    st.write("Create a model to then predict chemical values from NIRS spectra")
    xcal_csv = st.file_uploader("Select NIRS Data", type="csv", help=" :mushroom: select a csv matrix with samples as rows and lambdas as columns")
    ycal_csv = st.file_uploader("Select corresponding Chemical Data", type="csv", help=" :mushroom: select a csv matrix with samples as rows and chemical values as a column")
    if xcal_csv is not None and ycal_csv is not None:
        # NOTE(review): find_delimiter receives a path under data/ built from
        # the uploaded file's name, not the uploaded buffer -- confirm the
        # helper copes with files that are not present in that folder.
        separators = [";", ","]
        detected_sep = str(find_delimiter('data/' + xcal_csv.name))
        sep = st.selectbox(
            "Select csv separator - CSV Detected separator: " + detected_sep,
            options=separators,
            # Fall back to the first option rather than raising ValueError
            # when the detected delimiter is not one of the offered choices.
            index=separators.index(detected_sep) if detected_sep in separators else 0,
            key=0,
        )
        hdr = st.selectbox("column indexes in csv?", options=["yes", "no"], key=1)
        rd_seed = st.slider("Choose seed", min_value=1, max_value=1212, value=42, format="%i")
        # model() runs on every script rerun while both files are uploaded.
        trained_model = model(xcal_csv, ycal_csv, sep, hdr, rd_seed)
st.write("---")
# --- Prediction section ---------------------------------------------------------
# Collects the spectra file and CSV options, then exposes a "Predict" button.
with st.container():
    st.header("Predict")
    st.write("---")
    st.write("Predict chemical values from NIRS")
    # CSV of spectra to run the prediction on.
    NIRS_csv = st.file_uploader("Select NIRS Data to predict", type="csv", help=" :mushroom: select a csv matrix with samples as rows and lambdas as columns")
    # NOTE(review): psep / phdr rebind the module-level names already assigned
    # by the sample-selection widgets earlier in this script.
    psep = st.selectbox("Select csv separator", options=[";", ","], key=2)
    phdr = st.selectbox("indexes column in csv?", options=["yes", "no"], key=3)
    # NOTE(review): predict is registered as a click callback without args or
    # kwargs, so NIRS_csv, psep and phdr are not forwarded to it from here --
    # confirm how predict is expected to obtain its inputs.
    st.button("Predict", on_click=predict)