#from Modules_manager.PCA_ import pca_maker
from Packages import *
st.set_page_config(page_title="NIRS Utils", page_icon=":goat:", layout="wide")
# load images for web interface
img_sselect = Image.open("images/sselect.JPG")
img_general = Image.open("images/general.JPG")
img_predict = Image.open("images/predict.JPG")
with st.sidebar:
st.markdown("[Sample Selection](#sample-selection)")
st.markdown("[Model Development](#create-a-model)")
st.markdown("[Predictions Making](#predict)")
st.subheader("Plateforme d'Analyses Chimiques pour l'Ecologie-PACE :goat:")
st.write("Samples selection, Predictive Modelling, and Predictions making using [Pinard](https://github.com/GBeurier/pinard) and PACE NIRS Database.")
#st.image(img_general)
################################### Data Loading and Visualization ########################################
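# Page layout (descriptive note): container1 holds the data loader (col1) and the spectra plot (col2);
# container2 holds the exploratory-analysis panels arranged in the scores/loadings/pc and
# influence/hotelling/qexp column rows defined below.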
container1 = st.container(border=True)
col2, col1 = st.columns([3, 1])
container2 = st.container(border=True)
container2.header("Exploratory Data Analysis-Multivariable Data Analysis", divider='blue')
scores, loadings, pc = st.columns([2, 2, 0.5])
influence, hotelling, qexp = st.columns([2, 2, 1])
with container1:
col1.header("NIRS Data Loading", divider='blue')
col2.header("Spectral Data Visualization", divider='blue')
with col1:
# loader for csv file containing NIRS spectra
sselectx_csv = st.file_uploader("Load NIRS Data", type="csv", help=" :mushroom: select a csv matrix with samples as rows and lambdas as columns", key=5)
if sselectx_csv is not None:
# Select list for CSV delimiter
psep = st.selectbox("Select csv separator - _detected_: " + str(find_delimiter('data/'+sselectx_csv.name)), options=[";", ","], index=[";", ","].index(str(find_delimiter('data/'+sselectx_csv.name))), key=9)
# Select list for CSV header True / False
phdr = st.selectbox("indexes column in csv? - _detected_: " + str(find_col_index('data/'+sselectx_csv.name)), options=["no", "yes"], index=["no", "yes"].index(str(find_col_index('data/'+sselectx_csv.name))), key=31)
if phdr == 'yes':
col = 0
else:
col = False
data_import = pd.read_csv(sselectx_csv, sep=psep, index_col=col)
st.success("The data have been loaded successfully", icon="✅")
## Visualize spectra
if sselectx_csv is not None:
with col2:
fig, ax = plt.subplots(figsize = (30,7))
data_import.T.plot(legend=False, ax = ax, color = 'blue')
ax.set_xlabel('Wavelength/Wavenumber', fontsize=18)
ax.set_ylabel('Signal', fontsize=18)
plt.margins(x = 0)
st.pyplot(fig)
st.write("Summary")
info = pd.DataFrame({'N': [data_import.shape[0]],
                     'Min': [data_import.min().min()],
                     'Max': [data_import.max().max()]},
                    index=['Values']).T
info = info.rename_axis('information')
st.table(data=info)
######################################################################################
############################## Exploratory data analysis ###############################
with container2:
plot_type=['', 'PCA','UMAP', 'NMF']
cluster_methods = ['', 'Kmeans','UMAP', 'AP']
with pc:
type_plot = st.selectbox("Dimensionality reduction techniques: ", options=plot_type, key=37)
type_cluster = st.selectbox("Clustering techniques: ", options=cluster_methods, key=38)
# compute UMAP - umap_maker in application_functions.py
if type_plot == 'PCA':
model = LinearPCA(data_import, Ncomp=5)
elif type_plot =='UMAP':
pass
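# Hedged sketch (assumption): the UMAP branch is not implemented in this excerpt; a minimal
# 3-component embedding with umap-learn could look like the lines below. umap_maker in
# application_functions.py may already wrap this, so the call is left commented out and the
# column names are illustrative only.
#   import umap
#   reducer = umap.UMAP(n_components=3, random_state=42)
#   embedding = pd.DataFrame(reducer.fit_transform(data_import),
#                            index=data_import.index,
#                            columns=['axis1', 'axis2', 'axis3'])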
if type_plot in ['PCA', 'UMAP']:
# add 2 select lists to choose which component to plot
axis1 = pc.selectbox("x-axis", options = model.scores_.columns, index=0)
axis2 = pc.selectbox("y-axis", options = model.scores_.columns, index=1)
axis3 = pc.selectbox("z-axis", options = model.scores_.columns, index=2)
if type_cluster == 'Kmeans':
cl = Sk_Kmeans(pd.concat([model.scores_.loc[:,axis1], model.scores_.loc[:,axis2], model.scores_.loc[:,axis3]], axis = 1), max_clusters = 30)
with scores:
t = model.scores_
if type_cluster in ['Kmeans','UMAP', 'AP']:
st.write('Scree plot')
fig2 = px.scatter(cl.inertia_.T, y = 'inertia')
st.plotly_chart(fig2)
ncluster = st.number_input(min_value=2, max_value=30, value=3, label = 'Select the desired number of clusters')
data, colors = cl.fit_optimal(nclusters=ncluster)
#fig = px.scatter(data, x=axis1, y=axis2, color= colors)
st.write('Scores plot')
fig = px.scatter_3d(data, x=axis1, y=axis2, z = axis3, color=colors)
else:
fig = px.scatter_3d(t, x=axis1, y=axis2, z = axis3)
st.plotly_chart(fig)
with loadings:
st.write('Loadings plot')
p = model.loadings_
pp = pd.concat([p, pd.DataFrame(np.arange(p.shape[0]), index=p.index, columns=['wl'])], axis =1)
df1 = pp.melt(id_vars="wl")
fig = px.line(df1, x = 'wl', y = 'value', color='variable')
fig.update_layout(
legend=dict(x=1, y=0,
font=dict(
family="Courier", size=12, color="black"),
bordercolor="Black", borderwidth=2)
)
st.plotly_chart(fig)
with influence:
st.write('Influence plot')
ax1 = st.selectbox("Component", options=model.scores_.columns, index=3)
leverage = model.leverage_
residuals = model.residuals
fig = px.scatter(x=leverage[ax1], y=residuals[ax1], color = leverage[ax1]*residuals[ax1])
st.plotly_chart(fig)
with hotelling:
st.write('T²-Hotelling vs Q residuals plot')
ax2 = st.selectbox("Component", options=model.scores_.columns, index=4)
t = model.scores_
fig = px.scatter(t, x=axis1, y=t.columns[1])
st.plotly_chart(fig)
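# Hedged sketch (assumption): the panel title announces a T²-Hotelling vs Q residuals plot,
# while the scatter above uses raw scores. Both statistics can be derived from the PCA scores (T)
# and loadings (P) of the mean-centered data; the variables-by-components layout of
# model.loadings_ is an assumption.
#   T = model.scores_.to_numpy()
#   P = model.loadings_.to_numpy()
#   Xc = data_import.to_numpy() - data_import.to_numpy().mean(axis=0)
#   E = Xc - T @ P.T                                # reconstruction residuals
#   Q = (E ** 2).sum(axis=1)                        # Q residuals (SPE) per sample
#   T2 = ((T / T.std(axis=0)) ** 2).sum(axis=1)     # Hotelling T² per sample
#   st.plotly_chart(px.scatter(x=T2, y=Q, labels={'x': 'Hotelling T²', 'y': 'Q residuals'}))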
with qexp:
pass
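# Hedged sketch (assumption): the qexp panel is an empty stub here. For a centered PCA, the share
# of variance explained by each component can be approximated from the score variances; the
# percentage scale below is illustrative only.
#   expl_var = 100 * model.scores_.var() / data_import.var().sum()
#   st.write('Explained variance (%)')
#   st.plotly_chart(px.bar(expl_var))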
else:
st.markdown('Select a dimensionality reduction technique from the dropdown list')
########################################################################################
container2 = st.container(border=True)
M1, M2, M3 = st.columns([2,2,2])
M4, M5 = st.columns([6,2])
container3 = st.container(border=True)
M7, M8 = st.columns([2,2])
available_regression_algo = ["","SciKitLearn PLSR", "Jchemo Local Weighted PLSR", "Intervalle Selection PLSR"]
with container2:
st.header("Calibration Model Development", divider='blue')
st.write("Create a predictive model, then use it for predicting your target variable(chemical values) from NIRS spectra")
xcal_csv = M3.file_uploader("Select NIRS Data", type="csv", help=" :mushroom: select a csv matrix with samples as rows and lambdas as columns")
ycal_csv = M3.file_uploader("Select corresponding Chemical Data", type="csv", help=" :mushroom: select a csv matrix with samples as rows and chemical values as a column")
# Select list for CSV delimiter
sep = M3.selectbox("Select csv separator - _detected_: " + str(find_delimiter('data/'+xcal_csv.name)), options=[";", ","], index=[";", ","].index(str(find_delimiter('data/'+xcal_csv.name))), key=0)
hdr = M3.selectbox("indexes column in csv? - _detected_: " + str(find_col_index('data/'+xcal_csv.name)), options=["no", "yes"], index=["no", "yes"].index(str(find_col_index('data/'+xcal_csv.name))), key=1)
if hdr == 'yes':
col = 0
else:
col = False
rd_seed = M1.slider("Choose seed", min_value=1, max_value=1212, value=42, format="%i")
x, y = utils.load_csv(xcal_csv, ycal_csv, autoremove_na=True, sep=sep, x_hdr=0, y_hdr=0, x_index_col=col, y_index_col=col)
# Split data into training and test sets using the kennard_stone method and correlation metric, 25% of data is used for testing
train_index, test_index = train_test_split_idx(x, y=y, method="kennard_stone", metric="correlation", test_size=0.25, random_state=rd_seed)
# Assign data to training and test sets
X_train, y_train, X_test, y_test = pd.DataFrame(x[train_index]), pd.DataFrame(y[train_index]), pd.DataFrame(x[test_index]), pd.DataFrame(y[test_index])
#############################
regression_algo = M1.selectbox("Choose the algorithm for regression", options=available_regression_algo, key = 12)
if regression_algo == 'SciKitLearn PLSR':
# Train model with model function from application_functions.py
Reg = PinardPlsr(x_train=X_train, x_test=X_test,y_train=y_train, y_test=y_test)
reg_model = Reg.model_
#M2.dataframe(Pin.pred_data_)
elif regression_algo == 'Jchemo Local Weighted PLSR':
reg_model = model_LWPLSR(xcal_csv, ycal_csv, sep, hdr)
elif regression_algo == "Intervalle Selection PLSR":
s = M2.number_input(label='Enter the maximum number of intervals', min_value=1, max_value=6, value="min")
reg_model = TpeIpls(x_train= X_train, y_train= y_train, x_test=X_test, y_test= y_test,Kfold= 3,scale= True, n_intervall = 3)
reg_model.tune(n_iter=10)
if regression_algo in ["SciKitLearn PLSR", "Jchemo Local Weighted PLSR", "Intervalle Selection PLSR"]:
with container3:
st.header("Model Diagnosis", divider='blue')
yc = Reg.pred_data_[0]
ycv = Reg.pred_data_[1]
yt = Reg.pred_data_[2]
M7.write('Predicted vs Measured values')
M7.pyplot(reg_plot([y_train, y_train, y_test],[yc, ycv, yt]))
M8.write('Residuals plot')
M8.pyplot(resid_plot([y_train, y_train, y_test],[yc, ycv, yt]))
# Export the model with pickle or joblib
if regression_algo != '':
M1.write("-- Performance metrics --")
M1.dataframe(Reg.metrics_)
M1.write("-- Save the model --")
#model_export = M1.selectbox("Choose way to export", options=["pickle", "joblib"], key=20)
model_name = M1.text_input('Give it a name')
if M1.button('Export Model'):
#export_package = __import__(model_export)
with open('data/models/model_' + model_name + '_on_' + xcal_csv.name + '_and_' + ycal_csv.name + '_data_' + '.pkl','wb') as f:
joblib.dump(reg_model,f)
# create a report with information on the model
## see https://stackoverflow.com/a/59578663
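# Hedged sketch (assumption): a minimal JSON report could be saved next to the pickled model;
# the field names below are illustrative, not an existing API of Pinard or this app.
#   import json
#   report = {'model_name': model_name,
#             'algorithm': regression_algo,
#             'x_file': xcal_csv.name,
#             'y_file': ycal_csv.name,
#             'seed': int(rd_seed),
#             'metrics': Reg.metrics_.to_dict()}
#   with open('data/models/model_' + model_name + '_report.json', 'w') as jf:
#       json.dump(report, jf, indent=2)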
#M4.pyplot(reg_plot(meas=[ycal_csv, ycal_csv, ycal_csv], pred=[ycal_csv, ycal_csv, ycal_csv]))
# Prediction module - TO BE DONE !!!!!
st.write("---")
st.write("Predict chemical values from NIRS")
NIRS_csv = file_column.file_uploader("Select NIRS Data to predict", type="csv", help=" :mushroom: select a csv matrix with samples as rows and lambdas as columns")
export_name = './data/predictions/Predictions_of_'
if NIRS_csv:
export_name += str(NIRS_csv.name[:-4])
qsep = file_column.selectbox("Select csv separator - _detected_: " + str(find_delimiter('data/'+NIRS_csv.name)), options=[";", ","], index=[";", ","].index(str(find_delimiter('data/'+NIRS_csv.name))), key=2)
qhdr = file_column.selectbox("indexes column in csv? - _detected_: " + str(find_col_index('data/'+NIRS_csv.name)), options=["no", "yes"], index=["no", "yes"].index(str(find_col_index('data/'+NIRS_csv.name))), key=3)
model_column.write("Load your saved predictive model")
model_name_import = model_column.selectbox('Choose file:', options=os.listdir('data/models/'), key = 21)
if model_name_import != ' ':
export_name += '_with_' + str(model_name_import[:-4])
with open('data/models/' + model_name_import, 'rb') as f:
    model_loaded = joblib.load(f)  # load the saved model; used below by prediction()
    model_column.success("The model has been loaded successfully", icon="✅")
# use prediction function from application_functions.py to predict chemical values
result = prediction(NIRS_csv, qsep, qhdr, model_loaded)
st.write('Predicted values are: ')
pd.DataFrame(result).to_csv(export_name + '.csv')
st.write('Predictions exported to ' + export_name + '.csv')
# export to local drive
url = ('http://localhost:8501' + export_name[1:] + '.csv')
filename = export_name + '.csv'
urlretrieve(url, filename)
# create a report with information on the prediction
## see https://stackoverflow.com/a/59578663
if type(result) is list:
st.write(result)