Newer
Older
from Packages import *
st.set_page_config(page_title="NIRS Utils", page_icon=":goat:", layout="wide")
from Modules import *
from Class_Mod.DATA_HANDLING import *
from plotly.subplots import make_subplots
import plotly.graph_objects as go
from matplotlib.cm import ScalarMappable
import matplotlib.pyplot as plt, mpld3
import streamlit.components.v1 as components
# ---- Session state and page layout ----
# NOTE(review): this listing has no indentation and appears to be missing some lines,
# so the nesting of the statements below cannot be read from here - confirm against the real file.
# Re-store the 'interface' entry so that the key always exists in the session state.
st.session_state["interface"] = st.session_state.get('interface')
if st.session_state["interface"] == 'simple':
# With the 'simple' interface the "Predictions" page is hidden.
hide_pages("Predictions")
#path = os.path.dirname(os.path.abspath(__file__)).replace('\\','/')
#css_file = path[:path.find('/pages')]+'/style'
#local_css(css_file +"/style_model.css")
# NOTE(review): css_file is not assigned in the visible lines - presumably it comes from one of the star imports; verify.
local_css(css_file / "style_model.css")
####################################### page Design #######################################
st.title("Calibration Model Development")
st.markdown("Create a predictive model, then use it for predicting your target variable (chemical data) from NIRS spectra")
# Section I: M00 hosts the upload widgets, M0 the plots and summary table (see the code further down).
st.header("I - Data visualization", divider='blue')
M0, M00 = st.columns([1, .4])
# Section II: M1 hosts the algorithm selection and scores, M2 the model details.
st.header("II - Model creation", divider='blue')
M1, M2 = st.columns([2 ,4])
# Cross-validation area: two columns plus a full-width container.
st.header("Cross-Validation")
cv1, cv2 = st.columns([2,2])
cv3 = st.container()
# M7 / M8 receive the predicted-vs-measured and residual plots.
M7, M8 = st.columns([2,2])
M7.write('Predicted vs Measured values')
M8.write('Residuals plot')
# NOTE(review): M9 is not created in the visible lines - its definition is probably missing from this listing.
M9.write("-- Save the model --")
##############################################################################################
# Regression algorithms offered in the select box; the first entry is an empty string
# (presumably "nothing selected yet" - confirm).
reg_algo = ["","Full-PLSR", "Locally Weighted PLSR", "Interval-PLSR"]
####################################### ###########################################
# ---- Data loading ----
# Two input routes are visible below: a pair of CSV files (spectra + chemical data) or a single .dx file.
# NOTE(review): the conditions that choose between the two routes (and that guard against a missing
# upload) are not visible in this listing - confirm against the real file.
xcal_csv = M00.file_uploader("Select NIRS Data", type="csv", help=" :mushroom: select a csv matrix with samples as rows and lambdas as columns")
# Separator of the X file: the radio button defaults to the delimiter returned by
# find_delimiter() for 'data/' + the uploaded file name.
sepx = M00.radio("Select separator (X file) - _detected_: " + str(find_delimiter('data/'+xcal_csv.name)),
options=[";", ","], index=[";", ","].index(str(find_delimiter('data/'+xcal_csv.name))), key=0)
# Same idea for the sample-name column, using find_col_index().
hdrx = M00.radio("samples name (X file)? - _detected_: " + str(find_col_index('data/'+xcal_csv.name)),
options=["no", "yes"], index=["no", "yes"].index(str(find_col_index('data/'+xcal_csv.name))), key=1)
# col is passed as index_col to pd.read_csv below: column 0 when sample names are present, False otherwise.
if hdrx == "yes": col = 0
else: col = False
ycal_csv = M00.file_uploader("Select corresponding Chemical Data", type="csv", help=" :mushroom: select a csv matrix with samples as rows and chemical values as a column")
sepy = M00.radio("separator (Y file): ", options=[";", ","], key=2)
hdry = M00.radio("samples name (Y file)?: ", options=["no", "yes"], key=3)
# NOTE(review): the Y file is read with the same index_col (col) as the X file; hdry is not used in the visible lines.
xfile = pd.read_csv(xcal_csv, decimal='.', sep=sepx, index_col=col, header=0)
yfile = pd.read_csv(ycal_csv, decimal='.', sep=sepy, index_col=col)
# Continue only when both tables have at least one column.
if yfile.shape[1]>0 and xfile.shape[1]>0 :
# col_cat() returns two objects per table; here they are named spectra/meta_data and y/idx.
spectra, meta_data = col_cat(xfile)
y, idx = col_cat(yfile)
# With several columns in y the user picks the target; otherwise the single column is taken.
if y.shape[1]>1:
yname = M00.selectbox('Select target', options=y.columns)
y = y.loc[:,yname]
else:
y = y.iloc[:,0]
spectra = pd.DataFrame(spectra).astype(float)
if not meta_data.empty :
st.write(meta_data)
# NOTE(review): the branch that leads to this warning is not visible in this listing.
M1.warning('Tune decimal and separator parameters')
# .dx route: the upload is copied to a named temporary file so that read_dx() can be given a path.
data_file = M00.file_uploader("Select Data", type=".dx", help=" :mushroom: select a dx file")
if data_file:
with NamedTemporaryFile(delete=False, suffix=".dx") as tmp:
tmp.write(data_file.read())
tmp_path = tmp.name
chem_data, spectra, meta_data, meta_data_st = read_dx(file = tmp_path)
M00.success("The data have been loaded successfully", icon="✅")
yname = M00.selectbox('Select target', options=chem_data.columns)
# Keep only the samples whose selected target value is strictly positive.
measured = chem_data.loc[:,yname] > 0
y = chem_data.loc[:,yname].loc[measured]
spectra = spectra.loc[measured]
else:
M00.warning('Warning: Chemical data are not included in your file !', icon="⚠️")
# The temporary file was created with delete=False, so it is removed explicitly.
os.unlink(tmp_path)
### split the data
# ---- Train/test split and overview of the loaded data ----
# NOTE(review): indentation is lost in this listing, so how much of what follows is inside this `if` cannot be read from here.
if not spectra.empty and not y.empty:
#rd_seed = M1.slider("Customize Train-test split", min_value=1, max_value=100, value=42, format="%i")
# Split data into training and test sets using the kennard_stone method and correlation metric, 25% of data is used for testing
train_index, test_index = train_test_split_idx(spectra, y = y, method="kennard_stone", metric="correlation", test_size=0.25, random_state=42)
# Assign data to training and test sets
X_train, y_train = pd.DataFrame(spectra.iloc[train_index,:]), y.iloc[train_index]
X_test, y_test = pd.DataFrame(spectra.iloc[test_index,:]), y.iloc[test_index]
#### insight on loaded data
# First figure: all spectra, transposed so that each sample is drawn as one dashed line.
fig, ax1 = plt.subplots( figsize = (12,3))
spectra.T.plot(legend=False, ax = ax1, linestyle = '--')
ax1.set_ylabel('Signal intensity')
ax1.margins(0)
plt.tight_layout()
M0.pyplot(fig) ######## Loaded graph
# The same figure is also saved to disk; the file name appears again in the report figure list near the end of the script.
fig.savefig("./Report/figures/Spectre_mod.png")
# Second figure: histograms (with KDE) of y for the whole set, the train set and the test set.
fig, ax2 = plt.subplots(figsize = (12,3))
sns.histplot(y, color="deeppink", kde = True,label="y",ax = ax2, fill=True)
sns.histplot(y_train, color="blue", kde = True,label="y (train)",ax = ax2, fill=True)
sns.histplot(y_test, color="green", kde = True,label="y (test)",ax = ax2, fill=True)
ax2.set_xlabel('y')
plt.legend()
plt.tight_layout()
M0.pyplot(fig)
# Descriptive statistics (desc_stats) for train, test and total, rounded to 2 decimals.
M0.write('Loaded data summary')
M0.write(pd.DataFrame([desc_stats(y_train),desc_stats(y_test),desc_stats(y)], index =['Train', 'Test', 'Total'] ).round(2))
# The same table is built a second time and kept in LoDaSum, which is passed to report.report() at the end of the script.
LoDaSum=pd.DataFrame([desc_stats(y_train),desc_stats(y_test),desc_stats(y)], index =['Train', 'Test', 'Total'] ).round(2)
####################################### Insight into the loaded data
#######################################
# ---- Model creation ----
# The chosen entry of reg_algo decides which model class is fitted.
regression_algo = M1.selectbox("Choose the algorithm for regression", options=reg_algo, key = 12)
# reg_algo[1] == "Full-PLSR"
if regression_algo == reg_algo[1]:
# Train model with model function from application_functions.py
Reg = Plsr(train = [X_train, y_train], test = [X_test, y_test], n_iter=10)
reg_model = Reg.model_
#M2.dataframe(Pin.pred_data_)
# reg_algo[2] == "Locally Weighted PLSR": the data are written to CSV files in temp/, an external
# script (Class_Mod/LWPLSR_Call.py) is run with the current interpreter, and its JSON output is read back.
elif regression_algo == reg_algo[2]:
data_to_work_with = ['x_train_np', 'y_train_np', 'x_test_np', 'y_test_np']#,'x_train_np_cv1', 'y_train_np_cv1', 'x_test_np_cv1', 'y_test_np_cv1', 'x_train_np_cv2', 'y_train_np_cv2', 'x_test_np_cv2', 'y_test_np_cv2', 'x_train_np_cv3', 'y_train_np_cv3', 'x_test_np_cv3', 'y_test_np_cv3',]
x_train_np, y_train_np, x_test_np, y_test_np = X_train.to_numpy(), y_train.to_numpy(), X_test.to_numpy(), y_test.to_numpy()
# x_train_np_cv1, y_train_np_cv1, x_test_np_cv1, y_test_np_cv1, x_train_np_cv2, y_train_np_cv2, x_test_np_cv2, y_test_np_cv2, x_train_np_cv3, y_train_np_cv3, x_test_np_cv3, y_test_np_cv3, = X_train_cv1.to_numpy(), y_train_cv1.to_numpy(), X_test_cv1.to_numpy(), y_test_cv1.to_numpy(), X_train_cv2.to_numpy(), y_train_cv2.to_numpy(), X_test_cv2.to_numpy(), y_test_cv2.to_numpy(), X_train_cv3.to_numpy(), y_train_cv3.to_numpy(), X_test_cv3.to_numpy(), y_test_cv3.to_numpy()
temp_path = Path('temp/')
# Each array is looked up by its variable name through vars() and saved as temp/<name>.csv.
for i in data_to_work_with: np.savetxt(temp_path / str(i + ".csv"), vars()[i], delimiter=",")
import subprocess
subprocess_path = Path("Class_Mod/")
# NOTE(review): the return code of the child process is not checked here.
subprocess.run([f"{sys.executable}", subprocess_path / "LWPLSR_Call.py"])
with open(temp_path / "lwplsr_outputs.json", "r") as outfile:
Reg_json = json.load(outfile)
# Clean up the exchange files once the results are loaded.
for i in data_to_work_with: os.unlink(temp_path / str(i + ".csv"))
os.unlink(temp_path / "lwplsr_outputs.json")
# Reg = type('obj', (object,), {'model' : pd.json_normalize(Reg_json['model']), 'pred_data_' : [pd.json_normalize(Reg_json['pred_data_cv1']), pd.json_normalize(Reg_json['pred_data_cv2']), pd.json_normalize(Reg_json['pred_data_cv3']), pd.json_normalize(Reg_json['pred_data_test'])]})
# Build an ad-hoc object exposing 'model' and 'pred_data_' from the JSON content
# (the attribute is named 'model' here, whereas the other branches read Reg.model_).
pred = ['pred_data_train', 'pred_data_cv1', 'pred_data_cv2', 'pred_data_cv3', 'pred_data_test']
Reg = type('obj', (object,), {'model' : pd.json_normalize(Reg_json['model']), 'pred_data_' : [pd.json_normalize(Reg_json[i]) for i in pred]})
# Each prediction table is transposed and re-indexed with the sample labels.
# NOTE(review): the `if` that pairs with the `else:` below is missing from this listing, so the rule
# deciding between the train and the test index cannot be read from here.
for i in range(len(pred)):
Reg.pred_data_[i] = Reg.pred_data_[i].T.reset_index().drop(columns = ['index'])
Reg.pred_data_[i].index = list(y_train.index)
else:
Reg.pred_data_[i].index = list(y_test.index)
# NOTE(review): the lines below use TpeIpls and presumably belong to the "Interval-PLSR" branch
# (reg_algo[3]); the corresponding `elif` is not visible in this listing.
s = M1.number_input(label='Enter the maximum number of intervals', min_value=1, max_value=6, value=3)
it = M1.number_input(label='Enter the number of iterations', min_value=2, max_value=10, value=3)
progress_text = "The model is being created. Please wait."
Reg = TpeIpls(train = [X_train, y_train], test=[X_test, y_test], n_intervall = s, n_iter=it)
# The progress bar is created after the TpeIpls call above, emptied at once and replaced by a completed one.
pro = M1.progress(0, text="The model is being created. Please wait!")
pro.empty()
M1.progress(100, text = "The model has successfully been created!")
time.sleep(1)
reg_model = Reg.model_
# M3.write('-- Spectral regions used for model creation --')
# intervalls = Reg.bands.T
# M3.table(intervalls)
# fig, ax = plt.subplots(figsize = (12, 6))
# X_train.mean().plot(ax = ax)
# for i in range(s):
# colnames = np.array(y)
# num = {'u','i','f','c'}
# if np.array(X_train.columns).dtype.kind in num:
# plt.plot(X_train.columns, X_train.mean(), color = 'black')
# ax.axvspan(X_train.columns[intervalls['from'][i]], X_train.columns[intervalls['to'][i]],
# color='#2a52be', alpha=0.5, lw=0)
# plt.tight_layout()
# plt.margins(x = 0)
# else:
# plt.plot(np.arange(X_train.shape[1]), X_train.mean(), color = 'black')
# ax.axvspan(intervalls['from'][i], intervalls['to'][i], color='#2a52be', alpha=0.5, lw=0)
# plt.tight_layout()
# plt.margins(x = 0)
# M3.write('-- Visualization of the spectral regions used for model creation -- ')
# M3.pyplot(fig)
# Show the transposed selected_features_ table of the fitted model in column M2.
M2.write('-- Spectral regions used for model creation --')
intervalls = Reg.selected_features_.T
M2.table(intervalls)
# elif regression_algo == reg_algo[4]:
# Reg = PlsR(x_train = X_train, x_test = X_test, y_train = y_train, y_test = y_test)
# reg_model = Reg.model_
################# Model analysis ############
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
# ---- Pretreated training spectra, coloured by target value ----
M2.write('-- Pretreated data (train) visualization and important spectral regions in the model -- ')
# NOTE(review): fig is rebound three times in this section (here, on the next line, and by the
# plt.subplots(figsize = (10,3)) call further down); only that last figure is rendered below.
fig, (ax1, ax2) = plt.subplots(2,1, figsize = (12, 6))
fig = make_subplots(rows=3, cols=1, shared_xaxes=True, vertical_spacing=0.02)
# fig.append_trace(go.Scatter(x=[3, 4, 5],
# y=[1000, 1100, 1200],), row=1, col=1)
# fig.append_trace(go.Scatter(x=[2, 3, 4],
# y=[100, 110, 120],), row=2, col=1)
# fig.append_trace(go.Scatter(x=[0, 1, 2],
# y=[10, 11, 12]), row=3, col=1)
# fig.update_layout(height=600, width=600, title_text="Stacked Subplots")
# a = Reg.pretreated_spectra_
# r = pd.concat([y_train, a], axis = 1)
# rr = r.melt("x")
# rr.columns = ['y values', 'x_axis', 'y_axis']
# fig = px.scatter(rr, x = 'x_axis', y = 'y_axis', color_continuous_scale=px.colors.sequential.Viridis, color = 'y values')
# M3.plotly_chart(fig)
from matplotlib.colors import Normalize
# Map each training target value to a viridis colour, normalised between the min and max of y_train.
color_variable = y_train
norm = Normalize(vmin=color_variable.min(), vmax= color_variable.max())
cmap = plt.get_cmap('viridis')
colors = cmap(norm(color_variable.values))
fig, ax = plt.subplots(figsize = (10,3))
# One line per row of Reg.pretreated_spectra_, drawn with the colour at the same position in `colors`
# (assumes the rows are in the same order as y_train - TODO confirm).
for i in range(Reg.pretreated_spectra_.shape[0]):
ax.plot(Reg.pretreated_spectra_.columns, Reg.pretreated_spectra_.iloc[i,:], color = colors[i])
# Colour bar built from the same norm/cmap pair.
sm = ScalarMappable(norm = norm, cmap = cmap)
cbar = plt.colorbar(sm, ax = ax)
# cbar.set_label('Target range')
plt.tight_layout()
# The matplotlib figure is converted to HTML with mpld3 and embedded in column M2.
htmlfig = mpld3.fig_to_html(fig)
with M2:
st.components.v1.html(htmlfig, height=600)
# X_train.mean().plot(ax = ax2)
# for i in range(s):
# colnames = np.array(y)
# num = {'u','i','f','c'}
# if np.array(X_train.columns).dtype.kind in num:
# plt.plot(X_train.columns, X_train.mean(), color = 'black')
# ax2.axvspan(X_train.columns[intervalls['from'][i]], X_train.columns[intervalls['to'][i]],
# color='#2a52be', alpha=0.5, lw=0)
# plt.tight_layout()
# plt.margins(x = 0)
# else:
# plt.plot(np.arange(X_train.shape[1]), X_train.mean(), color = 'black')
# ax2.axvspan(intervalls['from'][i], intervalls['to'][i], color='#2a52be', alpha=0.5, lw=0)
# plt.tight_layout()
# plt.margins(x = 0)
# pd.DataFrame(Reg.pretreated_spectra_).plot(ax = ax1)
# M3.pyplot(fig)
############
# ---- Cross-validation results and model performance ----
cv2.write('-- Cross-Validation Summary--')
cv2.write(Reg.CV_results_)
cv2.write('-- Out-of-Fold Predictions Visualization (All in one) --')
# Scatter of predicted vs measured values taken from Reg.cv_data_[2], coloured by fold, with an OLS trendline.
fig1 = px.scatter(Reg.cv_data_[2], x ='Measured', y = 'Predicted' , trendline='ols', color='Folds', symbol="Folds",
color_discrete_sequence=px.colors.qualitative.G10)
# Dashed black y = x reference line spanning 0.95*min to 1.05*max of the measured values.
fig1.add_shape(type='line', x0 = .95 * min(Reg.cv_data_[2].loc[:,'Measured']), x1 = 1.05 * max(Reg.cv_data_[2].loc[:,'Measured']), y0 = .95 * min(Reg.cv_data_[2].loc[:,'Measured']), y1 = 1.05 * max(Reg.cv_data_[2].loc[:,'Measured']), line = dict(color='black', dash = "dash"))
fig1.update_traces(marker_size=7, showlegend=False)
cv2.plotly_chart(fig1)
# Same data with one facet per fold and the 'index' column used as point labels.
fig0 = px.scatter(Reg.cv_data_[2], x ='Measured', y = 'Predicted' , trendline='ols', color='Folds', symbol="Folds", facet_col = 'Folds',facet_col_wrap=1,
color_discrete_sequence=px.colors.qualitative.G10, text='index', width=800, height=1000)
fig0.update_traces(marker_size=8, showlegend=False)
cv1.write('-- Visualisation des prédictions hors échantillon (Graphiques séparés) --')
# NOTE(review): in the visible lines fig0 is only written to disk; the call that displays it in cv1 is not shown here.
fig0.write_image("./Report/figures/Predictions_V.png")
# Preprocessing information selected for the model, also saved as JSON.
M1.write('-- Spectral preprocessing info --')
M1.write(Reg.best_hyperparams_print)
with open("data/params/Preprocessing.json", "w") as outfile:
json.dump(Reg.best_hyperparams_, outfile)
M1.write("-- Model performance --")
# NOTE(review): yc and yt are not assigned in the visible lines - presumably the calibration and test
# predictions; verify. The same metrics(...) call is evaluated three times with identical arguments.
M1.dataframe(metrics(c = [y_train, yc], t = [y_test, yt], method='regression').scores_)
M2.dataframe(metrics(c = [y_train, yc], t = [y_test, yt], method='regression').scores_)
# model_per is passed to report.report() at the end of the script.
model_per=pd.DataFrame(metrics(c = [y_train, yc], t = [y_test, yt], method='regression').scores_)
#from st_circular_progress import CircularProgress
#my_circular_progress = CircularProgress(label = 'Performance',value = 50, key = 'my performance',
# size = "medium", track_color = "black", color = "blue")
#my_circular_progress.st_circular_progress()
#my_circular_progress.update_value(progress=20)
# Predicted-vs-measured and residual plots; each is shown in its column and then saved through plt.savefig
# (which saves pyplot's current figure - presumably the one just drawn; confirm).
a=reg_plot([y_train, y_test],[yc, yt], train_idx = train_index, test_idx = test_index)
M7.pyplot(a)
plt.savefig('./Report/figures/Predictedvs.png')
residual_plot = resid_plot([y_train, y_test], [yc, yt], train_idx=train_index, test_idx=test_index)
M8.pyplot(residual_plot)
plt.savefig('./Report/figures/residual_plot.png')
# ---- Model export ----
# NOTE(review): rega, file, files_format and model_name are not assigned in the visible lines
# (rega only appears in the commented-out line below); their definitions are probably missing from this listing.
# rega = Reg.important_features_ ##### ADD FEATURES IMPORTANCE PLOT
#model_export = M1.selectbox("Choose way to export", options=["pickle", "joblib"], key=20)
# Today's date formatted as '_YYYY_MM_DD_', used in the exported file names.
date_time = datetime.datetime.strftime(datetime.date.today(), '_%Y_%m_%d_')
if M9.button('Export Model'):
path = 'data/models/model_'
# files_format[0]: the file name is built from the names of the two uploaded CSV files (extension stripped at the first '.').
if file == files_format[0]:
#export_package = __import__(model_export)
# NOTE(review): the statement that writes the model into f is not visible in this branch
# (the other branch calls joblib.dump(reg_model, f)) - confirm against the real file.
with open(path + model_name + date_time + '_created_on_' + xcal_csv.name[:xcal_csv.name.find(".")] +""+
'_and_' + ycal_csv.name[:ycal_csv.name.find(".")] + '_data_' + '.pkl','wb') as f:
pd.DataFrame(rega[1]).to_csv(path + model_name + date_time + '_on_' + xcal_csv.name[:xcal_csv.name.find(".")]
+ '_and_' + ycal_csv.name[:ycal_csv.name.find(".")] + '_data_'+'Wavelengths_index.csv', sep = ';')
# files_format[1]: the file name is built from the name of the uploaded .dx file.
elif file == files_format[1]:
#export_package = __import__(model_export)
with open(path + model_name + '_on_'+ data_file.name[:data_file.name.find(".")] + '_data_' + '.pkl','wb') as f:
joblib.dump(reg_model, f)
# For "Interval-PLSR" (reg_algo[3]) rega[1] is sorted in place and also exported as a ';'-separated CSV.
if regression_algo == reg_algo[3]:
rega[1].sort()
pd.DataFrame(rega[1]).to_csv(path +data_file.name[:data_file.name.find(".")]+ model_name + date_time+ '_on_' + '_data_'+'Wavelengths_index.csv', sep = ';')
# NOTE(review): two near-identical confirmation messages follow; with the indentation lost it is unclear
# which branch each belongs to.
st.write('Model Exported ')
st.write('Model Exported')
# create a report with information on the model
## see https://stackoverflow.com/a/59578663
# NOTE(review): the two comment lines above do not describe the code that follows, which registers
# the application pages and adds a link to the prediction page.
pages_folder = Path("pages/")
show_pages(
[Page("app.py", "Home"),
Page(str(pages_folder / "4-inputs.py"), "Inputs"),
Page(str(pages_folder / "1-samples_selection.py"), "Samples Selection"),
Page(str(pages_folder / "2-model_creation.py"), "Models Creation"),
Page(str(pages_folder / "3-prediction.py"), "Predictions"),
]
)
# NOTE(review): this path uses a hard-coded backslash separator, unlike the Path-based entries above.
st.page_link('pages\\3-prediction.py', label = 'Keep on keepin\' on to predict your values !')
## Load .dx file
# NOTE(review): the heading above does not match the code below, which handles the report export.
# File names of the figures handed to the report builder; some of them are saved earlier in this
# script under ./Report/figures/.
Ac_Km = ['histo.png', 'Spectre_mod.png','Predictions_V.png','Allinone.png','Predictedvs.png','residual_plot.png']
with st.container():
header3, header4 = st.columns(2)
if header3.button("Exporter le RR"):
# A report is only produced for "Full-PLSR" (reg_algo[1]); every other case is a no-op.
if regression_algo == reg_algo[1]:
# NOTE(review): json_sp and cv99 are not assigned in the visible lines - verify where they come from.
latex_report = report.report(LoDaSum, 'model',Ac_Km,json_sp,model_per,'full_plsr',cv99)
report.compile_latex()
else:
pass
else:
pass