Newer
Older
# Page bootstrap. The star imports below presumably supply `st`, `pd`, `np`,
# `plt`, `Path`, `os` and the project helpers used throughout this script —
# TODO confirm against Packages / Modules.
from Packages import *
# Browser-tab title, icon and wide layout for this page.
st.set_page_config(page_title="NIRS Utils", page_icon=":goat:", layout="wide")
from Modules import *
from Class_Mod.DATA_HANDLING import *
# NOTE(review): `pages_folder` is not assigned before this call in the visible
# lines; presumably it comes from one of the star imports — confirm.
add_sidebar(pages_folder)
def clear_directory(directory):
    """Remove every entry inside *directory* while keeping the directory itself.

    Regular files and symbolic links are unlinked; sub-directories are removed
    together with their content. A missing *directory* is silently ignored.

    Args:
        directory: ``pathlib.Path`` of the folder to empty.
    """
    # Local import keeps this block self-contained.
    import shutil

    if not directory.exists():
        return
    for entry in directory.iterdir():
        # Test the symlink case together with files so a link pointing at a
        # directory is unlinked instead of having its target deleted.
        if entry.is_file() or entry.is_symlink():
            entry.unlink()
        elif entry.is_dir():
            shutil.rmtree(entry)


# Start every run with an empty figures folder so the report only ever embeds
# images produced by the current session.
repertoire_a_vider = Path('Report/figures')
clear_directory(repertoire_a_vider)
# Inject the stylesheet dedicated to this page.
local_css(css_file / "style_model.css")
####################################### page Design #######################################
# The layout skeleton is declared up front; the widgets and plots created
# further down are written into these placeholders.
st.title("Calibration Model Development")
st.markdown("Create a predictive model, then use it for predicting your target variable (chemical data) from NIRS spectra")
st.header("I - Data visualization", divider='blue')
M0, M00 = st.columns([1, .4])
st.header("II - Model creation", divider='blue')
M1, M2 = st.columns([2, 4])
cv1, cv2 = st.columns([2, 2])
cv3 = st.container()
M7, M8 = st.columns([2, 2])
M7.write('Predicted vs Measured values')
M8.write('Residuals plot')
# Container for the export controls. It has to exist before it is written to;
# the original used `M9` without ever creating it.
M9 = st.container()
M9.write("-- Save the model --")
##############################################################################################
# Regression algorithms offered in the selector; the empty first entry means
# "nothing selected yet".
reg_algo = ["","Full-PLSR", "Locally Weighted PLSR", "Interval-PLSR"]
####################################### ###########################################
# --- CSV input: spectra (X) and chemical reference values (Y) ---
# NOTE(review): the visible lines use `xcal_csv.name` right after creating the
# uploader, with no check that a file was actually provided; a guard such as
# `if xcal_csv:` appears to be missing from this view — confirm.
xcal_csv = M00.file_uploader("Select NIRS Data", type="csv", help=" :mushroom: select a csv matrix with samples as rows and lambdas as columns")
# The radio buttons are pre-selected with the separator / header setting
# returned by find_delimiter / find_col_index for 'data/<uploaded file name>'.
sepx = M00.radio("Select separator (X file) - _detected_: " + str(find_delimiter('data/'+xcal_csv.name)),
options=[";", ","], index=[";", ","].index(str(find_delimiter('data/'+xcal_csv.name))), key=0)
hdrx = M00.radio("samples name (X file)? - _detected_: " + str(find_col_index('data/'+xcal_csv.name)),
options=["no", "yes"], index=["no", "yes"].index(str(find_col_index('data/'+xcal_csv.name))), key=1)
# `col` is passed as `index_col` to pd.read_csv below: first column when the
# file carries sample names, otherwise no index column.
if hdrx == "yes": col = 0
else: col = False
ycal_csv = M00.file_uploader("Select corresponding Chemical Data", type="csv", help=" :mushroom: select a csv matrix with samples as rows and chemical values as a column")
sepy = M00.radio("separator (Y file): ", options=[";", ","], key=2)
# NOTE(review): `hdry` is not read anywhere in the visible lines; the Y file is
# parsed with the `col` value derived from the X-file answer — confirm intent.
hdry = M00.radio("samples name (Y file)?: ", options=["no", "yes"], key=3)
xfile = pd.read_csv(xcal_csv, decimal='.', sep=sepx, index_col=col, header=0)
yfile = pd.read_csv(ycal_csv, decimal='.', sep=sepy, index_col=col)
# Only proceed when both files parsed into at least one column.
if yfile.shape[1]>0 and xfile.shape[1]>0 :
# col_cat returns two objects per file; here they are used as
# (spectra, meta_data) for X and (y, idx) for Y.
spectra, meta_data = col_cat(xfile)
y, idx = col_cat(yfile)
# Several candidate targets: let the user choose one; otherwise take the only column.
if y.shape[1]>1:
yname = M00.selectbox('Select target', options=y.columns)
y = y.loc[:,yname]
else:
y = y.iloc[:,0]
spectra = pd.DataFrame(spectra).astype(float)
if not meta_data.empty :
st.write(meta_data)
# NOTE(review): presumably the `else` branch of the shape check above (shown
# when parsing produced no columns) — the branch keyword is not visible here.
M1.warning('Tune decimal and separator parameters')
# --- .dx input: a single file holding spectra and, optionally, chemical data ---
data_file = M00.file_uploader("Select Data", type=".dx", help=" :mushroom: select a dx file")
if data_file:
# read_dx takes a file path, so the upload is first written to a temporary
# file (delete=False keeps it on disk after the `with` block closes it).
with NamedTemporaryFile(delete=False, suffix=".dx") as tmp:
tmp.write(data_file.read())
tmp_path = tmp.name
chem_data, spectra, meta_data, meta_data_st = read_dx(file = tmp_path)
M00.success("The data have been loaded successfully", icon="✅")
yname = M00.selectbox('Select target', options=chem_data.columns)
# Keep only the samples whose selected target value is strictly positive.
measured = chem_data.loc[:,yname] > 0
y = chem_data.loc[:,yname].loc[measured]
spectra = spectra.loc[measured]
# NOTE(review): the `if` this `else` belongs to is not visible; from the
# message it presumably tests whether chem_data is non-empty — confirm.
else:
M00.warning('Warning: Chemical data are not included in your file !', icon="⚠️")
# Remove the temporary .dx copy once it has been parsed.
os.unlink(tmp_path)
### split the data
# Everything below needs both the spectra and the target values to be loaded.
if not spectra.empty and not y.empty:
    # Use the column labels as x-axis values when they are numeric; otherwise
    # fall back to positional indices.
    if np.array(spectra.columns).dtype.kind in ['i', 'f']:
        colnames = spectra.columns
    else:
        colnames = np.arange(spectra.shape[1])

    #rd_seed = M1.slider("Customize Train-test split", min_value=1, max_value=100, value=42, format="%i")
    # Split data into training and test sets using the kennard_stone method and correlation metric, 25% of data is used for testing
    train_index, test_index = train_test_split_idx(spectra, y=y, method="kennard_stone", metric="correlation", test_size=0.25, random_state=42)

    # Assign data to training and test sets
    X_train, y_train = pd.DataFrame(spectra.iloc[train_index, :]), y.iloc[train_index]
    X_test, y_test = pd.DataFrame(spectra.iloc[test_index, :]), y.iloc[test_index]

    #### insight on loaded data
    # Raw spectra, one line per sample; saved for the report.
    fig, ax1 = plt.subplots(figsize=(12, 3))
    spectra.T.plot(legend=False, ax=ax1, linestyle='--')
    ax1.set_ylabel('Signal intensity')
    ax1.margins(0)
    plt.tight_layout()
    fig.savefig("./Report/figures/Spectre_mod.png")

    # Target distribution for the whole set and for each split, overlaid.
    fig, ax2 = plt.subplots(figsize=(12, 3))
    for values, colour, label in (
        (y, "deeppink", "y"),
        (y_train, "blue", "y (train)"),
        (y_test, "green", "y (test)"),
    ):
        sns.histplot(values, color=colour, kde=True, label=label, ax=ax2, fill=True)
    ax2.set_xlabel('y')
    plt.legend()
    plt.tight_layout()
    M0.pyplot(fig)
    fig.savefig("./Report/figures/histo.png")

    # Build the descriptive-statistics table once; it is displayed here and
    # kept in `LoDaSum` for the report.
    LoDaSum = pd.DataFrame(
        [desc_stats(y_train), desc_stats(y_test), desc_stats(y)],
        index=['Train', 'Test', 'Total'],
    ).round(2)
    M0.write('Loaded data summary')
    M0.write(LoDaSum)
####################################### Insight into the loaded data
#######################################
# Algorithm selector; the branches below compare the choice against the
# entries of `reg_algo` by position.
regression_algo = M1.selectbox("Choose the algorithm for regression", options=reg_algo, key = 12)
# --- reg_algo[1]: Full-PLSR ---
if regression_algo == reg_algo[1]:
# Train model with model function from application_functions.py
Reg = Plsr(train = [X_train, y_train], test = [X_test, y_test], n_iter=10)
reg_model = Reg.model_
#M2.dataframe(Pin.pred_data_)
# --- reg_algo[2]: Locally Weighted PLSR, run in a separate Python process ---
elif regression_algo == reg_algo[2]:
# Names of the local arrays handed to the subprocess as CSV files.
data_to_work_with = ['x_train_np', 'y_train_np', 'x_test_np', 'y_test_np']#,'x_train_np_cv1', 'y_train_np_cv1', 'x_test_np_cv1', 'y_test_np_cv1', 'x_train_np_cv2', 'y_train_np_cv2', 'x_test_np_cv2', 'y_test_np_cv2', 'x_train_np_cv3', 'y_train_np_cv3', 'x_test_np_cv3', 'y_test_np_cv3',]
x_train_np, y_train_np, x_test_np, y_test_np = X_train.to_numpy(), y_train.to_numpy(), X_test.to_numpy(), y_test.to_numpy()
# x_train_np_cv1, y_train_np_cv1, x_test_np_cv1, y_test_np_cv1, x_train_np_cv2, y_train_np_cv2, x_test_np_cv2, y_test_np_cv2, x_train_np_cv3, y_train_np_cv3, x_test_np_cv3, y_test_np_cv3, = X_train_cv1.to_numpy(), y_train_cv1.to_numpy(), X_test_cv1.to_numpy(), y_test_cv1.to_numpy(), X_train_cv2.to_numpy(), y_train_cv2.to_numpy(), X_test_cv2.to_numpy(), y_test_cv2.to_numpy(), X_train_cv3.to_numpy(), y_train_cv3.to_numpy(), X_test_cv3.to_numpy(), y_test_cv3.to_numpy()
temp_path = Path('temp/')
# Each array is looked up by name through vars() and written to temp/<name>.csv.
for i in data_to_work_with: np.savetxt(temp_path / str(i + ".csv"), vars()[i], delimiter=",")
import subprocess
subprocess_path = Path("Class_Mod/")
# Runs Class_Mod/LWPLSR_Call.py with the current interpreter.
# NOTE(review): the return code is not inspected; a failed run would only show
# up when the JSON file below cannot be opened.
subprocess.run([f"{sys.executable}", subprocess_path / "LWPLSR_Call.py"])
# Results are read back from temp/lwplsr_outputs.json.
with open(temp_path / "lwplsr_outputs.json", "r") as outfile:
Reg_json = json.load(outfile)
# Clean up the exchange files.
for i in data_to_work_with: os.unlink(temp_path / str(i + ".csv"))
os.unlink(temp_path / "lwplsr_outputs.json")
# Reg = type('obj', (object,), {'model' : pd.json_normalize(Reg_json['model']), 'pred_data_' : [pd.json_normalize(Reg_json[i]) for i in pred]})
pred = ['pred_data_train', 'pred_data_cv1', 'pred_data_cv2', 'pred_data_cv3', 'pred_data_test']
# Ad-hoc class standing in for a model object, carrying `model` and `pred_data_`.
# NOTE(review): other parts of this script read `Reg.model_`, `Reg.CV_results_`
# and `Reg.cv_data_`, which are not set here — confirm this branch is complete.
Reg = type('obj', (object,), {'model' : pd.json_normalize(Reg_json['model']), 'pred_data_' : [pd.json_normalize(Reg_json[i]) for i in pred]})
# Reshape each prediction table and re-attach sample labels.
for i in range(len(pred)):
Reg.pred_data_[i] = Reg.pred_data_[i].T.reset_index().drop(columns = ['index'])
# NOTE(review): the condition choosing between train and test labels is not
# visible; presumably only the last entry ('pred_data_test') takes the test
# index — confirm.
Reg.pred_data_[i].index = list(y_train.index)
else:
Reg.pred_data_[i].index = list(y_test.index)
# --- Interval-PLSR settings and fit ---
# NOTE(review): the `elif regression_algo == reg_algo[3]:` line presumably
# guarding this section is not visible here — confirm.
s = M1.number_input(label='Enter the maximum number of intervals', min_value=1, max_value=6, value=3)
it = M1.number_input(label='Enter the number of iterations', min_value=2, max_value=10, value=3)
# NOTE(review): `progress_text` is not used in the visible lines.
progress_text = "The model is being created. Please wait."
Reg = TpeIpls(train = [X_train, y_train], test=[X_test, y_test], n_intervall = s, n_iter=it)
# The progress bar is created after the model has been fitted and is emptied
# straight away, then replaced by a completed bar for one second.
pro = M1.progress(0, text="The model is being created. Please wait!")
pro.empty()
M1.progress(100, text = "The model has successfully been created!")
time.sleep(1)
reg_model = Reg.model_
M2.write('-- Important Spectral regions used for model creation --')
# `intervalls` holds the selected feature positions; `intervalls_with_cols`
# is filled below with the matching column labels of `spectra`.
intervalls = Reg.selected_features_.T
intervalls_with_cols = Reg.selected_features_.T
for i in range(intervalls.shape[0]):
for j in range(intervalls.shape[1]):
intervalls_with_cols.iloc[i,j] = spectra.columns[intervalls.iloc[i,j]]
M2.table(intervalls_with_cols)
# elif regression_algo == reg_algo[4]:
# Reg = PlsR(x_train = X_train, x_test = X_test, y_train = y_train, y_test = y_test)
# reg_model = Reg.model_
################# Model analysis ############
#M2.write('-- Pretreated data (train) visualization and important spectral regions in the model -- ')
# NOTE(review): the matplotlib figure created on the next line is rebound to a
# plotly figure immediately afterwards, and neither `fig` is used again in the
# visible lines before being reassigned — one of the two looks like leftover code.
fig, (ax1, ax2) = plt.subplots(2,1, figsize = (12, 6))
fig = make_subplots(rows=3, cols=1, shared_xaxes=True, vertical_spacing=0.02)
# --- Disabled experiments kept for reference: stacked plotly traces, a plotly
# scatter of the pretreated spectra, and a colour-mapped matplotlib rendering. ---
# fig.append_trace(go.Scatter(x=[3, 4, 5],
# y=[1000, 1100, 1200],), row=1, col=1)
# fig.append_trace(go.Scatter(x=[2, 3, 4],
# y=[100, 110, 120],), row=2, col=1)
# fig.append_trace(go.Scatter(x=[0, 1, 2],
# y=[10, 11, 12]), row=3, col=1)
# fig.update_layout(height=600, width=600, title_text="Stacked Subplots")
# a = Reg.pretreated_spectra_
# r = pd.concat([y_train, a], axis = 1)
# rr = r.melt("x")
# rr.columns = ['y values', 'x_axis', 'y_axis']
# fig = px.scatter(rr, x = 'x_axis', y = 'y_axis', color_continuous_scale=px.colors.sequential.Viridis, color = 'y values')
# M3.plotly_chart(fig)
# from matplotlib.colors import Normalize
# color_variable = y_train
# norm = Normalize(vmin=color_variable.min(), vmax= color_variable.max())
# cmap = plt.get_cmap('viridis')
# colors = cmap(norm(color_variable.values))
# fig, ax = plt.subplots(figsize = (10,3))
# for i in range(Reg.pretreated_spectra_.shape[0]):
# ax.plot(Reg.pretreated_spectra_.columns, Reg.pretreated_spectra_.iloc[i,:], color = colors[i])
# sm = ScalarMappable(norm = norm, cmap = cmap)
# cbar = plt.colorbar(sm, ax = ax)
# # cbar.set_label('Target range')
# plt.tight_layout()
# htmlfig = mpld3.fig_to_html(fig)
# with M2:
# st.components.v1.html(htmlfig, height=600)
############
# Cross-validation summary followed by two views of the out-of-fold
# predictions: one combined scatter (fig1) and one facet per fold (fig0).
cv2.write('-- Cross-Validation Summary--')
cv2.write(Reg.CV_results_)
cv2.write('-- Out-of-Fold Predictions Visualization (All in one) --')
cv_oof = Reg.cv_data_[2]
fig1 = px.scatter(cv_oof, x='Measured', y='Predicted', trendline='ols', color='Folds', symbol="Folds",
                  color_discrete_sequence=px.colors.qualitative.G10)
# Identity line (y = x), extended 5 % beyond the measured range at both ends.
diag_lo = .95 * min(cv_oof.loc[:, 'Measured'])
diag_hi = 1.05 * max(cv_oof.loc[:, 'Measured'])
fig1.add_shape(type='line', x0=diag_lo, x1=diag_hi, y0=diag_lo, y1=diag_hi,
               line=dict(color='black', dash="dash"))
fig1.update_traces(marker_size=7, showlegend=False)
cv2.plotly_chart(fig1, use_container_width=True)
fig0 = px.scatter(cv_oof, x='Measured', y='Predicted', trendline='ols', color='Folds', symbol="Folds",
                  facet_col='Folds', facet_col_wrap=1,
                  color_discrete_sequence=px.colors.qualitative.G10, text='index', width=800, height=1000)
fig0.update_traces(marker_size=8, showlegend=False)
# The combined plot belongs in Allinone.png; exporting fig0 here made both
# report images show the per-fold facets.
fig1.write_image("./Report/figures/Allinone.png")
cv1.write('-- Out-of-Fold Predictions Visualization (Separate plots) --')
cv1.plotly_chart(fig0, use_container_width=True)
fig0.write_image("./Report/figures/Predictions_V.png")
# Show the preprocessing selected for the model and persist it as JSON.
M1.write('-- Spectral preprocessing info --')
M1.write(Reg.best_hyperparams_print)
with open("data/params/Preprocessing.json", "w") as outfile:
    json.dump(Reg.best_hyperparams_, outfile)

# Score the model once; the same table is displayed and kept in `model_per`
# for the report.
M1.write("-- Model performance --")
scores = metrics(c=[y_train, yc], t=[y_test, yt], method='regression').scores_
M1.dataframe(scores)
model_per = pd.DataFrame(scores)
#from st_circular_progress import CircularProgress
#my_circular_progress = CircularProgress(label = 'Performance',value = 50, key = 'my performance',
# size = "medium", track_color = "black", color = "blue")
#my_circular_progress.st_circular_progress()
#my_circular_progress.update_value(progress=20)

# Diagnostic plots. Each figure is saved through its own handle so the file
# written is the figure just displayed, not whichever one pyplot currently
# considers active.
a = reg_plot([y_train, y_test], [yc, yt], train_idx=train_index, test_idx=test_index)
M7.pyplot(a)
a.savefig('./Report/figures/Predictedvs.png')
residual_plot = resid_plot([y_train, y_test], [yc, yt], train_idx=train_index, test_idx=test_index)
M8.pyplot(residual_plot)
residual_plot.savefig('./Report/figures/residual_plot.png')
rega = Reg.selected_features_  ##### ADD FEATURES IMPORTANCE PLOT
#model_export = M1.selectbox("Choose way to export", options=["pickle", "joblib"], key=20)
# Date stamp embedded in the exported file names, e.g. "_2024_05_17_".
date_time = datetime.datetime.strftime(datetime.date.today(), '_%Y_%m_%d_')
if M9.button('Export Model'):
    path = 'data/models/model_'
    if file == files_format[0]:
        # CSV input: the file name combines the stems of the X and Y files
        # (everything before the first dot).
        x_stem = xcal_csv.name[:xcal_csv.name.find(".")]
        y_stem = ycal_csv.name[:ycal_csv.name.find(".")]
        #export_package = __import__(model_export)
        with open(path + model_name + date_time + '_created_on_' + x_stem + '_and_' + y_stem + '_data_' + '.pkl', 'wb') as f:
            # Serialize the fitted model, as the .dx branch below does;
            # without this the .pkl file was created but left empty.
            joblib.dump(reg_model, f)
        # NOTE(review): the .dx branch writes this CSV only for Interval-PLSR;
        # confirm whether the same guard is wanted here.
        Reg.selected_features_.T.to_csv(path + model_name + date_time + '_on_' + x_stem
                                        + '_and_' + y_stem + '_data_' + 'Wavelengths_index.csv', sep=';')
    elif file == files_format[1]:
        # .dx input: a single source file.
        dx_stem = data_file.name[:data_file.name.find(".")]
        #export_package = __import__(model_export)
        with open(path + model_name + '_on_' + dx_stem + '_data_' + '.pkl', 'wb') as f:
            joblib.dump(reg_model, f)
        if regression_algo == reg_algo[3]:
            Reg.selected_features_.T.to_csv(path + dx_stem + model_name + date_time + '_on_' + '_data_' + 'Wavelengths_index.csv', sep=';')
# create a report with information on the model
## see https://stackoverflow.com/a/59578663
# Sidebar navigation: the home script plus the four workflow pages.
pages_folder = Path("pages/")
show_pages(
    [
        Page("app.py", "Home"),
        Page(str(pages_folder / "4-inputs.py"), "Inputs"),
        Page(str(pages_folder / "1-samples_selection.py"), "Samples Selection"),
        Page(str(pages_folder / "2-model_creation.py"), "Models Creation"),
        Page(str(pages_folder / "3-prediction.py"), "Predictions"),
    ]
)
# Link to the prediction page. The target is built from `pages_folder` with
# forward slashes; the hard-coded backslash only resolved on Windows.
st.page_link((pages_folder / "3-prediction.py").as_posix(), label="Keep on keepin' on to predict your values !")
## Load .dx file
# File names of the figures handed to the report generator; the other sections
# of this script save figures under these names in ./Report/figures/.
Ac_Km = ['histo.png', 'Spectre_mod.png','Predictions_V.png','Allinone.png','Predictedvs.png','residual_plot.png']
with st.container():
# NOTE(review): `a_Test`, `json_sp` and `cv99` are not assigned anywhere in the
# visible lines, and 'full_plsr' is passed regardless of the selected
# algorithm — confirm against the full file.
latex_report = report.report(LoDaSum, 'model',Ac_Km,a_Test,json_sp,model_per,'full_plsr',cv99)
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
pass
# Mean raw and pretreated spectra, annotated with what the model relied on:
# selected intervals for Interval-PLSR, selected variables for Full-PLSR.
if not spectra.empty and not y.empty:
    if regression_algo in reg_algo[1:]:
        fig, (ax1, ax2) = plt.subplots(2, 1, figsize=(12, 4), sharex=True)
        ax1.plot(colnames, np.mean(X_train, axis=0), color='black', label='Average spectrum (Raw)')
        ax2.plot(colnames, np.mean(Reg.pretreated_spectra_, axis=0), color='black', label='Average spectrum (pretreated)')
        ax2.set_xlabel('Wavelenghts')
        plt.tight_layout()

        # Interval bounds are read as column labels when the labels are
        # numeric, otherwise as positional indices.
        numeric_columns = np.array(spectra.columns).dtype.kind in ['i', 'f']
        # Iterate over the axes directly; no name lookup through eval().
        for axis in (ax1, ax2):
            axis.grid(color='grey', linestyle=':', linewidth=0.2)
            axis.margins(x=0)
            axis.legend(loc='upper right')
            axis.set_ylabel('Intensity')
            if regression_algo == reg_algo[3]:
                bounds = intervalls_with_cols if numeric_columns else intervalls
                for j in range(s):
                    # Local names avoid rebinding the built-ins min / max
                    # for the rest of the script.
                    span_start, span_end = bounds['from'][j], bounds['to'][j]
                    axis.axvspan(span_start, span_end, color='#00ff00', alpha=0.5, lw=0)

        if regression_algo == reg_algo[1]:
            # Positions of the variables flagged by the model's selectivity ratio.
            important = np.array(Reg.sel_ratio_.index)
            ax1.scatter(colnames[important], np.mean(X_train, axis=0).ravel()[important],
                        color='red', label='Important variables')
            ax2.scatter(colnames[important], np.mean(Reg.pretreated_spectra_, axis=0).ravel()[important],
                        color='red', label='Important variables')
            # Refresh the legends so the scatter label is included.
            ax1.legend()
            ax2.legend()

        M2.write('-- Visualization of the spectral regions used for model creation -- ')
        M2.pyplot(fig)