st.set_page_config(page_title = "NIRS Utils", page_icon = ":goat:", layout = "wide")
# layout
UiComponents(pagespath = pages_folder, csspath = css_file, imgpath = image_path,
             header = True, sidebar = True, bgimg = False, colborders = True)
# Initialize the counter in session state if it doesn't exist; it is folded into the data
# hash below to invalidate st.cache_data on demand
if 'counter' not in st.session_state:
    st.session_state.counter = 0
def increment():
    st.session_state.counter += 1  # callback for the "re-model the data" button further down
hash_ = ''  # running hash of the loaded data and settings (assumed to start empty)
# #################################### Methods ##############################################
class lw:
    def __init__(self, Reg_json, pred):
        self.model_ = Reg_json['model']
        self.best_hyperparams_ = Reg_json['best_lwplsr_params']
        # predictions returned by the external LW-PLS routine, one DataFrame per key in
        # `pred` (train, test, then the CV folds); the JSON layout is assumed here
        self.pred_data_ = [DataFrame(Reg_json[i]) for i in pred]
################ clean the results dir #############
for i in ['model', 'dataset', 'figures']:
dirpath = Path('./report/out/')/i
if not dirpath.exists():
dirpath.mkdir(parents=True, exist_ok=True)
# ####################################### page preamble #######################################
st.markdown("Create a predictive model, then use it for predicting your target variable (chemical data) from NIRS spectra")
c0, c1 = st.columns([1, .4])
c0.image("./images/model_creation.png", use_column_width = True) # graphical abstract
################################################################# Begin : I- Data loading and preparation ######################################
files_format = ['csv', 'dx']  # supported file formats
file = c1.radio('Select the file format:', options = files_format, horizontal = True)  # select a file format
spectra = DataFrame() # preallocate the spectral data block
y = DataFrame() # preallocate the target(s) data block
match file:
# load csv file
case 'csv':
from utils.data_parsing import CsvParser
@st.cache_data  # caching assumed, mirroring the dx reader below; `change` only busts the cache
def read_csv(file = file, change = None, dec = None, sep = None, names = None, hdr = None):
    delete_files(keep = ['.py', '.pyc', '.bib'])
    par = CsvParser(file = file)
    par.parse(decimal = dec, separator = sep, index_col = names, header = hdr)
    return par.float, par.meta_data, par.meta_data_st_, par.df
# Load X-block data
xcal_csv = st.file_uploader("Select NIRS Data", type = "csv", help = " :mushroom: select a csv matrix with samples as rows and lambdas as columns")
c1_1, c2_2 = st.columns([.5, .5])
with c1_1:
decx = st.radio('decimal(x):', options= [".", ","], horizontal = True)
sepx = st.radio("separator(x):", options = [";", ","], horizontal = True)
with c2_2:
phdrx = st.radio("header(x): ", options = ["yes", "no"], horizontal = True)
pnamesx = st.radio("samples name(x):", options = ["yes", "no"], horizontal = True)
hdrx = 0 if phdrx =="yes" else None
namesx = 0 if pnamesx =="yes" else None
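# "yes" maps to 0 (first row/column) and "no" to None, matching the header/index_col
# conventions that CsvParser presumably forwards to pandas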
if xcal_csv:
    try:
        spectra, meta_data, md_df_st_, xfile = read_csv(file = xcal_csv, change = hash_, dec = decx, sep = sepx, names = namesx, hdr = hdrx)
        st.success('xfile has been loaded successfully')
    except Exception:
        st.error('Error: The xfile could not be loaded, please consider tuning the dialect settings!')
else:
    st.info('Info: Insert your spectral data file above!')
# Load Y-block data
ycal_csv = st.file_uploader("Select corresponding Chemical Data", type = "csv", help = " :mushroom: select a csv matrix with samples as rows and chemical values as a column")
c1_1, c2_2 = st.columns([.5, .5])
with c1_1:
decy = st.radio('decimal(y):', options= [".", ","], horizontal = True)
sepy = st.radio("separator(y):", options = [";", ","], horizontal = True)
with c2_2:
phdry = st.radio("header(y): ", options = ["yes", "no"], horizontal = True)
pnamesy = st.radio("samples name(y):", options = ["yes", "no"], horizontal = True)
hdry = 0 if phdry =="yes" else None
namesy = 0 if pnamesy =="yes" else None
if ycal_csv:
    try:
        chem_data, meta_data, md_df_st_, yfile = read_csv(file = ycal_csv, change = hash_, dec = decy, sep = sepy, names = namesy, hdr = hdry)
        st.success('yfile has been loaded successfully')
    except Exception:
        st.error('Error: The yfile could not be loaded, please consider tuning the dialect settings!')
else:
    st.info('Info: Insert your target data file above!')
# AFTER LOADING BOTH X AND Y FILES
if xcal_csv and ycal_csv:
# build a single string from the raw text of both files, used to hash x and y together
from io import StringIO
xy_str = ''
for f in [xcal_csv, ycal_csv]:
    xy_str += StringIO(f.getvalue().decode('utf-8')).read()
# fold the file contents and names into the running hash (reconstructed from the commented call below)
hash_ = ObjectHash(current = hash_, add = xy_str + str(xcal_csv.name) + str(ycal_csv.name))
file_name = str(xcal_csv.name) + ' & ' + str(ycal_csv.name)  # used later to name the report (format assumed)
# p_hash([xy_str + str(xcal_csv.name) + str(ycal_csv.name), hdrx, sepx, hdry, sepy])
if yfile.shape[1] > 0 and xfile.shape[1] > 0:
    if chem_data.shape[1] > 1:
        yname = c1.selectbox('Select a target', options = [''] + chem_data.columns.tolist(), format_func = lambda x: x if x else "<Select>")
        if yname:
            y = chem_data.loc[:, yname]
        else:
            c1.info('Info: Select the target analyte from the drop down list!')
    elif chem_data.shape[1] == 1:
        y = chem_data.iloc[:, 0]  # single target column: use it directly (assumed)
    if not y.empty:
        if spectra.shape[0] != y.shape[0]:
            st.error('Error: X and Y have different sample sizes')
else:
    st.error('Error: The data has not been loaded successfully, please consider tuning the dialect settings!')
# Load .dx file
case 'dx':
data_file = st.file_uploader("Select Data", type = ".dx", help = " :mushroom: select a dx file")
if data_file:
file_name = str(data_file.name)
## creating the temp file
with NamedTemporaryFile(delete = False, suffix = ".dx") as tmp:
tmp.write(data_file.read())
tmp_path = tmp.name
with open(tmp.name, 'r') as dd:
dxdata = dd.read()
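# delete = False keeps the temp file on disk after the with-block closes, so the parser
# below can re-open it by path; the commented os.unlink further down would clean it up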
## load and parse the temp dx file
@st.cache_data
def read_dx(tmp_path):
    M = JcampParser(path = tmp_path)
    M.parse()
    # return the parsed blocks (attribute names assumed from the JcampParser API)
    return M.chem_data, M.specs_df_, M.md_df_, M.md_df_st_
# chem_data, spectra, meta_data, meta_data_st = read_dx(file = tmp_path)
# os.unlink(tmp_path)
chem_data, spectra, meta_data, meta_data_st = read_dx(tmp_path = tmp_path)
if not spectra.empty:
st.success("Info: The data have been loaded successfully", icon = "✅")
if chem_data.shape[1] > 0:
    yname = st.selectbox('Select the target analyte', options = [''] + chem_data.columns.tolist(), format_func = lambda x: x if x else "<Select>")
    if yname:
        measured = chem_data.loc[:, yname] > 0  # keep only samples with a measured (non-zero) target
        y = chem_data.loc[:, yname].loc[measured]
        spectra = spectra.loc[measured]
    else:
        st.info('Info: Please select the target analyte from the dropdown list!')
else:
    st.warning('Warning: your file includes no target variable to model!', icon = "⚠️")
################################################### END : I- Data loading and preparation ####################################################
################################################### BEGIN : visualize and split the data ####################################################
if not spectra.empty and not y.empty:
if np.array(spectra.columns).dtype.kind in ['i', 'f']:
colnames = spectra.columns
else:
colnames = np.arange(spectra.shape[1])
X_train, X_test, y_train, y_test, train_index, test_index = data_split(x=spectra, y=y)
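# the returned train/test indices let the diagnosis plots and the exported pickle refer
# back to the original sample positions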
#### insight on loaded data
spectra_plot = plot_spectra(spectra, xunits = 'Wavelength/Wavenumber', yunits = "Signal intensity")
target_plot = hist(y = y, y_train = y_train, y_test = y_test, target_name=yname)
stats = DataFrame([desc_stats(y_train), desc_stats(y_test), desc_stats(y)], index =['train', 'test', 'total'] ).round(2)
# fig1, ax1 = plt.subplots( figsize = (12, 3))
# spectra.T.plot(legend = False, ax = ax1, linestyle = '-', linewidth = 0.6)
# ax1.set_ylabel('Signal intensity')
# ax1.margins(0)
# plt.tight_layout()
c2, c3 = st.columns([1, .4])
with c2:
st.pyplot(spectra_plot) ######## Loaded graph
st.pyplot(target_plot)
st.write('Loaded data summary')
st.write(stats)
################################################### END : visualize and split the data #######################################################
# if 'model_type' not in st.session_state:
# st.cache_data.model_type = ''
# ################################################### BEGIN : Create Model ####################################################
model_type = None # initialize the selected regression algorithm
c4, c5, c6 = st.columns([1, 1, 3])
with c4:
# select type of supervised modelling problem
var_nature = ['Continuous', 'Categorical']
mode = c4.radio("The nature of the target variable:", options = var_nature)
match mode:
case "Continuous":
reg_algo = ["", "PLS", "LW-PLS", "TPE-iPLS"]
st.markdown('Example 1: Quantifying the volume of nectar consumed by a pollinator during a foraging session.')
st.markdown('Example 2: Measuring the sugar content, amino acids, or other compounds in nectar from different flower species.')
case 'Categorical':
reg_algo = ["", "PLS", "LW-PLS", "TPE-iPLS", 'LDA']
st.markdown(f"Example1: Classifying pollinators into categories such as bees, butterflies, moths, and beetles.")
st.markdown(f"Example2: Classifying plants based on their health status, such as healthy, stressed, or diseased, using NIR spectral data.")
with c5:
model_type = c5.selectbox("Choose a modelling algorithm:", options = reg_algo, key = 12, format_func = lambda x: x if x else "<Select>")
with c6:
st.markdown("-------------")
match model_type:
case "PLS":
st.markdown("#### For further details on the PLS (Partial Least Squares) algorithm, check the following reference:")
st.markdown('##### https://www.tandfonline.com/doi/abs/10.1080/03610921003778225')
case "LW-PLS":
st.markdown("#### For further details on the LW-PLS (Locally Weighted - Partial Least Squares) algorithm, check the following reference:")
st.markdown('##### https://analyticalsciencejournals.onlinelibrary.wiley.com/doi/full/10.1002/cem.3117')
case "TPE-iPLS":
st.markdown("#### For further details on the TPE-iPLS (Tree-structured Parzen Estimator based interval-Partial Least Squares) algorithm, which is a wrapper method for interval selection, check the following references:")
st.markdown("##### https://papers.nips.cc/paper_files/paper/2011/file/86e8f7ab32cfd12577bc2619bc635690-Paper.pdf")
st.markdown('##### https://www.tandfonline.com/doi/abs/10.1080/03610921003778225')
st.markdown('##### https://journals.sagepub.com/doi/abs/10.1366/0003702001949500')
st.markdown("-------------")
# if model_type != st.session_state.model_type:
# st.session_state.model_type = model_type
# increment()

# Training set preparation for cross-validation (CV)
nb_folds = 3
# Model creation
# spectra_plot.savefig("./report/figures/spectra_plot.png")
# target_plot.savefig("./report/figures/histogram.png")
# st.session_state['hash_Reg'] = str(np.random.randint(2000000000))
@st.cache_data(show_spinner = False)  # definition assumed from the call further down; `change` only busts the cache
def RequestingModelCreation(change):
    folds = KF_CV.CV(X_train, y_train, nb_folds)  # split train data into nb_folds for cross-validation
    match model_type:
        case 'PLS':
            Reg = Plsr(train = [X_train, y_train], test = [X_test, y_test], n_iter = 100, cv = nb_folds)
            rega = Reg.selected_features_
case 'LW-PLS':
# export data to csv for Julia train/test
global x_train_np, y_train_np, x_test_np, y_test_np
data_to_work_with = ['x_train_np', 'y_train_np', 'x_test_np', 'y_test_np']
x_train_np, y_train_np, x_test_np, y_test_np = X_train.to_numpy(), y_train.to_numpy(), X_test.to_numpy(), y_test.to_numpy()
# Cross-Validation calculation
d = {}
for i in range(nb_folds):
    # training portion = all folds but fold i; validation portion = fold i
    d["xtr_fold{0}".format(i+1)] = np.delete(x_train_np, folds[list(folds)[i]], axis = 0)
    d["ytr_fold{0}".format(i+1)] = np.delete(y_train_np, folds[list(folds)[i]], axis = 0)
    d["xte_fold{0}".format(i+1)] = x_train_np[folds[list(folds)[i]]]
    d["yte_fold{0}".format(i+1)] = y_train_np[folds[list(folds)[i]]]
    data_to_work_with.append("xtr_fold{0}".format(i+1))
    data_to_work_with.append("ytr_fold{0}".format(i+1))
    data_to_work_with.append("xte_fold{0}".format(i+1))
    data_to_work_with.append("yte_fold{0}".format(i+1))
# check best pre-treatment with a global PLSR model
preReg = Plsr(train = [X_train, y_train], test = [X_test, y_test], n_iter=100)
temp_path = Path('temp/')
with open(temp_path / "lwplsr_preTreatments.json", "w+") as outfile:
json.dump(preReg.best_hyperparams_, outfile)
# export Xtrain, Xtest, Ytrain, Ytest and all CV folds to temp folder as csv files
for i in data_to_work_with:
if 'fold' in i:
j = d[i]
else:
j = globals()[i]
# st.write(j)
np.savetxt(temp_path / str(i + ".csv"), j, delimiter=",")
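# hand the exported CSVs over to the external LW-PLS routine: lwplsr_call.py is expected to
# read them (plus the pre-treatment JSON) from temp/ and write results to lwplsr_outputs.json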
subprocess.run([f"{sys.executable}", subprocess_path / "lwplsr_call.py"])
# retrieve json results from Julia JChemo
try:
with open(temp_path / "lwplsr_outputs.json", "r") as outfile:
Reg_json = json.load(outfile)
# delete csv files
for i in data_to_work_with: os.unlink(temp_path / str(i + ".csv"))
# delete json file after import
os.unlink(temp_path / "lwplsr_outputs.json")
os.unlink(temp_path / "lwplsr_preTreatments.json")

os.unlink(temp_path / 'model')
# format result data into Reg object
pred = ['pred_data_train', 'pred_data_test']### keys of the dict
for i in range(nb_folds):
pred.append("CV" + str(i+1)) ### add cv folds keys to pred
# global Reg
# Reg = type('obj', (object,), {'model_' : Reg_json['model'], 'best_hyperparams_' : Reg_json['best_lwplsr_params'],
Reg = lw(Reg_json = Reg_json, pred = pred)
Reg.cv_data_ = {'YpredCV' : {}, 'idxCV' : {}}
# set indexes to Reg.pred_data (train, test, folds idx)
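# entries 0 and 1 of pred are the train/test predictions; entries 2 onward are the CV folds,
# hence the i-2 offset used below when looking up fold indices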
for i in range(len(pred)):
Reg.pred_data_[i] = Reg.pred_data_[i].T.reset_index().drop(columns = ['index'])
if i == 0: # data_train
# Reg.pred_data_[i] = np.array(Reg.pred_data_[i])
Reg.pred_data_[i].index = list(y_train.index)
Reg.pred_data_[i] = Reg.pred_data_[i].iloc[:,0]
elif i == 1: # data_test
# Reg.pred_data_[i] = np.array(Reg.pred_data_[i])
Reg.pred_data_[i].index = list(y_test.index)
Reg.pred_data_[i] = Reg.pred_data_[i].iloc[:,0]
else:
# CVi
Reg.pred_data_[i].index = folds[list(folds)[i-2]]
Reg.cv_data_['YpredCV']['Fold' + str(i-1)] = np.array(Reg.pred_data_[i]).reshape(-1)
Reg.cv_data_['idxCV']['Fold' + str(i-1)] = np.array(folds[list(folds)[i-2]]).reshape(-1)
Reg.CV_results_= KF_CV.metrics_cv(y = y_train, ypcv = Reg.cv_data_['YpredCV'], folds = folds)[1]
#### cross validation results print
Reg.best_hyperparams_print = Reg.best_hyperparams_
## plots
Reg.cv_data_ = KF_CV().meas_pred_eq(y = np.array(y_train), ypcv = Reg.cv_data_['YpredCV'], folds = folds)
Reg.pretreated_spectra_ = preReg.pretreated_spectra_
Reg.best_hyperparams_print = {**preReg.best_hyperparams_, **Reg.best_hyperparams_}
Reg.best_hyperparams_ = {**preReg.best_hyperparams_, **Reg.best_hyperparams_}
Reg.__hash__ = ObjectHash(current = hash_,add = Reg.best_hyperparams_print)
except FileNotFoundError as e:
Reg = None
for i in data_to_work_with: os.unlink(temp_path / str(i + ".csv"))
case 'TPE-iPLS':
Reg = TpeIpls(train = [X_train, y_train], test=[X_test, y_test], n_intervall = s, n_iter=it, cv = nb_folds)
intervalls = Reg.selected_features_.T.copy()
intervalls_with_cols = Reg.selected_features_.T.copy().astype(str)
for i in range(intervalls.shape[0]):
for j in range(intervalls.shape[1]):
intervalls_with_cols.iloc[i,j] = spectra.columns[intervalls.iloc[i,j]]
rega = Reg.selected_features_
st.session_state.intervalls = Reg.selected_features_.T
st.session_state.intervalls_with_cols = intervalls_with_cols
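# intervalls stores positional indices of the selected interval bounds, while
# intervalls_with_cols stores the matching wavelength labels; both are reused later for
# the importance plot and the exported wavelength table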
return Reg
if model_type:
info = st.info('Info: The model is being created. This may take a few minutes.')
if model_type == 'TPE-iPLS':  # for TPE-iPLS, ask for the number of intervals and iterations
s = st.number_input(label = 'Enter the maximum number of intervals', min_value = 1, max_value = 6)
it = st.number_input(label = 'Enter the number of iterations', min_value = 2, max_value = 500, value = 250)
remodel_button = st.button('re-model the data', key=4, help=None, type="primary", use_container_width=True, on_click=increment)
hash_ = ObjectHash(current = hash_, add = st.session_state.counter)
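# folding the click counter into the hash forces the cached RequestingModelCreation to
# miss, so pressing "re-model the data" retrains the model from scratch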
Reg = RequestingModelCreation(change = hash_)
reg_model = Reg.model_
    info.empty()
    if Reg:
        st.success('Success! Your model has been created and is ready to use.')
    else:
        st.error("Error: Model creation failed. Please try again.")
else:
    st.info('Info: Choose a modelling algorithm from the dropdown list!')
if model_type:
if model_type == 'TPE-iPLS':
if 'intervalls' in st.session_state and 'intervalls_with_cols' in st.session_state:
intervalls = st.session_state.intervalls
intervalls_with_cols = st.session_state.intervalls_with_cols
# remodel_button = st.button('re-model the data', key=4, help=None, type="primary", use_container_width=True)
# if remodel_button:# remodel feature for re-tuning the model
# increment()
# fitted values and predicted values (first two entries of pred_data_, assumed train then test)
yc = Reg.pred_data_[0]
yt = Reg.pred_data_[1]
# Show and export the preprocessing methods
st.write('-- Spectral preprocessing info --')
st.write(Reg.best_hyperparams_print)
with open('./report/out/Preprocessing.json', 'w') as outfile:  # path assumed from the commented export below
    json.dump(Reg.best_hyperparams_, outfile)
preprocessings(change = hash_)
# Show the model performance table
st.write("-- Model performance --")
model_per = DataFrame(metrics(c = [y_train, yc], t = [y_test, yt], method = 'regression').scores_)
st.dataframe(model_per)
# M1.dataframe(model_per) # duplicate with line 371
def prep_important(change, model_type, model_hash):
    fig, (ax1, ax2) = plt.subplots(2, 1, figsize = (12, 4), sharex = True)
    ax1.plot(colnames, np.mean(X_train, axis = 0), color = 'black', label = 'Average spectrum (Raw)')
    ax2.plot(colnames, np.mean(Reg.pretreated_spectra_, axis = 0), color = 'black', label = 'Average spectrum (Pretreated)')
    ax2.set_xlabel('Wavelengths')
    plt.tight_layout()
    a = change  # `change` only keys the figure to the current model state
    for ax in (ax1, ax2):
        ax.grid(color = 'grey', linestyle = ':', linewidth = 0.2)
        ax.margins(x = 0)
        ax.legend(loc = 'upper right')
        ax.set_ylabel('Intensity')
        if model_type == 'TPE-iPLS':  # highlight the selected spectral intervals
            for j in range(s):
                if np.array(spectra.columns).dtype.kind in ['i', 'f']:
                    wl_min, wl_max = intervalls_with_cols.iloc[j, 0], intervalls_with_cols.iloc[j, 1]
                else:
                    wl_min, wl_max = intervalls.iloc[j, 0], intervalls.iloc[j, 1]  # fall back to positional bounds (assumed)
                ax.axvspan(wl_min, wl_max, color = '#00ff00', alpha = 0.5, lw = 0)
    # mark influential variables when the model exposes a selectivity ratio (assumed for PLS-type models)
    if hasattr(Reg, 'sel_ratio_'):
        ax1.scatter(colnames[np.array(Reg.sel_ratio_.index)], np.mean(X_train, axis = 0).iloc[np.array(Reg.sel_ratio_.index)],
                    color = '#7ab0c7', label = 'Important variables')
        ax2.scatter(colnames[Reg.sel_ratio_.index], np.mean(Reg.pretreated_spectra_, axis = 0)[np.array(Reg.sel_ratio_.index)],
                    color = '#7ab0c7', label = 'Important variables')
        ax1.legend()
        ax2.legend()
return fig
c7, c8 = st.columns([.5, .5])  # layout assumed: c8 hosts the spectral-region visualization
with c8:  # visualize raw/preprocessed spectra and, for TPE-iPLS, the selected intervals
    if model_type == 'TPE-iPLS':
        st.write('-- Important Spectral regions used for model creation --')
        st.table(intervalls_with_cols)
st.write('-- Visualization of the spectral regions used for model creation --')
imp_fig = prep_important(change = st.session_state.counter, model_type = model_type, model_hash = hash_)
st.pyplot(imp_fig)
# Display CV results
numbers_dict = {1: "One", 2: "Two",3: "Three",4: "Four",5: "Five",
6: "Six",7: "Seven",8: "Eight",9: "Nine",10: "Ten"}
st.subheader(f" {numbers_dict[nb_folds]}-Fold Cross-Validation results")
def cv_display(change):
fig1 = px.scatter(Reg.cv_data_[0], x = 'Measured', y = 'Predicted' , trendline = 'ols', color = 'Folds', symbol = 'Folds',
color_discrete_sequence=px.colors.qualitative.G10)
fig1.add_shape(type = 'line', x0 = .95 * min(Reg.cv_data_[0].loc[:,'Measured']), x1 = 1.05 * max(Reg.cv_data_[0].loc[:,'Measured']),
y0 = .95 * min(Reg.cv_data_[0].loc[:,'Measured']), y1 = 1.05 * max(Reg.cv_data_[0].loc[:,'Measured']), line = dict(color = 'black', dash = "dash"))
fig1.update_traces(marker_size = 7, showlegend=False)
fig0 = px.scatter(Reg.cv_data_[0], x ='Measured', y = 'Predicted' , trendline = 'ols', color = 'Folds', symbol = "Folds", facet_col = 'Folds',facet_col_wrap = 1,
color_discrete_sequence = px.colors.qualitative.G10, text = 'index', width = 800, height = 1000)
fig0.update_traces(marker_size = 8, showlegend = False)
return fig0, fig1
fig0, fig1 = cv_display(change= Reg.cv_data_)
cv_results = DataFrame(Reg.CV_results_)  # per-fold metrics from KF_CV.metrics_cv above (wrapping assumed)
st.write('-- Cross-Validation Summary --')
st.write(cv_results.astype(str).style.map(lambda _: "background-color: #cecece;", subset = (cv_results.index.drop(['sd', 'mean', 'cv']), slice(None))))
st.write('-- Out-of-Fold Predictions Visualization (All in one) --')
st.plotly_chart(fig1, use_container_width = True)
cv1, cv2 = st.columns([.5, .5])  # layout assumed for the per-fold plots
with cv1:
st.write('-- Out-of-Fold Predictions Visualization (Separate plots) --')
st.plotly_chart(fig0, use_container_width=True)
################################################### BEGIN : Model Diagnosis ####################################################
if Reg:
# signal preprocessing results preparation for latex report
prep_para = Reg.best_hyperparams_.copy()
prep_para.pop('n_components', None)  # not a preprocessing setting; may be absent for LW-PLS
ordinal = {2: 'nd', 3: 'rd'}
for i in ['deriv', 'polyorder']:
    n = Reg.best_hyperparams_[i]
    if n == 0:
        prep_para[i] = '0'
    elif n == 1:
        prep_para[i] = '1st'
    elif n > 1:
        prep_para[i] = f"{n}{ordinal.get(n, 'th')}"  # e.g. 2 -> '2nd', 3 -> '3rd', 4 -> '4th'
# regression plot and residuals plot (LW-PLS, i.e. reg_algo[2], is skipped)
if model_type != reg_algo[2]:
    measured_vs_predicted = reg_plot([y_train, y_test], [yc, yt], train_idx = train_index, test_idx = test_index)
    residuals_plot = resid_plot([y_train, y_test], [yc, yt], train_idx = train_index, test_idx = test_index)
M7, M8 = st.columns([.5, .5])  # layout assumed for the diagnosis plots
with M7:
    st.write('Predicted vs Measured values')
    st.pyplot(measured_vs_predicted)  # display assumed, mirroring the spectra plots above
    # regression_plot.savefig('./report/figures/measured_vs_predicted.png')
with M8:
    st.write('Residuals plot')
    st.pyplot(residuals_plot)
    # residual_plot.savefig('./report/figures/residuals_plot.png')
################################################### END : Model Diagnosis #######################################################
################################################### BEGIN : Download results #######################################################
##########################################################################################################################################
##########################################################################################################################################
st.header('Download the analysis results')
st.write("**Note:** Please check the box only after you have finished processing your data and are satisfied with the results. Checking the box prematurely may slow down the app and could lead to crashes.")
decis = st.checkbox("Yes, I want to download the results")
if decis:
@st.cache_data(show_spinner =False)
def export_report(change):
match model_type:
case 'PLS':
latex_report = report.report('Predictive model development', file_name, stats, list(prep_para.values()), model_type, model_per, cv_results)
case 'LW-PLS':
latex_report = report.report('Predictive model development', file_name, stats,
list({key: Reg.best_hyperparams_[key] for key in ['deriv', 'normalization', 'polyorder', 'window_length'] if key in Reg.best_hyperparams_}.values()), model_type, model_per, cv_results)
case 'TPE-iPLS':
latex_report = report.report('Predictive model development', file_name, stats,
list({key: Reg.best_hyperparams_[key] for key in ['deriv', 'normalization', 'polyorder', 'window_length'] if key in Reg.best_hyperparams_}.values()), model_type, model_per, cv_results)
case _:
st.warning('Data processing has not been performed or finished yet!', icon = "⚠️")
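# each branch above delegates to report.report(), which is expected to compile the LaTeX
# report to ./report/report.pdf; that file is moved into ./report/out/ further down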
@st.cache_data(show_spinner =False)
def preparing_results_for_downloading(change):
match file:
# load csv file
case 'csv':
xfile.to_csv('report/out/dataset/'+ xcal_csv.name, sep = ';', encoding = 'utf-8', mode = 'a')
yfile.to_csv('report/out/dataset/'+ ycal_csv.name, sep = ';', encoding = 'utf-8', mode = 'a')
    case 'dx':
        with open('./report/out/dataset/' + data_file.name, 'w') as dd:
            dd.write(dxdata)  # re-export the raw dx content (assumed, mirroring the csv branch)
with open('./report/out/model/' + model_type + '.pkl', 'wb') as f:  # export the fitted model
    dump(reg_model, f)
figpath = './report/out/figures/'  # figures dir created during the results-dir cleanup above
spectra_plot.savefig(figpath + "spectra_plot.png")
target_plot.savefig(figpath + "histogram.png")
imp_fig.savefig(figpath + "variable_importance.png")
fig1.write_image(figpath + "meas_vs_pred_cv_all.png")
fig0.write_image(figpath + "meas_vs_pred_cv_onebyone.png")
measured_vs_predicted.savefig(figpath + 'measured_vs_predicted.png')
residuals_plot.savefig(figpath + 'residuals_plot.png')
# with open('report/out/Preprocessing.json', "w") as outfile:
# json.dump(Reg.best_hyperparams_, outfile)
if model_type == 'TPE-iPLS': # export selected wavelengths
wlfilename = './report/out/model/'+ model_type+'-selected_wavelengths.xlsx'
selected_wls = concat([intervalls_with_cols.T, Reg.selected_features_], axis = 0, ignore_index = True).T
selected_wls.columns = ['wl_from', 'wl_to', 'idx_from', 'idx_to']
selected_wls.to_excel(wlfilename)
export_report(change = hash_)
move("./report/report.pdf", "./report/out/report.pdf")
# ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~#
# pklfile = {'model_': Reg.model_,"model_type" : model_type, 'training_data':{'raw-spectra':spectra,'target':y, },
# 'spec-preprocessing':{"normalization": Reg.best_hyperparams_['normalization'], 'SavGol(polyorder,window_length,deriv)': [Reg.best_hyperparams_["polyorder"],
# Reg.best_hyperparams_['window_length'],
# Reg.best_hyperparams_['deriv']]}}
pklfile = {'model_': Reg.model_,"model_type" : model_type, 'data':{'raw-spectra':spectra,'target':y, 'training_data_idx':train_index,'testing_data_idx':test_index},
'spec-preprocessing':{"normalization": Reg.best_hyperparams_['normalization'], 'SavGol(polyorder,window_length,deriv)': [Reg.best_hyperparams_["polyorder"],
Reg.best_hyperparams_['window_length'],
Reg.best_hyperparams_['deriv']]}}
if model_type == 'TPE-iPLS': # export selected wavelengths

pklfile['selected-wls'] = {'idx':Reg.selected_features_.T , "wls":intervalls_with_cols }
elif model_type == 'LW-PLS': # export LWPLS best model parameters
pklfile['selected-wls'] = {'idx':None, "wls":None }
pklfile['lwpls_params'] = Reg.best_hyperparams_
else:
pklfile['selected-wls'] = {'idx':None, "wls":None }
with open('./report/out/file_system.pkl', "wb") as pkl:
dump(pklfile, pkl)
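# the pickle bundles the fitted model, the raw data with the train/test indices and the
# preprocessing settings, presumably so the saved workflow can be re-applied to new spectra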
return change
preparing_results_for_downloading(change = hash_)
@st.cache_data(show_spinner =False)
def tempdir(change):
    with TemporaryDirectory(prefix = "results", dir = "./report") as temp_dir:  # create a temp directory
        tempdirname = os.path.split(temp_dir)[1]  # keep only the directory name (assumed)
        make_archive(base_name = "./report/Results", format = "zip", base_dir = "out", root_dir = "./report")  # zip the out/ folder
        move("./report/Results.zip", f"./report/{tempdirname}/Results.zip")  # put the zip inside the temp dir
        with open(f"./report/{tempdirname}/Results.zip", "rb") as f:
            zip_data = f.read()
    return tempdirname, zip_data
try:
    tempdirname, zip_data = tempdir(change = hash_)
except Exception:
    zip_data = ''  # keep the download button disabled if zipping failed
date_time = datetime.now().strftime('%y%m%d%H%M')
disabled_down = zip_data == ''
st.download_button(label = 'Download', data = zip_data, file_name = f'Nirs_Workflow_{date_time}_Reg_.zip', mime ="application/zip",
args = None, kwargs = None,type = "primary",use_container_width = True, disabled = disabled_down)