Newer
Older
from Packages import *
st.set_page_config(page_title="NIRS Utils", page_icon=":goat:", layout="wide")
from Modules import *
# HTML for the "CEFE - CNRS" banner
add_sidebar(pages_folder)
local_css(css_file / "style_model.css")#load specific model page css
hash_ = ''
def p_hash(add):
    """Mix ``add`` into the running page hash and return the updated digest.

    The module-level ``hash_`` is replaced by ``hash_data`` applied to its
    previous value concatenated with ``str(add)``.  The updated value is what
    the cached helpers on this page receive as their ``change`` argument.
    """
    global hash_
    chained = hash_ + str(add)
    hash_ = hash_data(chained)
    return hash_
# #################################### Methods ##############################################
# empty temp figures
def delete_files(keep):
    """Remove generated files under ``Report/`` while sparing protected ones.

    Every file found below ``Report/`` is deleted unless it is named
    ``logo_cefe.png`` or its name ends with one of the suffixes in ``keep``.
    Directories themselves are left in place.

    Args:
        keep: iterable of filename suffixes (e.g. ``['.py', '.bib']``) that
            must survive the clean-up.
    """
    # str.endswith accepts a tuple, so the suffix set is built once instead of
    # re-iterating ``keep`` for every file.
    protected_suffixes = tuple(keep)
    # os.walk silently yields nothing when 'Report/' is missing.
    for root, _dirs, files in os.walk('Report/', topdown=False):
        for file in files:
            if file == 'logo_cefe.png' or file.endswith(protected_suffixes):
                continue
            os.remove(os.path.join(root, file))
# Algorithms offered by the app
dim_red_methods = ['PCA', 'UMAP', 'NMF']  # dimensionality reduction algorithms
cluster_methods = ['Kmeans', 'HDBSCAN', 'AP']  # clustering algorithms
selec_strategy = ['center', 'random']

# Remove the model output directory if one is present
# (Path.is_dir() is already False for a path that does not exist).
dirpath = Path('Report/out/model')
if dirpath.is_dir():
    shutil.rmtree(dirpath)
match st.session_state["interface"]:
case 'simple':
st.write(':red[Automated Simple Interface]')
# hide_pages("Predictions")
if 37 not in st.session_state:
default_reduction_option = 1
else:
default_reduction_option = dim_red_methods.index(st.session_state.get(37))
if 38 not in st.session_state:
default_clustering_option = 1
else:
default_clustering_option = cluster_methods.index(st.session_state.get(38))
if 102 not in st.session_state:
default_sample_selection_option = 1
else:
default_sample_selection_option = selec_strategy.index(st.session_state.get(102))
case'advanced':
default_reduction_option = 0
default_clustering_option = 0
default_sample_selection_option = 0
################ clean the results dir #############
delete_files(keep=['.py', '.pyc', '.bib'])

# ####################################### page preamble #######################################
st.title("Calibration Subset Selection")  # page title
st.markdown("Create a predictive model, then use it for predicting your target variable (chemical data) from NIRS spectra")
col2, col1 = st.columns([3, 1])
col2.image("./images/sample selection.png", use_column_width=True)  # graphical abstract

################################### I - Data Loading and Visualization ########################################
files_format = ['csv', 'dx']  # Supported files format
# loader for datafile (accepted extensions come from files_format, not a duplicated literal)
file = col1.file_uploader("Data file", type=files_format, help=" :mushroom: select a csv matrix with samples as rows and lambdas as columns", key=5)

# Empty placeholders so later sections can test these names before any data is loaded
spectra = pd.DataFrame()
meta_data = pd.DataFrame()
tcr = pd.DataFrame()
sam = pd.DataFrame()
sam1 = pd.DataFrame()
labels = []
color_palette = None
dr_model = None  # dimensionality reduction model
cl_model = None  # clustering model
samples_df_chem = pd.DataFrame()  # an empty frame; previously the DataFrame class itself was bound
selected_samples = []  # the earlier DataFrame initialisation was immediately overwritten by this list
selected_samples_idx = []
if not file:
col1.info('Info: Please load data file !')

DIANE
committed
else:
extension = file.name.split(".")[-1]
userfilename = file.name.replace(f".{extension}", '')
psep = st.radio("Select csv separator - _detected_: " + str(find_delimiter('data/'+file.name)), options = [";", ","], index = [";", ","].index(str(find_delimiter('data/'+file.name))),horizontal=True, key=9)
phdr = st.radio("indexes column in csv? - _detected_: " + str(find_col_index('data/'+file.name)), options = ["no", "yes"], index = ["no", "yes"].index(str(find_col_index('data/'+file.name))),horizontal=True, key=31)
if phdr == 'yes':col = 0
else:col = False
from io import StringIO
stringio = StringIO(file.getvalue().decode("utf-8"))
data_str = str(stringio.read())
p_hash([data_str + str(file.name) , psep, phdr])
@st.cache_data
def csv_loader(change):
    """Read the uploaded CSV and pass it through ``col_cat``.

    The enclosing script supplies ``file``, ``psep`` (separator) and ``col``
    (index column); ``change`` is not read in the body.
    NOTE(review): ``change`` presumably exists only so the cache entry is
    refreshed when the page hash changes - confirm.

    Returns:
        A 4-tuple ``(first, second, second, raw)`` where ``first`` and
        ``second`` are the two objects returned by ``col_cat`` (used by the
        caller as spectra and metadata) and ``raw`` is the frame as read.
    """
    raw = pd.read_csv(file, index_col=col, sep=psep)
    signal, meta = col_cat(raw)
    # The metadata object is deliberately returned twice: the caller binds it
    # to both md_df_st_ and meta_data.
    return signal, meta, meta, raw
# Load the CSV with the chosen dialect and report the outcome to the user.
try:
    spectra, md_df_st_, meta_data, imp = csv_loader(change=hash_)
except Exception:
    # A bare ``except:`` would also intercept BaseException subclasses
    # (KeyboardInterrupt, SystemExit, ...); only ordinary errors are reported.
    st.error('''Error: The format of the file does not correspond to the expected dialect settings.
To read the file correctly, please adjust the separator parameters.''')
else:
    st.success("The data have been loaded successfully", icon="✅")
with col1:
# Create a temporary file to save the uploaded file
with NamedTemporaryFile(delete=False, suffix=".dx") as tmp:
tmp.write(file.read())
tmp_path = tmp.name
with open(tmp.name, 'r') as dd:
dxdata = dd.read()
p_hash(str(dxdata)+str(file.name))
## load and parse the temp dx file
@st.cache_data
def dx_loader(change):
    """Parse the temporary ``.dx`` copy of the upload with ``read_dx``.

    ``tmp_path`` comes from the enclosing script; ``change`` is not read in
    the body.
    NOTE(review): ``change`` presumably exists only so the cache entry is
    refreshed when the page hash changes - confirm.

    Returns:
        The four objects produced by ``read_dx``, in the same order.
    """
    first, spectra_part, meta_part, meta_st_part = read_dx(file=tmp_path)
    # os.unlink(tmp_path)
    return first, spectra_part, meta_part, meta_st_part
_, spectra, meta_data, md_df_st_ = dx_loader(change = hash_)
################################################### END : I- Data loading and preparation ####################################################
# with open('Report/datasets/'+file.name, 'w') as dd:
# dd.write(dxdata)
# tmp_path = tmp.name
# imp.to_csv("./Report/datasets/"+file.name,sep = ';', encoding='utf-8', mode='a')
# fig.savefig("./Report/figures/spectra_plot.png", dpi=400) ## Export report
################################################### BEGIN : visualize and split the data ####################################################

DIANE
committed
st.header("I - Spectral Data Visualization", divider='blue')

DIANE
committed
n_samples = spectra.shape[0]
nwl = spectra.shape[1]
# retrieve columns name and rows name of the dataframe
colnames = list(spectra.columns)
rownames = [str(i) for i in list(spectra.index)]
spectra.index = rownames
@st.cache_data
def spectra_visualize(change):
lab = ['Wavenumber (1/cm)' if meta_data.loc[:,'xunits'][0] == '1/cm' else 'Wavelength (nm)']
if lab[0] =='Wavenumber (1/cm)':
spectra.T.plot(legend=False, ax = ax).invert_xaxis()
else :
spectra.T.plot(legend=False, ax = ax)
ax.set_xlabel(lab[0], fontsize=18)
spectra.T.plot(legend=False, ax = ax)
ax.set_xlabel('Wavelength/Wavenumber', fontsize=18)
ax.set_ylabel('Signal intensity', fontsize=18)
plt.margins(x = 0)
plt.tight_layout()
data_info = pd.DataFrame({'Name': [file.name],
'Number of scanned samples': [n_samples]},
index = ['Input file'])
# update lines size to export for report

DIANE
committed
for line in ax.get_lines():
line.set_linewidth(0.8) # Set the desired line width here
# Update the size of the plot axes for export to the report
l, w = fig.get_size_inches()
fig.set_size_inches(8, 3)
for label in (ax.get_xticklabels()+ax.get_yticklabels()):

DIANE
committed
ax.xaxis.label.set_size(9.5)
ax.yaxis.label.set_size(9.5)
plt.tight_layout()
fig.set_size_inches(l, w)# reset the plot size to its original size
return fig, data_info
fig_spectra, data_info = spectra_visualize(change = hash_)
col1, col2 = st.columns([3, 1])

DIANE
committed
with col1:

DIANE
committed
st.info('Information on the loaded data file')
st.write(data_info) ## table showing the number of samples in the data file
################################################### END : visualize and split the data ####################################################
############################## Exploratory data analysis ###############################
st.header("II - Exploratory Data Analysis-Multivariable Data Analysis", divider='blue')
t = pd.DataFrame # scores
p = pd.DataFrame # loadings
if not spectra.empty:
xc = standardize(spectra, center=True, scale=False)
bb1, bb2, bb3, bb4, bb5, bb6, bb7 = st.columns([1,1,0.6,0.6,0.6,1.5,1.5])
with bb1:
dim_red_method = st.selectbox("Dimensionality reduction techniques: ", options = ['']+dim_red_methods, index = default_reduction_option, key = 37, format_func = lambda x: x if x else "<Select>")
if dim_red_method == '':
st.info('Info: Select a dimensionality reduction technique!')
p_hash(dim_red_method)
if dim_red_method == "UMAP":
filter = md_df_st_.columns.tolist()
supervised = st.selectbox('Supervised UMAP by(optional):', options = ['']+filter, format_func = lambda x: x if x else "<Select>", key=108)
umapsupervisor = [None if supervised == '' else md_df_st_[supervised]][0]
else:
supervised = st.selectbox('Supervised UMAP by:', options = ["Meta-data is not available"], disabled=True, format_func = lambda x: x if x else "<Select>", key=108)
umapsupervisor = None
p_hash(supervised)
disablewidgets = [False if dim_red_method else True][0]
clus_method = st.selectbox("Clustering techniques(optional): ", options = ['']+cluster_methods, index = default_clustering_option, key = 38, format_func = lambda x: x if x else "<Select>", disabled= disablewidgets)
# if disablewidgets == False and dim_red_method in dim_red_methods:
# inf = st.info('Info: Select a clustering technique!')
if dim_red_method:
@st.cache_data
def dimensionality_reduction(change):
    """Build the reduction model matching the selected ``dim_red_method``.

    Reads ``dim_red_method``, ``xc``, ``spectra`` and ``umapsupervisor`` from
    the enclosing script; ``change`` is not read in the body.

    Returns:
        The object created by ``LinearPCA``, ``Umap`` or ``Nmf``.
    """
    if dim_red_method == "PCA":
        reducer = LinearPCA(xc, Ncomp=8)
    elif dim_red_method == "UMAP":
        scaled = MinMaxScale(spectra)
        reducer = Umap(numerical_data=scaled, cat_data=umapsupervisor)
    elif dim_red_method == 'NMF':
        reducer = Nmf(spectra, Ncomp=3)
    return reducer
dr_model = dimensionality_reduction(change = hash_)
if dr_model:
axis1 = bb3.selectbox("x-axis", options = dr_model.scores_.columns, index=0)
axis2 = bb4.selectbox("y-axis", options = dr_model.scores_.columns, index=1)
axis3 = bb5.selectbox("z-axis", options = dr_model.scores_.columns, index=2)
axis = np.unique([axis1, axis2, axis3])
p_hash(axis)
t = dr_model.scores_.loc[:,np.unique(axis)]
tcr = standardize(t)

DIANE
committed
if dim_red_method == 'UMAP':
scores = st.container()
else:
scores, loadings= st.columns([3,3])
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
if not spectra.empty:
sel_ratio = bb2.number_input('Enter the number/fraction of samples to be selected:',min_value=0.01, max_value=float("{:.2f}".format(spectra.shape[0])), value=0.20, format="%.2f", disabled= disablewidgets)
if sel_ratio:
p_hash(sel_ratio)
if sel_ratio > 1.00:
ratio = int(sel_ratio)
elif sel_ratio < 1.00:
ratio = int(sel_ratio*spectra.shape[0])
if dr_model and not clus_method:
clus_method = bb2.radio('Select samples selection strategy:',
options = ['RDM', 'KS'],)
elif dr_model and clus_method:
# sel_ratio = bb2.number_input('Enter the ratio/precentage of samples to be selected:',min_value=0.01, max_value=float("{:.2f}".format(spectra.shape[0])), value=0.20, format="%.2f")
# p_hash(sel_ratio)
# if sel_ratio > 1.00:
# ratio = int(sel_ratio)
# elif sel_ratio < 1.00:
# ratio = int(sel_ratio*spectra.shape[0])
if clus_method in cluster_methods:
selection = bb2.radio('Select samples selection strategy:',
options = selec_strategy, index = default_sample_selection_option,key=102,disabled = False)
else:
selection = bb2.radio('Select samples selection strategy:',
options = selec_strategy, horizontal=True, key=102,disabled = True)
if dr_model and sel_ratio:
cl_model = Sk_Kmeans(tcr, max_clusters = ratio)
data, labels, clu_centers = cl_model.fit_optimal_
ncluster = clu_centers.shape[0]
cl_model = Hdbscan(np.array(tcr))
labels, clu_centers, non_clustered = cl_model.labels_,cl_model.centers_, cl_model.non_clustered
ncluster = len(clu_centers)
# 3- Affinity propagation
case 'AP':
cl_model = AP(X = tcr)
data, labels, clu_centers = cl_model.fit_optimal_
ncluster = len(clu_centers)
case 'KS':
cl_model = RDM(x = tcr, rset = ratio)
# if clus_method in cluster_methods:
# inf.empty()
_, selected_samples_idx = cl_model.calset
labels = ["ind"]*n_samples
ncluster = "1"
selection_number = 'None'
selection = 'None'
# #################################################### III - Samples selection using the reduced data presentation ######

DIANE
committed
if not labels:
custom_color_palette = px.colors.qualitative.Plotly[:1]
elif labels:
num_clusters = len(np.unique(labels))
custom_color_palette = px.colors.qualitative.Plotly[:num_clusters]
match selection:
# Strategy 0
case 'center':
# list samples at clusters centers - Use sklearn.metrics.pairwise_distances_argmin if you want more than 1 sample per cluster
closest, _ = pairwise_distances_argmin_min(clu_centers, new_tcr)
selected_samples_idx = np.array(new_tcr.index)[list(closest)]
selected_samples_idx = selected_samples_idx.tolist()
#### Strategy 1
case 'random':
selection_number = int(ratio/num_clusters)
p_hash(selection_number)
s = np.array(labels)[np.where(np.array(labels) !='Non clustered')[0]]
for i in np.unique(s):
C = np.where(np.array(labels) == i)[0]
if C.shape[0] >= selection_number:
# scores.write(list(tcr.index)[labels== i])
km2 = KMeans(n_clusters = selection_number)
km2.fit(tcr.iloc[C,:])
clos, _ = pairwise_distances_argmin_min(km2.cluster_centers_, tcr.iloc[C,:])
selected_samples_idx.extend(tcr.iloc[C,:].iloc[list(clos)].index)
else:
selected_samples_idx.extend(new_tcr.iloc[C,:].index.to_list())
# ################################ Plots visualization ############################################

DIANE
committed
if meta_data.empty and clus_method in cluster_methods:
filter = ['', clus_method]
elif not meta_data.empty and clus_method in cluster_methods:
filter = ['',clus_method] + md_df_st_.columns.tolist()
elif not meta_data.empty and clus_method not in cluster_methods:
filter = [''] + md_df_st_.columns.tolist()
elif meta_data.empty and not clus_method in cluster_methods:
filter = []
with scores:
st.write('Scores plot')
tcr_plot = tcr.copy()
colfilter = st.selectbox('Color by:', options= filter,format_func = lambda x: x if x else "<Select>")
if colfilter in cluster_methods:
tcr_plot[colfilter] = labels
elif not meta_data.empty and colfilter in md_df_st_.columns.tolist():
tcr_plot[f'{colfilter} :'] = list(map(str.lower,md_df_st_.loc[:,colfilter]))
tcr_plot[f'{colfilter} :'] = ['sample'] * tcr_plot.shape[0]
col_var_name = tcr_plot.columns.tolist()[-1]
n_categories = len(np.unique(tcr_plot[col_var_name]))
custom_color_palette = px.colors.qualitative.Plotly[:n_categories]
with scores:
if selected_samples_idx:# color selected samples
t_selected = tcr_plot.iloc[selected_samples_idx,:]
match t.shape[1]:
case 3:
fig = px.scatter_3d(tcr_plot, x = axis[0], y = axis[1], z = axis[2], color = col_var_name ,color_discrete_sequence = custom_color_palette)
fig.update_traces(marker=dict(size=4))
if selected_samples_idx:# color selected samples
fig.add_scatter3d(x = t_selected.loc[:,axis[0]], y = t_selected.loc[:,axis[1]], z = t_selected.loc[:,axis[2]],
mode ='markers', marker = dict(size = 5, color = 'black'), name = 'selected samples')
case 2:
fig = px.scatter(tcr_plot, x = axis[0], y = axis[1], color = col_var_name ,color_discrete_sequence = custom_color_palette)
if selected_samples_idx:# color selected samples
fig.add_scatter(x = t_selected.loc[:,axis[0]], y = t_selected.loc[:,axis[1]],
mode ='markers', marker = dict(size = 5, color = 'black'), name = 'selected samples')
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
case 1:
fig = px.scatter(tcr_plot, x = axis[0], y = [0]*tcr_plot.shape[0], color = col_var_name ,color_discrete_sequence = custom_color_palette)
fig.add_scatter(x = t_selected.loc[:,axis[0]], y = [0]*tcr_plot.shape[0],
mode ='markers', marker = dict(size = 5, color = 'black'), name = 'selected samples')
fig.update_yaxes(visible=False)
st.plotly_chart(fig, use_container_width = True)
if labels:
fig_export = {}
# export 2D scores plot
if len(axis)== 3:
comb = [i for i in combinations(np.arange(len(axis)), 2)]
subcap = ['a','b','c']
for i in range(len(comb)):
fig_= px.scatter(tcr_plot, x = axis[(comb[i][0])], y=axis[(comb[i][1])],color = labels if list(labels) else None,color_discrete_sequence = custom_color_palette)
fig_.add_scatter(x = t_selected.loc[:,axis[(comb[i][0])]], y = t_selected.loc[:,axis[(comb[i][1])]], mode ='markers', marker = dict(size = 5, color = 'black'),
name = 'selected samples')
fig_.update_layout(font=dict(size=23))
fig_.add_annotation(text= f'({subcap[i]})', align='center', showarrow= False, xref='paper', yref='paper', x=-0.13, y= 1,
font= dict(color= "black", size= 35), bgcolor ='white', borderpad= 2, bordercolor= 'black', borderwidth= 3)
fig_.update_traces(marker=dict(size= 10), showlegend= False)
fig_export[f'scores_pc{comb[i][0]}_pc{comb[i][1]}'] = fig_
# fig_export.write_image(f'./Report/out/figures/scores_pc{str(comb[i][0])}_pc{str(comb[i][1])}.png')
else:
fig_export['fig'] = fig
with loadings:
st.write('Loadings plot')
p = dr_model.loadings_
if meta_data.loc[:,'xunits'][0] == '1/cm':
freq.columns = ['Wavenumber (1/cm)']
pp = pd.concat([p, freq], axis=1)
#########################################
df1 = pp.melt(id_vars=freq.columns)
loadingsplot = px.line(df1, x=freq.columns, y='value', color='variable', color_discrete_sequence=px.colors.qualitative.Plotly)
loadingsplot.update_layout(legend=dict(x=1, y=0, font=dict(family="Courier", size=12, color="black"),
bordercolor="black", borderwidth=2))
loadingsplot.update_layout(xaxis_title = xlab,yaxis_title = "Intensity" ,xaxis = dict(autorange= inv))
st.plotly_chart(loadingsplot, use_container_width=True)
#############################################################################################################

DIANE
committed
influence, hotelling = st.columns([3, 3])
with influence:
st.write('Influence plot')
# Leverage
Hat = t.to_numpy() @ np.linalg.inv(np.transpose(t.to_numpy()) @ t.to_numpy()) @ np.transpose(t.to_numpy())
leverage = np.diag(Hat) / np.trace(Hat)

DIANE
committed
tresh3 = 2 * tcr.shape[1]/n_samples
# Matrix reconstruction
xp = np.dot(t,p.T)
# Q residuals: Q residuals represent the magnitude of the variation remaining in each sample after projection through the model
residuals = np.diag(np.subtract(xc.to_numpy(), xp)@ np.subtract(xc.to_numpy(), xp).T)

DIANE
committed
l1 = ["Samples"]* n_samples
# elif meta_data.empty and clus_method:
# l1 = labels
# elif meta_data.empty and not clus_method:
# l1 = ["Samples"]* n_samples
# elif not meta_data.empty and not clus_method:
# l1 = list(map(str.lower,md_df_st_[col]))
tcr_plot["leverage"] = leverage
tcr_plot["residuals"] = residuals
influence_plot = px.scatter(data_frame =tcr_plot, x = "leverage", y = "residuals", color=col_var_name,
influence_plot.add_vline(x = tresh3, line_width = 1, line_dash = 'solid', line_color = 'red')
influence_plot.add_hline(y=tresh4, line_width=1, line_dash='solid', line_color='red')
influence_plot.update_layout(xaxis_title="Leverage", yaxis_title = "Q-residuals", font=dict(size=20), width=800, height=600)
out3 = leverage > tresh3
out4 = residuals > tresh4

DIANE
committed
for i in range(n_samples):
if out3[i]:
if not meta_data.empty:
ann = meta_data.loc[:,'name'][i]
else:
ann = t.index[i]
influence_plot.add_annotation(dict(x = leverage[i], y = residuals[i], showarrow=True, text = str(ann),font= dict(color= "black", size= 15),
influence_plot.update_traces(marker=dict(size= 6), showlegend= True)
influence_plot.update_layout(font=dict(size=23), width=800, height=500)
st.plotly_chart(influence_plot, use_container_width=True)
for annotation in influence_plot.layout.annotations:
influence_plot.update_layout(font=dict(size=23), width=800, height=600)
influence_plot.update_traces(marker=dict(size= 10), showlegend= False)
influence_plot.add_annotation(text= '(a)', align='center', showarrow= False, xref='paper', yref='paper', x=-0.125, y= 1,
font= dict(color= "black", size= 35), bgcolor ='white', borderpad= 2, bordercolor= 'black', borderwidth= 3)
# influence_plot.write_image('./Report/out/figures/influence_plot.png', engine = 'kaleido')

DIANE
committed
# Hotelling
hotelling = t.var(axis = 1)
# Q residuals: Q residuals represent the magnitude of the variation remaining in each sample after projection through the model
residuals = np.diag(np.subtract(xc.to_numpy(), xp)@ np.subtract(xc.to_numpy(), xp).T)

DIANE
committed
fcri = sc.stats.f.isf(0.05, 3, n_samples)
tresh0 = (3 * (n_samples ** 2 - 1) * fcri) / (n_samples * (n_samples - 3))
hotelling_plot = px.scatter(t, x = hotelling, y = residuals, color=labels if list(labels) else None,
hotelling_plot.update_layout(xaxis_title="Hotelling-T² distance",yaxis_title="Q-residuals")
hotelling_plot.add_vline(x=tresh0, line_width=1, line_dash='solid', line_color='red')
hotelling_plot.add_hline(y=tresh1, line_width=1, line_dash='solid', line_color='red')
out0 = hotelling > tresh0
out1 = residuals > tresh1

DIANE
committed
for i in range(n_samples):
if out0[i]:
if not meta_data.empty:
ann = meta_data.loc[:,'name'][i]
else:
ann = t.index[i]
hotelling_plot.add_annotation(dict(x = hotelling[i], y = residuals[i], showarrow=True, text = str(ann), font= dict(color= "black", size= 15),
hotelling_plot.update_traces(marker=dict(size= 6), showlegend= True)
hotelling_plot.update_layout(font=dict(size=23), width=800, height=500)
st.plotly_chart(hotelling_plot, use_container_width=True)
for annotation in hotelling_plot.layout.annotations:
hotelling_plot.update_layout(font=dict(size=23), width=800, height=600)
hotelling_plot.update_traces(marker=dict(size= 10), showlegend= False)
hotelling_plot.add_annotation(text= '(b)', align='center', showarrow= False, xref='paper', yref='paper', x=-0.125, y= 1,
font= dict(color= "black", size= 35), bgcolor ='white', borderpad= 2, bordercolor= 'black', borderwidth= 3)
# hotelling_plot.write_image("./Report/out/figures/hotelling_plot.png", format="png")

DIANE
committed
st.header('III - Selected Samples for Reference Analysis', divider='blue')
if labels:
sel, info = st.columns([3, 1])
sel.write("Tabular identifiers of selected samples for reference analysis:")
if selected_samples_idx:
if meta_data.empty:
sam1 = pd.DataFrame({'name': spectra.index[clustered][selected_samples_idx],
'cluster':np.array(labels)[clustered][selected_samples_idx]},
index = selected_samples_idx)
else:
sam1 = meta_data.iloc[clustered,:].iloc[selected_samples_idx,:]
sam1.insert(loc=0, column='index', value=selected_samples_idx)
sam1.insert(loc=1, column='cluster', value=np.array(labels)[selected_samples_idx])
sam1.index = np.arange(len(selected_samples_idx))+1
info.info(f'Information !\n - The total number of samples: {n_samples}.\n- The number of samples selected for reference analysis: {sam1.shape[0]}.\n - The proportion of samples selected for reference analysis: {round(sam1.shape[0]/n_samples*100)}%.')
sam = sam1
# if clus_method == cluster_methods[2]:
# unclus = sel.checkbox("Include non clustered samples (for HDBSCAN clustering)", value=True)

DIANE
committed
if clus_method == cluster_methods[2]:
unclus = sel.checkbox("Include non clustered samples (for HDBSCAN clustering)", value=True)
if selected_samples_idx:
if unclus:
if meta_data.empty:
sam2 = pd.DataFrame({'name': spectra.index[non_clustered],
'cluster':['Non clustered']*len(spectra.index[non_clustered])},
index = spectra.index[non_clustered])
else :
sam2 = meta_data.iloc[non_clustered,:]
sam2.insert(loc=0, column='index', value= spectra.index[non_clustered])
sam2.insert(loc=1, column='cluster', value=['Non clustered']*len(spectra.index[non_clustered]))
sam = pd.concat([sam1, sam2], axis = 0)
sam.index = np.arange(sam.shape[0])+1
info.info(f'- The number of Non-clustered samples: {sam2.shape[0]}.\n - The proportion of Non-clustered samples: {round(sam2.shape[0]/n_samples*100)}%')

DIANE
committed
else:
sam = sam1
sel.write(sam)

DIANE
committed
Nb_ech = str(n_samples)
nb_clu = str(sam1.shape[0])
# ## generate report
@st.cache_data
def export_report(change):
latex_report = report.report('Representative subset selection', file.name, dim_red_method,
clus_method, Nb_ech, ncluster, selection, selection_number, nb_clu,tcr, sam)

Nicolas Barthes
committed
def preparing_results_for_downloading(change):
match extension:
# load csv file
case 'csv':
imp.to_csv('Report/out/dataset/'+ file.name, sep = ';', encoding = 'utf-8', mode = 'a')
case 'dx':
with open('Report/out/dataset/'+file.name, 'w') as dd:
dd.write(dxdata)
fig_spectra.savefig("./Report/out/figures/spectra_plot.png", dpi=400) ## Export report
if len(axis) == 3:
for i in range(len(comb)):
fig_export[f'scores_pc{comb[i][0]}_pc{comb[i][1]}'].write_image(f'./Report/out/figures/scores_pc{str(comb[i][0]+1)}_pc{str(comb[i][1]+1)}.png')
elif len(axis)==2 :
fig_export['fig'].write_image(f'./Report/out/figures/scores_plot2D.png')
elif len(axis)==1 :
fig_export['fig'].write_image(f'./Report/out/figures/scores_plot1D.png')
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
# Export du graphique
if dim_red_method in ['PCA','NMF']:
img = pio.to_image(loadingsplot, format="png")
with open("./Report/out/figures/loadings_plot.png", "wb") as f:
f.write(img)
if dim_red_method == 'PCA':
hotelling_plot.write_image("./Report/out/figures/hotelling_plot.png", format="png")
influence_plot.write_image('./Report/out/figures/influence_plot.png', engine = 'kaleido')
sam.to_csv('./Report/out/Selected_subset_for_calib_development.csv', sep = ';')
export_report(change = hash_)
if Path("./Report/report.tex").exists():
report.generate_report(change = hash_)
if Path("./Report/report.pdf").exists():
shutil.move("./Report/report.pdf", "./Report/out/report.pdf")
return change
preparing_results_for_downloading(change = hash_)
report.generate_report(change = hash_)
st.header('Download the analysis results')
import tempfile
# tempdir(change): package the contents of ./Report/out as a zip archive and return it.
#   - creates a temporary directory (prefix "results") inside ./Report
#   - when ./Report/out/figures/ holds at least two entries, zips the "out" folder,
#     moves the archive into the temporary directory and reads it back as bytes
#   - returns (name of the temporary directory, archive bytes)
#   - `change` is not read in the body
# NOTE(review): the original indentation was lost, so whether the file read and the
# `return` sit inside the `if` / `with` blocks cannot be told from here - confirm
# against the repository before restyling this function.
@st.cache_data
def tempdir(change):
with tempfile.TemporaryDirectory( prefix="results", dir="./Report") as temp_dir:# create a temp directory
# keep only the last path component, i.e. the directory name under ./Report
tempdirname = os.path.split(temp_dir)[1]
if len(os.listdir('./Report/out/figures/'))>=2:
shutil.make_archive(base_name="./Report/Results", format="zip", base_dir="out", root_dir = "./Report")# create a zip file
shutil.move("./Report/Results.zip", f"./Report/{tempdirname}/Results.zip")# put the archive inside the temp dir
with open(f"./Report/{tempdirname}/Results.zip", "rb") as f:
zip_data = f.read()
return tempdirname, zip_data
date_time = datetime.datetime.now().strftime('%y%m%d%H%M')
try :
tempdirname, zip_data = tempdir(change = hash_)
st.download_button(label = 'Download', data = zip_data, file_name = f'Nirs_Workflow_{date_time}_SamSel_.zip', mime ="application/zip",
args = None, kwargs = None,type = "primary",use_container_width = True)
except: