From f25a975c37536bb473e74b3d52c9b94c40a20109 Mon Sep 17 00:00:00 2001 From: DIANE <abderrahim.diane@cefe.cnrs.fr> Date: Thu, 8 Aug 2024 16:15:39 +0200 Subject: [PATCH] download --- src/pages/1-samples_selection.py | 76 +++++++++++++++++--------------- 1 file changed, 40 insertions(+), 36 deletions(-) diff --git a/src/pages/1-samples_selection.py b/src/pages/1-samples_selection.py index f46e65d..bc12af6 100644 --- a/src/pages/1-samples_selection.py +++ b/src/pages/1-samples_selection.py @@ -263,18 +263,11 @@ if not t.empty: case 'KS': rset = scores.number_input(min_value=0, max_value=100, value=20, label = 'The ratio of data to be sampled (%)') cl_model = KS(x = tcr, rset = rset) - calset = cl_model.calset - labels = ["ind"]*n_samples - ncluster = "1" - selection_number = 'None' + case 'RDM': rset = scores.number_input(min_value=0, max_value=100, value=20, label = 'The ratio of data to be sampled (%)') - cl_model = RDM(x = tcr, rset = rset) - calset = cl_model.calset - labels = ["ind"]*n_samples - ncluster = "1" - selection_number = 'None' + cl_model = RDM(x = tcr, rset = rset) new_tcr = tcr.iloc[clustered,:] @@ -291,35 +284,39 @@ elif labels: custom_color_palette = px.colors.qualitative.Plotly[:num_clusters] if clus_method: if clus_method in ['KS', 'RDM']: + calset = cl_model.calset + labels = ["ind"]*n_samples + ncluster = "1" + selection_number = 'None' selected_samples_idx = calset[1] selection = 'None' else: selection = scores.radio('Select samples selection strategy:', options = selec_strategy, index = default_sample_selection_option, key=102) - match selection: - # Strategy 0 - case 'center': - # list samples at clusters centers - Use sklearn.metrics.pairwise_distances_argmin if you want more than 1 sample per cluster - closest, _ = pairwise_distances_argmin_min(clu_centers, new_tcr) - selected_samples_idx = np.array(new_tcr.index)[list(closest)] - selected_samples_idx = selected_samples_idx.tolist() - - #### Strategy 1 - case 'random': - selection_number = scores.number_input('How many samples per cluster?', - min_value = 1, step=1, value = 3) - s = np.array(labels)[np.where(np.array(labels) !='Non clustered')[0]] - for i in np.unique(s): - C = np.where(np.array(labels) == i)[0] - if C.shape[0] >= selection_number: - # scores.write(list(tcr.index)[labels== i]) - km2 = KMeans(n_clusters = selection_number) - km2.fit(tcr.iloc[C,:]) - clos, _ = pairwise_distances_argmin_min(km2.cluster_centers_, tcr.iloc[C,:]) - selected_samples_idx.extend(tcr.iloc[C,:].iloc[list(clos)].index) - else: - selected_samples_idx.extend(new_tcr.iloc[C,:].index.to_list()) + match selection: + # Strategy 0 + case 'center': + # list samples at clusters centers - Use sklearn.metrics.pairwise_distances_argmin if you want more than 1 sample per cluster + closest, _ = pairwise_distances_argmin_min(clu_centers, new_tcr) + selected_samples_idx = np.array(new_tcr.index)[list(closest)] + selected_samples_idx = selected_samples_idx.tolist() + + #### Strategy 1 + case 'random': + selection_number = scores.number_input('How many samples per cluster?', + min_value = 1, step=1, value = 3) + s = np.array(labels)[np.where(np.array(labels) !='Non clustered')[0]] + for i in np.unique(s): + C = np.where(np.array(labels) == i)[0] + if C.shape[0] >= selection_number: + # scores.write(list(tcr.index)[labels== i]) + km2 = KMeans(n_clusters = selection_number) + km2.fit(tcr.iloc[C,:]) + clos, _ = pairwise_distances_argmin_min(km2.cluster_centers_, tcr.iloc[C,:]) + selected_samples_idx.extend(tcr.iloc[C,:].iloc[list(clos)].index) + else: + selected_samples_idx.extend(new_tcr.iloc[C,:].index.to_list()) # list indexes of selected samples for colored plot ################################ Plots visualization ############################################ @@ -591,14 +588,21 @@ if labels: else: sam = sam1 sel.write(sam) - + +Nb_ech = str(n_samples) +nb_clu = str(sam1.shape[0]) +for i in ['Representative subset selection', data_file.name, dim_red_method, + clus_method, Nb_ech, ncluster, selection, selection_number, nb_clu]: + st.write(i) # st.write(hash_data(change = './Report/report.tex')) -with open('./Report/report.tex') as myfile: - filehash = hash_data(myfile.read()) +# file_pathtex = Path('./Report/report.tex') +# if file_pathtex.is_file(): +# with open('./Report/report.tex') as myfile: +# filehash = hash_data(myfile.read()) # figs_list = os.listdir("./Report/figures") -if data_file: +if not (sam.empty and sam1.empty): Nb_ech = str(n_samples) nb_clu = str(sam1.shape[0]) ################################################### -- GitLab