From f25a975c37536bb473e74b3d52c9b94c40a20109 Mon Sep 17 00:00:00 2001
From: DIANE <abderrahim.diane@cefe.cnrs.fr>
Date: Thu, 8 Aug 2024 16:15:39 +0200
Subject: [PATCH] download

---
 src/pages/1-samples_selection.py | 76 +++++++++++++++++---------------
 1 file changed, 40 insertions(+), 36 deletions(-)

diff --git a/src/pages/1-samples_selection.py b/src/pages/1-samples_selection.py
index f46e65d..bc12af6 100644
--- a/src/pages/1-samples_selection.py
+++ b/src/pages/1-samples_selection.py
@@ -263,18 +263,11 @@ if not t.empty:
         case 'KS':
             rset = scores.number_input(min_value=0, max_value=100, value=20, label = 'The ratio of data to be sampled (%)')
             cl_model = KS(x = tcr, rset = rset)
-            calset = cl_model.calset
-            labels = ["ind"]*n_samples
-            ncluster = "1"
-            selection_number = 'None'
+
 
         case 'RDM':
             rset = scores.number_input(min_value=0, max_value=100, value=20, label = 'The ratio of data to be sampled (%)')
-            cl_model = RDM(x = tcr, rset = rset)
-            calset = cl_model.calset
-            labels = ["ind"]*n_samples
-            ncluster = "1"
-            selection_number = 'None'            
+            cl_model = RDM(x = tcr, rset = rset)         
 
     new_tcr = tcr.iloc[clustered,:]    
     
@@ -291,35 +284,39 @@ elif labels:
     custom_color_palette = px.colors.qualitative.Plotly[:num_clusters]
     if clus_method:
         if clus_method in ['KS', 'RDM']:
+            calset = cl_model.calset
+            labels = ["ind"]*n_samples
+            ncluster = "1"
+            selection_number = 'None'
             selected_samples_idx = calset[1]
             selection = 'None'
         else:
             selection = scores.radio('Select samples selection strategy:',
                                         options = selec_strategy, index = default_sample_selection_option, key=102)
         
-        match selection:
-        # Strategy 0
-            case 'center':
-                # list samples at clusters centers - Use sklearn.metrics.pairwise_distances_argmin if you want more than 1 sample per cluster
-                closest, _ = pairwise_distances_argmin_min(clu_centers, new_tcr)
-                selected_samples_idx = np.array(new_tcr.index)[list(closest)]
-                selected_samples_idx = selected_samples_idx.tolist()
-                
-            #### Strategy 1
-            case 'random':
-                selection_number = scores.number_input('How many samples per cluster?',
-                                                        min_value = 1, step=1, value = 3)
-                s = np.array(labels)[np.where(np.array(labels) !='Non clustered')[0]]
-                for i in np.unique(s):
-                    C = np.where(np.array(labels) == i)[0]
-                    if C.shape[0] >= selection_number:
-                        # scores.write(list(tcr.index)[labels== i])
-                        km2 = KMeans(n_clusters = selection_number)
-                        km2.fit(tcr.iloc[C,:])
-                        clos, _ = pairwise_distances_argmin_min(km2.cluster_centers_, tcr.iloc[C,:])
-                        selected_samples_idx.extend(tcr.iloc[C,:].iloc[list(clos)].index)
-                    else:
-                        selected_samples_idx.extend(new_tcr.iloc[C,:].index.to_list())
+            match selection:
+            # Strategy 0
+                case 'center':
+                    # List the samples closest to the cluster centers - use sklearn.metrics.pairwise_distances_argmin if you want more than 1 sample per cluster
+                    closest, _ = pairwise_distances_argmin_min(clu_centers, new_tcr)
+                    selected_samples_idx = np.array(new_tcr.index)[list(closest)]
+                    selected_samples_idx = selected_samples_idx.tolist()
+                    
+                #### Strategy 1
+                case 'random':
+                    selection_number = scores.number_input('How many samples per cluster?',
+                                                            min_value = 1, step=1, value = 3)
+                    s = np.array(labels)[np.where(np.array(labels) !='Non clustered')[0]]
+                    for i in np.unique(s):
+                        C = np.where(np.array(labels) == i)[0]
+                        if C.shape[0] >= selection_number:
+                            # scores.write(list(tcr.index)[labels== i])
+                            km2 = KMeans(n_clusters = selection_number)
+                            km2.fit(tcr.iloc[C,:])
+                            clos, _ = pairwise_distances_argmin_min(km2.cluster_centers_, tcr.iloc[C,:])
+                            selected_samples_idx.extend(tcr.iloc[C,:].iloc[list(clos)].index)
+                        else:
+                            selected_samples_idx.extend(new_tcr.iloc[C,:].index.to_list())
                     # list indexes of selected samples for colored plot    
 
 ################################      Plots visualization          ############################################
@@ -591,14 +588,21 @@ if labels:
         else:
             sam = sam1
         sel.write(sam)
-        
+
+Nb_ech = str(n_samples)
+nb_clu = str(sam1.shape[0])
+for i in ['Representative subset selection', data_file.name, dim_red_method,
+                                  clus_method, Nb_ech, ncluster, selection, selection_number, nb_clu]:
+    st.write(i)
 
 # st.write(hash_data(change = './Report/report.tex'))
-with open('./Report/report.tex') as myfile:
-    filehash = hash_data(myfile.read())
+# file_pathtex = Path('./Report/report.tex') 
+# if file_pathtex.is_file():
+#     with open('./Report/report.tex') as myfile:
+#         filehash = hash_data(myfile.read())
 
 # figs_list = os.listdir("./Report/figures")
-if data_file:
+if not (sam.empty and sam1.empty):
     Nb_ech = str(n_samples)
     nb_clu = str(sam1.shape[0])
     ###################################################
-- 
GitLab