diff --git a/src/pages/1-samples_selection.py b/src/pages/1-samples_selection.py index 83314bd23b947f4ad41b9556c247c6088f1b8d2f..b430ec5b77b107ac7d1db0763aa0fa2203e852f5 100644 --- a/src/pages/1-samples_selection.py +++ b/src/pages/1-samples_selection.py @@ -267,7 +267,15 @@ if not t.empty: case 'RDM': rset = scores.number_input(min_value=0, max_value=100, value=20, label = 'The ratio of data to be sampled (%)') - cl_model = RDM(x = tcr, rset = rset) + cl_model = RDM(x = tcr, rset = rset) + + if clus_method in ['KS', 'RDM']: + calset = cl_model.calset + labels = ["ind"]*n_samples + ncluster = "1" + selection_number = 'None' + selected_samples_idx = calset[1] + selection = 'None' new_tcr = tcr.iloc[clustered,:] @@ -283,41 +291,41 @@ elif labels: num_clusters = len(np.unique(labels)) custom_color_palette = px.colors.qualitative.Plotly[:num_clusters] if clus_method: - if clus_method in ['KS', 'RDM']: - calset = cl_model.calset - labels = ["ind"]*n_samples - ncluster = "1" - selection_number = 'None' - selected_samples_idx = calset[1] - selection = 'None' - else: - selection = scores.radio('Select samples selection strategy:', - options = selec_strategy, index = default_sample_selection_option, key=102) - - match selection: - # Strategy 0 - case 'center': - # list samples at clusters centers - Use sklearn.metrics.pairwise_distances_argmin if you want more than 1 sample per cluster - closest, _ = pairwise_distances_argmin_min(clu_centers, new_tcr) - selected_samples_idx = np.array(new_tcr.index)[list(closest)] - selected_samples_idx = selected_samples_idx.tolist() - - #### Strategy 1 - case 'random': - selection_number = scores.number_input('How many samples per cluster?', - min_value = 1, step=1, value = round(n_samples*0.1)) - s = np.array(labels)[np.where(np.array(labels) !='Non clustered')[0]] - for i in np.unique(s): - C = np.where(np.array(labels) == i)[0] - if C.shape[0] >= selection_number: - # scores.write(list(tcr.index)[labels== i]) - km2 = KMeans(n_clusters = selection_number) - km2.fit(tcr.iloc[C,:]) - clos, _ = pairwise_distances_argmin_min(km2.cluster_centers_, tcr.iloc[C,:]) - selected_samples_idx.extend(tcr.iloc[C,:].iloc[list(clos)].index) - else: - selected_samples_idx.extend(new_tcr.iloc[C,:].index.to_list()) - # list indexes of selected samples for colored plot + # if clus_method in ['KS', 'RDM']: + # calset = cl_model.calset + # labels = ["ind"]*n_samples + # ncluster = "1" + # selection_number = 'None' + # selected_samples_idx = calset[1] + # selection = 'None' + # else: + selection = scores.radio('Select samples selection strategy:', + options = selec_strategy, index = default_sample_selection_option, key=102) + + match selection: + # Strategy 0 + case 'center': + # list samples at clusters centers - Use sklearn.metrics.pairwise_distances_argmin if you want more than 1 sample per cluster + closest, _ = pairwise_distances_argmin_min(clu_centers, new_tcr) + selected_samples_idx = np.array(new_tcr.index)[list(closest)] + selected_samples_idx = selected_samples_idx.tolist() + + #### Strategy 1 + case 'random': + selection_number = scores.number_input('How many samples per cluster?', + min_value = 1, step=1, value = round(n_samples*0.1)) + s = np.array(labels)[np.where(np.array(labels) !='Non clustered')[0]] + for i in np.unique(s): + C = np.where(np.array(labels) == i)[0] + if C.shape[0] >= selection_number: + # scores.write(list(tcr.index)[labels== i]) + km2 = KMeans(n_clusters = selection_number) + km2.fit(tcr.iloc[C,:]) + clos, _ = pairwise_distances_argmin_min(km2.cluster_centers_, tcr.iloc[C,:]) + selected_samples_idx.extend(tcr.iloc[C,:].iloc[list(clos)].index) + else: + selected_samples_idx.extend(new_tcr.iloc[C,:].index.to_list()) + # list indexes of selected samples for colored plot ################################ Plots visualization ############################################