Skip to content
Snippets Groups Projects
Commit a3c30c9f authored by DIANE's avatar DIANE
Browse files

samples_selection:

- export samples for wet-chem analysis
- export metadata if it exists
- color selected samples on scores plot
parent 8ad6a010
No related branches found
No related tags found
No related merge requests found
...@@ -21,4 +21,5 @@ class Sk_Kmeans: ...@@ -21,4 +21,5 @@ class Sk_Kmeans:
model.fit(self.x) model.fit(self.x)
yp = model.predict(self.x)+1 yp = model.predict(self.x)+1
clu = [f'cluster#{i}' for i in yp] clu = [f'cluster#{i}' for i in yp]
return self.x, clu
\ No newline at end of file return self.x, clu, model.cluster_centers_
\ No newline at end of file
...@@ -68,6 +68,9 @@ container2 = st.container(border=True) ...@@ -68,6 +68,9 @@ container2 = st.container(border=True)
container2.header("Exploratory Data Analysis-Multivariable Data Analysis", divider='blue') container2.header("Exploratory Data Analysis-Multivariable Data Analysis", divider='blue')
scores, loadings, pc = st.columns([2, 3, 0.5]) scores, loadings, pc = st.columns([2, 3, 0.5])
influence, hotelling, qexp = st.columns([2, 2, 1]) influence, hotelling, qexp = st.columns([2, 2, 1])
st.header('Selected samples for chemical analysis')
selected_s, selected_samples_metd = st.columns([3, 3])
selected_s.write('Samples scores')
dim_red_methods=['', 'PCA','UMAP', 'NMF'] # List of dimensionality reduction algos dim_red_methods=['', 'PCA','UMAP', 'NMF'] # List of dimensionality reduction algos
cluster_methods = ['', 'Kmeans','HDBSCAN', 'AP'] # List of clustering algos cluster_methods = ['', 'Kmeans','HDBSCAN', 'AP'] # List of clustering algos
...@@ -106,7 +109,7 @@ if not t.empty: ...@@ -106,7 +109,7 @@ if not t.empty:
cl_model = Sk_Kmeans(tcr, max_clusters = 30) cl_model = Sk_Kmeans(tcr, max_clusters = 30)
fig2 = px.scatter(cl_model.inertia_.T, y = 'inertia') fig2 = px.scatter(cl_model.inertia_.T, y = 'inertia')
scores.plotly_chart(fig2) scores.plotly_chart(fig2)
data, labels = cl_model.fit_optimal(nclusters = ncluster) data, labels, clu_centers = cl_model.fit_optimal(nclusters = ncluster)
elif clus_method == cluster_methods[2]: elif clus_method == cluster_methods[2]:
optimized_hdbscan = Hdbscan(model.scores_raw_) optimized_hdbscan = Hdbscan(model.scores_raw_)
...@@ -114,7 +117,51 @@ if not t.empty: ...@@ -114,7 +117,51 @@ if not t.empty:
##### Plots ##### Plots
## Scores
#####################################################################################################
# Strategies offered in the UI for picking the samples to send to wet-chem analysis.
selec_strategy = ['center','random']
# Instantiate the frame: the bare name `pd.DataFrame` is the class, not an empty table.
samples_df_chem = pd.DataFrame()
selected_samples = []
# Positional (row-number) indexes of the selected samples; they are used below with
# np.ndarray indexing, `spectra.index[...]` and `.iloc[...]`, so they must be positions,
# never index labels.
selected_samples_idx = []

# Explicit emptiness test: `if labels:` raises ValueError when labels is a NumPy array.
if labels is not None and len(labels) > 0:
    labels_arr = np.asarray(labels)
    selection = scores.radio('Select samples selection strategy:', options = selec_strategy)
    #################### selection strategy to be corrected
    if selection == selec_strategy[0]:
        # One sample per cluster: the observation closest to each cluster center.
        # Use sklearn.metrics.pairwise_distances_argmin if you want more than 1 sample per cluster.
        # NOTE(review): clu_centers is only assigned in the Kmeans branch visible in this
        # file section - confirm it is defined for the other clustering methods.
        closest, _ = pairwise_distances_argmin_min(clu_centers, tcr)
        selected_samples_idx = closest.tolist()
    elif selection == selec_strategy[1]:
        selection_number = scores.number_input('How many samples per cluster?', min_value = 1, step=1, value = 3)
        for i in np.unique(labels_arr):
            # Row positions (within tcr) of the members of cluster i.
            C = np.where(labels_arr == i)[0]
            if C.shape[0] >= selection_number:
                # Split the cluster into `selection_number` sub-clusters and keep the
                # member closest to each sub-center.
                cluster_scores = tcr.iloc[C,:]
                km2 = KMeans(n_clusters = selection_number)
                km2.fit(cluster_scores)
                clos, _ = pairwise_distances_argmin_min(km2.cluster_centers_, cluster_scores)
                # `clos` indexes rows of the cluster subset; map them back to row
                # positions in tcr (not to index labels).
                selected_samples_idx.extend(C[clos].tolist())
            else:
                # Cluster smaller than the requested count: keep all of its members.
                selected_samples_idx.extend(C.tolist())

    # Show the selected samples, and their meta-data when available.
    if selected_samples_idx:
        sam = pd.DataFrame({'cluster': labels_arr[selected_samples_idx],
                            'index': spectra.index[selected_samples_idx]})
        selected_s.write(sam)
        if not meta_data.empty:
            selected_samples_metd.write('Corresponding meta-data')
            # Copy so the added columns do not write into a view of meta_data.
            meta = meta_data.iloc[selected_samples_idx,:].copy()
            meta['cluster'] = labels_arr[selected_samples_idx]
            meta['index'] = spectra.index[selected_samples_idx]
            selected_samples_metd.write(meta)
## Scores
if not t.empty: if not t.empty:
with scores: with scores:
st.write('Scores plot') st.write('Scores plot')
...@@ -150,6 +197,12 @@ if not t.empty: ...@@ -150,6 +197,12 @@ if not t.empty:
else: else:
fig = px.scatter_3d(tcr, x=axis1, y=axis2, z = axis3) fig = px.scatter_3d(tcr, x=axis1, y=axis2, z = axis3)
fig.update_traces(marker=dict(size=4)) fig.update_traces(marker=dict(size=4))
if selected_samples_idx:
tt = tcr.iloc[selected_samples_idx,:]
fig.add_scatter3d(x = tt.loc[:,axis1], y = tt.loc[:,axis2],
z = tt.loc[:,axis3], mode ='markers', marker = dict(size = 7, color = 'black'),
name = 'selected samples')
st.plotly_chart(fig) st.plotly_chart(fig)
...@@ -181,4 +234,12 @@ if not spectra.empty: ...@@ -181,4 +234,12 @@ if not spectra.empty:
hotelling = dr_model.hotelling_ hotelling = dr_model.hotelling_
fig = px.scatter(t, x=hotelling[ax2], y=residuals[ax2]).update_layout(xaxis_title="",yaxis_title="Residuals") fig = px.scatter(t, x=hotelling[ax2], y=residuals[ax2]).update_layout(xaxis_title="",yaxis_title="Residuals")
st.plotly_chart(fig) st.plotly_chart(fig)
\ No newline at end of file
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment