diff --git a/.idea/vcs.xml b/.idea/vcs.xml
new file mode 100644
index 0000000000000000000000000000000000000000..35eb1ddfbbc029bcab630581847471d7f238ec53
--- /dev/null
+++ b/.idea/vcs.xml
@@ -0,0 +1,6 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<project version="4">
+  <component name="VcsDirectoryMappings">
+    <mapping directory="" vcs="Git" />
+  </component>
+</project>
\ No newline at end of file
diff --git a/app.py b/app.py
index 6cc896d1fb8ea68b59d881c7aba8ad3cac75aafe..0bac962ed2f64a1255e59383a9eee65725291175 100644
--- a/app.py
+++ b/app.py
@@ -48,8 +48,7 @@ with st.container():
             col = 0
         else:
             col = False
-    import_button = settings_column.button('Import')
-    if import_button:
+
         data_import = pd.read_csv(sselectx_csv, sep=psep, index_col=col)
         # compute PCA - pca_maker function in application_functions.py
         pca_data, cat_cols, pca_cols = pca_maker(data_import)
@@ -58,11 +57,11 @@ with st.container():
         pca_2 = settings_column.selectbox("Second Principle Component", options=pca_cols, index=1)
         # if categorical variables exist, add 2 select lists to choose the categorical variables to color the PCA
         if cat_cols[0] == "no categories":
-            scatter_column.plotly_chart(px.scatter(data_frame=pca_data, x=pca_1, y=pca_2, template="simple_white", height=800, hover_name=pca_data.index, title="PCA plot of sample spectra"))
+            plot_pca = scatter_column.plotly_chart(px.scatter(data_frame=pca_data, x=pca_1, y=pca_2, template="simple_white", height=800, hover_name=pca_data.index, title="PCA plot of sample spectra"))
         else:
             categorical_variable = settings_column.selectbox("Variable Select", options = cat_cols)
             categorical_variable_2 = settings_column.selectbox("Second Variable Select (hover data)", options = cat_cols)
-            scatter_column.plotly_chart(px.scatter(data_frame=pca_data, x=pca_1, y=pca_2, template="simple_white", height=800, color=categorical_variable, hover_data = [categorical_variable_2], hover_name=pca_data.index, title="PCA plot of sample spectra"))
+            plot_pca = scatter_column.plotly_chart(px.scatter(data_frame=pca_data, x=pca_1, y=pca_2, template="simple_white", height=800, color=categorical_variable, hover_data = [categorical_variable_2], hover_name=pca_data.index, title="PCA plot of sample spectra"))
         #K-Means
         ## K-Means choose number of clusters
         wcss_samples = []
@@ -72,14 +71,14 @@ with st.container():
             kmeans_samples = km(n_clusters = i, init = 'k-means++', random_state = 42)
             kmeans_samples.fit(pca_data.loc[:,[pca_1,pca_2]])
             wcss_samples.append(kmeans_samples.inertia_)
-        settings_column.plotly_chart(px.line(x=clusters_sample, y=wcss_samples, title="K-Means clusters number selection", width=200))
-        # scatter_column.plotly_chart(px.line(x=clusters_sample, y=wcss_samples, title="K-Means clusters number selection"))
+        settings_column.plotly_chart(px.line(x=clusters_sample, y=wcss_samples, title="K-Means clusters nb sel", width=200))
         ## Draw clustering
         nb_select = settings_column.slider("Choose cluster number (K-Means)", min_value=2, max_value=cluster_max, value=5, format="%i")
         kmeans_samples = km(n_clusters=nb_select, random_state=42)
         kmeans_samples.fit(pca_data.loc[:,[pca_1,pca_2]])
-        # kmeans_samples.labels_
-        plot = scatter_column.plotly_chart(px.scatter(data_frame=pca_data, x=pca_1, y=pca_2, template="simple_white", height=800, color=kmeans_samples.labels_, hover_name=pca_data.index, title="PCA projection with K-Means Clusters"))
+        # plot the pca with clustering only (no selected samples)
+        # graph = px.scatter(data_frame=pca_data, x=pca_1, y=pca_2, template="simple_white", height=800, color=kmeans_samples.labels_, hover_name=pca_data.index, title="PCA projection with K-Means Clusters")
+        # plot = scatter_column.plotly_chart(graph)
         # choose between cluster centered sample and random samples
         selection = settings_column.select_slider('Centered samples or random ones', options=['center','random'])
         export = []
@@ -89,25 +88,30 @@ with st.container():
             closest, _ = pairwise_distances_argmin_min(kmeans_samples.cluster_centers_, pca_data.loc[:,[pca_1,pca_2]])
             scatter_column.dataframe(pca_data.loc[pca_data.index[closest],[pca_1,pca_2]], use_container_width=True)
             export.append(pca_data.loc[pca_data.index[closest],[pca_1,pca_2]].index.T)
-            # plot.empty()
+            # list indexes of selected samples for colored plot
+            te = pca_data.loc[pca_data.index[closest],[pca_1,pca_2]].index.values.tolist()
         elif selection == 'random':
             selection_number = settings_column.number_input('How many samples per cluster?', step=1, value = 3)
             for i in np.unique(kmeans_samples.labels_):
                 if len(pd.DataFrame(pca_data.loc[pca_data.index[kmeans_samples.labels_==i],[pca_1,pca_2]])) >= selection_number:
-                    # scatter_column.write('cluster number - ')
-                    # scatter_column.write(i)
-                    # scatter_column.write('_samples in this cluster_')
-                    # scatter_column.write(len(pd.DataFrame(pca_data.loc[pca_data.index[kmeans_samples.labels_==i],[pca_1,pca_2]])))
-                    # scatter_column.dataframe(pd.DataFrame(pca_data.loc[pca_data.index[kmeans_samples.labels_==i],[pca_1,pca_2]]).sample(n=selection_number))
                     export.append(pca_data.loc[pca_data.index[kmeans_samples.labels_==i]].sample(n=selection_number).index)
                 else:
-                    # scatter_column.write('cluster number - ')
-                    # scatter_column.write(i)
-                    # scatter_column.write("_whole cluster (not enough samples)_")
-                    # scatter_column.write(len(pd.DataFrame(pca_data.loc[pca_data.index[kmeans_samples.labels_==i],[pca_1,pca_2]])))
-                    # scatter_column.dataframe(pd.DataFrame(pca_data.loc[pca_data.index[kmeans_samples.labels_==i],[pca_1,pca_2]]))
                     export.append(pca_data.loc[pca_data.index[kmeans_samples.labels_==i]].index)
+            # list indexes of selected samples for colored plot
+            te = []
+            for sublist in export:
+                for item in sublist:
+                    te.append(item)
+            # display a matrix of selected samples
             scatter_column.write(pd.DataFrame(export).T)
+        # convert cluster labels to strings so they are colored as discrete categories and can hold the 'selected' marker
+        kmeans_samples.labels_ = kmeans_samples.labels_.astype(str)
+        for j in te:
+            kmeans_samples.labels_[pca_data.index.get_loc(j)] = 'selected'
+        # plot the PCA with colored clusters and the selected samples highlighted
+        graph_selected = px.scatter(data_frame=pca_data, x=pca_1, y=pca_2, template="simple_white", height=800, color=kmeans_samples.labels_, hover_name=pca_data.index, title="PCA projection with K-Means Clusters and selected samples")
+        plot = scatter_column.plotly_chart(graph_selected)
+        # button to export the names of selected samples - by cluster if random - in a csv
         if scatter_column.button('Export'):
             pd.DataFrame(export).T.to_csv('./data/Samples_for_Chemical_Analysis.csv')
     else: