From 8915f27a3e4aa259d09e8d4d3fac82d1e9a7eb09 Mon Sep 17 00:00:00 2001
From: DIANE <abderrahim.diane@cefe.cnrs.fr>
Date: Mon, 15 Apr 2024 09:48:34 +0200
Subject: [PATCH] color filters

---
 pages/1-samples_selection.py | 65 ++++++++++++++++++------------------
 1 file changed, 33 insertions(+), 32 deletions(-)

diff --git a/pages/1-samples_selection.py b/pages/1-samples_selection.py
index 76c2454..121d467 100644
--- a/pages/1-samples_selection.py
+++ b/pages/1-samples_selection.py
@@ -12,58 +12,57 @@ if st.session_state["interface"] == 'simple':
     hide_pages("Predictions")
 
 ################################### Data Loading and Visualization ########################################
-# container1 = st.header("Data loading",border=True)
 col2, col1 = st.columns([3, 1])
 col1.header("Data Loading", divider='blue')
 col2.header("Spectral Data Visualization", divider='blue')
 
 
 ## Preallocation of data structure
-data_import = pd.DataFrame
+spectra = pd.DataFrame
 meta_data = pd.DataFrame
 selected_samples = pd.DataFrame
 
 
-# loader for csv file containing NIRS spectra
-sselectx_csv = col1.file_uploader("Load NIRS Data", type=["csv","dx"], help=" :mushroom: select a csv matrix with samples as rows and lambdas as columns", key=5)
+# Uploader for the spectral data file (.csv or .dx)
+data_file = col1.file_uploader("Load NIRS Data", type=["csv","dx"], help=" :mushroom: select a csv matrix with samples as rows and lambdas as columns", key=5)
 
 
-#with container1:
-if sselectx_csv:
-    test = sselectx_csv.name[sselectx_csv.name.find('.'):]
+if data_file:
+    # Retrieve the file extension (everything from the first '.' in the name onwards)
+    test = data_file.name[data_file.name.find('.'):]
 
+    ## Load .csv file
     if test== '.csv':
         with col1:
             # Select list for CSV delimiter
-            psep = st.selectbox("Select csv separator - _detected_: " + str(find_delimiter('data/'+sselectx_csv.name)), options=[";", ","], index=[";", ","].index(str(find_delimiter('data/'+sselectx_csv.name))), key=9)
+            psep = st.selectbox("Select csv separator - _detected_: " + str(find_delimiter('data/'+data_file.name)), options=[";", ","], index=[";", ","].index(str(find_delimiter('data/'+data_file.name))), key=9)
                 # Select list for CSV header True / False
-            phdr = st.selectbox("indexes column in csv? - _detected_: " + str(find_col_index('data/'+sselectx_csv.name)), options=["no", "yes"], index=["no", "yes"].index(str(find_col_index('data/'+sselectx_csv.name))), key=31)
+            phdr = st.selectbox("indexes column in csv? - _detected_: " + str(find_col_index('data/'+data_file.name)), options=["no", "yes"], index=["no", "yes"].index(str(find_col_index('data/'+data_file.name))), key=31)
             if phdr == 'yes':
                 col = 0
             else:
                 col = False
-            imp = pd.read_csv(sselectx_csv, sep=psep, index_col=col)
-            data_import = col_cat(imp)[0]
+            imp = pd.read_csv(data_file, sep=psep, index_col=col)
+            spectra = col_cat(imp)[0]
             meta_data = col_cat(imp)[1]
             st.success("The data have been loaded successfully", icon="✅")
 
+    ## Load .dx file
     elif test == '.dx':
         # Create a temporary file to save the uploaded file
         with NamedTemporaryFile(delete=False, suffix=".dx") as tmp:
-            tmp.write(sselectx_csv.read())
+            tmp.write(data_file.read())
             tmp_path = tmp.name
             with col1:
-                _, data_import, meta_data = read_dx(file =  tmp_path)
+                _, spectra, meta_data = read_dx(file =  tmp_path)
                 st.success("The data have been loaded successfully", icon="✅")
         os.unlink(tmp_path)
 
 
-if not data_import.empty:
-    ## Visualize spectra
+## Visualize spectra
+if not spectra.empty:
     with col2:
-        fig = plot_spectra(data_import)
-
-        #plt.annotate(text = info.T, xy =(m, info.loc[:,"Max"]), size=20, color = 'black', backgroundcolor='red')
+        fig = plot_spectra(spectra)
         st.pyplot(fig)
 
 
@@ -73,20 +72,25 @@ container2.header("Exploratory Data Analysis-Multivariable Data Analysis", divid
 scores, loadings, pc = st.columns([2, 3, 0.5])
 influence, hotelling, qexp = st.columns([2, 2, 1])
 
-dim_red_methods=['', 'PCA','UMAP', 'NMF']
-cluster_methods = ['', 'Kmeans','HDBSCAN', 'AP']
-dr_model = None
-cl_model = None
+dim_red_methods=['', 'PCA','UMAP', 'NMF']  # List of dimensionality reduction algorithms
+cluster_methods = ['', 'Kmeans','HDBSCAN', 'AP'] # List of clustering algorithms
+
+dr_model = None # dimensionality reduction model
+cl_model = None # clustering model
 
 # Dimensionality reduction
-t = pd.DataFrame
-if not data_import.empty:
+t = pd.DataFrame # scores
+p = pd.DataFrame # loadings
+labels = []
+if not spectra.empty:
     dim_red_method = pc.selectbox("Dimensionality reduction techniques: ", options = dim_red_methods, key = 37)
     clus_method = pc.selectbox("Clustering techniques: ", options = cluster_methods, key = 38)
+    xc = standardize(spectra) 
+
     if dim_red_method == dim_red_methods[1]:
-        dr_model = LinearPCA(data_import, Ncomp=5)
+        dr_model = LinearPCA(xc, Ncomp=5)
     elif dim_red_method == dim_red_methods[2]:
-        dr_model = Umap(x = data_import, n_components = 5, n_neighbors = 20 , min_dist = 0)
+        dr_model = Umap(x = xc, n_components = 5, n_neighbors = 20 , min_dist = 0)
         
     if dr_model:
         axis1 = pc.selectbox("x-axis", options = dr_model.scores_.columns, index=0)
@@ -96,7 +100,6 @@ if not data_import.empty:
 
 
 # clustering
-labels = pd.DataFrame
 if not t.empty:
         # Clustering
         if clus_method == cluster_methods[1]:
@@ -106,21 +109,19 @@ if not t.empty:
             scores.plotly_chart(fig2)
             data, labels = cl_model.fit_optimal(nclusters = ncluster)
 
-        elif clus_method == cluster_methods[1]:
+        elif clus_method == cluster_methods[2]:
                 from hdbscan import HDBSCAN_function
                 labels, hdbscan_score = HDBSCAN_function(t, min_cluster_size=10)
                 
 
 ##### Plots 
 
-
 ## Scores
-
 if not t.empty:
     with scores:
         st.write('Scores plot')
         # scores plot with clustering
-        if not pd.DataFrame(labels).empty:
+        if list(labels):
             fig = px.scatter_3d(t, x=axis1, y=axis2, z = axis3, color = labels)
         else:
         # scores plot with metadata
@@ -140,7 +141,7 @@ if not t.empty:
 
 
 
-if not data_import.empty:
+if not spectra.empty:
     if dim_red_method == dim_red_methods[1]:
         with loadings:
             st.write('Loadings plot')
-- 
GitLab