From dd2317d56c73f49ca4ace080602ad207be59dd81 Mon Sep 17 00:00:00 2001
From: DIANE <abderrahim.diane@cefe.cnrs.fr>
Date: Wed, 24 Apr 2024 12:35:53 +0200
Subject: [PATCH] UMAP correction, error management, plots export

---
 src/Class_Mod/UMAP_.py           |  5 ++--
 src/pages/1-samples_selection.py | 49 +++++++++++++-------------------
 src/pages/2-model_creation.py    | 27 +++++++++---------
 3 files changed, 34 insertions(+), 47 deletions(-)

diff --git a/src/Class_Mod/UMAP_.py b/src/Class_Mod/UMAP_.py
index 05ee5b3..28d0436 100644
--- a/src/Class_Mod/UMAP_.py
+++ b/src/Class_Mod/UMAP_.py
@@ -6,8 +6,7 @@ class Umap:
     """
     The UMAP dimension reduction algorithm from scikit learn
     """
-    def __init__(self, data_import, numerical_data, cat_data):
-        self.x = data_import
+    def __init__(self, numerical_data, cat_data):
         self.numerical_data = numerical_data
         if cat_data is None:
             self.categorical_data_encoded = cat_data
@@ -21,7 +20,7 @@ class Umap:
         self.model = UMAP(n_neighbors=20, n_components=3, min_dist=0.0, random_state=42,)
         self.model.fit(self.numerical_data, y = self.categorical_data_encoded)
         self.scores_raw = self.model.transform(self.numerical_data)
-        self.scores = pd.DataFrame(self.scores_raw, index = self.x.index)
+        self.scores = pd.DataFrame(self.scores_raw, index = self.numerical_data.index)
 
     @property
     def scores_(self):
diff --git a/src/pages/1-samples_selection.py b/src/pages/1-samples_selection.py
index c9085bd..3e497f7 100644
--- a/src/pages/1-samples_selection.py
+++ b/src/pages/1-samples_selection.py
@@ -109,7 +109,7 @@ if not spectra.empty:
             supervised = meta_data[col]
         else:
             supervised = None
-        dr_model = Umap(data_import = imp, numerical_data = MinMaxScale(spectra), cat_data = supervised)
+        dr_model = Umap(numerical_data = MinMaxScale(spectra), cat_data = supervised)
 
     if dr_model:
         axis1 = pc.selectbox("x-axis", options = dr_model.scores_.columns, index=0)
@@ -181,14 +181,18 @@ if labels:
             meta['index'] = spectra.index[selected_samples_idx]
             selected_samples_metd.write(meta)
 
-
+############################################################################
     ## Scores
 if not t.empty:
     with scores:
+        fig1, ((ax1, ax2),(ax3,ax4)) = plt.subplots(2,2)
         st.write('Scores plot')
         # scores plot with clustering
         if list(labels) and meta_data.empty:
             fig = px.scatter_3d(tcr, x=axis1, y=axis2, z = axis3, color = labels)
+            sns.scatterplot(data = tcr, x = axis1, y =axis2 , hue = labels, ax = ax1)
+            
+
     
         # scores plot with metadata
         elif len(list(labels)) == 0 and not meta_data.empty:
@@ -196,9 +200,12 @@ if not t.empty:
             col = st.selectbox('Color by:', options= filter)
             if col == 0:
                 fig = px.scatter_3d(tcr, x=axis1, y=axis2, z = axis3)
+                sns.scatterplot(data = tcr, x = axis1, y =axis2 , ax = ax1)
+                
             else:
                 fig = px.scatter_3d(tcr, x=axis1, y=axis2, z = axis3, color = list(map(str.lower,meta_data[col])) )
-
+                sns.scatterplot(data = tcr, x = axis1, y =axis2 , hue = list(map(str.lower,meta_data[col])), ax = ax1)
+                
         # color with scores and metadata
         elif len(list(labels)) > 0  and not meta_data.empty:
             if clus_method in cluster_methods[1:]:
@@ -210,13 +217,19 @@ if not t.empty:
             col = st.selectbox('Color by:', options= filter)
             if col == "None":
                 fig = px.scatter_3d(tcr, x=axis1, y=axis2, z = axis3)
+                sns.scatterplot(data = tcr, x = axis1, y =axis2 , ax = ax1)
             elif col == clus_method:
                 fig = px.scatter_3d(tcr, x=axis1, y=axis2, z = axis3, color = labels)
+                sns.scatterplot(data = tcr, x = axis1, y =axis2 , ax = ax1)
             else:
                 fig = px.scatter_3d(tcr, x=axis1, y=axis2, z = axis3, color = list(map(str.lower,meta_data[col])))
+                sns.scatterplot(data = tcr, x = axis1, y =axis2 , hue = list(map(str.lower,meta_data[col])), ax = ax1)
+                sns.scatterplot(data = tcr, x = axis1, y =axis2 , hue = list(map(str.lower,meta_data[col])), ax = ax2)
+                sns.scatterplot(data = tcr, x = axis1, y =axis2 , hue = list(map(str.lower,meta_data[col])), ax = ax3)
 
         else:
             fig = px.scatter_3d(tcr, x=axis1, y=axis2, z = axis3)
+            sns.scatterplot(data = tcr, x = axis1, y =axis2 , ax = ax1)
         fig.update_traces(marker=dict(size=4))
 
         if selected_samples_idx:
@@ -224,37 +237,13 @@ if not t.empty:
             fig.add_scatter3d(x = tt.loc[:,axis1], y = tt.loc[:,axis2],
                               z = tt.loc[:,axis3], mode ='markers', marker = dict(size = 7, color = 'black'),
                               name = 'selected samples')
+        
+        plt.savefig("./Report/Figures/test.png")
         st.plotly_chart(fig, use_container_width=True)
 
-        import plotly.express as px
-
-        if labels:
-            num_clusters = len(np.unique(labels))
-
-            custom_color_palette = px.colors.qualitative.Plotly[:num_clusters]
-            color_discrete_sequence=custom_color_palette
-
-            # Créer et exporter le graphique Axe1-Axe2 en PNG
-            fig_axe1_axe2 = px.scatter(tcr, x=axis1, y=axis2, color=labels if list(labels) else None, color_discrete_sequence=custom_color_palette)
-            fig_axe1_axe2.update_layout(title='Axe1-Axe2')
-            fig_axe1_axe2.update_traces(marker=dict(size=4))
-            fig_axe1_axe2.write_image("./Report/Figures/plot_axe1_axe2.png")
-
-
-            # Créer et exporter le graphique Axe1-Axe3 en PNG
-            fig_axe1_axe3 = px.scatter(tcr, x=axis1, y=axis3, color=labels if list(labels) else None, color_discrete_sequence=custom_color_palette)
-            fig_axe1_axe3.update_layout(title='Axe1-Axe3')
-            fig_axe1_axe3.update_traces(marker=dict(size=4))
-            fig_axe1_axe3.write_image("./Report/Figures/plot_axe1_axe3.png")
-
-
-            # Créer et exporter le graphique Axe2-Axe3 en PNG
-            fig_axe2_axe3 = px.scatter(tcr, x=axis2, y=axis3, color=labels if list(labels) else None, color_discrete_sequence=custom_color_palette)
-            fig_axe2_axe3.update_layout(title='Axe2-Axe3')
-            fig_axe2_axe3.update_traces(marker=dict(size=4))
-            fig_axe2_axe3.write_image("./Report/Figures/plot_axe2_axe3.png")
 
 
+#################################################################
 if not spectra.empty:
     if dim_red_method == dim_red_methods[1]:
 
diff --git a/src/pages/2-model_creation.py b/src/pages/2-model_creation.py
index e83931e..996302a 100644
--- a/src/pages/2-model_creation.py
+++ b/src/pages/2-model_creation.py
@@ -19,10 +19,8 @@ st.session_state["interface"] = st.session_state.get('interface')
 if st.session_state["interface"] == 'simple':
     hide_pages("Predictions")
 
-########################################################################################
-reg_algo = ["","Full-PLSR", "Locally Weighted PLSR", "Interval-PLSR", "Full-PLSR-sklearn", "PrePLStester"]
 
-# page Design
+    ####################################### page Design #######################################
 st.header("Calibration Model Development", divider='blue')
 st.write("Create a predictive model, then use it for predicting your target variable (chemical values) from NIRS spectra")
 M1, M2, M3 = st.columns([2,3,2])
@@ -35,12 +33,15 @@ M7.write('Predicted vs Measured values')
 M8.write('Residuals plot')
 M9, M10 = st.columns([2,2])
 M9.write("-- Save the model --")
+            ######################################################################
+
 
+reg_algo = ["","Full-PLSR", "Locally Weighted PLSR", "Interval-PLSR", "Full-PLSR-sklearn", "PrePLStester"]
+      #######################################        ###########################################
 
 files_format = ['.csv', '.dx']
 file = M3.radio('select data file format:', options = files_format)
 
-
 ### Data
 spectra = pd.DataFrame
 y = pd.DataFrame
@@ -57,14 +58,11 @@ if file == files_format[0]:
         else: col = False
         
     ycal_csv = M3.file_uploader("Select corresponding Chemical Data", type="csv", help=" :mushroom: select a csv matrix with samples as rows and chemical values as a column")
-        
-            
     if ycal_csv:
         sepy = M3.radio("separator (Y file): ", options=[";", ","], key=2)
         hdry = M3.radio("samples name (Y file)?: ", options=["no", "yes"], key=3)
         if hdry == "yes": col = 0
         else: col = False
-        
     
     if xcal_csv and ycal_csv:
         spectra, meta_data = col_cat(pd.read_csv(xcal_csv, decimal='.', sep=sepx, index_col=col, header=0))
@@ -77,13 +75,11 @@ if file == files_format[0]:
 
         if spectra.shape[0] == y.shape[0]:
             pass
+
         else:
             M3.warning('The number of samples is different in X and Y')
             y = pd.DataFrame
             spectra = pd.DataFrame
-            
-            
-
 
 ## Load .dx file
 elif file == files_format[1]:
@@ -94,10 +90,13 @@ elif file == files_format[1]:
             tmp_path = tmp.name
             chem_data, spectra, meta_data = read_dx(file =  tmp_path)
             M3.success("The data have been loaded successfully", icon="✅")
-            yname = M3.selectbox('Select target', options=chem_data.columns)
-            measured = chem_data.loc[:,yname] > 0
-            y = chem_data.loc[:,yname].loc[measured]
-            spectra = spectra.loc[measured]
+            if chem_data.shape[1]>0:
+                yname = M3.selectbox('Select target', options=chem_data.columns)
+                measured = chem_data.loc[:,yname] > 0
+                y = chem_data.loc[:,yname].loc[measured]
+                spectra = spectra.loc[measured]
+            else:
+                M3.warning('Warning: Chemical data are not included in your file !', icon="⚠️")
         os.unlink(tmp_path)
 
 ### split the data
-- 
GitLab