From 41e614bc51c4846cdf1a5089bcbb35985d9f522f Mon Sep 17 00:00:00 2001
From: DIANE <abderrahim.diane@cefe.cnrs.fr>
Date: Tue, 16 Apr 2024 09:53:41 +0200
Subject: [PATCH] NaN values in .dx

---
 Class_Mod/DxReader.py           |  3 ++-
 Class_Mod/Miscellaneous.py      |  2 +-
 Class_Mod/Regression_metrics.py |  4 +---
 pages/2-model_creation.py       | 16 ++++++++--------
 4 files changed, 12 insertions(+), 13 deletions(-)

diff --git a/Class_Mod/DxReader.py b/Class_Mod/DxReader.py
index f024894..d094294 100644
--- a/Class_Mod/DxReader.py
+++ b/Class_Mod/DxReader.py
@@ -49,7 +49,7 @@ class DxRead:
         self.metadata_ = pd.DataFrame(self.__met).T
             
 
-        self.spectra = pd.DataFrame(np.fliplr(specs), columns= self.__wl[::-1]) # Storing spectra in a pd.dataframe
+        self.spectra = pd.DataFrame(np.fliplr(specs), columns= self.__wl[::-1], index = self.metadata_['name']) # Storing spectra in a pd.dataframe
 
 
 
@@ -72,6 +72,7 @@ class DxRead:
 
         ### dataframe conntaining chemical data
         self.chem_data = pd.DataFrame(cc, index=elements_name).T.astype(float)
+        self.chem_data.index = self.metadata_['name']
 
     ### Method for retrieving the concentration of a single sample
     def conc(self,sample):
diff --git a/Class_Mod/Miscellaneous.py b/Class_Mod/Miscellaneous.py
index 79d1708..e1b1271 100644
--- a/Class_Mod/Miscellaneous.py
+++ b/Class_Mod/Miscellaneous.py
@@ -27,7 +27,7 @@ def reg_plot( meas, pred):
     sns.regplot(x = meas[0] , y = pred[0], color='blue', label = 'Calib')
     sns.regplot(x = meas[1], y = pred[1], color='red', label = 'CV')
     sns.regplot(x = meas[2], y = pred[2], color='green', label = 'Test')
-    plt.plot([np.min(meas[0])+0.1, np.max([meas[0]])+0.1], [np.min(meas[0])+0.1, np.max([meas[0]])+0.1], color = 'black')
+    plt.plot([np.min(meas[0])-0.05, np.max([meas[0]])+0.05], [np.min(meas[0])-0.05, np.max([meas[0]])+0.05], color = 'black')
     ax.set_ylabel('Predicted values')
     ax.set_xlabel('Measured values')
     plt.legend()
diff --git a/Class_Mod/Regression_metrics.py b/Class_Mod/Regression_metrics.py
index f958d8c..5d7cbe3 100644
--- a/Class_Mod/Regression_metrics.py
+++ b/Class_Mod/Regression_metrics.py
@@ -3,8 +3,7 @@ from Packages import *
 
 class metrics:
     def __init__(self, meas, pred):
-
-        if isinstance(meas, pd.DataFrame):
+        if isinstance(meas, pd.DataFrame) or isinstance(meas, pd.Series):
             self.meas = meas.to_numpy()
         else :
             self.meas = meas.ravel()
@@ -28,7 +27,6 @@ class metrics:
            # REGRESSION OR MODEL
            ssm = np.sum(self.pred - xbar)
 
-           
 
           # Compute statistical metrics
            metr = pd.DataFrame()
diff --git a/pages/2-model_creation.py b/pages/2-model_creation.py
index 3f506ea..adea651 100644
--- a/pages/2-model_creation.py
+++ b/pages/2-model_creation.py
@@ -72,22 +72,22 @@ elif file == files_format[1]:
             chem_data, spectra, meta_data = read_dx(file =  tmp_path)
             M3.success("The data have been loaded successfully", icon="✅")
             yname = M3.selectbox('Select target', options=chem_data.columns)
-            spectra = spectra
-            y = chem_data.loc[:,yname]
-
+            measured = chem_data.loc[:,yname] > 0
+            y = chem_data.loc[:,yname].loc[measured]
+            spectra = spectra.loc[measured]
         os.unlink(tmp_path)
 
 ### split the data
 if not spectra.empty and not y.empty:
     rd_seed = M1.slider("Customize Train-test split", min_value=1, max_value=100, value=42, format="%i")
     # Split data into training and test sets using the kennard_stone method and correlation metric, 25% of data is used for testing
-    train_index, test_index = train_test_split_idx(spectra, y=y, method="kennard_stone", metric="correlation", test_size=0.25, random_state=rd_seed)
-    # Assign data to training and test sets
-    X_train, y_train, X_test, y_test = pd.DataFrame(spectra.iloc[train_index,:]), pd.DataFrame(y.iloc[train_index]), pd.DataFrame(spectra.iloc[test_index,:]), pd.DataFrame(y.iloc[test_index])
-    y_train = y_train.iloc[:,0]
-    y_test = y_test.iloc[:,0]
+    train_index, test_index = train_test_split_idx(spectra, y = y, method="kennard_stone", metric="correlation", test_size=0.25, random_state=rd_seed)
     
 
+    # Assign data to training and test sets
+    X_train, y_train = pd.DataFrame(spectra.iloc[train_index,:]), y.iloc[train_index]
+    X_test, y_test = pd.DataFrame(spectra.iloc[test_index,:]), y.iloc[test_index]
+
 #######################################
     regression_algo = M1.selectbox("Choose the algorithm for regression", options=reg_algo, key = 12)
     if regression_algo == reg_algo[1]:
-- 
GitLab