NaN values in .dx

41e614bc · DIANE · a3d36997 · 41e614bc · 41e614bc · 41e614bc
Commit 41e614bc authored 1 year ago by DIANE
--- a/Class_Mod/DxReader.py
+++ b/Class_Mod/DxReader.py
@@ -49,7 +49,7 @@ class DxRead:
        self.metadata_ = pd.DataFrame(self.__met).T
-        self.spectra = pd.DataFrame(np.fliplr(specs), columns= self.__wl[::-1]) # Storing spectra in a pd.dataframe
+        self.spectra = pd.DataFrame(np.fliplr(specs), columns= self.__wl[::-1], index = self.metadata_['name']) # Storing spectra in a pd.dataframe
@@ -72,6 +72,7 @@ class DxRead:
        ### dataframe containing chemical data
        self.chem_data = pd.DataFrame(cc, index=elements_name).T.astype(float)
+        self.chem_data.index = self.metadata_['name']
    ### Method for retrieving the concentration of a single sample
    def conc(self,sample):

--- a/Class_Mod/Miscellaneous.py
+++ b/Class_Mod/Miscellaneous.py
@@ -27,7 +27,7 @@ def reg_plot( meas, pred):
    sns.regplot(x = meas[0] , y = pred[0], color='blue', label = 'Calib')
    sns.regplot(x = meas[1], y = pred[1], color='red', label = 'CV')
    sns.regplot(x = meas[2], y = pred[2], color='green', label = 'Test')
-    plt.plot([np.min(meas[0])+0.1, np.max([meas[0]])+0.1], [np.min(meas[0])+0.1, np.max([meas[0]])+0.1], color = 'black')
+    plt.plot([np.min(meas[0])-0.05, np.max([meas[0]])+0.05], [np.min(meas[0])-0.05, np.max([meas[0]])+0.05], color = 'black')
    ax.set_ylabel('Predicted values')
    ax.set_xlabel('Measured values')
    plt.legend()

--- a/Class_Mod/Regression_metrics.py
+++ b/Class_Mod/Regression_metrics.py
@@ -3,8 +3,7 @@ from Packages import *
 class metrics:
    def __init__(self, meas, pred):
+        if isinstance(meas, pd.DataFrame) or isinstance(meas, pd.Series):
-        if isinstance(meas, pd.DataFrame):
            self.meas = meas.to_numpy()
        else :
            self.meas = meas.ravel()
@@ -28,7 +27,6 @@ class metrics:
           # REGRESSION OR MODEL
           ssm = np.sum(self.pred - xbar)
          # Compute statistical metrics
           metr = pd.DataFrame()

--- a/pages/2-model_creation.py
+++ b/pages/2-model_creation.py
@@ -72,22 +72,22 @@ elif file == files_format[1]:
            chem_data, spectra, meta_data = read_dx(file =  tmp_path)
            M3.success("The data have been loaded successfully", icon="✅")
            yname = M3.selectbox('Select target', options=chem_data.columns)
-            spectra = spectra
+            measured = chem_data.loc[:,yname] > 0
-            y = chem_data.loc[:,yname]
+            y = chem_data.loc[:,yname].loc[measured]
+            spectra = spectra.loc[measured]
        os.unlink(tmp_path)
 ### split the data
 if not spectra.empty and not y.empty:
    rd_seed = M1.slider("Customize Train-test split", min_value=1, max_value=100, value=42, format="%i")
    # Split data into training and test sets using the kennard_stone method and correlation metric, 25% of data is used for testing
-    train_index, test_index = train_test_split_idx(spectra, y=y, method="kennard_stone", metric="correlation", test_size=0.25, random_state=rd_seed)
+    train_index, test_index = train_test_split_idx(spectra, y = y, method="kennard_stone", metric="correlation", test_size=0.25, random_state=rd_seed)
-    # Assign data to training and test sets
-    X_train, y_train, X_test, y_test = pd.DataFrame(spectra.iloc[train_index,:]), pd.DataFrame(y.iloc[train_index]), pd.DataFrame(spectra.iloc[test_index,:]), pd.DataFrame(y.iloc[test_index])
-    y_train = y_train.iloc[:,0]
-    y_test = y_test.iloc[:,0]
+    # Assign data to training and test sets
+    X_train, y_train = pd.DataFrame(spectra.iloc[train_index,:]), y.iloc[train_index]
+    X_test, y_test = pd.DataFrame(spectra.iloc[test_index,:]), y.iloc[test_index]
 #######################################
    regression_algo = M1.selectbox("Choose the algorithm for regression", options=reg_algo, key = 12)
    if regression_algo == reg_algo[1]: