From 41e614bc51c4846cdf1a5089bcbb35985d9f522f Mon Sep 17 00:00:00 2001 From: DIANE <abderrahim.diane@cefe.cnrs.fr> Date: Tue, 16 Apr 2024 09:53:41 +0200 Subject: [PATCH] NaN values in .dx --- Class_Mod/DxReader.py | 3 ++- Class_Mod/Miscellaneous.py | 2 +- Class_Mod/Regression_metrics.py | 4 +--- pages/2-model_creation.py | 16 ++++++++-------- 4 files changed, 12 insertions(+), 13 deletions(-) diff --git a/Class_Mod/DxReader.py b/Class_Mod/DxReader.py index f024894..d094294 100644 --- a/Class_Mod/DxReader.py +++ b/Class_Mod/DxReader.py @@ -49,7 +49,7 @@ class DxRead: self.metadata_ = pd.DataFrame(self.__met).T - self.spectra = pd.DataFrame(np.fliplr(specs), columns= self.__wl[::-1]) # Storing spectra in a pd.dataframe + self.spectra = pd.DataFrame(np.fliplr(specs), columns= self.__wl[::-1], index = self.metadata_['name']) # Storing spectra in a pd.dataframe @@ -72,6 +72,7 @@ class DxRead: ### dataframe conntaining chemical data self.chem_data = pd.DataFrame(cc, index=elements_name).T.astype(float) + self.chem_data.index = self.metadata_['name'] ### Method for retrieving the concentration of a single sample def conc(self,sample): diff --git a/Class_Mod/Miscellaneous.py b/Class_Mod/Miscellaneous.py index 79d1708..e1b1271 100644 --- a/Class_Mod/Miscellaneous.py +++ b/Class_Mod/Miscellaneous.py @@ -27,7 +27,7 @@ def reg_plot( meas, pred): sns.regplot(x = meas[0] , y = pred[0], color='blue', label = 'Calib') sns.regplot(x = meas[1], y = pred[1], color='red', label = 'CV') sns.regplot(x = meas[2], y = pred[2], color='green', label = 'Test') - plt.plot([np.min(meas[0])+0.1, np.max([meas[0]])+0.1], [np.min(meas[0])+0.1, np.max([meas[0]])+0.1], color = 'black') + plt.plot([np.min(meas[0])-0.05, np.max([meas[0]])+0.05], [np.min(meas[0])-0.05, np.max([meas[0]])+0.05], color = 'black') ax.set_ylabel('Predicted values') ax.set_xlabel('Measured values') plt.legend() diff --git a/Class_Mod/Regression_metrics.py b/Class_Mod/Regression_metrics.py index f958d8c..5d7cbe3 100644 --- a/Class_Mod/Regression_metrics.py +++ b/Class_Mod/Regression_metrics.py @@ -3,8 +3,7 @@ from Packages import * class metrics: def __init__(self, meas, pred): - - if isinstance(meas, pd.DataFrame): + if isinstance(meas, pd.DataFrame) or isinstance(meas, pd.Series): self.meas = meas.to_numpy() else : self.meas = meas.ravel() @@ -28,7 +27,6 @@ class metrics: # REGRESSION OR MODEL ssm = np.sum(self.pred - xbar) - # Compute statistical metrics metr = pd.DataFrame() diff --git a/pages/2-model_creation.py b/pages/2-model_creation.py index 3f506ea..adea651 100644 --- a/pages/2-model_creation.py +++ b/pages/2-model_creation.py @@ -72,22 +72,22 @@ elif file == files_format[1]: chem_data, spectra, meta_data = read_dx(file = tmp_path) M3.success("The data have been loaded successfully", icon="✅") yname = M3.selectbox('Select target', options=chem_data.columns) - spectra = spectra - y = chem_data.loc[:,yname] - + measured = chem_data.loc[:,yname] > 0 + y = chem_data.loc[:,yname].loc[measured] + spectra = spectra.loc[measured] os.unlink(tmp_path) ### split the data if not spectra.empty and not y.empty: rd_seed = M1.slider("Customize Train-test split", min_value=1, max_value=100, value=42, format="%i") # Split data into training and test sets using the kennard_stone method and correlation metric, 25% of data is used for testing - train_index, test_index = train_test_split_idx(spectra, y=y, method="kennard_stone", metric="correlation", test_size=0.25, random_state=rd_seed) - # Assign data to training and test sets - X_train, y_train, X_test, y_test = pd.DataFrame(spectra.iloc[train_index,:]), pd.DataFrame(y.iloc[train_index]), pd.DataFrame(spectra.iloc[test_index,:]), pd.DataFrame(y.iloc[test_index]) - y_train = y_train.iloc[:,0] - y_test = y_test.iloc[:,0] + train_index, test_index = train_test_split_idx(spectra, y = y, method="kennard_stone", metric="correlation", test_size=0.25, random_state=rd_seed) + # Assign data to training and test sets + X_train, y_train = pd.DataFrame(spectra.iloc[train_index,:]), y.iloc[train_index] + X_test, y_test = pd.DataFrame(spectra.iloc[test_index,:]), y.iloc[test_index] + ####################################### regression_algo = M1.selectbox("Choose the algorithm for regression", options=reg_algo, key = 12) if regression_algo == reg_algo[1]: -- GitLab