Commit 2d045dd9 authored by DIANE

PLSR pinard update

parent f05a6645
Before (model_PLSR function, removed by this commit):

from Packages import *
from Class_Mod.Miscellaneous import *

# create model module with PINARD
def model_PLSR(xcal_csv, ycal_csv, sep, hdr, rd_seed):
    np.random.seed(rd_seed)
    # hdr indicates whether the CSV files have a column header ('yes') or not
    if hdr == 'yes':
        col = 0
    else:
        col = False
    # Load the CSV files
    x, y = utils.load_csv(xcal_csv, ycal_csv, autoremove_na=True, sep=sep, x_hdr=0, y_hdr=0, x_index_col=col, y_index_col=col)
    # Split the data into training and test sets with the kennard_stone method and a correlation metric; 25% of the data is used for testing
    train_index, test_index = train_test_split_idx(x, y=y, method="kennard_stone", metric="correlation", test_size=0.25, random_state=rd_seed)
    # Assign data to training and test sets
    X_train, y_train, X_test, y_test = x[train_index], y[train_index], x[test_index], y[test_index]
    st.write("Size of train and test sets: train " + str(X_train.shape) + ' ' + str(y_train.shape) + ' / test ' + str(X_test.shape) + ' ' + str(y_test.shape))

    # Declare preprocessing pipeline
    svgolay = [('_sg1', pp.SavitzkyGolay()),
               ('_sg2', pp.SavitzkyGolay())]              # nested pipeline applying the Savitzky-Golay filter twice for 2nd-order preprocessing
    preprocessing = [('id', pp.IdentityTransformer()),    # identity transformer, no change to the data
                     ('savgol', pp.SavitzkyGolay()),      # Savitzky-Golay smoothing filter
                     ('derivate', pp.Derivate()),         # first derivative of the data
                     ('SVG', FeatureUnion(svgolay))]

    # Declare complete pipeline
    pipeline = Pipeline([
        ('scaler', MinMaxScaler()),                       # scale the data
        ('preprocessing', FeatureUnion(preprocessing)),   # preprocessing
        ('PLS', PLSRegression())])                        # regressor

    # Estimator including y-value scaling
    estimator = TransformedTargetRegressor(regressor=pipeline, transformer=MinMaxScaler())

    # Training
    trained = estimator.fit(X_train, y_train)

    # Fit score
    st.write("fit scores / R²: " + str(estimator.score(X_test, y_test)))

    # Predictions on the test set
    Y_preds = estimator.predict(X_test)

    # Placeholder metrics table displayed in the app
    met = {"MAE: ": [5],
           "MSE: ": [5],
           "MAPE: ": [8]}
    met = pd.DataFrame(met).T
    st.table(met)

    st.write("MAE: " + str(mean_absolute_error(y_test, Y_preds)))
    st.write("MSE: " + str(mean_squared_error(y_test, Y_preds)))
    st.write("MAPE: " + str(mean_absolute_percentage_error(y_test, Y_preds)))

    # Cross-validate the model
    CV_model(estimator, X_train, y_train, 3)

    return trained

After (new PinardPlsr class, added by this commit):

from Packages import *
from Class_Mod.Miscellaneous import *
from Class_Mod.Regression_metrics import metrics

class PinardPlsr:
    def __init__(self, x_train, y_train, x_test, y_test):
        self.x_train = x_train
        self.x_test = x_test
        self.y_train = y_train
        self.y_test = y_test

        # create model module with PINARD
        # Declare preprocessing pipeline
        svgolay = [('_sg1', pp.SavitzkyGolay()),
                   ('_sg2', pp.SavitzkyGolay())]              # nested pipeline applying the Savitzky-Golay filter twice for 2nd-order preprocessing
        preprocessing = [('id', pp.IdentityTransformer()),    # identity transformer, no change to the data
                         ('savgol', pp.SavitzkyGolay()),      # Savitzky-Golay smoothing filter
                         ('derivate', pp.Derivate()),         # first derivative of the data
                         ('SVG', FeatureUnion(svgolay))]

        # Declare complete pipeline
        pipeline = Pipeline([
            ('scaler', MinMaxScaler()),                       # scale the data
            ('preprocessing', FeatureUnion(preprocessing)),   # preprocessing
            ('PLS', PLSRegression())])                        # regressor

        # Estimator including y-value scaling
        estimator = TransformedTargetRegressor(regressor=pipeline, transformer=MinMaxScaler())

        # Training
        self.trained = estimator.fit(self.x_train, self.y_train)

        # Predictions on the calibration, cross-validation, and test sets
        self.yc = pd.DataFrame(self.trained.predict(self.x_train))                                   # predictions on the calibration (train) data
        self.ycv = pd.DataFrame(cross_val_predict(self.trained, self.x_train, self.y_train, cv=3))   # 3-fold cross-validation predictions
        self.yt = pd.DataFrame(self.trained.predict(self.x_test))                                    # predictions on the test data

    @property
    def model_(self):
        return self.trained

    @property
    def metrics_(self):
        metc = metrics(train=(self.y_train, self.yc))
        metcv = metrics(train=(self.y_train, self.ycv))
        mett = metrics(train=(self.y_test, self.yt))
        met = pd.concat([metc, metcv, mett], axis=0)
        met.index = ['calib', 'cv', 'test']
        return met

    @property
    def pred_data_(self):
        return self.yc, self.ycv, self.yt
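For orientation, here is a minimal usage sketch of the new class. It mirrors the Kennard-Stone split that the removed model_PLSR performed internally; the pinard import paths, the example file names, separator, and random seed are assumptions, since the real names arrive through the star import from Packages and are not visible in this diff.

    # Minimal usage sketch, not part of the commit.
    # Assumption: utils.load_csv and train_test_split_idx come from the pinard package.
    from pinard import utils                                  # assumed import path
    from pinard.model_selection import train_test_split_idx   # assumed import path

    # Hypothetical input files and separator
    x, y = utils.load_csv("Xcal.csv", "Ycal.csv", autoremove_na=True, sep=";",
                          x_hdr=0, y_hdr=0, x_index_col=0, y_index_col=0)

    # Kennard-Stone split with a correlation metric, 25% of the data held out for testing
    train_index, test_index = train_test_split_idx(x, y=y, method="kennard_stone",
                                                   metric="correlation",
                                                   test_size=0.25, random_state=42)

    # PinardPlsr is the class added by this commit (defined above)
    model = PinardPlsr(x[train_index], y[train_index], x[test_index], y[test_index])

    print(model.metrics_)            # metrics table indexed by 'calib', 'cv', 'test'
    yc, ycv, yt = model.pred_data_   # calibration, cross-validation, and test predictions

Compared with the removed function, the class keeps the fitted estimator and the three prediction sets as attributes and delegates metric computation to Class_Mod.Regression_metrics, which is presumably why the Streamlit display calls were dropped from this module.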