From 2d045dd9eecc05d93eb1d80d7da740cb194d0069 Mon Sep 17 00:00:00 2001
From: DIANE <abderrahim.diane@cefe.cnrs.fr>
Date: Thu, 4 Apr 2024 09:59:48 +0200
Subject: [PATCH] PLSR pinard update
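
Refactor the PINARD-based PLSR workflow from a single Streamlit-bound function
into a PinardPlsr class: the model is fitted on construction and exposes the
trained estimator, a calib/cv/test metrics table and the corresponding
predictions through the model_, metrics_ and pred_data_ properties.

A minimal usage sketch (assuming the caller already holds x_train, y_train,
x_test, y_test as arrays, e.g. from a Kennard-Stone split as in the previous
version; the import path simply follows the file location in this patch):

    from Class_Mod.PLSR_ import PinardPlsr

    plsr = PinardPlsr(x_train, y_train, x_test, y_test)
    fitted = plsr.model_            # trained TransformedTargetRegressor
    print(plsr.metrics_)            # DataFrame indexed by 'calib', 'cv', 'test'
    yc, ycv, yt = plsr.pred_data_   # calibration, CV and test predictions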

---
 Class_Mod/PLSR_.py | 113 +++++++++++++++++++++++----------------------
 1 file changed, 59 insertions(+), 54 deletions(-)

diff --git a/Class_Mod/PLSR_.py b/Class_Mod/PLSR_.py
index c76718a..4070347 100644
--- a/Class_Mod/PLSR_.py
+++ b/Class_Mod/PLSR_.py
@@ -1,58 +1,63 @@
 from Packages import *
 from Class_Mod.Miscellaneous import * 
+from Class_Mod.Regression_metrics import metrics
 
 
-# create model module with PINARD
-def model_PLSR(xcal_csv, ycal_csv, sep, hdr, rd_seed):
-    np.random.seed(rd_seed)
-    # hdr var correspond to column header True or False in the CSV
-    if hdr == 'yes':
-        col = 0
-    else:
-        col = False
-    # loading the csv
-    x, y = utils.load_csv(xcal_csv, ycal_csv, autoremove_na=True, sep=sep, x_hdr=0, y_hdr=0, x_index_col=col, y_index_col=col)
-    # Split data into training and test sets using the kennard_stone method and correlation metric, 25% of data is used for testing
-    train_index, test_index = train_test_split_idx(x, y=y, method="kennard_stone", metric="correlation", test_size=0.25, random_state=rd_seed)
-    # Assign data to training and test sets
-    X_train, y_train, X_test, y_test = x[train_index], y[train_index], x[test_index], y[test_index]
-    st.write("Size of train and test sets: train " + str(X_train.shape) + ' ' + str(y_train.shape) + ' / test ' + str(X_test.shape) + ' ' + str(y_test.shape))
-    # Declare preprocessing pipeline
-    svgolay = [   ('_sg1',pp.SavitzkyGolay()),
-                  ('_sg2',pp.SavitzkyGolay())  # nested pipeline to perform the Savitzky-Golay method twice for 2nd order preprocessing
-                  ]
-    preprocessing = [   ('id', pp.IdentityTransformer()), # Identity transformer, no change to the data
-                        ('savgol', pp.SavitzkyGolay()), # Savitzky-Golay smoothing filter
-                        ('derivate', pp.Derivate()), # Calculate the first derivative of the data
-                        ('SVG', FeatureUnion(svgolay))
-                        # Pipeline([('_sg1',pp.SavitzkyGolay()),('_sg2',pp.SavitzkyGolay())])  # nested pipeline to perform the Savitzky-Golay method twice for 2nd order preprocessing
-                        ]
-    # Declare complete pipeline
-    pipeline = Pipeline([
-        ('scaler', MinMaxScaler()), # scaling the data
-        ('preprocessing', FeatureUnion(preprocessing)), # preprocessing
-        ('PLS',  PLSRegression()) # regressor
-    ])
-    # Estimator including y values scaling
-    estimator = TransformedTargetRegressor(regressor = pipeline, transformer = MinMaxScaler())
-    # Training
-    trained = estimator.fit(X_train, y_train)
-    # fit scores
-    st.write("fit scores / R²: " + str(estimator.score(X_test,y_test)))
-    # Predictions on test set
-    Y_preds = estimator.predict(X_test) # make predictions on test data and assign to Y_preds variable
-    ################################################################################################################
-    met= {"MAE: ":[5],
-          "MSE: ":[5],
-          "MSE: ":[8]}
-    met = pd.DataFrame(met).T
-    ################################################################################################################
-    st.table(met)
-    st.write("MAE: " + str(mean_absolute_error(y_test, Y_preds)))
-    st.write("MSE: " + str(mean_squared_error(y_test, Y_preds)))
-    st.write("MAPE: " + str(mean_absolute_percentage_error(y_test, Y_preds)))
-
-    # Cross-Validate the model
-    CV_model(estimator, X_train, y_train, 3)
-
-    return (trained)
\ No newline at end of file
+class PinardPlsr:
+    def __init__(self, x_train, y_train, x_test, y_test):
+        self.x_train = x_train
+        self.x_test = x_test
+        self.y_train = y_train
+        self.y_test = y_test
+
+        # Build the PINARD model
+        # Declare the preprocessing pipeline
+        svgolay = [('_sg1', pp.SavitzkyGolay()),
+                   ('_sg2', pp.SavitzkyGolay())]  # nested pipeline applying Savitzky-Golay twice for 2nd-order preprocessing
+        preprocessing = [('id', pp.IdentityTransformer()),  # identity transformer, no change to the data
+                         ('savgol', pp.SavitzkyGolay()),  # Savitzky-Golay smoothing filter
+                         ('derivate', pp.Derivate()),  # first derivative of the data
+                         ('SVG', FeatureUnion(svgolay))]  # the doubled Savitzky-Golay defined above
+        # Declare complete pipeline
+        pipeline = Pipeline([
+            ('scaler', MinMaxScaler()), # scaling the data
+            ('preprocessing', FeatureUnion(preprocessing)), # preprocessing
+            ('PLS', PLSRegression())])  # regressor
+        # Estimator including y values scaling
+        estimator = TransformedTargetRegressor(regressor = pipeline, transformer = MinMaxScaler())
+        # Training
+        self.trained = estimator.fit(self.x_train, self.y_train)
+
+        # Predictions on the calibration, cross-validation and test sets
+        self.yc = pd.DataFrame(self.trained.predict(self.x_train))  # predictions on the calibration (training) set
+        self.ycv = pd.DataFrame(cross_val_predict(self.trained, self.x_train, self.y_train, cv = 3))  # 3-fold cross-validated predictions on the training set
+        self.yt = pd.DataFrame(self.trained.predict(self.x_test))  # predictions on the test set
+
+    @property
+    def model_(self):
+        return self.trained
+
+    @property
+    def metrics_(self):
+        metc = metrics(train=(self.y_train, self.yc))  # calibration metrics
+        metcv = metrics(train=(self.y_train, self.ycv))  # cross-validation metrics
+        mett = metrics(train=(self.y_test, self.yt))  # test-set metrics
+        met = pd.concat([metc, metcv, mett], axis = 0)
+        met.index = ['calib', 'cv', 'test']
+        return met
+
+    @property
+    def pred_data_(self):
+        return self.yc, self.ycv, self.yt
\ No newline at end of file
-- 
GitLab