PLSR_.py
    from Packages import *
    from Class_Mod.Miscellaneous import * 
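    # The wildcard imports above are assumed to provide, among others (a sketch of
    # the expected names, not a verified list):
    #   numpy as np, pandas as pd, streamlit as st
    #   pinard: utils, preprocessing as pp, train_test_split_idx
    #   scikit-learn: Pipeline, FeatureUnion, MinMaxScaler, PLSRegression,
    #                 TransformedTargetRegressor, mean_absolute_error,
    #                 mean_squared_error, mean_absolute_percentage_error
    #   Class_Mod.Miscellaneous: CV_model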
    
    
    # Create and train a PLSR model with the PINARD pipeline: load data, split, preprocess, fit, and evaluate
    def model_PLSR(xcal_csv, ycal_csv, sep, hdr, rd_seed):
        np.random.seed(rd_seed)
        # hdr indicates whether the CSV files have a column header ('yes' or 'no');
        # when 'yes', column 0 is used as the row index
        if hdr == 'yes':
            col = 0
        else:
            col = False
        # Load the x and y CSV files; rows with missing values are removed automatically
        x, y = utils.load_csv(xcal_csv, ycal_csv, autoremove_na=True, sep=sep, x_hdr=0, y_hdr=0, x_index_col=col, y_index_col=col)
        # Split the data into training and test sets with the kennard_stone method and a correlation metric; 25% of the data is held out for testing
        train_index, test_index = train_test_split_idx(x, y=y, method="kennard_stone", metric="correlation", test_size=0.25, random_state=rd_seed)
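        # Kennard-Stone greedily picks the most mutually distant samples (here under
        # a correlation distance) for the training set, so the training data cover
        # the spectral space instead of being drawn at random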
        # Assign data to training and test sets
        X_train, y_train, X_test, y_test = x[train_index], y[train_index], x[test_index], y[test_index]
        st.write("Size of train and test sets: train " + str(X_train.shape) + ' ' + str(y_train.shape) + ' / test ' + str(X_test.shape) + ' ' + str(y_test.shape))
        # Declare preprocessing pipeline
        svgolay = [('_sg1', pp.SavitzkyGolay()),
                   ('_sg2', pp.SavitzkyGolay())]  # nested pipeline: apply Savitzky-Golay twice for second-order preprocessing
        preprocessing = [('id', pp.IdentityTransformer()),   # identity transformer, no change to the data
                         ('savgol', pp.SavitzkyGolay()),     # Savitzky-Golay smoothing filter
                         ('derivate', pp.Derivate()),        # calculate the first derivative of the data
                         ('SVG', Pipeline(svgolay))]         # chain the two Savitzky-Golay steps defined above
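        # FeatureUnion fits each of the four transformers on the same input and
        # concatenates their outputs side by side, so the PLS regressor below sees
        # every preprocessed variant of the data at once (four times the original
        # number of features, since each transformer preserves the spectrum length)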
        # Declare complete pipeline
        pipeline = Pipeline([
            ('scaler', MinMaxScaler()), # scaling the data
            ('preprocessing', FeatureUnion(preprocessing)), # preprocessing
            ('PLS',  PLSRegression()) # regressor
        ])
        # Wrap the pipeline in an estimator that also scales the y values
        estimator = TransformedTargetRegressor(regressor = pipeline, transformer = MinMaxScaler())
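        # TransformedTargetRegressor scales y with the MinMaxScaler before fitting
        # and inverse-transforms predictions back to the original units, so the
        # predictions below are directly comparable to y_test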
        # Training
        trained = estimator.fit(X_train, y_train)
        # Coefficient of determination (R²) on the test set
        st.write("Test R²: " + str(estimator.score(X_test, y_test)))
        # Predictions on the test set
        Y_preds = estimator.predict(X_test)
        # Compute error metrics on the test set and display them as a table
        met = {"MAE": [mean_absolute_error(y_test, Y_preds)],
               "MSE": [mean_squared_error(y_test, Y_preds)],
               "MAPE": [mean_absolute_percentage_error(y_test, Y_preds)]}
        met = pd.DataFrame(met).T
        st.table(met)
    
        # Cross-Validate the model
        CV_model(estimator, X_train, y_train, 3)
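        # CV_model is the helper imported from Class_Mod.Miscellaneous; the final
        # argument is presumably the number of folds (an assumption; the helper's
        # signature is not shown here)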
    
        return trained
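
    # Example invocation (a sketch: the file names, separator, and seed below are
    # hypothetical, not taken from the project):
    #
    #     trained_model = model_PLSR('Xcal.csv', 'Ycal.csv', sep=';', hdr='yes', rd_seed=42)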