Commit 2d045dd9 authored by DIANE

PLSR pinard update

parent f05a6645
Before (model_PLSR function, removed by this commit):

from Packages import *
from Class_Mod.Miscellaneous import *

# create model module with PINARD
def model_PLSR(xcal_csv, ycal_csv, sep, hdr, rd_seed):
    np.random.seed(rd_seed)
    # hdr indicates whether the CSV files have a column header ('yes') or not
    if hdr == 'yes':
        col = 0
    else:
        col = False
    # Load the CSV files
    x, y = utils.load_csv(xcal_csv, ycal_csv, autoremove_na=True, sep=sep, x_hdr=0, y_hdr=0, x_index_col=col, y_index_col=col)
    # Split the data into training and test sets with the kennard_stone method and a correlation metric; 25% of the data is used for testing
    train_index, test_index = train_test_split_idx(x, y=y, method="kennard_stone", metric="correlation", test_size=0.25, random_state=rd_seed)
    # Assign data to training and test sets
    X_train, y_train, X_test, y_test = x[train_index], y[train_index], x[test_index], y[test_index]
    st.write("Size of train and test sets: train " + str(X_train.shape) + ' ' + str(y_train.shape) + ' / test ' + str(X_test.shape) + ' ' + str(y_test.shape))

    # Declare preprocessing pipeline
    svgolay = [('_sg1', pp.SavitzkyGolay()),
               ('_sg2', pp.SavitzkyGolay())]              # nested pipeline applying the Savitzky-Golay filter twice for 2nd-order preprocessing
    preprocessing = [('id', pp.IdentityTransformer()),    # identity transformer, no change to the data
                     ('savgol', pp.SavitzkyGolay()),      # Savitzky-Golay smoothing filter
                     ('derivate', pp.Derivate()),         # first derivative of the data
                     ('SVG', FeatureUnion(svgolay))]

    # Declare complete pipeline
    pipeline = Pipeline([
        ('scaler', MinMaxScaler()),                       # scale the data
        ('preprocessing', FeatureUnion(preprocessing)),   # preprocessing
        ('PLS', PLSRegression())])                        # regressor

    # Estimator including y-value scaling
    estimator = TransformedTargetRegressor(regressor=pipeline, transformer=MinMaxScaler())

    # Training
    trained = estimator.fit(X_train, y_train)

    # Fit score
    st.write("fit scores / R²: " + str(estimator.score(X_test, y_test)))

    # Predictions on the test set
    Y_preds = estimator.predict(X_test)

    # Placeholder metrics table displayed in the app
    met = {"MAE: ": [5],
           "MSE: ": [5],
           "MAPE: ": [8]}
    met = pd.DataFrame(met).T
    st.table(met)

    st.write("MAE: " + str(mean_absolute_error(y_test, Y_preds)))
    st.write("MSE: " + str(mean_squared_error(y_test, Y_preds)))
    st.write("MAPE: " + str(mean_absolute_percentage_error(y_test, Y_preds)))

    # Cross-validate the model
    CV_model(estimator, X_train, y_train, 3)

    return trained

After (new PinardPlsr class, added by this commit):

from Packages import *
from Class_Mod.Miscellaneous import *
from Class_Mod.Regression_metrics import metrics

class PinardPlsr:
    def __init__(self, x_train, y_train, x_test, y_test):
        self.x_train = x_train
        self.x_test = x_test
        self.y_train = y_train
        self.y_test = y_test

        # create model module with PINARD
        # Declare preprocessing pipeline
        svgolay = [('_sg1', pp.SavitzkyGolay()),
                   ('_sg2', pp.SavitzkyGolay())]              # nested pipeline applying the Savitzky-Golay filter twice for 2nd-order preprocessing
        preprocessing = [('id', pp.IdentityTransformer()),    # identity transformer, no change to the data
                         ('savgol', pp.SavitzkyGolay()),      # Savitzky-Golay smoothing filter
                         ('derivate', pp.Derivate()),         # first derivative of the data
                         ('SVG', FeatureUnion(svgolay))]

        # Declare complete pipeline
        pipeline = Pipeline([
            ('scaler', MinMaxScaler()),                       # scale the data
            ('preprocessing', FeatureUnion(preprocessing)),   # preprocessing
            ('PLS', PLSRegression())])                        # regressor

        # Estimator including y-value scaling
        estimator = TransformedTargetRegressor(regressor=pipeline, transformer=MinMaxScaler())

        # Training
        self.trained = estimator.fit(self.x_train, self.y_train)

        # Predictions on the calibration, cross-validation, and test sets
        self.yc = pd.DataFrame(self.trained.predict(self.x_train))                                   # predictions on the calibration (train) data
        self.ycv = pd.DataFrame(cross_val_predict(self.trained, self.x_train, self.y_train, cv=3))   # 3-fold cross-validation predictions
        self.yt = pd.DataFrame(self.trained.predict(self.x_test))                                    # predictions on the test data

    @property
    def model_(self):
        return self.trained

    @property
    def metrics_(self):
        metc = metrics(train=(self.y_train, self.yc))
        metcv = metrics(train=(self.y_train, self.ycv))
        mett = metrics(train=(self.y_test, self.yt))
        met = pd.concat([metc, metcv, mett], axis=0)
        met.index = ['calib', 'cv', 'test']
        return met

    @property
    def pred_data_(self):
        return self.yc, self.ycv, self.yt
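For orientation, here is a minimal usage sketch of the new class. It mirrors the Kennard-Stone split that the removed model_PLSR performed internally; the pinard import paths, the example file names, separator, and random seed are assumptions, since the real names arrive through the star import from Packages and are not visible in this diff.

    # Minimal usage sketch, not part of the commit.
    # Assumption: utils.load_csv and train_test_split_idx come from the pinard package.
    from pinard import utils                                  # assumed import path
    from pinard.model_selection import train_test_split_idx   # assumed import path

    # Hypothetical input files and separator
    x, y = utils.load_csv("Xcal.csv", "Ycal.csv", autoremove_na=True, sep=";",
                          x_hdr=0, y_hdr=0, x_index_col=0, y_index_col=0)

    # Kennard-Stone split with a correlation metric, 25% of the data held out for testing
    train_index, test_index = train_test_split_idx(x, y=y, method="kennard_stone",
                                                   metric="correlation",
                                                   test_size=0.25, random_state=42)

    # PinardPlsr is the class added by this commit (defined above)
    model = PinardPlsr(x[train_index], y[train_index], x[test_index], y[test_index])

    print(model.metrics_)            # metrics table indexed by 'calib', 'cv', 'test'
    yc, ycv, yt = model.pred_data_   # calibration, cross-validation, and test predictions

Compared with the removed function, the class keeps the fitted estimator and the three prediction sets as attributes and delegates metric computation to Class_Mod.Regression_metrics, which is presumably why the Streamlit display calls were dropped from this module.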