Newer
Older
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
class PinardPlsr:
def __init__(self, x_train, y_train, x_test, y_test):
self.x_train = x_train
self.x_test = x_test
self.y_train = y_train
self.y_test = y_test
# create model module with PINARD
# Declare preprocessing pipeline
svgolay = [ ('_sg1',pp.SavitzkyGolay()),
('_sg2',pp.SavitzkyGolay()) # nested pipeline to perform the Savitzky-Golay method twice for 2nd order preprocessing
]
preprocessing = [ ('id', pp.IdentityTransformer()), # Identity transformer, no change to the data
('savgol', pp.SavitzkyGolay()), # Savitzky-Golay smoothing filter
('derivate', pp.Derivate()), # Calculate the first derivative of the data
('SVG', FeatureUnion(svgolay))
# Pipeline([('_sg1',pp.SavitzkyGolay()),('_sg2',pp.SavitzkyGolay())]) # nested pipeline to perform the Savitzky-Golay method twice for 2nd order preprocessing
]
# Declare complete pipeline
pipeline = Pipeline([
('scaler', MinMaxScaler()), # scaling the data
('preprocessing', FeatureUnion(preprocessing)), # preprocessing
('PLS', PLSRegression())])
# Estimator including y values scaling
estimator = TransformedTargetRegressor(regressor = pipeline, transformer = MinMaxScaler())
# Training
self.trained = estimator.fit(self.x_train, self.y_train)
# fit scores
# Predictions on test set
self.yc = pd.DataFrame(self.trained.predict(self.x_train)) # make predictions on test data and assign to Y_preds variable
self.ycv = pd.DataFrame(cross_val_predict(self.trained, self.x_train, self.y_train, cv = 3)) # make predictions on test data and assign to Y_preds variable
self.yt = pd.DataFrame(self.trained.predict(self.x_test)) # make predictions on test data and assign to Y_preds variable
################################################################################################################
################################################################################################################
@property
def model_(self):
return self.trained
@property
def metrics_(self):
metc = metrics(train=(self.y_train, self.yc))
metcv = metrics(train=(self.y_train, self.ycv))
mett = metrics( train=(self.y_test, self.yt))
met = pd.concat([metc, metcv, mett], axis = 0)
met.index = ['calib','cv','test']
return met
@property
def pred_data_(self):
return self.yc, self.ycv, self.yt