from Packages import *
from Class_Mod import metrics
from Class_Mod.DATA_HANDLING import *
class PlsProcess:
SCORE = 100000000
index_export = pd.DataFrame()
def __init__(self, x_train, x_test, y_train, y_test, scale, Kfold):
PlsProcess.SCORE = 10000
self.xtrain = x_train
self.xtest = x_test
self.y_train = y_train
self.y_test = y_test
self.scale = scale
self.Kfold = Kfold
self.model = None
self.p = self.xtrain.shape[1]
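        # Hyperopt search space: scatter correction (SNV or none), the
        # Savitzky-Golay smoothing/derivative parameters, and the number of
        # PLS latent variables (n_components).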
self.PLS_params = {'polyorder': hp.choice('polyorder', [0, 1, 2]),
'deriv': hp.choice('deriv', [0, 1, 2]),
'window_length': hp.choice('window_length', [15, 19, 23, 27]),
'scatter': hp.choice('scatter', ['Snv', 'Non'])}
self.PLS_params['n_components'] = hp.randint("n_components", 2, 20)
def objective(self, params):
# Train the model
        # Apply the selected scatter correction (Snv or Non) by name; local
        # copies keep the raw spectra intact between hyperopt trials.
        xtrain = eval(f"{params['scatter']}(self.xtrain)")
        xtest = eval(f"{params['scatter']}(self.xtest)")
        if params['deriv'] > params['polyorder'] or params['polyorder'] > params['window_length']:
            # Invalid Savitzky-Golay combination: fall back to no smoothing.
            params['deriv'] = 0
            params['polyorder'] = 0
            params['window_length'] = 1
            self.x_train = xtrain
            self.x_test = xtest
        else:
            self.x_train = pd.DataFrame(savgol_filter(xtrain, polyorder=params['polyorder'],
                                                      deriv=params['deriv'], window_length=params['window_length']),
                                        columns=self.xtrain.columns, index=self.xtrain.index)
            self.x_test = pd.DataFrame(savgol_filter(xtest, polyorder=params['polyorder'],
                                                     deriv=params['deriv'], window_length=params['window_length']),
                                       columns=self.xtest.columns, index=self.xtest.index)
try:
Model = PLSRegression(scale = self.scale, n_components = params['n_components'])
Model.fit(self.x_train, self.y_train)
        except ValueError:
            # Fall back to a single latent variable when the requested
            # n_components is not valid for this data.
            params["n_components"] = 1
            Model = PLSRegression(scale=self.scale, n_components=params["n_components"])
            Model.fit(self.x_train, self.y_train)
        ## Make predictions: calibration (yc), cross-validation (ycv), test (yt)
yc = Model.predict(self.x_train).reshape(-1)
ycv = cross_val_predict(Model, self.x_train, self.y_train, cv=self.Kfold, n_jobs=-1).reshape(-1)
yt = Model.predict(self.x_test).reshape(-1)
####################
rmsecv = np.sqrt(mean_squared_error(self.y_train, ycv))
rmsec = np.sqrt(mean_squared_error(self.y_train, yc))
rmset = np.sqrt(mean_squared_error(self.y_test, yt))
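        # Heuristic composite score (lower is better): combines the
        # RMSECV/RMSEC and rounded RMSEP/RMSECV ratios with the mean-scaled
        # cross-validation and test errors.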
        score = rmsecv / rmsec * np.round(rmset / rmsecv) * rmsecv * 100 / self.y_train.mean() * rmset * 1000 / self.y_test.mean()
        if score < PlsProcess.SCORE - 0.5:
PlsProcess.SCORE = score
self.nlv = params['n_components']
self.best = params
self.model = Model
self.yc = yc
self.ycv = ycv
self.yt = yt
return score
##############################################
def tune(self, n_iter):
trials = Trials()
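        # The decoded best configuration is cached by objective() in self.best
        # (and the fitted model in self.model); fmin's raw return value is
        # index-encoded, so it is not used downstream.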
        best_params = fmin(fn=self.objective,
                           space=self.PLS_params,
                           algo=tpe.suggest,  # Tree-structured Parzen Estimator (TPE), a Bayesian approach
                           max_evals=n_iter,
                           trials=trials,
                           verbose=0)
@property
def best_hyperparams(self):
        self.b = {'Scatter': self.best['scatter'],
                  'Savitzky-Golay derivative parameters': {'polyorder': self.best['polyorder'],
                                                           'deriv': self.best['deriv'],
                                                           'window_length': self.best['window_length']}}
return self.b
@property
def pred_data_(self):
return self.yc, self.ycv, self.yt
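

# A minimal usage sketch, not part of the original class: the synthetic
# spectra, the 70/30 split, Kfold=5, and n_iter=50 below are illustrative
# assumptions, and running it requires the project's Packages / Class_Mod
# modules (which provide pd, np, Snv, ...) to be importable.
if __name__ == '__main__':
    from sklearn.model_selection import train_test_split

    # 100 synthetic spectra with 60 wavelength columns (placeholder data).
    X = pd.DataFrame(np.random.rand(100, 60))
    y = pd.Series(np.random.rand(100) + 10)
    x_train, x_test, y_train, y_test = train_test_split(X, y, test_size=0.3)

    pls = PlsProcess(x_train, x_test, y_train, y_test, scale=False, Kfold=5)
    pls.tune(n_iter=50)           # Bayesian search over the space above
    print(pls.best_hyperparams)   # preprocessing of the retained model
    yc, ycv, yt = pls.pred_data_  # calibration / CV / test predictions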