Newer
Older
from Packages import *
from Class_Mod import metrics, Snv, No_transformation, KF_CV
class Regmodel(object):
def __init__(self, train : [pd.DataFrame, pd.DataFrame], test : [pd.DataFrame, pd.DataFrame], n_iter, add_hyperparams = None, nfolds = 5, **kwargs):
self.SCORE = 100000000
self._xc, self._xt, self._ytrain, self._ytest = train[0], test[0], train[1], test[1]
self._nc, self._nt, self._p = train[0].shape[0], test[0].shape[0], train[0].shape[1]
self._model, self._best = None, None
self._yc, self._ycv, self._yt = None, None, None
self._cv_df = pd.DataFrame
self._nfolds = nfolds
self._selected_bands = pd.DataFrame(index = ['from', 'to'])
self.important_features = None
self._hyper_params = {'polyorder': hp.choice('polyorder', [0, 1, 2]),
'deriv': hp.choice('deriv', [0, 1, 2]),
'window_length': hp.choice('window_length', [15, 21, 27, 33]),
'scatter': hp.choice('scatter', ['Snv', 'No_transformation'])}
if add_hyperparams is not None:
self._hyper_params.update(add_hyperparams)
self._best = None
trials = Trials()
best_params = fmin(fn=self.objective,
space=self._hyper_params,
algo=tpe.suggest, # Tree of Parzen Estimators’ (tpe) which is a Bayesian approach
max_evals=n_iter,
trials=trials,
verbose=1)
@property
def train_data_(self):
return [self._xc, self._ytrain]
@property
def test_data_(self):
return [self._xt, self._ytest]
@property
def pretreated_spectra_(self):
return self.pretreated
@property
def get_params_(self):
return self._hyper_params
def objective(self, params):
pass
@property
def best_hyperparams_(self):
return self._best
@property
def best_hyperparams_print(self):
if self._best['scatter'] == 'Snv':
a = 'Standard Normal Variate (SNV)'
elif self._best['scatter'] == 'No_transformation':
a = " No transformation was performed"
SG = f'- Savitzky-Golay derivative parameters (Window_length:{self._best['window_length']}; polynomial order: {self._best['polyorder']}; Derivative order : {self._best['deriv']})'
Norm = f'- Spectral Normalization: {a}'
return SG+"\n"+Norm
@property
def model_(self):
return self._model
@property
def pred_data_(self):
return self._yc, self._yt
@property
def cv_data_(self):
return self._ycv
@property
def CV_results_(self):
return self._cv_df
@property
def important_features_(self):
return self.important_features
@property
def selected_features_(self):
return self._selected_bands
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
########################################### #########################################
class Plsr(Regmodel):
def __init__(self, train: [pd.DataFrame, pd.DataFrame], test: [pd.DataFrame, pd.DataFrame], n_iter = 10):
super().__init__(train, test, n_iter, add_hyperparams = {'n_components': hp.randint('n_components', 2,20)})
### parameters in common
def objective(self, params):
x0 = [self._xc, self._xt]
x1 = [eval(str(params['scatter'])+"(x0[i])") for i in range(2)]
a, b, c = params['deriv'], params['polyorder'], params['window_length']
if a > b or b > c:
if self._best is not None:
a, b, c = self._best['deriv'], self._best['polyorder'], self._best['window_length']
else:
a, b, c = 0, 0, 1
params['deriv'], params['polyorder'], params['window_length'] = a, b, c
x2 = [savgol_filter(x1[i], polyorder=params['polyorder'], deriv=params['deriv'], window_length = params['window_length']) for i in range(2)]
Model = PLSRegression(scale = False, n_components = params['n_components'])
self._cv_df = KF_CV().process(model = Model, x = x2[0], y = self._ytrain, n_folds = self._nfolds)
self._cv_df['Average'] = self._cv_df.mean(axis = 1)
self._cv_df['S'] = self._cv_df.std(axis = 1)
self._cv_df['CV(%)'] = self._cv_df['S'] * 100 / self._cv_df['Average']
self._cv_df = self._cv_df.T.round(2)
score = self._cv_df.loc["CV(%)",'rmse']
Model = PLSRegression(scale = False, n_components = params['n_components'])
Model.fit(x2[0], self._ytrain)
if self.SCORE > score:
self.SCORE = score
self._ycv = KF_CV().cross_val_predictor(model = Model, x = x2[0], y = self._ytrain, n_folds = self._nfolds)
self._yc = Model.predict(x2[0])
self._yt = Model.predict(x2[1])
self._model = Model
self._best = params
self.pretreated = pd.DataFrame(x2[0])
return score
############################################ #########################################
class TpeIpls(Regmodel):
def __init__(self, train: [pd.DataFrame, pd.DataFrame], test: [pd.DataFrame, pd.DataFrame], n_iter = 10, n_intervall = 5):
self.n_intervall = n_intervall
self.n_arrets = self.n_intervall*2
r = {'n_components': hp.randint('n_components', 2,20)}
r.update({f'v{i}': hp.randint(f'v{i}', 0, train[0].shape[1]) for i in range(1,self.n_arrets+1)})
super().__init__(train, test, n_iter, add_hyperparams = r)
### parameters in common
def objective(self, params):
### wevelengths index
self.idx = [params[f'v{i}'] for i in range(1,self.n_arrets+1)]
self.idx.sort()
arrays = [np.arange(self.idx[2*i],self.idx[2*i+1]+1) for i in range(self.n_intervall)]
id = np.unique(np.concatenate(arrays, axis=0), axis=0)
# ## Preprocessing
x0 = [self._xc, self._xt]
x1 = [eval(str(params['scatter'])+"(x0[i])") for i in range(2)]
a, b, c = params['deriv'], params['polyorder'], params['window_length']
if a > b or b > c:
if self._best is not None:
a, b, c = self._best['deriv'], self._best['polyorder'], self._best['window_length']
else:
a, b, c = 0, 0, 1
params['deriv'], params['polyorder'], params['window_length'] = a, b, c
x2 = [savgol_filter(x1[i], polyorder=params['polyorder'], deriv=params['deriv'], window_length = params['window_length']) for i in range(2)]
# print(x2)
try:
Model = PLSRegression(scale = False, n_components = params['n_components'])
self._cv_df = KF_CV().process(model = Model, x = x2[0][:,id], y = self._ytrain, n_folds = self._nfolds)
except ValueError as ve:
params["n_components"] = 1
Model = PLSRegression(scale = False, n_components = params['n_components'])
self._cv_df = KF_CV().process(model = Model, x = x2[0][:,id], y = self._ytrain, n_folds = self._nfolds)
self._cv_df['Average'] = self._cv_df.mean(axis = 1)
self._cv_df['S'] = self._cv_df.std(axis = 1)
self._cv_df['CV(%)'] = self._cv_df['S'] * 100 / self._cv_df['Average']
self._cv_df = self._cv_df.T.round(2)
score = self._cv_df.loc['CV(%)','rmse']
Model = PLSRegression(scale = False, n_components = params['n_components'])
Model.fit(x2[0][:,id], self._ytrain)
if self.SCORE > score:
self.SCORE = score
self._ycv = KF_CV().cross_val_predictor(model = Model, x = x2[0][:,id], y = self._ytrain, n_folds = self._nfolds)
self._yc = Model.predict(x2[0][:,id])
self._yt = Model.predict(x2[1][:,id])
self._model = Model
self._best = params
self.pretreated = pd.DataFrame(x2[0])
self.segments = arrays
for i in range(len(self.segments)):
self._selected_bands[f'band{i+1}'] = [self.segments[i][0], self.segments[i][self.segments[i].shape[0]-1]]
self._selected_bands.index = ['from','to']
return score
############################################ #########################################
class Pcr(Regmodel):
def __init__(self, train: [pd.DataFrame, pd.DataFrame], test: [pd.DataFrame, pd.DataFrame], n_iter = 10, n_val = 5):
super.__init__()
{f'pc{i}': hp.randint(f'pc{i+1}', 0, train[0].shape[1]) for i in range(self.n_val)}