Skip to content
Snippets Groups Projects
PLSR_Preprocess.py 4.21 KiB
Newer Older
  • Learn to ignore specific revisions
  • DIANE's avatar
    DIANE committed
    from Packages import *
    from Class_Mod import metrics
    from Class_Mod.DATA_HANDLING import *
    
    class PlsProcess:
        """Tune spectral preprocessing and PLS regression settings with hyperopt.

        Each trial picks a scatter correction ('Snv' or 'Non'), Savitzky-Golay
        filter settings and a number of PLS components, fits a
        ``PLSRegression`` and scores it from the calibration, cross-validation
        and test RMSE. The best trial's model, parameters and predictions are
        kept on the instance.

        Parameters
        ----------
        x_train, x_test :
            Predictor tables. They are used with ``.shape[1]``, ``.columns``
            and ``.index``, so pandas DataFrames are expected.
        y_train, y_test :
            Reference values for the two sets; must support ``.mean()``.
        scale :
            Forwarded to ``PLSRegression(scale=...)``.
        Kfold :
            Forwarded to ``cross_val_predict(cv=...)``.
        """

        # Legacy class-level copy of the best score. The search itself tracks
        # the best score per instance; this attribute is only kept in sync for
        # code that still reads ``PlsProcess.SCORE``.
        SCORE = 100000000
        index_export = pd.DataFrame()

        def __init__(self, x_train, x_test, y_train, y_test, scale, Kfold):
            PlsProcess.SCORE = 10000
            # Per-instance best score, so that several PlsProcess objects can be
            # tuned without overwriting each other's threshold.
            self._best_score = 10000
            self.xtrain = x_train
            self.xtest = x_test
            self.y_train = y_train
            self.y_test = y_test
            self.scale = scale
            self.Kfold = Kfold
            self.model = None
            self.p = self.xtrain.shape[1]
            self.PLS_params = {'polyorder': hp.choice('polyorder', [0, 1, 2]),
                               'deriv': hp.choice('deriv', [0, 1, 2]),
                               'window_length': hp.choice('window_length', [15, 19, 23, 27]),
                               'scatter': hp.choice('scatter', ['Snv', 'Non'])}
            self.PLS_params['n_components'] = hp.randint("n_components", 2, 20)

        @staticmethod
        def _apply_savgol(x, params):
            """Return ``x`` after the Savitzky-Golay filter described by ``params``.

            When the combination is not usable (derivative order above the
            polynomial order, or polynomial order not below the window length)
            the filter is skipped: ``x`` is returned unchanged and ``params`` is
            reset in place to deriv=0, polyorder=0, window_length=1 so that the
            recorded parameters describe what was actually applied.
            """
            if (params['deriv'] > params['polyorder']
                    or params['polyorder'] >= params['window_length']):
                params['deriv'] = 0
                params['polyorder'] = 0
                params['window_length'] = 1
                return x

            filtered = savgol_filter(x,
                                     polyorder=params['polyorder'],
                                     deriv=params['deriv'],
                                     window_length=params['window_length'])
            return pd.DataFrame(filtered, columns=x.columns, index=x.index)

        def objective(self, params):
            """Run one hyperopt trial and return its score (lower is better).

            ``params`` holds 'scatter', 'polyorder', 'deriv', 'window_length' and
            'n_components'. It may be modified in place when a setting has to be
            replaced by a fallback value. If the trial beats the best score seen
            so far by more than 0.5, its model, parameters and predictions are
            stored on the instance.
            """
            # Resolve the scatter correction by name. Snv and Non are expected in
            # module scope (they are not defined in this file).
            scatter_funcs = {'Snv': Snv, 'Non': Non}
            correct = scatter_funcs[params['scatter']]

            # Always start from the untouched inputs: writing the corrected data
            # back to self.xtrain / self.xtest would make every later trial run
            # on the output of earlier ones.
            corrected_train = correct(self.xtrain)
            corrected_test = correct(self.xtest)

            # Work on a copy so the test-set call cannot be affected by the
            # fallback reset performed during the training-set call.
            self.x_train = self._apply_savgol(corrected_train, params)
            self.x_test = self._apply_savgol(corrected_test, dict(params))

            try:
                Model = PLSRegression(scale=self.scale, n_components=params['n_components'])
                Model.fit(self.x_train, self.y_train)
            except ValueError:
                # The requested number of components was rejected; fall back to
                # a single component.
                params["n_components"] = 1
                Model = PLSRegression(scale=self.scale, n_components=params["n_components"])
                Model.fit(self.x_train, self.y_train)

            # Calibration, cross-validation and test predictions.
            yc = Model.predict(self.x_train).reshape(-1)
            ycv = cross_val_predict(Model, self.x_train, self.y_train, cv=self.Kfold, n_jobs=-1).reshape(-1)
            yt = Model.predict(self.x_test).reshape(-1)

            rmsecv = np.sqrt(mean_squared_error(self.y_train, ycv))
            rmsec = np.sqrt(mean_squared_error(self.y_train, yc))
            rmset = np.sqrt(mean_squared_error(self.y_test, yt))

            # NOTE(review): np.round(rmset/rmsecv) is 0 whenever rmset is less
            # than half of rmsecv, which makes the whole score 0 - confirm this
            # is intended.
            score = rmsecv/rmsec*np.round(rmset/rmsecv)*rmsecv*100/self.y_train.mean()*rmset*1000/self.y_test.mean()

            if score < self._best_score - 0.5:
                self._best_score = score
                PlsProcess.SCORE = score
                self.nlv = params['n_components']
                self.best = params
                self.model = Model
                self.yc = yc
                self.ycv = ycv
                self.yt = yt
            return score

        ##############################################

        def tune(self, n_iter):
            """Run ``n_iter`` hyperopt trials of ``objective`` over ``PLS_params``.

            Results are not returned; read them from ``best_hyperparams``,
            ``model_`` and ``pred_data_`` afterwards.
            """
            trials = Trials()

            fmin(fn=self.objective,
                 space=self.PLS_params,
                 algo=tpe.suggest,  # Tree of Parzen Estimators (tpe), a Bayesian approach
                 max_evals=n_iter,
                 trials=trials,
                 verbose=0)

        @property
        def best_hyperparams(self):
            """Preprocessing settings of the best trial as a nested dict.

            The result is also stored on ``self.b``. Only available once a trial
            has been recorded as best.
            """
            self.b = {'Scatter': self.best['scatter'],
                      'Saitzky-Golay derivative parameters': {'polyorder': self.best['polyorder'],
                                                              'deriv': self.best['deriv'],
                                                              'window_length': self.best['window_length']}}
            return self.b

        @property
        def model_(self):
            """Model fitted in the best trial, or None if none was recorded."""
            return self.model

        @property
        def pred_data_(self):
            """Tuple ``(yc, ycv, yt)``: calibration, cross-validation and test predictions of the best trial."""
            return self.yc, self.ycv, self.yt