from juliacall import Main as jl
import numpy as np
import pandas as pd
from pandas import DataFrame


class LWPLSR:
    """The lwpls regression model from Jchemo (M. Lesnoff).

    Returns:
        self.predicted_results (dict): all predicted results (train, test, cross-validation)
        self.mod (Julia model): the prepared model
        self.best_lwplsr_params (dict): best tuning parameters found by gridscore
    """

    def __init__(self, dataset, preT, action):
        """Initialize the LWPLSR and prepare data for Julia computing.

        Args:
            dataset (list): list of DataFrames (train/test sets, plus CV folds for 'Model_Creation')
            preT (dict): pre-treatment parameters (and best lwplsr parameters for 'Prediction')
            action (str): 'Model_Creation' or 'Prediction'
        """
        if action == 'Model_Creation':
            # get train / test data from dataset
            self.x_train, self.y_train, self.x_test, self.y_test = [dataset[i] for i in range(4)]
            # calculate number of KFolds and get CV data from dataset
            self.nb_fold = int((len(dataset) - 4) / 4)
            for i in range(self.nb_fold):
                setattr(self, "xtr_fold" + str(i+1), dataset[i+7])
                setattr(self, "ytr_fold" + str(i+1), dataset[i+13])
                setattr(self, "xte_fold" + str(i+1), dataset[i+4])
                setattr(jl, "xtr_fold" + str(i+1), dataset[i+7])
                setattr(jl, "ytr_fold" + str(i+1), dataset[i+13])
                setattr(jl, "xte_fold" + str(i+1), dataset[i+4])
            # prepare to send dataframes to julia and Jchemo (with the jl. prefix)
            jl.x_train, jl.y_train, jl.x_test, jl.y_test = self.x_train, self.y_train, self.x_test, self.y_test
            # get parameters for pre-treatment of the spectra (acquired from a global PLSR)
            self.preT = preT
            # initialize class attributes
            y_shape = self.y_test.shape
            self.pred_test = np.zeros(shape=(y_shape[0], 1))
            self.pred_train = np.zeros(shape=(y_shape[0], 1))
            self.mod = ""
            self.best_lwplsr_params = np.zeros(shape=(5, 1))
            self.predicted_results = {}
        elif action == 'Prediction':
            self.x_spectra, self.y, self.x_pred = [dataset[i] for i in range(3)]
            # prepare to send dataframes to julia and Jchemo (with the jl. prefix)
            jl.x_spectra, jl.y, jl.x_pred = self.x_spectra, self.y, self.x_pred
            self.preT = preT
            self.predicted_results = {}
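    # --- Illustrative note (added; not part of the original code) --------------
    # The layout of `dataset` for 'Model_Creation' is not documented. Inferred
    # from the hard-coded offsets above (i+4, i+7, i+13), it appears to assume
    # exactly 3 CV folds, e.g.:
    #
    #   dataset = [x_train, y_train, x_test, y_test,   # indices 0-3
    #              xte_fold1, xte_fold2, xte_fold3,     # indices 4-6
    #              xtr_fold1, xtr_fold2, xtr_fold3,     # indices 7-9
    #              yte_fold1, yte_fold2, yte_fold3,     # indices 10-12 (not used here)
    #              ytr_fold1, ytr_fold2, ytr_fold3]     # indices 13-15
    #
    # This is an assumption inferred from the indexing, not a documented contract.
    # ----------------------------------------------------------------------------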
    def Jchemo_lwplsr_fit(self):
        """Send data to Julia to fit lwplsr.

        Args:
            self.x_train (DataFrame): training spectra
            self.y_train (DataFrame): training target values
            self.x_test (DataFrame): test spectra
            self.y_test (DataFrame): test target values

        Returns:
            self.mod (Julia model): the prepared model
        """
        # launch Julia Jchemo lwplsr and convert DataFrames from Python Pandas DataFrame to Julia DataFrame
        jl.seval("""
        using DataFrames
        using Pandas
        using Jchemo
        x_train |> Pandas.DataFrame |> DataFrames.DataFrame
        y_train |> Pandas.DataFrame |> DataFrames.DataFrame
        x_test |> Pandas.DataFrame |> DataFrames.DataFrame
        y_test |> Pandas.DataFrame |> DataFrames.DataFrame
        """)

        # apply pre-treatments on X data
        print('LWPLSR - preTreatment')
        # send the pre-treatment parameters to Julia before building the savgol/snv pipeline
        jl.npoint = self.preT['window_length']
        jl.deriv = self.preT['deriv']
        jl.degree = self.preT['polyorder']
        if self.preT['polyorder'] > 0:
            jl.seval("""
            mod1 = model(snv; centr = true, scal = true)
            mod2 = model(savgol; npoint = npoint, deriv = deriv, degree = degree)
            """)
            if self.preT['normalization'] == "No_transformation":
                jl.seval("""
                preMod = mod2
                """)
            elif self.preT['normalization'] == 'Snv':
                jl.seval("""
                preMod = pip(mod1, mod2)
                """)
            jl.seval("""
            fit!(preMod, x_train)
            x_train = transf(preMod, x_train)
            x_test = transf(preMod, x_test)
            """)

        # LWPLSR tuning
        print('LWPLSR - tuning')
        # set tuning parameters to test
        jl.seval("""
        nlvdis = [5; 10; 15] ; metric = [:eucl; :mah]
        h = [1; 2; 6; Inf] ; k = [30; 80; 200]
        nlv = 5:15
        pars = Jchemo.mpar(nlvdis = nlvdis, metric = metric, h = h, k = k)
        """)
        # split Train data into Cal/Val for tuning
        jl.seval("""
        pct = .3
        ntrain = Jchemo.nro(x_train)
        nval = Int(round(pct * ntrain))
        s = Jchemo.samprand(ntrain, nval)
        Xcal = x_train[s.train, :]
        ycal = y_train[s.train]
        Xval = x_train[s.test, :]
        yval = y_train[s.test]
        ncal = ntrain - nval
        """)

        # create LWPLSR model and tune with gridscore
        jl.seval("""
        mod = Jchemo.model(Jchemo.lwplsr)
        res = gridscore(mod, Xcal, ycal, Xval, yval; score = Jchemo.rmsep, pars, nlv, verbose = false)
        u = findall(res.y1 .== minimum(res.y1))[1]  # best parameters combination
        """)

        # save best lwplsr parameters
        self.best_lwplsr_params = {'nlvdis': jl.res.nlvdis[jl.u], 'metric': str(jl.res.metric[jl.u]),
                                   'h': jl.res.h[jl.u], 'k': jl.res.k[jl.u], 'nlv': jl.res.nlv[jl.u]}
        print('best lwplsr params ' + str(self.best_lwplsr_params))

        # run LWPLSR model with best parameters
        jl.seval("""
        mod = Jchemo.model(Jchemo.lwplsr; nlvdis = res.nlvdis[u], metric = res.metric[u], h = res.h[u], k = res.k[u], nlv = res.nlv[u])
        # Fit model
        Jchemo.fit!(mod, x_train, y_train)
        """)
        # save Julia Jchemo model
        self.mod = jl.mod
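    # --- Illustrative sketch (added; values are hypothetical) -------------------
    # Jchemo_lwplsr_fit() expects `preT` to carry the Savitzky-Golay / SNV
    # pre-treatment settings read above. Only the key names come from the code;
    # the values below are placeholders:
    #
    #   preT = {'normalization': 'Snv',   # or 'No_transformation'
    #           'window_length': 11,      # savgol npoint
    #           'polyorder': 2,           # savgol degree (0 skips pre-treatment)
    #           'deriv': 1}               # savgol derivative order
    # ----------------------------------------------------------------------------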
    def Jchemo_lwplsr_predict_fit(self):
        """Send data to Julia to fit lwplsr for prediction.

        Args:
            self.x_spectra (DataFrame): calibration spectra
            self.y (DataFrame): calibration target values
            self.x_pred (DataFrame): spectra to predict

        Returns:
            self.mod (Julia model): the prepared model
        """
        # launch Julia Jchemo lwplsr and convert DataFrames from Python Pandas DataFrame to Julia DataFrame
        jl.seval("""
        using DataFrames
        using Pandas
        using Jchemo
        x_spectra |> Pandas.DataFrame |> DataFrames.DataFrame
        y |> Pandas.DataFrame |> DataFrames.DataFrame
        x_pred |> Pandas.DataFrame |> DataFrames.DataFrame
        """)

        # no LWPLSR tuning here: reuse the best parameters found at model creation
        print('LWPLSR - no tuning, using best parameters from model creation')

        # import best params from model creation
        jl.nlvdis = self.preT['nlvdis']
        jl.metric = self.preT['metric']
        jl.h = self.preT['h']
        jl.k = self.preT['k']
        jl.nlv = self.preT['nlv']

        # run LWPLSR model with best parameters
        jl.seval("""
        mod = Jchemo.model(Jchemo.lwplsr; nlvdis = nlvdis, metric = Symbol(metric), h = h, k = k, nlv = nlv)
        # Fit model
        Jchemo.fit!(mod, x_spectra, y)
        """)
        # save Julia Jchemo model
        self.mod = jl.mod

    def Jchemo_lwplsr_predict(self):
        """Send data to Julia to predict with lwplsr.

        Args:
            self.mod (Julia model): the prepared model
            self.x_train (DataFrame): training spectra
            self.y_train (DataFrame): training target values
            self.x_test (DataFrame): test spectra
            self.y_test (DataFrame): test target values

        Returns:
            self.pred_test (Julia DataFrame): predicted values on x_test
            self.pred_train (Julia DataFrame): predicted values on x_train
        """
        # predictions on x_test and x_train, stored in self.pred_test / self.pred_train
        self.pred_test = jl.seval("""
        println("LWPLSR - start test predict")
        res = Jchemo.predict(mod, x_test)
        res.pred
        """)
        self.pred_train = jl.seval("""
        println("LWPLSR - start train predict")
        res = Jchemo.predict(mod, x_train)
        res.pred
        """)
        print('LWPLSR - end')

    def Jchemo_lwplsr_predict_predict(self):
        """Send data to Julia to predict with lwplsr.

        Args:
            self.mod (Julia model): the prepared model
            self.x_spectra (DataFrame): calibration spectra
            self.y (DataFrame): calibration target values
            self.x_pred (DataFrame): spectra to predict

        Returns:
            self.y_pred (Julia DataFrame): predicted values on x_pred
        """
        # predictions on x_pred, stored in self.y_pred
        self.y_pred = jl.seval("""
        println("LWPLSR - start y predict")
        res = Jchemo.predict(mod, x_pred)
        res.pred
        """)
        print('LWPLSR - end')
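    # --- Illustrative usage sketch (added; an assumption, not original code) ----
    # For the 'Prediction' path, the expected call order appears to be:
    #
    #   mod = LWPLSR([x_spectra, y, x_pred], preT, 'Prediction')
    #   mod.Jchemo_lwplsr_predict_fit()
    #   mod.Jchemo_lwplsr_predict_predict()
    #   results = mod.predict_pred_data_        # {'y_pred': DataFrame}
    #
    # In this path `preT` must also carry the tuned 'nlvdis', 'metric', 'h', 'k'
    # and 'nlv' values saved at model-creation time; variable names above are
    # hypothetical.
    # ----------------------------------------------------------------------------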
    def Jchemo_lwplsr_cv(self):
        """Send Cross-Validation data to Julia to fit & predict with lwplsr.

        Args:
            self.best_lwplsr_params: the best parameters (from tuning) to use for CV
            self.xtr_fold1 (DataFrame): training spectra for fold 1 (idem for the other folds)
            self.ytr_fold1 (DataFrame): training target values for fold 1
            self.xte_fold1 (DataFrame): test spectra for fold 1

        Returns:
            self.predicted_results (dict): predicted values for each fold ("CV1", "CV2", ...)
        """
        for i in range(self.nb_fold):
            jl.Xtr = getattr(self, "xtr_fold" + str(i+1))
            jl.Ytr = getattr(self, "ytr_fold" + str(i+1))
            jl.Xte = getattr(self, "xte_fold" + str(i+1))
            # convert Python Pandas DataFrame to Julia DataFrame
            jl.seval("""
            using DataFrames
            using Pandas
            using Jchemo
            Xtr |> Pandas.DataFrame |> DataFrames.DataFrame
            Ytr |> Pandas.DataFrame |> DataFrames.DataFrame
            Xte |> Pandas.DataFrame |> DataFrames.DataFrame
            """)
            # set lwplsr parameters to the best ones from tuning
            jl.nlvdis = int(self.best_lwplsr_params['nlvdis'])
            jl.metric = self.best_lwplsr_params['metric']
            jl.h = self.best_lwplsr_params['h']
            jl.k = int(self.best_lwplsr_params['k'])
            jl.nlv = int(self.best_lwplsr_params['nlv'])
            jl.seval("""
            println("LWPLSR - start CV mod")
            mod_cv = Jchemo.model(Jchemo.lwplsr; nlvdis = nlvdis, metric = Symbol(metric), h = h, k = k, nlv = nlv)
            # Fit model
            Jchemo.fit!(mod_cv, Xtr, Ytr)
            """)
            pred_cv = jl.seval("""
            println("LWPLSR - start CV predict")
            res = Jchemo.predict(mod_cv, Xte)
            res.pred
            """)
            # save predicted values for each KFold in the predicted_results dictionary
            self.predicted_results["CV" + str(i+1)] = DataFrame(pred_cv)

    @property
    def pred_data_(self):
        # convert predicted data from x_train / x_test to Pandas DataFrame
        self.predicted_results["pred_data_train"] = DataFrame(self.pred_train)
        self.predicted_results["pred_data_test"] = DataFrame(self.pred_test)
        return self.predicted_results

    @property
    def predict_pred_data_(self):
        # convert predicted data from x_pred to Pandas DataFrame
        self.predicted_results["y_pred"] = DataFrame(self.y_pred)
        return self.predicted_results

    @property
    def model_(self):
        return self.mod

    @property
    def best_lwplsr_params_(self):
        return self.best_lwplsr_params
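
# ------------------------------------------------------------------------------
# Minimal usage sketch (added for illustration; not part of the original module).
# It shows the expected call order for the 'Model_Creation' path with random
# placeholder data and the 3-fold `dataset` layout inferred from __init__.
# Running it requires a Julia environment with Jchemo, DataFrames and Pandas.jl
# installed; all names and values below are hypothetical.
# ------------------------------------------------------------------------------
if __name__ == "__main__":
    rng = np.random.default_rng(0)

    def _df(n_rows, n_cols):
        # hypothetical helper: random spectra-like DataFrame of shape (n_rows, n_cols)
        return DataFrame(rng.normal(size=(n_rows, n_cols)))

    # train / test blocks (indices 0-3), then xte, xtr, yte, ytr folds (3 each)
    dataset = ([_df(60, 50), _df(60, 1), _df(20, 50), _df(20, 1)]
               + [_df(20, 50) for _ in range(3)]    # xte_fold1..3
               + [_df(40, 50) for _ in range(3)]    # xtr_fold1..3
               + [_df(20, 1) for _ in range(3)]     # yte_fold1..3 (not used by LWPLSR)
               + [_df(40, 1) for _ in range(3)])    # ytr_fold1..3

    # pre-treatment settings; polyorder = 0 skips the savgol/snv pipeline
    preT = {'normalization': 'No_transformation', 'window_length': 11,
            'polyorder': 0, 'deriv': 0}

    model = LWPLSR(dataset, preT, 'Model_Creation')
    model.Jchemo_lwplsr_fit()        # tune and fit on x_train / y_train
    model.Jchemo_lwplsr_predict()    # predict on x_train and x_test
    model.Jchemo_lwplsr_cv()         # fit / predict on each CV fold
    print(model.pred_data_)          # dict of Pandas DataFrames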