from juliacall import Main as jl
import numpy as np
import pandas as pd


class LWPLSR:
    """The lwpls regression model from Jchemo (M. Lesnoff).

    Thin Python wrapper that pushes train/test data into the Julia ``Main``
    module (through ``juliacall``), tunes and fits a Jchemo ``lwplsr`` model
    there, and pulls predictions back.

    Attributes:
        x_train, y_train, x_test, y_test: the first four items of ``dataset``.
        xte_fold<i>, xtr_fold<i>, yte_fold<i>, ytr_fold<i>: per-fold data
            (``i`` starts at 1) when ``dataset`` carries more than four items.
        predicted_results_on_train (DataFrame): train predictions, filled by
            ``pred_data_``.
        predicted_results_on_test (DataFrame): test predictions, filled by
            ``pred_data_``.
        predicted_results_on_cv (DataFrame): placeholder; not filled by any
            method of this class.
        pred_train, pred_test: raw predictions returned by Julia.
        mod (Julia model): the fitted model (``""`` until fitted).
        best_lwplsr_params: dict of the selected hyper-parameters once
            ``Jchemo_lwplsr_fit`` has run.
    """

    # Order in which the fold groups are laid out in ``dataset`` after the
    # four leading train/test items. With 3 folds this yields the historical
    # start offsets 4 (xte), 7 (xtr), 10 (yte) and 13 (ytr).
    _FOLD_GROUPS = ("xte_fold", "xtr_fold", "yte_fold", "ytr_fold")

    def __init__(self, dataset):
        """Initiate the LWPLSR and prepare data for Julia computing.

        Args:
            dataset: indexable sequence. Items 0-3 are ``x_train``,
                ``y_train``, ``x_test``, ``y_test``. Any further items are
                read as four consecutive groups of ``nb_fold`` items each, in
                the order given by ``_FOLD_GROUPS``.
                NOTE(review): the grouped layout is inferred from the
                original 3-fold offsets (4/7/10/13) - confirm with the caller.
        """
        self.x_train, self.y_train, self.x_test, self.y_test = (
            dataset[i] for i in range(4)
        )

        # Store fold data as real instance attributes. The previous
        # ``vars()["self.<name>"] = ...`` only wrote into a temporary dict of
        # locals, so the attributes were never created.
        nb_fold = (len(dataset) - 4) // 4
        for group_idx, prefix in enumerate(self._FOLD_GROUPS):
            start = 4 + group_idx * nb_fold
            for i in range(nb_fold):
                setattr(self, f"{prefix}{i + 1}", dataset[start + i])

        # prepare to send dataframes to julia and Jchemo
        jl.x_train, jl.y_train, jl.x_test, jl.y_test = (
            self.x_train,
            self.y_train,
            self.x_test,
            self.y_test,
        )

        # initialize vars from the class
        self.predicted_results_on_test = pd.DataFrame()
        self.predicted_results_on_train = pd.DataFrame()
        self.predicted_results_on_cv = pd.DataFrame()
        # Placeholders sized after the matching target set.
        self.pred_test = np.zeros(shape=(self.y_test.shape[0], 1))
        self.pred_train = np.zeros(shape=(self.y_train.shape[0], 1))
        self.mod = ""
        self.best_lwplsr_params = np.zeros(shape=(5, 1))

    def Jchemo_lwplsr_fit(self):
        """Tune and fit the lwplsr model on the Julia side.

        Reads the Julia globals ``x_train`` and ``y_train`` set in
        ``__init__``, holds out a random 30 % of the training rows to score a
        parameter grid with RMSEP, then refits on the full training set with
        the best combination.

        Returns:
            None. Sets ``self.best_lwplsr_params`` (dict) and ``self.mod``
            (the fitted Julia model).
        """
        # launch Julia Jchemo lwplsr
        # NOTE(review): the results of the four conversion pipelines below
        # are not assigned back, so the Julia globals appear to keep their
        # original type - confirm whether that is intended.
        jl.seval("""
        using DataFrames
        using Pandas
        using Jchemo
        x_train |> Pandas.DataFrame |> DataFrames.DataFrame
        y_train |> Pandas.DataFrame |> DataFrames.DataFrame
        x_test |> Pandas.DataFrame |> DataFrames.DataFrame
        y_test |> Pandas.DataFrame |> DataFrames.DataFrame
        """)
        print('LWPLSR - tuning')
        # set tuning parameters
        jl.seval("""
        nlvdis = [5; 10; 15] ; metric = [:eucl; :mah]
        h = [1; 2; 6; Inf] ; k = [30; 80; 200]
        nlv = 5:15
        pars = Jchemo.mpar(nlvdis = nlvdis, metric = metric, h = h, k = k)
        """)
        # split Train data into Cal/Val for tuning
        jl.seval("""
        pct = .3
        ntrain = Jchemo.nro(x_train)
        nval = Int(round(pct * ntrain))
        s = Jchemo.samprand(ntrain, nval)
        Xcal = x_train[s.train, :]
        ycal = y_train[s.train]
        Xval = x_train[s.test, :]
        yval = y_train[s.test]
        ncal = ntrain - nval
        """)

        # Create LWPLSR model and tune
        jl.seval("""
        mod = Jchemo.model(Jchemo.lwplsr)
        res = gridscore(mod, Xcal, ycal, Xval, yval; score = Jchemo.rmsep, pars, nlv, verbose = false)
        u = findall(res.y1 .== minimum(res.y1))[1] #best parameters combination
        """)
        # NOTE(review): ``u`` is a 1-based Julia index; juliacall vector
        # wrappers may index 0-based from Python, in which case the values
        # reported here would be off by one row - verify. The model below is
        # built with ``u`` inside Julia and is not affected.
        best_row = jl.u
        self.best_lwplsr_params = {
            'nlvdis': jl.res.nlvdis[best_row],
            'metric': str(jl.res.metric[best_row]),
            'h': jl.res.h[best_row],
            'k': jl.res.k[best_row],
            'nlv': jl.res.nlv[best_row],
        }
        print('best lwplsr params' + str(self.best_lwplsr_params))
        print('LWPLSR - best params ok')
        # calculate LWPLSR model with best parameters
        jl.seval("""
        mod = Jchemo.model(Jchemo.lwplsr; nlvdis = res.nlvdis[u], metric = res.metric[u], h = res.h[u], k = res.k[u], nlv = res.nlv[u])
        # Fit model
        Jchemo.fit!(mod, x_train, y_train)
        """)
        self.mod = jl.mod

    def Jchemo_lwplsr_predict(self):
        """Predict on the test and train sets with the fitted Julia model.

        Uses the Julia globals ``mod``, ``x_test`` and ``x_train``; call
        ``Jchemo_lwplsr_fit`` first so that ``mod`` is a fitted model.

        Returns:
            None. Sets ``self.pred_test`` and ``self.pred_train`` to the
            ``pred`` field of the Julia prediction results.
        """
        # Predictions on x_test and store in self.pred_test
        self.pred_test = jl.seval("""
        println("LWPLSR - start test predict")
        res = Jchemo.predict(mod, x_test)
        res.pred
        """)
        # Predictions on x_train and store in self.pred_train
        self.pred_train = jl.seval("""
        println("LWPLSR - start train predict")
        res = Jchemo.predict(mod, x_train)
        res.pred
        """)
        print('LWPLSR - end')

    @property
    def pred_data_(self):
        """Return ``(train, test)`` predictions as pandas DataFrames.

        Also stores them on ``self.predicted_results_on_train`` and
        ``self.predicted_results_on_test``.
        """
        # convert predicted data to Pandas DataFrame
        self.predicted_results_on_test = pd.DataFrame(self.pred_test)
        self.predicted_results_on_train = pd.DataFrame(self.pred_train)
        return self.predicted_results_on_train, self.predicted_results_on_test

    @property
    def model_(self):
        """Return the Julia model stored in ``self.mod``."""
        return self.mod

    @property
    def best_lwplsr_params_(self):
        """Return the hyper-parameters stored in ``self.best_lwplsr_params``."""
        return self.best_lwplsr_params