Newer
Older
from juliacall import Main as jl
import numpy as np
import pandas as pd
"""The lwpls regression model from Jchemo (M. Lesnoff)
Returns:
    self.scores (DataFrame): various metrics and scores
    self.predicted_results_on_train (DataFrame): predicted values on x_train
    self.predicted_results_on_test (DataFrame): predicted values on x_test
    self.mod (Julia model): the prepared model
"""
def __init__(self, dataset):
    """Initiate the LWPLSR and prepare data for Julia computing.

    Args:
        dataset (sequence): indexable collection whose first four entries
            are, in order, ``x_train``, ``y_train``, ``x_test`` and
            ``y_test``. Any further entries are read as cross-validation
            folds at the fixed offsets listed in ``fold_offsets`` below.

    Side effects:
        Assigns the four train/test objects on ``jl`` so that Julia code
        evaluated later can reference them as ``x_train``, ``y_train``,
        ``x_test`` and ``y_test``.
    """
    self.x_train, self.y_train, self.x_test, self.y_test = (
        dataset[i] for i in range(4)
    )

    # Store each fold as a real instance attribute (xtr_fold1, ytr_fold1, ...).
    # The previous ``vars()["self.xtr_fold1"] = ...`` only wrote into a
    # locals mapping and never created anything on ``self``.
    # NOTE(review): the offsets 4/7/10/13 are spaced 3 apart, so they only
    # tile the dataset without overlap when nb_fold == 3 (16 entries) --
    # confirm the expected layout before using another fold count.
    fold_offsets = (
        ("xtr_fold", 7),
        ("ytr_fold", 13),
        ("xte_fold", 4),
        ("yte_fold", 10),
    )
    nb_fold = (len(dataset) - 4) // 4
    for i in range(nb_fold):
        for prefix, offset in fold_offsets:
            setattr(self, prefix + str(i + 1), dataset[i + offset])

    # prepare to send dataframes to julia and Jchemo
    jl.x_train, jl.y_train, jl.x_test, jl.y_test = (
        self.x_train,
        self.y_train,
        self.x_test,
        self.y_test,
    )

    # Placeholders, overwritten by the fit/predict methods. Use empty
    # DataFrame *instances* (not the pd.DataFrame class itself) and size each
    # prediction buffer from the matching target set.
    self.predicted_results_on_train = pd.DataFrame()
    self.predicted_results_on_cv = pd.DataFrame()
    self.predicted_results_on_test = pd.DataFrame()
    self.pred_test = np.zeros(shape=(self.y_test.shape[0], 1))
    self.pred_train = np.zeros(shape=(self.y_train.shape[0], 1))
    self.best_lwplsr_params = np.zeros(shape=(5, 1))
def Jchemo_lwplsr_fit(self):
    """Send data to Julia to tune and fit lwplsr.

    All Julia code operates on the Julia globals ``x_train``, ``y_train``,
    ``x_test`` and ``y_test`` that ``__init__`` assigned on ``jl``.

    Args:
        jl.x_train (DataFrame):
        jl.y_train (DataFrame):
        jl.x_test (DataFrame):
        jl.y_test (DataFrame):

    Stores:
        self.best_lwplsr_params (dict): the 'nlvdis', 'metric', 'h', 'k' and
            'nlv' values of the grid row with the lowest ``res.y1`` score.
        self.mod (Julia model): the model built with those parameters and
            fitted on ``x_train`` / ``y_train``.

    Returns:
        None
    """
    # Julia statements must be evaluated through jl.seval; at Python level
    # they are syntax errors.
    # The three `using` lines are restored because the Julia code below
    # references Pandas, DataFrames and Jchemo (and calls `gridscore`
    # unqualified); re-running `using` on an already loaded package is
    # harmless.
    # NOTE(review): the piped conversions are not assigned to anything, so
    # x_train etc. keep whatever Julia-side type they already had -- confirm
    # this is intended.
    jl.seval("""
    using DataFrames
    using Pandas
    using Jchemo
    x_train |> Pandas.DataFrame |> DataFrames.DataFrame
    y_train |> Pandas.DataFrame |> DataFrames.DataFrame
    x_test |> Pandas.DataFrame |> DataFrames.DataFrame
    y_test |> Pandas.DataFrame |> DataFrames.DataFrame
    """)
    print('LWPLSR - tuning')
    # set tuning parameters
    jl.seval("""
    nlvdis = [5; 10; 15] ; metric = [:eucl; :mah]
    h = [1; 2; 6; Inf] ; k = [30; 80; 200]
    nlv = 5:15
    pars = Jchemo.mpar(nlvdis = nlvdis, metric = metric, h = h, k = k)
    """)
    # split Train data into Cal/Val for tuning (pct = share kept for Val)
    jl.seval("""
    pct = .3
    ntrain = Jchemo.nro(x_train)
    nval = Int(round(pct * ntrain))
    s = Jchemo.samprand(ntrain, nval)
    Xcal = x_train[s.train, :]
    ycal = y_train[s.train]
    Xval = x_train[s.test, :]
    yval = y_train[s.test]
    ncal = ntrain - nval
    """)
    # Create LWPLSR model and tune
    jl.seval("""
    mod = Jchemo.model(Jchemo.lwplsr)
    res = gridscore(mod, Xcal, ycal, Xval, yval; score = Jchemo.rmsep, pars, nlv, verbose = false)
    u = findall(res.y1 .== minimum(res.y1))[1] #best parameters combination
    """)
    best = jl.u
    tuned = jl.res
    self.best_lwplsr_params = {
        'nlvdis': tuned.nlvdis[best],
        # metric entries are Julia symbols (:eucl / :mah); keep a plain string
        'metric': str(tuned.metric[best]),
        'h': tuned.h[best],
        'k': tuned.k[best],
        'nlv': tuned.nlv[best],
    }
    print('best lwplsr params' + str(self.best_lwplsr_params))
    print('LWPLSR - best params ok')
    # calculate LWPLSR model with best parameters
    jl.seval("""
    mod = Jchemo.model(Jchemo.lwplsr; nlvdis = res.nlvdis[u], metric = res.metric[u], h = res.h[u], k = res.k[u], nlv = res.nlv[u])
    # Fit model
    Jchemo.fit!(mod, x_train, y_train)
    """)
    self.mod = jl.mod
def Jchemo_lwplsr_predict(self):
    """Send data to Julia to predict with lwplsr.

    The Julia code reads the Julia global ``mod`` (not ``self.mod``), so the
    model must already have been created and fitted in the same Julia
    session, e.g. by ``Jchemo_lwplsr_fit``.

    Args:
        jl.x_train (DataFrame):
        jl.x_test (DataFrame):

    Stores:
        self.pred_test: ``res.pred`` returned by Julia for ``x_test``
        self.pred_train: ``res.pred`` returned by Julia for ``x_train``
        self.predicted_results_on_test (DataFrame): ``self.pred_test`` as a
            pandas DataFrame
        self.predicted_results_on_train (DataFrame): ``self.pred_train`` as
            a pandas DataFrame

    No cross-validated prediction is computed in this method.

    Returns:
        tuple: ``(predicted_results_on_train, predicted_results_on_test)``
    """
    # The test-set call mirrors the train-set call below; without the
    # predict lines and the closing quotes the first seval string swallowed
    # the second one.
    self.pred_test = jl.seval("""
    println("LWPLSR - start test predict")
    res = Jchemo.predict(mod, x_test)
    res.pred
    """)
    self.pred_train = jl.seval("""
    println("LWPLSR - start train predict")
    res = Jchemo.predict(mod, x_train)
    res.pred
    """)
    print('LWPLSR - end')
    # convert predicted data from Julia to Pandas DataFrames
    self.predicted_results_on_test = pd.DataFrame(self.pred_test)
    self.predicted_results_on_train = pd.DataFrame(self.pred_train)
    return self.predicted_results_on_train, self.predicted_results_on_test
@property
def best_lwplsr_params_(self):
    """Return whatever is currently stored in ``self.best_lwplsr_params``."""
    tuned_params = self.best_lwplsr_params
    return tuned_params