Newer
Older
from juliacall import Main as jl
import numpy as np
import pandas as pd
"""The lwpls regression model from Jchemo (M. Lesnoff)
Returns:
    self.scores (DataFrame): various metrics and scores
    self.predicted_results_on_train (DataFrame): predicted values on the training data
    self.predicted_results_on_test (DataFrame): predicted values on the test data
    self.mod (Julia model): the prepared model
"""
def __init__(self, dataset):
    """Initiate the LWPLSR and prepare data for Julia computing.

    Args:
        dataset (sequence): positions 0-3 hold ``x_train``, ``y_train``,
            ``x_test`` and ``y_test``; the remaining positions hold the
            cross-validation folds (four entries per fold).

    Side effects:
        The train/test sets and every fold are also published as globals
        of the Julia ``Main`` module (``jl``) so that later ``jl.seval``
        calls can reference them by name.
    """
    self.x_train, self.y_train, self.x_test, self.y_test = (
        dataset[0], dataset[1], dataset[2], dataset[3]
    )
    # Four entries per fold follow the four train/test entries.
    self.nb_fold = int((len(dataset) - 4) / 4)
    # NOTE(review): the offsets 4 / 7 / 13 are hard-coded and look like they
    # assume exactly 3 folds (xte at 4.., xtr at 7.., ytr at 13..) -- confirm
    # against the caller before using another fold count.
    # The y-test folds (dataset[i + 10]) are not stored.
    for i in range(self.nb_fold):
        fold = str(i + 1)
        fold_data = {
            "xtr_fold" + fold: dataset[i + 7],
            "ytr_fold" + fold: dataset[i + 13],
            "xte_fold" + fold: dataset[i + 4],
        }
        for name, values in fold_data.items():
            setattr(self, name, values)  # kept on the instance for the CV step
            setattr(jl, name, values)    # mirrored as a Julia global
    # prepare to send dataframes to julia and Jchemo
    jl.x_train, jl.y_train, jl.x_test, jl.y_test = (
        self.x_train, self.y_train, self.x_test, self.y_test
    )
    # initialize vars from the class (placeholders until predictions exist)
    self.predicted_results_on_train = pd.DataFrame()
    self.predicted_results_on_test = pd.DataFrame()
    # Each placeholder has one row per sample of the set it stands for.
    self.pred_test = np.zeros(shape=(self.y_test.shape[0], 1))
    self.pred_train = np.zeros(shape=(self.y_train.shape[0], 1))
    self.best_lwplsr_params = np.zeros(shape=(5, 1))
    self.predicted_results = {}
def Jchemo_lwplsr_fit(self):
    """Send data to Julia to tune and fit lwplsr.

    Works on the Julia globals ``x_train`` / ``y_train`` / ``x_test`` /
    ``y_test`` that were published from:
        self.x_train (DataFrame):
        self.y_train (DataFrame):
        self.x_test (DataFrame):
        self.y_test (DataFrame):

    Sets:
        self.best_lwplsr_params (dict): best 'nlvdis', 'metric', 'h', 'k'
            and 'nlv' found by the grid search
        self.mod (Julia model): the model fitted on the full training set
    """
    # Load the Julia packages used by every Julia statement below.
    # NOTE(review): the piped conversions are not assigned, so the Julia
    # globals keep their original type -- confirm this is intended.
    jl.seval("""
    using DataFrames
    using Pandas
    using Jchemo
    x_train |> Pandas.DataFrame |> DataFrames.DataFrame
    y_train |> Pandas.DataFrame |> DataFrames.DataFrame
    x_test |> Pandas.DataFrame |> DataFrames.DataFrame
    y_test |> Pandas.DataFrame |> DataFrames.DataFrame
    """)
    print('LWPLSR - tuning')
    # set tuning parameters
    jl.seval("""
    nlvdis = [5; 10; 15] ; metric = [:eucl; :mah]
    h = [1; 2; 6; Inf] ; k = [30; 80; 200]
    nlv = 5:15
    pars = Jchemo.mpar(nlvdis = nlvdis, metric = metric, h = h, k = k)
    """)
    # split Train data into Cal/Val for tuning
    # (pct is the share of training rows drawn at random for validation)
    jl.seval("""
    pct = .3
    ntrain = Jchemo.nro(x_train)
    nval = Int(round(pct * ntrain))
    s = Jchemo.samprand(ntrain, nval)
    Xcal = x_train[s.train, :]
    ycal = y_train[s.train]
    Xval = x_train[s.test, :]
    yval = y_train[s.test]
    ncal = ntrain - nval
    """)
    # Create LWPLSR model and tune
    jl.seval("""
    mod = Jchemo.model(Jchemo.lwplsr)
    res = gridscore(mod, Xcal, ycal, Xval, yval; score = Jchemo.rmsep, pars, nlv, verbose = false)
    u = findall(res.y1 .== minimum(res.y1))[1] #best parameters combination
    """)
    # Row ``u`` of the grid-search result holds the lowest score.
    best = jl.u
    self.best_lwplsr_params = {
        'nlvdis': jl.res.nlvdis[best],
        'metric': str(jl.res.metric[best]),
        'h': jl.res.h[best],
        'k': jl.res.k[best],
        'nlv': jl.res.nlv[best],
    }
    print('best lwplsr params ' + str(self.best_lwplsr_params))
    print('LWPLSR - best params ok')
    # calculate LWPLSR model with best parameters
    jl.seval("""
    mod = Jchemo.model(Jchemo.lwplsr; nlvdis = res.nlvdis[u], metric = res.metric[u], h = res.h[u], k = res.k[u], nlv = res.nlv[u])
    # Fit model
    Jchemo.fit!(mod, x_train, y_train)
    """)
    self.mod = jl.mod
def Jchemo_lwplsr_predict(self):
    """Send data to Julia to predict with lwplsr.

    Uses the Julia globals ``mod``, ``x_test`` and ``x_train``.

    Args:
        self.mod (Julia model): the prepared model
        self.x_train (DataFrame):
        self.y_train (DataFrame):
        self.x_test (DataFrame):
        self.y_test (DataFrame):
    Returns:
        self.pred_test (Julia DataFrame): predicted values on x_test
        self.pred_train (Julia DataFrame): predicted values on x_train
    """
    # Each seval returns the value of its last expression (res.pred).
    self.pred_test = jl.seval("""
    println("LWPLSR - start test predict")
    res = Jchemo.predict(mod, x_test)
    res.pred
    """)
    self.pred_train = jl.seval("""
    println("LWPLSR - start train predict")
    res = Jchemo.predict(mod, x_train)
    res.pred
    """)
    print('LWPLSR - end')
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
def Jchemo_lwplsr_cv(self):
    """Cross-validate lwplsr in Julia with the best tuned parameters.

    For every fold a fresh model is fitted on the fold's training part and
    used to predict the fold's test part.

    Args:
        self.best_lwplsr_params (dict): must provide the keys 'nlvdis',
            'metric', 'h', 'k' and 'nlv'
        self.xtr_fold1 (DataFrame):
        self.ytr_fold1 (DataFrame):
        self.xte_fold1 (DataFrame):
        self.pred_test, self.pred_train: predictions on the test and
            training data, converted to pandas DataFrames here
    Returns:
        dict: ``self.predicted_results`` with one DataFrame per fold under
        "CV1", "CV2", ... plus "pred_data_train" and "pred_data_test".
    """
    if self.nb_fold > 0:
        # Loop-invariant setup: load the Julia packages and publish the
        # tuned parameters once instead of once per fold.
        jl.seval("""
        using DataFrames
        using Pandas
        using Jchemo
        """)
        params = self.best_lwplsr_params
        jl.nlvdis = int(params['nlvdis'])
        jl.metric = params['metric']  # a string; turned into a Symbol in Julia
        jl.h = params['h']
        jl.k = int(params['k'])
        jl.nlv = int(params['nlv'])
    for i in range(self.nb_fold):
        fold = str(i + 1)
        # Publish the current fold under fixed Julia names.
        jl.Xtr = getattr(self, "xtr_fold" + fold)
        jl.Ytr = getattr(self, "ytr_fold" + fold)
        jl.Xte = getattr(self, "xte_fold" + fold)
        jl.seval("""
        Xtr |> Pandas.DataFrame |> DataFrames.DataFrame
        Ytr |> Pandas.DataFrame |> DataFrames.DataFrame
        Xte |> Pandas.DataFrame |> DataFrames.DataFrame
        """)
        jl.seval("""
        println("LWPLSR - start CV mod")
        mod_cv = Jchemo.model(Jchemo.lwplsr; nlvdis = nlvdis, metric = Symbol(metric), h = h, k = k, nlv = nlv)
        # Fit model
        Jchemo.fit!(mod_cv, Xtr, Ytr)
        """)
        pred_cv = jl.seval("""
        println("LWPLSR - start CV predict")
        res = Jchemo.predict(mod_cv, Xte)
        res.pred
        """)
        self.predicted_results["CV" + fold] = pd.DataFrame(pred_cv)
    # convert predicted data from x_test and x_train to Pandas DataFrames
    self.predicted_results_on_test = pd.DataFrame(self.pred_test)
    self.predicted_results_on_train = pd.DataFrame(self.pred_train)
    self.predicted_results["pred_data_train"] = self.predicted_results_on_train
    self.predicted_results["pred_data_test"] = self.predicted_results_on_test
    return self.predicted_results
@property
def best_lwplsr_params_(self):
    """Read-only access to ``self.best_lwplsr_params``."""
    tuned_params = self.best_lwplsr_params
    return tuned_params