Skip to content
Snippets Groups Projects
Commit f0bb9226 authored by Nicolas Barthes
Browse files

prepare LWPLSR for CV

parent d49f8a07
No related branches found
No related tags found
No related merge requests found
from juliacall import Main as jl from juliacall import Main as jl
import numpy as np import numpy as np
import pandas as pd import pandas as pd
from sklearn.model_selection import KFold
class LWPLSR: class LWPLSR:
"""The lwpls regression model from Jchemo (M. Lesnoff)
Returns:
self.scores (DataFrame): various metrics and scores
self.predicted_results_on_train (DataFrame):
self.predicted_results_on_cv (DataFrame):
self.predicted_results_on_test (DataFrame):
self.mod (Julia model): the prepared model
""" """
The lwpls regression model from Jchemo (M. Lesnoff) def __init__(self, x_train, y_train, x_test, y_test, x_train_cv1, y_train_cv1, x_test_cv1, y_test_cv1, x_train_cv2, y_train_cv2, x_test_cv2, y_test_cv2, x_train_cv3, y_train_cv3, x_test_cv3, y_test_cv3):
"""
def __init__(self, x_train, y_train, x_test, y_test):
"""Initiate the LWPLSR and prepare data for Julia computing.""" """Initiate the LWPLSR and prepare data for Julia computing."""
self.x_train, self.y_train, self.x_test, self.y_test = x_train, y_train, x_test, y_test self.x_train, self.y_train, self.x_test, self.y_test = x_train, y_train, x_test, y_test
self.x_train_cv1, self.y_train_cv1, self.x_test_cv1, self.y_test_cv1 = x_train_cv1, y_train_cv1, x_test_cv1, y_test_cv1
self.x_train_cv2, self.y_train_cv2, self.x_test_cv2, self.y_test_cv2 = x_train_cv2, y_train_cv2, x_test_cv2, y_test_cv2
self.x_train_cv3, self.y_train_cv3, self.x_test_cv3, self.y_test_cv3 = x_train_cv3, y_train_cv3, x_test_cv3, y_test_cv3
# prepare to send dataframes to julia and Jchemo # prepare to send dataframes to julia and Jchemo
jl.x_train, jl.y_train, jl.x_test, jl.y_test = self.x_train, self.y_train, self.x_test, self.y_test jl.x_train, jl.y_train, jl.x_test, jl.y_test = self.x_train, self.y_train, self.x_test, self.y_test
# Pre-treatment of x_train and x_test jl.x_train_cv1, jl.y_train_cv1, jl.x_test_cv1, jl.y_test_cv1 = self.x_train_cv1, self.y_train_cv1, self.x_test_cv1, self.y_test_cv1
# jl.seval(""" jl.x_train_cv2, jl.y_train_cv2, jl.x_test_cv2, jl.y_test_cv2 = self.x_train_cv2, self.y_train_cv2, self.x_test_cv2, self.y_test_cv2
# # using DataFrames jl.x_train_cv3, jl.y_train_cv3, jl.x_test_cv3, jl.y_test_cv3= self.x_train_cv3, self.y_train_cv3, self.x_test_cv3, self.y_test_cv3
# # using Pandas # optimize lwplsr parameters with Jchemo
# using Jchemo
# mod1 = Jchemo.model(snv; centr = true, scal = true)
# mod2 = Jchemo.model(savgol; npoint = 15, deriv = 1, degree = 2)
# mod = Jchemo.pip(mod1, mod2)
# Jchemo.fit!(mod, x_train)
# x_train = Jchemo.transf(mod1, x_train)
# Jchemo.fit!(mod, x_test)
# x_test = Jchemo.transf(mod1, x_test)
# """)
# jl.seval(""" # jl.seval("""
# ntrain = nro(x_train) # ntrain = nro(x_train)
# segm = segmkf(ntrain, 4; rep = 5) # segm = segmkf(ntrain, 4; rep = 5)
...@@ -36,17 +38,22 @@ class LWPLSR: ...@@ -36,17 +38,22 @@ class LWPLSR:
# initialize vars from the class # initialize vars from the class
y_shape = y_test.shape y_shape = y_test.shape
self.scores = pd.DataFrame y_shape_cv1 = y_test_cv1.shape
y_shape_cv2 = y_test_cv2.shape
y_shape_cv3 = y_test_cv3.shape
# self.scores = pd.DataFrame
self.predicted_results_on_test = pd.DataFrame self.predicted_results_on_test = pd.DataFrame
self.predicted_results_on_train = pd.DataFrame self.predicted_results_on_train = pd.DataFrame
self.predicted_results_on_cv = pd.DataFrame self.predicted_results_on_cv = pd.DataFrame
self.pred_test = np.zeros(shape=(y_shape[0], 1)) self.pred_test = np.zeros(shape=(y_shape[0], 1))
self.pred_train = np.zeros(shape=(y_shape[0], 1)) self.pred_train = np.zeros(shape=(y_shape[0], 1))
self.pred_cv = np.zeros(shape=(y_shape[0], 1)) self.pred_cv1 = np.zeros(shape=(y_shape_cv1[0], 1))
self.pred_cv2 = np.zeros(shape=(y_shape_cv2[0], 1))
self.pred_cv3 = np.zeros(shape=(y_shape_cv3[0], 1))
self.mod = "" self.mod = ""
def Jchemo_lwplsr_fit(self): def Jchemo_lwplsr_fit(self):
"""Send data to Julia to compute lwplsr. """Send data to Julia to fit lwplsr.
Args: Args:
self.jl.x_train (DataFrame): self.jl.x_train (DataFrame):
...@@ -55,8 +62,7 @@ class LWPLSR: ...@@ -55,8 +62,7 @@ class LWPLSR:
self.jl.y_test (DataFrame): self.jl.y_test (DataFrame):
Returns: Returns:
self.scores (DataFrame): various metrics and scores self.mod (Julia model): the prepared model
self.predicted_results_on_test (DataFrame):
""" """
# launch Julia Jchemo lwplsr # launch Julia Jchemo lwplsr
jl.seval(""" jl.seval("""
...@@ -67,6 +73,18 @@ class LWPLSR: ...@@ -67,6 +73,18 @@ class LWPLSR:
y_train |> Pandas.DataFrame |> DataFrames.DataFrame y_train |> Pandas.DataFrame |> DataFrames.DataFrame
x_test |> Pandas.DataFrame |> DataFrames.DataFrame x_test |> Pandas.DataFrame |> DataFrames.DataFrame
y_test |> Pandas.DataFrame |> DataFrames.DataFrame y_test |> Pandas.DataFrame |> DataFrames.DataFrame
x_train_cv1 |> Pandas.DataFrame |> DataFrames.DataFrame
y_train_cv1 |> Pandas.DataFrame |> DataFrames.DataFrame
x_test_cv1 |> Pandas.DataFrame |> DataFrames.DataFrame
y_test_cv1 |> Pandas.DataFrame |> DataFrames.DataFrame
x_train_cv2 |> Pandas.DataFrame |> DataFrames.DataFrame
y_train_cv2 |> Pandas.DataFrame |> DataFrames.DataFrame
x_test_cv2 |> Pandas.DataFrame |> DataFrames.DataFrame
y_test_cv2 |> Pandas.DataFrame |> DataFrames.DataFrame
x_train_cv3 |> Pandas.DataFrame |> DataFrames.DataFrame
y_train_cv3 |> Pandas.DataFrame |> DataFrames.DataFrame
x_test_cv3 |> Pandas.DataFrame |> DataFrames.DataFrame
y_test_cv3 |> Pandas.DataFrame |> DataFrames.DataFrame
""") """)
# Create LWPLSR model and fit # Create LWPLSR model and fit
jl.seval(""" jl.seval("""
...@@ -76,6 +94,18 @@ class LWPLSR: ...@@ -76,6 +94,18 @@ class LWPLSR:
# Fit model # Fit model
Jchemo.fit!(mod, x_train, y_train) Jchemo.fit!(mod, x_train, y_train)
""") """)
# CV models: one independent lwplsr model per fold, each fitted on that fold's
# own training split inside the Julia session.
jl.seval("""
nlvdis = 5 ; metric = :mah
h = 1 ; k = 200 ; nlv = 15 #; scal = true
# A plain `mod_cvN = mod` only binds another name to the same model object, so
# every fit! below would refit `mod` itself and all four names would end up
# holding the last (fold 3) fit. deepcopy gives each fold its own object and
# leaves the model fitted on x_train / y_train untouched.
mod_cv1 = deepcopy(mod)
mod_cv2 = deepcopy(mod)
mod_cv3 = deepcopy(mod)
# Fit each fold model on its own training data
Jchemo.fit!(mod_cv1, x_train_cv1, y_train_cv1)
Jchemo.fit!(mod_cv2, x_train_cv2, y_train_cv2)
Jchemo.fit!(mod_cv3, x_train_cv3, y_train_cv3)
""")
# jl.seval(""" # jl.seval("""
# mod = Jchemo.model(Jchemo.lwplsr) # mod = Jchemo.model(Jchemo.lwplsr)
...@@ -87,77 +117,107 @@ class LWPLSR: ...@@ -87,77 +117,107 @@ class LWPLSR:
self.mod = jl.mod self.mod = jl.mod
def Jchemo_lwplsr_predict(self): def Jchemo_lwplsr_predict(self):
"""Predict with the Julia lwplsr models and store the results on the instance.

Each stored value is the `res.pred` field of a `Jchemo.predict` call
evaluated in the Julia session.

Args:
    mod, mod_cv1, mod_cv2, mod_cv3 (Julia models): read from the Julia
        session; they are expected to have been fitted beforehand.
    x_test, x_train, x_test_cv1, x_test_cv2, x_test_cv3: read from the
        Julia session.
Returns:
    Nothing is returned; the following attributes are set.
    self.pred_test: predicted values of `mod` on x_test
    self.pred_train: predicted values of `mod` on x_train
    self.pred_cv1: predicted values of `mod_cv1` on x_test_cv1
    self.pred_cv2: predicted values of `mod_cv2` on x_test_cv2
    self.pred_cv3: predicted values of `mod_cv3` on x_test_cv3
"""
# Predictions on x_test and store in self.pred # Predictions on x_test and store in self.pred
self.pred_test = jl.seval(""" self.pred_test = jl.seval("""
println("start test predict")
res = Jchemo.predict(mod, x_test) res = Jchemo.predict(mod, x_test)
res.pred res.pred
""") """)
self.pred_train = jl.seval(""" self.pred_train = jl.seval("""
println("start train predict")
res = Jchemo.predict(mod, x_train) res = Jchemo.predict(mod, x_train)
res.pred res.pred
""") """)
self.pred_cv = self.pred_train self.pred_cv1 = jl.seval("""
println("start test_cv1 predict")
res = Jchemo.predict(mod_cv1, x_test_cv1)
res.pred
""")
self.pred_cv2 = jl.seval("""
println("start test_cv2 predict")
res = Jchemo.predict(mod_cv2, x_test_cv2)
res.pred
""")
self.pred_cv3 = jl.seval("""
println("start test_cv3 predict")
res = Jchemo.predict(mod_cv3, x_test_cv3)
res.pred
""")
@property @property
def pred_data_(self): def pred_data_(self):
# convert predicted data from x_test to Pandas DataFrame # convert predicted data from x_test to Pandas DataFrame
self.predicted_results_on_test = pd.DataFrame(self.pred_test) self.predicted_results_on_test = pd.DataFrame(self.pred_test)
self.predicted_results_on_train = pd.DataFrame(self.pred_train) self.predicted_results_on_train = pd.DataFrame(self.pred_train)
# self.predicted_results_on_cv = pd.DataFrame(self.pred_cv) self.predicted_results_on_cv1 = pd.DataFrame(self.pred_cv1)
self.predicted_results_on_cv = pd.DataFrame(self.pred_train) self.predicted_results_on_cv2 = pd.DataFrame(self.pred_cv2)
return self.predicted_results_on_train, self.predicted_results_on_cv, self.predicted_results_on_test self.predicted_results_on_cv3 = pd.DataFrame(self.pred_cv3)
return self.predicted_results_on_train, self.predicted_results_on_cv1, self.predicted_results_on_cv2, self.predicted_results_on_cv3, self.predicted_results_on_test
@property @property
def model_(self): def model_(self):
return self.mod return self.mod
@property # @property
def metrics_(self): # def metrics_(self):
jl.pred_test = self.pred_test # jl.pred_test = self.pred_test
jl.seval(""" # jl.seval("""
using Jchemo # using Jchemo
""") # """)
scorermsep_test = jl.seval(""" # scorermsep_test = jl.seval("""
first(Jchemo.rmsep(pred_test, y_test)) # first(Jchemo.rmsep(pred_test, y_test))
""") # """)
scoremr2_test = jl.seval(""" # scoremr2_test = jl.seval("""
first(Jchemo.r2(pred_test, y_test)) # first(Jchemo.r2(pred_test, y_test))
""") # """)
scorerpd_test = jl.seval(""" # scorerpd_test = jl.seval("""
first(Jchemo.rpd(pred_test, y_test)) # first(Jchemo.rpd(pred_test, y_test))
""") # """)
scoremsep_test = jl.seval(""" # scoremsep_test = jl.seval("""
first(Jchemo.sep(pred_test, y_test)) # first(Jchemo.sep(pred_test, y_test))
""") # """)
jl.pred_train = self.pred_train # jl.pred_train = self.pred_train
scorermsep_train = jl.seval(""" # scorermsep_train = jl.seval("""
first(Jchemo.rmsep(pred_train, y_train)) # first(Jchemo.rmsep(pred_train, y_train))
""") # """)
scoremr2_train = jl.seval(""" # scoremr2_train = jl.seval("""
first(Jchemo.r2(pred_train, y_train)) # first(Jchemo.r2(pred_train, y_train))
""") # """)
scorerpd_train = jl.seval(""" # scorerpd_train = jl.seval("""
first(Jchemo.rpd(pred_train, y_train)) # first(Jchemo.rpd(pred_train, y_train))
""") # """)
scoremsep_train = jl.seval(""" # scoremsep_train = jl.seval("""
first(Jchemo.sep(pred_train, y_train)) # first(Jchemo.sep(pred_train, y_train))
""") # """)
jl.pred_cv = self.pred_cv # jl.pred_cv = self.pred_cv
scorermsep_cv = jl.seval(""" # scorermsep_cv = jl.seval("""
first(Jchemo.rmsep(pred_cv, y_train)) # first(Jchemo.rmsep(pred_cv, y_train))
""") # """)
scoremr2_cv = jl.seval(""" # scoremr2_cv = jl.seval("""
first(Jchemo.r2(pred_cv, y_train)) # first(Jchemo.r2(pred_cv, y_train))
""") # """)
scorerpd_cv = jl.seval(""" # scorerpd_cv = jl.seval("""
first(Jchemo.rpd(pred_cv, y_train)) # first(Jchemo.rpd(pred_cv, y_train))
""") # """)
scoremsep_cv = jl.seval(""" # scoremsep_cv = jl.seval("""
first(Jchemo.sep(pred_cv, y_train)) # first(Jchemo.sep(pred_cv, y_train))
""") # """)
#
#
self.scores = pd.DataFrame([[scoremr2_test, scorermsep_test, scoremsep_test, scorerpd_test]], columns=['r2', 'rmsep', 'msep', 'rpd'], index=['test']) # self.scores = pd.DataFrame([[scoremr2_test, scorermsep_test, scoremsep_test, scorerpd_test]], columns=['r2', 'rmsep', 'msep', 'rpd'], index=['test'])
self.scores = pd.concat([self.scores, pd.DataFrame([[scoremr2_train, scorermsep_train, scoremsep_train, scorerpd_train]], columns=['r2', 'rmsep', 'msep', 'rpd'], index = ["train"]), pd.DataFrame([[scoremr2_cv, scorermsep_cv, scoremsep_cv, scorerpd_cv]], columns=['r2', 'rmsep', 'msep', 'rpd'], index = ["cv"])])# # self.scores = pd.concat([self.scores, pd.DataFrame([[scoremr2_train, scorermsep_train, scoremsep_train, scorerpd_train]], columns=['r2', 'rmsep', 'msep', 'rpd'], index = ["train"]), pd.DataFrame([[scoremr2_cv, scorermsep_cv, scoremsep_cv, scorerpd_cv]], columns=['r2', 'rmsep', 'msep', 'rpd'], index = ["cv"])])#
return self.scores # return self.scores
...@@ -5,23 +5,24 @@ from LWPLSR_ import LWPLSR ...@@ -5,23 +5,24 @@ from LWPLSR_ import LWPLSR
# loading the lwplsr_inputs.json # loading the lwplsr_inputs.json
temp_path = Path("temp/") temp_path = Path("temp/")
for i in ['x_train_np', 'y_train_np', 'x_test_np', 'y_test_np']: for i in ['x_train_np', 'y_train_np', 'x_test_np', 'y_test_np']:#,'x_train_np_cv1', 'y_train_np_cv1', 'x_test_np_cv1', 'y_test_np_cv1', 'x_train_np_cv2', 'y_train_np_cv2', 'x_test_np_cv2', 'y_test_np_cv2', 'x_train_np_cv3', 'y_train_np_cv3', 'x_test_np_cv3', 'y_test_np_cv3',]:
globals()[i] = np.genfromtxt(temp_path / str(i + ".csv"), delimiter=',') globals()[i] = np.genfromtxt(temp_path / str(i + ".csv"), delimiter=',')
print('CSV imported') print('CSV imported')
print('start model creation') print('start model creation')
Reg = LWPLSR(x_train_np, y_train_np, x_test_np, y_test_np) Reg = LWPLSR(x_train_np, y_train_np, x_test_np, y_test_np, x_train_np_cv1, y_train_np_cv1, x_test_np_cv1, y_test_np_cv1, x_train_np_cv2, y_train_np_cv2, x_test_np_cv2, y_test_np_cv2, x_train_np_cv3, y_train_np_cv3, x_test_np_cv3, y_test_np_cv3)
print('model created. \n now fit') print('model created. \n now fit')
LWPLSR.Jchemo_lwplsr_fit(Reg) LWPLSR.Jchemo_lwplsr_fit(Reg)
print('now predict') print('now predict')
LWPLSR.Jchemo_lwplsr_predict(Reg) LWPLSR.Jchemo_lwplsr_predict(Reg)
json_export = {} json_export = {}
data_to_export = ['model', 'pred_data', 'metrics']
json_export['pred_data_train'] = Reg.pred_data_[0].to_dict() json_export['pred_data_train'] = Reg.pred_data_[0].to_dict()
json_export['pred_data_cv'] = Reg.pred_data_[1].to_dict() json_export['pred_data_cv1'] = Reg.pred_data_[1].to_dict()
json_export['pred_data_test'] = Reg.pred_data_[2].to_dict() json_export['pred_data_cv2'] = Reg.pred_data_[2].to_dict()
json_export['metrics'] = Reg.metrics_.to_dict() json_export['pred_data_cv3'] = Reg.pred_data_[3].to_dict()
json_export['pred_data_test'] = Reg.pred_data_[4].to_dict()
json_export['model'] = str(Reg.model_) json_export['model'] = str(Reg.model_)
# json_export['metrics'] = Reg.metrics_.to_dict()
with open(temp_path / "lwplsr_outputs.json", "w+") as outfile: with open(temp_path / "lwplsr_outputs.json", "w+") as outfile:
json.dump(json_export, outfile) json.dump(json_export, outfile)
print(Reg.metrics_) print(Reg.metrics_)
\ No newline at end of file
...@@ -140,8 +140,9 @@ if not spectra.empty and not y.empty: ...@@ -140,8 +140,9 @@ if not spectra.empty and not y.empty:
reg_model = Reg.model_ reg_model = Reg.model_
#M2.dataframe(Pin.pred_data_) #M2.dataframe(Pin.pred_data_)
elif regression_algo == reg_algo[2]: elif regression_algo == reg_algo[2]:
data_to_work_with = ['x_train_np', 'y_train_np', 'x_test_np', 'y_test_np'] data_to_work_with = ['x_train_np', 'y_train_np', 'x_test_np', 'y_test_np']#,'x_train_np_cv1', 'y_train_np_cv1', 'x_test_np_cv1', 'y_test_np_cv1', 'x_train_np_cv2', 'y_train_np_cv2', 'x_test_np_cv2', 'y_test_np_cv2', 'x_train_np_cv3', 'y_train_np_cv3', 'x_test_np_cv3', 'y_test_np_cv3',]
x_train_np, y_train_np, x_test_np, y_test_np = X_train.to_numpy(), y_train.to_numpy(), X_test.to_numpy(), y_test.to_numpy() x_train_np, y_train_np, x_test_np, y_test_np = X_train.to_numpy(), y_train.to_numpy(), X_test.to_numpy(), y_test.to_numpy()
# x_train_np_cv1, y_train_np_cv1, x_test_np_cv1, y_test_np_cv1, x_train_np_cv2, y_train_np_cv2, x_test_np_cv2, y_test_np_cv2, x_train_np_cv3, y_train_np_cv3, x_test_np_cv3, y_test_np_cv3, = X_train_cv1.to_numpy(), y_train_cv1.to_numpy(), X_test_cv1.to_numpy(), y_test_cv1.to_numpy(), X_train_cv2.to_numpy(), y_train_cv2.to_numpy(), X_test_cv2.to_numpy(), y_test_cv2.to_numpy(), X_train_cv3.to_numpy(), y_train_cv3.to_numpy(), X_test_cv3.to_numpy(), y_test_cv3.to_numpy()
temp_path = Path('temp/') temp_path = Path('temp/')
for i in data_to_work_with: np.savetxt(temp_path / str(i + ".csv"), vars()[i], delimiter=",") for i in data_to_work_with: np.savetxt(temp_path / str(i + ".csv"), vars()[i], delimiter=",")
import subprocess import subprocess
...@@ -151,13 +152,17 @@ if not spectra.empty and not y.empty: ...@@ -151,13 +152,17 @@ if not spectra.empty and not y.empty:
Reg_json = json.load(outfile) Reg_json = json.load(outfile)
for i in data_to_work_with: os.unlink(temp_path / str(i + ".csv")) for i in data_to_work_with: os.unlink(temp_path / str(i + ".csv"))
os.unlink(temp_path / "lwplsr_outputs.json") os.unlink(temp_path / "lwplsr_outputs.json")
Reg = type('obj', (object,), {'metrics_' : pd.json_normalize(Reg_json['metrics']), 'pred_data_' : [pd.json_normalize(Reg_json['pred_data_train']), pd.json_normalize(Reg_json['pred_data_cv']),pd.json_normalize(Reg_json['pred_data_test'])]}) Reg = type('obj', (object,), {'model' : pd.json_normalize(Reg_json['model']), 'pred_data_' : [pd.json_normalize(Reg_json['pred_data_train']), pd.json_normalize(Reg_json['pred_data_cv1']), pd.json_normalize(Reg_json['pred_data_cv2']), pd.json_normalize(Reg_json['pred_data_cv3']), pd.json_normalize(Reg_json['pred_data_test'])]})
Reg.pred_data_[0] = Reg.pred_data_[0].T.reset_index().drop(columns = ['index']) Reg.pred_data_[0] = Reg.pred_data_[0].T.reset_index().drop(columns = ['index'])
Reg.pred_data_[0].index = list(y_train.index) Reg.pred_data_[0].index = list(y_train.index)
Reg.pred_data_[1] = Reg.pred_data_[1].T.reset_index().drop(columns = ['index']) # Reg.pred_data_[1] = Reg.pred_data_[1].T.reset_index().drop(columns = ['index'])
Reg.pred_data_[1].index = list(y_train.index) # Reg.pred_data_[1].index = list(y_train_cv1.index)
Reg.pred_data_[2] = Reg.pred_data_[2].T.reset_index().drop(columns = ['index']) # Reg.pred_data_[2] = Reg.pred_data_[2].T.reset_index().drop(columns = ['index'])
Reg.pred_data_[2].index = list(y_test.index) # Reg.pred_data_[2].index = list(y_train_cv2.index)
# Reg.pred_data_[3] = Reg.pred_data_[3].T.reset_index().drop(columns = ['index'])
# Reg.pred_data_[3].index = list(y_train_cv3.index)
Reg.pred_data_[4] = Reg.pred_data_[4].T.reset_index().drop(columns = ['index'])
Reg.pred_data_[4].index = list(y_test.index)
elif regression_algo == reg_algo[3]: elif regression_algo == reg_algo[3]:
s = M1.number_input(label='Enter the maximum number of intervals', min_value=1, max_value=6, value=3) s = M1.number_input(label='Enter the maximum number of intervals', min_value=1, max_value=6, value=3)
......
0% Loading. Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment