Skip to content
Snippets Groups Projects
Commit e3f8d979 authored by Nicolas Barthes
Browse files

LWPLSR parameter tuning with JChemo.GridScore

parent e1596310
No related branches found
No related tags found
No related merge requests found
from juliacall import Main as jl from juliacall import Main as jl
import numpy as np import numpy as np
import pandas as pd import pandas as pd
from sklearn.model_selection import KFold
class LWPLSR: class LWPLSR:
"""The lwpls regression model from Jchemo (M. Lesnoff) """The lwpls regression model from Jchemo (M. Lesnoff)
...@@ -9,48 +8,26 @@ class LWPLSR: ...@@ -9,48 +8,26 @@ class LWPLSR:
Returns: Returns:
self.scores (DataFrame): various metrics and scores self.scores (DataFrame): various metrics and scores
self.predicted_results_on_train (DataFrame): self.predicted_results_on_train (DataFrame):
self.predicted_results_on_cv (DataFrame):
self.predicted_results_on_test (DataFrame): self.predicted_results_on_test (DataFrame):
self.mod (Julia model): the prepared model self.mod (Julia model): the prepared model
""" """
def __init__(self, x_train, y_train, x_test, y_test, x_train_cv1, y_train_cv1, x_test_cv1, y_test_cv1, x_train_cv2, y_train_cv2, x_test_cv2, y_test_cv2, x_train_cv3, y_train_cv3, x_test_cv3, y_test_cv3): def __init__(self, dataset):
"""Initiate the LWPLSR and prepare data for Julia computing.""" """Initiate the LWPLSR and prepare data for Julia computing."""
self.x_train, self.y_train, self.x_test, self.y_test = x_train, y_train, x_test, y_test self.x_train, self.y_train, self.x_test, self.y_test = [dataset[i] for i in range(len(dataset))]
self.x_train_cv1, self.y_train_cv1, self.x_test_cv1, self.y_test_cv1 = x_train_cv1, y_train_cv1, x_test_cv1, y_test_cv1
self.x_train_cv2, self.y_train_cv2, self.x_test_cv2, self.y_test_cv2 = x_train_cv2, y_train_cv2, x_test_cv2, y_test_cv2
self.x_train_cv3, self.y_train_cv3, self.x_test_cv3, self.y_test_cv3 = x_train_cv3, y_train_cv3, x_test_cv3, y_test_cv3
# prepare to send dataframes to julia and Jchemo # prepare to send dataframes to julia and Jchemo
jl.x_train, jl.y_train, jl.x_test, jl.y_test = self.x_train, self.y_train, self.x_test, self.y_test jl.x_train, jl.y_train, jl.x_test, jl.y_test = self.x_train, self.y_train, self.x_test, self.y_test
jl.x_train_cv1, jl.y_train_cv1, jl.x_test_cv1, jl.y_test_cv1 = self.x_train_cv1, self.y_train_cv1, self.x_test_cv1, self.y_test_cv1
jl.x_train_cv2, jl.y_train_cv2, jl.x_test_cv2, jl.y_test_cv2 = self.x_train_cv2, self.y_train_cv2, self.x_test_cv2, self.y_test_cv2
jl.x_train_cv3, jl.y_train_cv3, jl.x_test_cv3, jl.y_test_cv3= self.x_train_cv3, self.y_train_cv3, self.x_test_cv3, self.y_test_cv3
# optimize lwplsr parameters with Jchemo
# jl.seval("""
# ntrain = nro(x_train)
# segm = segmkf(ntrain, 4; rep = 5)
# nlvdis = [5; 10; 15] ; metric = [:mah]
# h = [1; 2; 6; Inf] ; k = [10; 30; 100]
# nlv = 0:15
# pars = mpar(nlvdis = nlvdis, metric = metric, h = h, k = k)
# println(pars)
# """)
# initialize vars from the class # initialize vars from the class
y_shape = y_test.shape y_shape = self.y_test.shape
y_shape_cv1 = y_test_cv1.shape
y_shape_cv2 = y_test_cv2.shape
y_shape_cv3 = y_test_cv3.shape
# self.scores = pd.DataFrame
self.predicted_results_on_test = pd.DataFrame self.predicted_results_on_test = pd.DataFrame
self.predicted_results_on_train = pd.DataFrame self.predicted_results_on_train = pd.DataFrame
self.predicted_results_on_cv = pd.DataFrame self.predicted_results_on_cv = pd.DataFrame
self.pred_test = np.zeros(shape=(y_shape[0], 1)) self.pred_test = np.zeros(shape=(y_shape[0], 1))
self.pred_train = np.zeros(shape=(y_shape[0], 1)) self.pred_train = np.zeros(shape=(y_shape[0], 1))
self.pred_cv1 = np.zeros(shape=(y_shape_cv1[0], 1))
self.pred_cv2 = np.zeros(shape=(y_shape_cv2[0], 1))
self.pred_cv3 = np.zeros(shape=(y_shape_cv3[0], 1))
self.mod = "" self.mod = ""
self.best_lwplsr_params = np.zeros(shape=(5, 1))
def Jchemo_lwplsr_fit(self): def Jchemo_lwplsr_fit(self):
"""Send data to Julia to fit lwplsr. """Send data to Julia to fit lwplsr.
...@@ -73,47 +50,43 @@ class LWPLSR: ...@@ -73,47 +50,43 @@ class LWPLSR:
y_train |> Pandas.DataFrame |> DataFrames.DataFrame y_train |> Pandas.DataFrame |> DataFrames.DataFrame
x_test |> Pandas.DataFrame |> DataFrames.DataFrame x_test |> Pandas.DataFrame |> DataFrames.DataFrame
y_test |> Pandas.DataFrame |> DataFrames.DataFrame y_test |> Pandas.DataFrame |> DataFrames.DataFrame
x_train_cv1 |> Pandas.DataFrame |> DataFrames.DataFrame
y_train_cv1 |> Pandas.DataFrame |> DataFrames.DataFrame
x_test_cv1 |> Pandas.DataFrame |> DataFrames.DataFrame
y_test_cv1 |> Pandas.DataFrame |> DataFrames.DataFrame
x_train_cv2 |> Pandas.DataFrame |> DataFrames.DataFrame
y_train_cv2 |> Pandas.DataFrame |> DataFrames.DataFrame
x_test_cv2 |> Pandas.DataFrame |> DataFrames.DataFrame
y_test_cv2 |> Pandas.DataFrame |> DataFrames.DataFrame
x_train_cv3 |> Pandas.DataFrame |> DataFrames.DataFrame
y_train_cv3 |> Pandas.DataFrame |> DataFrames.DataFrame
x_test_cv3 |> Pandas.DataFrame |> DataFrames.DataFrame
y_test_cv3 |> Pandas.DataFrame |> DataFrames.DataFrame
""") """)
# Create LWPLSR model and fit print('LWPLSR - tuning')
# set tuning parameters
jl.seval(""" jl.seval("""
nlvdis = 5 ; metric = :mah nlvdis = [5; 10; 15] ; metric = [:eucl; :mah]
h = 1 ; k = 200 ; nlv = 15 #; scal = true h = [1; 2; 6; Inf] ; k = [30; 80; 200]
mod = Jchemo.model(Jchemo.lwplsr; nlvdis, metric, h, k, nlv) nlv = 5:15
# Fit model pars = Jchemo.mpar(nlvdis = nlvdis, metric = metric, h = h, k = k)
Jchemo.fit!(mod, x_train, y_train)
""") """)
# CV model and fit # split Train data into Cal/Val for tuning
jl.seval(""" jl.seval("""
nlvdis = 5 ; metric = :mah pct = .3
h = 1 ; k = 200 ; nlv = 15 #; scal = true ntrain = Jchemo.nro(x_train)
mod_cv1 = mod nval = Int(round(pct * ntrain))
mod_cv2 = mod s = Jchemo.samprand(ntrain, nval)
mod_cv3 = mod Xcal = x_train[s.train, :]
# Fit model ycal = y_train[s.train]
Jchemo.fit!(mod_cv1, x_train_cv1, y_train_cv1) Xval = x_train[s.test, :]
Jchemo.fit!(mod_cv2, x_train_cv2, y_train_cv2) yval = y_train[s.test]
Jchemo.fit!(mod_cv3, x_train_cv3, y_train_cv3) ncal = ntrain - nval
""") """)
# jl.seval(""" # Create LWPLSR model and tune
# mod = Jchemo.model(Jchemo.lwplsr) jl.seval("""
# res = Jchemo.gridcv(mod, x_train, y_train; segm, score = Jchemo.rmsep, pars, nlv, verbose = true).res mod = Jchemo.model(Jchemo.lwplsr)
# # u = findall(res.y1 .== minimum(res.y1))[1] res = gridscore(mod, Xcal, ycal, Xval, yval; score = Jchemo.rmsep, pars, nlv, verbose = false)
# # mod = Jchemo.model(lwplsr; nlvdis = res.nlvdis[u], metric = res.metric[u], h = res.h[u], k = res.k[u], nlv = res.nlv[u]) ; u = findall(res.y1 .== minimum(res.y1))[1] #best parameters combination
# # Jchemo.fit!(mod, x_train, y_train) """)
# """) self.best_lwplsr_params = {'nlvdis' : jl.res.nlvdis[jl.u], 'metric' : str(jl.res.metric[jl.u]), 'h' : jl.res.h[jl.u], 'k' : jl.res.k[jl.u], 'nlv' : jl.res.nlv[jl.u]}
print('best lwplsr params' + str(self.best_lwplsr_params))
print('LWPLSR - best params ok')
# calculate LWPLSR model with best parameters
jl.seval("""
mod = Jchemo.model(Jchemo.lwplsr; nlvdis = res.nlvdis[u], metric = res.metric[u], h = res.h[u], k = res.k[u], nlv = res.nlv[u])
# Fit model
Jchemo.fit!(mod, x_train, y_train)
""")
self.mod = jl.mod self.mod = jl.mod
def Jchemo_lwplsr_predict(self): def Jchemo_lwplsr_predict(self):
...@@ -133,91 +106,29 @@ class LWPLSR: ...@@ -133,91 +106,29 @@ class LWPLSR:
""" """
# Predictions on x_test and store in self.pred # Predictions on x_test and store in self.pred
self.pred_test = jl.seval(""" self.pred_test = jl.seval("""
println("start test predict") println("LWPLSR - start test predict")
res = Jchemo.predict(mod, x_test) res = Jchemo.predict(mod, x_test)
res.pred res.pred
""") """)
self.pred_train = jl.seval(""" self.pred_train = jl.seval("""
println("start train predict") println("LWPLSR - start train predict")
res = Jchemo.predict(mod, x_train) res = Jchemo.predict(mod, x_train)
res.pred res.pred
""") """)
self.pred_cv1 = jl.seval(""" print('LWPLSR - end')
println("start test_cv1 predict")
res = Jchemo.predict(mod_cv1, x_test_cv1)
res.pred
""")
self.pred_cv2 = jl.seval("""
println("start test_cv2 predict")
res = Jchemo.predict(mod_cv2, x_test_cv2)
res.pred
""")
self.pred_cv3 = jl.seval("""
println("start test_cv3 predict")
res = Jchemo.predict(mod_cv3, x_test_cv3)
res.pred
""")
@property @property
def pred_data_(self): def pred_data_(self):
# convert predicted data from x_test to Pandas DataFrame # convert predicted data from x_test to Pandas DataFrame
self.predicted_results_on_test = pd.DataFrame(self.pred_test) self.predicted_results_on_test = pd.DataFrame(self.pred_test)
self.predicted_results_on_train = pd.DataFrame(self.pred_train) self.predicted_results_on_train = pd.DataFrame(self.pred_train)
self.predicted_results_on_cv1 = pd.DataFrame(self.pred_cv1) return self.predicted_results_on_train, self.predicted_results_on_test
self.predicted_results_on_cv2 = pd.DataFrame(self.pred_cv2)
self.predicted_results_on_cv3 = pd.DataFrame(self.pred_cv3)
return self.predicted_results_on_train, self.predicted_results_on_cv1, self.predicted_results_on_cv2, self.predicted_results_on_cv3, self.predicted_results_on_test
@property @property
def model_(self): def model_(self):
return self.mod return self.mod
# @property @property
# def metrics_(self): def best_lwplsr_params_(self):
# jl.pred_test = self.pred_test return self.best_lwplsr_params
# jl.seval("""
# using Jchemo
# """)
# scorermsep_test = jl.seval("""
# first(Jchemo.rmsep(pred_test, y_test))
# """)
# scoremr2_test = jl.seval("""
# first(Jchemo.r2(pred_test, y_test))
# """)
# scorerpd_test = jl.seval("""
# first(Jchemo.rpd(pred_test, y_test))
# """)
# scoremsep_test = jl.seval("""
# first(Jchemo.sep(pred_test, y_test))
# """)
# jl.pred_train = self.pred_train
# scorermsep_train = jl.seval("""
# first(Jchemo.rmsep(pred_train, y_train))
# """)
# scoremr2_train = jl.seval("""
# first(Jchemo.r2(pred_train, y_train))
# """)
# scorerpd_train = jl.seval("""
# first(Jchemo.rpd(pred_train, y_train))
# """)
# scoremsep_train = jl.seval("""
# first(Jchemo.sep(pred_train, y_train))
# """)
# jl.pred_cv = self.pred_cv
# scorermsep_cv = jl.seval("""
# first(Jchemo.rmsep(pred_cv, y_train))
# """)
# scoremr2_cv = jl.seval("""
# first(Jchemo.r2(pred_cv, y_train))
# """)
# scorerpd_cv = jl.seval("""
# first(Jchemo.rpd(pred_cv, y_train))
# """)
# scoremsep_cv = jl.seval("""
# first(Jchemo.sep(pred_cv, y_train))
# """)
#
#
# self.scores = pd.DataFrame([[scoremr2_test, scorermsep_test, scoremsep_test, scorerpd_test]], columns=['r2', 'rmsep', 'msep', 'rpd'], index=['test'])
# self.scores = pd.concat([self.scores, pd.DataFrame([[scoremr2_train, scorermsep_train, scoremsep_train, scorerpd_train]], columns=['r2', 'rmsep', 'msep', 'rpd'], index = ["train"]), pd.DataFrame([[scoremr2_cv, scorermsep_cv, scoremsep_cv, scorerpd_cv]], columns=['r2', 'rmsep', 'msep', 'rpd'], index = ["cv"])])#
# return self.scores
...@@ -5,27 +5,24 @@ from LWPLSR_ import LWPLSR ...@@ -5,27 +5,24 @@ from LWPLSR_ import LWPLSR
# loading the lwplsr_inputs.json # loading the lwplsr_inputs.json
temp_path = Path("temp/") temp_path = Path("temp/")
for i in ['x_train_np', 'y_train_np', 'x_test_np', 'y_test_np','x_train_np_cv1', 'y_train_np_cv1', 'x_test_np_cv1', 'y_test_np_cv1', 'x_train_np_cv2', 'y_train_np_cv2', 'x_test_np_cv2', 'y_test_np_cv2', 'x_train_np_cv3', 'y_train_np_cv3', 'x_test_np_cv3', 'y_test_np_cv3',]: data_to_work_with = ['x_train_np', 'y_train_np', 'x_test_np', 'y_test_np']
globals()[i] = np.genfromtxt(temp_path / str(i + ".csv"), delimiter=',') dataset = []
for i in data_to_work_with:
dataset.append(np.genfromtxt(temp_path / str(i + ".csv"), delimiter=','))
print('CSV imported') print('CSV imported')
print('start model creation') print('start model creation')
Reg = LWPLSR(x_train_np, y_train_np, x_test_np, y_test_np, x_train_np_cv1, y_train_np_cv1, x_test_np_cv1, y_test_np_cv1, x_train_np_cv2, y_train_np_cv2, x_test_np_cv2, y_test_np_cv2, x_train_np_cv3, y_train_np_cv3, x_test_np_cv3, y_test_np_cv3) Reg = LWPLSR(dataset)
print('model created. \n now fit') print('model created. \nnow fit')
LWPLSR.Jchemo_lwplsr_fit(Reg) LWPLSR.Jchemo_lwplsr_fit(Reg)
print('now predict') print('now predict')
LWPLSR.Jchemo_lwplsr_predict(Reg) LWPLSR.Jchemo_lwplsr_predict(Reg)
pred = ['pred_data_train', 'pred_data_cv1', 'pred_data_cv2', 'pred_data_cv3', 'pred_data_test'] print('export to json')
pred = ['pred_data_train', 'pred_data_test']
json_export = {} json_export = {}
for i in pred: for i in pred:
json_export[i] = Reg.pred_data_[pred.index(i)].to_dict() json_export[i] = Reg.pred_data_[pred.index(i)].to_dict()
# json_export['pred_data_train'] = Reg.pred_data_[0].to_dict()
# json_export['pred_data_cv1'] = Reg.pred_data_[1].to_dict()
# json_export['pred_data_cv2'] = Reg.pred_data_[2].to_dict()
# json_export['pred_data_cv3'] = Reg.pred_data_[3].to_dict()
# json_export['pred_data_test'] = Reg.pred_data_[4].to_dict()
json_export['model'] = str(Reg.model_) json_export['model'] = str(Reg.model_)
# json_export['metrics'] = Reg.metrics_.to_dict() json_export['best_lwplsr_params'] = Reg.best_lwplsr_params_
with open(temp_path / "lwplsr_outputs.json", "w+") as outfile: with open(temp_path / "lwplsr_outputs.json", "w+") as outfile:
json.dump(json_export, outfile) json.dump(json_export, outfile)
print(Reg.metrics_)
\ No newline at end of file
...@@ -28,7 +28,7 @@ selec_strategy = ['center','random'] ...@@ -28,7 +28,7 @@ selec_strategy = ['center','random']
if st.session_state["interface"] == 'simple': if st.session_state["interface"] == 'simple':
st.write(':red[Automated Simple Interface]') st.write(':red[Automated Simple Interface]')
hide_pages("Predictions") # hide_pages("Predictions")
if 37 not in st.session_state: if 37 not in st.session_state:
default_reduction_option = 1 default_reduction_option = 1
else: else:
......
...@@ -167,24 +167,25 @@ if not spectra.empty and not y.empty: ...@@ -167,24 +167,25 @@ if not spectra.empty and not y.empty:
reg_model = Reg.model_ reg_model = Reg.model_
#M2.dataframe(Pin.pred_data_) #M2.dataframe(Pin.pred_data_)
elif regression_algo == reg_algo[2]: elif regression_algo == reg_algo[2]:
data_to_work_with = ['x_train_np', 'y_train_np', 'x_test_np', 'y_test_np']#,'x_train_np_cv1', 'y_train_np_cv1', 'x_test_np_cv1', 'y_test_np_cv1', 'x_train_np_cv2', 'y_train_np_cv2', 'x_test_np_cv2', 'y_test_np_cv2', 'x_train_np_cv3', 'y_train_np_cv3', 'x_test_np_cv3', 'y_test_np_cv3',] # export data to csv for Julia
data_to_work_with = ['x_train_np', 'y_train_np', 'x_test_np', 'y_test_np']
x_train_np, y_train_np, x_test_np, y_test_np = X_train.to_numpy(), y_train.to_numpy(), X_test.to_numpy(), y_test.to_numpy() x_train_np, y_train_np, x_test_np, y_test_np = X_train.to_numpy(), y_train.to_numpy(), X_test.to_numpy(), y_test.to_numpy()
# x_train_np_cv1, y_train_np_cv1, x_test_np_cv1, y_test_np_cv1, x_train_np_cv2, y_train_np_cv2, x_test_np_cv2, y_test_np_cv2, x_train_np_cv3, y_train_np_cv3, x_test_np_cv3, y_test_np_cv3, = X_train_cv1.to_numpy(), y_train_cv1.to_numpy(), X_test_cv1.to_numpy(), y_test_cv1.to_numpy(), X_train_cv2.to_numpy(), y_train_cv2.to_numpy(), X_test_cv2.to_numpy(), y_test_cv2.to_numpy(), X_train_cv3.to_numpy(), y_train_cv3.to_numpy(), X_test_cv3.to_numpy(), y_test_cv3.to_numpy()
temp_path = Path('temp/') temp_path = Path('temp/')
for i in data_to_work_with: np.savetxt(temp_path / str(i + ".csv"), vars()[i], delimiter=",") for i in data_to_work_with: np.savetxt(temp_path / str(i + ".csv"), vars()[i], delimiter=",")
# run Julia Jchemo
import subprocess import subprocess
subprocess_path = Path("Class_Mod/") subprocess_path = Path("Class_Mod/")
subprocess.run([f"{sys.executable}", subprocess_path / "LWPLSR_Call.py"]) subprocess.run([f"{sys.executable}", subprocess_path / "LWPLSR_Call.py"])
# retrieve json results from Julia JChemo
with open(temp_path / "lwplsr_outputs.json", "r") as outfile: with open(temp_path / "lwplsr_outputs.json", "r") as outfile:
Reg_json = json.load(outfile) Reg_json = json.load(outfile)
for i in data_to_work_with: os.unlink(temp_path / str(i + ".csv")) for i in data_to_work_with: os.unlink(temp_path / str(i + ".csv"))
os.unlink(temp_path / "lwplsr_outputs.json") os.unlink(temp_path / "lwplsr_outputs.json")
# Reg = type('obj', (object,), {'model' : pd.json_normalize(Reg_json['model']), 'pred_data_' : [pd.json_normalize(Reg_json['pred_data_train']), pd.json_normalize(Reg_json['pred_data_cv1']), pd.json_normalize(Reg_json['pred_data_cv2']), pd.json_normalize(Reg_json['pred_data_cv3']), pd.json_normalize(Reg_json['pred_data_test'])]}) pred = ['pred_data_train', 'pred_data_test']
pred = ['pred_data_train', 'pred_data_cv1', 'pred_data_cv2', 'pred_data_cv3', 'pred_data_test'] Reg = type('obj', (object,), {'model' : Reg_json['model'], 'best_lwplsr_params' : Reg_json['best_lwplsr_params'], 'pred_data_' : [pd.json_normalize(Reg_json[i]) for i in pred]})
Reg = type('obj', (object,), {'model' : pd.json_normalize(Reg_json['model']), 'pred_data_' : [pd.json_normalize(Reg_json[i]) for i in pred]})
for i in range(len(pred)): for i in range(len(pred)):
Reg.pred_data_[i] = Reg.pred_data_[i].T.reset_index().drop(columns = ['index']) Reg.pred_data_[i] = Reg.pred_data_[i].T.reset_index().drop(columns = ['index'])
if i != 4: if i != 1: # if not pred_data_test
Reg.pred_data_[i].index = list(y_train.index) Reg.pred_data_[i].index = list(y_train.index)
else: else:
Reg.pred_data_[i].index = list(y_test.index) Reg.pred_data_[i].index = list(y_test.index)
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment