Skip to content
Snippets Groups Projects
Commit 2434fc99 authored by DIANE's avatar DIANE
Browse files

LWPLS

parent 30bccda8
No related branches found
No related tags found
No related merge requests found
......@@ -289,7 +289,7 @@ if not x_block.empty and not y.empty:
with c4:
# select type of supervised modelling problem
mode = c4.radio("The nature of the target variable :",
options=['Continuous', 'Categorical'])
options=['Continuous', 'Categorical'], disabled =True)
hash_ = ObjectHash(current=hash_, add=mode)
match st.session_state["interface"]:
......@@ -352,123 +352,129 @@ if not x_block.empty and not y.empty:
# Training set preparation for cross-validation (CV)
with c5: # Model columns
nb_folds = 3
@st.cache_data
def RequestingModelCreation(change):
from utils.regress import Plsr
pre = Plsr(train=[X_train, y_train], test=[
X_test, y_test], n_iter=40, cv=nb_folds)
global Model
match model_type:
case 'PLS':
from utils.regress import Plsr
Model = Plsr(train=[X_train, y_train], test=[
X_test, y_test], n_iter=100, cv=nb_folds)
Model = pre
case 'TPE-iPLS':
from utils.regress import TpeIpls
Model = TpeIpls(train=[X_train, y_train], test=[
X_test, y_test], n_intervall=internum, n_iter=iternum, cv=nb_folds)
X_test, y_test], n_intervall=internum, n_iter=iternum, cv=nb_folds, bestglobalparams = pre.best_hyperparams_)
Model.best_fit()
case 'LW-PLS':
# split train data into nb_folds for cross_validation
folds = KF_CV.CV(X_train, y_train, nb_folds)
# export data to csv for Julia train/test
global x_train_np, y_train_np, x_test_np, y_test_np
data_to_work_with = ['x_train_np',
'y_train_np', 'x_test_np', 'y_test_np']
x_train_np, y_train_np, x_test_np, y_test_np = X_train.to_numpy(
), y_train.to_numpy(), X_test.to_numpy(), y_test.to_numpy()
# Cross-Validation calculation
d = {}
for i in range(nb_folds):
d["xtr_fold{0}".format(i+1)], d["ytr_fold{0}".format(i+1)], d["xte_fold{0}".format(i+1)], d["yte_fold{0}".format(i+1)] = np.delete(x_train_np, folds[list(
folds)[i]], axis=0), np.delete(y_train_np, folds[list(folds)[i]], axis=0), x_train_np[folds[list(folds)[i]]], y_train_np[folds[list(folds)[i]]]
data_to_work_with.append("xtr_fold{0}".format(i+1))
data_to_work_with.append("ytr_fold{0}".format(i+1))
data_to_work_with.append("xte_fold{0}".format(i+1))
data_to_work_with.append("yte_fold{0}".format(i+1))
# check best pre-treatment with a global PLSR model
from utils.regress import Plsr
pre = Plsr(train=[X_train, y_train], test=[
X_test, y_test], n_iter=5)
temp_path = Path('temp/')
with open(temp_path / "lwplsr_preTreatments.json", "w+") as outfile:
json.dump(pre.best_hyperparams_, outfile)
# export Xtrain, Xtest, Ytrain, Ytest and all CV folds to temp folder as csv files
for i in data_to_work_with:
if 'fold' in i:
j = d[i]
else:
j = globals()[i]
np.savetxt(temp_path / str(i + ".csv"),
j, delimiter=",")
open(temp_path / 'model', 'w').close()
# run Julia Jchemo as subprocess
import subprocess
subprocess_path = Path("utils/")
subprocess.run(
[str(sys.executable), subprocess_path / "lwplsr_call.py"])
# retrieve json results from Julia JChemo
try:
with open(temp_path / "lwplsr_outputs.json", "r") as outfile:
Reg_json = json.load(outfile)
# delete csv files
for i in data_to_work_with:
os.unlink(temp_path / str(i + ".csv"))
# delete json file after import
os.unlink(temp_path / "lwplsr_outputs.json")
os.unlink(temp_path / "lwplsr_preTreatments.json")
os.unlink(temp_path / 'model')
# format result data into Reg object
# keys of the dict
pred = ['pred_data_train', 'pred_data_test']
for i in range(nb_folds):
# add cv folds keys to pred
pred.append("CV" + str(i+1))
from utils.regress import LwplsObject
Model = LwplsObject(Reg_json=Reg_json, pred=pred)
Model.CV_results_ = DataFrame()
Model.cv_data_ = {'YpredCV': {}, 'idxCV': {}}
# set indexes to Model.pred_data (train, test, folds idx)
for i in range(len(pred)):
Model.pred_data_[i] = Model.pred_data_[
i].T.reset_index().drop(columns=['index'])
if i == 0: # data_train
Model.pred_data_[i].index = list(y_train.index)
Model.pred_data_[i] = Model.pred_data_[
i].iloc[:, 0]
elif i == 1: # data_test
Model.pred_data_[i].index = list(y_test.index)
Model.pred_data_[i] = Model.pred_data_[
i].iloc[:, 0]
else:
# CVi
Model.pred_data_[i].index = folds[list(folds)[
i-2]]
Model.cv_data_[
'YpredCV']['Fold' + str(i-1)] = np.array(Model.pred_data_[i]).reshape(-1)
Model.cv_data_[
'idxCV']['Fold' + str(i-1)] = np.array(folds[list(folds)[i-2]]).reshape(-1)
Model.CV_results_ = KF_CV.metrics_cv(y=y_train, ypcv=Model.cv_data_[
'YpredCV'], folds=folds)[1]
# cross validation results print
Model.best_hyperparams_print = Model.best_hyperparams_
# plots
Model.cv_data_ = KF_CV().meas_pred_eq(y=np.array(y_train),
ypcv=Model.cv_data_['YpredCV'], folds=folds)
Model.pretreated_spectra_ = pre.pretreated_spectra_
Model.best_hyperparams_print = {
**pre.best_hyperparams_, **Model.best_hyperparams_}
Model.best_hyperparams_ = {
**pre.best_hyperparams_, **Model.best_hyperparams_}
Model.__hash__ = ObjectHash(
current=hash_, add=Model.best_hyperparams_print)
except FileNotFoundError:
Model = None
for i in data_to_work_with:
os.unlink(temp_path / str(i + ".csv"))
from utils.regress import LWPLS
Model = LWPLS(train = [X_train, y_train], test = [X_test, y_test], n_iter = 10, cv = nb_folds, bestglobalparams = pre.best_hyperparams_)
Model.best_fit()
# The commented-out code below was first used to communicate with Julia for LWPLS modelling with lwplsr(); lwplsr() has since been rewritten in Python, and the Python version is used instead.
# case 'LW-PLS':
# # split train data into nb_folds for cross_validation
# folds = KF_CV.CV(X_train, y_train, nb_folds)
# # export data to csv for Julia train/test
# global x_train_np, y_train_np, x_test_np, y_test_np
# data_to_work_with = ['x_train_np',
# 'y_train_np', 'x_test_np', 'y_test_np']
# x_train_np, y_train_np, x_test_np, y_test_np = X_train.to_numpy(
# ), y_train.to_numpy(), X_test.to_numpy(), y_test.to_numpy()
# # Cross-Validation calculation
# d = {}
# for i in range(nb_folds):
# d["xtr_fold{0}".format(i+1)], d["ytr_fold{0}".format(i+1)], d["xte_fold{0}".format(i+1)], d["yte_fold{0}".format(i+1)] = np.delete(x_train_np, folds[list(
# folds)[i]], axis=0), np.delete(y_train_np, folds[list(folds)[i]], axis=0), x_train_np[folds[list(folds)[i]]], y_train_np[folds[list(folds)[i]]]
# data_to_work_with.append("xtr_fold{0}".format(i+1))
# data_to_work_with.append("ytr_fold{0}".format(i+1))
# data_to_work_with.append("xte_fold{0}".format(i+1))
# data_to_work_with.append("yte_fold{0}".format(i+1))
# # check best pre-treatment with a global PLSR model
# from utils.regress import Plsr
# pre = Plsr(train=[X_train, y_train], test=[X_test, y_test], n_iter=5)
# temp_path = Path('temp/')
# with open(temp_path / "lwplsr_preTreatments.json", "w+") as outfile:
# json.dump(pre.best_hyperparams_, outfile)
# # export Xtrain, Xtest, Ytrain, Ytest and all CV folds to temp folder as csv files
# for i in data_to_work_with:
# if 'fold' in i:
# j = d[i]
# else:
# j = globals()[i]
# np.savetxt(temp_path / str(i + ".csv"),
# j, delimiter=",")
# open(temp_path / 'model', 'w').close()
# # run Julia Jchemo as subprocess
# import subprocess
# subprocess_path = Path("utils/")
# subprocess.run(
# [str(sys.executable), subprocess_path / "lwplsr_call.py"])
# # retrieve json results from Julia JChemo
# try:
# with open(temp_path / "lwplsr_outputs.json", "r") as outfile:
# Reg_json = json.load(outfile)
# # delete csv files
# for i in data_to_work_with:
# os.unlink(temp_path / str(i + ".csv"))
# # delete json file after import
# os.unlink(temp_path / "lwplsr_outputs.json")
# os.unlink(temp_path / "lwplsr_preTreatments.json")
# os.unlink(temp_path / 'model')
# # format result data into Reg object
# # keys of the dict
# pred = ['pred_data_train', 'pred_data_test']
# for i in range(nb_folds):
# # add cv folds keys to pred
# pred.append("CV" + str(i+1))
# from utils.regress import LwplsObject
# Model = LwplsObject(Reg_json=Reg_json, pred=pred)
# Model.CV_results_ = DataFrame()
# Model.cv_data_ = {'YpredCV': {}, 'idxCV': {}}
# # set indexes to Model.pred_data (train, test, folds idx)
# for i in range(len(pred)):
# Model.pred_data_[i] = Model.pred_data_[
# i].T.reset_index().drop(columns=['index'])
# if i == 0: # data_train
# Model.pred_data_[i].index = list(y_train.index)
# Model.pred_data_[i] = Model.pred_data_[
# i].iloc[:, 0]
# elif i == 1: # data_test
# Model.pred_data_[i].index = list(y_test.index)
# Model.pred_data_[i] = Model.pred_data_[
# i].iloc[:, 0]
# else:
# # CVi
# Model.pred_data_[i].index = folds[list(folds)[
# i-2]]
# Model.cv_data_[
# 'YpredCV']['Fold' + str(i-1)] = np.array(Model.pred_data_[i]).reshape(-1)
# Model.cv_data_[
# 'idxCV']['Fold' + str(i-1)] = np.array(folds[list(folds)[i-2]]).reshape(-1)
# Model.CV_results_ = KF_CV.metrics_cv(y=y_train, ypcv=Model.cv_data_[
# 'YpredCV'], folds=folds)[1]
# # cross validation results print
# Model.best_hyperparams_print = Model.best_hyperparams_
# # plots
# Model.cv_data_ = KF_CV().meas_pred_eq(y=np.array(y_train),
# ypcv=Model.cv_data_['YpredCV'], folds=folds)
# Model.pretreated_spectra_ = pre.pretreated_spectra_
# Model.best_hyperparams_print = {
# **pre.best_hyperparams_, **Model.best_hyperparams_}
# Model.best_hyperparams_ = {
# **pre.best_hyperparams_, **Model.best_hyperparams_}
# Model.__hash__ = ObjectHash(
# current=hash_, add=Model.best_hyperparams_print)
# except FileNotFoundError:
# Model = None
# for i in data_to_work_with:
# os.unlink(temp_path / str(i + ".csv"))
case "":
Model = None
......@@ -546,6 +552,7 @@ if model_type:
ax3.grid()
ax3.set_xlabel('Wavelenghts/Wavenumbers')
ax3.set_ylabel('Vip')
case 'TPE-iPLS':
fig, (ax1, ax2, ax3) = plt.subplots(
3, 1, figsize=(12, 4), sharex=True)
......@@ -570,6 +577,7 @@ if model_type:
ax3.set_ylabel('Vip')
ax3.grid()
ax3.set_xlabel('Wavelenghts/Wavenumbers')
case 'LW-PLS':
fig, (ax1, ax2) = plt.subplots(
2, 1, figsize=(12, 4), sharex=True)
......@@ -682,6 +690,7 @@ if model_type:
prep_para[i] = str(modelling.best_hyperparams_[i])+'nd'
# reg plot and residuals plot
yc = y_train if model_type == "LW-PLS" else yc
measured_vs_predicted = reg_plot([y_train, y_test], [
yc, yt], train_idx=train_index, test_idx=test_index, trainplot=False if model_type == "LW-PLS" else True)
residuals_plot = resid_plot([y_train, y_test], [yc, yt], train_idx=train_index,
......
......@@ -140,9 +140,10 @@ with c1:
x_block.columns = x_block.columns.astype(str)
yname = system_data['data']['target'].name
st.info("Loaded model to predict " + yname)
shared_elements = set(system_data['predictors_']).intersection(x_block.columns)
if len(shared_elements)==len(system_data['predictors_']):
pred_data = x_block.loc[:,system_data['predictors_']]
shared_elements = set(
system_data['predictors_']).intersection(x_block.columns)
if len(shared_elements) == len(system_data['predictors_']):
pred_data = x_block.loc[:, system_data['predictors_']]
else:
st.error(
'The names of the features (columns) in the training set and the prediction set are not identical. Thus, prediction cannot be performed.')
......@@ -264,55 +265,63 @@ if not preprocessed.empty:
else:
# Report the mismatch between the value the model was fitted on (nvar) and the
# column count of the prediction data; the original concatenation lacked a
# space before "but", so the two parts of the message ran together.
st.write(f'Model was fitted on {nvar} but prediction data has {preprocesseddf.shape[1]}')
case 'LW-PLS':
case 'LW-PLS':
try:
temp_path = Path('temp/')
# export data to csv for Julia train/pred
# with pretreatments
spectra = preprocess_spectra(
system_data['data']['raw-spectra'], change=hash_)
x_pred = preprocessed
rownames = x_pred.index.to_list()
y = system_data['data']['target']
data_to_work_with = [
'spectra_np', 'y_np', 'x_pred_np']
spectra_np, y_np, x_pred_np = spectra[1].to_numpy(
), y.to_numpy(), x_pred.to_numpy()
# export spectra, y, x_pred to temp folder as csv files
for i in data_to_work_with:
j = globals()[i]
np.savetxt(
temp_path / str(i + ".csv"), j, delimiter=",")
# export best LWPLSR params
with open(temp_path / "lwplsr_best_params.json", "w+") as outfile:
json.dump(system_data['lwpls_params'], outfile)
# create empty file to specify LWPLSR_Call.py that we want predictions
open(temp_path / 'predict', 'w').close()
# # run Julia Jchemo as subprocess
import subprocess
subprocess_path = Path("utils/")
subprocess.run(
[str(sys.executable), subprocess_path / "LWPLSR_Call.py"])
# retrieve json results from Julia JChemo
try:
with open(temp_path / "lwplsr_outputs.json", "r") as outfile:
modelling_json = json.load(outfile)
# delete csv files
for i in data_to_work_with:
os.unlink(temp_path / str(i + ".csv"))
os.unlink(temp_path / 'predict')
# delete json file after import
os.unlink(temp_path / "lwplsr_outputs.json")
os.unlink(
temp_path / "lwplsr_best_params.json")
# keys of the json dict
result = DataFrame(modelling_json['y_pred'])
result.index = rownames
result.columns = ['Results']
except FileNotFoundError as e:
for i in data_to_work_with:
os.unlink(temp_path / str(i + ".csv"))
os.unlink(temp_path / 'predict')
_, spectra = preprocess_spectra(
system_data['data']['raw-spectra'].iloc[system_data['data']['training_data_idx'], :], change=hash_)
from utils.lwplsr_julia_converted import lwpls
result = DataFrame(lwpls(Xtrain=np.array(spectra), Xtest=np.array(preprocessed),
ytrain=np.array(system_data['data']['target'].iloc[system_data['data']['training_data_idx']]),
globalplsVL=system_data['model_']['globalplsVL'], metric=system_data['model_']['dist'],
h=system_data['model_']['h'], k=system_data['model_']['k'],
localplsVL=system_data['model_']['localplsVL'], center=True, scale=False, sklearn=True), index =preprocessed.index)
# temp_path = Path('temp/')
# # export data to csv for Julia train/pred
# # with pretreatments
# spectra = preprocess_spectra(
# system_data['data']['raw-spectra'], change=hash_)
# x_pred = preprocessed
# rownames = x_pred.index.to_list()
# y = system_data['data']['target']
# data_to_work_with = [
# 'spectra_np', 'y_np', 'x_pred_np']
# spectra_np, y_np, x_pred_np = spectra[1].to_numpy(
# ), y.to_numpy(), x_pred.to_numpy()
# # export spectra, y, x_pred to temp folder as csv files
# for i in data_to_work_with:
# j = globals()[i]
# np.savetxt(
# temp_path / str(i + ".csv"), j, delimiter=",")
# # export best LWPLSR params
# with open(temp_path / "lwplsr_best_params.json", "w+") as outfile:
# json.dump(system_data['lwpls_params'], outfile)
# # create empty file to specify LWPLSR_Call.py that we want predictions
# open(temp_path / 'predict', 'w').close()
# # # run Julia Jchemo as subprocess
# import subprocess
# subprocess_path = Path("utils/")
# subprocess.run(
# [str(sys.executable), subprocess_path / "LWPLSR_Call.py"])
# # retrieve json results from Julia JChemo
# try:
# with open(temp_path / "lwplsr_outputs.json", "r") as outfile:
# modelling_json = json.load(outfile)
# # delete csv files
# for i in data_to_work_with:
# os.unlink(temp_path / str(i + ".csv"))
# os.unlink(temp_path / 'predict')
# # delete json file after import
# os.unlink(temp_path / "lwplsr_outputs.json")
# os.unlink(
# temp_path / "lwplsr_best_params.json")
# # keys of the json dict
# result = DataFrame(modelling_json['y_pred'])
# result.index = rownames
# result.columns = ['Results']
# except FileNotFoundError as e:
# for i in data_to_work_with:
# os.unlink(temp_path / str(i + ".csv"))
# os.unlink(temp_path / 'predict')
except:
st.error('Error during LWPLSR predictions')
......@@ -335,15 +344,14 @@ if not result.empty:
st.info('descriptive statistics for the model output')
st.write(DataFrame(desc_stats(result)))
################################# Download results #################################
with st.container():
################################# Download results #################################
with st.container():
if not result.empty:
@st.cache_data(show_spinner=False)
def preparing_results_for_downloading(change):
with open(Path('report/results/dataset/')/predfile.name, "wb") as f:
f.write(predfile.getvalue())
rawspectraplot.savefig(
'./report/results/figures/raw_spectra.png')
prepspectraplot.savefig(
......@@ -379,4 +387,4 @@ if not result.empty:
HandleItems.delete_files(keep=['.py', '.pyc', '.bib'])
except:
pass
\ No newline at end of file
pass
......@@ -372,7 +372,7 @@ class KF_CV:
DataFrame(y[folds[Fname]]), ypcv[Fname].reshape(-1, 1))
r.index = folds[Fname]
r['Folds'] = [str(Fname)+'(Predicted = '+str(np.round(ols.intercept_[0], 2)) +
str(np.round(ols.coef_[0][0], 2))+' x Measured'] * r.shape[0]
str(np.round(ols.coef_[0][0], 2))+' x Measured'+ ')'] * r.shape[0]
cvcv[i] = r
coeff[Fname] = [ols.coef_[0][0], ols.intercept_[0]]
......
import numpy as np
import numpy.typing as npt
from weighted_ikpls import PLS
from .weighted_ikpls import PLS
def mad(X, zmed, axis=1, keepdims=True):
"""
......
This diff is collapsed.
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment