Skip to content
Snippets Groups Projects
Commit 27cb217a authored by Nicolas Barthes
Browse files

LWPLSR CV start... to be continued

parent e9157d45
No related branches found
No related tags found
No related merge requests found
......@@ -14,8 +14,17 @@ class LWPLSR:
def __init__(self, dataset):
    """Initiate the LWPLSR and prepare data for Julia computing.

    Args:
        dataset: indexable collection of arrays. Items 0-3 are x_train,
            y_train, x_test and y_test. Any remaining items are
            cross-validation folds laid out as four equally sized groups,
            in this order: xte_fold*, xtr_fold*, yte_fold*, ytr_fold*.
            NOTE(review): this layout is the alphabetical order of the fold
            file names; confirm the caller builds `dataset` in that order.
    """
    # Only the first four items are the train/test split; unpacking the whole
    # dataset fails as soon as fold arrays are appended to it.
    self.x_train, self.y_train, self.x_test, self.y_test = [dataset[i] for i in range(4)]
    # Number of CV folds: what is left after the train/test arrays, divided
    # between the four fold groups.
    nb_fold = (len(dataset) - 4) // 4
    for i in range(nb_fold):
        # setattr() really creates self.xtr_fold1, ...; writing to
        # vars()["self.xtr_fold1"] only fills a throw-away dict entry and
        # leaves the instance untouched.
        # Group offsets are derived from nb_fold (4, 7, 10, 13 for 3 folds)
        # instead of being hard-coded for exactly three folds.
        setattr(self, f"xte_fold{i + 1}", dataset[4 + i])
        setattr(self, f"xtr_fold{i + 1}", dataset[4 + nb_fold + i])
        setattr(self, f"yte_fold{i + 1}", dataset[4 + 2 * nb_fold + i])
        setattr(self, f"ytr_fold{i + 1}", dataset[4 + 3 * nb_fold + i])
    # prepare to send dataframes to julia and Jchemo
    jl.x_train, jl.y_train, jl.x_test, jl.y_test = self.x_train, self.y_train, self.x_test, self.y_test
......
......@@ -2,10 +2,15 @@ import numpy as np
from pathlib import Path
import json
from LWPLSR_ import LWPLSR
import os
# load the CSV inputs exported to temp/ (train/test arrays plus CV folds)
temp_path = Path("temp/")
data_to_work_with = ['x_train_np', 'y_train_np', 'x_test_np', 'y_test_np']
# os.listdir() returns entries in arbitrary order, while LWPLSR.__init__
# picks the fold arrays out of `dataset` by position. Sorting makes the
# order deterministic: xte_fold*, xtr_fold*, yte_fold*, ytr_fold*.
temp_files_list = sorted(os.listdir(temp_path))
for i in temp_files_list:
    # Keep only fold CSV exports and drop the extension properly, rather
    # than cutting the last 4 characters off any name containing "fold".
    if 'fold' in i and i.endswith('.csv'):
        data_to_work_with.append(Path(i).stem)
dataset = []
for i in data_to_work_with:
    dataset.append(np.genfromtxt(temp_path / str(i + ".csv"), delimiter=','))
......@@ -17,6 +22,10 @@ LWPLSR.Jchemo_lwplsr_fit(Reg)
print('now predict')
LWPLSR.Jchemo_lwplsr_predict(Reg)
print('now CV')
print('export to json')
pred = ['pred_data_train', 'pred_data_test']
json_export = {}
......
......@@ -10,10 +10,3 @@ from style.header import add_header, add_sidebar
from config.config import pdflatex_path
local_css(css_file / "style.css")
# path = os.path.dirname(os.path.abspath(__file__)).replace('\\','/')
# d1 = path.find('/')
# css_file = path[:d1]+'/style'
# st.session_state["interface"] = st.session_state.get('interface')
# if st.session_state["interface"] == 'simple':
# hide_pages("Predictions")
# local_css(css_file +"/style.css")
# import streamlit
import pandas as pd
from Packages import *
st.set_page_config(page_title="NIRS Utils", page_icon=":goat:", layout="wide")
from Modules import *
......@@ -123,7 +124,7 @@ if not spectra.empty and not y.empty:
colnames = spectra.columns
else:
colnames = np.arange(spectra.shape[1])
#rd_seed = M1.slider("Customize Train-test split", min_value=1, max_value=100, value=42, format="%i")
# Split data into training and test sets using the kennard_stone method and correlation metric, 25% of data is used for testing
......@@ -132,9 +133,9 @@ if not spectra.empty and not y.empty:
# Assign data to training and test sets
X_train, y_train = pd.DataFrame(spectra.iloc[train_index,:]), y.iloc[train_index]
X_test, y_test = pd.DataFrame(spectra.iloc[test_index,:]), y.iloc[test_index]
#### insight on loaded data
#### insight on loaded data
fig, ax1 = plt.subplots( figsize = (12,3))
spectra.T.plot(legend=False, ax = ax1, linestyle = '--')
ax1.set_ylabel('Signal intensity')
......@@ -167,29 +168,54 @@ if not spectra.empty and not y.empty:
reg_model = Reg.model_
#M2.dataframe(Pin.pred_data_)
elif regression_algo == reg_algo[2]:
# export data to csv for Julia
info = M1.info('Starting LWPLSR model creation... Please wait a few minutes.')
# export data to csv for Julia train/test
data_to_work_with = ['x_train_np', 'y_train_np', 'x_test_np', 'y_test_np']
x_train_np, y_train_np, x_test_np, y_test_np = X_train.to_numpy(), y_train.to_numpy(), X_test.to_numpy(), y_test.to_numpy()
# Cross-Validation calculation
nb_folds = 3
st.write('KFold = ' + str(nb_folds))
folds = KF_CV.CV(x_train_np, y_train_np, nb_folds)
# Keys of `folds`, listed once instead of being rebuilt for every lookup.
fold_keys = list(folds)
d = {}
for i in range(nb_folds):
    # Row indices held out for this fold: they form the fold's test part,
    # and everything else in the training set is the fold's train part.
    held_out = folds[fold_keys[i]]
    d["xtr_fold{0}".format(i+1)] = np.delete(x_train_np, held_out, axis=0)
    d["ytr_fold{0}".format(i+1)] = np.delete(y_train_np, held_out, axis=0)
    d["xte_fold{0}".format(i+1)] = x_train_np[held_out]
    d["yte_fold{0}".format(i+1)] = y_train_np[held_out]
    data_to_work_with.append("xtr_fold{0}".format(i+1))
    data_to_work_with.append("ytr_fold{0}".format(i+1))
    data_to_work_with.append("xte_fold{0}".format(i+1))
    data_to_work_with.append("yte_fold{0}".format(i+1))
temp_path = Path('temp/')
# Make sure the exchange directory exists before writing into it.
temp_path.mkdir(parents=True, exist_ok=True)
# Explicit name -> array map for the export. The fold arrays live in `d`,
# not in module variables, so looking names up through vars()/globals()
# either fails on the fold names or silently depends on this code running
# at module top level.
arrays_to_export = {
    'x_train_np': x_train_np,
    'y_train_np': y_train_np,
    'x_test_np': x_test_np,
    'y_test_np': y_test_np,
    **d,
}
for i in data_to_work_with:
    np.savetxt(temp_path / str(i + ".csv"), arrays_to_export[i], delimiter=",")
# run Julia Jchemo
import subprocess
subprocess_path = Path("Class_Mod/")
# NOTE: the exit status of the child process is not checked here; a failed
# run is detected just below by the absence of the output JSON file.
subprocess.run([f"{sys.executable}", subprocess_path / "LWPLSR_Call.py"])
# retrieve json results from Julia JChemo
outputs_path = temp_path / "lwplsr_outputs.json"
try:
    # Only the read can legitimately hit a missing file, so only the read
    # is guarded. The results must be read exactly once: deleting the JSON
    # before this point would force the error branch on every run.
    with open(outputs_path, "r") as outfile:
        Reg_json = json.load(outfile)
except FileNotFoundError:
    info.empty()
    M1.warning('- ERROR during model creation -')
    Reg = None
else:
    os.unlink(outputs_path)
    pred = ['pred_data_train', 'pred_data_test']
    Reg = type('obj', (object,), {'model' : Reg_json['model'], 'best_lwplsr_params' : Reg_json['best_lwplsr_params'], 'pred_data_' : [pd.json_normalize(Reg_json[i]) for i in pred]})
    for i in range(len(pred)):
        Reg.pred_data_[i] = Reg.pred_data_[i].T.reset_index().drop(columns = ['index'])
        if i != 1: # if not pred_data_test
            Reg.pred_data_[i].index = list(y_train.index)
        else:
            Reg.pred_data_[i].index = list(y_test.index)
    Reg.CV_results_ = pd.DataFrame()
    Reg.cv_data_ = pd.DataFrame()
    info.empty()
    M1.success('Model created!')
finally:
    # Always remove the exported CSVs, including after a failed run, so
    # stale fold files cannot be picked up by the next model creation.
    for csv_name in data_to_work_with:
        (temp_path / str(csv_name + ".csv")).unlink(missing_ok=True)
elif regression_algo == reg_algo[3]:
s = M1.number_input(label='Enter the maximum number of intervals', min_value=1, max_value=6, value=3)
it = M1.number_input(label='Enter the number of iterations', min_value=2, max_value=10, value=3)
......@@ -218,7 +244,7 @@ if not spectra.empty and not y.empty:
################# Model analysis ############
if regression_algo in reg_algo[1:]:
if regression_algo in reg_algo[1:] and Reg is not None:
#M2.write('-- Pretreated data (train) visualization and important spectral regions in the model -- ')
fig, (ax1, ax2) = plt.subplots(2,1, figsize = (12, 6))
......@@ -368,7 +394,7 @@ with st.container():
if not spectra.empty and not y.empty:
if regression_algo in reg_algo[1:]:
if regression_algo in reg_algo[1:] and Reg is not None:
fig, (ax1, ax2) = plt.subplots(2,1, figsize = (12, 4), sharex=True)
ax1.plot(colnames, np.mean(X_train, axis = 0), color = 'black', label = 'Average spectrum (Raw)')
ax2.plot(colnames, np.mean(Reg.pretreated_spectra_ , axis = 0), color = 'black', label = 'Average spectrum (pretreated)')
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment