Skip to content
Snippets Groups Projects
Commit 996d9c7f authored by BARTHES Nicolas
Browse files

LWPLSR CV - begin to format results (missing metrics)

parent 9fdcc282
No related branches found
No related tags found
No related merge requests found
...@@ -174,7 +174,8 @@ if not spectra.empty and not y.empty: ...@@ -174,7 +174,8 @@ if not spectra.empty and not y.empty:
x_train_np, y_train_np, x_test_np, y_test_np = X_train.to_numpy(), y_train.to_numpy(), X_test.to_numpy(), y_test.to_numpy() x_train_np, y_train_np, x_test_np, y_test_np = X_train.to_numpy(), y_train.to_numpy(), X_test.to_numpy(), y_test.to_numpy()
# Cross-Validation calculation # Cross-Validation calculation
nb_folds = 3 nb_folds = 3
st.write('KFold = ' + str(nb_folds)) st.write('KFold for Cross-Validation = ' + str(nb_folds))
# split train data into nb_folds
folds = KF_CV.CV(x_train_np, y_train_np, nb_folds) folds = KF_CV.CV(x_train_np, y_train_np, nb_folds)
d = {} d = {}
for i in range(nb_folds): for i in range(nb_folds):
...@@ -183,6 +184,7 @@ if not spectra.empty and not y.empty: ...@@ -183,6 +184,7 @@ if not spectra.empty and not y.empty:
data_to_work_with.append("ytr_fold{0}".format(i+1)) data_to_work_with.append("ytr_fold{0}".format(i+1))
data_to_work_with.append("xte_fold{0}".format(i+1)) data_to_work_with.append("xte_fold{0}".format(i+1))
data_to_work_with.append("yte_fold{0}".format(i+1)) data_to_work_with.append("yte_fold{0}".format(i+1))
# export Xtrain, Xtest, Ytrain, Ytest and all CV folds to temp folder as csv files
temp_path = Path('temp/') temp_path = Path('temp/')
for i in data_to_work_with: for i in data_to_work_with:
if 'fold' in i: if 'fold' in i:
...@@ -190,7 +192,7 @@ if not spectra.empty and not y.empty: ...@@ -190,7 +192,7 @@ if not spectra.empty and not y.empty:
else: else:
j = globals()[i] j = globals()[i]
np.savetxt(temp_path / str(i + ".csv"), j, delimiter=",") np.savetxt(temp_path / str(i + ".csv"), j, delimiter=",")
# run Julia Jchemo # run Julia Jchemo as subprocess
import subprocess import subprocess
subprocess_path = Path("Class_Mod/") subprocess_path = Path("Class_Mod/")
subprocess.run([f"{sys.executable}", subprocess_path / "LWPLSR_Call.py"]) subprocess.run([f"{sys.executable}", subprocess_path / "LWPLSR_Call.py"])
...@@ -198,21 +200,38 @@ if not spectra.empty and not y.empty: ...@@ -198,21 +200,38 @@ if not spectra.empty and not y.empty:
try: try:
with open(temp_path / "lwplsr_outputs.json", "r") as outfile: with open(temp_path / "lwplsr_outputs.json", "r") as outfile:
Reg_json = json.load(outfile) Reg_json = json.load(outfile)
# delete csv files
for i in data_to_work_with: os.unlink(temp_path / str(i + ".csv")) for i in data_to_work_with: os.unlink(temp_path / str(i + ".csv"))
# delete json file after import
os.unlink(temp_path / "lwplsr_outputs.json") os.unlink(temp_path / "lwplsr_outputs.json")
# format result data into Reg object
pred = ['pred_data_train', 'pred_data_test'] pred = ['pred_data_train', 'pred_data_test']
for i in range(nb_folds):
pred.append("CV" + str(i+1))
Reg = type('obj', (object,), {'model' : Reg_json['model'], 'best_lwplsr_params' : Reg_json['best_lwplsr_params'], 'pred_data_' : [pd.json_normalize(Reg_json[i]) for i in pred]}) Reg = type('obj', (object,), {'model' : Reg_json['model'], 'best_lwplsr_params' : Reg_json['best_lwplsr_params'], 'pred_data_' : [pd.json_normalize(Reg_json[i]) for i in pred]})
Reg.CV_results_ = pd.DataFrame()
Reg.cv_data_ = {'YpredCV' : {}, 'idxCV' : {}}
# set indexes to Reg.pred_data (train, test, folds idx)
for i in range(len(pred)): for i in range(len(pred)):
Reg.pred_data_[i] = Reg.pred_data_[i].T.reset_index().drop(columns = ['index']) Reg.pred_data_[i] = Reg.pred_data_[i].T.reset_index().drop(columns = ['index'])
if i != 1: # if not pred_data_test if i == 0: # data_train
Reg.pred_data_[i].index = list(y_train.index) Reg.pred_data_[i].index = list(y_train.index)
else: elif i == 1: # data_test
Reg.pred_data_[i].index = list(y_test.index) Reg.pred_data_[i].index = list(y_test.index)
Reg.CV_results_ = pd.DataFrame() else: # CVi
Reg.cv_data_ = pd.DataFrame() Reg.pred_data_[i].index = folds[list(folds)[i-2]]
Reg.CV_results_ = pd.concat([Reg.CV_results_, Reg.pred_data_[i]])
Reg.cv_data_['YpredCV']['Fold' + str(i-1)] = Reg.pred_data_[i]
Reg.cv_data_['idxCV']['Fold' + str(i-1)] = folds[list(folds)[i-2]]
Reg.CV_results_.sort_index(inplace = True)
Reg.CV_results_.columns = ['Ypredicted_CV']
# to display Reg.cv_data_, which holds YpredCV and idxCV for each fold, uncomment:
# cv2.json(Reg.cv_data_)
# Display end of modeling message on the interface
info.empty() info.empty()
M1.success('Model created!') M1.success('Model created!')
except FileNotFoundError as e: except FileNotFoundError as e:
# Display an error message on the interface if the model output file was not found
info.empty() info.empty()
M1.warning('- ERROR during model creation -') M1.warning('- ERROR during model creation -')
Reg = None Reg = None
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment