diff --git a/src/pages/2-model_creation.py b/src/pages/2-model_creation.py index f4b43c2938e041b53d182bd871f51085b1ee2a8a..6caaf3609f0567173a8f588f4a281a93398f0ec4 100644 --- a/src/pages/2-model_creation.py +++ b/src/pages/2-model_creation.py @@ -174,7 +174,8 @@ if not spectra.empty and not y.empty: x_train_np, y_train_np, x_test_np, y_test_np = X_train.to_numpy(), y_train.to_numpy(), X_test.to_numpy(), y_test.to_numpy() # Cross-Validation calculation nb_folds = 3 - st.write('KFold = ' + str(nb_folds)) + st.write('KFold for Cross-Validation = ' + str(nb_folds)) + # split train data into nb_folds folds = KF_CV.CV(x_train_np, y_train_np, nb_folds) d = {} for i in range(nb_folds): @@ -183,6 +184,7 @@ if not spectra.empty and not y.empty: data_to_work_with.append("ytr_fold{0}".format(i+1)) data_to_work_with.append("xte_fold{0}".format(i+1)) data_to_work_with.append("yte_fold{0}".format(i+1)) + # export Xtrain, Xtest, Ytrain, Ytest and all CV folds to temp folder as csv files temp_path = Path('temp/') for i in data_to_work_with: if 'fold' in i: @@ -190,7 +192,7 @@ if not spectra.empty and not y.empty: else: j = globals()[i] np.savetxt(temp_path / str(i + ".csv"), j, delimiter=",") - # run Julia Jchemo + # run Julia Jchemo as subprocess import subprocess subprocess_path = Path("Class_Mod/") subprocess.run([f"{sys.executable}", subprocess_path / "LWPLSR_Call.py"]) @@ -198,21 +200,38 @@ if not spectra.empty and not y.empty: try: with open(temp_path / "lwplsr_outputs.json", "r") as outfile: Reg_json = json.load(outfile) + # delete csv files for i in data_to_work_with: os.unlink(temp_path / str(i + ".csv")) + # delete json file after import os.unlink(temp_path / "lwplsr_outputs.json") + # format result data into Reg object pred = ['pred_data_train', 'pred_data_test'] + for i in range(nb_folds): + pred.append("CV" + str(i+1)) Reg = type('obj', (object,), {'model' : Reg_json['model'], 'best_lwplsr_params' : Reg_json['best_lwplsr_params'], 'pred_data_' : [pd.json_normalize(Reg_json[i]) for i in pred]}) + Reg.CV_results_ = pd.DataFrame() + Reg.cv_data_ = {'YpredCV' : {}, 'idxCV' : {}} + # set indexes to Reg.pred_data (train, test, folds idx) for i in range(len(pred)): Reg.pred_data_[i] = Reg.pred_data_[i].T.reset_index().drop(columns = ['index']) - if i != 1: # if not pred_data_test + if i == 0: # data_train Reg.pred_data_[i].index = list(y_train.index) - else: + elif i == 1: # data_test Reg.pred_data_[i].index = list(y_test.index) - Reg.CV_results_ = pd.DataFrame() - Reg.cv_data_ = pd.DataFrame() + else: # CVi + Reg.pred_data_[i].index = folds[list(folds)[i-2]] + Reg.CV_results_ = pd.concat([Reg.CV_results_, Reg.pred_data_[i]]) + Reg.cv_data_['YpredCV']['Fold' + str(i-1)] = Reg.pred_data_[i] + Reg.cv_data_['idxCV']['Fold' + str(i-1)] = folds[list(folds)[i-2]] + Reg.CV_results_.sort_index(inplace = True) + Reg.CV_results_.columns = ['Ypredicted_CV'] + # if you want to display Reg.cv_data_ containing by fold YpredCV and idxCV + # cv2.json(Reg.cv_data_) + # Display end of modeling message on the interface info.empty() M1.success('Model created!') except FileNotFoundError as e: + # Display error message on the interface if modeling is wrong info.empty() M1.warning('- ERROR during model creation -') Reg = None