From cdf5cba66beb63481ec44d0105699633c279decf Mon Sep 17 00:00:00 2001 From: Nicolas Barthes <nicolas.barthes@cnrs.fr> Date: Fri, 13 Sep 2024 16:37:20 +0200 Subject: [PATCH] starting prediction for LWPLSR models (all csv exporter ready to use with LWPLSR_Call) --- src/pages/3-prediction.py | 92 ++++++++++++++++----------------------- 1 file changed, 37 insertions(+), 55 deletions(-) diff --git a/src/pages/3-prediction.py b/src/pages/3-prediction.py index 5455083..410c5a4 100644 --- a/src/pages/3-prediction.py +++ b/src/pages/3-prediction.py @@ -151,29 +151,28 @@ with c2: pred_data = spectra os.unlink(tmp_path) - # Load parameters st.subheader("I - Spectral data preprocessing & visualization", divider='blue') # try: if not pred_data.empty:# Load the model with joblib @st.cache_data - def preprocess_spectra(change): + def preprocess_spectra(data, change): # M4.write(ProcessLookupError) if system_data['spec-preprocessing']['normalization'] == 'Snv': - x1 = Snv(pred_data) + x1 = Snv(data) norm = 'Standard Normal Variate' else: norm = 'No Normalization was applied' - x1 = pred_data + x1 = data x2 = savgol_filter(x1, window_length = int(system_data['spec-preprocessing']['SavGol(polyorder,window_length,deriv)'][1]), polyorder = int(system_data['spec-preprocessing']['SavGol(polyorder,window_length,deriv)'][0]), deriv = int(system_data['spec-preprocessing']['SavGol(polyorder,window_length,deriv)'][2]), delta=1.0, axis=-1, mode="interp", cval=0.0) - preprocessed = DataFrame(x2, index = pred_data.index, columns = pred_data.columns) + preprocessed = DataFrame(x2, index = data.index, columns = data.columns) return norm, preprocessed - norm, preprocessed = preprocess_spectra(change= hash_) + norm, preprocessed = preprocess_spectra(pred_data, change= hash_) # ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ # # @st.cache_data @@ -247,60 +246,43 @@ if not pred_data.empty:# Load the model with joblib st.error(f'''Error: Length mismatch: the number of samples indices is {len(rownames)}, while the model produced {len(model.predict(preprocesseddf))} values. correct the "indexes column in csv?" parameter''') case 'LW-PLS': - # export data to csv for Julia train/test - train_idx, test_idx = system_data['data']['training_data_idx'], system_data['data']['testing_data_idx'] - spectra = system_data['data']['raw-spectra'] - y = system_data['data']['target'] - X_train, y_train, X_test, y_test = spectra.iloc[train_idx,:], y.iloc[train_idx], spectra.iloc[test_idx,:], y.iloc[test_idx] - nb_folds = 3 - folds = KF_CV.CV(X_train, y_train, nb_folds) - #['raw-spectra', 'target', 'training_data_idx', 'testing_data_idx'] - data_to_work_with = ['x_train_np', 'y_train_np', 'x_test_np', 'y_test_np', 'x_pred'] - x_train_np, y_train_np, x_test_np, y_test_np = X_train.to_numpy(), y_train.to_numpy(), X_test.to_numpy(), y_test.to_numpy() - x_pred = pred_data.to_numpy() - # Cross-Validation calculation - d = {} - for i in range(nb_folds): - d["xtr_fold{0}".format(i+1)], d["ytr_fold{0}".format(i+1)], d["xte_fold{0}".format(i+1)], d["yte_fold{0}".format(i+1)] = np.delete(x_train_np, folds[list(folds)[i]], axis=0), np.delete(y_train_np, folds[list(folds)[i]], axis=0), x_train_np[folds[list(folds)[i]]], y_train_np[folds[list(folds)[i]]] - data_to_work_with.append("xtr_fold{0}".format(i+1)) - data_to_work_with.append("ytr_fold{0}".format(i+1)) - data_to_work_with.append("xte_fold{0}".format(i+1)) - data_to_work_with.append("yte_fold{0}".format(i+1)) - # check best pre-treatment with a global PLSR model - preReg = Plsr(train = [X_train, y_train], test = [X_test, y_test], n_iter=20) temp_path = Path('temp/') - with open(temp_path / "lwplsr_preTreatments.json", "w+") as outfile: - json.dump(preReg.best_hyperparams_, outfile) - # export Xtrain, Xtest, Ytrain, Ytest and all CV folds to temp folder as csv files + # export data to csv for Julia train/pred + st.write(system_data['data']) + # spectra = system_data['data']['raw-spectra'] # without pretreatments + spectra = preprocess_spectra(system_data['data']['raw-spectra'], change= hash_) + # with pretreatments + x_pred = preprocessed + y = system_data['data']['target'] + data_to_work_with = ['spectra', 'y', 'x_pred'] + spectra_np, y_np, x_pred_np = spectra.to_numpy(), y.to_numpy(), x_pred.to_numpy() + # export spectra, y, x_pred to temp folder as csv files for i in data_to_work_with: - if 'fold' in i: - j = d[i] - else: - j = globals()[i] - # st.write(j) + j = globals()[i] + # st.write(j) np.savetxt(temp_path / str(i + ".csv"), j, delimiter=",") - # run Julia Jchemo as subprocess + # # run Julia Jchemo as subprocess import subprocess subprocess_path = Path("utils/") - subprocess.run([f"{sys.executable}", subprocess_path / "LWPLSR_Call.py"]) - # retrieve json results from Julia JChemo - try: - with open(temp_path / "lwplsr_outputs.json", "r") as outfile: - Reg_json = json.load(outfile) - # delete csv files - for i in data_to_work_with: os.unlink(temp_path / str(i + ".csv")) - # delete json file after import - os.unlink(temp_path / "lwplsr_outputs.json") - os.unlink(temp_path / "lwplsr_preTreatments.json") - # format result data into Reg object - pred = ['pred_data_train', 'pred_data_test']### keys of the dict - for i in range(nb_folds): - pred.append("CV" + str(i+1)) ### add cv folds keys to pred - except FileNotFoundError as e: - Reg = None - for i in data_to_work_with: os.unlink(temp_path / str(i + ".csv")) - - st.write(Reg_json) + # subprocess.run([f"{sys.executable}", subprocess_path / "LWPLSR_Call.py"]) + # # retrieve json results from Julia JChemo + # try: + # with open(temp_path / "lwplsr_outputs.json", "r") as outfile: + # Reg_json = json.load(outfile) + # # delete csv files + # for i in data_to_work_with: os.unlink(temp_path / str(i + ".csv")) + # # delete json file after import + # os.unlink(temp_path / "lwplsr_outputs.json") + # os.unlink(temp_path / "lwplsr_preTreatments.json") + # # format result data into Reg object + # pred = ['pred_data_train', 'pred_data_test']### keys of the dict + # for i in range(nb_folds): + # pred.append("CV" + str(i+1)) ### add cv folds keys to pred + # except FileNotFoundError as e: + # Reg = None + # for i in data_to_work_with: os.unlink(temp_path / str(i + ".csv")) + # + # st.write(Reg_json) ################################### results display ################################### -- GitLab