Skip to content
Snippets Groups Projects
Commit cdf5cba6 authored by Nicolas Barthes
Browse files

Starting prediction for LWPLSR models (all CSV exporters are ready to use with LWPLSR_Call)

parent c1e55071
No related branches found
No related tags found
No related merge requests found
...@@ -151,29 +151,28 @@ with c2: ...@@ -151,29 +151,28 @@ with c2:
pred_data = spectra pred_data = spectra
os.unlink(tmp_path) os.unlink(tmp_path)
# Load parameters # Load parameters
st.subheader("I - Spectral data preprocessing & visualization", divider='blue') st.subheader("I - Spectral data preprocessing & visualization", divider='blue')
# try: # try:
if not pred_data.empty:# Load the model with joblib if not pred_data.empty:# Load the model with joblib
@st.cache_data @st.cache_data
def preprocess_spectra(change): def preprocess_spectra(data, change):
# M4.write(ProcessLookupError) # M4.write(ProcessLookupError)
if system_data['spec-preprocessing']['normalization'] == 'Snv': if system_data['spec-preprocessing']['normalization'] == 'Snv':
x1 = Snv(pred_data) x1 = Snv(data)
norm = 'Standard Normal Variate' norm = 'Standard Normal Variate'
else: else:
norm = 'No Normalization was applied' norm = 'No Normalization was applied'
x1 = pred_data x1 = data
x2 = savgol_filter(x1, x2 = savgol_filter(x1,
window_length = int(system_data['spec-preprocessing']['SavGol(polyorder,window_length,deriv)'][1]), window_length = int(system_data['spec-preprocessing']['SavGol(polyorder,window_length,deriv)'][1]),
polyorder = int(system_data['spec-preprocessing']['SavGol(polyorder,window_length,deriv)'][0]), polyorder = int(system_data['spec-preprocessing']['SavGol(polyorder,window_length,deriv)'][0]),
deriv = int(system_data['spec-preprocessing']['SavGol(polyorder,window_length,deriv)'][2]), deriv = int(system_data['spec-preprocessing']['SavGol(polyorder,window_length,deriv)'][2]),
delta=1.0, axis=-1, mode="interp", cval=0.0) delta=1.0, axis=-1, mode="interp", cval=0.0)
preprocessed = DataFrame(x2, index = pred_data.index, columns = pred_data.columns) preprocessed = DataFrame(x2, index = data.index, columns = data.columns)
return norm, preprocessed return norm, preprocessed
norm, preprocessed = preprocess_spectra(change= hash_) norm, preprocessed = preprocess_spectra(pred_data, change= hash_)
# ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ # # ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ #
# @st.cache_data # @st.cache_data
...@@ -247,60 +246,43 @@ if not pred_data.empty:# Load the model with joblib ...@@ -247,60 +246,43 @@ if not pred_data.empty:# Load the model with joblib
st.error(f'''Error: Length mismatch: the number of samples indices is {len(rownames)}, while the model produced st.error(f'''Error: Length mismatch: the number of samples indices is {len(rownames)}, while the model produced
{len(model.predict(preprocesseddf))} values. correct the "indexes column in csv?" parameter''') {len(model.predict(preprocesseddf))} values. correct the "indexes column in csv?" parameter''')
case 'LW-PLS': case 'LW-PLS':
# export data to csv for Julia train/test
train_idx, test_idx = system_data['data']['training_data_idx'], system_data['data']['testing_data_idx']
spectra = system_data['data']['raw-spectra']
y = system_data['data']['target']
X_train, y_train, X_test, y_test = spectra.iloc[train_idx,:], y.iloc[train_idx], spectra.iloc[test_idx,:], y.iloc[test_idx]
nb_folds = 3
folds = KF_CV.CV(X_train, y_train, nb_folds)
#['raw-spectra', 'target', 'training_data_idx', 'testing_data_idx']
data_to_work_with = ['x_train_np', 'y_train_np', 'x_test_np', 'y_test_np', 'x_pred']
x_train_np, y_train_np, x_test_np, y_test_np = X_train.to_numpy(), y_train.to_numpy(), X_test.to_numpy(), y_test.to_numpy()
x_pred = pred_data.to_numpy()
# Cross-Validation calculation
d = {}
for i in range(nb_folds):
d["xtr_fold{0}".format(i+1)], d["ytr_fold{0}".format(i+1)], d["xte_fold{0}".format(i+1)], d["yte_fold{0}".format(i+1)] = np.delete(x_train_np, folds[list(folds)[i]], axis=0), np.delete(y_train_np, folds[list(folds)[i]], axis=0), x_train_np[folds[list(folds)[i]]], y_train_np[folds[list(folds)[i]]]
data_to_work_with.append("xtr_fold{0}".format(i+1))
data_to_work_with.append("ytr_fold{0}".format(i+1))
data_to_work_with.append("xte_fold{0}".format(i+1))
data_to_work_with.append("yte_fold{0}".format(i+1))
# check best pre-treatment with a global PLSR model
preReg = Plsr(train = [X_train, y_train], test = [X_test, y_test], n_iter=20)
temp_path = Path('temp/') temp_path = Path('temp/')
with open(temp_path / "lwplsr_preTreatments.json", "w+") as outfile: # export data to csv for Julia train/pred
json.dump(preReg.best_hyperparams_, outfile) st.write(system_data['data'])
# export Xtrain, Xtest, Ytrain, Ytest and all CV folds to temp folder as csv files # spectra = system_data['data']['raw-spectra'] # without pretreatments
spectra = preprocess_spectra(system_data['data']['raw-spectra'], change= hash_)
# with pretreatments
x_pred = preprocessed
y = system_data['data']['target']
data_to_work_with = ['spectra', 'y', 'x_pred']
spectra_np, y_np, x_pred_np = spectra.to_numpy(), y.to_numpy(), x_pred.to_numpy()
# export spectra, y, x_pred to temp folder as csv files
for i in data_to_work_with: for i in data_to_work_with:
if 'fold' in i: j = globals()[i]
j = d[i] # st.write(j)
else:
j = globals()[i]
# st.write(j)
np.savetxt(temp_path / str(i + ".csv"), j, delimiter=",") np.savetxt(temp_path / str(i + ".csv"), j, delimiter=",")
# run Julia Jchemo as subprocess # # run Julia Jchemo as subprocess
import subprocess import subprocess
subprocess_path = Path("utils/") subprocess_path = Path("utils/")
subprocess.run([f"{sys.executable}", subprocess_path / "LWPLSR_Call.py"]) # subprocess.run([f"{sys.executable}", subprocess_path / "LWPLSR_Call.py"])
# retrieve json results from Julia JChemo # # retrieve json results from Julia JChemo
try: # try:
with open(temp_path / "lwplsr_outputs.json", "r") as outfile: # with open(temp_path / "lwplsr_outputs.json", "r") as outfile:
Reg_json = json.load(outfile) # Reg_json = json.load(outfile)
# delete csv files # # delete csv files
for i in data_to_work_with: os.unlink(temp_path / str(i + ".csv")) # for i in data_to_work_with: os.unlink(temp_path / str(i + ".csv"))
# delete json file after import # # delete json file after import
os.unlink(temp_path / "lwplsr_outputs.json") # os.unlink(temp_path / "lwplsr_outputs.json")
os.unlink(temp_path / "lwplsr_preTreatments.json") # os.unlink(temp_path / "lwplsr_preTreatments.json")
# format result data into Reg object # # format result data into Reg object
pred = ['pred_data_train', 'pred_data_test']### keys of the dict # pred = ['pred_data_train', 'pred_data_test']### keys of the dict
for i in range(nb_folds): # for i in range(nb_folds):
pred.append("CV" + str(i+1)) ### add cv folds keys to pred # pred.append("CV" + str(i+1)) ### add cv folds keys to pred
except FileNotFoundError as e: # except FileNotFoundError as e:
Reg = None # Reg = None
for i in data_to_work_with: os.unlink(temp_path / str(i + ".csv")) # for i in data_to_work_with: os.unlink(temp_path / str(i + ".csv"))
#
st.write(Reg_json) # st.write(Reg_json)
################################### results display ################################### ################################### results display ###################################
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment