Newer
Older
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
import numpy as np
from pathlib import Path
import json
from LWPLSR_ import LWPLSR
import os
# Script body: load the CSV matrices and pre-treatment parameters found in
# temp/, run the LWPLSR fit / predict / cross-validation steps, and export the
# results to temp/lwplsr_outputs.json.
#
# Files read from temp/:
#   - x_train_np.csv, y_train_np.csv, x_test_np.csv, y_test_np.csv
#   - every "*.csv" file whose name contains "fold" (cross-validation data)
#   - lwplsr_preTreatments.json
# File written to temp/:
#   - lwplsr_outputs.json
temp_path = Path("temp/")
data_to_work_with = ['x_train_np', 'y_train_np', 'x_test_np', 'y_test_np']

# Collect the cross-validation files present in temp/.
# os.listdir() returns entries in arbitrary order, and the arrays are handed
# to LWPLSR positionally, so the names are sorted to make the order
# reproducible from one filesystem to another.
# NOTE(review): confirm that LWPLSR expects the fold arrays in alphabetical
# file-name order.
# Only ".csv" entries are kept: the extension is stripped here and ".csv" is
# appended again at load time, so any other file containing "fold" would
# produce a wrong path and inflate the fold count.
temp_files_list = os.listdir(temp_path)
fold_file_names = sorted(
    file_name[:-4]
    for file_name in temp_files_list
    if 'fold' in file_name and file_name.endswith('.csv')
)
data_to_work_with.extend(fold_file_names)
# nb_fold is the number of fold *files*; it is divided by 4 further down to
# get the number of CV result entries (presumably four files per fold).
nb_fold = len(fold_file_names)

# Import data from csv files in the temp/ folder, in data_to_work_with order
dataset = [
    np.genfromtxt(temp_path / (data_name + ".csv"), delimiter=',')
    for data_name in data_to_work_with
]
print('CSV imported')

# Get parameters for preTreatment of the spectra (acquired from a global PLSR)
with open(temp_path / "lwplsr_preTreatments.json", "r") as infile:
    preT = json.load(infile)

# launch LWPLSR Class from LWPLSR_.py in utils
print('start model creation')
Reg = LWPLSR(dataset, preT)
print('model created. \nnow fit')
LWPLSR.Jchemo_lwplsr_fit(Reg)
print('now predict')
LWPLSR.Jchemo_lwplsr_predict(Reg)
print('now CV')
LWPLSR.Jchemo_lwplsr_cv(Reg)

# Export results in a json file to bring data back to 2-model_creation.py and
# streamlit interface
print('export to json')
pred = ['pred_data_train', 'pred_data_test']
# add KFold results to predicted data: one "CV<k>" entry per group of 4 fold files
pred.extend("CV" + str(fold_index + 1) for fold_index in range(nb_fold // 4))

# Each entry of Reg.pred_data_ is converted with .to_dict() so that it can be
# serialised (presumably pandas objects; verify against LWPLSR_.py).
json_export = {pred_key: Reg.pred_data_[pred_key].to_dict() for pred_key in pred}
# add the lwplsr global model to the json (stored as its string representation)
json_export['model'] = str(Reg.model_)
# add the best parameters for the lwplsr obtained from GridScore tuning
json_export['best_lwplsr_params'] = Reg.best_lwplsr_params_

with open(temp_path / "lwplsr_outputs.json", "w+") as outfile:
    json.dump(json_export, outfile)