# lwplsr_call.py

import numpy as np
from pathlib import Path
import json
from lwplsr_ import LWPLSR
import os

# Scratch folder through which the caller hands over its inputs and reads
# back the results.
temp_path = Path("temp/")
# os.listdir() returns entries in arbitrary, filesystem-dependent order.
# The fold CSVs found below are appended to `dataset` positionally, so the
# listing is sorted to make that layout reproducible across platforms.
# NOTE(review): lexicographic order is assumed to be the order LWPLSR expects
# for the fold arrays (and "fold10" sorts before "fold2") - confirm against
# the LWPLSR constructor.
temp_files_list = sorted(os.listdir(temp_path))


def _load_csv_arrays(names):
    """Load ``temp/<name>.csv`` for every name, in order, as NumPy arrays."""
    return [np.genfromtxt(temp_path / f"{name}.csv", delimiter=',')
            for name in names]


def _write_outputs(payload):
    """Dump *payload* as JSON to ``temp/lwplsr_outputs.json`` (overwrites)."""
    with open(temp_path / "lwplsr_outputs.json", "w") as outfile:
        json.dump(payload, outfile)


# An entry named 'model' in temp/ selects model creation; an entry named
# 'predict' selects prediction with previously tuned parameters.
if 'model' in temp_files_list:
    data_to_work_with = ['x_train_np', 'y_train_np', 'x_test_np', 'y_test_np']
    # Cross-validation data: every CSV whose file name contains 'fold'.
    # Only real .csv entries are kept, so stripping the extension can never
    # mangle an unrelated file name.
    fold_names = [Path(entry).stem for entry in temp_files_list
                  if 'fold' in entry and entry.lower().endswith('.csv')]
    # Number of fold *files*; it is divided by 4 further down to get the
    # number of CV result sets (presumably four CSV files per fold).
    nb_fold = len(fold_names)
    data_to_work_with.extend(fold_names)
    # Import data from csv files in the temp/ folder
    dataset = _load_csv_arrays(data_to_work_with)
    print('CSV imported')

    # Get parameters for preTreatment of the spectra (acquired from a global PLSR)
    with open(temp_path / "lwplsr_preTreatments.json", "r") as infile:
        preT = json.load(infile)

    # launch the LWPLSR class (imported from lwplsr_)
    print('start model creation')
    Reg = LWPLSR(dataset, preT, 'Model_Creation')
    print('model created. \nnow fit')
    LWPLSR.Jchemo_lwplsr_fit(Reg)
    print('now predict')
    LWPLSR.Jchemo_lwplsr_predict(Reg)
    print('now CV')
    LWPLSR.Jchemo_lwplsr_cv(Reg)

    # Export results in a json file to bring data back to 2-model_creation.py and streamlit interface
    print('export to json')
    pred = ['pred_data_train', 'pred_data_test']
    # add KFold results to predicted data: "CV1", "CV2", ...
    pred.extend(f"CV{k}" for k in range(1, nb_fold // 4 + 1))
    # each entry of Reg.pred_data_ must expose .to_dict()
    json_export = {key: Reg.pred_data_[key].to_dict() for key in pred}
    # add the lwplsr global model to the json
    json_export['model'] = str(Reg.model_)
    # add the best parameters for the lwplsr obtained from GridScore tuning
    json_export['best_lwplsr_params'] = Reg.best_lwplsr_params_
    _write_outputs(json_export)

elif 'predict' in temp_files_list:
    data_to_work_with = ['spectra_np', 'y_np', 'x_pred_np']
    dataset = _load_csv_arrays(data_to_work_with)
    print('CSV imported')
    with open(temp_path / "lwplsr_best_params.json", "r") as infile:
        preT = json.load(infile)
    print('LWPLSR best parameters imported')
    # launch the LWPLSR class (imported from lwplsr_)
    print('start model creation')
    Reg = LWPLSR(dataset, preT, 'Prediction')
    print('model created. \nnow fit')
    LWPLSR.Jchemo_lwplsr_predict_fit(Reg)
    print('now predict')
    LWPLSR.Jchemo_lwplsr_predict_predict(Reg)

    print('export to json')
    pred = ['y_pred']
    # each entry of Reg.predict_pred_data_ must expose .to_dict()
    json_export = {key: Reg.predict_pred_data_[key].to_dict() for key in pred}
    # add the lwplsr global model to the json
    json_export['model'] = str(Reg.model_)
    _write_outputs(json_export)

else:
    # Neither marker is present: no output file is written. Say so instead
    # of exiting silently, so the caller's log shows why.
    print("no 'model' or 'predict' entry found in temp/ - nothing to do")