from Packages import *
st.set_page_config(page_title = "NIRS Utils", page_icon = ":goat:", layout = "wide")
from Modules import *
from Class_Mod.DATA_HANDLING import *
# HTML header banner "CEFE - CNRS"
add_header()
local_css(css_file / "style_model.css")#load specific model page css




hash_ = ''
def p_hash(add):
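    """Fold `add` into the global running hash; the result is used as the st.cache_data key so cached steps re-run when any upstream input changes."""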
    global hash_
    hash_ = hash_data(hash_+str(add))
    return hash_
# Initialize a counter in session state (used to invalidate cached results) if it doesn't exist
if 'counter' not in st.session_state:
    st.session_state.counter = 0
def increment():
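    """Bump the session counter; it feeds the running hash and forces the cached model creation to re-run."""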
    st.session_state.counter += 1

# ####################################  Methods ##############################################
def delete_files(keep):
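    """Remove every file under Report/ except the logo and files whose extension is listed in keep."""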
    supp = []
    # Walk through the directory
    for root, dirs, files in os.walk('Report/', topdown=False):
        for file in files:
            if file != 'logo_cefe.png' and not any(file.endswith(ext) for ext in keep):
                os.remove(os.path.join(root, file))

class lw:
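    """Lightweight container rebuilding a regression-result object from the JSON returned by the external LW-PLS (Julia/Jchemo) run."""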
    def __init__(self, Reg_json, pred):
        self.model_ = Reg_json['model']
        self.best_hyperparams_ = Reg_json['best_lwplsr_params']
        self.pred_data_ = [pd.json_normalize(Reg_json[i]) for i in pred]

################ clean the results dir #############
delete_files(keep = ['.py', '.pyc','.bib'])
dirpath = Path('Report/out/model')
if not dirpath.exists():
    os.mkdir(path = dirpath)
# ####################################### page preamble #######################################
st.title("Calibration Model Development") # page title
st.markdown("Create a predictive model, then use it for predicting your target variable (chemical data) from NIRS spectra")
M0, M00 = st.columns([1, .4])
M0.image("./images/model_creation.png", use_column_width = True) # graphical abstract

################################################################# Begin : I- Data loading and preparation ######################################
files_format = ['csv', 'dx'] # supported file formats
file = M00.radio('Select files format:', options = files_format, horizontal = True) # select a file format
spectra = pd.DataFrame() # preallocate the spectral data block
y = pd.DataFrame() # preallocate the target(s) data block
match file:
    # load csv file
    case 'csv':
        with M00:
            # Load X-block data
            xcal_csv = st.file_uploader("Select NIRS Data", type = "csv", help = " :mushroom: select a csv matrix with samples as rows and lambdas as columns")
            if xcal_csv:
                sepx = st.radio("Select separator (X file) - _detected_: " + str(find_delimiter('data/'+xcal_csv.name)),
                                        options = [";", ","], index = [";", ","].index(str(find_delimiter('data/'+xcal_csv.name))), key = 0, horizontal = True)
                hdrx = st.radio("samples name (X file)? - _detected_: " + str(find_col_index('data/'+xcal_csv.name)),
                                        options = ["no", "yes"], index = ["no", "yes"].index(str(find_col_index('data/'+xcal_csv.name))), key = 1, horizontal = True)
                match hdrx:
                    case "yes": col = 0
                    case "no": col = False
            else:
                st.info('Info: Insert your spectral data file above!')
            ycal_csv = st.file_uploader("Select corresponding Chemical Data", type = "csv", help = " :mushroom: select a csv matrix with samples as rows and chemical values as a column")
            if ycal_csv:
                sepy = st.radio("Select separator (Y file) - _detected_: " + str(find_delimiter('data/'+ycal_csv.name)),
                                options = [";",  ","], index = [";", ","].index(str(find_delimiter('data/'+ycal_csv.name))), key = 2, horizontal = True)
                hdry = st.radio("samples name (Y file)? - _detected_: " + str(find_col_index('data/'+ycal_csv.name)),
                                options = ["no", "yes"], index = ["no", "yes"].index(str(find_col_index('data/'+ycal_csv.name))), key = 3, horizontal = True)
                match hdry:
                    case "yes":
                        col = 0
                    case "no":
                        col = False
            else:
                st.info('Info: Insert your target data file above!')


            # AFTER LOADING BOTH X AND Y FILES
            if xcal_csv and ycal_csv:
                # create a str instance for storing the hash of both x and y data
                xy_str = ''
                from io import StringIO
                for csvfile in [xcal_csv, ycal_csv]:
                    stringio = StringIO(csvfile.getvalue().decode('utf-8'))
                    xy_str += str(stringio.read())
                p_hash([xy_str + str(xcal_csv.name) + str(ycal_csv.name), hdrx, sepx, hdry, sepy])
                # p_hash(add = )
                @st.cache_data
                def csv_loader(change):
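                    """Read the uploaded X and Y csv files with the chosen dialect; change only serves as the st.cache_data key."""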
                    delete_files(keep = ['.py', '.pyc','.bib'])
                    file_name = str(xcal_csv.name) +' and '+ str(ycal_csv.name)
                    xfile = pd.read_csv(xcal_csv, decimal = '.', sep = sepx, index_col = col, header = 0)
                    yfile =  pd.read_csv(ycal_csv, decimal = '.', sep = sepy, index_col = col)
                    return xfile, yfile, file_name
                xfile, yfile, file_name = csv_loader(change = hash_)



                if yfile.shape[1]>0 and xfile.shape[1]>0 :

                    # prepare x data
                    try: 
                        spectra, meta_data = col_cat(xfile)
                    except:
                        st.error('Error: The format of the X-file does not correspond to the expected dialect settings. To read the file correctly, please adjust the separator parameters.')
                    spectra = pd.DataFrame(spectra).astype(float)
                    
                    # prepare y data
                    try:
                        chem_data, idx = col_cat(yfile)
                    except:
                        st.error('Error: The format of the Y-file does not correspond to the expected dialect settings. To read the file correctly, please adjust the separator parameters.')

                    if 'chem_data' in globals():
                        if chem_data.shape[1]>1:
                            yname = M00.selectbox('Select a target', options = ['']+chem_data.columns.tolist(), format_func = lambda x: x if x else "<Select>")
                            if yname:
                                y = chem_data.loc[:, yname]
                            else:
                                M00.info('Info: Select the target analyte from the drop down list!')
                        else:
                            y = chem_data.iloc[:, 0]
                        
                    if not y.empty:
                        if spectra.shape[0] != y.shape[0]:
                            st.error('Error: X and Y have different sample size')
                            y = pd.DataFrame()
                            spectra = pd.DataFrame()
                else:
                    st.error('Error: The data has not been loaded successfully, please consider tuning the dialect settings!')
    # load dx file
    case 'dx':
        with M00:
            data_file = st.file_uploader("Select Data", type = ".dx", help = " :mushroom: select a dx file")
            if data_file:
                file_name = str(data_file.name)
                ## creating the temp file
                with NamedTemporaryFile(delete = False, suffix = ".dx") as tmp:
                    tmp.write(data_file.read())
                    tmp_path = tmp.name
                    with open(tmp.name, 'r') as dd:
                        dxdata = dd.read()
                        p_hash(str(dxdata)+str(data_file.name))

                ## load and parse the temp dx file
                @st.cache_data
                def dx_loader(change):
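                    """Parse the temporary .dx file into chemical, spectral and metadata blocks, then delete it; change keys the cache."""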
                    chem_data, spectra, meta_data, meta_data_st = read_dx(file =  tmp_path)    
                    os.unlink(tmp_path)
                    return chem_data, spectra, meta_data, meta_data_st
                chem_data, spectra, meta_data, meta_data_st = dx_loader(change = hash_)
                st.success("Info: The data have been loaded successfully", icon = "✅")

                if chem_data.shape[1]>0:
                    yname = st.selectbox('Select the target analyte', options = ['']+chem_data.columns.tolist(), format_func = lambda x: x if x else "<Select>" )
                    if yname:
                        measured = chem_data.loc[:, yname] > 0
                        y = chem_data.loc[:, yname].loc[measured]
                        spectra = spectra.loc[measured]
                        
                        
                    else:
                        st.info('Info: Please select the target analyte from the dropdown list!')
                else:
                    st.warning('Warning: your file includes no target variables to model!', icon = "⚠️")
            else:
                st.info('Info: Load your file here!')
################################################### END : I- Data loading and preparation ####################################################
################################################### BEGIN : visualize and split the data ####################################################
st.header("I - Data visualization", divider = 'blue')
if not spectra.empty and not y.empty:
    p_hash(y)
    p_hash(np.mean(spectra))
    @st.cache_data(show_spinner =False)
    def visualize(change):
        """Split the data into train/test sets (Kennard-Stone, 25% test), build the spectra and target plots and compute summary statistics."""
        if np.array(spectra.columns).dtype.kind in ['i', 'f']:
            colnames = spectra.columns
        else:
            colnames = np.arange(spectra.shape[1])
        # Split data into training and test sets using the kennard_stone method and correlation metric, 25% of data is used for testing
        train_index, test_index = train_test_split_idx(spectra, y = y, method = "kennard_stone", metric = "correlation", test_size = 0.25, random_state = 42)

        # Assign data to training and test sets
        X_train, y_train = pd.DataFrame(spectra.iloc[train_index,:]), y.iloc[train_index]
        X_test, y_test = pd.DataFrame(spectra.iloc[test_index,:]), y.iloc[test_index]


        #### insight on loaded data
        # M0, M000 = st.columns([1, .4])
        fig1, ax1 = plt.subplots( figsize = (12, 3))
        spectra.T.plot(legend = False, ax = ax1, linestyle = '-', linewidth = 0.6)
        ax1.set_ylabel('Signal intensity')
        ax1.margins(0)
        plt.tight_layout()

        fig2, ax2 = plt.subplots(figsize = (12,3))
        sns.histplot(y, color = "deeppink", kde = True, label = "y", ax = ax2, fill = True)
        sns.histplot(y_train, color = "blue", kde = True, label = "y (train)", ax = ax2, fill = True)
        sns.histplot(y_test, color = "green", kde = True, label = "y (test)", ax = ax2, fill = True)
        ax2.set_xlabel('y')
        plt.legend()
        plt.tight_layout()
        stats = pd.DataFrame([desc_stats(y_train), desc_stats(y_test), desc_stats(y)], index =['train', 'test', 'total'] ).round(2) 
        return X_train, X_test, y_train, y_test, colnames, train_index, test_index, stats, fig1, fig2

    X_train, X_test, y_train, y_test, colnames, train_index, test_index, stats, spectra_plot, target_plot = visualize(change = hash_)
    M0, M000 = st.columns([1, .4])
    with M0:
        st.pyplot(spectra_plot) ######## Loaded graph
        st.pyplot(target_plot)

    with M000:
        st.write('Loaded data summary')
        st.write(stats)

################################################### END : visualize and split the data #######################################################




# if 'model_type' not in st.session_state:
#     st.cache_data.model_type = ''
###################################################     BEGIN : Create Model     ####################################################
model_type = None # initialize the selected regression algorithm
Reg = None  # initialize the regression model object
# intervalls_with_cols = pd.DataFrame()
st.header("II - Model creation", divider = 'blue')
if not spectra.empty and not y.empty:
    M10, M20, M30, M40, M50 = st.columns([1, 1, 1, 1, 1])
    # select type of supervised modelling problem
    modes = ['regression', 'classification']
    mode = M10.radio("Analysis Methods", options=modes)
    p_hash(mode)
    match mode:
        case "regression":
            reg_algo = ["", "PLS", "LW-PLS", "TPE-iPLS"]
            model_type = M20.selectbox("Choose the regression algorithm", options = reg_algo, key = "model_type", format_func = lambda x: x if x else "<Select>")
        case 'classification':
            reg_algo = ["", "PLS", "LW-PLS", "TPE-iPLS", 'LDA']
            model_type = M20.selectbox("Choose the classification algorithm", options = reg_algo, key = 12, format_func = lambda x: x if x else "<Select>")
    # if  model_type != st.session_state.model_type:
    #     st.session_state.model_type = model_type
    #     increment()
    p_hash(model_type)

    # Training set preparation for cross-validation (CV)
    nb_folds = 3 # number of folds used for cross-validation (assumed default; adjust as needed)
    # Model creation-M20 columns
    with M20:
        @st.cache_data
        def RequestingModelCreation(change):
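            """Cross-validate and fit the selected algorithm (PLS, LW-PLS or TPE-iPLS) on the training set and return the fitted Reg object."""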
            # spectra_plot.savefig("./Report/figures/spectra_plot.png")
            # target_plot.savefig("./Report/figures/histogram.png")
            # st.session_state['hash_Reg'] = str(np.random.randint(2000000000))
            folds = KF_CV.CV(X_train, y_train, nb_folds)# split train data into nb_folds for cross_validation

            match model_type:
                case 'PLS':
                    Reg = Plsr(train = [X_train, y_train], test = [X_test, y_test], n_iter = 10, cv = nb_folds)
                    # reg_model = Reg.model_
                    rega = Reg.selected_features_

                case 'LW-PLS':
                    # export data to csv for Julia train/test
                    global x_train_np, y_train_np, x_test_np, y_test_np
                    data_to_work_with = ['x_train_np', 'y_train_np', 'x_test_np', 'y_test_np']
                    x_train_np, y_train_np, x_test_np, y_test_np = X_train.to_numpy(), y_train.to_numpy(), X_test.to_numpy(), y_test.to_numpy()
                    # Cross-Validation calculation
                    d = {}
                    for i in range(nb_folds):
                        fold_idx = folds[list(folds)[i]]
                        d["xtr_fold{0}".format(i+1)] = np.delete(x_train_np, fold_idx, axis = 0)
                        d["ytr_fold{0}".format(i+1)] = np.delete(y_train_np, fold_idx, axis = 0)
                        d["xte_fold{0}".format(i+1)] = x_train_np[fold_idx]
                        d["yte_fold{0}".format(i+1)] = y_train_np[fold_idx]
                        data_to_work_with.append("xtr_fold{0}".format(i+1))
                        data_to_work_with.append("ytr_fold{0}".format(i+1))
                        data_to_work_with.append("xte_fold{0}".format(i+1))
                        data_to_work_with.append("yte_fold{0}".format(i+1))
                    # check best pre-treatment with a global PLSR model
                    preReg = Plsr(train = [X_train, y_train], test = [X_test, y_test], n_iter=20)
                    temp_path = Path('temp/')
                    with open(temp_path / "lwplsr_preTreatments.json", "w+") as outfile:
                        json.dump(preReg.best_hyperparams_, outfile)
                    # export Xtrain, Xtest, Ytrain, Ytest and all CV folds to temp folder as csv files
                    for i in data_to_work_with:
                        if 'fold' in i:
                            j = d[i]
                        else:
                            j = globals()[i]
                            # st.write(j)
                        np.savetxt(temp_path / str(i + ".csv"), j, delimiter=",")
                    # run Julia Jchemo as subprocess
                    import subprocess
                    subprocess_path = Path("Class_Mod/")
                    subprocess.run([f"{sys.executable}", subprocess_path / "LWPLSR_Call.py"])
                    # retrieve json results from Julia JChemo
                    try:
                        with open(temp_path / "lwplsr_outputs.json", "r") as outfile:
                            Reg_json = json.load(outfile)
                            # delete csv files
                            for i in data_to_work_with: os.unlink(temp_path / str(i + ".csv"))
                        # delete json file after import
                        os.unlink(temp_path / "lwplsr_outputs.json")
                        os.unlink(temp_path / "lwplsr_preTreatments.json")
                        # format result data into Reg object
                        pred = ['pred_data_train', 'pred_data_test']### keys of the dict
                        for i in range(nb_folds):
                            pred.append("CV" + str(i+1)) ### add cv folds keys to pred
                        # global Reg
                        # Reg = type('obj', (object,), {'model_' : Reg_json['model'], 'best_hyperparams_' : Reg_json['best_lwplsr_params'],
                        #                             'pred_data_' : [pd.json_normalize(Reg_json[i]) for i in pred]})
                        # global Reg
                        Reg = lw(Reg_json = Reg_json, pred = pred)
                        # reg_model = Reg.model_
                        Reg.CV_results_ = pd.DataFrame()
                        Reg.cv_data_ = {'YpredCV' : {}, 'idxCV' : {}}
                        # set indexes to Reg.pred_data (train, test, folds idx)
                        for i in range(len(pred)):
                            Reg.pred_data_[i] = Reg.pred_data_[i].T.reset_index().drop(columns = ['index'])
                            if i == 0: # data_train
                                # Reg.pred_data_[i] = np.array(Reg.pred_data_[i])
                                Reg.pred_data_[i].index = list(y_train.index)
                                Reg.pred_data_[i] = Reg.pred_data_[i].iloc[:,0]
                            elif i == 1: # data_test
                                # Reg.pred_data_[i] = np.array(Reg.pred_data_[i])
                                Reg.pred_data_[i].index = list(y_test.index)
                                Reg.pred_data_[i] = Reg.pred_data_[i].iloc[:,0]
                            else:
                                # CVi
                                Reg.pred_data_[i].index = folds[list(folds)[i-2]]
                                # Reg.CV_results_ = pd.concat([Reg.CV_results_, Reg.pred_data_[i]])
                                Reg.cv_data_['YpredCV']['Fold' + str(i-1)] = np.array(Reg.pred_data_[i]).reshape(-1)
                                Reg.cv_data_['idxCV']['Fold' + str(i-1)] = np.array(folds[list(folds)[i-2]]).reshape(-1)

                        Reg.CV_results_= KF_CV.metrics_cv(y = y_train, ypcv = Reg.cv_data_['YpredCV'], folds = folds)[1]
                        #### cross validation results print
                        Reg.best_hyperparams_print = Reg.best_hyperparams_
                        ## plots
                        Reg.cv_data_ = KF_CV().meas_pred_eq(y = np.array(y_train), ypcv = Reg.cv_data_['YpredCV'], folds = folds)
                        Reg.pretreated_spectra_ = preReg.pretreated_spectra_
                        Reg.best_hyperparams_print = {**preReg.best_hyperparams_, **Reg.best_hyperparams_}
                        Reg.best_hyperparams_ = {**preReg.best_hyperparams_, **Reg.best_hyperparams_}

                        Reg.__hash__ = hash_data(Reg.best_hyperparams_print)
                    except FileNotFoundError as e:
                        Reg = None
                        for i in data_to_work_with: os.unlink(temp_path / str(i + ".csv"))

                case 'TPE-iPLS':
                    Reg = TpeIpls(train = [X_train, y_train], test=[X_test, y_test], n_intervall = s, n_iter=it, cv = nb_folds)
                    # reg_model = Reg.model_
                    global intervalls, intervalls_with_cols
                    intervalls = Reg.selected_features_.T
                    intervalls_with_cols = Reg.selected_features_.T
                    
                    for i in range(intervalls.shape[0]):
                        for j in range(intervalls.shape[1]):
                            intervalls_with_cols.iloc[i,j] = spectra.columns[intervalls.iloc[i,j]]
                    rega = Reg.selected_features_

                    st.session_state.intervalls = Reg.selected_features_.T
                    st.session_state.intervalls_with_cols = intervalls_with_cols
            return Reg
        




        if model_type:
            info = st.info('Info: The model is being created. This may take a few minutes.')
            if model_type == 'TPE-iPLS':# if model type is ipls then ask for the number of iterations and intervalls
                s = st.number_input(label = 'Enter the maximum number of intervals', min_value = 1, max_value = 6)
                it = st.number_input(label = 'Enter the number of iterations', min_value = 2, max_value = 500, value = 2)
            else:
                s, it = None, None
            p_hash(str(s)+str(it))
                
            remodel_button = st.button('re-model the data', key=4, help=None, type="primary", use_container_width=True, on_click=increment)
            p_hash(st.session_state.counter)
            Reg = RequestingModelCreation(change = hash_)
            reg_model = Reg.model_
            hash_ = joblib.hash(Reg)
        else:
            st.info('Info: Choose a modelling algorithm from the dropdown list!')
        if model_type:
            info.empty()
            if Reg:
                st.success('Success! Your model has been created and is ready to use.')
            else:
                st.error("Error: Model creation failed. Please try again.")
        
        if model_type:
            if model_type == 'TPE-iPLS':
                if 'intervalls' in st.session_state and 'intervalls_with_cols' in st.session_state:
                    intervalls = st.session_state.intervalls
                    intervalls_with_cols = st.session_state.intervalls_with_cols
    # remodel_button = st.button('re-model the data', key=4, help=None, type="primary", use_container_width=True)
    # if remodel_button:# remodel feature for re-tuning the model
    #     increment()

    # fitted values and predicted  values 
    yc = Reg.pred_data_[0]
    yt = Reg.pred_data_[1]

    
    M1, M2 = st.columns([2 ,4])
    with M1:
        # Show and export the preprocessing methods
        st.write('-- Spectral preprocessing info --')
        st.write(Reg.best_hyperparams_print)
        @st.cache_data(show_spinner =False)
        def preprocessings(change):
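            """Export the best preprocessing hyperparameters to Report/out/Preprocessing.json for later reporting."""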
            with open('Report/out/Preprocessing.json', "w") as outfile:
                json.dump(Reg.best_hyperparams_, outfile)
        preprocessings(change=hash_)

        # Show the model performance table
        st.write("-- Model performance --")
        if model_type != reg_algo[2]:
            model_per = pd.DataFrame(metrics(c = [y_train, yc], t = [y_test, yt], method = 'regression').scores_)
        else:
            model_per = pd.DataFrame(metrics(c = [y_train, yc], t = [y_test, yt], method = 'regression').scores_)

    
    # M1.dataframe(model_per) # duplicate with line 371
    @st.cache_data(show_spinner =False)
    def prep_important(change, model_type, model_hash):
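        """Plot the average raw and pretreated spectra, highlighting the variables or wavelength intervals the model relies on."""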
        fig, (ax1, ax2) = plt.subplots(2,1, figsize = (12, 4), sharex=True)
        ax1.plot(colnames, np.mean(X_train, axis = 0), color = 'black', label = 'Average spectrum (Raw)')
        # if model_type != reg_algo[2]:
        ax2.plot(colnames, np.mean(Reg.pretreated_spectra_ , axis = 0), color = 'black', label = 'Average spectrum (Pretreated)')
        ax2.set_xlabel('Wavelengths')
        plt.tight_layout()
        for ax in (ax1, ax2):
            ax.grid(color = 'grey', linestyle = ':', linewidth = 0.2)
            ax.margins(x = 0)
            ax.legend(loc = 'upper right')
            ax.set_ylabel('Intensity')
            if model_type == 'TPE-iPLS':
                # highlight the wavelength intervals selected by TPE-iPLS
                for j in range(s):
                    if np.array(spectra.columns).dtype.kind in ['i', 'f']:
                        wl_min, wl_max = intervalls_with_cols.iloc[j, 0], intervalls_with_cols.iloc[j, 1]
                    else:
                        wl_min, wl_max = intervalls.iloc[j, 0], intervalls.iloc[j, 1]
                    ax.axvspan(wl_min, wl_max, color = '#00ff00', alpha = 0.5, lw = 0)
        if model_type == 'PLS':
            ax1.scatter(colnames[np.array(Reg.sel_ratio_.index)], np.mean(X_train, axis = 0).iloc[np.array(Reg.sel_ratio_.index)],
                            color = '#7ab0c7', label = 'Important variables')
            ax2.scatter(colnames[Reg.sel_ratio_.index], np.mean(Reg.pretreated_spectra_, axis = 0)[np.array(Reg.sel_ratio_.index)],
                            color = '#7ab0c7', label = 'Important variables')
            ax1.legend()
            ax2.legend()
        return fig
    
    with M2:## Visualize raw,preprocessed spectra, and selected intervalls(in case of ipls) 
        if model_type == 'TPE-iPLS':
            st.write('-- Important Spectral regions used for model creation --')
            st.table(intervalls_with_cols)
        st.write('-- Visualization of the spectral regions used for model creation --')
        imp_fig = prep_important(change = st.session_state.counter, model_type = model_type, model_hash = hash_)
        st.pyplot(imp_fig)
        # Display CV results
    numbers_dict = {1: "One", 2: "Two",3: "Three",4: "Four",5: "Five",
                    6: "Six",7: "Seven",8: "Eight",9: "Nine",10: "Ten"}
    st.header(f" {numbers_dict[nb_folds]}-Fold Cross-Validation results")
    @st.cache_data(show_spinner =False)
    def cv_display(change):
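        """Build the cross-validation measured-vs-predicted plots: one overlaid scatter and one facetted per fold."""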
        fig1 = px.scatter(Reg.cv_data_[0], x = 'Measured', y = 'Predicted' , trendline = 'ols', color = 'Folds', symbol = 'Folds',
                color_discrete_sequence=px.colors.qualitative.G10)
        fig1.add_shape(type = 'line', x0 = .95 * min(Reg.cv_data_[0].loc[:,'Measured']), x1 = 1.05 * max(Reg.cv_data_[0].loc[:,'Measured']),
                        y0 = .95 * min(Reg.cv_data_[0].loc[:,'Measured']), y1 = 1.05 * max(Reg.cv_data_[0].loc[:,'Measured']), line = dict(color = 'black', dash = "dash"))
        fig1.update_traces(marker_size = 7, showlegend=False)
        
        fig0 = px.scatter(Reg.cv_data_[0], x ='Measured', y = 'Predicted' , trendline = 'ols', color = 'Folds', symbol = "Folds", facet_col = 'Folds',facet_col_wrap = 1,
                color_discrete_sequence = px.colors.qualitative.G10, text = 'index', width = 800, height = 1000)
        fig0.update_traces(marker_size = 8, showlegend = False)
        return fig0, fig1
    fig0, fig1 = cv_display(change= Reg.cv_data_)
    cv1, cv2 = st.columns([2, 2])
    with cv2:
        cv_results = pd.DataFrame(Reg.CV_results_).round(4)# CV table
        st.write('-- Cross-Validation Summary--')
        st.write(cv_results.astype(str).style.map(lambda _: "background-color: #cecece;", subset = (cv_results.index.drop(['sd', 'mean', 'cv']), slice(None))))
        
        st.write('-- Out-of-Fold Predictions Visualization (All in one) --')
        st.plotly_chart(fig1, use_container_width = True)
    with cv1:
        st.write('-- Out-of-Fold Predictions Visualization (Separate plots) --')
        st.plotly_chart(fig0, use_container_width=True)
    

    ###################################################    BEGIN : Model Diagnosis    ####################################################
st.header("III - Model Diagnosis", divider='blue')
if Reg:
    # signal preprocessing results preparation for latex report
    prep_para = Reg.best_hyperparams_.copy()
    if model_type != reg_algo[2]:
        prep_para.pop('n_components')
        for i in ['deriv','polyorder']:
            if Reg.best_hyperparams_[i] == 0:
                prep_para[i] = '0'
            elif Reg.best_hyperparams_[i] == 1:
                prep_para[i] = '1st'
            elif Reg.best_hyperparams_[i] > 1:
                prep_para[i] = f"{Reg.best_hyperparams_[i]}nd"
    # reg plot and residuals plot
    measured_vs_predicted = reg_plot([y_train, y_test], [yc, yt], train_idx = train_index, test_idx = test_index)
    residuals_plot = resid_plot([y_train, y_test], [yc, yt], train_idx = train_index, test_idx = test_index)
    M7, M8 = st.columns([2,2])
    with M7:
        st.write('Predicted vs Measured values')
        st.pyplot(measured_vs_predicted)
        # regression_plot.savefig('./Report/figures/measured_vs_predicted.png')
    with M8:
        st.write('Residuals plot')
        st.pyplot(residuals_plot)
        # residual_plot.savefig('./Report/figures/residuals_plot.png')

###################################################      END : Model Diagnosis   #######################################################
###################################################    BEGIN : Download results    #######################################################
##########################################################################################################################################
##########################################################################################################################################
if Reg:
    st.header('Download the analysis results')
    st.write("**Note:** Please check the box only after you have finished processing your data and are satisfied with the results. Checking the box prematurely may slow down the app and could lead to crashes.")
    decis = st.checkbox("Yes, I want to download the results")
    if decis:
        @st.cache_data(show_spinner =False)
        def export_report(change):
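            """Build the LaTeX report content for the selected model type via report.report."""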
            match model_type:
                case 'PLS':
                        latex_report = report.report('Predictive model development', file_name, stats, list(prep_para.values()), model_type, model_per, cv_results)
                        
                case 'LW-PLS':
                        latex_report = report.report('Predictive model development', file_name, stats,
                                                    list({key: Reg.best_hyperparams_[key] for key in ['deriv', 'normalization', 'polyorder', 'window_length'] if key in Reg.best_hyperparams_}.values()), model_type, model_per, cv_results)
                        
                case 'TPE-iPLS':
                        latex_report = report.report('Predictive model development', file_name, stats,
                                                    list({key: Reg.best_hyperparams_[key] for key in ['deriv', 'normalization', 'polyorder', 'window_length'] if key in Reg.best_hyperparams_}.values()), model_type, model_per, cv_results)
                        
                case _:
                    st.warning('Data processing has not been performed or finished yet!', icon = "⚠️")

        @st.cache_data(show_spinner =False)
        def preparing_results_for_downloading(change):
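            """Write the datasets, fitted model, figures, preprocessing settings and report to Report/out/ so they can be zipped for download."""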
            match file:
                # load csv file
                case 'csv':
                    xfile.to_csv('Report/out/dataset/'+ xcal_csv.name, sep = ';', encoding = 'utf-8', mode = 'a')
                    yfile.to_csv('Report/out/dataset/'+ ycal_csv.name, sep = ';', encoding = 'utf-8', mode = 'a')
                case 'dx':
                    with open('Report/out/dataset/'+data_file.name, 'w') as dd:
                        dd.write(dxdata)
                                    
            with open('./Report/out/model/'+ model_type + '.pkl','wb') as f:# export model
                joblib.dump(reg_model, f)
            figpath ='./Report/out/figures/'
            spectra_plot.savefig(figpath + "spectra_plot.png")
            target_plot.savefig(figpath + "histogram.png")
            imp_fig.savefig(figpath + "variable_importance.png")
            fig1.write_image(figpath + "meas_vs_pred_cv_all.png")
            fig0.write_image(figpath + "meas_vs_pred_cv_onebyone.png")
            measured_vs_predicted.savefig(figpath + 'measured_vs_predicted.png')
            residuals_plot.savefig(figpath + 'residuals_plot.png')
            with open('Report/out/Preprocessing.json', "w") as outfile:
                json.dump(Reg.best_hyperparams_, outfile)
            
            if model_type == 'TPE-iPLS': # export selected wavelengths
                wlfilename = './Report/out/model/'+ model_type+'-selected_wavelengths.xlsx'
                selected_wl = pd.concat([intervalls_with_cols.T, Reg.selected_features_], axis = 0, ignore_index = True).T
                selected_wl.columns = ['wl_from', 'wl_to', 'idx_from', 'idx_to']
                selected_wl.to_excel(wlfilename)
            
            export_report(change = hash_)
            if Path("./Report/report.tex").exists():
                report.generate_report(change = hash_)
            if Path("./Report/report.pdf").exists():
                shutil.move("./Report/report.pdf", "./Report/out/report.pdf")
            return change
        preparing_results_for_downloading(change = hash_)
        import tempfile
        @st.cache_data(show_spinner =False)
        def tempdir(change):
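            """Zip the Report/out folder inside a temporary directory and return the directory name together with the zip bytes."""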
            with  tempfile.TemporaryDirectory( prefix="results", dir="./Report") as temp_dir:# create a temp directory
                tempdirname = os.path.split(temp_dir)[1]

                if len(os.listdir('./Report/out/figures/'))>2:
                    shutil.make_archive(base_name="./Report/Results", format="zip", base_dir="out", root_dir = "./Report")# create a zip file
                    shutil.move("./Report/Results.zip", f"./Report/{tempdirname}/Results.zip")# put the inside the temp dir
                    with open(f"./Report/{tempdirname}/Results.zip", "rb") as f:
                        zip_data = f.read()
            return tempdirname, zip_data

        date_time = datetime.datetime.now().strftime('%y%m%d%H%M')
        try:
            tempdirname, zip_data = tempdir(change = hash_)
            st.download_button(label = 'Download', data = zip_data, file_name = f'Nirs_Workflow_{date_time}_Reg_.zip', mime = "application/zip",
                        args = None, kwargs = None, type = "primary", use_container_width = True)
        except Exception:
            pass

        delete_files(keep = ['.py', '.pyc','.bib'])