Skip to content
Snippets Groups Projects
2-model_creation.py 37.6 KiB
Newer Older
DIANE's avatar
DIANE committed
from common import *
DIANE's avatar
UI  
DIANE committed
st.set_page_config(page_title = "NIRS Utils", page_icon = ":goat:", layout = "wide")

DIANE's avatar
DIANE committed

DIANE's avatar
UI  
DIANE committed
# layout
UiComponents(pagespath = pages_folder, csspath= css_file,imgpath=image_path ,
             header=True, sidebar= True, bgimg=False, colborders=True)
DIANE's avatar
DIANE committed
hash_ = ''
DIANE's avatar
DIANE committed

DIANE's avatar
DIANE committed
# Initialize the re-model counter in session state on the first run of the page.
# Its value is folded into `hash_`, which is handed to the cached functions below
# as their `change` argument.
if 'counter' not in st.session_state:
    st.session_state.counter = 0


def increment():
    """Add one to the counter stored in ``st.session_state``.

    Used as the ``on_click`` callback of the 're-model the data' button, so each
    click changes the counter and therefore the hash derived from it.
    """
    st.session_state.counter += 1
DIANE's avatar
DIANE committed

# ####################################  Methods ##############################################
DIANE's avatar
DIANE committed
def delete_files(keep, root='report/'):
    """Delete every file under ``root`` except the logo and files with a kept suffix.

    The directory tree is walked recursively; only files are removed, the
    directories themselves are left in place.

    Parameters
    ----------
    keep : iterable of str
        File-name suffixes (e.g. ``'.py'``) of the files that must be preserved.
    root : str or path-like, default 'report/'
        Directory whose content is cleaned.
    """
    # str.endswith accepts a tuple of candidate suffixes
    kept_suffixes = tuple(keep)
    for folder, _dirs, files in os.walk(root, topdown=False):
        for file in files:
            # 'logo_cefe.png' is always preserved, whatever `keep` contains
            if file == 'logo_cefe.png' or file.endswith(kept_suffixes):
                continue
            os.remove(os.path.join(folder, file))

class lw:
    """Plain container for the LW-PLS results read back from the JSON output.

    Parameters
    ----------
    Reg_json : dict
        Parsed results. Must provide the keys ``'model'`` and
        ``'best_lwplsr_params'`` plus every key listed in ``pred``.
    pred : iterable of str
        Keys of ``Reg_json`` whose values are flattened with ``json_normalize``.
    """
    def __init__(self, Reg_json, pred):
        self.model_ = Reg_json['model']
        self.best_hyperparams_ = Reg_json['best_lwplsr_params']
        # one DataFrame per requested key, in the order given by `pred`
        self.pred_data_ = [json_normalize(Reg_json[key]) for key in pred]
DIANE's avatar
DIANE committed

DIANE's avatar
DIANE committed
################ clean the results dir #############
DIANE's avatar
DIANE committed
delete_files(keep = ['.py', '.pyc','.bib'])
DIANE's avatar
DIANE committed
# Make sure the three output sub-folders of ./report/out/ exist
for i in ['model', 'dataset', 'figures']:
    dirpath = Path('./report/out/')/i
    if dirpath.exists():
        continue
    dirpath.mkdir(parents=True, exist_ok=True)
# ####################################### page preamble #######################################
DIANE's avatar
DIANE committed
st.header("Calibration Model Development") # page title
st.markdown("Create a predictive model, then use it for predicting your target variable (chemical data) from NIRS spectra")
DIANE's avatar
DIANE committed
c0, c1 = st.columns([1, .4])
c0.image("./images/model_creation.png", use_column_width = True) # graphical abstract
DIANE's avatar
DIANE committed

################################################################# Begin : I- Data loading and preparation ######################################
files_format = ['csv', 'dx'] # Supported files format
DIANE's avatar
DIANE committed
file = c1.radio('Select files format:', options = files_format,horizontal = True) # Select a file format
DIANE's avatar
DIANE committed
spectra = DataFrame() # preallocate the spectral data block
y = DataFrame() # preallocate the target(s) data block
match file:
    # load csv file
    case 'csv':
DIANE's avatar
DIANE committed
        from utils.data_parsing import CsvParser
        def read_csv(file = file, change = None, dec = None, sep= None, names = None, hdr = None):
            """Clean the report folder, then parse ``file`` with ``CsvParser``.

            ``dec``, ``sep``, ``names`` and ``hdr`` are forwarded to ``CsvParser.parse`` as
            ``decimal``, ``separator``, ``index_col`` and ``header``. ``change`` is not
            used in the body (presumably a cache-invalidation key -- TODO confirm).

            Returns the parser's ``float``, ``meta_data``, ``meta_data_st_`` and ``df``
            attributes, in that order.
            """
            # files from a previous run are removed before new data is loaded
            delete_files(keep = ['.py', '.pyc','.bib'])
            from utils.data_parsing import CsvParser
            parser = CsvParser(file= file)
            parser.parse(decimal = dec, separator = sep, index_col = names, header = hdr)
            parsed = (parser.float, parser.meta_data, parser.meta_data_st_, parser.df)
            return parsed

DIANE's avatar
DIANE committed
        with c1:
DIANE's avatar
DIANE committed
            xcal_csv = st.file_uploader("Select NIRS Data", type = "csv", help = " :mushroom: select a csv matrix with samples as rows and lambdas as columns")
DIANE's avatar
DIANE committed
                c1_1, c2_2 = st.columns([.5, .5])
                with c1_1:
                    decx = st.radio('decimal(x):', options= [".", ","], horizontal = True)
                    sepx = st.radio("separator(x):", options = [";", ","], horizontal = True)
                with c2_2:
                    phdrx = st.radio("header(x): ", options = ["yes", "no"], horizontal = True)
                    pnamesx = st.radio("samples name(x):", options = ["yes", "no"], horizontal = True)

                hdrx = 0 if phdrx =="yes" else None
                namesx = 0 if pnamesx =="yes" else None
                try:
                    spectra, meta_data, md_df_st_, xfile = read_csv(file= xcal_csv, change = hash_, dec = decx, sep = sepx, names =namesx, hdr = hdrx)
                    st.success('xfile has been loaded successfully')
                except:
                    st.error('Error: The xfile has not been loaded successfully, please consider tuning the dialect settings!')
                
DIANE's avatar
DIANE committed
                st.info('Info: Insert your spectral data file above!')
DIANE's avatar
DIANE committed



DIANE's avatar
DIANE committed
            ycal_csv = st.file_uploader("Select corresponding Chemical Data", type = "csv", help = " :mushroom: select a csv matrix with samples as rows and chemical values as a column")
DIANE's avatar
DIANE committed
                c1_1, c2_2 = st.columns([.5, .5])
                with c1_1:
                    decy = st.radio('decimal(y):', options= [".", ","], horizontal = True)
                    sepy = st.radio("separator(y):", options = [";", ","], horizontal = True)
                with c2_2:
                    phdry = st.radio("header(y): ", options = ["yes", "no"], horizontal = True)
                    pnamesy = st.radio("samples name(y):", options = ["yes", "no"], horizontal = True)

                hdry = 0 if phdry =="yes" else None
                namesy = 0 if pnamesy =="yes" else None
                try:
                    chem_data, meta_data, md_df_st_, yfile = read_csv(file= ycal_csv, change = hash_, dec = decy, sep = sepy, names =namesy, hdr = hdry)
                    st.success('yfile has been loaded successfully')
                except:
                    st.error('Error: The yfile has not been loaded successfully, please consider tuning the dialect settings!')
DIANE's avatar
DIANE committed

DIANE's avatar
DIANE committed
                st.info('Info: Insert your target data file above!')


            # AFTER LOADING BOTH X AND Y FILES
            if xcal_csv and ycal_csv:
                # create a str instance for storing the hash of both x and y data
DIANE's avatar
DIANE committed
                xy_str = ''
                from io import StringIO
                for i in ["xcal_csv", "ycal_csv"]:
                    stringio = StringIO(eval(f'{i}.getvalue().decode("utf-8")'))
DIANE's avatar
DIANE committed
                    xy_str += str(stringio.read())
DIANE's avatar
DIANE committed
                # p_hash([xy_str + str(xcal_csv.name) + str(ycal_csv.name), hdrx, sepx, hdry, sepy])
DIANE's avatar
DIANE committed
                hash_ = ObjectHash(current=hash_,add = xy_str)
DIANE's avatar
DIANE committed
                
DIANE's avatar
DIANE committed

DIANE's avatar
DIANE committed



            


                # xfile, yfile, file_name = csv_loader(change = hash_)
                # yfile =  read_csv(file= ycal_csv, change = hash_)



                if yfile.shape[1]>0 and xfile.shape[1]>0 :    
DIANE's avatar
DIANE committed
                    if 'chem_data' in globals():
DIANE's avatar
DIANE committed
                        if chem_data.shape[1] > 1:
DIANE's avatar
DIANE committed
                            yname = c1.selectbox('Select a target', options = ['']+chem_data.columns.tolist(), format_func = lambda x: x if x else "<Select>")
DIANE's avatar
DIANE committed
                            if yname:
                                y = chem_data.loc[:, yname]
                            else:
DIANE's avatar
DIANE committed
                                c1.info('Info: Select the target analyte from the drop down list!')
                        elif chem_data.shape[1] == 1:
DIANE's avatar
DIANE committed
                            y = chem_data.iloc[:, 0]
DIANE's avatar
DIANE committed
                            yname = chem_data.iloc[:, [0]].columns[0]
DIANE's avatar
DIANE committed
                        
DIANE's avatar
DIANE committed
                    if not y.empty:
                        if spectra.shape[0] != y.shape[0]:
                            st.error('Error: X and Y have different sample size')
DIANE's avatar
DIANE committed
                            y = DataFrame
                            spectra = DataFrame
DIANE's avatar
DIANE committed

DIANE's avatar
DIANE committed
                    st.error('Error: The data has not been loaded successfully, please consider tuning the dialect settings!')
DIANE's avatar
DIANE committed
        with c1:
DIANE's avatar
DIANE committed
            data_file = st.file_uploader("Select Data", type = ".dx", help = " :mushroom: select a dx file")
            if data_file:
                file_name = str(data_file.name)
                ## creating the temp file
DIANE's avatar
DIANE committed
                with NamedTemporaryFile(delete = False, suffix = ".dx") as tmp:
DIANE's avatar
DIANE committed
                    tmp.write(data_file.read())
                    tmp_path = tmp.name
                    with open(tmp.name, 'r') as dd:
                        dxdata = dd.read()
DIANE's avatar
DIANE committed
                        # p_hash(str(dxdata)+str(data_file.name))
DIANE's avatar
DIANE committed

                ## load and parse the temp dx file
                @st.cache_data
                def read_dx(tmp_path):
                    """Parse the temporary dx file at ``tmp_path`` with ``JcampParser``.

                    Returns the parser's ``chem_data``, ``specs_df_``, ``meta_data`` and
                    ``meta_data_st_`` attributes, in that order.
                    """
                    # NOTE(review): the temp file is created with delete=False and is not
                    # removed here -- confirm whether it is cleaned up elsewhere.
                    parser = JcampParser(path = tmp_path)
                    parser.parse()
                    return parser.chem_data, parser.specs_df_, parser.meta_data, parser.meta_data_st_
DIANE's avatar
DIANE committed
                chem_data, spectra, meta_data, meta_data_st = read_dx(tmp_path = tmp_path)
DIANE's avatar
DIANE committed
                    st.success("Info: The data have been loaded successfully", icon = "")

                if chem_data.shape[1]>0:
DIANE's avatar
DIANE committed
                    yname = st.selectbox('Select the target analyte', options = ['']+chem_data.columns.tolist(), format_func = lambda x: x if x else "<Select>" )
                    if yname:
                        measured = chem_data.loc[:, yname] > 0
                        y = chem_data.loc[:, yname].loc[measured]
                        spectra = spectra.loc[measured]
                        
                        
                    else:
                        st.info('Info: Please select the target analyte from the dropdown list!')
DIANE's avatar
DIANE committed
                    st.warning('Warning: your file includes no target variables to model !', icon = "⚠️")
DIANE's avatar
DIANE committed


DIANE's avatar
DIANE committed
                st.info('Info: Load your file here!')
################################################### END : I- Data loading and preparation ####################################################
DIANE's avatar
DIANE committed



DIANE's avatar
DIANE committed

DIANE's avatar
DIANE committed

DIANE's avatar
DIANE committed

################################################### BEGIN : visualize and split the data ####################################################
DIANE's avatar
DIANE committed
st.subheader("I - Data visualization", divider = 'blue')
if not spectra.empty and not y.empty:
DIANE's avatar
DIANE committed
    # p_hash(y)
    # p_hash(np.mean(spectra))
DIANE's avatar
DIANE committed
    if np.array(spectra.columns).dtype.kind in ['i', 'f']:
        colnames = spectra.columns
    else:
        colnames = np.arange(spectra.shape[1])
DIANE's avatar
DIANE committed

    from utils.miscellaneous import data_split
DIANE's avatar
DIANE committed
    X_train, X_test, y_train, y_test, train_index, test_index = data_split(x=spectra, y=y)
    
DIANE's avatar
DIANE committed

DIANE's avatar
DIANE committed
    #### insight on loaded data
    spectra_plot = plot_spectra(spectra, xunits = 'Wavelength/Wavenumber', yunits = "Signal intensity")
    target_plot = hist(y = y, y_train = y_train, y_test = y_test, target_name=yname)
DIANE's avatar
DIANE committed
    from utils.miscellaneous import desc_stats
DIANE's avatar
DIANE committed
    stats = DataFrame([desc_stats(y_train), desc_stats(y_test), desc_stats(y)], index =['train', 'test', 'total'] ).round(2) 
DIANE's avatar
DIANE committed
    # fig1, ax1 = plt.subplots( figsize = (12, 3))
    # spectra.T.plot(legend = False, ax = ax1, linestyle = '-', linewidth = 0.6)
    # ax1.set_ylabel('Signal intensity')
    # ax1.margins(0)
    # plt.tight_layout()
    c2, c3 = st.columns([1, .4])
    with c2:
DIANE's avatar
DIANE committed
        st.pyplot(spectra_plot) ######## Loaded graph
        st.pyplot(target_plot)
DIANE's avatar
DIANE committed

DIANE's avatar
DIANE committed
    with c3:
        st.write('Loaded data summary')
        st.write(stats)

################################################### END : visualize and split the data #######################################################




DIANE's avatar
DIANE committed
# if 'model_type' not in st.session_state:
#     st.cache_data.model_type = ''
DIANE's avatar
DIANE committed
#     ###################################################     BEGIN : Create Model     ####################################################
model_type = None # initialize the selected regression algorithm
DIANE's avatar
DIANE committed
Reg = None  # initialize the regression model object
DIANE's avatar
DIANE committed
# intervalls_with_cols = DataFrame()
DIANE's avatar
DIANE committed

DIANE's avatar
DIANE committed
st.subheader("II - Model creation", divider = 'blue')
DIANE's avatar
DIANE committed
if not spectra.empty and not y.empty:
DIANE's avatar
DIANE committed
    c4, c5, c6 = st.columns([1, 1, 3])
    with c4:
        # select type of supervised modelling problem
        var_nature = ['Continuous', 'Categorical']
        mode = c4.radio("The nature of the target variable :", options = var_nature)
DIANE's avatar
DIANE committed
        # p_hash(mode)
DIANE's avatar
DIANE committed
        match mode:
            case "Continuous":
                reg_algo = ["", "PLS", "LW-PLS", "TPE-iPLS"]
                st.markdown(f'Example1: Quantifying the volume of nectar consumed by a pollinator during a foraging session.')
                st.markdown(f"Example2: Measure the sugar content, amino acids, or other compounds in nectar from different flower species.")
            case 'Categorical':
                reg_algo = ["", "PLS", "LW-PLS", "TPE-iPLS", 'LDA']
                st.markdown(f"Example1: Classifying pollinators into categories such as bees, butterflies, moths, and beetles.")
                st.markdown(f"Example2: Classifying plants based on their health status, such as healthy, stressed, or diseased, using NIR spectral data.")
    with c5:
        model_type = c5.selectbox("Choose a modelling algorithm:", options = reg_algo, key = 12, format_func = lambda x: x if x else "<Select>")
    
    with c6:
        st.markdown("-------------")
        match model_type:
            case "PLS":
                st.markdown("#### For further details on the PLS (Partial Least Squares) algorithm, check the following reference:")
                st.markdown('##### https://www.tandfonline.com/doi/abs/10.1080/03610921003778225')
                
            case "LW-PLS":
                st.markdown("#### For further details on the LW-PLS (Locally Weighted - Partial Least Squares) algorithm, check the following reference:")
                st.markdown('##### https://analyticalsciencejournals.onlinelibrary.wiley.com/doi/full/10.1002/cem.3117')
            
            case "TPE-iPLS":
                st.markdown("#### For further details on the TPE-iPLS (Tree-structured Parzen Estimator based interval-Partial Least Squares) algorithm, which is a wrapper method for interval selection, check the following references:")
                st.markdown("##### https://papers.nips.cc/paper_files/paper/2011/file/86e8f7ab32cfd12577bc2619bc635690-Paper.pdf")
                st.markdown('##### https://www.tandfonline.com/doi/abs/10.1080/03610921003778225')
                st.markdown('##### https://journals.sagepub.com/doi/abs/10.1366/0003702001949500')
        st.markdown("-------------")

DIANE's avatar
DIANE committed
    # if  model_type != st.session_state.model_type:
    #     st.session_state.model_type = model_type
    #     increment()
DIANE's avatar
DIANE committed
    # p_hash(model_type)
DIANE's avatar
DIANE committed

DIANE's avatar
DIANE committed
    # Training set preparation for cross-validation(CV)
DIANE's avatar
DIANE committed

    # Model creation-M20 columns
DIANE's avatar
DIANE committed
    with c5:
DIANE's avatar
DIANE committed
        @st.cache_data
DIANE's avatar
DIANE committed
        def RequestingModelCreation(change):
DIANE's avatar
DIANE committed
            # spectra_plot.savefig("./report/figures/spectra_plot.png")
            # target_plot.savefig("./report/figures/histogram.png")
DIANE's avatar
DIANE committed
            # st.session_state['hash_Reg'] = str(np.random.randint(2000000000))
DIANE's avatar
DIANE committed
            folds = KF_CV.CV(X_train, y_train, nb_folds)# split train data into nb_folds for cross_validation

DIANE's avatar
DIANE committed
            match model_type:
DIANE's avatar
DIANE committed
                case 'PLS':
DIANE's avatar
DIANE committed
                    from utils.regress import Plsr
DIANE's avatar
DIANE committed
                    Reg = Plsr(train = [X_train, y_train], test = [X_test, y_test], n_iter = 100, cv = nb_folds)
DIANE's avatar
DIANE committed
                    # reg_model = Reg.model_
DIANE's avatar
DIANE committed
                    rega = Reg.selected_features_

                case 'LW-PLS':
                    # export data to csv for Julia train/test
                    global x_train_np, y_train_np, x_test_np, y_test_np
                    data_to_work_with = ['x_train_np', 'y_train_np', 'x_test_np', 'y_test_np']
                    x_train_np, y_train_np, x_test_np, y_test_np = X_train.to_numpy(), y_train.to_numpy(), X_test.to_numpy(), y_test.to_numpy()
                    # Cross-Validation calculation
                    d = {}
                    for i in range(nb_folds):
                        d["xtr_fold{0}".format(i+1)], d["ytr_fold{0}".format(i+1)], d["xte_fold{0}".format(i+1)], d["yte_fold{0}".format(i+1)] = np.delete(x_train_np, folds[list(folds)[i]], axis=0), np.delete(y_train_np, folds[list(folds)[i]], axis=0), x_train_np[folds[list(folds)[i]]], y_train_np[folds[list(folds)[i]]]
                        data_to_work_with.append("xtr_fold{0}".format(i+1))
                        data_to_work_with.append("ytr_fold{0}".format(i+1))
                        data_to_work_with.append("xte_fold{0}".format(i+1))
                        data_to_work_with.append("yte_fold{0}".format(i+1))
                    # check best pre-treatment with a global PLSR model
DIANE's avatar
DIANE committed
                    from utils.regress import Plsr
                    preReg = Plsr(train = [X_train, y_train], test = [X_test, y_test], n_iter=100)
DIANE's avatar
DIANE committed
                    temp_path = Path('temp/')
                    with open(temp_path / "lwplsr_preTreatments.json", "w+") as outfile:
                        json.dump(preReg.best_hyperparams_, outfile)
                    # export Xtrain, Xtest, Ytrain, Ytest and all CV folds to temp folder as csv files
                    for i in data_to_work_with:
                        if 'fold' in i:
                            j = d[i]
                        else:
                            j = globals()[i]
                            # st.write(j)
                        np.savetxt(temp_path / str(i + ".csv"), j, delimiter=",")
                    open(temp_path / 'model', 'w').close()
DIANE's avatar
DIANE committed
                    # run Julia Jchemo as subprocess
                    import subprocess
DIANE's avatar
DIANE committed
                    subprocess_path = Path("utils/")
DIANE's avatar
DIANE committed
                    subprocess.run([f"{sys.executable}", subprocess_path / "lwplsr_call.py"])
DIANE's avatar
DIANE committed
                    # retrieve json results from Julia JChemo
                    try:
                        with open(temp_path / "lwplsr_outputs.json", "r") as outfile:
                            Reg_json = json.load(outfile)
                            # delete csv files
                            for i in data_to_work_with: os.unlink(temp_path / str(i + ".csv"))
                        # delete json file after import
                        os.unlink(temp_path / "lwplsr_outputs.json")
                        os.unlink(temp_path / "lwplsr_preTreatments.json")
DIANE's avatar
DIANE committed
                        # format result data into Reg object
                        pred = ['pred_data_train', 'pred_data_test']### keys of the dict
                        for i in range(nb_folds):
DIANE's avatar
DIANE committed
                            pred.append("CV" + str(i+1)) ### add cv folds keys to pred
DIANE's avatar
DIANE committed
                        # global Reg
                        # Reg = type('obj', (object,), {'model_' : Reg_json['model'], 'best_hyperparams_' : Reg_json['best_lwplsr_params'],
DIANE's avatar
DIANE committed
                        #                             'pred_data_' : [json_normalize(Reg_json[i]) for i in pred]})
DIANE's avatar
DIANE committed
                        # global Reg
                        Reg = lw(Reg_json = Reg_json, pred = pred)
DIANE's avatar
DIANE committed
                        # reg_model = Reg.model_
DIANE's avatar
DIANE committed
                        Reg.CV_results_ = DataFrame()
DIANE's avatar
DIANE committed
                        Reg.cv_data_ = {'YpredCV' : {}, 'idxCV' : {}}
                        # set indexes to Reg.pred_data (train, test, folds idx)
                        for i in range(len(pred)):
                            Reg.pred_data_[i] = Reg.pred_data_[i].T.reset_index().drop(columns = ['index'])
                            if i == 0: # data_train
                                # Reg.pred_data_[i] = np.array(Reg.pred_data_[i])
                                Reg.pred_data_[i].index = list(y_train.index)
                                Reg.pred_data_[i] = Reg.pred_data_[i].iloc[:,0]
                            elif i == 1: # data_test
                                # Reg.pred_data_[i] = np.array(Reg.pred_data_[i])
                                Reg.pred_data_[i].index = list(y_test.index)
                                Reg.pred_data_[i] = Reg.pred_data_[i].iloc[:,0]
                            else:
                                # CVi
                                Reg.pred_data_[i].index = folds[list(folds)[i-2]]
DIANE's avatar
DIANE committed
                                # Reg.CV_results_ = concat([Reg.CV_results_, Reg.pred_data_[i]])
DIANE's avatar
DIANE committed
                                Reg.cv_data_['YpredCV']['Fold' + str(i-1)] = np.array(Reg.pred_data_[i]).reshape(-1)
                                Reg.cv_data_['idxCV']['Fold' + str(i-1)] = np.array(folds[list(folds)[i-2]]).reshape(-1)

                        Reg.CV_results_= KF_CV.metrics_cv(y = y_train, ypcv = Reg.cv_data_['YpredCV'], folds = folds)[1]
                        #### cross validation results print
                        Reg.best_hyperparams_print = Reg.best_hyperparams_
                        ## plots
                        Reg.cv_data_ = KF_CV().meas_pred_eq(y = np.array(y_train), ypcv = Reg.cv_data_['YpredCV'], folds = folds)
                        Reg.pretreated_spectra_ = preReg.pretreated_spectra_
DIANE's avatar
DIANE committed
                        Reg.best_hyperparams_print = {**preReg.best_hyperparams_, **Reg.best_hyperparams_}
                        Reg.best_hyperparams_ = {**preReg.best_hyperparams_, **Reg.best_hyperparams_}

DIANE's avatar
DIANE committed
                        Reg.__hash__ = ObjectHash(current = hash_,add = Reg.best_hyperparams_print)
DIANE's avatar
DIANE committed
                    except FileNotFoundError as e:
                        Reg = None
                        for i in data_to_work_with: os.unlink(temp_path / str(i + ".csv"))

                case 'TPE-iPLS':
DIANE's avatar
DIANE committed
                    from utils.regress import TpeIpls
DIANE's avatar
DIANE committed
                    Reg = TpeIpls(train = [X_train, y_train], test=[X_test, y_test], n_intervall = s, n_iter=it, cv = nb_folds)
DIANE's avatar
DIANE committed
                    # reg_model = Reg.model_
DIANE's avatar
DIANE committed
                    
                    global intervalls, intervalls_with_cols
DIANE's avatar
DIANE committed
                    intervalls = Reg.selected_features_.T.copy()
                    intervalls_with_cols = Reg.selected_features_.T.copy().astype(str)
DIANE's avatar
DIANE committed
                    
                    for i in range(intervalls.shape[0]):
                        for j in range(intervalls.shape[1]):
                            intervalls_with_cols.iloc[i,j] = spectra.columns[intervalls.iloc[i,j]]
                    rega = Reg.selected_features_

                    st.session_state.intervalls = Reg.selected_features_.T
                    st.session_state.intervalls_with_cols = intervalls_with_cols
            return Reg
        




DIANE's avatar
DIANE committed
        if model_type:
            info = st.info('Info: The model is being created. This may take a few minutes.')
            if model_type == 'TPE-iPLS':# if model type is ipls then ask for the number of iterations and intervalls
DIANE's avatar
DIANE committed
                s = st.number_input(label = 'Enter the maximum number of intervals', min_value = 1, max_value = 6)
DIANE's avatar
DIANE committed
                it = st.number_input(label = 'Enter the number of iterations', min_value = 2, max_value = 500, value = 250)
DIANE's avatar
DIANE committed
            else:
                s, it = None, None
DIANE's avatar
DIANE committed
            hash_ = ObjectHash( current = hash_,add = str(s)+str(it))
DIANE's avatar
DIANE committed
                
            remodel_button = st.button('re-model the data', key=4, help=None, type="primary", use_container_width=True, on_click=increment)
DIANE's avatar
DIANE committed
            hash_ = ObjectHash(current = hash_, add = st.session_state.counter)
DIANE's avatar
DIANE committed
            Reg = RequestingModelCreation(change = hash_)
            reg_model = Reg.model_
DIANE's avatar
DIANE committed
            hash_ = hash(Reg)
DIANE's avatar
DIANE committed
            st.info('Info: Choose a modelling algorithm from the dropdown list!')
DIANE's avatar
DIANE committed
                
DIANE's avatar
DIANE committed
        if model_type:
            info.empty()
            if Reg:
                st.success('Success! Your model has been created and is ready to use.')
            else:
                st.error("Error: Model creation failed. Please try again.")
        
DIANE's avatar
DIANE committed
        if model_type:
            if model_type == 'TPE-iPLS':
DIANE's avatar
DIANE committed
                 if ('intervalls' and 'intervalls_with_cols') in st.session_state:
                    intervalls = st.session_state.intervalls
                    intervalls_with_cols = st.session_state.intervalls_with_cols
DIANE's avatar
DIANE committed

DIANE's avatar
DIANE committed
    # remodel_button = st.button('re-model the data', key=4, help=None, type="primary", use_container_width=True)
    # if remodel_button:# remodel feature for re-tuning the model
    #     increment()
DIANE's avatar
DIANE committed

    # fitted values and predicted  values 
DIANE's avatar
DIANE committed
    yc = Reg.pred_data_[0]
    yt = Reg.pred_data_[1]

DIANE's avatar
DIANE committed
    c7, c8 = st.columns([2 ,4])
    with c7:
        # Show and export the preprocessing methods
        st.write('-- Spectral preprocessing info --')
        st.write(Reg.best_hyperparams_print)
DIANE's avatar
DIANE committed
        @st.cache_data(show_spinner =False)
        def preprocessings(change):
            """Write ``Reg.best_hyperparams_`` as JSON to 'report/out/Preprocessing.json'.

            ``change`` is not used in the body; it is the only argument of this
            ``st.cache_data`` function. Returns ``None``.
            """
            with open('report/out/Preprocessing.json', "w") as outfile:
                json.dump(Reg.best_hyperparams_, outfile)
        preprocessings(change=hash_)

        # Show the model performance table
        st.write("-- Model performance --")
DIANE's avatar
DIANE committed
        if model_type != reg_algo[2]:
DIANE's avatar
DIANE committed
            model_per = DataFrame(metrics(c = [y_train, yc], t = [y_test, yt], method = 'regression').scores_)
DIANE's avatar
DIANE committed
            model_per = DataFrame(metrics(c = [y_train, yc], t = [y_test, yt], method = 'regression').scores_)    
DIANE's avatar
DIANE committed

    
    # M1.dataframe(model_per) # duplicate with line 371
DIANE's avatar
DIANE committed
    @st.cache_data(show_spinner =False)
    def prep_important(change, model_type, model_hash):
        """Plot the mean raw and pretreated spectra and mark what the model relies on.

        The upper axis shows the column-wise mean of ``X_train``, the lower one the
        column-wise mean of ``Reg.pretreated_spectra_``. For 'TPE-iPLS' the first ``s``
        intervals are shaded on both axes; for 'PLS' the variables listed in
        ``Reg.sel_ratio_.index`` are overlaid as scatter points.

        Reads the surrounding script's ``colnames``, ``X_train``, ``Reg``, ``spectra``,
        ``s``, ``intervalls`` and ``intervalls_with_cols``.

        Parameters
        ----------
        change, model_hash
            Not used in the body; they only take part in the ``st.cache_data`` call.
        model_type : str
            Name of the selected algorithm.

        Returns
        -------
        The matplotlib figure holding both axes.
        """
        fig, (ax1, ax2) = plt.subplots(2,1, figsize = (12, 4), sharex=True)
        ax1.plot(colnames, np.mean(X_train, axis = 0), color = 'black', label = 'Average spectrum (Raw)')
        ax2.plot(colnames, np.mean(Reg.pretreated_spectra_ , axis = 0), color = 'black', label = 'Average spectrum (Pretreated)')
        ax2.set_xlabel('Wavelenghts')
        plt.tight_layout()

        for ax in (ax1, ax2):
            ax.grid(color = 'grey', linestyle = ':', linewidth = 0.2)
            ax.margins(x = 0)
            ax.legend(loc = 'upper right')
            ax.set_ylabel('Intensity')
            if model_type == 'TPE-iPLS':
                for j in range(s):
                    # numeric column labels: bounds come from the label table,
                    # otherwise from the positional table
                    if np.array(spectra.columns).dtype.kind in ['i','f']:
                        lower, upper = intervalls_with_cols.iloc[j,0], intervalls_with_cols.iloc[j,1]
                    else:
                        lower, upper = intervalls.iloc[j,0], intervalls.iloc[j,1]
                    ax.axvspan(lower, upper, color = '#00ff00', alpha = 0.5, lw = 0)

        if model_type == 'PLS':
            ax1.scatter(colnames[np.array(Reg.sel_ratio_.index)], np.mean(X_train, axis = 0).iloc[np.array(Reg.sel_ratio_.index)],
                            color = '#7ab0c7', label = 'Important variables')
            ax2.scatter(colnames[Reg.sel_ratio_.index], np.mean(Reg.pretreated_spectra_, axis = 0)[np.array(Reg.sel_ratio_.index)],
                            color = '#7ab0c7', label = 'Important variables')
            # redraw the legends so the new scatter entries are listed
            ax1.legend()
            ax2.legend()
        return fig
DIANE's avatar
DIANE committed
    with c8:
        # Right-hand column: raw / pretreated spectra, plus the selected
        # intervals when the model is an interval-PLS one.
        if model_type == 'TPE-iPLS':
            # Table of the spectral intervals retained by the iPLS search.
            st.write('-- Important Spectral regions used for model creation --')
            st.table(intervalls_with_cols)

        st.write('-- Visualization of the spectral regions used for model creation --')
        # `imp_fig` is kept at module level: it is saved to disk again when the
        # results are exported.
        imp_fig = prep_important(
            change = st.session_state.counter,
            model_type = model_type,
            model_hash = hash_,
        )
        st.pyplot(imp_fig)
DIANE's avatar
DIANE committed

DIANE's avatar
DIANE committed
        # Display CV results
    # Spelled-out fold counts used in the section title.
    numbers_dict = {1: "One", 2: "Two", 3: "Three", 4: "Four", 5: "Five",
                    6: "Six", 7: "Seven", 8: "Eight", 9: "Nine", 10: "Ten"}
    # Fold counts outside 1-10 fall back to the plain number rather than
    # raising KeyError and aborting the page.
    st.subheader(f" {numbers_dict.get(nb_folds, nb_folds)}-Fold Cross-Validation results")
DIANE's avatar
DIANE committed
    @st.cache_data(show_spinner =False)
    def cv_display(change):
        """Build the two Plotly figures of the out-of-fold predictions.

        Reads ``Reg.cv_data_[0]`` and uses its 'Measured', 'Predicted',
        'Folds' and 'index' columns.

        Parameters
        ----------
        change :
            Not read by the body; it only serves as the ``st.cache_data`` key.

        Returns
        -------
        tuple
            ``(fig0, fig1)``: ``fig0`` has one facet per fold with point
            labels, ``fig1`` overlays all folds with a dashed y = x line.
        """
        oof = Reg.cv_data_[0]
        palette = px.colors.qualitative.G10

        # All folds on a single plot.
        fig1 = px.scatter(oof, x = 'Measured', y = 'Predicted', trendline = 'ols',
                          color = 'Folds', symbol = 'Folds',
                          color_discrete_sequence = palette)
        # Identity line, extended 5 % beyond the measured range on both ends.
        measured = oof.loc[:, 'Measured']
        lower = .95 * min(measured)
        upper = 1.05 * max(measured)
        fig1.add_shape(type = 'line', x0 = lower, x1 = upper, y0 = lower, y1 = upper,
                       line = dict(color = 'black', dash = "dash"))
        fig1.update_traces(marker_size = 7, showlegend = False)

        # One stacked facet per fold, points labelled by the 'index' column.
        fig0 = px.scatter(oof, x = 'Measured', y = 'Predicted', trendline = 'ols',
                          color = 'Folds', symbol = "Folds",
                          facet_col = 'Folds', facet_col_wrap = 1,
                          color_discrete_sequence = palette,
                          text = 'index', width = 800, height = 1000)
        fig0.update_traces(marker_size = 8, showlegend = False)
        return fig0, fig1
    # Build (or fetch from the cache) both cross-validation figures.
    fig0, fig1 = cv_display(change = Reg.cv_data_)

    cv1, cv2 = st.columns([2, 2])
    with cv2:
        # Summary table, rounded to 4 decimals; also reused by the report export.
        cv_results = DataFrame(Reg.CV_results_).round(4)
        st.write('-- Cross-Validation Summary--')
        # Grey background on every row except the 'sd', 'mean' and 'cv' rows.
        per_fold_rows = cv_results.index.drop(['sd', 'mean', 'cv'])
        styled_cv = cv_results.astype(str).style.map(
            lambda _: "background-color: #cecece;",
            subset = (per_fold_rows, slice(None)),
        )
        st.write(styled_cv)

        st.write('-- Out-of-Fold Predictions Visualization (All in one) --')
        st.plotly_chart(fig1, use_container_width = True)
    with cv1:
        st.write('-- Out-of-Fold Predictions Visualization (Separate plots) --')
        st.plotly_chart(fig0, use_container_width = True)
    
DIANE's avatar
DIANE committed

    ###################################################    BEGIN : Model Diagnosis    ####################################################
DIANE's avatar
DIANE committed
st.subheader("III - Model Diagnosis", divider='blue')
if Reg:
    def _ordinal_label(value):
        """Return `value` followed by its English ordinal suffix (2nd, 3rd, 4th, 11th...)."""
        if value % 100 in (11, 12, 13):
            suffix = 'th'
        else:
            suffix = {1: 'st', 2: 'nd', 3: 'rd'}.get(value % 10, 'th')
        return f"{value}{suffix}"

    # Signal-preprocessing parameters, reformatted for the LaTeX report.
    prep_para = Reg.best_hyperparams_.copy()
    if model_type != reg_algo[2]:
        # The number of components is not part of the preprocessing settings.
        prep_para.pop('n_components')
        for i in ['deriv','polyorder']:
            if Reg.best_hyperparams_[i] == 0:
                prep_para[i] = '0'
            elif Reg.best_hyperparams_[i] == 1:
                prep_para[i] = '1st'
            elif Reg.best_hyperparams_[i] > 1:
                # Previously every value above 1 received "nd" (e.g. "3nd").
                prep_para[i] = _ordinal_label(Reg.best_hyperparams_[i])

    # Regression and residuals figures; the call is the same for every model
    # type, so no branching on model_type is needed here.
    measured_vs_predicted = reg_plot([y_train, y_test], [yc, yt], train_idx = train_index, test_idx = test_index)
    residuals_plot = resid_plot([y_train, y_test], [yc, yt], train_idx = train_index, test_idx = test_index)

    M7, M8 = st.columns([2,2])
    with M7:
        st.write('Predicted vs Measured values')
        st.pyplot(measured_vs_predicted)

    with M8:
        st.write('Residuals plot')
        st.pyplot(residuals_plot)

###################################################      END : Model Diagnosis   #######################################################
DIANE's avatar
DIANE committed
    
###################################################    BEGIN : Download results    #######################################################
##########################################################################################################################################
##########################################################################################################################################
DIANE's avatar
DIANE committed
if Reg:
DIANE's avatar
DIANE committed
    zip_data = ""
DIANE's avatar
DIANE committed
    # Download section: nothing is written to disk until the user opts in
    # through the checkbox below.
    st.header('Download the analysis results')
    st.write("**Note:** Please check the box only after you have finished processing your data and are satisfied with the results. Checking the box prematurely may slow down the app and could lead to crashes.")
    # `decis` gates the whole export pipeline (report, figures, archive).
    decis = st.checkbox("Yes, I want to download the results")
    if decis:
        @st.cache_data(show_spinner =False)
        def export_report(change):
            match model_type:
                case 'PLS':
                        latex_report = report.report('Predictive model development', file_name, stats, list(prep_para.values()), model_type, model_per, cv_results)
                        
DIANE's avatar
DIANE committed

DIANE's avatar
DIANE committed
                case 'LW-PLS':
                        latex_report = report.report('Predictive model development', file_name, stats,
                                                    list({key: Reg.best_hyperparams_[key] for key in ['deriv', 'normalization', 'polyorder', 'window_length'] if key in Reg.best_hyperparams_}.values()), model_type, model_per, cv_results)
                        
                case 'TPE-iPLS':
                        latex_report = report.report('Predictive model development', file_name, stats,
                                                    list({key: Reg.best_hyperparams_[key] for key in ['deriv', 'normalization', 'polyorder', 'window_length'] if key in Reg.best_hyperparams_}.values()), model_type, model_per, cv_results)
                        
                case _:
                    st.warning('Data processing has not been performed or finished yet!', icon = "⚠️")

        @st.cache_data(show_spinner =False)
        def preparing_results_for_downloading(change):
            """Write every artefact of the current analysis under ./report/out/.

            In order: a copy of the input dataset, the pickled model, the
            figures, the selected-wavelength table (TPE-iPLS only), the report,
            and a summary pickle (`file_system.pkl`). All inputs are read from
            module-level variables.

            Parameters
            ----------
            change :
                Returned unchanged; otherwise only used as the
                ``st.cache_data`` key.

            Returns
            -------
            The `change` argument.
            """
            dataset_dir = 'report/out/dataset/'
            model_dir = './report/out/model/'
            figpath = './report/out/figures/'
            is_ipls = model_type == 'TPE-iPLS'

            # --- input data, in the format it was loaded from ---
            if file == 'csv':
                for frame, upload in ((xfile, xcal_csv), (yfile, ycal_csv)):
                    frame.to_csv(dataset_dir + upload.name, sep = ';', encoding = 'utf-8', mode = 'a')
            elif file == 'dx':
                with open(dataset_dir + data_file.name, 'w') as dx_out:
                    dx_out.write(dxdata)

            # --- fitted model ---
            with open(model_dir + model_type + '.pkl', 'wb') as model_out:
                dump(reg_model, model_out)

            # --- figures: (save method, file name), written in this order ---
            figure_exports = (
                (spectra_plot.savefig, "spectra_plot.png"),
                (target_plot.savefig, "histogram.png"),
                (imp_fig.savefig, "variable_importance.png"),
                (fig1.write_image, "meas_vs_pred_cv_all.png"),
                (fig0.write_image, "meas_vs_pred_cv_onebyone.png"),
                (measured_vs_predicted.savefig, 'measured_vs_predicted.png'),
                (residuals_plot.savefig, 'residuals_plot.png'),
            )
            for save_figure, filename in figure_exports:
                save_figure(figpath + filename)

            # --- selected wavelengths (interval PLS only) ---
            if is_ipls:
                wlfilename = model_dir + model_type + '-selected_wavelengths.xlsx'
                wl_table = concat([intervalls_with_cols.T, Reg.selected_features_], axis = 0, ignore_index = True).T
                wl_table.columns = ['wl_from', 'wl_to', 'idx_from', 'idx_to']
                wl_table.to_excel(wlfilename)

            # --- report: the PDF step runs only if a .tex file is present,
            #     and the PDF is moved only if it exists ---
            export_report(change = hash_)
            if Path("./report/report.tex").exists():
                report.generate_report(change = hash_)
            if Path("./report/report.pdf").exists():
                move("./report/report.pdf", "./report/out/report.pdf")

            # --- summary pickle: model, data, split indices, preprocessing ---
            hyperparams = Reg.best_hyperparams_
            pklfile = {
                'model_': Reg.model_,
                "model_type": model_type,
                'data': {
                    'raw-spectra': spectra,
                    'target': y,
                    'training_data_idx': train_index,
                    'testing_data_idx': test_index,
                },
                'spec-preprocessing': {
                    "normalization": hyperparams['normalization'],
                    'SavGol(polyorder,window_length,deriv)': [
                        hyperparams["polyorder"],
                        hyperparams['window_length'],
                        hyperparams['deriv'],
                    ],
                },
            }
            # Wavelength selection only exists for interval PLS.
            if is_ipls:
                pklfile['selected-wls'] = {'idx': Reg.selected_features_.T, "wls": intervalls_with_cols}
            else:
                pklfile['selected-wls'] = {'idx': None, "wls": None}
            # LW-PLS additionally stores its best hyper-parameters.
            if model_type == 'LW-PLS':
                pklfile['lwpls_params'] = Reg.best_hyperparams_

            with open('./report/out/file_system.pkl', "wb") as pkl:
                dump(pklfile, pkl)

            return change
        # Write all result files to ./report/out/. The function is wrapped in
        # st.cache_data and keyed on hash_.
        preparing_results_for_downloading(change = hash_)
DIANE's avatar
DIANE committed
        @st.cache_data(show_spinner =False)
        def tempdir(change):
            """Zip ./report/out and return the archive content.

            The archive is created only when ./report/out/figures/ holds more
            than two entries. It is moved into a temporary directory under
            ./report, read into memory, and the directory is removed when the
            ``with`` block exits.

            Parameters
            ----------
            change :
                Not read by the body; it only serves as the ``st.cache_data`` key.

            Returns
            -------
            tuple
                ``(tempdirname, zip_data)``: the base name of the (already
                removed) temporary directory and the zip bytes, or ``""`` when
                no archive was built.
            """
            # Bound up front: previously this name was only assigned inside the
            # `if`, so the `return` raised UnboundLocalError when too few
            # figures were present. "" is the value the caller tests to disable
            # the download button.
            zip_data = ""
            with TemporaryDirectory(prefix="results", dir="./report") as temp_dir:  # create a temp directory
                tempdirname = os.path.split(temp_dir)[1]

                if len(os.listdir('./report/out/figures/')) > 2:
                    # create a zip file of ./report/out
                    make_archive(base_name="./report/Results", format="zip", base_dir="out", root_dir="./report")
                    # put the archive inside the temp dir
                    move("./report/Results.zip", f"./report/{tempdirname}/Results.zip")
                    with open(f"./report/{tempdirname}/Results.zip", "rb") as f:
                        zip_data = f.read()
            return tempdirname, zip_data

        # Build the archive. On failure zip_data keeps its empty value, so the
        # download button stays disabled; the reason is shown to the user
        # instead of being silently discarded. Only Exception is caught, so
        # interpreter-level signals (KeyboardInterrupt, SystemExit) propagate.
        try:
            tempdirname, zip_data = tempdir(change = hash_)
        except Exception as err:
            st.warning(f'The results archive could not be created ({err}).', icon = "⚠️")
DIANE's avatar
DIANE committed
    # Timestamp (yymmddHHMM) embedded in the name of the downloaded archive.
    date_time = datetime.now().strftime('%y%m%d%H%M')
    # The button is disabled while zip_data is still the empty string.
    disabled_down = zip_data == ''
    st.download_button(
        label = 'Download',
        data = zip_data,
        file_name = f'Nirs_Workflow_{date_time}_Reg_.zip',
        mime = "application/zip",
        args = None,
        kwargs = None,
        type = "primary",
        use_container_width = True,
        disabled = disabled_down,
    )

    # Clean the report directory, keeping only files with these extensions
    # (and the logo, which delete_files always keeps).
    delete_files(keep = ['.py', '.pyc','.bib'])