# 2-model_creation.py
# import streamlit
import pandas as pd
from Packages import *
st.set_page_config(page_title="NIRS Utils", page_icon=":goat:", layout="wide")
from Modules import *
from Class_Mod.DATA_HANDLING import *
DIANE's avatar
DIANE committed
from Class_Mod.Miscellaneous import desc_stats
add_header()
def clear_directory(directory):
    """Delete every entry found inside *directory*, keeping the directory itself.

    Regular files and symbolic links are unlinked; sub-directories are removed
    together with their content. Nothing happens when *directory* does not exist.
    """
    import shutil  # local import: shutil is not among the names this script visibly imports

    directory = Path(directory)
    if not directory.exists():
        return
    for entry in directory.iterdir():
        if entry.is_file() or entry.is_symlink():
            entry.unlink()
        elif entry.is_dir():
            # os.rmdir() only removes *empty* directories and raised OSError as
            # soon as a sub-folder still held a file; rmtree removes it recursively.
            shutil.rmtree(entry)


# Empty the figures folder so figures written by this run are not mixed with older ones.
repertoire_a_vider = Path('Report/figures')
clear_directory(repertoire_a_vider)
local_css(css_file / "style_model.css")
####################################### page Design #######################################
# The page skeleton is created up front; the containers defined here are filled
# in by the loading, modelling and diagnosis code further down the script.
st.title("Calibration Model Development")
st.markdown("Create a predictive model, then use it for predicting your target variable (chemical data) from NIRS spectra")

# Section I: M0 receives the data plots and summary table, M00 the upload widgets.
st.header("I - Data visualization", divider='blue')
M0, M00 = st.columns([1, .4])

# Section II: M1 receives the algorithm widgets and scores, M2 the spectral-region outputs.
st.header("II - Model creation", divider='blue')
# NOTE(review): M1 and M2 are used by the modelling code below but were not
# created anywhere in the visible script (NameError as soon as data is loaded).
# The relative widths chosen here are an assumption - adjust to the intended layout.
M1, M2 = st.columns([2, 4])
st.header("Cross-Validation results")
cv1, cv2 = st.columns([2,2])
cv3 = st.container()

# Section III: M7/M8 hold the diagnosis plots, M9 the model export widgets.
st.header("III - Model Diagnosis", divider='blue')
M7, M8 = st.columns([2,2])
M7.write('Predicted vs Measured values')
M8.write('Residuals plot')
M9 = st.container()
M9.write("-- Save the model --")
##############################################################################################
# Supported input formats; the user picks one in the upload column (M00).
files_format = ['.csv', '.dx']
file = M00.radio('Select files format:', options = files_format)

# Defaults used until data is loaded and a model is built: the sections below
# test `spectra.empty`, `y.empty`, `regression_algo` and `Reg is not None`.
spectra = pd.DataFrame()
y = pd.DataFrame()
regression_algo = None
Reg = None
# load .csv file
if file == files_format[0]:
    separators = [";", ","]
    header_choices = ["no", "yes"]
    # X and Y each keep their own index-column setting. A single shared `col`
    # let the Y-file answer silently overwrite the X-file one.
    colx, coly = False, False

    xcal_csv = M00.file_uploader("Select NIRS Data", type="csv", help=" :mushroom: select a csv matrix with samples as rows and lambdas as columns")
    if xcal_csv:
        # the detection helpers are called once and reused for label and default index
        detected_sepx = str(find_delimiter('data/'+xcal_csv.name))
        detected_hdrx = str(find_col_index('data/'+xcal_csv.name))
        sepx = M00.radio("Select separator (X file) - _detected_: " + detected_sepx,
                         options=separators, index=separators.index(detected_sepx), key=0)
        hdrx = M00.radio("samples name (X file)? - _detected_: " + detected_hdrx,
                         options=header_choices, index=header_choices.index(detected_hdrx), key=1)
        colx = 0 if hdrx == "yes" else False
    else:
        M00.warning('Insert your spectral data file here!')

    ycal_csv = M00.file_uploader("Select corresponding Chemical Data", type="csv", help=" :mushroom: select a csv matrix with samples as rows and chemical values as a column")
    if ycal_csv:
        detected_sepy = str(find_delimiter('data/'+ycal_csv.name))
        detected_hdry = str(find_col_index('data/'+ycal_csv.name))
        sepy = M00.radio("Select separator (Y file) - _detected_: " + detected_sepy,
                         options=separators, index=separators.index(detected_sepy), key=2)
        hdry = M00.radio("samples name (Y file)? - _detected_: " + detected_hdry,
                         options=header_choices, index=header_choices.index(detected_hdry), key=3)
        coly = 0 if hdry == "yes" else False
    else:
        M00.warning('Insert your target data file here!')

    if xcal_csv and ycal_csv:
        file_name = str(xcal_csv.name) +' and '+ str(ycal_csv.name)
        xfile = pd.read_csv(xcal_csv, decimal='.', sep=sepx, index_col=colx, header=0)
        yfile = pd.read_csv(ycal_csv, decimal='.', sep=sepy, index_col=coly)
        if yfile.shape[1]>0 and xfile.shape[1]>0 :
            # NOTE(review): col_cat() is a project helper; from its use here it
            # returns a pair whose first item is the data to model - confirm.
            spectra, meta_data = col_cat(xfile)
            chem_data, idx = col_cat(yfile)
            if chem_data.shape[1]>1:
                yname = M00.selectbox('Select target', options=chem_data.columns)
                y = chem_data.loc[:,yname]
            elif chem_data.shape[1] == 1:
                # NOTE(review): this branch had no body in the reviewed source;
                # taking the only column mirrors the multi-column case above.
                y = chem_data.iloc[:,0]

            spectra = pd.DataFrame(spectra).astype(float)
            if not meta_data.empty :
                st.write(meta_data)

            if spectra.shape[0] != y.shape[0]:
                M00.warning('X and Y have different sample size')
                # reset to empty *instances* (the bare class was assigned before)
                y = pd.DataFrame()
                spectra = pd.DataFrame()
        else:
            # only reported when parsing produced no usable columns; previously
            # this message was displayed after every successful load as well
            M00.error('Error: The data has not been loaded successfully, please consider tuning the decimal and separator !')

## Load .dx file
elif file == files_format[1]:
    data_file = M00.file_uploader("Select Data", type=".dx", help=" :mushroom: select a dx file")
    if not data_file:
        M00.warning('Load your file here!')
    else :
        file_name = str(data_file.name)
        # read_dx() is given a path, so the upload is copied to a temporary file.
        # The file is closed before being read so that its content is flushed to disk.
        with NamedTemporaryFile(delete=False, suffix=".dx") as tmp:
            tmp.write(data_file.read())
            tmp_path = tmp.name
        try:
            chem_data, spectra, meta_data, meta_data_st = read_dx(file =  tmp_path)
        finally:
            # the temporary copy is removed even when parsing fails
            os.unlink(tmp_path)
        M00.success("The data have been loaded successfully", icon="")
        if chem_data.shape[1]>0:
            yname = M00.selectbox('Select target', options=chem_data.columns)
            # keep only the samples whose selected target value is strictly positive
            measured = chem_data.loc[:,yname] > 0
            y = chem_data.loc[:,yname].loc[measured]
            spectra = spectra.loc[measured]
        else:
            M00.warning('Warning: your file includes no target variables to model !', icon="⚠️")

### split the data
if not spectra.empty and not y.empty:
DIANE's avatar
DIANE committed
    # x-axis values for the spectral plots: the column labels when they are
    # numeric, otherwise the positional index of each variable.
    if np.array(spectra.columns).dtype.kind in ['i','f']:
        colnames = spectra.columns
    else:
        colnames = np.arange(spectra.shape[1])

    # Split data into training and test sets using the kennard_stone method and correlation metric, 25% of data is used for testing
    train_index, test_index = train_test_split_idx(spectra, y = y, method="kennard_stone", metric="correlation", test_size=0.25, random_state=42)

    # Assign data to training and test sets
    X_train, y_train = pd.DataFrame(spectra.iloc[train_index,:]), y.iloc[train_index]
    X_test, y_test = pd.DataFrame(spectra.iloc[test_index,:]), y.iloc[test_index]

    #### insight on loaded data
    # the report figures are written here; make sure the folder exists first
    Path("./Report/figures").mkdir(parents=True, exist_ok=True)

    # raw spectra, one dashed line per sample
    fig, ax1 = plt.subplots( figsize = (12,3))
    spectra.T.plot(legend=False, ax = ax1, linestyle = '--')
    ax1.set_ylabel('Signal intensity')
    ax1.margins(0)
    fig.tight_layout()
    M0.pyplot(fig) ######## Loaded graph
    fig.savefig("./Report/figures/spectra_plot.png")
    plt.close(fig)  # figures are rebuilt on every rerun; closing avoids piling them up

    # distribution of the target for the whole set and for each subset
    fig, ax2 = plt.subplots(figsize = (12,3))
    sns.histplot(y, color="deeppink", kde = True,label="y",ax = ax2, fill=True)
    sns.histplot(y_train, color="blue", kde = True,label="y (train)",ax = ax2, fill=True)
    sns.histplot(y_test, color="green", kde = True,label="y (test)",ax = ax2, fill=True)
    ax2.set_xlabel('y')
    ax2.legend()
    fig.tight_layout()

    M0.pyplot(fig)
    fig.savefig("./Report/figures/Histogram.png")
    plt.close(fig)

    # descriptive statistics: computed once, shown on the page and kept in
    # `stats` for the report section at the end of the script
    stats = pd.DataFrame([desc_stats(y_train),desc_stats(y_test),desc_stats(y)], index =['train', 'test', 'total'] ).round(2)
    M0.write('Loaded data summary')
    M0.write(stats)
    ####################################### Insight into the loaded data


    ####################################### Model creation ###################################################
    reg_algo = ["","Full-PLSR", "Locally Weighted PLSR", "Interval-PLSR"]
    regression_algo = M1.selectbox("Choose the algorithm for regression", options= reg_algo, key = 12, placeholder ="Choose an option")
    # split train data into nb_folds for cross_validation
    nb_folds = 3
    folds = KF_CV.CV(X_train, y_train, nb_folds)

    if not regression_algo:
        M1.warning('Choose a modelling algorithm from the dropdown list !')

    # Every branch below leaves the fitted wrapper in `Reg` and the object to
    # export in `reg_model`.
    if regression_algo == reg_algo[1]:
        Reg = Plsr(train = [X_train, y_train], test = [X_test, y_test], n_iter=1)
        reg_model = Reg.model_

    elif regression_algo == reg_algo[2]:
        import subprocess

        M1.write('KFold for Cross-Validation = ' + str(nb_folds))
        info = M1.info('Starting LWPLSR model creation... Please wait a few minutes.')

        # Arrays handed to the LWPLSR subprocess as csv files, keyed by the csv
        # base name (insertion order = writing order).
        x_train_np, y_train_np = X_train.to_numpy(), y_train.to_numpy()
        x_test_np, y_test_np = X_test.to_numpy(), y_test.to_numpy()
        exported = {'x_train_np': x_train_np, 'y_train_np': y_train_np,
                    'x_test_np': x_test_np, 'y_test_np': y_test_np}
        # Cross-validation folds: fold k keeps the samples listed in `folds` for
        # testing and the remaining training samples for fitting.
        fold_keys = list(folds)
        for i in range(nb_folds):
            held_out = folds[fold_keys[i]]
            exported["xtr_fold{0}".format(i+1)] = np.delete(x_train_np, held_out, axis=0)
            exported["ytr_fold{0}".format(i+1)] = np.delete(y_train_np, held_out, axis=0)
            exported["xte_fold{0}".format(i+1)] = x_train_np[held_out]
            exported["yte_fold{0}".format(i+1)] = y_train_np[held_out]

        # check best pre-treatment with a global PLSR model
        preReg = Plsr(train = [X_train, y_train], test = [X_test, y_test], n_iter=20)
        temp_path = Path('temp/')
        pretreatments_json = temp_path / "lwplsr_preTreatments.json"
        outputs_json = temp_path / "lwplsr_outputs.json"
        with open(pretreatments_json, "w+") as outfile:
            json.dump(preReg.best_hyperparams_, outfile)
        # export Xtrain, Xtest, Ytrain, Ytest and all CV folds to temp folder as csv files
        csv_files = {name: temp_path / (name + ".csv") for name in exported}
        for name, array in exported.items():
            np.savetxt(csv_files[name], array, delimiter=",")

        # Run the LWPLSR driver script with the current interpreter (per the
        # original comments it calls Julia's Jchemo); it is expected to write
        # lwplsr_outputs.json into the temp folder.
        subprocess_path = Path("Class_Mod/")
        subprocess.run([f"{sys.executable}", subprocess_path / "LWPLSR_Call.py"])

        try:
            with open(outputs_json, "r") as outfile:
                Reg_json = json.load(outfile)
        except FileNotFoundError:
            # no output file: the subprocess did not produce a model
            info.empty()
            M1.warning('- ERROR during model creation -')
            Reg = None
        else:
            # format result data into Reg object
            pred = ['pred_data_train', 'pred_data_test'] + ["CV" + str(i+1) for i in range(nb_folds)]
            Reg = type('obj', (object,), {'model_' : Reg_json['model'], 'best_hyperparams_' : Reg_json['best_lwplsr_params'],
                                          'pred_data_' : [pd.json_normalize(Reg_json[i]) for i in pred]})
            Reg.CV_results_ = pd.DataFrame()
            Reg.cv_data_ = {'YpredCV' : {}, 'idxCV' : {}}
            # set indexes to Reg.pred_data_ (train, test, then one entry per fold)
            for i, frame in enumerate(Reg.pred_data_):
                frame = frame.T.reset_index().drop(columns = ['index'])
                if i == 0: # data_train
                    frame.index = list(y_train.index)
                    Reg.pred_data_[i] = frame.iloc[:,0]
                elif i == 1: # data_test
                    frame.index = list(y_test.index)
                    Reg.pred_data_[i] = frame.iloc[:,0]
                else: # CV fold number i-1
                    fold_idx = folds[fold_keys[i-2]]
                    frame.index = fold_idx
                    Reg.pred_data_[i] = frame
                    Reg.cv_data_['YpredCV']['Fold' + str(i-1)] = np.array(frame).reshape(-1)
                    Reg.cv_data_['idxCV']['Fold' + str(i-1)] = np.array(fold_idx).reshape(-1)
            Reg.CV_results_= KF_CV.metrics_cv(y = y_train, ypcv = Reg.cv_data_['YpredCV'], folds = folds)[1]
            Reg.cv_data_ = KF_CV().meas_pred_eq(y = np.array(y_train), ypcv= Reg.cv_data_['YpredCV'], folds=folds)
            Reg.pretreated_spectra_ = preReg.pretreated_spectra_
            # Two distinct dicts on purpose: the analysis section edits
            # best_hyperparams_ in place after displaying best_hyperparams_print.
            Reg.best_hyperparams_print = {**preReg.best_hyperparams_, **Reg.best_hyperparams_}
            Reg.best_hyperparams_ = {**preReg.best_hyperparams_, **Reg.best_hyperparams_}
            # the export button dumps `reg_model`; it was never set for this
            # algorithm, which made the export fail with a NameError
            reg_model = Reg.model_
            info.empty()
            M1.success('Model created!')
        finally:
            # remove every exchange file, whether or not a model was produced
            for leftover in [*csv_files.values(), outputs_json, pretreatments_json]:
                if leftover.exists():
                    leftover.unlink()

    elif regression_algo == reg_algo[3]:
        s = M1.number_input(label='Enter the maximum number of intervals', min_value=1, max_value=6, value=3)
        it = M1.number_input(label='Enter the number of iterations', min_value=2, max_value=10, value=3)

        # show the progress bar before the (long) fit, then mark it complete
        pro = M1.progress(0, text="The model is being created. Please wait!")
        Reg = TpeIpls(train = [X_train, y_train], test=[X_test, y_test], n_intervall = s, n_iter=it)
        pro.progress(100, text = "The model has successfully been  created!")
        time.sleep(1)
        reg_model = Reg.model_

        M2.write('-- Important Spectral regions used for model creation --')
        # `intervalls` keeps the positional bounds; `intervalls_with_cols` holds
        # the matching column labels. Independent frames are built so that
        # filling one can never alter the other or Reg.selected_features_.
        intervalls = Reg.selected_features_.T.copy()
        intervalls_with_cols = pd.DataFrame(
            np.asarray(spectra.columns)[intervalls.to_numpy().astype(int)],
            index=intervalls.index, columns=intervalls.columns)
        M2.table(intervalls_with_cols)
    ################# Model analysis ############
    # Cross-validation plots, scores, diagnosis plots and model export; only
    # runs once one of the algorithms has produced a model.
    if regression_algo in reg_algo[1:] and Reg is not None:
        cv2.write('-- Cross-Validation Summary--')
        cv2.write(Reg.CV_results_)
        cv_results=pd.DataFrame(Reg.CV_results_)
        cv2.write('-- Out-of-Fold Predictions Visualization (All in one) --')

        cv_frame = Reg.cv_data_[0]
        measured_cv = cv_frame.loc[:,'Measured']
        # end points of the dashed identity (y = x) line, slightly wider than the data
        line_start, line_end = .95 * min(measured_cv), 1.05 * max(measured_cv)

        fig1 = px.scatter(cv_frame, x ='Measured', y = 'Predicted' , trendline='ols', color='Folds', symbol="Folds",
                 color_discrete_sequence=px.colors.qualitative.G10)
        fig1.add_shape(type='line', x0 = line_start, x1 = line_end,
                        y0 = line_start, y1 = line_end, line = dict(color='black', dash = "dash"))
        fig1.update_traces(marker_size=7, showlegend=False)
        cv2.plotly_chart(fig1, use_container_width=True)

        fig0 = px.scatter(cv_frame, x ='Measured', y = 'Predicted' , trendline='ols', color='Folds', symbol="Folds", facet_col = 'Folds',facet_col_wrap=1,
                 color_discrete_sequence=px.colors.qualitative.G10, text='index', width=800, height=1000)
        fig0.update_traces(marker_size=8, showlegend=False)
        fig0.write_image("./Report/figures/meas_vs_pred_cv_onebyone.png")

        cv1.write('-- Out-of-Fold Predictions Visualization (Separate plots) --')
        cv1.plotly_chart(fig0, use_container_width=True)
        fig1.write_image("./Report/figures/meas_vs_pred_cv_all.png")

        # predictions on the calibration (train) and test sets
        yc = Reg.pred_data_[0]
        yt = Reg.pred_data_[1]

        M1.write('-- Spectral preprocessing info --')
        M1.write(Reg.best_hyperparams_print)
        with open("data/params/Preprocessing.json", "w") as outfile:
            json.dump(Reg.best_hyperparams_, outfile)

        # scores are computed once, displayed, and kept in `model_per` for the report
        M1.write("-- Model performance --")
        scores = metrics(c = [y_train, yc], t = [y_test, yt], method='regression').scores_
        M1.dataframe(scores)
        model_per=pd.DataFrame(scores)

        a = reg_plot([y_train, y_test],[yc, yt], train_idx = train_index, test_idx = test_index)
        M7.pyplot(a)
        # Save the figure that was just displayed rather than whichever pyplot
        # figure happens to be current; fall back to pyplot when the helper
        # returned None, which st.pyplot() treats as "the global figure".
        (a if a is not None else plt).savefig('./Report/figures/measured_vs_predicted.png')

        # NOTE: prep_para is the same dict object as Reg.best_hyperparams_, so the
        # edits below also change what the report section reads later on.
        prep_para = Reg.best_hyperparams_
        if regression_algo != "Locally Weighted PLSR":
            prep_para.pop('n_components')
            ordinal_suffix = {1: 'st', 2: 'nd', 3: 'rd'}
            for i in ['deriv','polyorder']:
                value = Reg.best_hyperparams_[i]
                if value == 0:
                    prep_para[i] = '0'
                elif value >= 1:
                    # "1st", "2nd", "3rd", then "4th", ... (was "3nd", "4nd", ...)
                    prep_para[i] = f"{value}{ordinal_suffix.get(value, 'th')}"

        residual_plot = resid_plot([y_train, y_test], [yc, yt], train_idx=train_index, test_idx=test_index)
        M8.pyplot(residual_plot)
        (residual_plot if residual_plot is not None else plt).savefig('./Report/figures/residuals_plot.png')

        # TODO: add a features-importance plot for the non-LWPLSR models

        model_name = M9.text_input('Give it a name')
        date_time = datetime.date.today().strftime('_%Y_%m_%d_')
        if M9.button('Export Model'):
            path = 'data/models/model_'
            # NOTE(review): the file-name patterns differ between the two input
            # formats (date missing for .dx); kept unchanged in case other pages
            # rely on them.
            if file == files_format[0]:
                # text before the first dot; the whole name when there is no dot
                x_stem = xcal_csv.name.partition(".")[0]
                y_stem = ycal_csv.name.partition(".")[0]
                with open(path + model_name + date_time + '_created_on_' + x_stem +""+
                           '_and_' + y_stem + '_data_' + '.pkl','wb') as f:
                    # Reg.model_ is what every branch stores in reg_model; using it
                    # directly also works for algorithms that never set reg_model
                    joblib.dump(Reg.model_, f)
                if regression_algo == reg_algo[3]:
                    Reg.selected_features_.T.to_csv(path + model_name + date_time + '_on_' + x_stem
                                                  + '_and_' + y_stem + '_data_'+'Wavelengths_index.csv', sep = ';')

            elif file == files_format[1]:
                dx_stem = data_file.name.partition(".")[0]
                with open(path + model_name + '_on_'+ dx_stem + '_data_' + '.pkl','wb') as f:
                    joblib.dump(Reg.model_, f)
                if regression_algo == reg_algo[3]:
                    Reg.selected_features_.T.to_csv(path + dx_stem + model_name + date_time+ '_on_' + '_data_'+'Wavelengths_index.csv', sep = ';')
            # confirmation for every export (it used to appear only for
            # Interval-PLSR models built from a .dx file)
            st.write('Model Exported ')

        # .get(): the key may be absent when this page is opened directly
        if st.session_state.get('interface') == 'simple':
            pages_folder = Path("pages/")
            show_pages(
                [Page("app.py", "Home"),
                 Page(str(pages_folder / "4-inputs.py"), "Inputs"),
                 Page(str(pages_folder / "1-samples_selection.py"), "Samples Selection"),
                 Page(str(pages_folder / "2-model_creation.py"), "Models Creation"),
                 Page(str(pages_folder / "3-prediction.py"), "Predictions"),
                 ]
            )
            # forward slash: the backslash form only resolves on Windows
            st.page_link('pages/3-prediction.py', label = 'Keep on keepin\' on to predict your values !')
# Figure of the mean raw and pretreated spectra, with the variables or
# intervals highlighted by the selected algorithm.
if not spectra.empty and not y.empty and regression_algo:
    if regression_algo in reg_algo[1:] and Reg is not None:
        fig, (ax1, ax2) = plt.subplots(2,1, figsize = (12, 4), sharex=True)
        # plain arrays, so the "important variables" below can be picked by position
        raw_mean = np.asarray(np.mean(X_train, axis = 0))
        pretreated_mean = np.asarray(np.mean(Reg.pretreated_spectra_ , axis = 0))

        ax1.plot(colnames, raw_mean, color = 'black', label = 'Average spectrum (Raw)')
        # drawn for every algorithm: the former test against
        # "Locally Weighted PLSR_" (trailing underscore) could never be false
        ax2.plot(colnames, pretreated_mean, color = 'black', label = 'Average spectrum (pretreated)')
        ax2.set_xlabel('Wavelenghts')
        plt.tight_layout()

        for axis in (ax1, ax2):
            axis.grid(color='grey', linestyle=':', linewidth=0.2)
            axis.margins(x = 0)
            axis.legend(loc = 'upper right')
            axis.set_ylabel('Intensity')
            if regression_algo == reg_algo[3]:
                # interval bounds are expressed in the same unit as the x-axis:
                # column labels when numeric, positions otherwise
                if np.array(spectra.columns).dtype.kind in ['i','f']:
                    bounds = intervalls_with_cols
                else:
                    bounds = intervalls
                # NOTE(review): iterates over every selected interval instead of
                # range(s); assumes one row per interval - confirm against TpeIpls.
                for span_start, span_end in zip(bounds['from'], bounds['to']):
                    axis.axvspan(span_start, span_end, color='#00ff00', alpha=0.5, lw=0)

        if regression_algo == reg_algo[1]:
            # Reg.sel_ratio_.index is used as positions into the variables (it
            # indexes `colnames`), so the mean spectra are indexed the same way
            important = np.array(Reg.sel_ratio_.index)
            x_important = np.asarray(colnames)[important]
            ax1.scatter(x_important, raw_mean[important],
                        color = 'red', label = 'Important variables')
            ax2.scatter(x_important, pretreated_mean[important],
                        color = 'red', label = 'Important variables')
            ax1.legend()
            ax2.legend()

        M2.write('-- Visualization of the spectral regions used for model creation --')
        fig.savefig("./Report/figures/Variable_importance.png")
        M2.pyplot(fig)
        plt.close(fig)  # the figure is rebuilt on every rerun

## Report generation (only offered once a model exists)
if Reg is not None:
    with st.container():
        if st.button("Download the report"):
            if regression_algo == reg_algo[1]:
                # build the LaTeX report from the data summary, the preprocessing
                # parameters and the scores collected above, then compile it
                report.report('Predictive model development', file_name, stats, list(Reg.best_hyperparams_.values()), regression_algo, model_per, cv_results)
                report.compile_latex()
            else:
                # The button used to do nothing for the other algorithms: the old
                # `regression_algo is None` test cannot be true once Reg exists.
                st.warning('The report is currently generated for Full-PLSR models only.', icon = "⚠️")