Skip to content
Snippets Groups Projects
app.py 14.6 KiB
Newer Older
  • Learn to ignore specific revisions
  • DIANE's avatar
    DIANE committed
    #from Modules_manager.PCA_ import pca_maker
    
    from Packages import *
    
    Nicolas BARTHES's avatar
    Nicolas BARTHES committed
    # Page-level configuration: browser tab title, tab icon and wide layout.
    # NOTE(review): this call sits before `from Modules import *`, presumably because Streamlit
    # expects set_page_config to run before any other Streamlit command — confirm before moving it.
    st.set_page_config(page_title="NIRS Utils", page_icon=":goat:", layout="wide")
    
    DIANE's avatar
    DIANE committed
    from Modules import *
    
    DIANE's avatar
    DIANE committed
    
    # graphical delimiter
    st.write("---")


    # Load images for the web interface.
    # Forward slashes are used on purpose: "\s", "\g" and "\p" are invalid string escape
    # sequences (reported as a warning by recent Python versions), and a backslash is not a
    # path separator outside Windows, whereas "/" is accepted on every platform.
    img_sselect = Image.open("images/sselect.JPG")
    img_general = Image.open("images/general.JPG")
    img_predict = Image.open("images/predict.JPG")
    
    
    # TOC menu on the left: one markdown link per page section, rendered in the sidebar
    with st.sidebar:
        for toc_link in (
            "[Sample Selection](#sample-selection)",
            "[Model Development](#create-a-model)",
            "[Predictions Making](#predict)",
        ):
            st.markdown(toc_link)
    
    
    
    Nicolas BARTHES's avatar
    Nicolas BARTHES committed
    # Page banner: platform name, application title and a short overview with reference links
    banner_box = st.container()
    with banner_box:
        st.subheader("Plateforme d'Analyses Chimiques pour l'Ecologie-PACE :goat:")
        st.title("NIRS Utils")

        banner_overview = "Samples selection (PCA, [UMAP](https://umap-learn.readthedocs.io/en/latest/how_umap_works.html), ...), Predictive Modelling ([Pinard](https://github.com/GBeurier/pinard), [LWPLSR](https://doi.org/10.1002/cem.3209), ...), and Predictions using your data (CSV or DX files) and/or PACE NIRS Database."
        st.write(banner_overview)
        #st.image(img_general)
    
    DIANE's avatar
    DIANE committed
    
    
    
    DIANE's avatar
    DIANE committed
    ################################### Data Loading and Visualization ########################################
    # Bordered container for the data loading / spectra visualization section.
    container1 = st.container(border=True)
    # Two columns with relative widths 3:1. The wide one (col2) comes first and receives the
    # spectra plot; the narrow one (col1) receives the upload widgets.
    # NOTE(review): created with st.columns rather than container1.columns — confirm the columns
    # are meant to live outside the bordered container.
    col2, col1 = st.columns([3, 1])
    
    
    # Bordered container and header for the exploratory data analysis section.
    container2 = st.container(border=True)
    container2.header("Exploratory Data Analysis-Multivariable Data Analysis", divider='blue')
    # First row of result columns: scores plot, loadings plot, and a narrow column for the controls.
    scores, loadings, pc = st.columns([2, 2, 0.5])
    # Second row: influence plot, Hotelling plot, and a third column (qexp) that is currently left empty.
    influence, hotelling, qexp = st.columns([2, 2, 1])
    
    
    # Data loading section: upload a CSV of spectra, choose how to parse it, then plot the
    # spectra and show a small summary table.
    with container1:
        col1.header("NIRS Data Loading", divider='blue')
        col2.header("Spectral Data Visualization", divider='blue')

        with col1:
            # loader for csv file containing NIRS spectra
            sselectx_csv = st.file_uploader("Load NIRS Data", type="csv", help=" :mushroom: select a csv matrix with samples as rows and lambdas as columns", key=5)
            if sselectx_csv is not None:
                # Run each detection helper once and reuse the result for both the label and
                # the pre-selected option.
                sep_options = [";", ","]
                detected_sep = str(find_delimiter('data/'+sselectx_csv.name))
                # Fall back to the first option when the detected value is not offered,
                # instead of letting list.index raise ValueError.
                sep_default = sep_options.index(detected_sep) if detected_sep in sep_options else 0
                # Select list for CSV delimiter
                psep = st.selectbox("Select csv separator - _detected_: " + detected_sep, options=sep_options, index=sep_default, key=9)

                hdr_options = ["no", "yes"]
                detected_hdr = str(find_col_index('data/'+sselectx_csv.name))
                hdr_default = hdr_options.index(detected_hdr) if detected_hdr in hdr_options else 0
                # Select list for CSV header True / False
                phdr = st.selectbox("indexes column in csv? - _detected_: " + detected_hdr, options=hdr_options, index=hdr_default, key=31)

                # index_col=0 uses the first column as row labels; False tells pandas not to.
                col = 0 if phdr == 'yes' else False
                data_import = pd.read_csv(sselectx_csv, sep=psep, index_col=col)
                # NOTE(review): icon is an empty string here — possibly an emoji lost upstream; confirm.
                st.success("The data have been loaded successfully", icon="")

        ## Visualize spectra
        if sselectx_csv is not None:
            with col2:
                fig, ax = plt.subplots(figsize = (30,7))
                data_import.T.plot(legend=False, ax = ax, color = 'blue')
                ax.set_xlabel('Wavelength/Wavenumber', fontsize=18)
                ax.set_ylabel('Signal', fontsize=18)
                ax.margins(x = 0)
                st.pyplot(fig)
                # pyplot keeps every figure it creates alive until closed; release it so
                # repeated script reruns do not accumulate figures.
                plt.close(fig)

                st.write("Summary")
                # Column-wise reduction followed by a second reduction gives one scalar for the
                # whole table regardless of how np.min/np.max dispatch on a DataFrame.
                info = pd.DataFrame({'N':[data_import.shape[0]],
                                     'Min': [data_import.min().min()],
                                     'Max':[data_import.max().max()],}, index = ['Values']).T
                # rename_axis returns a new frame; keep the result so the label is applied.
                info = info.rename_axis('information')
                st.table(data=info)
    ######################################################################################
    
    ############################## Exploratory data analysis ###############################
    # Exploratory analysis section: fit a dimensionality reduction model on the loaded spectra,
    # optionally cluster the scores, and draw the diagnostic plots.
    with container2:
        if sselectx_csv is not None:
            plot_type=['', 'PCA','UMAP', 'NMF']
            cluster_methods = ['', 'Kmeans','UMAP', 'AP']

            with pc:
                type_plot = st.selectbox("Dimensionality reduction techniques: ", options=plot_type, key=37)
                type_cluster = st.selectbox("Clustering techniques: ", options=cluster_methods, key=38)
                # Fit the selected model; both are asked for 5 components because the axis
                # selectors below default to indexes up to 4.
                if type_plot == 'PCA':
                    model = LinearPCA(data_import, Ncomp=5)
                elif type_plot =='UMAP':
                    model = Umap(x = data_import, n_components = 5, n_neighbors = 20 , min_dist = 0)

            if type_plot in ['PCA', 'UMAP']:
                # add 3 select lists to choose which components to plot
                axis1 = pc.selectbox("x-axis", options = model.scores_.columns, index=0)
                axis2 = pc.selectbox("y-axis", options = model.scores_.columns, index=1)
                axis3 = pc.selectbox("z-axis", options = model.scores_.columns, index=2)

                # Only K-means builds a clustering object here. Keep cl as None for every other
                # choice so the scores plot below never touches an undefined name.
                cl = None
                if type_cluster == 'Kmeans':
                    cl = Sk_Kmeans(pd.concat([model.scores_.loc[:,axis1], model.scores_.loc[:,axis2], model.scores_.loc[:,axis3]], axis = 1), max_clusters = 30)

                with scores:
                    t = model.scores_
                    if cl is not None:
                        st.write('Scree plot')
                        fig2 = px.scatter(cl.inertia_.T, y = 'inertia')
                        st.plotly_chart(fig2)

                        ncluster = st.number_input(min_value=2, max_value=30, value=3, label = 'Select the desired number of clusters')
                        data, colors = cl.fit_optimal(nclusters=ncluster)
                        st.write('Scores plot')
                        fig = px.scatter_3d(data, x=axis1, y=axis2, z = axis3, color=colors)
                    else:
                        if type_cluster != '':
                            # A clustering method was picked but nothing computes it in this section.
                            st.warning(type_cluster + ' clustering is not available yet, showing the scores without clusters')
                        fig = px.scatter_3d(t, x=axis1, y=axis2, z = axis3)

                    st.plotly_chart(fig)

                if type_plot =='PCA':
                    with loadings:
                        st.write('Loadings plot')
                        p = model.loadings_
                        # Add a running integer column 'wl' to serve as the x-axis, then reshape
                        # to long format so each component becomes one coloured line.
                        pp = pd.concat([p, pd.DataFrame(np.arange(p.shape[0]), index=p.index, columns=['wl'])], axis =1)
                        df1 = pp.melt(id_vars="wl")

                        fig = px.line(df1, x = 'wl', y = 'value', color='variable')
                        fig.update_layout(
                            legend=dict(x=1, y=0,
                                        font=dict(
                                            family="Courier", size=12, color="black"),
                                        bordercolor="Black", borderwidth=2)
                        )
                        st.plotly_chart(fig)

                    with influence:
                        st.write('Influence plot')
                        ax1 = st.selectbox("Component", options=model.scores_.columns, index=3)

                        leverage = model.leverage_
                        residuals = model.residuals
                        fig = px.scatter(x=leverage[ax1], y=residuals[ax1], color = leverage[ax1]*residuals[ax1])
                        st.plotly_chart(fig)

                    with hotelling:
                        st.write('T²-Hotelling vs Q residuals plot')
                        ax2 = st.selectbox("Component", options=model.scores_.columns, index=4)

                        # NOTE(review): ax2 is collected but the plot below still uses axis1 and
                        # the second score column — looks like a placeholder; confirm.
                        t = model.scores_
                        fig = px.scatter(t, x=axis1, y=t.columns[1])
                        st.plotly_chart(fig)

                    with qexp:
                        pass

            else:
                # Shown when no supported technique is selected. This branch used to hang off
                # the PCA check, so it appeared for UMAP and never for an empty selection.
                st.markdown('Select a dimensionality reduction technique from the dropdown list')
    
    DIANE's avatar
    DIANE committed
    
    
    ########################################################################################
    
    # Model creation module
    
    DIANE's avatar
    DIANE committed
    # NOTE: rebinds container2 — from here on the name refers to the model development
    # container, not the exploratory analysis container created earlier in the script.
    container2 = st.container(border=True)
    
    # Column rows used by the model development widgets: three equal columns, then a 6:2 split.
    M1, M2, M3 = st.columns([2,2,2])
    M4, M5 = st.columns([6,2])
    # Bordered container and the two columns that receive the model diagnosis plots.
    container3 = st.container(border=True)
    M7, M8 = st.columns([2,2])
    
    # Choices offered by the regression selectbox; the leading empty string is the "nothing selected" entry.
    available_regression_algo = ["","SciKitLearn PLSR", "Jchemo Local Weighted PLSR", "Intervalle Selection PLSR"]
    # Model development section: upload spectra and reference values, split them into
    # training/test sets, fit the selected regression, show diagnostics and export the model.
    with container2:
        st.header("Calibration Model Development", divider='blue')
        st.write("Create a predictive model, then use it for predicting your target variable(chemical values) from NIRS spectra")
        xcal_csv = M3.file_uploader("Select NIRS Data", type="csv", help=" :mushroom: select a csv matrix with samples as rows and lambdas as columns")
        ycal_csv = M3.file_uploader("Select corresponding Chemical Data", type="csv", help=" :mushroom: select a csv matrix with samples as rows and chemical values as a column")

        if xcal_csv is not None and ycal_csv is not None:
            # Run each detection helper once; fall back to the first option when the detected
            # value is not offered instead of letting list.index raise ValueError.
            sep_options = [";", ","]
            detected_sep = str(find_delimiter('data/'+xcal_csv.name))
            sep_default = sep_options.index(detected_sep) if detected_sep in sep_options else 0
            # Select list for CSV delimiter
            sep = M3.selectbox("Select csv separator - _detected_: " + detected_sep, options=sep_options, index=sep_default, key=0)

            hdr_options = ["no", "yes"]
            detected_hdr = str(find_col_index('data/'+xcal_csv.name))
            hdr_default = hdr_options.index(detected_hdr) if detected_hdr in hdr_options else 0
            # Select list for CSV header True / False
            hdr = M3.selectbox("indexes column in csv? - _detected_: " + detected_hdr, options=hdr_options, index=hdr_default, key=1)
            col = 0 if hdr == 'yes' else False

            rd_seed = M1.slider("Choose seed", min_value=1, max_value=1212, value=42, format="%i")
            x, y = utils.load_csv(xcal_csv, ycal_csv, autoremove_na=True, sep=sep, x_hdr=0, y_hdr=0, x_index_col=col, y_index_col=col)
            # Split data into training and test sets using the kennard_stone method and correlation metric, 25% of data is used for testing
            train_index, test_index = train_test_split_idx(x, y=y, method="kennard_stone", metric="correlation", test_size=0.25, random_state=rd_seed)
            # Assign data to training and test sets
            X_train, y_train, X_test, y_test = pd.DataFrame(x[train_index]), pd.DataFrame(y[train_index]), pd.DataFrame(x[test_index]), pd.DataFrame(y[test_index])
            #############################

            regression_algo = M1.selectbox("Choose the algorithm for regression", options=available_regression_algo, key = 12)

            # Reg carries the predictions and metrics used by the diagnostics below. Only the
            # SciKitLearn PLSR branch produces it, so it stays None for the other algorithms
            # and the code below checks it rather than hitting an undefined name.
            Reg = None
            reg_model = None

            if regression_algo == 'SciKitLearn PLSR':
                Reg = PinardPlsr(x_train=X_train, x_test=X_test,y_train=y_train, y_test=y_test)
                reg_model = Reg.model_

            elif regression_algo == 'Jchemo Local Weighted PLSR':
                reg_model = model_LWPLSR(xcal_csv, ycal_csv, sep, hdr)

            elif regression_algo == "Intervalle Selection PLSR":
                # NOTE(review): s is collected from the user but n_intervall below is fixed to 3;
                # confirm whether s was meant to be passed through.
                s = M2.number_input(label='Enter the maximum number of intervalls', min_value=1, max_value=6, value="min")
                reg_model = TpeIpls(x_train= X_train, y_train= y_train, x_test=X_test, y_test= y_test,Kfold= 3,scale= True, n_intervall = 3)
                reg_model.tune(n_iter=10)

            if regression_algo in ["SciKitLearn PLSR", "Jchemo Local Weighted PLSR", "Intervalle Selection PLSR"]:
                with container3:
                    st.header("Model Diagnosis", divider='blue')
                    if Reg is not None:
                        # pred_data_ is read positionally: calibration, cross-validation, test.
                        yc = Reg.pred_data_[0]
                        ycv = Reg.pred_data_[1]
                        yt = Reg.pred_data_[2]
                        M7.write('Predicted vs Measured values')
                        M7.pyplot(reg_plot([y_train, y_train, y_test],[yc, ycv, yt]))
                        M8.write('Residuals plot')
                        M8.pyplot(resid_plot([y_train, y_train, y_test],[yc, ycv, yt]))
                    else:
                        st.info('Diagnostic plots are not available for ' + regression_algo)

            # Export the model with pickle or joblib
            if regression_algo != '':
                if Reg is not None:
                    M1.write("-- Performance metrics --")
                    M1.dataframe(Reg.metrics_)
                M1.write("-- Save the model --")
                #model_export = M1.selectbox("Choose way to export", options=["pickle", "joblib"], key=20)
                model_name = M1.text_input('Give it a name')
                if M1.button('Export Model'):
                    #export_package = __import__(model_export)
                    with open('data/models/model_' + model_name + '_on_' + xcal_csv.name + '_and_' + ycal_csv.name + '_data_' + '.pkl','wb') as f:
                        joblib.dump(reg_model,f)
                    st.write('Model Exported')

                    # create a report with information on the model
                    ## see https://stackoverflow.com/a/59578663
            #M4.pyplot(reg_plot(meas==(ycal_csv,ycal_csv,ycal_csv], pred=[ycal_csv,ycal_csv,ycal_csv]))
            
    
    
    # graphical delimiter
    st.write("---")

    #M9, M10, M11 = st.columns([2,2,2])

    # Prediction module - TO BE DONE !!!!!
    # Upload spectra, pick a previously exported model, predict, then save and offer the
    # predictions for download.
    with st.container():
        st.header("Predictions making")
        st.write("---")
        st.write("Predict chemical values from NIRS")
        model_column, space, file_column= st.columns((2, 1, 1))
        NIRS_csv = file_column.file_uploader("Select NIRS Data to predict", type="csv", help=" :mushroom: select a csv matrix with samples as rows and lambdas as columns")

        export_folder = './data/predictions/'
        export_name = 'Predictions_of_'

        # Bound up-front so the Predict button can check whether its inputs are ready
        # instead of failing on undefined names when nothing has been uploaded yet.
        qsep = None
        qhdr = None
        model_loaded = None

        if NIRS_csv:
            export_name += str(NIRS_csv.name[:-4])

            # Run each detection helper once; fall back to the first option when the detected
            # value is not offered instead of letting list.index raise ValueError.
            sep_options = [";", ","]
            detected_sep = str(find_delimiter('data/'+NIRS_csv.name))
            sep_default = sep_options.index(detected_sep) if detected_sep in sep_options else 0
            qsep = file_column.selectbox("Select csv separator - _detected_: " + detected_sep, options=sep_options, index=sep_default, key=2)

            hdr_options = ["no", "yes"]
            detected_hdr = str(find_col_index('data/'+NIRS_csv.name))
            hdr_default = hdr_options.index(detected_hdr) if detected_hdr in hdr_options else 0
            qhdr = file_column.selectbox("indexes column in csv? - _detected_: " + detected_hdr, options=hdr_options, index=hdr_default, key=3)

            model_column.write("Load your saved predictive model")
            model_name_import = model_column.selectbox('Choose file:', options=os.listdir('data/models/'), key = 21)

            # The selectbox yields None when the models folder is empty, so test for a usable
            # name rather than comparing against a single space.
            if model_name_import and model_name_import != ' ':
                export_name += '_with_' + str(model_name_import[:-4])
                with open('data/models/'+ model_name_import,'rb') as f:
                    # NOTE(security): joblib.load unpickles arbitrary objects — only load model
                    # files that come from a trusted source.
                    model_loaded = joblib.load(f)
                # Identity check: a fitted model object may define its own truthiness.
                if model_loaded is not None:
                    model_column.success("The model has been loaded successfully", icon="")
        result = ''

        if st.button("Predict"):
            if NIRS_csv is None or model_loaded is None:
                st.error('Select NIRS data and a saved model before predicting')
            else:
                # use prediction function from application_functions.py to predict chemical values
                result = prediction(NIRS_csv, qsep, qhdr, model_loaded)
                st.write('Predicted values are: ')
                st.dataframe(result.T)

                # Make sure the target folder exists before writing the predictions.
                os.makedirs(export_folder, exist_ok=True)
                pd.DataFrame(result).to_csv(export_folder + export_name + '.csv')
                # export to local drive - Download
                download_results(export_folder + export_name + '.csv', export_name + '.csv')

                # create a report with information on the prediction
                ## see https://stackoverflow.com/a/59578663