# 1-samples_selection.py

from Packages import *
st.set_page_config(page_title="NIRS Utils", page_icon=":goat:", layout="wide")
from Modules import *
from Class_Mod.DATA_HANDLING import *

################################### Data Loading and Visualization ########################################
# Page skeleton: every placeholder used by the sections below is created here,
# in display order, before any content is written into it.

# Bordered container opened later by the data loading / visualisation section.
container1 = st.container(border=True)
# Two columns with relative widths 3:1. The first (wider) one is bound to
# `col2` (spectra plot) and the second to `col1` (file loader).
# These columns are created with `st.columns`, not `container1.columns`.
col2, col1 = st.columns([3, 1])


# Bordered container for the exploratory analysis section and its header.
container2 = st.container(border=True)
container2.header("Exploratory Data Analysis-Multivariable Data Analysis", divider='blue')
# First row: scores plot, loadings plot, and a narrow column for the selectors.
scores, loadings, pc = st.columns([2, 3, 0.5])
# Second row: influence plot, Hotelling plot, and `qexp` (left empty below).
influence, hotelling, qexp = st.columns([2, 2, 1])


with container1:
    col1.header("NIRS Data Loading", divider='blue')
    col2.header("Spectral Data Visualization", divider='blue')

    with col1:
        # Uploader for the CSV file holding the NIRS spectra.
        sselectx_csv = st.file_uploader("Load NIRS Data", type="csv", help=" :mushroom: select a csv matrix with samples as rows and lambdas as columns", key=5)
        if sselectx_csv is not None:
            # NOTE(review): the detection helpers receive a path under 'data/'
            # built from the uploaded file's name, not the uploaded buffer
            # itself -- confirm the file is expected to exist at that path.
            csv_path = 'data/' + sselectx_csv.name

            # Separator selector, pre-selected from the detected delimiter.
            sep_options = [";", ","]
            detected_sep = str(find_delimiter(csv_path))
            # Fall back to the first option instead of raising ValueError when
            # the detected delimiter is not one of the offered choices.
            sep_index = sep_options.index(detected_sep) if detected_sep in sep_options else 0
            psep = st.selectbox("Select csv separator - _detected_: " + detected_sep, options=sep_options, index=sep_index, key=9)

            # Index-column selector, pre-selected from the detection helper.
            hdr_options = ["no", "yes"]
            detected_hdr = str(find_col_index(csv_path))
            hdr_index = hdr_options.index(detected_hdr) if detected_hdr in hdr_options else 0
            phdr = st.selectbox("indexes column in csv? - _detected_: " + detected_hdr, options=hdr_options, index=hdr_index, key=31)

            # index_col=0 uses the first column as the index; index_col=False
            # tells pandas not to use the first column as the index.
            col = 0 if phdr == 'yes' else False
            data_import = pd.read_csv(sselectx_csv, sep=psep, index_col=col)
            st.success("The data have been loaded successfully", icon="✅")

    # Visualize the spectra once a file has been loaded.
    if sselectx_csv is not None:
        with col2:
            fig, ax = plt.subplots(figsize=(30, 7))
            # Transposed so that each row of the table becomes one line.
            data_import.T.plot(legend=False, ax=ax, color='blue')
            ax.set_xlabel('Wavelength/Wavenumber', fontsize=18)
            ax.set_ylabel('Signal', fontsize=18)
            plt.margins(x=0)
            st.pyplot(fig)
            # Release the figure: pyplot keeps every figure it created alive
            # until it is closed, and this script is re-run on each interaction.
            plt.close(fig)

            st.write("Summary")
            # Column-wise reduction followed by a reduction of the result gives
            # a single overall scalar regardless of the pandas version.
            info = pd.DataFrame({'N': [data_import.shape[0]],
                                 'Min': [data_import.min().min()],
                                 'Max': [data_import.max().max()]}, index=['Values']).T
            # rename_axis returns a new frame; keep it so the name is applied.
            info = info.rename_axis('information')
            st.table(data=info)
######################################################################################

############################## Exploratory data analysis ###############################
with container2:
    if sselectx_csv is not None:
        plot_type = ['', 'PCA', 'UMAP', 'NMF']
        cluster_methods = ['', 'Kmeans', 'UMAP', 'AP']

        with pc:
            type_plot = st.selectbox("Dimensionality reduction techniques: ", options=plot_type, key=37)
            type_cluster = st.selectbox("Clustering techniques: ", options=cluster_methods, key=38)
            # Build the model for the selected technique; a model is only
            # created for 'PCA' and 'UMAP'.
            if type_plot == 'PCA':
                model = LinearPCA(data_import, Ncomp=5)
            elif type_plot == 'UMAP':
                model = Umap(x=data_import, n_components=5, n_neighbors=20, min_dist=0)

        if type_plot in ['PCA', 'UMAP']:
            # Three selectors to choose which components go on each axis.
            axis1 = pc.selectbox("x-axis", options=model.scores_.columns, index=0)
            axis2 = pc.selectbox("y-axis", options=model.scores_.columns, index=1)
            axis3 = pc.selectbox("z-axis", options=model.scores_.columns, index=2)

            # Only 'Kmeans' creates a clustering object in this script; the
            # other entries of the list fall back to the unclustered plot
            # instead of failing on an undefined `cl`.
            use_kmeans = type_cluster == 'Kmeans'
            if use_kmeans:
                cl = Sk_Kmeans(model.scores_.loc[:, [axis1, axis2, axis3]], max_clusters=30)

            with scores:
                if use_kmeans:
                    st.write('Scree plot')
                    scree_fig = px.scatter(cl.inertia_.T, y='inertia')
                    st.plotly_chart(scree_fig)

                    ncluster = st.number_input(min_value=2, max_value=30, value=3, label='Select the desired number of clusters')
                    data, colors = cl.fit_optimal(nclusters=ncluster)
                    st.write('Scores plot')
                    fig = px.scatter_3d(data, x=axis1, y=axis2, z=axis3, color=colors)
                else:
                    if type_cluster:
                        st.warning("Only 'Kmeans' clustering is currently available; showing unclustered scores.")
                    fig = px.scatter_3d(model.scores_, x=axis1, y=axis2, z=axis3)

                fig.update_traces(marker=dict(size=4))
                st.plotly_chart(fig)

            # The remaining plots read attributes that are only used with the
            # PCA model in this script.
            if type_plot == 'PCA':
                with loadings:
                    st.write('Loadings plot')
                    p = model.loadings_
                    # Add a positional 'wl' column (0..n-1) used as the x-axis,
                    # then reshape to long format: one line per loading column.
                    pp = pd.concat([p, pd.DataFrame(np.arange(p.shape[0]), index=p.index, columns=['wl'])], axis=1)
                    df1 = pp.melt(id_vars="wl")

                    fig = px.line(df1, x='wl', y='value', color='variable')
                    fig.update_layout(
                        legend=dict(x=1, y=0,
                                    font=dict(
                                        family="Courier", size=12, color="black"),
                                    bordercolor="Black", borderwidth=2)
                    )
                    st.plotly_chart(fig)

                with influence:
                    st.write('Influence plot')
                    ax1 = st.selectbox("Component", options=model.scores_.columns, index=3)

                    leverage = model.leverage_
                    residuals = model.residuals
                    # Points are coloured by the product leverage * residual.
                    fig = px.scatter(x=leverage[ax1], y=residuals[ax1], color=leverage[ax1] * residuals[ax1])
                    st.plotly_chart(fig)

                with hotelling:
                    st.write('T²-Hotelling vs Q residuals plot')
                    ax2 = st.selectbox("Component", options=model.scores_.columns, index=4)

                    t = model.hotelling_
                    # NOTE(review): x and y are the same series, so this plots
                    # the Hotelling values against themselves although the
                    # title announces Q residuals on one axis -- confirm which
                    # model attribute should be used for the y-axis.
                    fig = px.scatter(t, x=t[ax2], y=t[ax2])
                    st.plotly_chart(fig)

                with qexp:
                    pass

        else:
            # Shown when no supported technique is selected ('' or 'NMF').
            st.markdown('Select a dimensionality reduction technique from the dropdown list')