# Streamlit page: load NIRS spectra (csv or .dx), visualise them, then explore
# them with a dimensionality-reduction model and optional clustering.
from Packages import *
st.set_page_config(page_title="NIRS Utils", page_icon=":goat:", layout="wide")
from Modules import *
from Class_Mod.DATA_HANDLING import *


def _file_extension(filename):
    """Return the lower-cased final extension of *filename*, including the dot."""
    # splitext looks at the LAST dot, so names such as "run.2024.csv" still
    # resolve to ".csv" (taking everything after the first dot would not).
    return os.path.splitext(filename)[1].lower()


def _detected_index(options, detected):
    """Return the position of *detected* in *options*, or 0 when it is absent."""
    return options.index(detected) if detected in options else 0


def _render_spectra_overview(spectra):
    """Draw all spectra on one figure and show an N / Min / Max summary table.

    Must be called inside the Streamlit container that should receive the
    output (the plot and table are written with plain ``st.*`` calls).
    """
    fig, ax = plt.subplots(figsize=(30, 7))
    spectra.T.plot(legend=False, ax=ax, color='blue')
    ax.set_xlabel('Wavelength/Wavenumber', fontsize=18)
    ax.set_ylabel('Signal', fontsize=18)
    plt.margins(x=0)
    st.pyplot(fig)
    # Every rerun of the script creates a new figure; close it so that
    # matplotlib does not keep them all alive.
    plt.close(fig)

    st.write("Summary")
    info = pd.DataFrame(
        {
            'N': [spectra.shape[0]],
            # Reduce column-wise then overall so a single scalar is reported
            # regardless of the installed pandas version.
            'Min': [spectra.min().min()],
            'Max': [spectra.max().max()],
        },
        index=['Values'],
    ).T
    # rename_axis returns a new frame; the result has to be kept.
    info = info.rename_axis('information')
    st.table(data=info)


################################### Data Loading and Visualization ########################################
container1 = st.container(border=True)
col2, col1 = st.columns([3, 1])

container2 = st.container(border=True)
container2.header("Exploratory Data Analysis-Multivariable Data Analysis", divider='blue')
scores, loadings, pc = st.columns([2, 3, 0.5])
influence, hotelling, qexp = st.columns([2, 2, 1])

# Stays None until a file has been parsed successfully; the analysis section
# below relies on this instead of on the uploader state alone.
data_import = None

with container1:
    col1.header("Data Loading", divider='blue')
    col2.header("Spectral Data Visualization", divider='blue')

    # loader for csv file containing NIRS spectra
    sselectx_csv = col1.file_uploader(
        "Load NIRS Data",
        type=["csv", "dx"],
        help=" :mushroom: select a csv matrix with samples as rows and lambdas as columns",
        key=5,
    )

    if sselectx_csv is not None:
        file_ext = _file_extension(sselectx_csv.name)

        if file_ext == '.csv':
            with col1:
                # NOTE(review): the detection helpers receive a path under
                # 'data/' rather than the uploaded buffer — presumably the same
                # file must also exist in that directory; confirm.
                source_path = 'data/' + sselectx_csv.name

                # Select list for CSV delimiter
                separators = [";", ","]
                detected_sep = str(find_delimiter(source_path))
                psep = st.selectbox(
                    "Select csv separator - _detected_: " + detected_sep,
                    options=separators,
                    index=_detected_index(separators, detected_sep),
                    key=9,
                )

                # Select list for CSV header True / False
                yes_no = ["no", "yes"]
                detected_hdr = str(find_col_index(source_path))
                phdr = st.selectbox(
                    "indexes column in csv? - _detected_: " + detected_hdr,
                    options=yes_no,
                    index=_detected_index(yes_no, detected_hdr),
                    key=31,
                )

                # index_col=False tells pandas not to use the first column as index.
                col = 0 if phdr == 'yes' else False
                data_import = pd.read_csv(sselectx_csv, sep=psep, index_col=col)
                st.success("The data have been loaded successfully", icon="✅")

            ## Visualize spectra
            with col2:
                _render_spectra_overview(data_import)

        elif file_ext == '.dx':
            # Create a temporary file to save the uploaded file
            with NamedTemporaryFile(delete=False, suffix=".dx") as tmp:
                tmp.write(sselectx_csv.read())
                tmp_path = tmp.name

            try:
                with col1:
                    data = DxRead(path=tmp_path)
                    data_import = data.specs_df_
                    st.success("The data have been loaded successfully", icon="✅")

                ## Visualize spectra
                with col2:
                    _render_spectra_overview(data_import)
            finally:
                # Remove the temporary copy even when parsing or plotting fails.
                os.unlink(tmp_path)

        else:
            col1.warning("Unsupported file type: " + sselectx_csv.name)

######################################################################################

############################## Exploratory data analysis ###############################
with container2:
    if data_import is not None:
        plot_type = ['', 'PCA', 'UMAP', 'NMF']
        cluster_methods = ['', 'Kmeans', 'UMAP', 'AP']

        with pc:
            type_plot = st.selectbox("Dimensionality reduction techniques: ", options=plot_type, key=37)
            type_cluster = st.selectbox("Clustering techniques: ", options=cluster_methods, key=38)

            # Build the model for the selected technique
            if type_plot == 'PCA':
                model = LinearPCA(data_import, Ncomp=5)
            elif type_plot == 'UMAP':
                model = Umap(x=data_import, n_components=5, n_neighbors=20, min_dist=0)

        if type_plot in ['PCA', 'UMAP']:
            # select lists to choose which components to plot
            axis1 = pc.selectbox("x-axis", options=model.scores_.columns, index=0)
            axis2 = pc.selectbox("y-axis", options=model.scores_.columns, index=1)
            axis3 = pc.selectbox("z-axis", options=model.scores_.columns, index=2)

            # Only K-means has a clustering object here; the other entries of
            # the list leave `cl` as None and fall back to the plain plot.
            cl = None
            if type_cluster == 'Kmeans':
                scsc = pd.concat(
                    [model.scores_.loc[:, axis1], model.scores_.loc[:, axis2], model.scores_.loc[:, axis3]],
                    axis=1,
                )
                cl = Sk_Kmeans(scsc, max_clusters=30)

            with scores:
                t = model.scores_
                if cl is not None:
                    st.write('Scree plot')
                    fig2 = px.scatter(cl.inertia_.T, y='inertia')
                    st.plotly_chart(fig2)

                    ncluster = st.number_input(
                        min_value=2, max_value=30, value=3,
                        label='Select the desired number of clusters',
                    )
                    data, colors = cl.fit_optimal(nclusters=ncluster)

                    st.write('Scores plot')
                    fig = px.scatter_3d(data, x=axis1, y=axis2, z=axis3, color=colors)
                    fig.update_traces(marker=dict(size=4))
                else:
                    if type_cluster:
                        st.warning(type_cluster + " clustering is not available yet; showing unclustered scores.")
                    fig = px.scatter_3d(t, x=axis1, y=axis2, z=axis3)
                    fig.update_traces(marker=dict(size=4))

                st.plotly_chart(fig)

            if type_plot == 'PCA':
                with loadings:
                    st.write('Loadings plot')
                    p = model.loadings_
                    # Add a running position column ('wl') to use as the x axis.
                    pp = pd.concat(
                        [p, pd.DataFrame(np.arange(p.shape[0]), index=p.index, columns=['wl'])],
                        axis=1,
                    )
                    df1 = pp.melt(id_vars="wl")
                    fig = px.line(df1, x='wl', y='value', color='variable')
                    fig.update_layout(
                        legend=dict(
                            x=1, y=0,
                            font=dict(family="Courier", size=12, color="black"),
                            bordercolor="Black", borderwidth=2,
                        )
                    )
                    st.plotly_chart(fig, use_container_width=True)

                with influence:
                    st.write('Influence plot')
                    ax1 = st.selectbox("Component", options=model.scores_.columns, index=3)
                    leverage = model.leverage_
                    residuals = model.residuals_
                    fig = px.scatter(
                        x=leverage[ax1], y=residuals[ax1],
                        color=leverage[ax1] * residuals[ax1],
                    ).update_layout(xaxis_title="Leverage", yaxis_title="Residuals")
                    st.plotly_chart(fig)

                with hotelling:
                    st.write('T²-Hotelling vs Q residuals plot')
                    # Separate name so the `hotelling` layout column is not overwritten.
                    hotelling_t2 = model.hotelling_
                    ax2 = st.selectbox("Component", options=model.scores_.columns, index=4)
                    fig = px.scatter(
                        t, x=hotelling_t2[ax2], y=residuals[ax2],
                    ).update_layout(xaxis_title="T²", yaxis_title="Residuals")
                    st.plotly_chart(fig)

        else:
            st.markdown('Select a dimensionality reduction technique from the dropdown list')