Newer
Older
from Packages import *
st.set_page_config(page_title="NIRS Utils", page_icon=":goat:", layout="wide")
from Modules import *
from Class_Mod.DATA_HANDLING import *
################################### Data Loading and Visualization ########################################
container1 = st.container(border=True)
col2, col1 = st.columns([3, 1])
container2 = st.container(border=True)
container2.header("Exploratory Data Analysis-Multivariable Data Analysis", divider='blue')
scores, loadings, pc = st.columns([2, 3, 0.5])
influence, hotelling, qexp = st.columns([2, 2, 1])
with container1:
col2.header("Spectral Data Visualization", divider='blue')
# loader for csv file containing NIRS spectra
sselectx_csv = col1.file_uploader("Load NIRS Data", type=["csv","dx"], help=" :mushroom: select a csv matrix with samples as rows and lambdas as columns", key=5)
if sselectx_csv is not None:
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
test = sselectx_csv.name[sselectx_csv.name.find('.'):]
if test== '.csv':
with col1:
# Select list for CSV delimiter
psep = st.selectbox("Select csv separator - _detected_: " + str(find_delimiter('data/'+sselectx_csv.name)), options=[";", ","], index=[";", ","].index(str(find_delimiter('data/'+sselectx_csv.name))), key=9)
# Select list for CSV header True / False
phdr = st.selectbox("indexes column in csv? - _detected_: " + str(find_col_index('data/'+sselectx_csv.name)), options=["no", "yes"], index=["no", "yes"].index(str(find_col_index('data/'+sselectx_csv.name))), key=31)
if phdr == 'yes':
col = 0
else:
col = False
data_import = pd.read_csv(sselectx_csv, sep=psep, index_col=col)
st.success("The data have been loaded successfully", icon="✅")
## Visualize spectra
with col2:
fig, ax = plt.subplots(figsize = (30,7))
data_import.T.plot(legend=False, ax = ax, color = 'blue')
ax.set_xlabel('Wavelength/Wavenumber', fontsize=18)
ax.set_ylabel('Signal', fontsize=18)
plt.margins(x = 0)
st.pyplot(fig)
st.write("Summary")
info = pd.DataFrame({'N':[data_import.shape[0]],
'Min': [np.min(data_import)],
'Max':[np.max(data_import)],}, index = ['Values']).T
info.rename_axis('information')
st.table(data=info)
elif test == '.dx':
# Create a temporary file to save the uploaded file
with NamedTemporaryFile(delete=False, suffix=".dx") as tmp:
tmp.write(sselectx_csv.read())
tmp_path = tmp.name
with col1:
data = DxRead(path = tmp_path)
data_import = data.specs_df_
st.success("The data have been loaded successfully", icon="✅")
## Visualize spectra
with col2:
fig, ax = plt.subplots(figsize = (30,7))
data_import.T.plot(legend=False, ax = ax, color = 'blue')
ax.set_xlabel('Wavelength/Wavenumber', fontsize=18)
ax.set_ylabel('Signal', fontsize=18)
plt.margins(x = 0)
st.pyplot(fig)
st.write("Summary")
info = pd.DataFrame({'N':[data_import.shape[0]],
'Min': [np.min(data_import)],
'Max':[np.max(data_import)],}, index = ['Values']).T
info.rename_axis('information')
st.table(data=info)
os.unlink(tmp_path)
######################################################################################
############################## Exploratory data analysis ###############################
with container2:
if sselectx_csv is not None:
plot_type=['', 'PCA','UMAP', 'NMF']
cluster_methods = ['', 'Kmeans','UMAP', 'AP']
with pc:
type_plot = st.selectbox("Dimensionality reduction techniques: ", options=plot_type, key=37)
type_cluster = st.selectbox("Clustering techniques: ", options=cluster_methods, key=38)
# compute UMAP - umap_maker in application_functions.py
if type_plot == 'PCA':
model = LinearPCA(data_import, Ncomp=5)
elif type_plot =='UMAP':
model = Umap(x = data_import, n_components = 5, n_neighbors = 20 , min_dist = 0)
if type_plot in ['PCA', 'UMAP']:
# add 2 select lists to choose which component to plot
axis1 = pc.selectbox("x-axis", options = model.scores_.columns, index=0)
axis2 = pc.selectbox("y-axis", options = model.scores_.columns, index=1)
axis3 = pc.selectbox("z-axis", options = model.scores_.columns, index=2)
if type_cluster == 'Kmeans':
scsc = pd.concat([model.scores_.loc[:,axis1], model.scores_.loc[:,axis2], model.scores_.loc[:,axis3]], axis = 1)
cl = Sk_Kmeans(scsc, max_clusters = 30)
with scores:
t = model.scores_
if type_cluster in ['Kmeans','UMAP', 'AP']:
st.write('Scree plot')
fig2 = px.scatter(cl.inertia_.T, y = 'inertia')
st.plotly_chart(fig2)
ncluster = st.number_input(min_value=2, max_value=30, value=3, label = 'Select the desired number of clusters')
data, colors = cl.fit_optimal(nclusters=ncluster)
#fig = px.scatter(data, x=axis1, y=axis2, color= colors)
st.write('Scores plot')
fig = px.scatter_3d(data, x=axis1, y=axis2, z = axis3, color=colors)
else:
fig = px.scatter_3d(t, x=axis1, y=axis2, z = axis3)
st.plotly_chart(fig)
if type_plot =='PCA':
with loadings:
st.write('Loadings plot')
p = model.loadings_
pp = pd.concat([p, pd.DataFrame(np.arange(p.shape[0]), index=p.index, columns=['wl'])], axis =1)
df1 = pp.melt(id_vars="wl")
fig = px.line(df1, x = 'wl', y = 'value', color='variable')
fig.update_layout(
legend=dict(x=1, y=0,
font=dict(
family="Courier", size=12, color="black"),
bordercolor="Black", borderwidth=2)
)
st.plotly_chart(fig, use_container_width = True)
with influence:
st.write('Influence plot')
ax1 = st.selectbox("Component", options=model.scores_.columns, index=3)
leverage = model.leverage_
residuals = model.residuals_
fig = px.scatter(x=leverage[ax1], y=residuals[ax1], color = leverage[ax1]*residuals[ax1]).update_layout(xaxis_title="Leverage",yaxis_title="Residuals")
st.plotly_chart(fig)
with hotelling:
st.write('T²-Hotelling vs Q residuals plot')
ax2 = st.selectbox("Component", options=model.scores_.columns, index=4)
hotelling = model.hotelling_
fig = px.scatter(t, x=hotelling[ax2], y=residuals[ax2]).update_layout(xaxis_title="T²",yaxis_title="Residuals")
st.plotly_chart(fig)
else:
st.markdown('Select a dimensionality reduction technique from the dropdown list')