Newer
Older
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
from Packages import *
st.set_page_config(page_title="NIRS Utils", page_icon=":goat:", layout="wide")
from Modules import *
from Class_Mod.DATA_HANDLING import *
################################### Data Loading and Visualization ########################################
container1 = st.container(border=True)
col2, col1 = st.columns([3, 1])
container2 = st.container(border=True)
container2.header("Exploratory Data Analysis-Multivariable Data Analysis", divider='blue')
scores, loadings, pc = st.columns([2, 3, 0.5])
influence, hotelling, qexp = st.columns([2, 2, 1])
with container1:
col1.header("NIRS Data Loading", divider='blue')
col2.header("Spectral Data Visualization", divider='blue')
with col1:
# loader for csv file containing NIRS spectra
sselectx_csv = st.file_uploader("Load NIRS Data", type="csv", help=" :mushroom: select a csv matrix with samples as rows and lambdas as columns", key=5)
if sselectx_csv is not None:
# Select list for CSV delimiter
psep = st.selectbox("Select csv separator - _detected_: " + str(find_delimiter('data/'+sselectx_csv.name)), options=[";", ","], index=[";", ","].index(str(find_delimiter('data/'+sselectx_csv.name))), key=9)
# Select list for CSV header True / False
phdr = st.selectbox("indexes column in csv? - _detected_: " + str(find_col_index('data/'+sselectx_csv.name)), options=["no", "yes"], index=["no", "yes"].index(str(find_col_index('data/'+sselectx_csv.name))), key=31)
if phdr == 'yes':
col = 0
else:
col = False
data_import = pd.read_csv(sselectx_csv, sep=psep, index_col=col)
st.success("The data have been loaded successfully", icon="✅")
## Visualize spectra
if sselectx_csv is not None:
with col2:
fig, ax = plt.subplots(figsize = (30,7))
data_import.T.plot(legend=False, ax = ax, color = 'blue')
ax.set_xlabel('Wavelength/Wavenumber', fontsize=18)
ax.set_ylabel('Signal', fontsize=18)
plt.margins(x = 0)
st.pyplot(fig)
st.write("Summary")
info = pd.DataFrame({'N':[data_import.shape[0]],
'Min': [np.min(data_import)],
'Max':[np.max(data_import)],}, index = ['Values']).T
info.rename_axis('information')
st.table(data=info)
######################################################################################
############################## Exploratory data analysis ###############################
with container2:
if sselectx_csv is not None:
plot_type=['', 'PCA','UMAP', 'NMF']
cluster_methods = ['', 'Kmeans','UMAP', 'AP']
with pc:
type_plot = st.selectbox("Dimensionality reduction techniques: ", options=plot_type, key=37)
type_cluster = st.selectbox("Clustering techniques: ", options=cluster_methods, key=38)
# compute UMAP - umap_maker in application_functions.py
if type_plot == 'PCA':
model = LinearPCA(data_import, Ncomp=5)
elif type_plot =='UMAP':
model = Umap(x = data_import, n_components = 5, n_neighbors = 20 , min_dist = 0)
if type_plot in ['PCA', 'UMAP']:
# add 2 select lists to choose which component to plot
axis1 = pc.selectbox("x-axis", options = model.scores_.columns, index=0)
axis2 = pc.selectbox("y-axis", options = model.scores_.columns, index=1)
axis3 = pc.selectbox("z-axis", options = model.scores_.columns, index=2)
if type_cluster == 'Kmeans':
cl = Sk_Kmeans(pd.concat([model.scores_.loc[:,axis1], model.scores_.loc[:,axis2], model.scores_.loc[:,axis3]], axis = 1), max_clusters = 30)
with scores:
t = model.scores_
if type_cluster in ['Kmeans','UMAP', 'AP']:
st.write('Scree plot')
fig2 = px.scatter(cl.inertia_.T, y = 'inertia')
st.plotly_chart(fig2)
ncluster = st.number_input(min_value=2, max_value=30, value=3, label = 'Select the desired number of clusters')
data, colors = cl.fit_optimal(nclusters=ncluster)
#fig = px.scatter(data, x=axis1, y=axis2, color= colors)
st.write('Scores plot')
fig = px.scatter_3d(data, x=axis1, y=axis2, z = axis3, color=colors)
else:
fig = px.scatter_3d(t, x=axis1, y=axis2, z = axis3)
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
st.plotly_chart(fig)
if type_plot =='PCA':
with loadings:
st.write('Loadings plot')
p = model.loadings_
pp = pd.concat([p, pd.DataFrame(np.arange(p.shape[0]), index=p.index, columns=['wl'])], axis =1)
df1 = pp.melt(id_vars="wl")
fig = px.line(df1, x = 'wl', y = 'value', color='variable')
fig.update_layout(
legend=dict(x=1, y=0,
font=dict(
family="Courier", size=12, color="black"),
bordercolor="Black", borderwidth=2)
)
st.plotly_chart(fig)
with influence:
st.write('Influence plot')
ax1 = st.selectbox("Component", options=model.scores_.columns, index=3)
leverage = model.leverage_
residuals = model.residuals
fig = px.scatter(x=leverage[ax1], y=residuals[ax1], color = leverage[ax1]*residuals[ax1])
st.plotly_chart(fig)
with hotelling:
st.write('T²-Hotelling vs Q residuals plot')
ax2 = st.selectbox("Component", options=model.scores_.columns, index=4)