Newer
Older
from Packages import *
# Configure the browser tab (title and icon) and use the wide page layout.
# NOTE(review): this call sits between the star-imports, presumably because
# Streamlit expects set_page_config to run before any other Streamlit command
# and the modules imported below may issue such commands on import — confirm.
st.set_page_config(page_title="NIRS Utils", page_icon=":goat:", layout="wide")
from Modules import *
from Class_Mod.DATA_HANDLING import *
# Make sure the "interface" key always exists in the session state
# (it stays None until a mode has been stored under that key).
interface_mode = st.session_state.get('interface')
st.session_state["interface"] = interface_mode

# In the 'simple' interface the "Predictions" page is hidden.
# NOTE(review): hide_pages receives a plain string here, not a list — confirm
# this matches the helper's expected argument type.
if interface_mode == 'simple':
    hide_pages("Predictions")
################################### Data Loading and Visualization ########################################
# Page layout: a wide column (col2) for the spectra plot and a narrow one (col1)
# for the upload widgets.
col2, col1 = st.columns([3, 1])
col1.header("Data Loading", divider='blue')
col2.header("Spectral Data Visualization", divider='blue')

## Preallocation of data structure
# Use empty DataFrame *instances* (not the bare class) so that `.empty` is a
# real boolean instead of a property object that merely happens to be truthy.
data_import = pd.DataFrame()
meta_data = pd.DataFrame()
selected_samples = pd.DataFrame()

# Loader for the csv / dx file containing NIRS spectra
sselectx_csv = col1.file_uploader("Load NIRS Data", type=["csv","dx"], help=" :mushroom: select a csv matrix with samples as rows and lambdas as columns", key=5)

if sselectx_csv:
    # Real file extension: text after the LAST dot, lower-cased, so names such
    # as "batch.2024.csv" or "SPECTRA.DX" are still routed to the right reader.
    test = os.path.splitext(sselectx_csv.name)[1].lower()

    if test == '.csv':
        with col1:
            # NOTE(review): detection reads 'data/<uploaded name>' from disk rather
            # than the uploaded buffer — confirm the file is expected to exist there.
            detection_path = 'data/' + sselectx_csv.name

            # Select list for CSV delimiter (detected value preselected when it is
            # one of the offered options, otherwise fall back to the first option).
            sep_options = [";", ","]
            detected_sep = str(find_delimiter(detection_path))
            sep_index = sep_options.index(detected_sep) if detected_sep in sep_options else 0
            psep = st.selectbox("Select csv separator - _detected_: " + detected_sep, options=sep_options, index=sep_index, key=9)

            # Select list for CSV index column yes / no (same fallback rule).
            hdr_options = ["no", "yes"]
            detected_hdr = str(find_col_index(detection_path))
            hdr_index = hdr_options.index(detected_hdr) if detected_hdr in hdr_options else 0
            phdr = st.selectbox("indexes column in csv? - _detected_: " + detected_hdr, options=hdr_options, index=hdr_index, key=31)

            # index_col=0 uses the first column as row index; False disables it.
            col = 0 if phdr == 'yes' else False
            imp = pd.read_csv(sselectx_csv, sep=psep, index_col=col)

            # col_cat is called once; element 0 is used as the spectra table and
            # element 1 as the metadata table.
            categorized = col_cat(imp)
            data_import = categorized[0]
            meta_data = categorized[1]
            st.success("The data have been loaded successfully", icon="✅")

    elif test == '.dx':
        # read_dx is given a file path, so the upload is first written to a
        # temporary file. The handle is closed before reading so the content is
        # fully flushed to disk.
        with NamedTemporaryFile(delete=False, suffix=".dx") as tmp:
            tmp.write(sselectx_csv.read())
            tmp_path = tmp.name
        try:
            with col1:
                _, data_import, meta_data = read_dx(file = tmp_path)
                st.success("The data have been loaded successfully", icon="✅")
        finally:
            # Always remove the temporary file, even if parsing failed.
            os.unlink(tmp_path)

    if not data_import.empty:
        ## Visualize spectra
        with col2:
            fig = plot_spectra(data_import)
            st.pyplot(fig)
############################## Exploratory data analysis ###############################
# Bordered container that carries the section header.
container2 = st.container(border=True)
container2.header("Exploratory Data Analysis-Multivariable Data Analysis", divider='blue')

# Two rows of page-level columns (created with st.columns, not inside container2).
# First row: scores plot, loadings plot and a narrow column for the selectors.
eda_top_row = st.columns([2, 3, 0.5])
scores, loadings, pc = eda_top_row
# Second row: influence plot, T²-Hotelling plot and a third, narrower column.
eda_bottom_row = st.columns([2, 2, 1])
influence, hotelling, qexp = eda_bottom_row
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
# Available techniques; the empty first entry means "nothing selected".
dim_red_methods = ['', 'PCA', 'UMAP', 'NMF']
cluster_methods = ['', 'Kmeans', 'HDBSCAN', 'AP']
dr_model = None
cl_model = None

# Dimensionality reduction
# `t` holds the three score columns chosen for the 3D plot; it stays an empty
# DataFrame instance until a model has been fitted.
t = pd.DataFrame()
if not data_import.empty:
    dim_red_method = pc.selectbox("Dimensionality reduction techniques: ", options = dim_red_methods, key = 37)
    clus_method = pc.selectbox("Clustering techniques: ", options = cluster_methods, key = 38)

    if dim_red_method == dim_red_methods[1]:
        dr_model = LinearPCA(data_import, Ncomp=5)
    elif dim_red_method == dim_red_methods[2]:
        dr_model = Umap(x = data_import, n_components = 5, n_neighbors = 20 , min_dist = 0)
    # NOTE(review): 'NMF' is offered in the list but no model is built for it here.

    # Explicit None check: do not rely on the truthiness of a model object.
    if dr_model is not None:
        axis1 = pc.selectbox("x-axis", options = dr_model.scores_.columns, index=0)
        axis2 = pc.selectbox("y-axis", options = dr_model.scores_.columns, index=1)
        axis3 = pc.selectbox("z-axis", options = dr_model.scores_.columns, index=2)
        t = pd.concat([dr_model.scores_.loc[:,axis1], dr_model.scores_.loc[:,axis2], dr_model.scores_.loc[:,axis3]], axis = 1)

# Clustering
# Empty DataFrame instance (not the class) so that `pd.DataFrame(labels).empty`
# below evaluates cleanly when no clustering technique is selected.
labels = pd.DataFrame()
if not t.empty:
    if clus_method == cluster_methods[1]:
        # K-means: let the user choose the cluster count and show the inertia curve.
        ncluster = scores.number_input(min_value=2, max_value=30, value=3, label = 'Select the desired number of clusters')
        cl_model = Sk_Kmeans(t, max_clusters = 30)
        fig2 = px.scatter(cl_model.inertia_.T, y = 'inertia')
        scores.plotly_chart(fig2)
        data, labels = cl_model.fit_optimal(nclusters = ncluster)
    elif clus_method == cluster_methods[2]:
        # HDBSCAN (this branch previously repeated the K-means condition and
        # could never run).
        # NOTE(review): confirm that `hdbscan` really exposes HDBSCAN_function;
        # it may be a project helper that should be imported from elsewhere.
        from hdbscan import HDBSCAN_function
        labels, hdbscan_score = HDBSCAN_function(t, min_cluster_size=10)
    # NOTE(review): 'AP' is offered in the list but has no implementation here.

    with scores:
        st.write('Scores plot')
        if not pd.DataFrame(labels).empty:
            # scores plot with clustering
            fig = px.scatter_3d(t, x=axis1, y=axis2, z = axis3, color = labels)
        elif not meta_data.empty:
            # scores plot with metadata (first metadata column is not offered)
            group_columns = meta_data.columns[1:]
            col = st.selectbox('filter', options= group_columns)
            # NOTE(review): `col` is a column label, so `col == 0` only matches an
            # integer label 0 — confirm the intended "no grouping" sentinel.
            if col == 0:
                fig = px.scatter_3d(t, x=axis1, y=axis2, z = axis3)
            else:
                # Cast to str before lower-casing so numeric metadata columns
                # do not raise a TypeError.
                group_values = meta_data[col].astype(str).str.lower().tolist()
                fig = px.scatter_3d(t, x=axis1, y=axis2, z = axis3, color = group_values)
        else:
            # scores plot with neither metadata nor clustering
            fig = px.scatter_3d(t, x=axis1, y=axis2, z = axis3)
        fig.update_traces(marker=dict(size=4))
        st.plotly_chart(fig)
# Diagnostics that are only drawn when data are loaded and PCA is the selected
# technique. The second test is only evaluated when the first one passes.
if not data_import.empty and dim_red_method == dim_red_methods[1]:
    with loadings:
        st.write('Loadings plot')
        p = dr_model.loadings_
        # Positional counter 0..n-1 aligned on the loadings index, used as x-axis.
        wl_counter = pd.DataFrame(np.arange(p.shape[0]), index=p.index, columns=['wl'])
        pp = pd.concat([p, wl_counter], axis =1)
        # Long format: one row per (wl, component) pair for plotly express.
        df1 = pp.melt(id_vars="wl")
        fig = px.line(df1, x = 'wl', y = 'value', color='variable')
        legend_style = dict(
            x=1,
            y=0,
            font=dict(family="Courier", size=12, color="black"),
            bordercolor="Black",
            borderwidth=2,
        )
        fig.update_layout(legend=legend_style)
        st.plotly_chart(fig, use_container_width = True)

    with influence:
        st.write('Influence plot')
        # index=3 preselects the fourth score column.
        ax1 = st.selectbox("Component", options=dr_model.scores_.columns, index=3)
        leverage = dr_model.leverage_
        residuals = dr_model.residuals_
        # Points are coloured by the product leverage * residual.
        fig = px.scatter(x=leverage[ax1], y=residuals[ax1], color = leverage[ax1]*residuals[ax1])
        fig = fig.update_layout(xaxis_title="Leverage",yaxis_title="Residuals")
        st.plotly_chart(fig)

    with hotelling:
        st.write('T²-Hotelling vs Q residuals plot')
        # NOTE(review): from here on the name `hotelling` refers to the model's
        # hotelling_ values instead of the Streamlit column it named before.
        hotelling = dr_model.hotelling_
        # index=4 preselects the fifth score column.
        ax2 = st.selectbox("Component", options=dr_model.scores_.columns, index=4)
        fig = px.scatter(t, x=hotelling[ax2], y=residuals[ax2])
        fig = fig.update_layout(xaxis_title="T²",yaxis_title="Residuals")
        st.plotly_chart(fig)