Skip to content
Snippets Groups Projects
Commit 8915f27a authored by DIANE
Browse files

color filters

parent 965f7e42
No related branches found
No related tags found
No related merge requests found
...@@ -12,58 +12,57 @@ if st.session_state["interface"] == 'simple': ...@@ -12,58 +12,57 @@ if st.session_state["interface"] == 'simple':
hide_pages("Predictions") hide_pages("Predictions")
################################### Data Loading and Visualization ######################################## ################################### Data Loading and Visualization ########################################
# container1 = st.header("Data loading",border=True)
col2, col1 = st.columns([3, 1]) col2, col1 = st.columns([3, 1])
col1.header("Data Loading", divider='blue') col1.header("Data Loading", divider='blue')
col2.header("Spectral Data Visualization", divider='blue') col2.header("Spectral Data Visualization", divider='blue')
## Preallocation of data structure ## Preallocation of data structure
data_import = pd.DataFrame spectra = pd.DataFrame
meta_data = pd.DataFrame meta_data = pd.DataFrame
selected_samples = pd.DataFrame selected_samples = pd.DataFrame
# loader for csv file containing NIRS spectra # loader for datafile
sselectx_csv = col1.file_uploader("Load NIRS Data", type=["csv","dx"], help=" :mushroom: select a csv matrix with samples as rows and lambdas as columns", key=5) data_file = col1.file_uploader("Load NIRS Data", type=["csv","dx"], help=" :mushroom: select a csv matrix with samples as rows and lambdas as columns", key=5)
#with container1: if data_file:
if sselectx_csv: # Retrieve the extension of the file
test = sselectx_csv.name[sselectx_csv.name.find('.'):] test = data_file.name[data_file.name.find('.'):]
## Load .csv file
if test== '.csv': if test== '.csv':
with col1: with col1:
# Select list for CSV delimiter # Select list for CSV delimiter
psep = st.selectbox("Select csv separator - _detected_: " + str(find_delimiter('data/'+sselectx_csv.name)), options=[";", ","], index=[";", ","].index(str(find_delimiter('data/'+sselectx_csv.name))), key=9) psep = st.selectbox("Select csv separator - _detected_: " + str(find_delimiter('data/'+data_file.name)), options=[";", ","], index=[";", ","].index(str(find_delimiter('data/'+data_file.name))), key=9)
# Select list for CSV header True / False # Select list for CSV header True / False
phdr = st.selectbox("indexes column in csv? - _detected_: " + str(find_col_index('data/'+sselectx_csv.name)), options=["no", "yes"], index=["no", "yes"].index(str(find_col_index('data/'+sselectx_csv.name))), key=31) phdr = st.selectbox("indexes column in csv? - _detected_: " + str(find_col_index('data/'+data_file.name)), options=["no", "yes"], index=["no", "yes"].index(str(find_col_index('data/'+data_file.name))), key=31)
if phdr == 'yes': if phdr == 'yes':
col = 0 col = 0
else: else:
col = False col = False
imp = pd.read_csv(sselectx_csv, sep=psep, index_col=col) imp = pd.read_csv(data_file, sep=psep, index_col=col)
data_import = col_cat(imp)[0] spectra = col_cat(imp)[0]
meta_data = col_cat(imp)[1] meta_data = col_cat(imp)[1]
st.success("The data have been loaded successfully", icon="") st.success("The data have been loaded successfully", icon="")
## Load .dx file
elif test == '.dx': elif test == '.dx':
# Create a temporary file to save the uploaded file # Create a temporary file to save the uploaded file
with NamedTemporaryFile(delete=False, suffix=".dx") as tmp: with NamedTemporaryFile(delete=False, suffix=".dx") as tmp:
tmp.write(sselectx_csv.read()) tmp.write(data_file.read())
tmp_path = tmp.name tmp_path = tmp.name
with col1: with col1:
_, data_import, meta_data = read_dx(file = tmp_path) _, spectra, meta_data = read_dx(file = tmp_path)
st.success("The data have been loaded successfully", icon="") st.success("The data have been loaded successfully", icon="")
os.unlink(tmp_path) os.unlink(tmp_path)
if not data_import.empty: ## Visualize spectra
## Visualize spectra if not spectra.empty:
with col2: with col2:
fig = plot_spectra(data_import) fig = plot_spectra(spectra)
#plt.annotate(text = info.T, xy =(m, info.loc[:,"Max"]), size=20, color = 'black', backgroundcolor='red')
st.pyplot(fig) st.pyplot(fig)
...@@ -73,20 +72,25 @@ container2.header("Exploratory Data Analysis-Multivariable Data Analysis", divid ...@@ -73,20 +72,25 @@ container2.header("Exploratory Data Analysis-Multivariable Data Analysis", divid
scores, loadings, pc = st.columns([2, 3, 0.5]) scores, loadings, pc = st.columns([2, 3, 0.5])
influence, hotelling, qexp = st.columns([2, 2, 1]) influence, hotelling, qexp = st.columns([2, 2, 1])
dim_red_methods=['', 'PCA','UMAP', 'NMF'] dim_red_methods=['', 'PCA','UMAP', 'NMF'] # List of dimensionality reduction algos
cluster_methods = ['', 'Kmeans','HDBSCAN', 'AP'] cluster_methods = ['', 'Kmeans','HDBSCAN', 'AP'] # List of clustering algos
dr_model = None
cl_model = None dr_model = None # dimensionality reduction model
cl_model = None # clustering model
# Dimensionality reduction # Dimensionality reduction
t = pd.DataFrame t = pd.DataFrame # scores
if not data_import.empty: p = pd.DataFrame # loadings
labels = []
if not spectra.empty:
dim_red_method = pc.selectbox("Dimensionality reduction techniques: ", options = dim_red_methods, key = 37) dim_red_method = pc.selectbox("Dimensionality reduction techniques: ", options = dim_red_methods, key = 37)
clus_method = pc.selectbox("Clustering techniques: ", options = cluster_methods, key = 38) clus_method = pc.selectbox("Clustering techniques: ", options = cluster_methods, key = 38)
xc = standardize(spectra)
if dim_red_method == dim_red_methods[1]: if dim_red_method == dim_red_methods[1]:
dr_model = LinearPCA(data_import, Ncomp=5) dr_model = LinearPCA(xc, Ncomp=5)
elif dim_red_method == dim_red_methods[2]: elif dim_red_method == dim_red_methods[2]:
dr_model = Umap(x = data_import, n_components = 5, n_neighbors = 20 , min_dist = 0) dr_model = Umap(x = xc, n_components = 5, n_neighbors = 20 , min_dist = 0)
if dr_model: if dr_model:
axis1 = pc.selectbox("x-axis", options = dr_model.scores_.columns, index=0) axis1 = pc.selectbox("x-axis", options = dr_model.scores_.columns, index=0)
...@@ -96,7 +100,6 @@ if not data_import.empty: ...@@ -96,7 +100,6 @@ if not data_import.empty:
# clustering # clustering
labels = pd.DataFrame
if not t.empty: if not t.empty:
# Clustering # Clustering
if clus_method == cluster_methods[1]: if clus_method == cluster_methods[1]:
...@@ -106,21 +109,19 @@ if not t.empty: ...@@ -106,21 +109,19 @@ if not t.empty:
scores.plotly_chart(fig2) scores.plotly_chart(fig2)
data, labels = cl_model.fit_optimal(nclusters = ncluster) data, labels = cl_model.fit_optimal(nclusters = ncluster)
elif clus_method == cluster_methods[1]: elif clus_method == cluster_methods[2]:
from hdbscan import HDBSCAN_function from hdbscan import HDBSCAN_function
labels, hdbscan_score = HDBSCAN_function(t, min_cluster_size=10) labels, hdbscan_score = HDBSCAN_function(t, min_cluster_size=10)
##### Plots ##### Plots
## Scores ## Scores
if not t.empty: if not t.empty:
with scores: with scores:
st.write('Scores plot') st.write('Scores plot')
# scores plot with clustering # scores plot with clustering
if not pd.DataFrame(labels).empty: if list(labels):
fig = px.scatter_3d(t, x=axis1, y=axis2, z = axis3, color = labels) fig = px.scatter_3d(t, x=axis1, y=axis2, z = axis3, color = labels)
else: else:
# scores plot with metadata # scores plot with metadata
...@@ -140,7 +141,7 @@ if not t.empty: ...@@ -140,7 +141,7 @@ if not t.empty:
if not data_import.empty: if not spectra.empty:
if dim_red_method == dim_red_methods[1]: if dim_red_method == dim_red_methods[1]:
with loadings: with loadings:
st.write('Loadings plot') st.write('Loadings plot')
......
0% Loading. Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment