From 8915f27a3e4aa259d09e8d4d3fac82d1e9a7eb09 Mon Sep 17 00:00:00 2001
From: DIANE <abderrahim.diane@cefe.cnrs.fr>
Date: Mon, 15 Apr 2024 09:48:34 +0200
Subject: [PATCH] color filters

---
 pages/1-samples_selection.py | 65 ++++++++++++++++++------------------
 1 file changed, 33 insertions(+), 32 deletions(-)

diff --git a/pages/1-samples_selection.py b/pages/1-samples_selection.py
index 76c2454..121d467 100644
--- a/pages/1-samples_selection.py
+++ b/pages/1-samples_selection.py
@@ -12,58 +12,57 @@ if st.session_state["interface"] == 'simple':
     hide_pages("Predictions")
 
 ################################### Data Loading and Visualization ########################################
-# container1 = st.header("Data loading",border=True)
 col2, col1 = st.columns([3, 1])
 col1.header("Data Loading", divider='blue')
 col2.header("Spectral Data Visualization", divider='blue')
 
 ## Preallocation of data structure
-data_import = pd.DataFrame
+spectra = pd.DataFrame
 meta_data = pd.DataFrame
 selected_samples = pd.DataFrame
 
-# loader for csv file containing NIRS spectra
-sselectx_csv = col1.file_uploader("Load NIRS Data", type=["csv","dx"], help=" :mushroom: select a csv matrix with samples as rows and lambdas as columns", key=5)
+# loader for datafile
+data_file = col1.file_uploader("Load NIRS Data", type=["csv","dx"], help=" :mushroom: select a csv matrix with samples as rows and lambdas as columns", key=5)
 
-#with container1:
-if sselectx_csv:
-    test = sselectx_csv.name[sselectx_csv.name.find('.'):]
+if data_file:
+    # Retrieve the extension of the file
+    test = data_file.name[data_file.name.find('.'):]
+
+    ## Load .csv file
     if test== '.csv':
         with col1:
             # Select list for CSV delimiter
-            psep = st.selectbox("Select csv separator - _detected_: " + str(find_delimiter('data/'+sselectx_csv.name)), options=[";", ","], index=[";", ","].index(str(find_delimiter('data/'+sselectx_csv.name))), key=9)
+            psep = st.selectbox("Select csv separator - _detected_: " + str(find_delimiter('data/'+data_file.name)), options=[";", ","], index=[";", ","].index(str(find_delimiter('data/'+data_file.name))), key=9)
             # Select list for CSV header True / False
-            phdr = st.selectbox("indexes column in csv? - _detected_: " + str(find_col_index('data/'+sselectx_csv.name)), options=["no", "yes"], index=["no", "yes"].index(str(find_col_index('data/'+sselectx_csv.name))), key=31)
+            phdr = st.selectbox("indexes column in csv? - _detected_: " + str(find_col_index('data/'+data_file.name)), options=["no", "yes"], index=["no", "yes"].index(str(find_col_index('data/'+data_file.name))), key=31)
             if phdr == 'yes':
                 col = 0
             else:
                 col = False
-            imp = pd.read_csv(sselectx_csv, sep=psep, index_col=col)
-            data_import = col_cat(imp)[0]
+            imp = pd.read_csv(data_file, sep=psep, index_col=col)
+            spectra = col_cat(imp)[0]
             meta_data = col_cat(imp)[1]
             st.success("The data have been loaded successfully", icon="✅")
 
+    ## Load .dx file
     elif test == '.dx':
         # Create a temporary file to save the uploaded file
         with NamedTemporaryFile(delete=False, suffix=".dx") as tmp:
-            tmp.write(sselectx_csv.read())
+            tmp.write(data_file.read())
             tmp_path = tmp.name
             with col1:
-                _, data_import, meta_data = read_dx(file = tmp_path)
+                _, spectra, meta_data = read_dx(file = tmp_path)
                 st.success("The data have been loaded successfully", icon="✅")
         os.unlink(tmp_path)
 
-if not data_import.empty:
-    ## Visualize spectra
+## Visualize spectra
+if not spectra.empty:
     with col2:
-        fig = plot_spectra(data_import)
-
-        #plt.annotate(text = info.T, xy =(m, info.loc[:,"Max"]), size=20, color = 'black', backgroundcolor='red')
+        fig = plot_spectra(spectra)
         st.pyplot(fig)
 
@@ -73,20 +72,25 @@ container2.header("Exploratory Data Analysis-Multivariable Data Analysis", divid
 scores, loadings, pc = st.columns([2, 3, 0.5])
 influence, hotelling, qexp = st.columns([2, 2, 1])
 
-dim_red_methods=['', 'PCA','UMAP', 'NMF']
-cluster_methods = ['', 'Kmeans','HDBSCAN', 'AP']
-dr_model = None
-cl_model = None
+dim_red_methods=['', 'PCA','UMAP', 'NMF'] # List of dimensionality reduction algos
+cluster_methods = ['', 'Kmeans','HDBSCAN', 'AP'] # List of clustering algos
+
+dr_model = None # dimensionality reduction model
+cl_model = None # clustering model
 
 # Dimensionality reduction
-t = pd.DataFrame
-if not data_import.empty:
+t = pd.DataFrame # scores
+p = pd.DataFrame # loadings
+labels = []
+if not spectra.empty:
     dim_red_method = pc.selectbox("Dimensionality reduction techniques: ", options = dim_red_methods, key = 37)
     clus_method = pc.selectbox("Clustering techniques: ", options = cluster_methods, key = 38)
 
+    xc = standardize(spectra)
+
     if dim_red_method == dim_red_methods[1]:
-        dr_model = LinearPCA(data_import, Ncomp=5)
+        dr_model = LinearPCA(xc, Ncomp=5)
     elif dim_red_method == dim_red_methods[2]:
-        dr_model = Umap(x = data_import, n_components = 5, n_neighbors = 20 , min_dist = 0)
+        dr_model = Umap(x = xc, n_components = 5, n_neighbors = 20 , min_dist = 0)
 
     if dr_model:
         axis1 = pc.selectbox("x-axis", options = dr_model.scores_.columns, index=0)
@@ -96,7 +100,6 @@ if not data_import.empty:
 
 
 # clustering
-labels = pd.DataFrame
 if not t.empty:
     # Clustering
     if clus_method == cluster_methods[1]:
@@ -106,21 +109,19 @@ if not t.empty:
             scores.plotly_chart(fig2)
 
         data, labels = cl_model.fit_optimal(nclusters = ncluster)
-    elif clus_method == cluster_methods[1]:
+    elif clus_method == cluster_methods[2]:
         from hdbscan import HDBSCAN_function
         labels, hdbscan_score = HDBSCAN_function(t, min_cluster_size=10)
 
 
 ##### Plots
-
 ## Scores
-
 if not t.empty:
     with scores:
         st.write('Scores plot')
         # scores plot with clustering
-        if not pd.DataFrame(labels).empty:
+        if list(labels):
             fig = px.scatter_3d(t, x=axis1, y=axis2, z = axis3, color = labels)
         else:
             # scores plot with metadata
@@ -140,7 +141,7 @@ if not t.empty:
 
 
 
-if not data_import.empty:
+if not spectra.empty:
     if dim_red_method == dim_red_methods[1]:
         with loadings:
             st.write('Loadings plot')
--
GitLab