From a4a322a42e29182ded7b89f0f4e160c12a01552e Mon Sep 17 00:00:00 2001 From: Nicolas Barthes <nicolas.barthes@cnrs.fr> Date: Fri, 22 Mar 2024 18:11:34 +0100 Subject: [PATCH] autodetect column index True or False when importing CSV data --- app.py | 5 ++--- application_functions.py | 6 +++++- 2 files changed, 7 insertions(+), 4 deletions(-) diff --git a/app.py b/app.py index 67673ee..f9cb557 100644 --- a/app.py +++ b/app.py @@ -8,7 +8,7 @@ import pandas as pd import plotly.express as px from sklearn.cluster import KMeans as km from sklearn.metrics import pairwise_distances_argmin_min -from application_functions import pca_maker, model, predict, find_delimiter, umap_maker +from application_functions import pca_maker, model, predict, find_delimiter, umap_maker, find_col_index # load images for web interface img_sselect = Image.open("images\sselect.JPG") @@ -43,12 +43,11 @@ with st.container(): # Select list for CSV delimiter psep = settings_column.selectbox("Select csv separator - _detected_: " + str(find_delimiter('data/'+sselectx_csv.name)), options=[";", ","], index=[";", ","].index(str(find_delimiter('data/'+sselectx_csv.name))), key=9) # Select list for CSV header True / False - phdr = settings_column.selectbox("indexes column in csv?", options=["no", "yes"], key=31) + phdr = settings_column.selectbox("indexes column in csv? - _detected_: " + str(find_col_index('data/'+sselectx_csv.name)), options=["no", "yes"], index=["no", "yes"].index(str(find_col_index('data/'+sselectx_csv.name))), key=31) if phdr == 'yes': col = 0 else: col = False - data_import = pd.read_csv(sselectx_csv, sep=psep, index_col=col) # Select type of plot plot_type=['', 'pca','umap'] diff --git a/application_functions.py b/application_functions.py index 364b108..c8ba4d8 100644 --- a/application_functions.py +++ b/application_functions.py @@ -19,7 +19,11 @@ def find_delimiter(filename): with open(filename) as fp: delimiter = sniffer.sniff(fp.read(5000)).delimiter return delimiter - +def find_col_index(filename): + with open(filename) as fp: + lines = pd.read_csv(fp, skiprows=3, nrows=3, index_col=False, sep=str(find_delimiter(filename))) + col_index = 'yes' if lines.iloc[:,0].dtypes != np.float64 else 'no' + return col_index # detection of columns categories and scaling def col_cat(data_import): # detect numerical and categorical columns in the csv -- GitLab