From a4a322a42e29182ded7b89f0f4e160c12a01552e Mon Sep 17 00:00:00 2001
From: Nicolas Barthes <nicolas.barthes@cnrs.fr>
Date: Fri, 22 Mar 2024 18:11:34 +0100
Subject: [PATCH] Autodetect whether the CSV has an index column (yes/no) when importing data

---
 app.py                   | 5 ++---
 application_functions.py | 6 +++++-
 2 files changed, 7 insertions(+), 4 deletions(-)

diff --git a/app.py b/app.py
index 67673ee..f9cb557 100644
--- a/app.py
+++ b/app.py
@@ -8,7 +8,7 @@ import pandas as pd
 import plotly.express as px
 from sklearn.cluster import KMeans as km
 from sklearn.metrics import pairwise_distances_argmin_min
-from application_functions import pca_maker, model, predict, find_delimiter, umap_maker
+from application_functions import pca_maker, model, predict, find_delimiter, umap_maker, find_col_index
 
 # load images for web interface
 img_sselect = Image.open("images\sselect.JPG")
@@ -43,12 +43,11 @@ with st.container():
         # Select list for CSV delimiter
         psep = settings_column.selectbox("Select csv separator - _detected_: " + str(find_delimiter('data/'+sselectx_csv.name)), options=[";", ","], index=[";", ","].index(str(find_delimiter('data/'+sselectx_csv.name))), key=9)
         # Select list for CSV header True / False
-        phdr = settings_column.selectbox("indexes column in csv?", options=["no", "yes"], key=31)
+        phdr = settings_column.selectbox("indexes column in csv? - _detected_: " + str(find_col_index('data/'+sselectx_csv.name)), options=["no", "yes"], index=["no", "yes"].index(str(find_col_index('data/'+sselectx_csv.name))), key=31)
         if phdr == 'yes':
             col = 0
         else:
             col = False
-
         data_import = pd.read_csv(sselectx_csv, sep=psep, index_col=col)
         # Select type of plot
         plot_type=['', 'pca','umap']
diff --git a/application_functions.py b/application_functions.py
index 364b108..c8ba4d8 100644
--- a/application_functions.py
+++ b/application_functions.py
@@ -19,7 +19,11 @@ def find_delimiter(filename):
     with open(filename) as fp:
         delimiter = sniffer.sniff(fp.read(5000)).delimiter
     return delimiter
-
+def find_col_index(filename):
+    """Guess if the first CSV column is an index: 'no' when a 3-row sample of it parses as float64, else 'yes'."""
+    with open(filename) as handle:
+        sample = pd.read_csv(handle, skiprows=3, nrows=3, index_col=False, sep=str(find_delimiter(filename)))
+    return 'no' if sample.iloc[:, 0].dtypes == np.float64 else 'yes'
 # detection of columns categories and scaling
 def col_cat(data_import):
     # detect numerical and categorical columns in the csv
-- 
GitLab