diff --git a/app.py b/app.py
index 67673eebdc0b7aba67bcbad09c6b9428c56aee2f..f9cb557bdeef289ffb47be43c2ab12099ee5c75b 100644
--- a/app.py
+++ b/app.py
@@ -8,7 +8,7 @@ import pandas as pd
 import plotly.express as px
 from sklearn.cluster import KMeans as km
 from sklearn.metrics import pairwise_distances_argmin_min
-from application_functions import pca_maker, model, predict, find_delimiter, umap_maker
+from application_functions import pca_maker, model, predict, find_delimiter, umap_maker, find_col_index
 
 # load images for web interface
 img_sselect = Image.open("images\sselect.JPG")
@@ -43,12 +43,11 @@ with st.container():
     # Select list for CSV delimiter
     psep = settings_column.selectbox("Select csv separator - _detected_: " + str(find_delimiter('data/'+sselectx_csv.name)), options=[";", ","], index=[";", ","].index(str(find_delimiter('data/'+sselectx_csv.name))), key=9)
     # Select list for CSV header True / False
-    phdr = settings_column.selectbox("indexes column in csv?", options=["no", "yes"], key=31)
+    phdr = settings_column.selectbox("indexes column in csv? - _detected_: " + str(find_col_index('data/'+sselectx_csv.name)), options=["no", "yes"], index=["no", "yes"].index(str(find_col_index('data/'+sselectx_csv.name))), key=31)
     if phdr == 'yes':
         col = 0
     else:
         col = False
-
     data_import = pd.read_csv(sselectx_csv, sep=psep, index_col=col)
     # Select type of plot
     plot_type=['', 'pca','umap']
diff --git a/application_functions.py b/application_functions.py
index 364b10885b0db5a317cd69b52e4522ab136568a2..c8ba4d8b3ca83eebcbb0c8af35f3cc0ad3f896b8 100644
--- a/application_functions.py
+++ b/application_functions.py
@@ -19,7 +19,28 @@ def find_delimiter(filename):
     with open(filename) as fp:
         delimiter = sniffer.sniff(fp.read(5000)).delimiter
     return delimiter
-
+
+
+def find_col_index(filename):
+    """Guess whether the first column of a CSV file is an index column.
+
+    A 3-row sample is read after skipping the first three lines of the file,
+    using the delimiter reported by find_delimiter. Returns 'yes' when the
+    first sampled column is not float64, otherwise 'no'. Files too short to
+    yield a sample return 'no' instead of raising.
+    """
+    try:
+        with open(filename) as fp:
+            lines = pd.read_csv(fp, skiprows=3, nrows=3, index_col=False, sep=str(find_delimiter(filename)))
+    except pd.errors.EmptyDataError:
+        # fewer than four lines in the file: nothing to sample
+        return 'no'
+    if lines.empty:
+        # header-only sample gives an object dtype, which would read as 'yes'
+        return 'no'
+    return 'yes' if lines.iloc[:, 0].dtypes != np.float64 else 'no'
+
+
 # detection of columns categories and scaling
 def col_cat(data_import):
     # detect numerical and categorical columns in the csv