Skip to content
Snippets Groups Projects
Commit 1073b25b authored by DIANE's avatar DIANE
Browse files

new function to read csv data

parent cdf5cba6
No related branches found
No related tags found
No related merge requests found
......@@ -46,6 +46,90 @@ def col_cat(data_import):
return numerical_data, categorical_data
############## new function
def _sniff_delimiters(lines):
    """Guess the field separator and decimal mark from a few raw CSV lines.

    Parameters
    ----------
    lines : list of str
        The first lines of the file; ``lines[0]`` is treated as the header
        line and the remaining ones as data lines.

    Returns
    -------
    tuple of (str, str)
        ``(separator, decimal)`` where separator is ``','`` or ``';'`` and
        decimal is ``'.'`` or ``','``.
    """
    dots = [line.count('.') for line in lines]
    commas = [line.count(',') for line in lines]
    semicolons = [line.count(';') for line in lines]

    # A comma only wins when it is strictly more frequent than the semicolon.
    # Ties (including "neither character present") fall back to ';' so that
    # a separator is always defined.
    separator = ',' if sum(commas) > sum(semicolons) else ';'

    # The decimal mark is judged on the data lines only (header excluded).
    decimal = ',' if sum(commas[1:]) > sum(dots[1:]) else '.'

    # The same character cannot be both separator and decimal mark.
    # NOTE(review): the second condition (at most two distinct dot/comma
    # counts over the sampled lines) also forces '.', which covers files whose
    # data lines all carry the same number of decimal commas -- kept as is,
    # confirm this is the intended heuristic.
    if decimal == separator or len(set(dots + commas)) <= 2:
        decimal = '.'
    return separator, decimal


def _first_column_is_index(column):
    """Tell whether ``column`` should be used as the row index.

    The column qualifies when its values cannot be converted to floats and
    are all distinct.

    Parameters
    ----------
    column : pandas.Series
        First column of the file, without the first (header) row.

    Returns
    -------
    bool
    """
    from pandas.api.types import is_float_dtype

    if is_float_dtype(column):
        return False
    try:
        column.to_numpy().astype(float)
    except (ValueError, TypeError):
        # Not numeric: usable as an index only if no value is repeated.
        return len(set(column)) == len(column)
    # Numeric values stored as text or integers: treat as data, not labels.
    return False


def csv_loader(file):
    """Read a CSV file, guessing its separator, decimal mark and index column.

    The first three lines of the file are inspected to choose between ``','``
    and ``';'`` as separator and between ``'.'`` and ``','`` as decimal mark.
    The file is then parsed once without header to decide whether the first
    column holds row labels, and parsed a second time with the final settings.

    Parameters
    ----------
    file : str or path-like
        Path of the CSV file; it is opened with ``open`` and then passed to
        ``pandas.read_csv``.

    Returns
    -------
    tuple of (pandas.DataFrame, pandas.DataFrame)
        ``(float_data, non_float)``: the float columns and the remaining
        columns. Each is a completely empty ``DataFrame`` when no column of
        that kind exists.
    """
    import pandas as pd

    with open(file, mode='r') as csvfile:
        # readline() returns '' past the end of file, so short files are fine.
        first_lines = [csvfile.readline() for _ in range(3)]
    separator, decimal = _sniff_delimiters(first_lines)

    # First pass without header/index, only used to inspect the first column.
    raw = pd.read_csv(file, decimal=decimal, sep=separator, header=None, index_col=None)
    index_col = 0 if _first_column_is_index(raw.iloc[1:, 0]) else None

    # NOTE(review): the previous ratio-based header test always resolved to
    # header=0 (its condition `rat or np.nan` is always truthy), so the first
    # row is always taken as the header. Kept that way to preserve results.
    header = 0
    df = pd.read_csv(file, decimal=decimal, sep=separator, header=header, index_col=index_col)

    float_data = df.select_dtypes(include='float')
    if float_data.shape[1] == 0:
        float_data = pd.DataFrame()
    non_float = df.select_dtypes(exclude='float')
    if non_float.shape[1] == 0:
        non_float = pd.DataFrame()
    return float_data, non_float
def list_files(mypath, import_type):
list_files = [f for f in listdir(mypath) if isfile(join(mypath, f)) and f.endswith(import_type + '.pkl')]
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment