Newer
Older
from Packages import *
## try to automatically detect the field separator within the CSV
def find_delimiter(filename):
    """Guess the field separator used in a delimited text file.

    Only the first 5000 characters of the file are handed to
    ``csv.Sniffer`` for the detection.

    Args:
        filename: Path of the file to inspect.

    Returns:
        The delimiter character reported by the sniffer.
    """
    with open(filename) as handle:
        sample = handle.read(5000)
    dialect = csv.Sniffer().sniff(sample)
    return dialect.delimiter
def find_col_index(filename):
    """Tell whether the first column of a CSV file looks like an index.

    A three-row preview is read after skipping the first three lines of
    the file, using the separator detected by ``find_delimiter``.

    Args:
        filename: Path of the CSV file to inspect.

    Returns:
        ``'yes'`` when the first column of the preview is not ``float64``,
        ``'no'`` otherwise.
    """
    separator = str(find_delimiter(filename))
    with open(filename) as handle:
        preview = pd.read_csv(
            handle, skiprows=3, nrows=3, index_col=False, sep=separator
        )
    first_column_dtype = preview.iloc[:, 0].dtypes
    if first_column_dtype != np.float64:
        return 'yes'
    return 'no'
# detection of columns categories and scaling
def col_cat(data_import):
    """Split a DataFrame into its numerical and categorical columns.

    Columns whose dtype is exactly ``float64`` or ``int64`` are treated as
    numerical; every other column is treated as categorical. Missing
    values in numerical columns are replaced by the mean of their column.

    Args:
        data_import: DataFrame to split.

    Returns:
        A ``(numerical_data, categorical_data)`` tuple of DataFrames that
        share the index of ``data_import``. When there is no numerical
        column, ``numerical_data`` holds a single placeholder column of
        zeros; when there is no categorical column, ``categorical_data``
        has no columns.
    """
    numeric_dtypes = (np.dtype("float64"), np.dtype("int64"))
    numerical_columns_list = []
    categorical_columns_list = []
    # .items() yields one Series per column even when labels are duplicated,
    # whereas data_import[label] would return a DataFrame in that case.
    for _, column in data_import.items():
        if column.dtype in numeric_dtypes:
            numerical_columns_list.append(column)
        else:
            categorical_columns_list.append(column)

    if not numerical_columns_list:
        # pd.concat only accepts Series/DataFrame objects, so the zero
        # placeholder must be a Series aligned on the input index.
        numerical_columns_list.append(pd.Series(0, index=data_import.index))

    if categorical_columns_list:
        categorical_data = pd.concat(categorical_columns_list, axis=1)
    else:
        # Always bind the second return value, even with nothing to put in it.
        categorical_data = pd.DataFrame(index=data_import.index)

    # Create numerical data matrix from the numerical columns list and fill
    # na with the mean of the column
    numerical_data = pd.concat(numerical_columns_list, axis=1)
    numerical_data = numerical_data.apply(lambda x: x.fillna(x.mean()))
    return numerical_data, categorical_data
def list_files(mypath, import_type):
    """List the pickled model files of a given type found in a directory.

    Args:
        mypath: Directory to scan.
        import_type: Suffix that must directly precede the ``.pkl``
            extension in the file name.

    Returns:
        The names of the regular files in ``mypath`` ending with
        ``import_type + '.pkl'``, or a single-item list holding a
        placeholder message when no such file exists.
    """
    wanted_suffix = import_type + '.pkl'
    matches = []
    for entry in listdir(mypath):
        if isfile(join(mypath, entry)) and entry.endswith(wanted_suffix):
            matches.append(entry)
    if not matches:
        return ['Please, create a model before - no model available yet']
    return matches
def standardize(X):
    """Scale the columns of ``X`` with a freshly fitted ``StandardScaler``.

    Args:
        X: DataFrame to scale; its index and columns are reused for the
            result.

    Returns:
        A new DataFrame holding the output of ``fit_transform(X)``, labelled
        with the index and columns of ``X``.
    """
    # NOTE(review): StandardScaler is presumably scikit-learn's, brought in
    # by the star import at the top of the file - confirm.
    scaled_values = StandardScaler().fit_transform(X)
    return pd.DataFrame(scaled_values, index=X.index, columns=X.columns)
def MinMaxScale(X):
    """Rescale the columns of ``X`` with a freshly fitted ``MinMaxScaler``.

    The scaler is built with ``feature_range=(0,1)``.

    Args:
        X: DataFrame to scale; its index and columns are reused for the
            result.

    Returns:
        A new DataFrame holding the output of ``fit_transform(X)``, labelled
        with the index and columns of ``X``.
    """
    # NOTE(review): MinMaxScaler is presumably scikit-learn's, brought in
    # by the star import at the top of the file - confirm.
    scaler = MinMaxScaler(feature_range=(0,1))
    scaled_values = scaler.fit_transform(X)
    return pd.DataFrame(scaled_values, index=X.index, columns=X.columns)