Skip to content
Snippets Groups Projects
Commit 21546763 authored by BARTHES Nicolas
Browse files

Updated the col_cat function in DATA_HANDLING.py. Its outputs are now comparable to those of dr_reader.

parent 41e614bc
Branches
Tags
No related merge requests found
...@@ -5,7 +5,7 @@ from Packages import * ...@@ -5,7 +5,7 @@ from Packages import *
def find_delimiter(filename): def find_delimiter(filename):
sniffer = csv.Sniffer() sniffer = csv.Sniffer()
with open(filename) as fp: with open(filename) as fp:
delimiter = sniffer.sniff(fp.read(5000)).delimiter delimiter = sniffer.sniff(fp.read(200)).delimiter
return delimiter return delimiter
def find_col_index(filename): def find_col_index(filename):
...@@ -17,7 +17,10 @@ def find_col_index(filename): ...@@ -17,7 +17,10 @@ def find_col_index(filename):
# detection of columns categories and scaling # detection of columns categories and scaling
def col_cat(data_import): def col_cat(data_import):
# detect numerical and categorical columns in the csv """detect numerical and categorical columns in the csv"""
# set first column as sample names
name_col = pd.DataFrame(list(data_import.index), index = list(data_import.index))
name_col=name_col.rename(columns = {0:'name'})
numerical_columns_list = [] numerical_columns_list = []
categorical_columns_list = [] categorical_columns_list = []
for i in data_import.columns: for i in data_import.columns:
...@@ -30,6 +33,7 @@ def col_cat(data_import): ...@@ -30,6 +33,7 @@ def col_cat(data_import):
numerical_columns_list.append(empty) numerical_columns_list.append(empty)
if len(categorical_columns_list) > 0: if len(categorical_columns_list) > 0:
categorical_data = pd.concat(categorical_columns_list, axis=1) categorical_data = pd.concat(categorical_columns_list, axis=1)
categorical_data.insert(0, 'name', name_col)
if len(categorical_columns_list) == 0: if len(categorical_columns_list) == 0:
categorical_data = pd.DataFrame categorical_data = pd.DataFrame
# Create numerical data matrix from the numerical columns list and fill na with the mean of the column # Create numerical data matrix from the numerical columns list and fill na with the mean of the column
......
This diff is collapsed.
This diff is collapsed.
...@@ -39,8 +39,9 @@ if data_file: ...@@ -39,8 +39,9 @@ if data_file:
else: else:
col = False col = False
imp = pd.read_csv(data_file, sep=psep, index_col=col) imp = pd.read_csv(data_file, sep=psep, index_col=col)
spectra = col_cat(imp)[0] # spectra = col_cat(imp)[0]
meta_data = col_cat(imp)[1] # meta_data = col_cat(imp)[1]
spectra, meta_data = col_cat(imp)
st.success("The data have been loaded successfully", icon="") st.success("The data have been loaded successfully", icon="")
## Load .dx file ## Load .dx file
...@@ -50,7 +51,7 @@ if data_file: ...@@ -50,7 +51,7 @@ if data_file:
tmp.write(data_file.read()) tmp.write(data_file.read())
tmp_path = tmp.name tmp_path = tmp.name
with col1: with col1:
_, spectra, meta_data = read_dx(file = tmp_path) _, spectra, meta_data = read_dx(file = tmp_path)
st.success("The data have been loaded successfully", icon="") st.success("The data have been loaded successfully", icon="")
os.unlink(tmp_path) os.unlink(tmp_path)
...@@ -83,10 +84,11 @@ if not spectra.empty: ...@@ -83,10 +84,11 @@ if not spectra.empty:
clus_method = pc.selectbox("Clustering techniques: ", options = cluster_methods, key = 38) clus_method = pc.selectbox("Clustering techniques: ", options = cluster_methods, key = 38)
xc = standardize(spectra) xc = standardize(spectra)
if dim_red_method == dim_red_methods[1]: if dim_red_method == dim_red_methods[1]:
dr_model = LinearPCA(xc, Ncomp=5) dr_model = LinearPCA(xc, Ncomp=5)
elif dim_red_method == dim_red_methods[2]: elif dim_red_method == dim_red_methods[2]:
dr_model = Umap(data_import = data_import, numerical_data = scaled_values, cat_data = categorical_data) dr_model = Umap(data_import = imp, numerical_data = MinMaxScale(spectra), cat_data = meta_data)
if dr_model: if dr_model:
axis1 = pc.selectbox("x-axis", options = dr_model.scores_.columns, index=0) axis1 = pc.selectbox("x-axis", options = dr_model.scores_.columns, index=0)
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or sign in to comment