# UMAP_.py 894 B
# UMAP function for the Sample Selection module
from Packages import *
from Class_Mod.DATA_HANDLING import *
def umap_maker(data_import, *, n_neighbors=20, n_components=4, min_dist=0.0,
               random_state=42):
    """Compute a UMAP embedding of ``data_import`` and append it as new columns.

    The input is first passed through ``col_cat`` (imported from
    ``Class_Mod.DATA_HANDLING``), which returns three objects named here
    ``numerical_data``, ``categorical_data`` and ``scaled_values``. The UMAP
    model is fitted on ``scaled_values``; the resulting components are named
    ``UMAP_1``, ``UMAP_2``, ... and concatenated column-wise to
    ``data_import``.

    NOTE(review): the exact contents of the three ``col_cat`` outputs are not
    visible from this file -- presumably the numeric columns, the categorical
    columns and a scaled version of the numeric values; confirm against
    ``col_cat``.

    Args:
        data_import: Table to embed. It must be accepted by ``col_cat`` and
            by ``pd.concat`` (presumably a ``pandas.DataFrame`` -- confirm
            against callers).
        n_neighbors: Forwarded to ``UMAP(n_neighbors=...)``.
        n_components: Forwarded to ``UMAP(n_components=...)``; determines how
            many ``UMAP_<i>`` columns are produced.
        min_dist: Forwarded to ``UMAP(min_dist=...)``.
        random_state: Forwarded to ``UMAP(random_state=...)``.

        The defaults reproduce the values that were previously hard-coded,
        so ``umap_maker(data_import)`` behaves as before.

    Returns:
        A 3-tuple ``(output, categorical_columns, umap_columns)`` where
        ``output`` is ``data_import`` with the UMAP columns appended,
        ``categorical_columns`` is ``list(categorical_data.columns)`` and
        ``umap_columns`` is the list of generated ``UMAP_<i>`` column names.
    """
    numerical_data, categorical_data, scaled_values = col_cat(data_import)

    reducer = UMAP(
        random_state=random_state,
        n_neighbors=n_neighbors,
        n_components=n_components,
        min_dist=min_dist,
    )
    # Keep the original fit-then-transform sequence so the embedding produced
    # here is the same one the previous implementation returned.
    embedding = reducer.fit(scaled_values).transform(scaled_values)

    # Re-use the index of the numerical data so the concat below aligns the
    # embedding rows by index label with the rows of data_import.
    umap_data = pd.DataFrame(embedding, index=numerical_data.index)

    # Name the components UMAP_1 .. UMAP_k (1-based component number).
    new_column_names = [f"UMAP_{i}" for i in range(1, umap_data.shape[1] + 1)]
    umap_data.columns = new_column_names

    output = pd.concat([data_import, umap_data], axis=1)
    return output, list(categorical_data.columns), new_column_names