# UMAP_.py
# UMAP function for the Sample Selection module
from Packages import * 
from Class_Mod.DATA_HANDLING import * 

def umap_maker(data_import, *, n_neighbors=20, n_components=4, min_dist=0.0,
               random_state=42):
    """Append a UMAP embedding of ``data_import`` to the original table.

    The input is passed through ``col_cat`` (defined in
    ``Class_Mod.DATA_HANDLING``), a UMAP model is fitted on the third value
    it returns, and the resulting components are concatenated column-wise
    to ``data_import``.

    Args:
        data_import: Table handed to ``col_cat`` and later concatenated with
            the embedding via ``pd.concat(..., axis=1)``.
            NOTE(review): presumably a pandas DataFrame -- confirm against
            the callers.
        n_neighbors: Forwarded to ``UMAP(n_neighbors=...)``. Defaults to the
            previously hard-coded value, 20.
        n_components: Forwarded to ``UMAP(n_components=...)``. Defaults to
            the previously hard-coded value, 4.
        min_dist: Forwarded to ``UMAP(min_dist=...)``. Defaults to the
            previously hard-coded value, 0.0.
        random_state: Forwarded to ``UMAP(random_state=...)``. Defaults to
            the previously hard-coded value, 42.

    Returns:
        A 3-tuple ``(output, categorical_columns, umap_columns)`` where
        ``output`` is ``data_import`` with the UMAP columns appended,
        ``categorical_columns`` is the list of column names of the second
        value returned by ``col_cat``, and ``umap_columns`` is the list of
        new column names (``"UMAP_1"``, ``"UMAP_2"``, ...).
    """
    # NOTE(review): col_cat presumably returns (numerical columns,
    # categorical columns, scaled numerical values) -- confirm in
    # Class_Mod.DATA_HANDLING.
    numerical_data, categorical_data, scaled_values = col_cat(data_import)

    reducer = UMAP(
        random_state=random_state,
        n_neighbors=n_neighbors,
        n_components=n_components,
        min_dist=min_dist,
    )
    # Fit and transform are deliberately kept as two steps, exactly as in the
    # original implementation, so the produced embedding is unchanged.
    embedding = reducer.fit(scaled_values).transform(scaled_values)

    # Reuse the index of the numerical table so that the axis=1 concat below
    # aligns the embedding rows with the rows of data_import.
    umap_data = pd.DataFrame(embedding, index=numerical_data.index)

    # Name the components UMAP_1 .. UMAP_k (1-based component number).
    new_column_names = [f"UMAP_{i}" for i in range(1, len(umap_data.columns) + 1)]
    umap_data.columns = new_column_names

    output = pd.concat([data_import, umap_data], axis=1)
    return output, list(categorical_data.columns), new_column_names