Skip to content
Snippets Groups Projects
UMAP_.py 894 B
Newer Older
  • Learn to ignore specific revisions
  • DIANE's avatar
    DIANE committed
    # UMAP function for the Sample Selection module
    from Packages import * 
    from Class_Mod.DATA_HANDLING import * 
    
    def umap_maker(data_import, *, n_neighbors=20, n_components=4, min_dist=0.0, random_state=42):
        """Compute a UMAP embedding of ``data_import`` and append it as new columns.

        The input is first split by ``col_cat`` (brought in by the star imports
        at the top of this file) into numerical data, categorical data and
        scaled values; the scaled values are what UMAP is fitted on.

        Parameters
        ----------
        data_import :
            Table to embed. It is passed to ``col_cat`` and to ``pd.concat``,
            so it is presumably a ``pandas.DataFrame`` -- confirm against callers.
        n_neighbors, n_components, min_dist, random_state :
            Keyword-only settings forwarded unchanged to ``UMAP``. The defaults
            (20, 4, 0.0, 42) are the values that used to be hard-coded here, so
            calling ``umap_maker(data_import)`` behaves as before.

        Returns
        -------
        tuple
            ``(output, categorical_columns, new_column_names)`` where
            ``output`` is ``data_import`` with the embedding columns concatenated
            on ``axis=1``, ``categorical_columns`` is the list of column names of
            the categorical data returned by ``col_cat``, and
            ``new_column_names`` is ``["UMAP_1", "UMAP_2", ...]``, one name per
            embedding component.
        """
        numerical_data, categorical_data, scaled_values = col_cat(data_import)

        reducer = UMAP(
            random_state=random_state,
            n_neighbors=n_neighbors,
            n_components=n_components,
            min_dist=min_dist,
        )
        # Keep the original two-step fit / transform sequence on the same matrix
        # so the numerical output is unchanged by this refactor.
        embedding = reducer.fit(scaled_values).transform(scaled_values)

        # Index the embedding like the numerical data so that pd.concat aligns
        # the new columns on the same row labels.
        embedding_frame = pd.DataFrame(embedding, index=numerical_data.index)

        # Name the components UMAP_1 .. UMAP_k (1-based), k = number of columns.
        new_column_names = ["UMAP_" + str(i) for i in range(1, embedding_frame.shape[1] + 1)]
        embedding_frame.columns = new_column_names

        output = pd.concat([data_import, embedding_frame], axis=1)
        return output, list(categorical_data.columns), new_column_names