# UMAP function for the Sample Selection module
from Packages import *
from Class_Mod.DATA_HANDLING import *


class Umap:
    """UMAP dimension reduction wrapper used by the Sample Selection module.

    Fits a 3-component UMAP model on the numerical data when the object is
    constructed and stores the resulting embedding. If categorical labels are
    supplied they are label-encoded and passed to ``fit`` as the target ``y``;
    otherwise ``y`` is ``None``.

    NOTE(review): ``UMAP``, ``LabelEncoder`` and ``pd`` are expected to come
    from the star imports above (presumably umap-learn, scikit-learn and
    pandas) -- confirm against ``Packages``.

    Args:
        data_import: Original data set; only its ``index`` is used, to label
            the rows of the ``scores`` DataFrame.
        numerical_data: Numerical features the UMAP model is fitted on and
            then transformed.
        cat_data: Optional categorical labels. ``None`` or an empty collection
            means no target is passed to ``fit``.

    Attributes:
        x: ``data_import`` as passed in.
        numerical_data: ``numerical_data`` as passed in.
        categorical_data: ``cat_data`` when labels were supplied, else ``None``.
        le: The fitted ``LabelEncoder`` when labels were supplied, else ``None``.
        categorical_data_encoded: Encoded labels handed to ``fit`` (or ``None``).
        model: The fitted ``UMAP`` estimator.
        scores_raw: Output of ``model.transform(numerical_data)``.
        scores: ``scores_raw`` wrapped in a DataFrame indexed like ``data_import``.
    """

    def __init__(self, data_import, numerical_data, cat_data):
        self.x = data_import
        self.numerical_data = numerical_data

        # ``len(...) > 0`` rather than truthiness: the labels may be a pandas
        # or numpy object, for which ``bool(obj)`` is ambiguous.
        if cat_data is not None and len(cat_data) > 0:
            self.categorical_data = cat_data
            self.le = LabelEncoder()
            self.categorical_data_encoded = self.le.fit_transform(
                self.categorical_data
            )
        else:
            # Always define these attributes so that reading them on an
            # instance built without labels does not raise AttributeError.
            self.categorical_data = None
            self.le = None
            self.categorical_data_encoded = None

        # Fixed seed keeps the embedding reproducible between runs.
        self.model = UMAP(
            n_neighbors=20,
            n_components=3,
            min_dist=0.0,
            random_state=42,
        )
        self.model.fit(self.numerical_data, y=self.categorical_data_encoded)
        self.scores_raw = self.model.transform(self.numerical_data)
        self.scores = pd.DataFrame(self.scores_raw, index=self.x.index)

    @property
    def scores_(self):
        """Return the embedding as a DataFrame indexed like the input data."""
        return self.scores

    @property
    def scores_raw_(self):
        """Return the embedding exactly as produced by ``model.transform``."""
        return self.scores_raw