Newer
Older
from Packages import *
from Class_Mod.DATA_HANDLING import *
"""
The UMAP dimension reduction algorithm from scikit learn
"""
def __init__(self, data_import, numerical_data, cat_data):
    """Fit a 3-component UMAP model and store the resulting embedding.

    Args:
        data_import: Original dataset. Only its ``index`` is used here, to
            label the rows of ``self.scores``.
        numerical_data: Numeric feature matrix the UMAP model is fitted on
            and then transformed.
        cat_data: Optional categorical labels. When it is not ``None`` and
            non-empty, it is label-encoded and passed to ``fit`` as ``y``;
            otherwise the model is fitted with ``y=None``.

    Attributes set:
        x, numerical_data: The inputs, stored as given.
        categorical_data, le: Raw labels and the fitted ``LabelEncoder``
            (only set when usable labels were supplied).
        categorical_data_encoded: Encoded labels, or ``None``.
        model: The fitted ``UMAP`` estimator.
        scores_raw: Output of ``model.transform(numerical_data)``.
        scores: ``scores_raw`` wrapped in a DataFrame indexed like
            ``data_import``.
    """
    self.x = data_import
    self.numerical_data = numerical_data

    # Use an explicit length check rather than truthiness: pandas / NumPy
    # containers raise on ambiguous boolean evaluation.
    if cat_data is not None and len(cat_data) > 0:
        self.categorical_data = cat_data
        self.le = LabelEncoder()
        self.categorical_data_encoded = self.le.fit_transform(self.categorical_data)
    else:
        # Missing or empty labels: fit without a target.
        self.categorical_data_encoded = None

    # Fixed random_state keeps the embedding reproducible between runs.
    self.model = UMAP(n_neighbors=20, n_components=3, min_dist=0.0, random_state=42)
    self.model.fit(self.numerical_data, y=self.categorical_data_encoded)

    self.scores_raw = self.model.transform(self.numerical_data)
    self.scores = pd.DataFrame(self.scores_raw, index=self.x.index)
    # NOTE: __init__ must return None. Returning the scores here would raise
    # "TypeError: __init__() should return None" on every instantiation.
    # Callers read the embedding from self.scores / self.scores_raw instead.
@property
def scores_raw_(self):
    """Read-only access to ``self.scores_raw``, the raw output of the model's ``transform`` call."""
    return self.scores_raw