diff --git a/Class_Mod/HDBSCAN_Clustering.py b/Class_Mod/HDBSCAN_Clustering.py
index f01928254a72c2516d8f2093011b06130aaaea27..1a9df2d72833121b79f19a6d9c0618868fc0ffc3 100644
--- a/Class_Mod/HDBSCAN_Clustering.py
+++ b/Class_Mod/HDBSCAN_Clustering.py
@@ -1,299 +1,308 @@
 from Packages import *
-from scipy.spatial.distance import euclidean, cdist
-from scipy.sparse.csgraph import minimum_spanning_tree
-from scipy.sparse import csgraph
-
-
-def DBCV(X, labels, dist_function=euclidean):
-    """
-    Implimentation of Density-Based Clustering Validation "DBCV"
-
-    Citation:
-    Moulavi, Davoud, et al. "Density-based clustering validation."
-    Proceedings of the 2014 SIAM International Conference on Data Mining.
-    Society for Industrial and Applied Mathematics, 2014.
-
-    Density Based clustering validation
-
-    Args:
-        X (np.ndarray): ndarray with dimensions [n_samples, n_features]
-            data to check validity of clustering
-        labels (np.array): clustering assignments for data X
-        dist_dunction (func): function to determine distance between objects
-            func args must be [np.array, np.array] where each array is a point
-
-    Returns: cluster_validity (float)
-        score in range[-1, 1] indicating validity of clustering assignments
-    """
-    graph = _mutual_reach_dist_graph(X, labels, dist_function)
-    mst = _mutual_reach_dist_MST(graph)
-    cluster_validity = _clustering_validity_index(mst, labels)
-    return cluster_validity
-
-
-def _core_dist(point, neighbors, dist_function):
-    """
-    Computes the core distance of a point.
-    Core distance is the inverse density of an object.
-
-    Args:
-        point (np.array): array of dimensions (n_features,)
-            point to compute core distance of
-        neighbors (np.ndarray): array of dimensions (n_neighbors, n_features):
-            array of all other points in object class
-        dist_dunction (func): function to determine distance between objects
-            func args must be [np.array, np.array] where each array is a point
-
-    Returns: core_dist (float)
-        inverse density of point
-    """
-    n_features = np.shape(point)[0]
-    n_neighbors = np.shape(neighbors)[0]
-
-    distance_vector = cdist(point.reshape(1, -1), neighbors)
-    distance_vector = distance_vector[distance_vector != 0]
-    numerator = ((1/distance_vector)**n_features).sum()
-    core_dist = (numerator / (n_neighbors - 1)) ** (-1/n_features)
-    return core_dist
-
-
-def _mutual_reachability_dist(point_i, point_j, neighbors_i,
-                              neighbors_j, dist_function):
-    """.
-    Computes the mutual reachability distance between points
-
-    Args:
-        point_i (np.array): array of dimensions (n_features,)
-            point i to compare to point j
-        point_j (np.array): array of dimensions (n_features,)
-            point i to compare to point i
-        neighbors_i (np.ndarray): array of dims (n_neighbors, n_features):
-            array of all other points in object class of point i
-        neighbors_j (np.ndarray): array of dims (n_neighbors, n_features):
-            array of all other points in object class of point j
-        dist_dunction (func): function to determine distance between objects
-            func args must be [np.array, np.array] where each array is a point
-
-    Returns: mutual_reachability (float)
-        mutual reachability between points i and j
-
+class Hdbscan:
     """
-    core_dist_i = _core_dist(point_i, neighbors_i, dist_function)
-    core_dist_j = _core_dist(point_j, neighbors_j, dist_function)
-    dist = dist_function(point_i, point_j)
-    mutual_reachability = np.max([core_dist_i, core_dist_j, dist])
-    return mutual_reachability
-
-
-def _mutual_reach_dist_graph(X, labels, dist_function):
-    """
-    Computes the mutual reach distance complete graph.
-    Graph of all pair-wise mutual reachability distances between points
-
-    Args:
-        X (np.ndarray): ndarray with dimensions [n_samples, n_features]
-            data to check validity of clustering
-        labels (np.array): clustering assignments for data X
-        dist_dunction (func): function to determine distance between objects
-            func args must be [np.array, np.array] where each array is a point
-
-    Returns: graph (np.ndarray)
-        array of dimensions (n_samples, n_samples)
-        Graph of all pair-wise mutual reachability distances between points.
-
-    """
-    n_samples = np.shape(X)[0]
-    graph = []
-    counter = 0
-    for row in range(n_samples):
-        graph_row = []
-        for col in range(n_samples):
-            point_i = X[row]
-            point_j = X[col]
-            class_i = labels[row]
-            class_j = labels[col]
-            members_i = _get_label_members(X, labels, class_i)
-            members_j = _get_label_members(X, labels, class_j)
-            dist = _mutual_reachability_dist(point_i, point_j,
-                                             members_i, members_j,
-                                             dist_function)
-            graph_row.append(dist)
-        counter += 1
-        graph.append(graph_row)
-    graph = np.array(graph)
-    return graph
-
-
-def _mutual_reach_dist_MST(dist_tree):
-    """
-    Computes minimum spanning tree of the mutual reach distance complete graph
-
-    Args:
-        dist_tree (np.ndarray): array of dimensions (n_samples, n_samples)
-            Graph of all pair-wise mutual reachability distances
-            between points.
-
-    Returns: minimum_spanning_tree (np.ndarray)
-        array of dimensions (n_samples, n_samples)
-        minimum spanning tree of all pair-wise mutual reachability
-            distances between points.
-    """
-    mst = minimum_spanning_tree(dist_tree).toarray()
-    return mst + np.transpose(mst)
-
-
-def _cluster_density_sparseness(MST, labels, cluster):
-    """
-    Computes the cluster density sparseness, the minimum density
-        within a cluster
-
-    Args:
-        MST (np.ndarray): minimum spanning tree of all pair-wise
-            mutual reachability distances between points.
-        labels (np.array): clustering assignments for data X
-        cluster (int): cluster of interest
-
-    Returns: cluster_density_sparseness (float)
-        value corresponding to the minimum density within a cluster
-    """
-    indices = np.where(labels == cluster)[0]
-    cluster_MST = MST[indices][:, indices]
-    cluster_density_sparseness = np.max(cluster_MST)
-    return cluster_density_sparseness
-
-
-def _cluster_density_separation(MST, labels, cluster_i, cluster_j):
-    """
-    Computes the density separation between two clusters, the maximum
-        density between clusters.
-
-    Args:
-        MST (np.ndarray): minimum spanning tree of all pair-wise
-            mutual reachability distances between points.
-        labels (np.array): clustering assignments for data X
-        cluster_i (int): cluster i of interest
-        cluster_j (int): cluster j of interest
-
-    Returns: density_separation (float):
-        value corresponding to the maximum density between clusters
-    """
-    indices_i = np.where(labels == cluster_i)[0]
-    indices_j = np.where(labels == cluster_j)[0]
-    shortest_paths = csgraph.dijkstra(MST, indices=indices_i)
-    relevant_paths = shortest_paths[:, indices_j]
-    density_separation = np.min(relevant_paths)
-    return density_separation
-
-
-def _cluster_validity_index(MST, labels, cluster):
-    """
-    Computes the validity of a cluster (validity of assignmnets)
-
-    Args:
-        MST (np.ndarray): minimum spanning tree of all pair-wise
-            mutual reachability distances between points.
-        labels (np.array): clustering assignments for data X
-        cluster (int): cluster of interest
-
-    Returns: cluster_validity (float)
-        value corresponding to the validity of cluster assignments
-    """
-    min_density_separation = np.inf
-    for cluster_j in np.unique(labels):
-        if cluster_j != cluster:
-            cluster_density_separation = _cluster_density_separation(MST,
-                                                                     labels,
-                                                                     cluster,
-                                                                     cluster_j)
-            if cluster_density_separation < min_density_separation:
-                min_density_separation = cluster_density_separation
-    cluster_density_sparseness = _cluster_density_sparseness(MST,
-                                                             labels,
-                                                             cluster)
-    numerator = min_density_separation - cluster_density_sparseness
-    denominator = np.max([min_density_separation, cluster_density_sparseness])
-    cluster_validity = numerator / denominator
-    return cluster_validity
-
-
-def _clustering_validity_index(MST, labels):
-    """
-    Computes the validity of all clustering assignments for a
-    clustering algorithm
-
-    Args:
-        MST (np.ndarray): minimum spanning tree of all pair-wise
-            mutual reachability distances between points.
-        labels (np.array): clustering assignments for data X
-
-    Returns: validity_index (float):
-        score in range[-1, 1] indicating validity of clustering assignments
-    """
-    n_samples = len(labels)
-    validity_index = 0
-    for label in np.unique(labels):
-        fraction = np.sum(labels == label) / float(n_samples)
-        cluster_validity = _cluster_validity_index(MST, labels, label)
-        validity_index += fraction * cluster_validity
-    return validity_index
-
-
-def _get_label_members(X, labels, cluster):
-    """
-    Helper function to get samples of a specified cluster.
-
-    Args:
-        X (np.ndarray): ndarray with dimensions [n_samples, n_features]
-            data to check validity of clustering
-        labels (np.array): clustering assignments for data X
-        cluster (int): cluster of interest
-
-    Returns: members (np.ndarray)
-        array of dimensions (n_samples, n_features) of samples of the
-        specified cluster.
+    Runs an automatically optimized sklearn HDBSCAN clustering on a dimensionality-reduced space.
+    Vars:
+        data: the dimensionality-reduced space, i.e. the raw UMAP scores (Umap.scores_raw_)
+        param_dist: the HDBSCAN optimization parameters to test
+        Density-Based Clustering Validation - DBCV (https://github.com/christopherjenness/DBCV/tree/master ;
+            Moulavi, Davoud, et al. "Density-based clustering validation." Proceedings of the 2014 SIAM
+            International Conference on Data Mining. Society for Industrial and Applied Mathematics, 2014.)
+            is used as the metric to optimize the HDBSCAN algorithm.
+            Methods DBCV, _core_dist, _mutual_reachability_dist, _mutual_reach_dist_graph,
+            _mutual_reach_dist_MST, _cluster_density_sparseness, _cluster_density_separation, _cluster_validity_index,
+            _clustering_validity_index and _get_label_members implement the DBCV computation.
+        _score is a dataframe with the DBCV value for each tested combination of param_dist. We search for the highest
+            value and use the HDBSCAN clustering obtained with the best parameters.
+        The HDBSCAN_scores_ @property returns the cluster number of each sample (_labels) and the best DBCV score.
     """
-    indices = np.where(labels == cluster)[0]
-    members = X[indices]
-    return members
-
-def HDBSCAN_function(data):
-    # param_dist = {'min_samples': [1,5,10,30],
-    #               'min_cluster_size':[5,10,20,30,50,75,100],
-    #               # 'cluster_selection_method' : ['eom','leaf'],
-    #               # 'metric' : ['euclidean','manhattan']
-    #               }
-    # param_dist = {'min_samples': [1,5,10,50],
-    #               'min_cluster_size':[5,10,30,50,100,300,500],
-    #               }
-    param_dist = {'min_samples': [1,5, 10,],
-                  'min_cluster_size':[5,10,30,50,100],
-                  'metric' : ['euclidean','manhattan'],
-                  }
-
-    clusterable_embedding = UMAP(
-        n_neighbors=20,
-        min_dist=0.0,
-        n_components=5,
-        random_state=42,
-    ).fit_transform(data)
-
-    # RandomizedSearchCV not working...
-    # def scoring(model, clusterable_embedding):
-    #     label = HDBSCAN().fit_predict(clusterable_embedding)
-    #     hdbscan_score = DBCV(clusterable_embedding, label, dist_function=euclidean)
-    #     return hdbscan_score
-    # tunning = RandomizedSearchCV(estimator=HDBSCAN(), param_distributions=param_dist,  scoring=scoring)
-    # tunning.fit(clusterable_embedding)
-    # return tunning
-    min_score = pd.DataFrame()
-    for i in param_dist.get('min_samples'):
-        for j in param_dist.get('min_cluster_size'):
-            ij_label = HDBSCAN(min_samples=i, min_cluster_size=j).fit_predict(clusterable_embedding)
-            ij_hdbscan_score = DBCV(clusterable_embedding, ij_label, dist_function=euclidean)
-            min_score.at[i,j] = ij_hdbscan_score
-    hdbscan_score  = max(min_score.max())
-    # get the coordinates of the best clustering parameters and run HDBSCAN below
-    bparams = np.where(min_score == hdbscan_score)
-    # run HDBSCAN with best params
-    labels = HDBSCAN(min_samples=param_dist['min_samples'][bparams[0][0]], min_cluster_size=param_dist['min_cluster_size'][bparams[1][0]], metric=param_dist['metric'][bparams[1][0]]).fit_predict(clusterable_embedding)
-    return labels, hdbscan_score
+    def __init__(self, data):
+        """
+        Grid-search HDBSCAN hyper-parameters on ``data`` and keep the best-scoring labels.
+
+        Args:
+            data: embedding to cluster; handed unchanged to HDBSCAN.fit_predict and DBCV.
+        Sets self._labels and self._hdbscan_score, which HDBSCAN_scores_ returns.
+        """
+        self._param_dist = {'min_samples': [1,5,10,],
+                      'min_cluster_size':[5,25,50,],
+                      'metric' : ['euclidean','manhattan'],
+                      }
+
+        self._clusterable_embedding = data
+
+        # Score every (min_samples, min_cluster_size, metric) combination with DBCV.
+        # Previously 'metric' was never searched and the final fit picked it with the
+        # min_cluster_size column index, which could overrun the 2-item metric list
+        # and returned labels from a configuration that had not been scored.
+        # NOTE: DBCV keeps its default euclidean distance whatever the HDBSCAN metric.
+        records, best = [], None
+        for i in self._param_dist['min_samples']:
+            for j in self._param_dist['min_cluster_size']:
+                for m in self._param_dist['metric']:
+                    ij_label = HDBSCAN(min_samples=i, min_cluster_size=j, metric=m).fit_predict(self._clusterable_embedding)
+                    ij_score = self.DBCV(self._clusterable_embedding, ij_label)
+                    records.append((i, j, m, ij_score))
+                    # a NaN incumbent is always replaced; a NaN challenger never beats a real number
+                    if best is None or np.isnan(best[0]) or ij_score > best[0]:
+                        best = (ij_score, {'min_samples': i, 'min_cluster_size': j, 'metric': m}, ij_label)
+        # long-format table: one row per tested combination with its DBCV score
+        self._score = pd.DataFrame(records, columns=['min_samples', 'min_cluster_size', 'metric', 'DBCV'])
+        # best score, the parameters that produced it, and the labels of that very fit
+        self._hdbscan_score, self._bparams, self._labels = best
+
+    def DBCV(self, X, labels, dist_function=euclidean):
+        """
+        Implementation of Density-Based Clustering Validation "DBCV"
+
+        Citation:
+        Moulavi, Davoud, et al. "Density-based clustering validation."
+        Proceedings of the 2014 SIAM International Conference on Data Mining.
+        Society for Industrial and Applied Mathematics, 2014.
+
+        Pipeline: mutual reachability graph -> its spanning tree -> validity index
+
+        Args:
+            X (np.ndarray): ndarray with dimensions [n_samples, n_features]
+                data to check validity of clustering
+            labels (np.array): clustering assignments for data X
+            dist_function (func): function to determine distance between objects
+                func args must be [np.array, np.array] where each array is a point
+
+        Returns: cluster_validity (float)
+            score in range[-1, 1] indicating validity of clustering assignments
+        """
+        graph = self._mutual_reach_dist_graph(X, labels, dist_function)
+        mst = self._mutual_reach_dist_MST(graph)
+        cluster_validity = self._clustering_validity_index(mst, labels)
+        return cluster_validity
+
+
+    def _core_dist(self, point, neighbors, dist_function):
+        """
+        Computes the core distance of a point.
+        Core distance is the inverse density of an object.
+
+        Args:
+            point (np.array): array of dimensions (n_features,)
+                point to compute core distance of
+            neighbors (np.ndarray): array of dimensions (n_neighbors, n_features):
+                points of the object class (zero distances are dropped below)
+            dist_function (func): not used in this method; distances are taken
+                from cdist called with its default metric
+
+        Returns: core_dist (float)
+            inverse density of point
+        """
+        n_features = np.shape(point)[0]
+        n_neighbors = np.shape(neighbors)[0]
+
+        distance_vector = cdist(point.reshape(1, -1), neighbors)
+        distance_vector = distance_vector[distance_vector != 0]  # drops zero distances (the point itself if present, exact duplicates)
+        numerator = ((1/distance_vector)**n_features).sum()
+        core_dist = (numerator / (n_neighbors - 1)) ** (-1/n_features)  # NOTE(review): divides by zero when n_neighbors == 1
+        return core_dist
+
+
+    def _mutual_reachability_dist(self, point_i, point_j, neighbors_i,
+                                  neighbors_j, dist_function):
+        """
+        Computes the mutual reachability distance between points
+
+        Args:
+            point_i (np.array): array of dimensions (n_features,)
+                point i to compare to point j
+            point_j (np.array): array of dimensions (n_features,)
+                point j to compare to point i
+            neighbors_i (np.ndarray): array of dims (n_neighbors, n_features):
+                array of all other points in object class of point i
+            neighbors_j (np.ndarray): array of dims (n_neighbors, n_features):
+                array of all other points in object class of point j
+            dist_function (func): function to determine distance between objects
+                func args must be [np.array, np.array] where each array is a point
+
+        Returns: mutual_reachability (float)
+            largest of core_dist(i), core_dist(j) and dist_function(i, j)
+
+        """
+        core_dist_i = self._core_dist(point_i, neighbors_i, dist_function)
+        core_dist_j = self._core_dist(point_j, neighbors_j, dist_function)
+        dist = dist_function(point_i, point_j)
+        mutual_reachability = np.max([core_dist_i, core_dist_j, dist])
+        return mutual_reachability
+
+
+    def _mutual_reach_dist_graph(self, X, labels, dist_function):
+        """
+        Computes the mutual reach distance complete graph.
+        Graph of all pair-wise mutual reachability distances between points
+
+        Args:
+            X (np.ndarray): ndarray with dimensions [n_samples, n_features]
+                data to check validity of clustering
+            labels (np.array): clustering assignments for data X
+            dist_function (func): function to determine distance between objects
+                func args must be [np.array, np.array] where each array is a point
+
+        Returns: graph (np.ndarray)
+            array of dimensions (n_samples, n_samples)
+            Graph of all pair-wise mutual reachability distances between points.
+
+        """
+        n_samples = np.shape(X)[0]
+        # A point's core distance depends only on the point and on the members of
+        # its own cluster, so compute it once per point instead of twice per pair
+        # (the previous loop recomputed it 2 * n_samples**2 times).
+        core_dists = [
+            self._core_dist(X[idx],
+                            self._get_label_members(X, labels, labels[idx]),
+                            dist_function)
+            for idx in range(n_samples)
+        ]
+        graph = []
+        for row in range(n_samples):
+            graph_row = []
+            for col in range(n_samples):
+                # mutual reachability = max(core_i, core_j, d(i, j))
+                dist = dist_function(X[row], X[col])
+                graph_row.append(np.max([core_dists[row], core_dists[col], dist]))
+            graph.append(graph_row)
+        graph = np.array(graph)
+        return graph
+
+
+    def _mutual_reach_dist_MST(self, dist_tree):
+        """
+        Computes minimum spanning tree of the mutual reach distance complete graph
+
+        Args:
+            dist_tree (np.ndarray): array of dimensions (n_samples, n_samples)
+                Graph of all pair-wise mutual reachability distances
+                between points.
+
+        Returns: minimum_spanning_tree (np.ndarray)
+            array of dimensions (n_samples, n_samples)
+            dense minimum spanning tree of the mutual reachability graph,
+                summed with its own transpose before being returned.
+        """
+        mst = minimum_spanning_tree(dist_tree).toarray()
+        return mst + np.transpose(mst)  # add the transpose so edge (i, j) is also stored at (j, i)
+
+
+    def _cluster_density_sparseness(self, MST, labels, cluster):
+        """
+        Computes the cluster density sparseness, the minimum density
+            within a cluster
+
+        Args:
+            MST (np.ndarray): minimum spanning tree of all pair-wise
+                mutual reachability distances between points.
+            labels (np.array): clustering assignments for data X
+            cluster (int): cluster of interest
+
+        Returns: cluster_density_sparseness (float)
+            largest entry of MST restricted to the rows/columns of the cluster's members
+        """
+        indices = np.where(labels == cluster)[0]
+        cluster_MST = MST[indices][:, indices]  # sub-matrix of edges between members only
+        cluster_density_sparseness = np.max(cluster_MST)
+        return cluster_density_sparseness
+
+
+    def _cluster_density_separation(self, MST, labels, cluster_i, cluster_j):
+        """
+        Computes the density separation between two clusters, the maximum
+            density between clusters.
+
+        Args:
+            MST (np.ndarray): minimum spanning tree of all pair-wise
+                mutual reachability distances between points.
+            labels (np.array): clustering assignments for data X
+            cluster_i (int): cluster i of interest
+            cluster_j (int): cluster j of interest
+
+        Returns: density_separation (float):
+            shortest dijkstra path length in MST from any member of cluster_i to any member of cluster_j
+        """
+        indices_i = np.where(labels == cluster_i)[0]
+        indices_j = np.where(labels == cluster_j)[0]
+        shortest_paths = csgraph.dijkstra(MST, indices=indices_i)  # one row of path lengths per member of cluster_i
+        relevant_paths = shortest_paths[:, indices_j]  # keep only the destinations that belong to cluster_j
+        density_separation = np.min(relevant_paths)
+        return density_separation
+
+
+    def _cluster_validity_index(self, MST, labels, cluster):
+        """
+        Computes the validity of a cluster (validity of assignments)
+
+        Args:
+            MST (np.ndarray): minimum spanning tree of all pair-wise
+                mutual reachability distances between points.
+            labels (np.array): clustering assignments for data X
+            cluster (int): cluster of interest
+
+        Returns: cluster_validity (float)
+            (separation - sparseness) / max(separation, sparseness), separation being the smallest one to any other label
+        """
+        min_density_separation = np.inf  # stays inf when labels holds no other label than `cluster`
+        for cluster_j in np.unique(labels):
+            if cluster_j != cluster:
+                cluster_density_separation = self._cluster_density_separation(MST,
+                                                                         labels,
+                                                                         cluster,
+                                                                         cluster_j)
+                if cluster_density_separation < min_density_separation:
+                    min_density_separation = cluster_density_separation
+        cluster_density_sparseness = self._cluster_density_sparseness(MST,
+                                                                 labels,
+                                                                 cluster)
+        numerator = min_density_separation - cluster_density_sparseness
+        denominator = np.max([min_density_separation, cluster_density_sparseness])
+        cluster_validity = numerator / denominator
+        return cluster_validity
+
+
+    def _clustering_validity_index(self, MST, labels):
+        """
+        Computes the overall validity of a clustering: the per-label validities
+        summed with weights equal to each label's share of the samples
+
+        Args:
+            MST (np.ndarray): minimum spanning tree of all pair-wise
+                mutual reachability distances between points.
+            labels (np.array): clustering assignments for data X
+
+        Returns: validity_index (float):
+            score in range[-1, 1] indicating validity of clustering assignments
+        """
+        n_samples = len(labels)
+        validity_index = 0
+        for label in np.unique(labels):  # NOTE(review): every distinct label is scored, a -1 noise label included - confirm intended
+            fraction = np.sum(labels == label) / float(n_samples)  # share of the samples carrying this label
+            cluster_validity = self._cluster_validity_index(MST, labels, label)
+            validity_index += fraction * cluster_validity
+        return validity_index
+
+
+    def _get_label_members(self, X, labels, cluster):
+        """
+        Helper function to get samples of a specified cluster.
+
+        Args:
+            X (np.ndarray): ndarray with dimensions [n_samples, n_features]
+                data to check validity of clustering
+            labels (np.array): clustering assignments for data X
+            cluster (int): cluster of interest
+
+        Returns: members (np.ndarray)
+            array of dimensions (n_members, n_features): the rows of X whose
+            label equals the specified cluster.
+        """
+        indices = np.where(labels == cluster)[0]
+        members = X[indices]
+        return members
+
+    @property
+    def HDBSCAN_scores_(self):  # tuple (labels, DBCV score), both set in __init__
+         return self._labels, self._hdbscan_score
diff --git a/Class_Mod/UMAP_.py b/Class_Mod/UMAP_.py
index e9ae0dc4c930947d47cf8b47660f5ea8d749905a..8d415ebb9b32761ea9c53c06a88363e0300206da 100644
--- a/Class_Mod/UMAP_.py
+++ b/Class_Mod/UMAP_.py
@@ -4,17 +4,28 @@ from Class_Mod.DATA_HANDLING import *
 
 
 class Umap:
-    def __init__(self, x, n_components, n_neighbors, min_dist):
-        self.numerical_data, categorical_data, scaled_values = col_cat(x)
-        self.catdata = list(categorical_data.columns)
+    """
+    UMAP dimension reduction (umap-learn) of the numerical data, supervised by the label-encoded categories when given
+    """
+    def __init__(self, data_import, numerical_data, cat_data):
+        self.x = data_import
+        self.numerical_data = numerical_data
+        if len(cat_data) > 0:
+            self.categorical_data = cat_data
+            self.le = LabelEncoder()
+            self.categorical_data_encoded = self.le.fit_transform(self.categorical_data)
 
-        self.x = scaled_values
-        
-        self.model = UMAP(n_neighbors=20, n_components=4, min_dist=0.0,) # random_state=42,)
-        self.model.fit(self.x)
-        self.scores = self.model.transform(self.x)
-        self.scores = pd.DataFrame(self.scores, index = self.numerical_data.index)
+        else:
+            self.categorical_data = False
+            self.categorical_data_encoded = None  # was left undefined here -> AttributeError in fit(); y=None fits unsupervised
+        self.model = UMAP(n_neighbors=20, n_components=3, min_dist=0.0, random_state=42,)  # fixed seed for reproducible embeddings
+        self.model.fit(self.numerical_data, y = self.categorical_data_encoded)  # y holds encoded categories, or None
+        self.scores_raw = self.model.transform(self.numerical_data)  # raw transform output, exposed via scores_raw_
+        self.scores = pd.DataFrame(self.scores_raw, index = self.x.index)  # same values, indexed like the imported data
 
     @property
     def scores_(self):
-        return self.scores
\ No newline at end of file
+        return self.scores
+    @property
+    def scores_raw_(self):
+        return self.scores_raw
\ No newline at end of file
diff --git a/Class_Mod/__init__.py b/Class_Mod/__init__.py
index eb2dbb5b6b3a030cfa727730bf21e84ba9ed0948..c684862836ba8af35807b889e3b822f091dad3d6 100644
--- a/Class_Mod/__init__.py
+++ b/Class_Mod/__init__.py
@@ -8,3 +8,5 @@ from .Regression_metrics import metrics
 from .VarSel import TpeIpls
 from .Miscellaneous import resid_plot, reg_plot
 from .DxReader import DxRead
+from .HDBSCAN_Clustering import Hdbscan
+
diff --git a/Modules.py b/Modules.py
index 54399173517fa1fbd82e19b0df1cca4a63e380a2..0076fb22adc7da0d1aec6530ee3f6ab0a754d370 100644
--- a/Modules.py
+++ b/Modules.py
@@ -1,4 +1,4 @@
-from Class_Mod import LinearPCA, Umap, find_col_index, PinardPlsr, model_LWPLSR, list_files, metrics, TpeIpls, reg_plot, resid_plot, Sk_Kmeans, DxRead
+from Class_Mod import LinearPCA, Umap, find_col_index, PinardPlsr, model_LWPLSR, list_files, metrics, TpeIpls, reg_plot, resid_plot, Sk_Kmeans, DxRead, Hdbscan
 # find_col_index
 
 from Class_Mod.Miscellaneous import prediction, download_results
diff --git a/Packages.py b/Packages.py
index 9cad07bc174a70c11930d36b1a3e9f5c6d0ef109..b0d939baa8021ba8dfa14088d1b33d972500954d 100644
--- a/Packages.py
+++ b/Packages.py
@@ -1,4 +1,3 @@
-
 ## Data loading, handling, and preprocessing
 import os
 import sys
@@ -10,14 +9,18 @@ import numpy as np
 import pandas as pd
 from os import listdir
 from os.path import isfile, join
-from sklearn.preprocessing import StandardScaler, MinMaxScaler
+from sklearn.preprocessing import StandardScaler, MinMaxScaler, LabelEncoder
 import time
+
 ### Exploratory data analysis-Dimensionality reduction
 from umap.umap_ import UMAP
 from sklearn.decomposition import PCA, NMF
 
 # Clustering
 from sklearn.cluster import KMeans, HDBSCAN
+from scipy.spatial.distance import euclidean, cdist
+from scipy.sparse.csgraph import minimum_spanning_tree
+from scipy.sparse import csgraph
 
 # Modelling
 # import julia
@@ -38,6 +41,7 @@ from PIL import Image
 import plotly.express as px
 import matplotlib.pyplot as plt
 import seaborn as sns
+
 ### Important Metrics
 from sklearn.metrics import pairwise_distances_argmin_min, adjusted_rand_score, adjusted_mutual_info_score
 
@@ -49,6 +53,7 @@ from tempfile import NamedTemporaryFile
 
 #Library for connecting to SQL DB
 import pyodbc
+
 #Library for reading the config file, which is in JSON
 import json
 
diff --git a/pages/1-samples_selection.py b/pages/1-samples_selection.py
index cb1348dca6451bbf023afb0051ff68b4a8963dbc..ffb4d81631eab0beda7d3fd473b21e004a6704f4 100644
--- a/pages/1-samples_selection.py
+++ b/pages/1-samples_selection.py
@@ -36,6 +36,7 @@ with container1:
                 else:
                     col = False
                 data_import = pd.read_csv(sselectx_csv, sep=psep, index_col=col)
+                data_import, categorical_data, scaled_values = col_cat(data_import)
                 st.success("The data have been loaded successfully", icon="âœ…")
                 ## Visualize spectra
 
@@ -103,23 +104,27 @@ with container2:
             if type_plot == 'PCA':
                 model = LinearPCA(data_import, Ncomp=5)
             elif type_plot =='UMAP':
-                model = Umap(x = data_import, n_components = 5, n_neighbors = 20 , min_dist = 0)
-
+                model = Umap(data_import = data_import, numerical_data = scaled_values, cat_data = categorical_data)
 
 
         if type_plot in ['PCA', 'UMAP']:
-            # add 2 select lists to choose which component to plot
-            axis1 = pc.selectbox("x-axis", options = model.scores_.columns, index=0)
-            axis2 = pc.selectbox("y-axis", options = model.scores_.columns, index=1)
-            axis3 = pc.selectbox("z-axis", options = model.scores_.columns, index=2)
+            if type_plot in ['PCA']:
+                # add 3 select lists to choose which components to plot on the x, y and z axes
+                axis1 = pc.selectbox("x-axis", options = model.scores_.columns, index=0)
+                axis2 = pc.selectbox("y-axis", options = model.scores_.columns, index=1)
+                axis3 = pc.selectbox("z-axis", options = model.scores_.columns, index=2)
+            elif type_plot in ['UMAP']:
+                axis1 = 0
+                axis2 = 1
+                axis3 = 2
 
             if type_cluster == 'Kmeans':
                 scsc = pd.concat([model.scores_.loc[:,axis1], model.scores_.loc[:,axis2], model.scores_.loc[:,axis3]], axis = 1)
                 cl = Sk_Kmeans(scsc, max_clusters = 30)
 
             elif type_cluster == 'HDBSCAN':
-                from Class_Mod.HDBSCAN_Clustering import HDBSCAN_function
-                labels, hdbscan_score = HDBSCAN_function(data_import)
+                optimized_hdbscan = Hdbscan(model.scores_raw_)
+                labels, hdbscan_score = optimized_hdbscan.HDBSCAN_scores_
             with scores:
                 t = model.scores_
                 if type_cluster in ['AP', 'Kmeans']:
@@ -140,7 +145,9 @@ with container2:
                     fig = px.scatter_3d(t, x=axis1, y=axis2, z = axis3, color=labels)
                     fig.update_traces(marker=dict(size=4))
                     # st.plotly_chart(fig_hdbscan)
-                    st.write('DBCV score (-1:1) = ' + str(hdbscan_score))
+                    st.write('Optimal number of clusters = ' + str(len(set(labels) - {-1})))  # -1 is the noise label counted below as unclassified, not a cluster
+                    st.write('DBCV score (-1 to 1 - higher is better) = ' + str(round(hdbscan_score,3)))
+                    st.write('Unclassified samples: ' + str(len(t[labels==-1])) + ' on ' + str(len(t)) + ' samples (' + str(round(len(t[labels==-1])/len(t)*100, 1)) + '%).')
 
                 else:
                     if test == '.dx':
@@ -190,7 +197,6 @@ with container2:
                     fig = px.scatter(t, x=hotelling[ax2], y=residuals[ax2]).update_layout(xaxis_title="TÂ²",yaxis_title="Residuals")
                     st.plotly_chart(fig)
 
-
-            else:
-                st.markdown('Select a dimensionality reduction technique from the dropdown list')
+        else:
+            st.markdown('Select a dimensionality reduction technique from the dropdown list')