from utils.data_handling import *

# ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ pca ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~#


class LinearPCA:
    """Fit a PCA model on ``X`` and expose scores, loadings and residuals.

    ``PCA``, ``DataFrame`` and ``np`` are expected to come from the star
    import above (presumably scikit-learn's ``PCA``, pandas and NumPy --
    confirm in ``utils.data_handling``).

    Parameters
    ----------
    X : array-like
        Input matrix; it is copied into a NumPy array.
    Ncomp : int, default 10
        Number of principal components to compute.

    Attributes set by ``__init__``
    ------------------------------
    _Qexp_ratio : DataFrame
        Explained variance ratio in percent, one row per component.
    _p : ndarray
        Transposed ``components_`` of the fitted model (loadings).
    _t : ndarray
        Result of ``transform`` on the input (scores).
    eigvals, Lambda : ndarray
        Squared singular values and the diagonal matrix built from them.
    _xp : dict
        ``_xp[i]`` is the reconstruction using the first ``i + 1`` components.
    _qres : dict
        ``_qres[i]`` is the per-sample sum of squared reconstruction errors
        for the first ``i + 1`` components.
    T2, leverage : dict
        Initialised empty; nothing in this class fills them.
    """

    def __init__(self, X, Ncomp=10):
        # Input matrix
        self.__x = np.array(X)
        # Number of components to compute
        self.__ncp = Ncomp

        # Fit the PCA model
        M = PCA(n_components=self.__ncp)
        M.fit(self.__x)

        # Round *after* scaling to percent: ``100 * ratio.round(2)`` leaks
        # float artefacts such as ``28.999999999999996`` into the labels.
        pct = np.round(100 * M.explained_variance_ratio_)
        self.__pcnames = [f'PC{i+1}({pct[i]}%)' for i in range(self.__ncp)]
        self._Qexp_ratio = DataFrame(
            100 * M.explained_variance_ratio_,
            columns=["Qexp"],
            index=[f'PC{i+1}' for i in range(self.__ncp)],
        )
        self._p = M.components_.T
        self._t = M.transform(self.__x)
        self.eigvals = M.singular_values_**2
        self.Lambda = np.diag(self.eigvals)

        # Containers for per-component-count results.
        # NOTE: T2 and leverage are initialised but not populated here.
        self.T2 = {}
        self._xp = {}
        self._qres = {}
        self.leverage = {}

        # The scores are assumed to describe mean-centred data (scikit-learn's
        # PCA.transform subtracts the column means), so the reconstruction is
        # compared against the centred input -- TODO confirm if PCA is not
        # scikit-learn's.
        centered = self.__x - self.__x.mean(axis=0)
        for i in range(self.__ncp):
            # Reconstruction from the first i + 1 components
            self._xp[i] = np.dot(self._t[:, :i+1], self._p.T[:i+1, :])
            # Per-sample squared reconstruction error. Previously never
            # filled, which made ``residuals_`` fail on column assignment.
            self._qres[i] = np.sum((centered - self._xp[i]) ** 2, axis=1)

    @property
    def scores_(self):
        """Scores as a DataFrame with one labelled column per component."""
        return DataFrame(self._t, columns=self.__pcnames)

    @property
    def loadings_(self):
        """Loadings as a DataFrame with one labelled column per component."""
        return DataFrame(self._p, columns=self.__pcnames)

    @property
    def residuals_(self):
        """Squared residuals per sample, one column per component count."""
        res = DataFrame(self._qres)
        res.columns = self.__pcnames
        return res


# ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ umap ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~#


class Umap:
    """UMAP dimension reduction to three axes, optionally supervised.

    ``UMAP`` and ``LabelEncoder`` come from the star import at the top of the
    file (presumably umap-learn and scikit-learn respectively -- confirm).

    Parameters
    ----------
    numerical_data : array-like
        Data the model is fitted on and then transformed.
    cat_data : sequence or None
        Optional labels. When non-empty they are label-encoded and passed as
        ``y`` to ``fit``; when ``None`` or empty, ``y`` is ``None``.
    """

    def __init__(self, numerical_data, cat_data):
        self.numerical_data = numerical_data

        if cat_data is None:
            self.categorical_data_encoded = cat_data
        elif len(cat_data) > 0:
            self.categorical_data = cat_data
            self.le = LabelEncoder()
            self.categorical_data_encoded = self.le.fit_transform(self.categorical_data)
        else:
            self.categorical_data_encoded = None

        # random_state is left unset (a fixed seed of 42 was commented out in
        # the original call).
        self.model = UMAP(n_neighbors=20, n_components=3, min_dist=0.0)
        self.model.fit(self.numerical_data, y=self.categorical_data_encoded)

        self.scores_raw = self.model.transform(self.numerical_data)
        self.scores = DataFrame(self.scores_raw)
        self.scores.columns = [f'axis_{i+1}' for i in range(self.scores_raw.shape[1])]

    @property
    def scores_(self):
        """Embedding as a DataFrame with columns ``axis_1``, ``axis_2``, ..."""
        return self.scores

    @property
    def scores_raw_(self):
        """Embedding exactly as returned by the model's ``transform``."""
        return self.scores_raw


# ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ nmf ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~#


class Nmf:
    """Fit an NMF model on ``X`` and expose scores and loadings.

    ``NMF`` comes from the star import at the top of the file (presumably
    scikit-learn's -- confirm).

    Parameters
    ----------
    X : array-like
        Input matrix. If it contains negative values, the whole matrix is
        shifted by its global minimum so that the smallest entry becomes 0.
    Ncomp : int, default 3
        Number of components to compute.
    """

    def __init__(self, X, Ncomp=3):
        # Convert first so the minimum is always a single scalar over the
        # whole matrix, whatever container type X is.
        data = np.array(X)
        lowest = data.min()
        if lowest < 0:
            data = data - lowest
        self.__x = data

        # Number of components to compute
        self.__ncp = Ncomp

        # Fit the NMF model
        Mo = NMF(
            n_components=self.__ncp,
            init=None,
            solver='cd',
            beta_loss='frobenius',
            tol=0.0001,
            max_iter=300,
            random_state=None,
            alpha_W=0.0,
            alpha_H='same',
            l1_ratio=0.0,
            verbose=0,
            shuffle=False,
        )
        Mo.fit(self.__x)

        # Results
        self._p = Mo.components_.T
        self._t = Mo.transform(self.__x)

    @property
    def scores_(self):
        """Transformed input (scores) as a DataFrame."""
        return DataFrame(self._t)

    @property
    def loadings_(self):
        """Transposed ``components_`` (loadings) as a DataFrame."""
        return DataFrame(self._p)