# NOTE(review): star imports hide where PCA, pd and col_cat come from;
# kept as-is because the names they export are not visible from this file.
from Packages import *
from Class_Mod.DATA_HANDLING import *


def pca_maker(data_import, n_components=6):
    """Run a PCA on the scaled values of ``data_import`` and append the scores.

    The input is first split by ``col_cat`` (defined elsewhere) into three
    parts; judging by their names these are the numerical columns, the
    categorical columns and a scaled version of the numerical values --
    TODO confirm against ``col_cat``.

    Args:
        data_import: Table to analyse; it is passed to ``col_cat`` and
            concatenated column-wise with the PCA scores.
        n_components: Number of principal components to compute, forwarded
            to ``PCA``. Defaults to 6, the value that used to be hard-coded.

    Returns:
        A 3-tuple ``(output, categorical_columns, pca_column_names)``:
        ``output`` is ``data_import`` with one extra column per component,
        ``categorical_columns`` is the list of column names of the
        categorical part returned by ``col_cat``, and ``pca_column_names``
        is the list of labels given to the new component columns.
    """
    numerical_data, categorical_data, scaled_values = col_cat(data_import)

    # Fit the PCA on the scaled values and project them onto the components.
    pca = PCA(n_components=n_components)
    pca.fit(scaled_values)
    scores = pca.transform(scaled_values)

    # Label each component with its 1-based rank and explained variance in %.
    # Scale to percent *before* formatting: the previous
    # ``round(ratio, 3) * 100`` reintroduced float noise after rounding and
    # could yield labels such as "28.299999999999997%".
    new_column_names = [
        f"PCA_{rank} - {ratio * 100:.1f}%"
        for rank, ratio in enumerate(pca.explained_variance_ratio_, start=1)
    ]

    # Reuse the index of the numerical part so the scores align row-by-row
    # with the original table when concatenated.
    pca_data = pd.DataFrame(
        scores,
        index=numerical_data.index,
        columns=new_column_names,
    )

    output = pd.concat([data_import, pca_data], axis=1)
    return output, list(categorical_data.columns), new_column_names


####################################################################################################################################################################