Newer
Older
from Packages import *
from Class_Mod.DATA_HANDLING import *
def pca_maker(data_import, n_components=6):
    """Append PCA scores computed on the scaled values of ``data_import``.

    The input is split with ``col_cat`` into numerical data, categorical
    data and scaled values; a PCA is fitted on the scaled values and the
    resulting scores are concatenated column-wise to ``data_import``.

    Args:
        data_import: Table passed to ``col_cat`` and to ``pd.concat``
            (presumably a pandas DataFrame -- confirm against callers).
        n_components: Number of principal components to compute. Defaults
            to 6, the value that was previously hard-coded.

    Returns:
        A 3-tuple ``(output, categorical_columns, pca_column_names)``:
        ``output`` is ``data_import`` with one extra column per component,
        ``categorical_columns`` is the list of column names of the
        categorical data returned by ``col_cat``, and ``pca_column_names``
        is the list of the added column labels, formatted as
        ``"PCA_<rank> - <explained variance in percent>%"``.
    """
    numerical_data, categorical_data, scaled_values = col_cat(data_import)

    # Fit the PCA on the scaled values and project them onto the components.
    pca = PCA(n_components=n_components)
    pca_fit = pca.fit(scaled_values)
    pca_data = pd.DataFrame(
        pca_fit.transform(scaled_values), index=numerical_data.index
    )

    # Label each component with its 1-based rank and explained variance in %.
    # The ratio is scaled to a percentage *before* formatting: rounding first
    # and multiplying afterwards leaks binary floating-point noise into the
    # label (e.g. "28.499999999999996%" instead of "28.5%").
    new_column_names = [
        f"PCA_{rank} - {ratio * 100:.1f}%"
        for rank, ratio in enumerate(pca_fit.explained_variance_ratio_, start=1)
    ]
    pca_data.columns = new_column_names

    # Scores are indexed like the numerical data, so concat aligns on index.
    output = pd.concat([data_import, pca_data], axis=1)
    return output, list(categorical_data.columns), new_column_names
####################################################################################################################################################################