| from configuration import DatasetName, DatasetType, W300Conf, InputDataSize, LearningConfig, WflwConf |
| from image_utility import ImageUtility |
| from sklearn.decomposition import PCA, IncrementalPCA |
| from sklearn.decomposition import TruncatedSVD |
| import numpy as np |
| import pickle |
| import os |
| from tqdm import tqdm |
| from numpy import save, load |
| import math |
| from PIL import Image |
| from numpy import save, load |
|
|
|
|
class PCAUtility:
    """Fit PCA on saved label vectors and persist / reload the resulting statistics.

    Artefacts are stored as ``.npy`` files under ``./pca_obj/`` (relative to the
    current working directory) and are named
    ``<dataset_name><prefix><pca_percentages>.npy``.
    """

    # File-name fragments placed between the dataset name and the percentage.
    eigenvalues_prefix = "_eigenvalues_"
    eigenvectors_prefix = "_eigenvectors_"
    meanvector_prefix = "_meanvector_"

    # Directory that holds every artefact written or read by this class.
    _pca_dir = './pca_obj/'

    def _artifact_stem(self, dataset_name, prefix, pca_percentages):
        """Return the artefact path WITHOUT the ``.npy`` extension."""
        return self._pca_dir + dataset_name + prefix + str(pca_percentages)

    def create_pca_from_npy(self, dataset_name, labels_npy_path, pca_percentages):
        """
        Generate and save the eigenvalues, eigenvectors and mean vector of the labels.

        :param dataset_name: name used as the leading part of the output file names.
        :param labels_npy_path: directory containing the normalized labels, one
            ``.npy`` file per sample (every file is expected to hold an array of
            the same shape -- presumably a flat landmark vector; verify against
            the code that writes them).
        :param pca_percentages: % of the variance that the kept components must
            explain (e.g. ``90``).
        :return: None; three ``.npy`` files are written under ``./pca_obj/``.
        :raises ValueError: if ``labels_npy_path`` contains no ``.npy`` file.
        """
        print('PCA calculation started: loading labels')

        # Sorted so that the sample order does not depend on the file system.
        label_files = sorted(
            name for name in os.listdir(labels_npy_path) if name.endswith(".npy")
        )
        if not label_files:
            raise ValueError("no .npy label files found in '%s'" % labels_npy_path)

        lbl_arr = np.array(
            [load(os.path.join(labels_npy_path, name)) for name in tqdm(label_files)]
        )

        # Only the fitted statistics are persisted; the projected data is not needed.
        _, eigenvalues, eigenvectors = self._func_PCA(lbl_arr, pca_percentages)
        mean_lbl_arr = np.mean(lbl_arr, axis=0)
        # sklearn returns one component per row; store one component per column.
        eigenvectors = eigenvectors.T

        # numpy.save does not create missing directories.
        os.makedirs(self._pca_dir, exist_ok=True)

        # numpy.save appends ".npy" to each of these names.
        save(self._artifact_stem(dataset_name, self.eigenvalues_prefix, pca_percentages),
             eigenvalues)
        save(self._artifact_stem(dataset_name, self.eigenvectors_prefix, pca_percentages),
             eigenvectors)
        save(self._artifact_stem(dataset_name, self.meanvector_prefix, pca_percentages),
             mean_lbl_arr)

    def load_pca_obj(self, dataset_name, pca_percentages):
        """
        Load the artefacts previously written by ``create_pca_from_npy``.

        :param dataset_name: dataset name that was used when saving.
        :param pca_percentages: percentage that was used when saving.
        :return: tuple ``(eigenvalues, eigenvectors, meanvector)`` of numpy arrays.
        """
        # The files on disk carry the ".npy" suffix that numpy.save added, so it
        # has to be spelled out here: numpy.load does not add it implicitly.
        eigenvalues = np.load(
            self._artifact_stem(dataset_name, self.eigenvalues_prefix, pca_percentages) + '.npy')
        eigenvectors = np.load(
            self._artifact_stem(dataset_name, self.eigenvectors_prefix, pca_percentages) + '.npy')
        meanvector = np.load(
            self._artifact_stem(dataset_name, self.meanvector_prefix, pca_percentages) + '.npy')
        return eigenvalues, eigenvectors, meanvector

    def _func_PCA(self, input_data, pca_postfix):
        """
        Fit a PCA on ``input_data`` and project the data onto the kept components.

        :param input_data: array-like of samples, one sample per row.
        :param pca_postfix: percentage of variance to keep; it is divided by 100
            and handed to sklearn's ``PCA`` as ``n_components``.
        :return: tuple ``(projected_data, explained_variance_, components_)``.
        """
        input_data = np.asarray(input_data)

        # sklearn only accepts a float n_components strictly between 0 and 1,
        # so "100 %" is expressed as None, which keeps every component.
        n_components = None if pca_postfix == 100 else pca_postfix / 100
        pca = PCA(n_components=n_components)

        pca.fit(input_data)
        pca_input_data = pca.transform(input_data)
        eigenvalues = pca.explained_variance_
        eigenvectors = pca.components_
        return pca_input_data, eigenvalues, eigenvectors

    def __svd_func(self, input_data, pca_postfix):
        """
        Alternative decomposition based on sklearn's ``TruncatedSVD``.

        :param input_data: array-like of samples, one sample per row.
        :param pca_postfix: currently ignored -- the number of components is
            hard-coded to 50.
        :return: tuple ``(projected_data, explained_variance_, components_)``.
        """
        svd = TruncatedSVD(n_components=50)
        svd.fit(input_data)
        pca_input_data = svd.transform(input_data)
        eigenvalues = svd.explained_variance_
        eigenvectors = svd.components_
        return pca_input_data, eigenvalues, eigenvectors
| |
|
|