"""Fit a StandardScaler + PCA(16) pipeline on stacked hyperspectral training cubes.

Each ``.npz`` file under ``./train_data`` is expected to hold the constructor
arguments of :class:`numpy.ma.MaskedArray` (i.e. ``data`` and ``mask`` arrays)
for one cube of shape ``(n_bands, H, W)`` -- TODO confirm against the writer of
these files.  All cubes are flattened to per-pixel spectra, stacked row-wise,
standardized, and reduced to 16 principal components; the fitted scaler and PCA
are persisted together with joblib.
"""
import os

import numpy as np


def load_hsi_stack(data_dir, n_bands=150, progress=None):
    """Load every ``.npz`` cube under *data_dir* and stack pixel spectra row-wise.

    Parameters
    ----------
    data_dir : str
        Directory containing ``.npz`` files whose arrays are the keyword
        arguments of ``np.ma.MaskedArray`` (``data``, ``mask``).
    n_bands : int, optional
        Number of spectral bands (length of each cube's first axis).
        Default 150, matching the original hard-coded reshape.
    progress : callable, optional
        Wrapper for the file iterable (e.g. ``tqdm``) to report progress;
        ``None`` disables progress reporting.

    Returns
    -------
    numpy.ndarray
        Array of shape ``(total_pixels, n_bands)`` -- one row per pixel.

    Raises
    ------
    FileNotFoundError
        If *data_dir* contains no ``.npz`` files.
    """
    # Sort for a deterministic stacking order (os.listdir order is arbitrary).
    files = sorted(f for f in os.listdir(data_dir) if f.endswith(".npz"))
    if progress is not None:
        files = progress(files)

    spectra = []
    for name in files:
        with np.load(os.path.join(data_dir, name)) as npz:
            cube = np.ma.MaskedArray(**npz)
        # (n_bands, H, W) -> (H*W, n_bands): each row is one pixel's spectrum.
        spectra.append(cube.reshape(n_bands, -1).transpose())

    if not spectra:
        # Fail with a clear message instead of np.vstack([])'s opaque ValueError.
        raise FileNotFoundError(f"no .npz files found in {data_dir!r}")

    # NOTE(review): np.vstack on MaskedArrays drops the mask, so masked pixels
    # flow into the scaler/PCA as raw values -- confirm this is intended
    # (np.ma.vstack would preserve the mask).
    return np.vstack(spectra)


def main():
    # Third-party imports are function-local so importing this module stays
    # cheap and does not require sklearn/joblib/tqdm to be installed.
    import joblib
    from sklearn.decomposition import PCA
    from sklearn.preprocessing import StandardScaler
    # tqdm.auto works both in notebooks and plain terminals; the original
    # tqdm.notebook import breaks outside Jupyter.
    from tqdm.auto import tqdm

    x = load_hsi_stack("./train_data", progress=tqdm)
    print("\n\n")
    print(x.shape)

    scaler = StandardScaler()
    x_scaled = scaler.fit_transform(x)

    # Fit only (no transform here): downstream consumers load the pickle and
    # apply scaler + pca themselves.
    pca = PCA(n_components=16)
    pca.fit(x_scaled)

    joblib.dump({"scaler": scaler, "pca": pca}, "pca_pipeline.pkl")


if __name__ == "__main__":
    main()