| |
| """ |
| Inference script for samples autoencoder model |
| Generated automatically during training |
| """ |
|
|
| import torch |
| import pandas as pd |
| import numpy as np |
| import json |
| import argparse |
| import os |
|
|
def load_model_and_config(model_dir):
    """Load the trained autoencoder and its configuration from ``model_dir``.

    Reads ``model_config.json`` from ``model_dir``, rebuilds the architecture
    described by its ``model_info`` section, loads the saved weights onto the
    CPU and switches the model to evaluation mode.

    Args:
        model_dir: Directory containing ``model_config.json`` and the weights
            file named by ``model_info['saved_model_file']``.

    Returns:
        A ``(model, config)`` tuple: the model in eval mode and the parsed
        configuration dictionary.

    Raises:
        FileNotFoundError: If ``model_config.json`` cannot be opened.
        KeyError: If a ``model_info`` entry needed for the selected
            architecture is missing.
    """
    config_path = os.path.join(model_dir, 'model_config.json')
    with open(config_path, 'r', encoding='utf-8') as f:
        config = json.load(f)

    model_info = config['model_info']
    model_path = os.path.join(model_dir, model_info['saved_model_file'])

    # Kept as a function-local import, exactly where the original script had it.
    from compress_data_unified import SimpleAE, AE

    layer_sizes = model_info['layer_sizes']
    model_type = model_info['model_type']

    if model_type == 'SimpleAE':
        if isinstance(layer_sizes, list) and len(layer_sizes) > 1:
            # Multi-layer specification: build through AE in its "simple" mode.
            model = AE(layer_sizes, use_simple=True)
        else:
            # Only this branch needs the explicit input/latent sizes, so they
            # are looked up here rather than unconditionally.
            model = SimpleAE(model_info['input_dim'], model_info['latent_dims'])
    else:
        # Any other model_type value falls back to the full AE.
        model = AE(layer_sizes, use_simple=False)

    # NOTE: torch.load unpickles the file; only load checkpoints from a
    # trusted source (consider weights_only=True where PyTorch supports it).
    model.load_state_dict(torch.load(model_path, map_location='cpu'))
    model.eval()

    return model, config
|
|
def preprocess_data(data, config):
    """Min-max scale ``data`` to approximately ``[-1, 1]``.

    The minimum and maximum are taken over the whole input (all rows and
    columns together), ignoring NaNs. These statistics come from the data
    passed in, not from ``config``.
    NOTE(review): confirm this matches the normalisation used at training.

    Args:
        data: Array-like numeric data (NumPy array, pandas DataFrame, or a
            plain list/tuple, which is converted to a float array).
        config: Model configuration. Currently unused; kept so the call
            signature stays stable.

    Returns:
        The scaled data, of the same container type as the input (lists and
        tuples come back as NumPy arrays). The input is returned unchanged
        when it is empty or when its value range is below ``1e-8``.
    """
    eps = 1e-8

    # Plain sequences do not support element-wise arithmetic; promote them.
    if isinstance(data, (list, tuple)):
        data = np.asarray(data, dtype=float)

    # np.nanmin / np.nanmax raise on zero-size input, so short-circuit.
    if np.size(data) == 0:
        return data

    min_val = np.nanmin(data)
    max_val = np.nanmax(data)

    # (Near-)constant input: scaling would only amplify noise.
    if max_val - min_val < eps:
        return data

    # eps in the denominator guards against division by a vanishing range.
    return 2 * (data - min_val) / (max_val - min_val + eps) - 1
|
|
def run_inference(model_dir, input_data_path, output_path=None):
    """Encode and reconstruct the rows of a CSV file with a trained model.

    Loads the model from ``model_dir``, reads ``input_data_path`` (first
    column used as the row index), normalises it with ``preprocess_data``,
    runs the encoder and decoder, and writes both results as CSV files.

    Args:
        model_dir: Directory containing the trained model and its config.
        input_data_path: Path to the input CSV file.
        output_path: Directory for the result files; defaults to
            ``'inference_results'`` when ``None``. Created if missing.

    Returns:
        A ``(latent_df, reconstructed_df)`` tuple of DataFrames, both indexed
        like the input. ``latent_df`` has columns ``latent_1 .. latent_k``
        with ``k`` taken from ``config['model_info']['latent_dims']``;
        ``reconstructed_df`` reuses the input's column names.
    """
    model, config = load_model_and_config(model_dir)

    data = pd.read_csv(input_data_path, index_col=0)
    data_processed = preprocess_data(data, config)

    # torch.tensor with an explicit dtype replaces the legacy
    # torch.FloatTensor(...) constructor; it always copies into float32.
    data_tensor = torch.tensor(data_processed.values, dtype=torch.float32)

    # Inference only: no gradients needed.
    with torch.no_grad():
        latent = model.encode(data_tensor)
        reconstructed = model.decode(latent)

    latent_columns = [
        f'latent_{i+1}' for i in range(config['model_info']['latent_dims'])
    ]
    latent_df = pd.DataFrame(latent.numpy(), index=data.index, columns=latent_columns)
    reconstructed_df = pd.DataFrame(
        reconstructed.numpy(), index=data.index, columns=data.columns
    )

    if output_path is None:
        output_path = 'inference_results'
    os.makedirs(output_path, exist_ok=True)

    # Build each destination once and reuse it for writing and reporting.
    latent_file = os.path.join(output_path, 'latent_representation.csv')
    reconstructed_file = os.path.join(output_path, 'reconstructed_data.csv')
    latent_df.to_csv(latent_file)
    reconstructed_df.to_csv(reconstructed_file)

    print("Inference completed:")
    print(f" Latent representation saved: {latent_file}")
    print(f" Reconstructed data saved: {reconstructed_file}")

    return latent_df, reconstructed_df
|
|
# Command-line entry point: parse the three path arguments, run inference,
# and report the shapes of the two resulting DataFrames.
if __name__ == "__main__":
    parser = argparse.ArgumentParser(description='Run inference with trained autoencoder')
    parser.add_argument('--model_dir', type=str, required=True,
                        help='Directory containing trained model and config')
    parser.add_argument('--input_data', type=str, required=True,
                        help='Path to input data CSV file')
    parser.add_argument('--output_dir', type=str, default='inference_results',
                        help='Output directory for results')

    args = parser.parse_args()

    latent, reconstructed = run_inference(args.model_dir, args.input_data, args.output_dir)
    # .shape on a DataFrame is (rows, columns).
    print(f"Latent dimensions: {latent.shape}")
    print(f"Reconstructed dimensions: {reconstructed.shape}")
|
|