""" Configuration system for Q-TensorFormer v3. Supports: - YAML config files for experiment tracking - Budget constraints (max params, max latency, max energy) - Automatic hardware sizing - Config validation """ from dataclasses import dataclass, field from typing import Optional, Tuple, List import math @dataclass class ModelConfig: """Core model architecture hyperparameters.""" d_model: int = 128 n_heads: int = 4 n_layers: int = 2 ff_multiplier: int = 4 max_seq_len: int = 128 vocab_size: int = 10000 dropout: float = 0.1 # Tensor network tt_rank: int = 8 tt_min_rank: int = 2 use_tensor_ffn: bool = True # Quantum n_qubits: int = 4 n_quantum_layers: int = 2 quantum_sparsity: float = 0.3 use_quantum: bool = True # Rank scheduler rank_alpha: float = 2.0 rank_smoothing: float = 0.9 def validate(self): assert self.d_model % self.n_heads == 0, f"d_model ({self.d_model}) must be divisible by n_heads ({self.n_heads})" assert self.tt_rank >= 1, "tt_rank must be >= 1" assert self.tt_min_rank >= 1, "tt_min_rank must be >= 1" assert self.tt_min_rank <= self.tt_rank, "tt_min_rank must be <= tt_rank" assert self.n_qubits <= 8, "n_qubits should be <= 8 for NISQ compatibility" assert 0 <= self.quantum_sparsity <= 1, "quantum_sparsity must be in [0, 1]" return True @dataclass class TrainingConfig: """Training hyperparameters.""" learning_rate: float = 3e-4 weight_decay: float = 0.01 warmup_steps: int = 100 max_epochs: int = 10 batch_size: int = 16 gradient_accumulation_steps: int = 1 max_grad_norm: float = 1.0 seed: int = 42 # Scheduler lr_scheduler: str = "cosine" # cosine, linear, constant lr_min_factor: float = 0.1 def validate(self): assert self.learning_rate > 0 assert self.batch_size >= 1 assert self.seed >= 0 return True @dataclass class BudgetConfig: """Deployment budget constraints. The model auto-adjusts tensor ranks and quantum usage to meet these. """ max_params: Optional[int] = None # Maximum trainable parameters max_latency_ms: Optional[float] = None # Max inference latency (ms) max_energy_per_query: Optional[float] = None # Max energy per query (μJ) target_compression_ratio: Optional[float] = None # Target param reduction def validate(self): if self.max_params is not None: assert self.max_params > 0 if self.max_latency_ms is not None: assert self.max_latency_ms > 0 return True @dataclass class ExperimentConfig: """Master configuration combining all sub-configs.""" model: ModelConfig = field(default_factory=ModelConfig) training: TrainingConfig = field(default_factory=TrainingConfig) budget: BudgetConfig = field(default_factory=BudgetConfig) experiment_name: str = "default" output_dir: str = "./outputs" wandb_project: Optional[str] = None @classmethod def from_yaml(cls, path: str) -> "ExperimentConfig": """Load from YAML file.""" import yaml with open(path) as f: data = yaml.safe_load(f) model = ModelConfig(**data.get("model", {})) training = TrainingConfig(**data.get("training", {})) budget = BudgetConfig(**data.get("budget", {})) return cls( model=model, training=training, budget=budget, experiment_name=data.get("experiment_name", "default"), output_dir=data.get("output_dir", "./outputs"), wandb_project=data.get("wandb_project"), ) def to_yaml(self, path: str): """Save to YAML file.""" import yaml data = { "experiment_name": self.experiment_name, "output_dir": self.output_dir, "wandb_project": self.wandb_project, "model": {k: v for k, v in self.model.__dict__.items()}, "training": {k: v for k, v in self.training.__dict__.items()}, "budget": {k: v for k, v in self.budget.__dict__.items()}, } with open(path, "w") as f: yaml.dump(data, f, default_flow_style=False) def validate(self): self.model.validate() self.training.validate() self.budget.validate() return True # Preset configurations def tiny_config() -> ExperimentConfig: return ExperimentConfig( model=ModelConfig(d_model=64, n_layers=2, n_heads=4, tt_rank=4, vocab_size=5000), training=TrainingConfig(max_epochs=5, batch_size=16), experiment_name="tiny", ) def small_config() -> ExperimentConfig: return ExperimentConfig( model=ModelConfig(d_model=128, n_layers=2, n_heads=4, tt_rank=8, vocab_size=10000), training=TrainingConfig(max_epochs=8, batch_size=16), experiment_name="small", ) def medium_config() -> ExperimentConfig: return ExperimentConfig( model=ModelConfig(d_model=256, n_layers=4, n_heads=8, tt_rank=12, vocab_size=20000), training=TrainingConfig(max_epochs=10, batch_size=8), experiment_name="medium", ) def production_config() -> ExperimentConfig: return ExperimentConfig( model=ModelConfig(d_model=512, n_layers=6, n_heads=8, tt_rank=16, vocab_size=30000), training=TrainingConfig(max_epochs=15, batch_size=4, gradient_accumulation_steps=4), budget=BudgetConfig(max_latency_ms=50.0, target_compression_ratio=2.0), experiment_name="production", ) PRESETS = { "tiny": tiny_config, "small": small_config, "medium": medium_config, "production": production_config, }