| """ |
| Configuration system for Q-TensorFormer v3. |
| |
| Supports: |
| - YAML config files for experiment tracking |
| - Budget constraints (max params, max latency, max energy) |
| - Automatic hardware sizing |
| - Config validation |
| """ |

import math
from dataclasses import asdict, dataclass, field
from typing import Optional


@dataclass
class ModelConfig:
    """Core model architecture hyperparameters."""
    d_model: int = 128
    n_heads: int = 4
    n_layers: int = 2
    ff_multiplier: int = 4
    max_seq_len: int = 128
    vocab_size: int = 10000
    dropout: float = 0.1

    # Tensor-train (TT) factorization of the feed-forward layers.
    tt_rank: int = 8
    tt_min_rank: int = 2
    use_tensor_ffn: bool = True

    # Quantum sub-module (sized for NISQ hardware).
    n_qubits: int = 4
    n_quantum_layers: int = 2
    quantum_sparsity: float = 0.3
    use_quantum: bool = True

    # Adaptive rank adjustment (see BudgetConfig).
    rank_alpha: float = 2.0
    rank_smoothing: float = 0.9

    def validate(self):
        assert self.d_model % self.n_heads == 0, (
            f"d_model ({self.d_model}) must be divisible by n_heads ({self.n_heads})"
        )
        assert self.tt_rank >= 1, "tt_rank must be >= 1"
        assert self.tt_min_rank >= 1, "tt_min_rank must be >= 1"
        assert self.tt_min_rank <= self.tt_rank, "tt_min_rank must be <= tt_rank"
        assert 1 <= self.n_qubits <= 8, "n_qubits must be in [1, 8] for NISQ compatibility"
        assert 0 <= self.quantum_sparsity <= 1, "quantum_sparsity must be in [0, 1]"
        assert 0 <= self.dropout < 1, "dropout must be in [0, 1)"
        return True
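

# Back-of-envelope view of the TT savings for one FFN weight matrix (a sketch:
# it assumes a two-core rank-r factorization of the (d_model x ff_multiplier *
# d_model) weight; the actual TT layout may use more cores).
def _approx_ffn_params(cfg: ModelConfig) -> "tuple[int, int]":
    """Return (dense, factored) parameter counts for one FFN weight matrix."""
    d_in = cfg.d_model
    d_out = cfg.ff_multiplier * cfg.d_model
    dense = d_in * d_out                      # full weight matrix
    factored = cfg.tt_rank * (d_in + d_out)   # (d_in x r) core + (r x d_out) core
    return dense, factored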


@dataclass
class TrainingConfig:
    """Training hyperparameters."""
    learning_rate: float = 3e-4
    weight_decay: float = 0.01
    warmup_steps: int = 100
    max_epochs: int = 10
    batch_size: int = 16
    gradient_accumulation_steps: int = 1
    max_grad_norm: float = 1.0
    seed: int = 42

    # Learning-rate schedule.
    lr_scheduler: str = "cosine"
    lr_min_factor: float = 0.1

    def validate(self):
        assert self.learning_rate > 0, "learning_rate must be positive"
        assert self.batch_size >= 1, "batch_size must be >= 1"
        assert self.gradient_accumulation_steps >= 1, "gradient_accumulation_steps must be >= 1"
        assert self.seed >= 0, "seed must be non-negative"
        return True
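

# How the schedule fields might combine when lr_scheduler == "cosine" (a
# sketch; the trainer's actual schedule may differ): linear warmup for
# warmup_steps, then cosine decay from 1.0 down to lr_min_factor.
def _lr_multiplier(step: int, total_steps: int, cfg: TrainingConfig) -> float:
    """Multiplier applied to learning_rate at a given optimizer step."""
    if step < cfg.warmup_steps:
        return step / max(1, cfg.warmup_steps)
    progress = (step - cfg.warmup_steps) / max(1, total_steps - cfg.warmup_steps)
    cosine = 0.5 * (1.0 + math.cos(math.pi * min(1.0, progress)))
    return cfg.lr_min_factor + (1.0 - cfg.lr_min_factor) * cosine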


@dataclass
class BudgetConfig:
    """Deployment budget constraints.

    The model auto-adjusts tensor ranks and quantum usage to meet these.
    """
    max_params: Optional[int] = None
    max_latency_ms: Optional[float] = None
    max_energy_per_query: Optional[float] = None
    target_compression_ratio: Optional[float] = None

    def validate(self):
        if self.max_params is not None:
            assert self.max_params > 0, "max_params must be positive"
        if self.max_latency_ms is not None:
            assert self.max_latency_ms > 0, "max_latency_ms must be positive"
        if self.max_energy_per_query is not None:
            assert self.max_energy_per_query > 0, "max_energy_per_query must be positive"
        if self.target_compression_ratio is not None:
            assert self.target_compression_ratio > 0, "target_compression_ratio must be positive"
        return True
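

# How a sizing routine might consult the budget (hypothetical helper; the
# actual auto-adjustment logic lives outside this module):
def _within_param_budget(n_params: int, budget: BudgetConfig) -> bool:
    """True when n_params satisfies the optional max_params cap."""
    return budget.max_params is None or n_params <= budget.max_params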


@dataclass
class ExperimentConfig:
    """Master configuration combining all sub-configs."""
    model: ModelConfig = field(default_factory=ModelConfig)
    training: TrainingConfig = field(default_factory=TrainingConfig)
    budget: BudgetConfig = field(default_factory=BudgetConfig)
    experiment_name: str = "default"
    output_dir: str = "./outputs"
    wandb_project: Optional[str] = None

    @classmethod
    def from_yaml(cls, path: str) -> "ExperimentConfig":
        """Load from a YAML file."""
        import yaml  # local import keeps PyYAML an optional dependency

        with open(path) as f:
            data = yaml.safe_load(f) or {}  # empty file -> empty dict, not None
        return cls(
            model=ModelConfig(**(data.get("model") or {})),
            training=TrainingConfig(**(data.get("training") or {})),
            budget=BudgetConfig(**(data.get("budget") or {})),
            experiment_name=data.get("experiment_name", "default"),
            output_dir=data.get("output_dir", "./outputs"),
            wandb_project=data.get("wandb_project"),
        )

    def to_yaml(self, path: str):
        """Save to a YAML file."""
        import yaml

        data = {
            "experiment_name": self.experiment_name,
            "output_dir": self.output_dir,
            "wandb_project": self.wandb_project,
            "model": asdict(self.model),
            "training": asdict(self.training),
            "budget": asdict(self.budget),
        }
        with open(path, "w") as f:
            yaml.dump(data, f, default_flow_style=False)

    def validate(self):
        self.model.validate()
        self.training.validate()
        self.budget.validate()
        return True


# Preset configurations, from smoke-test scale up to deployment scale.

def tiny_config() -> ExperimentConfig:
    return ExperimentConfig(
        model=ModelConfig(d_model=64, n_layers=2, n_heads=4, tt_rank=4, vocab_size=5000),
        training=TrainingConfig(max_epochs=5, batch_size=16),
        experiment_name="tiny",
    )


def small_config() -> ExperimentConfig:
    return ExperimentConfig(
        model=ModelConfig(d_model=128, n_layers=2, n_heads=4, tt_rank=8, vocab_size=10000),
        training=TrainingConfig(max_epochs=8, batch_size=16),
        experiment_name="small",
    )


def medium_config() -> ExperimentConfig:
    return ExperimentConfig(
        model=ModelConfig(d_model=256, n_layers=4, n_heads=8, tt_rank=12, vocab_size=20000),
        training=TrainingConfig(max_epochs=10, batch_size=8),
        experiment_name="medium",
    )


def production_config() -> ExperimentConfig:
    return ExperimentConfig(
        model=ModelConfig(d_model=512, n_layers=6, n_heads=8, tt_rank=16, vocab_size=30000),
        training=TrainingConfig(max_epochs=15, batch_size=4, gradient_accumulation_steps=4),
        budget=BudgetConfig(max_latency_ms=50.0, target_compression_ratio=2.0),
        experiment_name="production",
    )


PRESETS = {
    "tiny": tiny_config,
    "small": small_config,
    "medium": medium_config,
    "production": production_config,
}
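

# Minimal usage sketch (assumes PyYAML is installed; the output path is
# illustrative):
if __name__ == "__main__":
    config = PRESETS["small"]()
    config.validate()
    config.to_yaml("/tmp/qtf_small.yaml")
    restored = ExperimentConfig.from_yaml("/tmp/qtf_small.yaml")
    assert restored.model.d_model == config.model.d_model
    print(f"Round-tripped preset '{restored.experiment_name}' through YAML")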