"""
Configuration system for Q-TensorFormer v3.
Supports:
- YAML config files for experiment tracking
- Budget constraints (max params, max latency, max energy)
- Automatic hardware sizing
- Config validation
- Preset configurations (tiny / small / medium / production)
"""
from dataclasses import dataclass, field, asdict
from typing import Optional


@dataclass
class ModelConfig:
    """Core model architecture hyperparameters."""
    d_model: int = 128
    n_heads: int = 4
    n_layers: int = 2
    ff_multiplier: int = 4
    max_seq_len: int = 128
    vocab_size: int = 10000
    dropout: float = 0.1
    # Tensor network
    tt_rank: int = 8
    tt_min_rank: int = 2
    use_tensor_ffn: bool = True
    # Quantum
    n_qubits: int = 4
    n_quantum_layers: int = 2
    quantum_sparsity: float = 0.3
    use_quantum: bool = True
    # Rank scheduler
    rank_alpha: float = 2.0
    rank_smoothing: float = 0.9

    def validate(self):
        assert self.d_model % self.n_heads == 0, \
            f"d_model ({self.d_model}) must be divisible by n_heads ({self.n_heads})"
        assert self.tt_rank >= 1, "tt_rank must be >= 1"
        assert self.tt_min_rank >= 1, "tt_min_rank must be >= 1"
        assert self.tt_min_rank <= self.tt_rank, "tt_min_rank must be <= tt_rank"
        assert self.n_qubits <= 8, "n_qubits should be <= 8 for NISQ compatibility"
        assert 0 <= self.quantum_sparsity <= 1, "quantum_sparsity must be in [0, 1]"
        return True
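

# For instance, a hypothetical ModelConfig(d_model=100, n_heads=3) fails
# validate() with an AssertionError, since 100 is not divisible by 3.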


@dataclass
class TrainingConfig:
    """Training hyperparameters."""
    learning_rate: float = 3e-4
    weight_decay: float = 0.01
    warmup_steps: int = 100
    max_epochs: int = 10
    batch_size: int = 16
    gradient_accumulation_steps: int = 1
    max_grad_norm: float = 1.0
    seed: int = 42
    # Scheduler
    lr_scheduler: str = "cosine"  # cosine, linear, constant
    lr_min_factor: float = 0.1

    def validate(self):
        assert self.learning_rate > 0, "learning_rate must be > 0"
        assert self.batch_size >= 1, "batch_size must be >= 1"
        assert self.seed >= 0, "seed must be >= 0"
        assert self.lr_scheduler in ("cosine", "linear", "constant"), \
            f"unknown lr_scheduler: {self.lr_scheduler!r}"
        return True


@dataclass
class BudgetConfig:
    """Deployment budget constraints.

    The model auto-adjusts tensor ranks and quantum usage to meet these.
    """
    max_params: Optional[int] = None                  # Maximum trainable parameters
    max_latency_ms: Optional[float] = None            # Max inference latency (ms)
    max_energy_per_query: Optional[float] = None      # Max energy per query (μJ)
    target_compression_ratio: Optional[float] = None  # Target param reduction

    def validate(self):
        if self.max_params is not None:
            assert self.max_params > 0, "max_params must be > 0"
        if self.max_latency_ms is not None:
            assert self.max_latency_ms > 0, "max_latency_ms must be > 0"
        if self.max_energy_per_query is not None:
            assert self.max_energy_per_query > 0, "max_energy_per_query must be > 0"
        return True


@dataclass
class ExperimentConfig:
    """Master configuration combining all sub-configs."""
    model: ModelConfig = field(default_factory=ModelConfig)
    training: TrainingConfig = field(default_factory=TrainingConfig)
    budget: BudgetConfig = field(default_factory=BudgetConfig)
    experiment_name: str = "default"
    output_dir: str = "./outputs"
    wandb_project: Optional[str] = None

    @classmethod
    def from_yaml(cls, path: str) -> "ExperimentConfig":
        """Load from a YAML file."""
        import yaml
        with open(path) as f:
            data = yaml.safe_load(f) or {}  # an empty file yields None; fall back to defaults
        model = ModelConfig(**data.get("model", {}))
        training = TrainingConfig(**data.get("training", {}))
        budget = BudgetConfig(**data.get("budget", {}))
        return cls(
            model=model, training=training, budget=budget,
            experiment_name=data.get("experiment_name", "default"),
            output_dir=data.get("output_dir", "./outputs"),
            wandb_project=data.get("wandb_project"),
        )

    def to_yaml(self, path: str):
        """Save to a YAML file."""
        import yaml
        data = {
            "experiment_name": self.experiment_name,
            "output_dir": self.output_dir,
            "wandb_project": self.wandb_project,
            "model": asdict(self.model),
            "training": asdict(self.training),
            "budget": asdict(self.budget),
        }
        with open(path, "w") as f:
            yaml.dump(data, f, default_flow_style=False)

    def validate(self):
        self.model.validate()
        self.training.validate()
        self.budget.validate()
        return True
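

# A sketch of the YAML layout from_yaml() expects; the file name and values
# below are illustrative, not shipped defaults. Unknown keys inside a section
# raise TypeError, since each section is splatted directly into its dataclass:
#
#   # experiment.yaml
#   experiment_name: rank_sweep
#   output_dir: ./outputs/rank_sweep
#   model:
#     d_model: 256
#     n_heads: 8
#     tt_rank: 12
#   training:
#     learning_rate: 1.0e-4
#     lr_scheduler: linear
#   budget:
#     max_latency_ms: 25.0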


# Preset configurations

def tiny_config() -> ExperimentConfig:
    return ExperimentConfig(
        model=ModelConfig(d_model=64, n_layers=2, n_heads=4, tt_rank=4, vocab_size=5000),
        training=TrainingConfig(max_epochs=5, batch_size=16),
        experiment_name="tiny",
    )


def small_config() -> ExperimentConfig:
    return ExperimentConfig(
        model=ModelConfig(d_model=128, n_layers=2, n_heads=4, tt_rank=8, vocab_size=10000),
        training=TrainingConfig(max_epochs=8, batch_size=16),
        experiment_name="small",
    )


def medium_config() -> ExperimentConfig:
    return ExperimentConfig(
        model=ModelConfig(d_model=256, n_layers=4, n_heads=8, tt_rank=12, vocab_size=20000),
        training=TrainingConfig(max_epochs=10, batch_size=8),
        experiment_name="medium",
    )


def production_config() -> ExperimentConfig:
    return ExperimentConfig(
        model=ModelConfig(d_model=512, n_layers=6, n_heads=8, tt_rank=16, vocab_size=30000),
        training=TrainingConfig(max_epochs=15, batch_size=4, gradient_accumulation_steps=4),
        budget=BudgetConfig(max_latency_ms=50.0, target_compression_ratio=2.0),
        experiment_name="production",
    )


PRESETS = {
    "tiny": tiny_config,
    "small": small_config,
    "medium": medium_config,
    "production": production_config,
}
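

# A minimal end-to-end sketch of the intended workflow (assumes PyYAML is
# installed; the /tmp path is illustrative, not part of the project layout):
if __name__ == "__main__":
    cfg = PRESETS["small"]()                 # presets are factories, so each call is a fresh config
    cfg.model.tt_rank = 6                    # plain dataclasses: fields can be tweaked in place
    cfg.validate()                           # raises AssertionError on an invalid combination
    cfg.to_yaml("/tmp/qtf_small_demo.yaml")
    restored = ExperimentConfig.from_yaml("/tmp/qtf_small_demo.yaml")
    assert restored.model.tt_rank == 6       # config round-trips through YAML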