# src/config.py
"""
Configuration system for Q-TensorFormer v3.
Supports:
- YAML config files for experiment tracking
- Budget constraints (max params, max latency, max energy)
- Automatic hardware sizing
- Config validation
"""

from dataclasses import asdict, dataclass, field
from typing import Optional


@dataclass
class ModelConfig:
    """Core model architecture hyperparameters."""

    d_model: int = 128
    n_heads: int = 4
    n_layers: int = 2
    ff_multiplier: int = 4
    max_seq_len: int = 128
    vocab_size: int = 10000
    dropout: float = 0.1

    # Tensor network
    tt_rank: int = 8
    tt_min_rank: int = 2
    use_tensor_ffn: bool = True

    # Quantum
    n_qubits: int = 4
    n_quantum_layers: int = 2
    quantum_sparsity: float = 0.3
    use_quantum: bool = True

    # Rank scheduler
    rank_alpha: float = 2.0
    rank_smoothing: float = 0.9

    def validate(self):
        assert self.d_model % self.n_heads == 0, (
            f"d_model ({self.d_model}) must be divisible by n_heads ({self.n_heads})"
        )
        assert self.tt_rank >= 1, "tt_rank must be >= 1"
        assert self.tt_min_rank >= 1, "tt_min_rank must be >= 1"
        assert self.tt_min_rank <= self.tt_rank, "tt_min_rank must be <= tt_rank"
        assert self.n_qubits <= 8, "n_qubits should be <= 8 for NISQ compatibility"
        assert 0 <= self.quantum_sparsity <= 1, "quantum_sparsity must be in [0, 1]"
        return True
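

# Example of the checks in ModelConfig.validate() (illustrative values, not
# defaults from this file):
#
#     ModelConfig(d_model=128, n_heads=4).validate()  # passes: 128 % 4 == 0
#     ModelConfig(d_model=100, n_heads=3).validate()  # AssertionError: 100 % 3 != 0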


@dataclass
class TrainingConfig:
    """Training hyperparameters."""

    learning_rate: float = 3e-4
    weight_decay: float = 0.01
    warmup_steps: int = 100
    max_epochs: int = 10
    batch_size: int = 16
    gradient_accumulation_steps: int = 1
    max_grad_norm: float = 1.0
    seed: int = 42

    # Scheduler
    lr_scheduler: str = "cosine"  # cosine, linear, constant
    lr_min_factor: float = 0.1

    def validate(self):
        assert self.learning_rate > 0, "learning_rate must be > 0"
        assert self.batch_size >= 1, "batch_size must be >= 1"
        assert self.seed >= 0, "seed must be >= 0"
        return True


@dataclass
class BudgetConfig:
    """Deployment budget constraints.

    The model auto-adjusts tensor ranks and quantum usage to meet these.
    """

    max_params: Optional[int] = None  # Maximum trainable parameters
    max_latency_ms: Optional[float] = None  # Max inference latency (ms)
    max_energy_per_query: Optional[float] = None  # Max energy per query (μJ)
    target_compression_ratio: Optional[float] = None  # Target param reduction

    def validate(self):
        if self.max_params is not None:
            assert self.max_params > 0, "max_params must be > 0"
        if self.max_latency_ms is not None:
            assert self.max_latency_ms > 0, "max_latency_ms must be > 0"
        if self.max_energy_per_query is not None:
            assert self.max_energy_per_query > 0, "max_energy_per_query must be > 0"
        if self.target_compression_ratio is not None:
            assert self.target_compression_ratio > 0, "target_compression_ratio must be > 0"
        return True
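

# Example: constrain a deployment to one million parameters and a 10 ms
# latency ceiling (illustrative numbers, not defaults from this file):
#
#     budget = BudgetConfig(max_params=1_000_000, max_latency_ms=10.0)
#     budget.validate()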


@dataclass
class ExperimentConfig:
    """Master configuration combining all sub-configs."""

    model: ModelConfig = field(default_factory=ModelConfig)
    training: TrainingConfig = field(default_factory=TrainingConfig)
    budget: BudgetConfig = field(default_factory=BudgetConfig)
    experiment_name: str = "default"
    output_dir: str = "./outputs"
    wandb_project: Optional[str] = None

    @classmethod
    def from_yaml(cls, path: str) -> "ExperimentConfig":
        """Load from a YAML file."""
        import yaml

        with open(path) as f:
            data = yaml.safe_load(f)
        model = ModelConfig(**data.get("model", {}))
        training = TrainingConfig(**data.get("training", {}))
        budget = BudgetConfig(**data.get("budget", {}))
        return cls(
            model=model,
            training=training,
            budget=budget,
            experiment_name=data.get("experiment_name", "default"),
            output_dir=data.get("output_dir", "./outputs"),
            wandb_project=data.get("wandb_project"),
        )
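
    # The YAML layout mirrors the dataclasses above; any omitted key falls
    # back to its default. A minimal illustrative file:
    #
    #     experiment_name: small
    #     output_dir: ./outputs
    #     model:
    #       d_model: 128
    #       n_heads: 4
    #     training:
    #       learning_rate: 0.0003
    #     budget:
    #       max_latency_ms: 50.0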

    def to_yaml(self, path: str):
        """Save to a YAML file."""
        import yaml

        data = {
            "experiment_name": self.experiment_name,
            "output_dir": self.output_dir,
            "wandb_project": self.wandb_project,
            "model": asdict(self.model),
            "training": asdict(self.training),
            "budget": asdict(self.budget),
        }
        with open(path, "w") as f:
            yaml.dump(data, f, default_flow_style=False)

    def validate(self):
        self.model.validate()
        self.training.validate()
        self.budget.validate()
        return True


# Preset configurations

def tiny_config() -> ExperimentConfig:
    return ExperimentConfig(
        model=ModelConfig(d_model=64, n_layers=2, n_heads=4, tt_rank=4, vocab_size=5000),
        training=TrainingConfig(max_epochs=5, batch_size=16),
        experiment_name="tiny",
    )


def small_config() -> ExperimentConfig:
    return ExperimentConfig(
        model=ModelConfig(d_model=128, n_layers=2, n_heads=4, tt_rank=8, vocab_size=10000),
        training=TrainingConfig(max_epochs=8, batch_size=16),
        experiment_name="small",
    )


def medium_config() -> ExperimentConfig:
    return ExperimentConfig(
        model=ModelConfig(d_model=256, n_layers=4, n_heads=8, tt_rank=12, vocab_size=20000),
        training=TrainingConfig(max_epochs=10, batch_size=8),
        experiment_name="medium",
    )


def production_config() -> ExperimentConfig:
    return ExperimentConfig(
        model=ModelConfig(d_model=512, n_layers=6, n_heads=8, tt_rank=16, vocab_size=30000),
        training=TrainingConfig(max_epochs=15, batch_size=4, gradient_accumulation_steps=4),
        budget=BudgetConfig(max_latency_ms=50.0, target_compression_ratio=2.0),
        experiment_name="production",
    )


PRESETS = {
    "tiny": tiny_config,
    "small": small_config,
    "medium": medium_config,
    "production": production_config,
}
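

if __name__ == "__main__":
    # Minimal usage sketch: build a preset, validate it, and round-trip it
    # through YAML. Requires PyYAML; the output path is just an example.
    cfg = PRESETS["small"]()
    cfg.validate()
    cfg.to_yaml("small_config.yaml")
    loaded = ExperimentConfig.from_yaml("small_config.yaml")
    assert loaded.model.d_model == cfg.model.d_model
    print(f"Round-tripped preset '{loaded.experiment_name}' (d_model={loaded.model.d_model})")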