"""
Comprehensive benchmark script for Q-TensorFormer v3.

Runs a multi-model comparison against all baselines and produces
a full evaluation report with Pareto frontier analysis.

Usage:
    python scripts/benchmark.py --preset small --epochs 5 --output results/
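
For a quick smoke test on synthetic data:
    python scripts/benchmark.py --preset tiny --epochs 1 --synthetic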
"""
import sys
import os
import argparse
import json
from pathlib import Path

import torch
# Add project root to path
sys.path.insert(0, str(Path(__file__).parent.parent))
from src.config import ExperimentConfig, ModelConfig, TrainingConfig, PRESETS
from src.models import create_model
from src.baselines import StandardTransformer, DistilledTransformer, PrunedTransformer
from src.data import load_wikitext2, load_synthetic_data
from src.training import Trainer
from src.metrics import (
evaluate_model, compare_models, compute_pareto_frontier,
compute_efficiency_score, print_comparison_table,
rank_trajectory_analysis,
)
def parse_args():
parser = argparse.ArgumentParser(description="Q-TensorFormer Benchmark")
parser.add_argument("--preset", type=str, default="small",
choices=["tiny", "small", "medium"],
help="Configuration preset")
parser.add_argument("--epochs", type=int, default=5,
help="Training epochs")
parser.add_argument("--batch-size", type=int, default=16)
parser.add_argument("--seq-len", type=int, default=128)
parser.add_argument("--output", type=str, default="./outputs/benchmark/",
help="Output directory")
parser.add_argument("--device", type=str, default="cpu",
help="Device (cpu, cuda)")
parser.add_argument("--synthetic", action="store_true",
help="Use synthetic data (faster)")
parser.add_argument("--seed", type=int, default=42)
return parser.parse_args()
def main():
args = parse_args()
torch.manual_seed(args.seed)
# Load config
config = PRESETS[args.preset]()
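    # PRESETS maps preset names to zero-arg config factories; calling one
    # returns the ExperimentConfig whose fields the CLI args below override.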
config.training.max_epochs = args.epochs
config.training.batch_size = args.batch_size
config.model.max_seq_len = args.seq_len
print(f"Config: {config.experiment_name}")
print(f"Model: d_model={config.model.d_model}, "
f"n_layers={config.model.n_layers}, "
f"tt_rank={config.model.tt_rank}")
# Load data
print("\nLoading data...")
if args.synthetic:
train_loader = load_synthetic_data(
vocab_size=config.model.vocab_size,
seq_len=args.seq_len,
n_samples=2000,
batch_size=args.batch_size,
)
val_loader = None
test_loader = train_loader # Same for synthetic
tokenizer = None
else:
train_loader, val_loader, test_loader, tokenizer = load_wikitext2(
seq_len=args.seq_len,
batch_size=args.batch_size,
)
config.model.vocab_size = tokenizer.vocab_size
# Create models
print("\nCreating models...")
models = {}
# Q-TensorFormer (hybrid)
models["QTensorFormer"] = create_model(config, "qtensor")
print(f" QTensorFormer: {models['QTensorFormer'].total_params:,} params")
# TT-Only (no quantum)
models["TensorOnly"] = create_model(config, "tensor_only")
print(f" TensorOnly: {models['TensorOnly'].total_params:,} params")
# Standard transformer (dense)
models["StandardTransformer"] = StandardTransformer(
vocab_size=config.model.vocab_size,
d_model=config.model.d_model,
n_heads=config.model.n_heads,
n_layers=config.model.n_layers,
max_seq_len=config.model.max_seq_len,
)
print(f" StandardTransformer: {models['StandardTransformer'].total_params:,} params")
# Distilled (smaller dense)
models["Distilled"] = DistilledTransformer(
vocab_size=config.model.vocab_size,
d_model=max(64, config.model.d_model // 2),
n_heads=config.model.n_heads,
n_layers=config.model.n_layers,
max_seq_len=config.model.max_seq_len,
)
print(f" Distilled: {models['Distilled'].total_params:,} params")
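    # Pruned baseline: PrunedTransformer is imported above but not benchmarked
    # here. The sketch below assumes it shares StandardTransformer's constructor
    # signature (it may also take pruning-specific kwargs, e.g. a sparsity
    # ratio); check src/baselines.py and uncomment to include it in the run.
    # models["Pruned"] = PrunedTransformer(
    #     vocab_size=config.model.vocab_size,
    #     d_model=config.model.d_model,
    #     n_heads=config.model.n_heads,
    #     n_layers=config.model.n_layers,
    #     max_seq_len=config.model.max_seq_len,
    # )
    # print(f" Pruned: {models['Pruned'].total_params:,} params")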
# Train all models
print(f"\n{'='*60}")
print("Training models...")
print(f"{'='*60}")
trained_models = {}
for name, model in models.items():
print(f"\n--- Training {name} ---")
trainer = Trainer(
model, config,
train_loader=train_loader,
val_loader=val_loader,
test_loader=test_loader,
device=args.device,
output_dir=f"{args.output}/{name}",
)
trainer.train()
trained_models[name] = model
# Evaluate
print(f"\n{'='*60}")
print("Evaluating models...")
print(f"{'='*60}")
results = {}
for name, model in trained_models.items():
results[name] = evaluate_model(model, test_loader, args.device)
# Print comparison
print_comparison_table(results)
# Pareto frontier
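    # (a model is Pareto-optimal if no other model beats it on every metric
    # simultaneously, e.g. both lower perplexity and fewer parameters)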
pareto = compute_pareto_frontier(results)
print(f"\nPareto-optimal models: {pareto}")
# Efficiency ranking
efficiency = {name: compute_efficiency_score(r) for name, r in results.items()}
best = max(efficiency, key=efficiency.get)
print(f"Most efficient: {best} (score={efficiency[best]:.1f})")
# Save results
os.makedirs(args.output, exist_ok=True)
with open(f"{args.output}/results.json", "w") as f:
        # Convert tensor / NumPy scalar metrics to native Python floats for JSON
clean = {}
for name, r in results.items():
clean[name] = {k: (float(v) if hasattr(v, 'item') else v) for k, v in r.items()}
json.dump({
"config": config.experiment_name,
"results": clean,
"pareto": pareto,
"efficiency": {k: float(v) for k, v in efficiency.items()},
"best": best,
}, f, indent=2)
print(f"\nResults saved to {args.output}/results.json")
# Summary
print(f"\n{'='*60}")
print("SUMMARY")
print(f"{'='*60}")
for name in results:
ppl = results[name]["test_ppl"]
params = results[name]["total_params"]
lat = results[name].get("latency_ms_mean", 0)
print(f" {name:<25} PPL={ppl:.2f} Params={params:,} Lat={lat:.1f}ms")
if __name__ == "__main__":
main()