---
# Production experiment configuration.
# Reconstructed into block style: the original file had all keys collapsed
# onto one line, which is not parseable YAML (": " inside a plain scalar).

experiment_name: production
output_dir: ./outputs/production

# Model architecture (transformer + tensor-train FFN + quantum layers).
model:
  d_model: 512
  n_heads: 8
  n_layers: 6
  ff_multiplier: 4
  max_seq_len: 256
  vocab_size: 30000
  dropout: 0.1
  # Tensor-train factorization of the feed-forward blocks.
  tt_rank: 16
  tt_min_rank: 4
  use_tensor_ffn: true
  # Quantum-circuit layer settings.
  n_qubits: 6
  n_quantum_layers: 3
  quantum_sparsity: 0.8
  use_quantum: true
  # NOTE(review): rank_alpha / rank_smoothing grouped under `model` based on
  # their position in the original line — confirm against the config loader.
  rank_alpha: 2.0
  rank_smoothing: 0.95

# Optimization / training loop hyperparameters.
training:
  learning_rate: 2.0e-4
  weight_decay: 0.01
  warmup_steps: 500
  max_epochs: 15
  batch_size: 4
  # Effective batch size = batch_size * gradient_accumulation_steps = 16.
  gradient_accumulation_steps: 4
  max_grad_norm: 1.0
  seed: 42
  lr_scheduler: cosine
  lr_min_factor: 0.05

# Resource budget constraints for the deployed model.
budget:
  max_params: 50000000
  max_latency_ms: 50.0
  max_energy_per_query: 500.0
  target_compression_ratio: 2.0