---
# Production experiment configuration.
experiment_name: production
output_dir: ./outputs/production

# Model architecture and compression settings.
model:
  d_model: 512
  n_heads: 8
  n_layers: 6
  ff_multiplier: 4
  max_seq_len: 256
  vocab_size: 30000
  dropout: 0.1
  # Tensor-train factorization settings — presumably rank bounds for the
  # factorized FFN; confirm against the consuming model code.
  tt_rank: 16
  tt_min_rank: 4
  use_tensor_ffn: true
  # Quantum-layer settings.
  n_qubits: 6
  n_quantum_layers: 3
  quantum_sparsity: 0.8
  use_quantum: true
  rank_alpha: 2.0
  rank_smoothing: 0.95

# Optimizer and learning-rate schedule settings.
training:
  learning_rate: 2.0e-4
  weight_decay: 0.01
  warmup_steps: 500
  max_epochs: 15
  batch_size: 4
  gradient_accumulation_steps: 4
  max_grad_norm: 1.0
  seed: 42
  lr_scheduler: cosine
  lr_min_factor: 0.05

# Resource budget constraints.
budget:
  max_params: 50000000
  max_latency_ms: 50.0
  max_energy_per_query: 500.0
  target_compression_ratio: 2.0