---
# Sweep experiment configuration.
# NOTE(review): original file was wrapped in markdown-table pipes (`| … |`),
# which made it invalid YAML and flattened the nested-key indentation;
# reconstructed here with standard 2-space indents. Values are unchanged.

experiment_name: sweep
output_dir: ./outputs/sweep

# Model architecture and compression settings.
model:
  d_model: 128
  n_heads: 4
  n_layers: 2
  ff_multiplier: 4
  max_seq_len: 128
  vocab_size: 10000
  dropout: 0.1
  # Tensor-train factorization ranks (see also sweep.tt_rank below).
  tt_rank: 8
  tt_min_rank: 2
  use_tensor_ffn: true
  # Quantum-layer settings (see also sweep.use_quantum below).
  n_qubits: 4
  n_quantum_layers: 2
  quantum_sparsity: 0.7
  use_quantum: true
  rank_alpha: 2.0
  rank_smoothing: 0.9

# Optimizer and schedule hyperparameters.
training:
  learning_rate: 3.0e-4
  weight_decay: 0.01
  warmup_steps: 50
  max_epochs: 5
  batch_size: 16
  gradient_accumulation_steps: 1
  max_grad_norm: 1.0
  seed: 42
  lr_scheduler: cosine
  lr_min_factor: 0.1

# Resource budget constraints; null means unconstrained.
budget:
  max_params: null
  max_latency_ms: null
  max_energy_per_query: null
  target_compression_ratio: null

# Grid-sweep axes: each key overrides the matching model setting per run.
sweep:
  tt_rank: [2, 4, 8, 16]
  use_quantum: [true, false]