---
# Default experiment configuration.
# Structure: two top-level scalars plus three section mappings
# (model, training, budget). Each section's keys are indented by 2 spaces.

experiment_name: default
output_dir: ./outputs/default

# Model settings.
model:
  d_model: 128
  n_heads: 4
  n_layers: 2
  ff_multiplier: 4
  max_seq_len: 128
  vocab_size: 10000
  dropout: 0.1
  # NOTE(review): tt_* presumably configure a tensor-train factorisation
  # used when use_tensor_ffn is true -- confirm against the consumer.
  tt_rank: 8
  tt_min_rank: 2
  use_tensor_ffn: true
  # NOTE(review): the quantum-related keys presumably only take effect
  # when use_quantum is true -- confirm against the consumer.
  n_qubits: 4
  n_quantum_layers: 2
  quantum_sparsity: 0.7
  use_quantum: true
  rank_alpha: 2.0
  rank_smoothing: 0.9

# Training settings.
training:
  # Written with a decimal point and a signed exponent so that YAML 1.1
  # loaders also resolve it as a float rather than a string.
  learning_rate: 3.0e-4
  weight_decay: 0.01
  warmup_steps: 100
  max_epochs: 10
  batch_size: 16
  gradient_accumulation_steps: 1
  max_grad_norm: 1.0
  seed: 42
  lr_scheduler: cosine
  lr_min_factor: 0.1

# Budget settings. Every entry is an explicit null in this file.
# NOTE(review): null presumably means "no limit" -- confirm with the consumer.
budget:
  max_params: null
  max_latency_ms: null
  max_energy_per_query: null
  target_compression_ratio: null