Premchan369 committed on
Commit
3067a41
·
verified ·
1 Parent(s): bcadbf4

v3.0.0: Configs

Browse files
configs/default.yaml ADDED
@@ -0,0 +1,38 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ experiment_name: default
2
+ output_dir: ./outputs/default
3
+
4
+ model:
5
+ d_model: 128
6
+ n_heads: 4
7
+ n_layers: 2
8
+ ff_multiplier: 4
9
+ max_seq_len: 128
10
+ vocab_size: 10000
11
+ dropout: 0.1
12
+ tt_rank: 8
13
+ tt_min_rank: 2
14
+ use_tensor_ffn: true
15
+ n_qubits: 4
16
+ n_quantum_layers: 2
17
+ quantum_sparsity: 0.7
18
+ use_quantum: true
19
+ rank_alpha: 2.0
20
+ rank_smoothing: 0.9
21
+
22
+ training:
23
+ learning_rate: 3.0e-4
24
+ weight_decay: 0.01
25
+ warmup_steps: 100
26
+ max_epochs: 10
27
+ batch_size: 16
28
+ gradient_accumulation_steps: 1
29
+ max_grad_norm: 1.0
30
+ seed: 42
31
+ lr_scheduler: cosine
32
+ lr_min_factor: 0.1
33
+
34
+ budget:
35
+ max_params: null
36
+ max_latency_ms: null
37
+ max_energy_per_query: null
38
+ target_compression_ratio: null
configs/production.yaml ADDED
@@ -0,0 +1,38 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ experiment_name: production
2
+ output_dir: ./outputs/production
3
+
4
+ model:
5
+ d_model: 512
6
+ n_heads: 8
7
+ n_layers: 6
8
+ ff_multiplier: 4
9
+ max_seq_len: 256
10
+ vocab_size: 30000
11
+ dropout: 0.1
12
+ tt_rank: 16
13
+ tt_min_rank: 4
14
+ use_tensor_ffn: true
15
+ n_qubits: 6
16
+ n_quantum_layers: 3
17
+ quantum_sparsity: 0.8
18
+ use_quantum: true
19
+ rank_alpha: 2.0
20
+ rank_smoothing: 0.95
21
+
22
+ training:
23
+ learning_rate: 2.0e-4
24
+ weight_decay: 0.01
25
+ warmup_steps: 500
26
+ max_epochs: 15
27
+ batch_size: 4
28
+ gradient_accumulation_steps: 4
29
+ max_grad_norm: 1.0
30
+ seed: 42
31
+ lr_scheduler: cosine
32
+ lr_min_factor: 0.05
33
+
34
+ budget:
35
+ max_params: 50000000
36
+ max_latency_ms: 50.0
37
+ max_energy_per_query: 500.0
38
+ target_compression_ratio: 2.0
configs/sweep.yaml ADDED
@@ -0,0 +1,43 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ experiment_name: sweep
2
+ output_dir: ./outputs/sweep
3
+
4
+ model:
5
+ d_model: 128
6
+ n_heads: 4
7
+ n_layers: 2
8
+ ff_multiplier: 4
9
+ max_seq_len: 128
10
+ vocab_size: 10000
11
+ dropout: 0.1
12
+ tt_rank: 8
13
+ tt_min_rank: 2
14
+ use_tensor_ffn: true
15
+ n_qubits: 4
16
+ n_quantum_layers: 2
17
+ quantum_sparsity: 0.7
18
+ use_quantum: true
19
+ rank_alpha: 2.0
20
+ rank_smoothing: 0.9
21
+
22
+ training:
23
+ learning_rate: 3.0e-4
24
+ weight_decay: 0.01
25
+ warmup_steps: 50
26
+ max_epochs: 5
27
+ batch_size: 16
28
+ gradient_accumulation_steps: 1
29
+ max_grad_norm: 1.0
30
+ seed: 42
31
+ lr_scheduler: cosine
32
+ lr_min_factor: 0.1
33
+
34
+ budget:
35
+ max_params: null
36
+ max_latency_ms: null
37
+ max_energy_per_query: null
38
+ target_compression_ratio: null
39
+
40
+ # Sweep overrides (handled by sweep.py)
41
+ sweep:
42
+ tt_rank: [2, 4, 8, 16]
43
+ use_quantum: [true, false]