LuxiaSL committed on
Commit
9c0d743
·
verified ·
1 Parent(s): e53ec1a

Training config

Browse files
Files changed (1) hide show
  1. fullcorpus-ddv1.yaml +61 -0
fullcorpus-ddv1.yaml ADDED
@@ -0,0 +1,61 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Full corpus DD-v1 108M: single-phase shuffled ~170B tokens
2
+ # Purpose: test AttnRes geometry under extreme overtrain (~1600x tokens/param)
3
+ # Fresh start from NCA checkpoint, cosine decay, single epoch
4
+
5
+ # Model
6
+ model_size: proxy
7
+
8
+ # AttnRes DD-v1
9
+ attn_res: true
10
+ attn_res_boundaries: "0,3,7,12,21,25"
11
+
12
+ # Data — assembled binary on NFS (mmap, bandwidth is negligible for training)
13
+ data_path: /models/kotodama-data/assembled/train.bin
14
+ sequence_length: 4096
15
+ micro_batch_size: 16
16
+
17
+ # Training — 170.4B tokens, single epoch (no dedup, all 13 sources)
18
+ total_tokens: 170_400_000_000
19
+ muon_lr: 0.02
20
+ adamw_lr: 0.0006
21
+ # ~81K steps total, warmup ~6% of training
22
+ warmup_steps: 5000
23
+ decay_start_pct: 0.90
24
+ decay_type: cosine
25
+ gradient_clip: 1.0
26
+
27
+ # Muon
28
+ muon_momentum: 0.95
29
+ muon_weight_decay: 0.01
30
+ muon_ns_iterations: 5
31
+ muon_ns_coefficients: gram_ns
32
+
33
+ # NCA resume — co-trained NCA+AttnRes DD-v1 checkpoint (seed-17, 852M tokens)
34
+ resume_nca: checkpoints/nca-attnres-ddv1/step_00006500.pt
35
+
36
+ # Optimizations
37
+ compile: true
38
+ attn_impl: auto
39
+ fp8: true
40
+ use_liger: false
41
+
42
+ # Checkpointing — save every ~5B tokens (~2400 steps), keep all for geometric analysis
43
+ checkpoint_dir: checkpoints/fullcorpus-ddv1
44
+ save_every: 2400
45
+ keep_checkpoints: 80
46
+ async_checkpoint: true
47
+ checkpoint_shm_dir: /dev/shm/luxia-base-ckpts
48
+
49
+ # Geometric monitoring
50
+ geo_monitor: true
51
+ geo_monitor_tier1_every: 75
52
+ geo_monitor_tier2_every: 500
53
+
54
+ # Logging
55
+ log_every: 10
56
+ wandb: true
57
+ wandb_project: kotodama-ddv1-fullcorpus
58
+ wandb_run_name: fullcorpus-ddv1-170B
59
+
60
+ # HF upload — push final checkpoint on completion
61
+ hf_upload_repo: aethera-gp/kotodama-fullcorpus-ddv1