LuxiaSL committed on
Commit
5d28053
·
verified ·
1 Parent(s): 9b9135c

Upload fc-sft_config.yaml with huggingface_hub

Browse files
Files changed (1) hide show
  1. fc-sft_config.yaml +51 -0
fc-sft_config.yaml ADDED
@@ -0,0 +1,51 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ model_config_path: /home/athuser/luxi-files/kotodama/configs/model.yaml
2
+ model_config_section: proxy
3
+ attn_res: true
4
+ attn_res_boundaries:
5
+ - 0
6
+ - 3
7
+ - 7
8
+ - 12
9
+ - 21
10
+ - 25
11
+ dataset: pipeline/formatted/tokenized/train
12
+ eval_dataset: pipeline/formatted/tokenized/eval
13
+ packed: true
14
+ max_seq_len: 4096
15
+ batch_size: 4
16
+ gradient_accumulation: 1
17
+ max_steps: -1
18
+ bf16: true
19
+ max_grad_norm: 1.0
20
+ muon_momentum: 0.95
21
+ muon_weight_decay: 0.01
22
+ muon_ns_iterations: 5
23
+ muon_ns_coefficients: gram_ns
24
+ adamw_betas:
25
+ - 0.9
26
+ - 0.95
27
+ adamw_weight_decay: 0.1
28
+ warmup_ratio: 0.05
29
+ wsd_decay_start: 1.0
30
+ wsd_decay_type: sqrt
31
+ logging_steps: 10
32
+ grad_analysis_every: 10
33
+ weight_drift_every: 10
34
+ geo_tier1_every: 10
35
+ sample_every: 50
36
+ save_every: 25
37
+ eval_steps: 25
38
+ checkpoint_keep: 3
39
+ async_save: true
40
+ checkpoint_compress: true
41
+ checkpoint_shm_dir: /dev/shm/luxia-sft-ckpts
42
+ wandb_project: kotodama-sft-sweep
43
+ wandb_entity: aethera
44
+ num_workers: 4
45
+ pretrained_checkpoint: /home/athuser/luxi-files/kotodama/checkpoints/fullcorpus-ddv1/step_00081252.pt.zst
46
+ muon_lr: 0.003
47
+ adamw_lr: 0.00030000000000000003
48
+ num_epochs: 2
49
+ wandb_run_name: sweep-fullcorpus-lr3e-03-ep2
50
+ output_dir: outputs/sweep/sweep-fullcorpus-lr3e-03-ep2
51
+ checkpoint_dir: outputs/sweep/sweep-fullcorpus-lr3e-03-ep2/checkpoints