---
# Training configuration for a single sweep run (see wandb_run_name below).
# One key per line, block style; key order and all values are kept verbatim.

# --- Model definition ---
# NOTE(review): absolute, user-specific path; this will not resolve on
# another machine or account.
model_config_path: /home/athuser/luxi-files/kotodama/configs/model.yaml
model_config_section: proxy
attn_res: true
attn_res_boundaries:
  - 0
  - 3
  - 7
  - 12
  - 21
  - 25

# --- Data ---
dataset: pipeline/formatted/tokenized/train
eval_dataset: pipeline/formatted/tokenized/eval
packed: true
max_seq_len: 4096

# --- Batch / schedule length ---
batch_size: 4
gradient_accumulation: 1
# NOTE(review): -1 presumably means "no step cap" so that num_epochs governs
# run length; confirm against the trainer.
max_steps: -1
bf16: true
max_grad_norm: 1.0

# --- Muon optimizer settings ---
muon_momentum: 0.95
muon_weight_decay: 0.01
muon_ns_iterations: 5
muon_ns_coefficients: gram_ns

# --- AdamW optimizer settings ---
adamw_betas:
  - 0.9
  - 0.95
adamw_weight_decay: 0.1

# --- Learning-rate schedule ---
warmup_ratio: 0.05
wsd_decay_start: 1.0
wsd_decay_type: sqrt

# --- Logging / analysis / checkpoint cadence ---
logging_steps: 10
grad_analysis_every: 10
weight_drift_every: 10
geo_tier1_every: 10
sample_every: 50
save_every: 25
eval_steps: 25

# --- Checkpointing ---
checkpoint_keep: 3
async_save: true
checkpoint_compress: true
checkpoint_shm_dir: /dev/shm/luxia-sft-ckpts

# --- Experiment tracking ---
wandb_project: kotodama-sft-sweep
wandb_entity: aethera

num_workers: 4

# --- Initial weights ---
# NOTE(review): absolute, user-specific path (same caveat as model_config_path).
pretrained_checkpoint: /home/athuser/luxi-files/kotodama/checkpoints/fullcorpus-ddv1/step_00081252.pt.zst

# --- Per-run sweep values ---
muon_lr: 0.003
# NOTE(review): looks like the repr of a computed float (possibly muon_lr
# scaled by 0.1) rather than a hand-written 0.0003. Kept verbatim so the
# loaded value is bit-identical; confirm before rounding.
adamw_lr: 0.00030000000000000003
num_epochs: 2
wandb_run_name: sweep-fullcorpus-lr3e-03-ep2
output_dir: outputs/sweep/sweep-fullcorpus-lr3e-03-ep2
checkpoint_dir: outputs/sweep/sweep-fullcorpus-lr3e-03-ep2/checkpoints