# Source: kotodama-108m-instruct / fc-sft_config.yaml
# Uploaded by LuxiaSL with huggingface_hub (commit 5d28053, verified).
# --- Model ------------------------------------------------------------------
# Model definition is read from an external YAML file; `model_config_section`
# presumably names the entry inside that file to use (confirm in the loader).
# The path is absolute, so it only resolves on a host with this home layout.
model_config_path: /home/athuser/luxi-files/kotodama/configs/model.yaml
model_config_section: proxy
# NOTE(review): `attn_res_boundaries` looks like a list of layer indices used
# when `attn_res` is enabled; confirm the meaning against the model code.
attn_res: true
attn_res_boundaries: [0, 3, 7, 12, 21, 25]
# --- Data -------------------------------------------------------------------
# Train / eval dataset locations. These are relative paths — presumably
# resolved against the directory the trainer is launched from (TODO confirm).
dataset: pipeline/formatted/tokenized/train
eval_dataset: pipeline/formatted/tokenized/eval
# NOTE(review): `packed` presumably means samples are packed into windows of
# `max_seq_len` tokens; verify against the data loader.
packed: true
max_seq_len: 4096
# --- Batch, precision, clipping ---------------------------------------------
batch_size: 4
gradient_accumulation: 1
# NOTE(review): -1 presumably means "no step cap", leaving run length to
# `num_epochs`; confirm in the training loop.
max_steps: -1
bf16: true
max_grad_norm: 1.0
# --- Optimizer --------------------------------------------------------------
# Two sets of hyperparameters are configured: `muon_*` and `adamw_*`.
# NOTE(review): which parameters each optimizer handles is decided in the
# training code, not in this file.
muon_momentum: 0.95
muon_weight_decay: 0.01
muon_ns_iterations: 5
# NOTE(review): `gram_ns` is a named preset, not a list of numbers; its
# definition lives in the optimizer implementation.
muon_ns_coefficients: gram_ns
adamw_betas: [0.9, 0.95]
adamw_weight_decay: 0.1
# --- Learning-rate schedule -------------------------------------------------
# NOTE(review): `warmup_ratio` and `wsd_decay_start` look like fractions of the
# total run; if so, a decay start of 1.0 would leave no decay phase. Confirm
# against the scheduler before relying on `wsd_decay_type`.
warmup_ratio: 0.05
wsd_decay_start: 1.0
wsd_decay_type: sqrt
# --- Logging and diagnostics cadence ----------------------------------------
# All four of these intervals are set to 10.
# NOTE(review): the unit is presumably optimizer steps; confirm in the trainer.
logging_steps: 10
grad_analysis_every: 10
weight_drift_every: 10
geo_tier1_every: 10
sample_every: 50
# Saving and evaluation use the same interval value (25).
save_every: 25
eval_steps: 25
# --- Checkpointing ----------------------------------------------------------
# NOTE(review): `checkpoint_keep` presumably caps how many checkpoints are
# retained; confirm the pruning behavior in the checkpoint code.
checkpoint_keep: 3
async_save: true
checkpoint_compress: true
# Directory under /dev/shm — presumably a staging area for async saves
# (TODO confirm).
checkpoint_shm_dir: /dev/shm/luxia-sft-ckpts
# --- Experiment tracking and data loading -----------------------------------
wandb_project: kotodama-sft-sweep
wandb_entity: aethera
num_workers: 4
# --- Per-run sweep values ---------------------------------------------------
# Checkpoint to start from. Absolute path, so it only resolves on a host with
# this home layout.
pretrained_checkpoint: /home/athuser/luxi-files/kotodama/checkpoints/fullcorpus-ddv1/step_00081252.pt.zst
muon_lr: 0.003
# NOTE(review): this value looks like the result of floating-point arithmetic
# (e.g. a script scaling `muon_lr`), not a hand-written 0.0003. It is left
# exactly as generated so the loaded float is unchanged.
adamw_lr: 0.00030000000000000003
num_epochs: 2
# The run name encodes the values above ("lr3e-03" = muon_lr 0.003,
# "ep2" = num_epochs 2); both output paths embed the same run name.
wandb_run_name: sweep-fullcorpus-lr3e-03-ep2
output_dir: outputs/sweep/sweep-fullcorpus-lr3e-03-ep2
checkpoint_dir: outputs/sweep/sweep-fullcorpus-lr3e-03-ep2/checkpoints