{
  "vocab_size": 32007,
  "context_length": 512,
  "d_model": 512,
  "n_heads": 8,
  "n_layers": 6,
  "d_ff": 2048,
  "dropout": 0.1,
  "bias": true,
  "use_rope": true,
  "rope_base": 10000.0,
  "use_swiglu": true,
  "use_rmsnorm": true,
  "use_flash_attention": false,
  "batch_size": 4,
  "learning_rate": 5e-05,
  "weight_decay": 0.1,
  "beta1": 0.9,
  "beta2": 0.95,
  "grad_clip": 1.0,
  "grad_accum_steps": 8,
  "warmup_steps": 100,
  "max_steps": 2000,
  "eval_interval": 100,
  "save_interval": 500,
  "data_dir": "data/processed",
  "checkpoint_dir": "checkpoints/phase8_chat_v05_v5",
  "log_dir": "logs/phase8_chat_v05_v5",
  "device": "auto",
  "dtype": "float32",
  "seed": 42,
  "use_wandb": false
}