{ "vocab_size": 32007, "context_length": 512, "d_model": 512, "n_heads": 8, "n_layers": 6, "d_ff": 2048, "dropout": 0.1, "bias": true, "use_rope": true, "rope_base": 10000.0, "use_swiglu": true, "use_rmsnorm": true, "use_flash_attention": false, "batch_size": 4, "learning_rate": 5e-05, "weight_decay": 0.1, "beta1": 0.9, "beta2": 0.95, "grad_clip": 1.0, "grad_accum_steps": 8, "warmup_steps": 100, "max_steps": 2000, "eval_interval": 100, "save_interval": 500, "data_dir": "data/processed", "checkpoint_dir": "checkpoints/phase8_chat_v05_v5", "log_dir": "logs/phase8_chat_v05_v5", "device": "auto", "dtype": "float32", "seed": 42, "use_wandb": false }