{
  "vocab_size": 256,
  "max_seq_len": 64,
  "n_layers": 2,
  "n_heads": 4,
  "n_kv_heads": 2,
  "n_embd": 64,
  "n_inner": 128,
  "dropout": 0.0,
  "bias": false,
  "rope_theta": 10000.0,
  "rms_norm_eps": 1e-06,
  "tie_embeddings": true,
  "batch_size": 2,
  "learning_rate": 0.001,
  "weight_decay": 0.1,
  "max_steps": 3,
  "warmup_steps": 0,
  "eval_interval": 250,
  "eval_steps": 20,
  "log_interval": 50,
  "save_interval": 1000,
  "gradient_accumulation_steps": 4,
  "grad_clip": 1.0,
  "dataset_name": "roneneldan/TinyStories",
  "output_dir": "checkpoints"
}