adaptive-model / config.json
moudook's picture
config step=22500
813b8f2 verified
raw
history blame contribute delete
869 Bytes
{
"checkpoint_dir": "/workspace/adaptive_model/checkpoints",
"tokenizer_dir": "/workspace/adaptive_model/tokenizer",
"run_name": "phase2_deepseek_exclusive",
"hf_repo_id": "moudook/adaptive-model",
"hf_push_every_n": 3,
"dataset_names": [
"a-m-team/AM-DeepSeek-R1-Distilled-1.4M"
],
"local_paths": {},
"vocab_size": 32000,
"hidden_dim": 2048,
"num_layers": 24,
"num_heads": 16,
"num_kv_heads": 4,
"intermediate_dim": 5504,
"max_seq_len": 2048,
"dtype": "bfloat16",
"learning_rate": 0.0001,
"weight_decay": 0.1,
"beta1": 0.9,
"beta2": 0.95,
"grad_clip": 1.0,
"warmup_steps": 200,
"total_steps": 100000,
"batch_size": 64,
"grad_accum": 2,
"save_every": 500,
"log_every": 10,
"keep_last_n": 3,
"use_wandb": false,
"wandb_project": "adaptive-model",
"device": "cuda",
"seed": 42,
"prefetch_batches": 8
}