moudook
/

adaptive-model

Model card Files Files and versions

adaptive-model / config.json

moudook's picture

config step=22500

813b8f2 verified 26 days ago

history blame contribute delete

869 Bytes

	{
	"checkpoint_dir": "/workspace/adaptive_model/checkpoints",
	"tokenizer_dir": "/workspace/adaptive_model/tokenizer",
	"run_name": "phase2_deepseek_exclusive",
	"hf_repo_id": "moudook/adaptive-model",
	"hf_push_every_n": 3,
	"dataset_names": [
	"a-m-team/AM-DeepSeek-R1-Distilled-1.4M"
	],
	"local_paths": {},
	"vocab_size": 32000,
	"hidden_dim": 2048,
	"num_layers": 24,
	"num_heads": 16,
	"num_kv_heads": 4,
	"intermediate_dim": 5504,
	"max_seq_len": 2048,
	"dtype": "bfloat16",
	"learning_rate": 0.0001,
	"weight_decay": 0.1,
	"beta1": 0.9,
	"beta2": 0.95,
	"grad_clip": 1.0,
	"warmup_steps": 200,
	"total_steps": 100000,
	"batch_size": 64,
	"grad_accum": 2,
	"save_every": 500,
	"log_every": 10,
	"keep_last_n": 3,
	"use_wandb": false,
	"wandb_project": "adaptive-model",
	"device": "cuda",
	"seed": 42,
	"prefetch_batches": 8
	}