adaptive-retro-gpt-1b / config.json
kyLELEng's picture
Train Adaptive-RETRO-GPT-1B
5a8b07f verified
raw
history blame contribute delete
429 Bytes
{
"model_type": "adaptive_retro_gpt",
"architectures": [
"RetroGPT"
],
"vocab_size": 50000,
"d_model": 2048,
"n_layers": 20,
"n_heads": 16,
"seq_len": 2048,
"retrieval_layers": "5,11,17",
"top_k": 2,
"retrieval_seq_len": 512,
"hash_dim": 1024,
"retrieval_budget_lambda": 0.001,
"no_retrieval_prob": 0.1,
"random_retrieval_prob": 0.1,
"parameter_count": 1172146179,
"checkpoint_step": 20000
}