InstabilityZoo / metadata /ckpt_139.json
lxe5wipauser17
gpt2-large
4e61b89
{
"checkpoint": "ckpt_139.pt",
"model": "GPT2-L",
"training_step": 12000,
"instability_type": "Slow divergence",
"learning_rate": "1e-3",
"decay": "0.0",
"warm": "2000",
"data_type": "BF16"
}