InstabilityZoo / metadata /ckpt_12.json
lxe5wipauser17
gpt2-small
ee194bd
{
"checkpoint": "ckpt_12.pt",
"model": "GPT2-S",
"training_step": 4000,
"instability_type": "complete non-convergence",
"learning_rate": "3e-3",
"decay": "0.1",
"warm": "2000",
"data_type": "FP8_with_FP8_head"
}