| { | |
| "step": 700, | |
| "val_loss": 1.114107608795166, | |
| "mmlu_acc": 0.2822265625, | |
| "arc_easy_acc": 0.271484375, | |
| "model_config": { | |
| "sequence_len": 2048, | |
| "vocab_size": 65536, | |
| "n_layer": 20, | |
| "n_head": 10, | |
| "n_kv_head": 10, | |
| "n_embd": 1280 | |
| } | |
| } |
| { | |
| "step": 700, | |
| "val_loss": 1.114107608795166, | |
| "mmlu_acc": 0.2822265625, | |
| "arc_easy_acc": 0.271484375, | |
| "model_config": { | |
| "sequence_len": 2048, | |
| "vocab_size": 65536, | |
| "n_layer": 20, | |
| "n_head": 10, | |
| "n_kv_head": 10, | |
| "n_embd": 1280 | |
| } | |
| } |