sync: docs, training page fixes, OpenEnv SFT demo notebook
Browse files- results/eval_results.json +12 -4
results/eval_results.json
CHANGED
|
@@ -1,6 +1,14 @@
|
|
| 1 |
{
|
| 2 |
-
"random_mean_reward":
|
| 3 |
-
"base_mean_reward":
|
| 4 |
-
"
|
| 5 |
-
"
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 6 |
}
|
|
|
|
| 1 |
{
|
| 2 |
+
"random_mean_reward": 14.6,
|
| 3 |
+
"base_mean_reward": 31.2,
|
| 4 |
+
"sft_mean_reward": 43.8,
|
| 5 |
+
"grpo_mean_reward": 54.1,
|
| 6 |
+
"n_eval": 16,
|
| 7 |
+
"dataset": "sh4shv4t/parlay-episodes",
|
| 8 |
+
"data_file": "episodes_v2.jsonl",
|
| 9 |
+
"models": {
|
| 10 |
+
"base": "Qwen/Qwen2.5-1.5B-Instruct",
|
| 11 |
+
"sft": "sh4shv4t/parlay-sft-1-5b",
|
| 12 |
+
"grpo": "sh4shv4t/parlay-grpo-1-5b"
|
| 13 |
+
}
|
| 14 |
}
|