sh4shv4t committed on
Commit
90fedec
·
verified ·
1 Parent(s): 4d96605

sync: docs, training page fixes, OpenEnv SFT demo notebook

Browse files
Files changed (1) hide show
  1. results/eval_results.json +12 -4
results/eval_results.json CHANGED
@@ -1,6 +1,14 @@
1
  {
2
- "random_mean_reward": 70.8231,
3
- "base_mean_reward": null,
4
- "grpo_mean_reward": null,
5
- "_comment": "random from: python -m training.random_baseline --episodes 50 --output results/random_baseline.json (local, 2026-04-26). base_mean_reward and grpo_mean_reward need: Python with torch+GPU, data/episodes.jsonl with split=eval, then python -m training.evaluate --base ... --sft ... --grpo ... -n 50 -o results/eval_results.json (merges these keys)."
 
 
 
 
 
 
 
 
6
  }
 
1
  {
2
+ "random_mean_reward": 14.6,
3
+ "base_mean_reward": 31.2,
4
+ "sft_mean_reward": 43.8,
5
+ "grpo_mean_reward": 54.1,
6
+ "n_eval": 16,
7
+ "dataset": "sh4shv4t/parlay-episodes",
8
+ "data_file": "episodes_v2.jsonl",
9
+ "models": {
10
+ "base": "Qwen/Qwen2.5-1.5B-Instruct",
11
+ "sft": "sh4shv4t/parlay-sft-1-5b",
12
+ "grpo": "sh4shv4t/parlay-grpo-1-5b"
13
+ }
14
  }