claims-env / artifacts /baselines /random_baseline.json
akhiilll's picture
hackathon submission: theme-aligned README, refreshed training, Qwen vs random plots
9fed01d verified
raw
history blame contribute delete
874 Bytes
{
"backend": "Random baseline (uniform over valid verbs)",
"model": "random",
"env": "https://akhiilll-claims-env.hf.space",
"episodes": 20,
"max_steps": 8,
"rewards": [
10.975,
-8.2,
13.5,
-1.5000000000000002,
13.449152542372882,
12.1,
15.9,
-7.1,
-15.8,
-7.8,
16.0,
13.7,
-7.3,
-5.0,
10.3,
16.0,
16.0,
-15.0,
11.375,
11.0
],
"averages": [
10.975,
1.3875000000000002,
5.425,
3.69375,
5.644830508474577,
5.8698305084745765,
10.689830508474577,
6.569830508474577,
3.7098305084745764,
-0.54,
0.24000000000000005,
-0.20000000000000018,
-0.2400000000000002,
1.92,
5.54,
5.54,
6.0,
4.46,
7.735,
7.875
],
"start_avg": 10.975,
"final_avg": 7.875
}