SynthAudit-Env / outputs /training_log_200.json
Timusgeorge's picture
Upload training_log_200.json
8bda97a verified
Invalid JSON: Unexpected token 'N', ...".189, NaN, 0"... is not valid JSON
{
"steps": [
1,
2,
3,
4,
5,
6,
7,
8,
9,
10,
11,
12,
13,
14,
15,
16,
17,
18,
19,
20,
21,
22,
23,
24,
25,
26,
27,
28,
29,
30,
31,
32,
33,
34,
35,
36,
37,
38,
39,
40,
41,
42,
43,
44,
45,
46,
47,
48,
49,
50,
51,
52,
53,
54,
55,
56,
57,
58,
59,
60,
61,
62,
63,
64,
65,
66,
67,
68,
69,
70,
71,
72,
73,
74,
75,
76,
77,
78,
79,
80,
81,
82,
83,
84,
85,
86,
87,
88,
89,
90,
91,
92,
93,
94,
95,
96,
97,
98,
99,
100,
101,
102,
103,
104,
105,
106,
107,
108,
109,
110,
111,
112,
113,
114,
115,
116,
117,
118,
119,
120,
121,
122,
123,
124,
125,
126,
127,
128,
129,
130,
131,
132,
133,
134,
135,
136,
137,
138,
139,
140,
141,
142,
143,
144,
145,
146,
147,
148,
149,
150,
151,
152,
153,
154,
155,
156,
157,
158,
159,
160,
161,
162,
163,
164,
165,
166,
167,
168,
169,
170,
171,
172,
173,
174,
175,
176,
177,
178,
179,
180,
181,
182,
183,
184,
185,
186,
187,
188,
189,
190,
191,
192,
193,
194,
195,
196,
197,
198,
199,
200
],
"rewards": [
0.184,
0.1201,
0.1201,
0.0333,
0.1145,
0.1035,
0.244,
0.1729,
0.1007,
0.1063,
0.1174,
0.3363,
0.18,
0.1736,
0.2347,
0.0333,
0.1063,
0.0416,
0.1174,
0.2712,
0.2014,
0.1736,
0.1736,
0.1174,
0.0444,
0.1763,
0.1792,
0.2069,
0.1736,
0.1673,
0.2014,
0.2018,
0.3584,
0.1856,
0.2347,
0.1991,
0.193,
0.1229,
0.2513,
0.2201,
0.2347,
0.0333,
0.1645,
0.1736,
0.2597,
0.2708,
0.2485,
0.2014,
0.1847,
0.1847,
0.2907,
0.1063,
0.1903,
0.1736,
0.1945,
0.1173,
0.1063,
0.293,
0.2847,
0.2763,
0.1173,
0.2347,
0.2145,
0.3002,
0.1145,
0.1035,
0.2569,
0.1173,
0.2996,
0.2903,
0.3751,
0.0333,
0.2347,
0.1903,
0.1146,
0.0333,
0.109,
0.3341,
0.2224,
0.2347,
0.2702,
0.1812,
0.1903,
0.2224,
0.3013,
0.1903,
0.1118,
0.1646,
0.179,
0.2375,
0.209,
0.3885,
0.2796,
0.2846,
0.1145,
0.2903,
0.1903,
0.1763,
0.1007,
0.1736,
0.2168,
0.2435,
0.2146,
0.2958,
0.263,
0.1903,
0.3647,
0.2569,
0.1257,
0.0333,
0.2501,
0.2907,
0.2173,
0.2935,
0.3485,
0.3264,
0.368,
0.1007,
0.1201,
0.109,
0.3207,
0.2324,
0.2542,
0.2946,
0.3514,
0.2597,
0.399,
0.4013,
0.3701,
0.4363,
0.025,
0.0333,
0.368,
0.0333,
0.1958,
0.3046,
0.3208,
0.2401,
0.3013,
0.2553,
0.3074,
0.2347,
0.368,
0.2344,
0.2708,
0.3335,
0.2819,
0.3241,
0.3813,
0.0333,
0.0361,
0.1145,
0.1174,
0.293,
0.2769,
0.0472,
0.5063,
0.1874,
0.3625,
0.1862,
0.1945,
0.3051,
0.1173,
0.3541,
0.1007,
0.2784,
0.0217,
0.1173,
0.184,
0.184,
0.2347,
0.3374,
0.1955,
0.3514,
0.2206,
0.3546,
0.109,
0.2824,
0.1708,
0.3514,
0.1958,
0.3958,
0.3013,
0.2485,
0.0979,
0.2875,
0.3013,
0.3124,
0.4051,
0.2764,
0.2542,
0.1285,
0.4053,
0.1895,
0.2375,
0.3196,
0.2625,
0.3735,
0.1874,
0.3462
],
"reward_std": [
0.2209,
0.0756,
0.1148,
0.0078,
0.1227,
0.0992,
0.2901,
0.1895,
0.1031,
0.0952,
0.1109,
0.1909,
0.1995,
0.0,
0.0864,
0.0078,
0.0952,
0.0039,
0.1109,
0.3442,
0.0078,
0.0,
0.0078,
0.1109,
0.0235,
0.0039,
0.0078,
0.0157,
0.0,
0.1895,
0.0078,
0.0007,
0.2614,
0.2484,
0.0864,
0.0046,
0.0275,
0.1345,
0.0629,
0.272,
0.0864,
0.0078,
0.217,
0.0,
0.1218,
0.1061,
0.0668,
0.0078,
0.0157,
0.0157,
0.1407,
0.1266,
0.0235,
0.0,
0.2358,
0.1188,
0.0952,
0.1689,
0.1571,
0.1061,
0.1188,
0.0864,
0.2484,
0.1588,
0.1227,
0.0913,
0.1178,
0.1266,
0.3843,
0.165,
0.2615,
0.0078,
0.0864,
0.0235,
0.107,
0.0078,
0.0914,
0.0385,
0.2751,
0.0864,
0.1366,
0.2013,
0.0235,
0.2751,
0.1336,
0.0235,
0.1188,
0.1856,
0.239,
0.0903,
0.2563,
0.0684,
0.0936,
0.165,
0.1227,
0.1336,
0.0235,
0.0039,
0.1031,
0.0,
0.2925,
0.0989,
0.2641,
0.0,
0.0936,
0.0235,
0.0047,
0.1178,
0.1306,
0.0078,
0.1083,
0.3717,
0.2445,
0.1604,
0.0668,
0.0196,
0.0393,
0.1031,
0.1306,
0.0914,
0.0432,
0.0204,
0.0589,
0.0062,
0.0786,
0.1218,
0.0832,
0.0078,
0.0284,
0.0572,
0.0117,
0.0078,
0.0393,
0.0078,
0.0,
0.1211,
0.0354,
0.1714,
0.1414,
0.1155,
0.0164,
0.0864,
0.0393,
0.2765,
0.1061,
0.0062,
0.1532,
0.0308,
0.0283,
0.0078,
0.004,
0.1227,
0.1109,
0.1375,
0.0975,
0.0275,
0.0982,
0.0196,
0.0235,
0.2476,
0.2358,
0.3922,
0.1266,
0.0668,
0.1031,
0.1045,
0.0165,
0.1188,
0.2131,
0.2209,
0.0864,
0.0589,
0.0075,
0.0786,
0.2726,
0.074,
0.1148,
0.36,
0.004,
0.0786,
0.0236,
0.0236,
0.0078,
0.0668,
0.0991,
0.161,
0.1336,
0.0235,
0.0668,
0.1139,
0.1139,
0.0952,
0.0448,
0.2131,
0.0903,
0.2065,
0.0472,
0.0235,
0.0196,
0.0622
],
"kl": [
0.0,
0.0,
3e-06,
5e-06,
2e-06,
4e-06,
2e-06,
3e-06,
1e-05,
3e-06,
0.001917,
7e-06,
0.001059,
5e-06,
1.3e-05,
3e-06,
5e-06,
6e-06,
8e-06,
8e-06,
1.2e-05,
4.7e-05,
4.8e-05,
3.2e-05,
3.3e-05,
0.000108,
0.00018,
4.2e-05,
0.000189,
0.00012,
9.3e-05,
0.000155,
0.00196,
0.000149,
0.000252,
0.000233,
0.000311,
0.000286,
0.000172,
0.000169,
0.000146,
0.005918,
0.000294,
0.000185,
0.000377,
9.5e-05,
9.7e-05,
0.000185,
0.000191,
0.000213,
0.000139,
0.000185,
0.000328,
0.000269,
0.000423,
0.000312,
0.000403,
0.000629,
0.000958,
0.000654,
0.000763,
0.000818,
0.000679,
0.001007,
0.000409,
0.000742,
0.001032,
0.000433,
0.000424,
0.001175,
0.065512,
0.000491,
0.001283,
0.001551,
0.000743,
0.00205,
0.001048,
0.003152,
0.001422,
0.002206,
0.001268,
0.001376,
0.0006,
0.000647,
0.002288,
0.000986,
0.003312,
0.00304,
0.001072,
0.003339,
0.000917,
0.001449,
0.001279,
0.003114,
0.008185,
0.001049,
0.000395,
0.005086,
0.004066,
0.007162,
0.00197,
0.001734,
0.003092,
0.002859,
0.001129,
0.000954,
0.0037,
0.008848,
0.003338,
0.003114,
0.013662,
0.00386,
0.001231,
0.002798,
0.001819,
0.006505,
0.002929,
0.002063,
0.003029,
0.053483,
0.007063,
0.002513,
0.015775,
0.004121,
0.004982,
0.007406,
0.004541,
0.005338,
0.051157,
0.003495,
0.003043,
0.005987,
0.008458,
0.001201,
0.002248,
0.079172,
0.005182,
0.003687,
0.002551,
0.192144,
0.005151,
0.004355,
0.00832,
0.002969,
0.003038,
0.000624,
0.008018,
0.003553,
0.005504,
0.012273,
0.001665,
0.002202,
0.001595,
0.001366,
0.001963,
0.00278,
0.007543,
0.016324,
0.013537,
0.002584,
0.001686,
0.003062,
0.008756,
0.006232,
0.007933,
0.151755,
0.004479,
0.001787,
0.004011,
0.007719,
0.002736,
0.004371,
0.121158,
0.009865,
0.111958,
0.001871,
0.003641,
0.002508,
0.004169,
0.005104,
0.001672,
0.003376,
0.008028,
0.003024,
0.004631,
0.003015,
0.005141,
0.004173,
0.001354,
0.004468,
0.005965,
0.000716,
0.003562,
0.004835,
0.004605,
0.144598,
0.004515,
0.004581,
0.002088,
0.006087
],
"completion_length": [
317.0,
311.5,
313.0,
337.5,
203.0,
245.0,
277.0,
292.5,
462.0,
322.5,
364.5,
491.0,
413.5,
243.0,
272.0,
286.5,
161.0,
512.0,
224.0,
343.5,
298.0,
120.0,
247.0,
301.0,
512.0,
193.5,
188.0,
478.5,
131.0,
411.5,
344.5,
327.0,
399.0,
223.0,
167.0,
327.0,
285.5,
203.0,
211.5,
322.5,
230.5,
362.5,
372.0,
137.0,
156.0,
354.0,
482.0,
386.0,
274.5,
261.5,
439.5,
438.5,
207.0,
290.5,
196.0,
449.0,
159.0,
272.0,
172.0,
239.0,
337.5,
368.5,
225.0,
424.5,
341.5,
177.5,
179.0,
273.5,
440.5,
362.5,
401.5,
220.0,
295.0,
223.0,
228.0,
350.5,
321.5,
237.5,
350.5,
290.0,
315.0,
341.5,
189.0,
414.0,
270.0,
200.5,
230.5,
267.0,
202.0,
174.5,
376.5,
372.5,
251.0,
322.5,
156.0,
323.5,
252.5,
125.0,
140.5,
121.5,
236.5,
307.5,
198.0,
275.0,
353.0,
211.0,
303.0,
191.0,
205.0,
323.5,
311.5,
343.5,
359.0,
372.5,
394.5,
337.0,
398.0,
267.5,
426.5,
309.5,
231.5,
430.5,
416.0,
369.0,
388.5,
435.5,
382.5,
261.0,
466.0,
425.0,
144.0,
310.5,
302.5,
318.0,
334.5,
512.0,
184.5,
228.5,
310.5,
322.5,
329.0,
187.0,
291.0,
354.0,
280.5,
512.0,
183.0,
357.5,
262.5,
169.5,
343.5,
401.5,
441.5,
426.0,
512.0,
322.5,
456.0,
212.0,
321.0,
324.0,
413.0,
431.0,
273.0,
236.5,
123.5,
496.0,
335.5,
279.5,
322.0,
243.0,
313.5,
227.0,
428.0,
318.5,
389.5,
413.0,
364.0,
488.5,
176.5,
243.0,
262.0,
512.0,
241.0,
244.0,
248.5,
217.0,
310.0,
206.5,
453.0,
284.0,
250.5,
427.5,
391.5,
344.0,
210.0,
330.5,
313.0,
391.0,
222.0,
334.5
],
"loss": [
-0.2943,
0.4545,
-0.3589,
-0.361,
-0.0626,
-0.3143,
-0.2067,
-0.2332,
-0.0765,
0.4151,
-0.2859,
-0.0259,
-0.1684,
0.0,
0.1844,
-0.1791,
0.1228,
-0.0,
-0.0347,
-0.3468,
-0.075,
0.0,
0.1498,
-0.2065,
0.0,
-0.2298,
-0.26,
0.0492,
0.0,
0.0163,
0.0902,
0.0303,
-0.2002,
0.1141,
-0.1438,
0.0212,
-0.3862,
-0.1845,
0.0317,
-0.4153,
0.0904,
0.288,
-0.266,
0.0,
-0.2219,
0.2973,
0.0439,
0.1357,
0.096,
-0.3453,
-0.1166,
-0.1184,
-0.2653,
0.0,
-0.119,
0.0897,
0.1155,
-0.1689,
-0.1602,
0.1478,
-0.3653,
0.275,
-0.0911,
0.1457,
-0.3527,
0.2368,
-0.1816,
-0.1511,
-0.1147,
0.2914,
-0.1945,
-0.0666,
-0.4405,
-0.3663,
-0.0991,
0.3217,
0.4185,
0.1411,
-0.3257,
0.0901,
-0.4172,
-0.2866,
-0.1863,
-0.1554,
0.0183,
-0.2581,
-0.2866,
0.1006,
-0.5773,
-0.2449,
-0.2544,
-0.2644,
0.1295,
-0.0887,
-0.1993,
-0.1671,
-0.0851,
-0.0441,
0.1131,
0.0,
-0.5963,
-0.4238,
-0.1249,
0.0,
-0.032,
-0.2603,
-0.2651,
-0.2293,
-0.2482,
-0.0745,
-0.4547,
-0.3468,
-0.1457,
0.1584,
0.0421,
0.0501,
0.1506,
0.3948,
0.1416,
0.4622,
-0.0655,
0.1332,
-0.1323,
-0.132,
0.1336,
-0.1192,
-0.2391,
0.0054,
0.0696,
-0.0183,
-0.073,
0.0821,
-0.1154,
-0.1383,
0.0,
0.0001,
-0.0783,
0.0016,
-0.0011,
-0.415,
-0.1666,
-0.2606,
-0.0921,
-0.3155,
0.2833,
0.0,
-0.1931,
0.3007,
-0.0013,
-0.0638,
-0.3383,
0.1945,
0.1128,
-0.0365,
0.0,
-0.414,
-0.0867,
-0.2621,
0.4124,
0.0022,
0.1694,
-0.1329,
-0.2148,
-0.0672,
0.0543,
0.0229,
0.3698,
0.1479,
0.2458,
-0.1687,
-0.0845,
0.1057,
-0.1368,
-0.1452,
-0.2222,
0.0137,
0.2873,
0.034,
0.1075,
-0.0407,
-0.1478,
0.0,
0.0232,
0.055,
-0.1805,
-0.3354,
0.0023,
0.1245,
-0.092,
-0.1219,
-0.1001,
0.1396,
-0.2172,
-0.3041,
-0.259,
-0.388,
0.1804,
0.0378,
-0.3296,
0.3155
],
"grad_norm": [
0.1961,
0.1958,
0.167,
0.1481,
0.1628,
0.2135,
0.1081,
0.1042,
0.172,
0.189,
null,
0.1705,
null,
0.0,
0.2238,
0.1466,
0.1339,
0.1383,
0.14,
0.097,
0.1361,
0.0,
0.285,
0.0758,
0.1444,
0.1839,
0.2411,
0.1278,
0.0,
0.1871,
0.0803,
0.0971,
null,
0.2251,
0.2385,
0.1113,
0.2192,
0.2427,
0.1562,
0.1537,
0.2454,
7.313,
0.1808,
0.0,
0.2211,
0.1802,
0.1352,
0.1725,
0.2209,
0.1537,
0.0943,
0.1949,
0.1484,
0.0,
0.1429,
0.1697,
0.1782,
0.1379,
0.1811,
0.1497,
0.2006,
0.2164,
0.1604,
0.2584,
0.1887,
0.1719,
0.1787,
0.1932,
0.162,
0.1837,
6.6024,
0.0937,
0.1919,
0.2166,
0.0878,
0.1349,
0.1933,
0.1453,
0.0689,
0.23,
0.1865,
0.1643,
0.1729,
0.1429,
0.1652,
0.2979,
0.2033,
0.2461,
0.2547,
0.2016,
0.106,
0.115,
0.1593,
0.1303,
0.2509,
0.1655,
0.2473,
0.398,
0.2547,
0.0009,
0.2819,
0.1811,
0.1835,
0.0001,
0.1994,
0.1029,
0.1525,
0.2811,
0.1642,
0.1278,
0.1342,
0.123,
0.2076,
0.1391,
0.2146,
0.2434,
0.136,
0.265,
0.1568,
8.4723,
0.136,
0.1408,
0.1997,
0.1428,
0.1957,
0.153,
0.1334,
0.0977,
5.4034,
0.2068,
0.3558,
0.182,
0.2308,
0.138,
0.0001,
3.9199,
0.1769,
0.207,
0.2048,
9.8743,
0.0885,
0.2152,
0.2054,
0.0973,
0.1709,
0.1017,
0.2027,
0.1274,
0.2776,
0.1704,
0.1746,
0.1885,
0.2155,
0.1581,
0.1066,
0.203,
0.1085,
0.2463,
0.1368,
0.2517,
0.1909,
0.1723,
0.2901,
0.1457,
0.4256,
4.2154,
0.1762,
0.1586,
0.2176,
0.2246,
0.215,
0.2202,
4.6015,
0.2281,
6.0358,
0.1576,
0.2039,
0.1561,
0.2154,
0.1517,
0.1233,
0.1314,
0.2584,
0.1628,
0.2852,
0.2748,
0.1448,
0.2371,
0.1317,
0.222,
0.256,
0.1449,
0.208,
0.1329,
0.1631,
8.4834,
0.176,
0.2077,
0.2308,
0.1885
],
"model": "Qwen/Qwen2.5-3B-Instruct",
"method": "GRPO_200_steps",
"runtime_seconds": 8412,
"peak_reward": 0.5063,
"peak_step": 157,
"final_reward": 0.3462,
"compute_cost": "$0 (free Colab T4)"
}