helloAK96 commited on
Commit
a3c1e24
·
verified ·
1 Parent(s): e1e6bb8

Add training_metrics.json

Browse files
Files changed (1) hide show
  1. training_metrics.json +362 -0
training_metrics.json ADDED
@@ -0,0 +1,362 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ [
2
+ {
3
+ "episode": 10,
4
+ "mean_team_reward": -10.014000344276429,
5
+ "mean_oversight_reward": -10.014000344276429,
6
+ "mean_combined_reward": -10.014000344276429,
7
+ "loss": 0.0,
8
+ "kl": 0.0007380821939477755,
9
+ "reward_std": 6.151828861236572
10
+ },
11
+ {
12
+ "episode": 20,
13
+ "mean_team_reward": -12.84000005722046,
14
+ "mean_oversight_reward": -12.84000005722046,
15
+ "mean_combined_reward": -12.84000005722046,
16
+ "loss": 0.0002,
17
+ "kl": 0.0038208378304261715,
18
+ "reward_std": 10.182337665557862
19
+ },
20
+ {
21
+ "episode": 30,
22
+ "mean_team_reward": -8.586000180244445,
23
+ "mean_oversight_reward": -8.586000180244445,
24
+ "mean_combined_reward": -8.586000180244445,
25
+ "loss": 0.0011,
26
+ "kl": 0.027998939517419785,
27
+ "reward_std": 5.727564811706543
28
+ },
29
+ {
30
+ "episode": 40,
31
+ "mean_team_reward": -12.900000286102294,
32
+ "mean_oversight_reward": -12.900000286102294,
33
+ "mean_combined_reward": -12.900000286102294,
34
+ "loss": 0.0005,
35
+ "kl": 0.012779699172824621,
36
+ "reward_std": 7.789488315582275
37
+ },
38
+ {
39
+ "episode": 50,
40
+ "mean_team_reward": -15.486000084877015,
41
+ "mean_oversight_reward": -15.486000084877015,
42
+ "mean_combined_reward": -15.486000084877015,
43
+ "loss": 0.0008,
44
+ "kl": 0.019036068127752513,
45
+ "reward_std": 2.333452320098877
46
+ },
47
+ {
48
+ "episode": 60,
49
+ "mean_team_reward": -5.5560003280639645,
50
+ "mean_oversight_reward": -5.5560003280639645,
51
+ "mean_combined_reward": -5.5560003280639645,
52
+ "loss": 0.0017,
53
+ "kl": 0.041883758595213295,
54
+ "reward_std": 7.789488315582275
55
+ },
56
+ {
57
+ "episode": 70,
58
+ "mean_team_reward": -8.346000218391419,
59
+ "mean_oversight_reward": -8.346000218391419,
60
+ "mean_combined_reward": -8.346000218391419,
61
+ "loss": 0.0029,
62
+ "kl": 0.07239294637311104,
63
+ "reward_std": 9.54594144821167
64
+ },
65
+ {
66
+ "episode": 80,
67
+ "mean_team_reward": -11.808000373840333,
68
+ "mean_oversight_reward": -11.808000373840333,
69
+ "mean_combined_reward": -11.808000373840333,
70
+ "loss": 0.0031,
71
+ "kl": 0.07783110542222857,
72
+ "reward_std": 6.788225078582764
73
+ },
74
+ {
75
+ "episode": 90,
76
+ "mean_team_reward": -10.308000254631043,
77
+ "mean_oversight_reward": -10.308000254631043,
78
+ "mean_combined_reward": -10.308000254631043,
79
+ "loss": 0.0045,
80
+ "kl": 0.1133664520952152,
81
+ "reward_std": 5.939696788787842
82
+ },
83
+ {
84
+ "episode": 100,
85
+ "mean_team_reward": -6.450000238418579,
86
+ "mean_oversight_reward": -6.450000238418579,
87
+ "mean_combined_reward": -6.450000238418579,
88
+ "loss": 0.0047,
89
+ "kl": 0.11815221256983932,
90
+ "reward_std": 12.668524932861327
91
+ },
92
+ {
93
+ "episode": 110,
94
+ "mean_team_reward": -11.046000361442566,
95
+ "mean_oversight_reward": -11.046000361442566,
96
+ "mean_combined_reward": -11.046000361442566,
97
+ "loss": 0.0034,
98
+ "kl": 0.08407748350673501,
99
+ "reward_std": 12.091525745391845
100
+ },
101
+ {
102
+ "episode": 120,
103
+ "mean_team_reward": -7.050000095367432,
104
+ "mean_oversight_reward": -7.050000095367432,
105
+ "mean_combined_reward": -7.050000095367432,
106
+ "loss": 0.0058,
107
+ "kl": 0.14498155544279143,
108
+ "reward_std": 11.8199969291687
109
+ },
110
+ {
111
+ "episode": 130,
112
+ "mean_team_reward": -13.680000162124633,
113
+ "mean_oversight_reward": -13.680000162124633,
114
+ "mean_combined_reward": -13.680000162124633,
115
+ "loss": 0.004,
116
+ "kl": 0.09893948985263705,
117
+ "reward_std": 4.242640686035156
118
+ },
119
+ {
120
+ "episode": 140,
121
+ "mean_team_reward": -10.529999852180481,
122
+ "mean_oversight_reward": -10.529999852180481,
123
+ "mean_combined_reward": -10.529999852180481,
124
+ "loss": 0.0019,
125
+ "kl": 0.046646403381600976,
126
+ "reward_std": 7.848885250091553
127
+ },
128
+ {
129
+ "episode": 150,
130
+ "mean_team_reward": -11.646000242233276,
131
+ "mean_oversight_reward": -11.646000242233276,
132
+ "mean_combined_reward": -11.646000242233276,
133
+ "loss": 0.0067,
134
+ "kl": 0.16682058114820392,
135
+ "reward_std": 12.515789699554443
136
+ },
137
+ {
138
+ "episode": 160,
139
+ "mean_team_reward": -14.202000284194947,
140
+ "mean_oversight_reward": -14.202000284194947,
141
+ "mean_combined_reward": -14.202000284194947,
142
+ "loss": 0.0091,
143
+ "kl": 0.22706542671658098,
144
+ "reward_std": 7.000357151031494
145
+ },
146
+ {
147
+ "episode": 170,
148
+ "mean_team_reward": -13.745999932289124,
149
+ "mean_oversight_reward": -13.745999932289124,
150
+ "mean_combined_reward": -13.745999932289124,
151
+ "loss": 0.0055,
152
+ "kl": 0.13649323517456652,
153
+ "reward_std": 7.8488850593566895
154
+ },
155
+ {
156
+ "episode": 180,
157
+ "mean_team_reward": -6.354000210762024,
158
+ "mean_oversight_reward": -6.354000210762024,
159
+ "mean_combined_reward": -6.354000210762024,
160
+ "loss": 0.0034,
161
+ "kl": 0.08470403763203649,
162
+ "reward_std": 10.275675678253174
163
+ },
164
+ {
165
+ "episode": 190,
166
+ "mean_team_reward": -8.520000505447388,
167
+ "mean_oversight_reward": -8.520000505447388,
168
+ "mean_combined_reward": -8.520000505447388,
169
+ "loss": 0.0111,
170
+ "kl": 0.2777724491432309,
171
+ "reward_std": 7.636753082275391
172
+ },
173
+ {
174
+ "episode": 200,
175
+ "mean_team_reward": -1.6560003757476807,
176
+ "mean_oversight_reward": -1.6560003757476807,
177
+ "mean_combined_reward": -1.6560003757476807,
178
+ "loss": 0.0068,
179
+ "kl": 0.16909255158680025,
180
+ "reward_std": 17.428768157958984
181
+ },
182
+ {
183
+ "episode": 210,
184
+ "mean_team_reward": -8.982000136375428,
185
+ "mean_oversight_reward": -8.982000136375428,
186
+ "mean_combined_reward": -8.982000136375428,
187
+ "loss": 0.0023,
188
+ "kl": 0.05652904201633646,
189
+ "reward_std": 2.7577164649963377
190
+ },
191
+ {
192
+ "episode": 220,
193
+ "mean_team_reward": -9.870000267028809,
194
+ "mean_oversight_reward": -9.870000267028809,
195
+ "mean_combined_reward": -9.870000267028809,
196
+ "loss": 0.01,
197
+ "kl": 0.2508738947930397,
198
+ "reward_std": 9.12167739868164
199
+ },
200
+ {
201
+ "episode": 230,
202
+ "mean_team_reward": -12.036000370979309,
203
+ "mean_oversight_reward": -12.036000370979309,
204
+ "mean_combined_reward": -12.036000370979309,
205
+ "loss": 0.0061,
206
+ "kl": 0.15191540620289742,
207
+ "reward_std": 4.242640686035156
208
+ },
209
+ {
210
+ "episode": 240,
211
+ "mean_team_reward": -6.534000253677368,
212
+ "mean_oversight_reward": -6.534000253677368,
213
+ "mean_combined_reward": -6.534000253677368,
214
+ "loss": 0.0098,
215
+ "kl": 0.24541939058108256,
216
+ "reward_std": 13.09278917312622
217
+ },
218
+ {
219
+ "episode": 250,
220
+ "mean_team_reward": -7.956000471115113,
221
+ "mean_oversight_reward": -7.956000471115113,
222
+ "mean_combined_reward": -7.956000471115113,
223
+ "loss": 0.0075,
224
+ "kl": 0.18757104043179423,
225
+ "reward_std": 13.72918529510498
226
+ },
227
+ {
228
+ "episode": 260,
229
+ "mean_team_reward": -16.434000277519225,
230
+ "mean_oversight_reward": -16.434000277519225,
231
+ "mean_combined_reward": -16.434000277519225,
232
+ "loss": 0.0456,
233
+ "kl": 1.1388184379960875,
234
+ "reward_std": 8.697413349151612
235
+ },
236
+ {
237
+ "episode": 270,
238
+ "mean_team_reward": -10.992000102996826,
239
+ "mean_oversight_reward": -10.992000102996826,
240
+ "mean_combined_reward": -10.992000102996826,
241
+ "loss": 0.0141,
242
+ "kl": 0.35360540022375064,
243
+ "reward_std": 12.88065700531006
244
+ },
245
+ {
246
+ "episode": 280,
247
+ "mean_team_reward": -16.152000665664673,
248
+ "mean_oversight_reward": -16.152000665664673,
249
+ "mean_combined_reward": -16.152000665664673,
250
+ "loss": 0.0256,
251
+ "kl": 0.6391032215789891,
252
+ "reward_std": 11.87939395904541
253
+ },
254
+ {
255
+ "episode": 290,
256
+ "mean_team_reward": -12.534000158309937,
257
+ "mean_oversight_reward": -12.534000158309937,
258
+ "mean_combined_reward": -12.534000158309937,
259
+ "loss": 0.054,
260
+ "kl": 1.349976682337001,
261
+ "reward_std": 7.848885250091553
262
+ },
263
+ {
264
+ "episode": 300,
265
+ "mean_team_reward": -9.768000221252441,
266
+ "mean_oversight_reward": -9.768000221252441,
267
+ "mean_combined_reward": -9.768000221252441,
268
+ "loss": 0.0122,
269
+ "kl": 0.305848811683245,
270
+ "reward_std": 8.06101722717285
271
+ },
272
+ {
273
+ "episode": 310,
274
+ "mean_team_reward": -13.686000275611878,
275
+ "mean_oversight_reward": -13.686000275611878,
276
+ "mean_combined_reward": -13.686000275611878,
277
+ "loss": 0.0062,
278
+ "kl": 0.15440481160767378,
279
+ "reward_std": 7.8488850593566895
280
+ },
281
+ {
282
+ "episode": 320,
283
+ "mean_team_reward": -12.336000442504883,
284
+ "mean_oversight_reward": -12.336000442504883,
285
+ "mean_combined_reward": -12.336000442504883,
286
+ "loss": 0.0088,
287
+ "kl": 0.21957923881709576,
288
+ "reward_std": 8.90954532623291
289
+ },
290
+ {
291
+ "episode": 330,
292
+ "mean_team_reward": -9.864000296592712,
293
+ "mean_oversight_reward": -9.864000296592712,
294
+ "mean_combined_reward": -9.864000296592712,
295
+ "loss": 0.0056,
296
+ "kl": 0.14099774507340043,
297
+ "reward_std": 9.33380937576294
298
+ },
299
+ {
300
+ "episode": 340,
301
+ "mean_team_reward": -5.706000328063965,
302
+ "mean_oversight_reward": -5.706000328063965,
303
+ "mean_combined_reward": -5.706000328063965,
304
+ "loss": 0.0092,
305
+ "kl": 0.2305462906442699,
306
+ "reward_std": 9.698676681518554
307
+ },
308
+ {
309
+ "episode": 350,
310
+ "mean_team_reward": -7.230000281333924,
311
+ "mean_oversight_reward": -7.230000281333924,
312
+ "mean_combined_reward": -7.230000281333924,
313
+ "loss": 0.0103,
314
+ "kl": 0.25858800403075294,
315
+ "reward_std": 15.79110860824585
316
+ },
317
+ {
318
+ "episode": 360,
319
+ "mean_team_reward": -6.306000304222107,
320
+ "mean_oversight_reward": -6.306000304222107,
321
+ "mean_combined_reward": -6.306000304222107,
322
+ "loss": 0.0201,
323
+ "kl": 0.5033964229747653,
324
+ "reward_std": 10.122940635681152
325
+ },
326
+ {
327
+ "episode": 370,
328
+ "mean_team_reward": -9.204000115394592,
329
+ "mean_oversight_reward": -9.204000115394592,
330
+ "mean_combined_reward": -9.204000115394592,
331
+ "loss": 0.0151,
332
+ "kl": 0.37833709553815426,
333
+ "reward_std": 10.335072708129882
334
+ },
335
+ {
336
+ "episode": 380,
337
+ "mean_team_reward": -8.460000228881835,
338
+ "mean_oversight_reward": -8.460000228881835,
339
+ "mean_combined_reward": -8.460000228881835,
340
+ "loss": 0.0112,
341
+ "kl": 0.28013359699398277,
342
+ "reward_std": 10.759336757659913
343
+ },
344
+ {
345
+ "episode": 390,
346
+ "mean_team_reward": -6.1920002698898315,
347
+ "mean_oversight_reward": -6.1920002698898315,
348
+ "mean_combined_reward": -6.1920002698898315,
349
+ "loss": 0.0112,
350
+ "kl": 0.27889416030375286,
351
+ "reward_std": 5.091168785095215
352
+ },
353
+ {
354
+ "episode": 400,
355
+ "mean_team_reward": -13.158000302314758,
356
+ "mean_oversight_reward": -13.158000302314758,
357
+ "mean_combined_reward": -13.158000302314758,
358
+ "loss": 0.0058,
359
+ "kl": 0.14393960006418638,
360
+ "reward_std": 5.727564907073974
361
+ }
362
+ ]