JoshuaFreeman commited on
Commit
49a513e
·
verified ·
1 Parent(s): d217d5a

Upload training_log.json with huggingface_hub

Browse files
Files changed (1) hide show
  1. training_log.json +750 -840
training_log.json CHANGED
@@ -2,1441 +2,1351 @@
2
  {
3
  "update": 5,
4
  "global_step": 10240,
5
- "num_episodes": 5,
6
- "mean_reward": -2.121724510192871,
7
- "mean_length": 337.8,
8
- "loss": 2.8168609142303467,
9
- "sps": 228.7546335682378
10
  },
11
  {
12
  "update": 10,
13
  "global_step": 20480,
14
- "num_episodes": 18,
15
- "mean_reward": -1.9787551561991374,
16
- "mean_length": 282.94444444444446,
17
- "loss": 0.21835778653621674,
18
- "sps": 1443.696434210723
19
  },
20
  {
21
  "update": 15,
22
  "global_step": 30720,
23
- "num_episodes": 18,
24
- "mean_reward": -1.9787551561991374,
25
- "mean_length": 282.94444444444446,
26
- "loss": 0.2217431664466858,
27
- "sps": 2444.104440024094
28
  },
29
  {
30
  "update": 20,
31
  "global_step": 40960,
32
- "num_episodes": 22,
33
- "mean_reward": 0.6390173651955344,
34
- "mean_length": 1597.090909090909,
35
- "loss": 0.5035408735275269,
36
- "sps": 769.1328942825502
37
  },
38
  {
39
  "update": 25,
40
  "global_step": 51200,
41
- "num_episodes": 27,
42
- "mean_reward": 0.33698498761212387,
43
- "mean_length": 1341.111111111111,
44
- "loss": 0.16991910338401794,
45
- "sps": 2449.665853907546
46
  },
47
  {
48
  "update": 30,
49
  "global_step": 61440,
50
- "num_episodes": 29,
51
- "mean_reward": 0.5858152652608937,
52
- "mean_length": 1594.9310344827586,
53
- "loss": 0.6312248110771179,
54
- "sps": 2363.5499282399865
55
  },
56
  {
57
  "update": 35,
58
  "global_step": 71680,
59
- "num_episodes": 29,
60
- "mean_reward": 0.5858152652608937,
61
- "mean_length": 1594.9310344827586,
62
- "loss": 0.4890201985836029,
63
- "sps": 2440.672480568945
64
  },
65
  {
66
  "update": 40,
67
  "global_step": 81920,
68
- "num_episodes": 31,
69
- "mean_reward": 1.2613418640628937,
70
- "mean_length": 1822.0,
71
- "loss": 0.42574575543403625,
72
- "sps": 940.0717318496648
73
  },
74
  {
75
  "update": 45,
76
  "global_step": 92160,
77
- "num_episodes": 55,
78
- "mean_reward": 2.8606436122547496,
79
- "mean_length": 1469.2727272727273,
80
- "loss": 0.5428293943405151,
81
- "sps": 1479.6348998060785
82
  },
83
  {
84
  "update": 50,
85
  "global_step": 102400,
86
- "num_episodes": 58,
87
- "mean_reward": 3.2600761939739358,
88
- "mean_length": 1568.051724137931,
89
- "loss": 0.6221064329147339,
90
- "sps": 2412.4523346060355
91
  },
92
  {
93
  "update": 55,
94
  "global_step": 112640,
95
- "num_episodes": 58,
96
- "mean_reward": 3.2600761939739358,
97
- "mean_length": 1568.051724137931,
98
- "loss": 0.794475257396698,
99
- "sps": 2405.0877911326343
100
  },
101
  {
102
  "update": 60,
103
  "global_step": 122880,
104
- "num_episodes": 58,
105
- "mean_reward": 3.2600761939739358,
106
- "mean_length": 1568.051724137931,
107
- "loss": 1.0936992168426514,
108
- "sps": 2358.874803208204
109
  },
110
  {
111
  "update": 65,
112
  "global_step": 133120,
113
- "num_episodes": 62,
114
- "mean_reward": 7.5118274227265385,
115
- "mean_length": 1954.4516129032259,
116
- "loss": 1.877166986465454,
117
- "sps": 2402.6606220225244
118
  },
119
  {
120
  "update": 70,
121
  "global_step": 143360,
122
- "num_episodes": 63,
123
- "mean_reward": 9.12682440924266,
124
- "mean_length": 2082.15873015873,
125
- "loss": 0.55415278673172,
126
- "sps": 2417.4656020731263
127
  },
128
  {
129
  "update": 75,
130
  "global_step": 153600,
131
- "num_episodes": 63,
132
- "mean_reward": 9.12682440924266,
133
- "mean_length": 2082.15873015873,
134
- "loss": 0.6123632788658142,
135
- "sps": 2415.520694055524
136
  },
137
  {
138
  "update": 80,
139
  "global_step": 163840,
140
- "num_episodes": 63,
141
- "mean_reward": 9.12682440924266,
142
- "mean_length": 2082.15873015873,
143
- "loss": 0.3689587116241455,
144
- "sps": 2464.6240808517623
145
  },
146
  {
147
  "update": 85,
148
  "global_step": 174080,
149
- "num_episodes": 70,
150
- "mean_reward": 9.247220189230783,
151
- "mean_length": 2310.557142857143,
152
- "loss": 0.3228328824043274,
153
- "sps": 2445.2962822104055
154
  },
155
  {
156
  "update": 90,
157
  "global_step": 184320,
158
- "num_episodes": 76,
159
- "mean_reward": 9.205821903128372,
160
- "mean_length": 2268.3026315789475,
161
- "loss": 0.6627960205078125,
162
- "sps": 453.4183326926133
163
  },
164
  {
165
  "update": 95,
166
  "global_step": 194560,
167
- "num_episodes": 80,
168
- "mean_reward": 9.297839653491973,
169
- "mean_length": 2165.9375,
170
- "loss": 1.1097347736358643,
171
- "sps": 2424.5065546850346
172
  },
173
  {
174
  "update": 100,
175
  "global_step": 204800,
176
- "num_episodes": 80,
177
- "mean_reward": 9.297839653491973,
178
- "mean_length": 2165.9375,
179
- "loss": 0.38644012808799744,
180
- "sps": 2386.6710469869367
181
  },
182
  {
183
  "update": 105,
184
  "global_step": 215040,
185
- "num_episodes": 87,
186
- "mean_reward": 11.394574307847297,
187
- "mean_length": 2339.057471264368,
188
- "loss": 0.7209265828132629,
189
- "sps": 2442.8262241524835
190
  },
191
  {
192
  "update": 110,
193
  "global_step": 225280,
194
- "num_episodes": 87,
195
- "mean_reward": 11.394574307847297,
196
- "mean_length": 2339.057471264368,
197
- "loss": 2.1218621730804443,
198
- "sps": 2449.9984290228053
199
  },
200
  {
201
  "update": 115,
202
  "global_step": 235520,
203
- "num_episodes": 90,
204
- "mean_reward": 12.198311275906033,
205
- "mean_length": 2375.788888888889,
206
- "loss": 0.5268573760986328,
207
- "sps": 2438.632604283566
208
  },
209
  {
210
  "update": 120,
211
  "global_step": 245760,
212
- "num_episodes": 90,
213
- "mean_reward": 12.198311275906033,
214
- "mean_length": 2375.788888888889,
215
- "loss": 2.0001754760742188,
216
- "sps": 2413.1198849334774
217
  },
218
  {
219
  "update": 125,
220
  "global_step": 256000,
221
- "num_episodes": 105,
222
- "mean_reward": 12.691367435455323,
223
- "mean_length": 2446.14,
224
- "loss": 0.6336287260055542,
225
- "sps": 619.7035033065293
226
  },
227
  {
228
  "update": 130,
229
  "global_step": 266240,
230
- "num_episodes": 105,
231
- "mean_reward": 12.691367435455323,
232
- "mean_length": 2446.14,
233
- "loss": 0.40229156613349915,
234
- "sps": 2411.1751921586524
235
  },
236
  {
237
  "update": 135,
238
  "global_step": 276480,
239
- "num_episodes": 109,
240
- "mean_reward": 13.886511907577514,
241
- "mean_length": 2526.42,
242
- "loss": 0.45471617579460144,
243
- "sps": 2450.150074118317
244
  },
245
  {
246
  "update": 140,
247
  "global_step": 286720,
248
- "num_episodes": 109,
249
- "mean_reward": 13.886511907577514,
250
- "mean_length": 2526.42,
251
- "loss": 0.4323250949382782,
252
- "sps": 2378.2401254524298
253
  },
254
  {
255
  "update": 145,
256
  "global_step": 296960,
257
- "num_episodes": 121,
258
- "mean_reward": 14.005602750778198,
259
- "mean_length": 2623.1,
260
- "loss": 0.5164788365364075,
261
- "sps": 1792.6912302713768
262
  },
263
  {
264
  "update": 150,
265
  "global_step": 307200,
266
- "num_episodes": 121,
267
- "mean_reward": 14.005602750778198,
268
- "mean_length": 2623.1,
269
- "loss": 0.8702018857002258,
270
- "sps": 2425.950637318183
271
  },
272
  {
273
  "update": 155,
274
  "global_step": 317440,
275
- "num_episodes": 129,
276
- "mean_reward": 14.87307053565979,
277
- "mean_length": 2527.32,
278
- "loss": 0.1953483670949936,
279
- "sps": 2394.950934358607
280
  },
281
  {
282
  "update": 160,
283
  "global_step": 327680,
284
- "num_episodes": 129,
285
- "mean_reward": 14.87307053565979,
286
- "mean_length": 2527.32,
287
- "loss": 0.19992592930793762,
288
- "sps": 2424.040624859784
289
  },
290
  {
291
  "update": 165,
292
  "global_step": 337920,
293
- "num_episodes": 139,
294
- "mean_reward": 16.099841012954712,
295
- "mean_length": 2525.93,
296
- "loss": 0.4464109241962433,
297
- "sps": 1319.6508401584945
298
  },
299
  {
300
  "update": 170,
301
  "global_step": 348160,
302
- "num_episodes": 139,
303
- "mean_reward": 16.099841012954712,
304
- "mean_length": 2525.93,
305
- "loss": 0.10373280942440033,
306
- "sps": 2348.0679386786046
307
  },
308
  {
309
  "update": 175,
310
  "global_step": 358400,
311
- "num_episodes": 142,
312
- "mean_reward": 17.19686735153198,
313
- "mean_length": 2624.69,
314
- "loss": 0.8095718026161194,
315
- "sps": 2386.2480483453646
316
  },
317
  {
318
  "update": 180,
319
  "global_step": 368640,
320
- "num_episodes": 142,
321
- "mean_reward": 17.19686735153198,
322
- "mean_length": 2624.69,
323
- "loss": 0.009909076616168022,
324
- "sps": 2301.590679746089
325
  },
326
  {
327
  "update": 185,
328
  "global_step": 378880,
329
- "num_episodes": 148,
330
- "mean_reward": 19.868873529434204,
331
- "mean_length": 2907.66,
332
- "loss": 1.7244997024536133,
333
- "sps": 2280.4958488818847
334
  },
335
  {
336
  "update": 190,
337
  "global_step": 389120,
338
- "num_episodes": 148,
339
- "mean_reward": 19.868873529434204,
340
- "mean_length": 2907.66,
341
- "loss": 4.516170978546143,
342
- "sps": 2419.553348740851
343
  },
344
  {
345
  "update": 195,
346
  "global_step": 399360,
347
- "num_episodes": 152,
348
- "mean_reward": 20.717632093429565,
349
- "mean_length": 3008.33,
350
- "loss": 3.401921272277832,
351
- "sps": 575.3298806579921
352
  },
353
  {
354
  "update": 200,
355
  "global_step": 409600,
356
- "num_episodes": 153,
357
- "mean_reward": 20.717632093429565,
358
- "mean_length": 3008.33,
359
- "loss": 0.8132290244102478,
360
- "sps": 2450.768029758656
361
  },
362
  {
363
  "update": 205,
364
  "global_step": 419840,
365
- "num_episodes": 162,
366
- "mean_reward": 19.608867626190186,
367
- "mean_length": 2909.79,
368
- "loss": 1.867810845375061,
369
- "sps": 621.2303877555788
370
  },
371
  {
372
  "update": 210,
373
  "global_step": 430080,
374
- "num_episodes": 167,
375
- "mean_reward": 19.028235759735107,
376
- "mean_length": 2619.36,
377
- "loss": 0.8309984803199768,
378
- "sps": 2358.6319149765754
379
  },
380
  {
381
  "update": 215,
382
  "global_step": 440320,
383
- "num_episodes": 169,
384
- "mean_reward": 19.63686834335327,
385
- "mean_length": 2715.53,
386
- "loss": 0.17013275623321533,
387
- "sps": 1381.2913503327266
388
  },
389
  {
390
  "update": 220,
391
  "global_step": 450560,
392
- "num_episodes": 170,
393
- "mean_reward": 19.778069705963134,
394
- "mean_length": 2619.36,
395
- "loss": -0.013407116755843163,
396
- "sps": 2225.527904161147
397
  },
398
  {
399
  "update": 225,
400
  "global_step": 460800,
401
- "num_episodes": 172,
402
- "mean_reward": 21.13547016143799,
403
- "mean_length": 2718.42,
404
- "loss": 0.14364227652549744,
405
- "sps": 2345.342614298781
406
  },
407
  {
408
  "update": 230,
409
  "global_step": 471040,
410
- "num_episodes": 174,
411
- "mean_reward": 22.236838788986205,
412
- "mean_length": 2819.34,
413
- "loss": 0.017919447273015976,
414
- "sps": 2408.348085488877
415
  },
416
  {
417
  "update": 235,
418
  "global_step": 481280,
419
- "num_episodes": 174,
420
- "mean_reward": 22.236838788986205,
421
- "mean_length": 2819.34,
422
- "loss": 2.6431455612182617,
423
- "sps": 2403.528538830992
424
  },
425
  {
426
  "update": 240,
427
  "global_step": 491520,
428
- "num_episodes": 178,
429
- "mean_reward": 21.810234026908873,
430
- "mean_length": 2928.26,
431
- "loss": 0.2556005120277405,
432
- "sps": 2373.833411448431
433
  },
434
  {
435
  "update": 245,
436
  "global_step": 501760,
437
- "num_episodes": 181,
438
- "mean_reward": 23.416525321006773,
439
- "mean_length": 3026.48,
440
- "loss": 3.2893059253692627,
441
- "sps": 2362.3097327902087
442
  },
443
  {
444
  "update": 250,
445
  "global_step": 512000,
446
- "num_episodes": 182,
447
- "mean_reward": 23.666512427330016,
448
- "mean_length": 3026.48,
449
- "loss": 4.634244441986084,
450
- "sps": 2410.981639631563
451
  },
452
  {
453
  "update": 255,
454
  "global_step": 522240,
455
- "num_episodes": 182,
456
- "mean_reward": 23.666512427330016,
457
- "mean_length": 3026.48,
458
- "loss": 1.1017451286315918,
459
- "sps": 2373.168403411863
460
  },
461
  {
462
  "update": 260,
463
  "global_step": 532480,
464
- "num_episodes": 188,
465
- "mean_reward": 24.153521933555602,
466
- "mean_length": 3032.56,
467
- "loss": 0.058166228234767914,
468
- "sps": 549.3090191152681
469
  },
470
  {
471
  "update": 265,
472
  "global_step": 542720,
473
- "num_episodes": 200,
474
- "mean_reward": 24.202102789878847,
475
- "mean_length": 2827.14,
476
- "loss": 3.6524577140808105,
477
- "sps": 2408.936350313052
478
  },
479
  {
480
  "update": 270,
481
  "global_step": 552960,
482
- "num_episodes": 203,
483
- "mean_reward": 25.780276198387146,
484
- "mean_length": 2924.21,
485
- "loss": -0.03986305743455887,
486
- "sps": 2384.8038232666454
487
  },
488
  {
489
  "update": 275,
490
  "global_step": 563200,
491
- "num_episodes": 203,
492
- "mean_reward": 25.780276198387146,
493
- "mean_length": 2924.21,
494
- "loss": 0.009281929582357407,
495
- "sps": 2245.2843029907467
496
  },
497
  {
498
  "update": 280,
499
  "global_step": 573440,
500
- "num_episodes": 204,
501
- "mean_reward": 26.63669138431549,
502
- "mean_length": 3023.78,
503
- "loss": 0.1620613932609558,
504
- "sps": 2155.8537501850938
505
  },
506
  {
507
  "update": 285,
508
  "global_step": 583680,
509
- "num_episodes": 208,
510
- "mean_reward": 27.24311915874481,
511
- "mean_length": 3124.7,
512
- "loss": 0.7804455161094666,
513
- "sps": 2336.540118617715
514
  },
515
  {
516
  "update": 290,
517
  "global_step": 593920,
518
- "num_episodes": 210,
519
- "mean_reward": 27.990238661766053,
520
- "mean_length": 3122.84,
521
- "loss": -0.02732567861676216,
522
- "sps": 2446.515068539969
523
  },
524
  {
525
  "update": 295,
526
  "global_step": 604160,
527
- "num_episodes": 210,
528
- "mean_reward": 27.990238661766053,
529
- "mean_length": 3122.84,
530
- "loss": 0.026210829615592957,
531
- "sps": 2401.0219632291446
532
  },
533
  {
534
  "update": 300,
535
  "global_step": 614400,
536
- "num_episodes": 212,
537
- "mean_reward": 28.8441028547287,
538
- "mean_length": 3221.9,
539
- "loss": 0.018815483897924423,
540
- "sps": 1787.985104867478
541
  },
542
  {
543
  "update": 305,
544
  "global_step": 624640,
545
- "num_episodes": 219,
546
- "mean_reward": 29.601799569129945,
547
- "mean_length": 3224.78,
548
- "loss": -0.01376257836818695,
549
- "sps": 2307.873344880848
550
  },
551
  {
552
  "update": 310,
553
  "global_step": 634880,
554
- "num_episodes": 223,
555
- "mean_reward": 30.112299256324768,
556
- "mean_length": 3222.81,
557
- "loss": -0.13672460615634918,
558
- "sps": 2376.26116166583
559
  },
560
  {
561
  "update": 315,
562
  "global_step": 645120,
563
- "num_episodes": 223,
564
- "mean_reward": 30.112299256324768,
565
- "mean_length": 3222.81,
566
- "loss": 0.24129757285118103,
567
- "sps": 2301.537029038706
568
  },
569
  {
570
  "update": 320,
571
  "global_step": 655360,
572
- "num_episodes": 224,
573
- "mean_reward": 30.965999827384948,
574
- "mean_length": 3320.52,
575
- "loss": 0.3078324496746063,
576
- "sps": 2359.266120378419
577
  },
578
  {
579
  "update": 325,
580
  "global_step": 665600,
581
- "num_episodes": 230,
582
- "mean_reward": 31.090740485191347,
583
- "mean_length": 3419.33,
584
- "loss": 0.002869449555873871,
585
- "sps": 2345.5360183144344
586
  },
587
  {
588
  "update": 330,
589
  "global_step": 675840,
590
- "num_episodes": 233,
591
- "mean_reward": 31.204454226493837,
592
- "mean_length": 3514.15,
593
- "loss": 0.8130831718444824,
594
- "sps": 2348.1224969342966
595
  },
596
  {
597
  "update": 335,
598
  "global_step": 686080,
599
- "num_episodes": 233,
600
- "mean_reward": 31.204454226493837,
601
- "mean_length": 3514.15,
602
- "loss": -0.10025477409362793,
603
- "sps": 2203.7073854518803
604
  },
605
  {
606
  "update": 340,
607
  "global_step": 696320,
608
- "num_episodes": 236,
609
- "mean_reward": 30.600520968437195,
610
- "mean_length": 3419.33,
611
- "loss": 0.11422204971313477,
612
- "sps": 1223.5079096329264
613
  },
614
  {
615
  "update": 345,
616
  "global_step": 706560,
617
- "num_episodes": 244,
618
- "mean_reward": 29.116036429405213,
619
- "mean_length": 3431.39,
620
- "loss": 0.3314732015132904,
621
- "sps": 604.6873121204968
622
  },
623
  {
624
  "update": 350,
625
  "global_step": 716800,
626
- "num_episodes": 248,
627
- "mean_reward": 27.764892373085022,
628
- "mean_length": 3338.43,
629
- "loss": -0.08348219841718674,
630
- "sps": 2334.652125215495
631
  },
632
  {
633
  "update": 355,
634
  "global_step": 727040,
635
- "num_episodes": 248,
636
- "mean_reward": 27.764892373085022,
637
- "mean_length": 3338.43,
638
- "loss": -0.011928500607609749,
639
- "sps": 2266.898774941684
640
  },
641
  {
642
  "update": 360,
643
  "global_step": 737280,
644
- "num_episodes": 251,
645
- "mean_reward": 27.271975588798522,
646
- "mean_length": 3335.03,
647
- "loss": 0.16195061802864075,
648
- "sps": 511.5670473274587
649
  },
650
  {
651
  "update": 365,
652
  "global_step": 747520,
653
- "num_episodes": 261,
654
- "mean_reward": 25.41567024707794,
655
- "mean_length": 3238.35,
656
- "loss": -0.17278775572776794,
657
- "sps": 810.6621085529184
658
  },
659
  {
660
  "update": 370,
661
  "global_step": 757760,
662
- "num_episodes": 274,
663
- "mean_reward": 21.383243069648742,
664
- "mean_length": 2943.09,
665
- "loss": -0.017205242067575455,
666
- "sps": 619.3760616179322
667
  },
668
  {
669
  "update": 375,
670
  "global_step": 768000,
671
- "num_episodes": 274,
672
- "mean_reward": 21.383243069648742,
673
- "mean_length": 2943.09,
674
- "loss": 0.08961383998394012,
675
- "sps": 2236.482803815745
676
  },
677
  {
678
  "update": 380,
679
  "global_step": 778240,
680
- "num_episodes": 274,
681
- "mean_reward": 21.383243069648742,
682
- "mean_length": 2943.09,
683
- "loss": 4.401951313018799,
684
- "sps": 2147.399362575019
685
  },
686
  {
687
  "update": 385,
688
  "global_step": 788480,
689
- "num_episodes": 278,
690
- "mean_reward": 22.28250171661377,
691
- "mean_length": 3028.1,
692
- "loss": 1.4729293584823608,
693
- "sps": 2300.4546825723296
694
  },
695
  {
696
  "update": 390,
697
  "global_step": 798720,
698
- "num_episodes": 280,
699
- "mean_reward": 21.386626567840576,
700
- "mean_length": 3127.16,
701
- "loss": 0.06943056732416153,
702
- "sps": 2341.8789187579932
703
  },
704
  {
705
  "update": 395,
706
  "global_step": 808960,
707
- "num_episodes": 280,
708
- "mean_reward": 21.386626567840576,
709
- "mean_length": 3127.16,
710
- "loss": 0.929630696773529,
711
- "sps": 2232.925422842351
712
  },
713
  {
714
  "update": 400,
715
  "global_step": 819200,
716
- "num_episodes": 280,
717
- "mean_reward": 21.386626567840576,
718
- "mean_length": 3127.16,
719
- "loss": -0.035214584320783615,
720
- "sps": 2187.4650402736006
721
  },
722
  {
723
  "update": 405,
724
  "global_step": 829440,
725
- "num_episodes": 284,
726
- "mean_reward": 21.765649213790894,
727
- "mean_length": 3031.31,
728
- "loss": 3.6039881706237793,
729
- "sps": 2344.35431842087
730
  },
731
  {
732
  "update": 410,
733
  "global_step": 839680,
734
- "num_episodes": 288,
735
- "mean_reward": 23.637630491256715,
736
- "mean_length": 3126.66,
737
- "loss": 0.8354597091674805,
738
- "sps": 1416.6940593723025
739
  },
740
  {
741
  "update": 415,
742
  "global_step": 849920,
743
- "num_episodes": 288,
744
- "mean_reward": 23.637630491256715,
745
- "mean_length": 3126.66,
746
- "loss": 1.1654760837554932,
747
- "sps": 2444.537765977125
748
  },
749
  {
750
  "update": 420,
751
  "global_step": 860160,
752
- "num_episodes": 288,
753
- "mean_reward": 23.637630491256715,
754
- "mean_length": 3126.66,
755
- "loss": 0.7069441676139832,
756
- "sps": 2402.946945101056
757
  },
758
  {
759
  "update": 425,
760
  "global_step": 870400,
761
- "num_episodes": 296,
762
- "mean_reward": 26.219414005279543,
763
- "mean_length": 3426.79,
764
- "loss": 1.0991233587265015,
765
- "sps": 1584.3855114127348
766
  },
767
  {
768
  "update": 430,
769
  "global_step": 880640,
770
- "num_episodes": 302,
771
- "mean_reward": 24.143256397247313,
772
- "mean_length": 3329.74,
773
- "loss": 0.5094761252403259,
774
- "sps": 1460.1652484484139
775
  },
776
  {
777
  "update": 435,
778
  "global_step": 890880,
779
- "num_episodes": 302,
780
- "mean_reward": 24.143256397247313,
781
- "mean_length": 3329.74,
782
- "loss": 1.6765072345733643,
783
- "sps": 2365.579425966714
784
  },
785
  {
786
  "update": 440,
787
  "global_step": 901120,
788
- "num_episodes": 302,
789
- "mean_reward": 24.143256397247313,
790
- "mean_length": 3329.74,
791
- "loss": -0.044437870383262634,
792
- "sps": 2282.7314942544617
793
  },
794
  {
795
  "update": 445,
796
  "global_step": 911360,
797
- "num_episodes": 308,
798
- "mean_reward": 23.534629821777344,
799
- "mean_length": 3238.0,
800
- "loss": 0.4068017303943634,
801
- "sps": 865.914540942848
802
  },
803
  {
804
  "update": 450,
805
  "global_step": 921600,
806
- "num_episodes": 314,
807
- "mean_reward": 23.246068153381348,
808
- "mean_length": 3138.43,
809
- "loss": 0.32406821846961975,
810
- "sps": 830.3529260738604
811
  },
812
  {
813
  "update": 455,
814
  "global_step": 931840,
815
- "num_episodes": 316,
816
- "mean_reward": 22.337194681167603,
817
- "mean_length": 3057.38,
818
- "loss": 0.21466466784477234,
819
- "sps": 2411.145412824079
820
  },
821
  {
822
  "update": 460,
823
  "global_step": 942080,
824
- "num_episodes": 316,
825
- "mean_reward": 22.337194681167603,
826
- "mean_length": 3057.38,
827
- "loss": 4.0019731521606445,
828
- "sps": 2372.8288290677337
829
  },
830
  {
831
  "update": 465,
832
  "global_step": 952320,
833
- "num_episodes": 321,
834
- "mean_reward": 22.935798473358155,
835
- "mean_length": 3155.68,
836
- "loss": 3.8567330837249756,
837
- "sps": 796.2439424516102
838
  },
839
  {
840
  "update": 470,
841
  "global_step": 962560,
842
- "num_episodes": 326,
843
- "mean_reward": 23.01468356132507,
844
- "mean_length": 3080.66,
845
- "loss": 0.04481935873627663,
846
- "sps": 2363.29177191282
847
  },
848
  {
849
  "update": 475,
850
  "global_step": 972800,
851
- "num_episodes": 329,
852
- "mean_reward": 24.00417799949646,
853
- "mean_length": 3081.1,
854
- "loss": -0.09712222218513489,
855
- "sps": 2323.0109350014427
856
  },
857
  {
858
  "update": 480,
859
  "global_step": 983040,
860
- "num_episodes": 329,
861
- "mean_reward": 24.00417799949646,
862
- "mean_length": 3081.1,
863
- "loss": 0.29562610387802124,
864
- "sps": 2249.6786763931746
865
  },
866
  {
867
  "update": 485,
868
  "global_step": 993280,
869
- "num_episodes": 330,
870
- "mean_reward": 24.60798607826233,
871
- "mean_length": 3177.27,
872
- "loss": 0.02162332460284233,
873
- "sps": 2222.1627608215495
874
  },
875
  {
876
  "update": 490,
877
  "global_step": 1003520,
878
- "num_episodes": 334,
879
- "mean_reward": 25.107961702346802,
880
- "mean_length": 3177.27,
881
- "loss": 0.30668944120407104,
882
- "sps": 2336.9965399128855
883
  },
884
  {
885
  "update": 495,
886
  "global_step": 1013760,
887
- "num_episodes": 337,
888
- "mean_reward": 25.60792893409729,
889
- "mean_length": 3175.92,
890
- "loss": 0.40231630206108093,
891
- "sps": 1175.8081438816857
892
  },
893
  {
894
  "update": 500,
895
  "global_step": 1024000,
896
- "num_episodes": 337,
897
- "mean_reward": 25.60792893409729,
898
- "mean_length": 3175.92,
899
- "loss": 0.33998745679855347,
900
- "sps": 2419.5867438311334
901
  },
902
  {
903
  "update": 505,
904
  "global_step": 1034240,
905
- "num_episodes": 340,
906
- "mean_reward": 25.602466859817504,
907
- "mean_length": 3171.49,
908
- "loss": 0.09274712204933167,
909
- "sps": 2409.938615394154
910
  },
911
  {
912
  "update": 510,
913
  "global_step": 1044480,
914
- "num_episodes": 346,
915
- "mean_reward": 25.201329984664916,
916
- "mean_length": 3263.94,
917
- "loss": 0.2879057228565216,
918
- "sps": 976.7251772331928
919
  },
920
  {
921
  "update": 515,
922
  "global_step": 1054720,
923
- "num_episodes": 347,
924
- "mean_reward": 25.555161905288696,
925
- "mean_length": 3361.65,
926
- "loss": 0.39604732394218445,
927
- "sps": 2406.938365656791
928
  },
929
  {
930
  "update": 520,
931
  "global_step": 1064960,
932
- "num_episodes": 347,
933
- "mean_reward": 25.555161905288696,
934
- "mean_length": 3361.65,
935
- "loss": 0.03350641578435898,
936
- "sps": 2384.9415454427835
937
  },
938
  {
939
  "update": 525,
940
  "global_step": 1075200,
941
- "num_episodes": 348,
942
- "mean_reward": 26.658946142196655,
943
- "mean_length": 3459.36,
944
- "loss": 0.8593693971633911,
945
- "sps": 2415.202845406159
946
  },
947
  {
948
  "update": 530,
949
  "global_step": 1085440,
950
- "num_episodes": 355,
951
- "mean_reward": 27.258036041259764,
952
- "mean_length": 3553.8,
953
- "loss": 0.4878949820995331,
954
- "sps": 710.1710154431543
955
  },
956
  {
957
  "update": 535,
958
  "global_step": 1095680,
959
- "num_episodes": 368,
960
- "mean_reward": 28.800746297836305,
961
- "mean_length": 3371.19,
962
- "loss": 0.21109388768672943,
963
- "sps": 532.1724281533873
964
  },
965
  {
966
  "update": 540,
967
  "global_step": 1105920,
968
- "num_episodes": 371,
969
- "mean_reward": 29.045905199050903,
970
- "mean_length": 3369.84,
971
- "loss": 0.14603528380393982,
972
- "sps": 2335.2550876461346
973
  },
974
  {
975
  "update": 545,
976
  "global_step": 1116160,
977
- "num_episodes": 373,
978
- "mean_reward": 29.899697828292847,
979
- "mean_length": 3466.01,
980
- "loss": -0.04600970447063446,
981
- "sps": 2305.921805837758
982
  },
983
  {
984
  "update": 550,
985
  "global_step": 1126400,
986
- "num_episodes": 374,
987
- "mean_reward": 31.003479852676392,
988
- "mean_length": 3565.07,
989
- "loss": 0.11138609051704407,
990
- "sps": 2400.3530395714765
991
  },
992
  {
993
  "update": 555,
994
  "global_step": 1136640,
995
- "num_episodes": 377,
996
- "mean_reward": 31.149640073776244,
997
- "mean_length": 3467.36,
998
- "loss": 0.35408008098602295,
999
- "sps": 1863.2471573876685
1000
  },
1001
  {
1002
  "update": 560,
1003
  "global_step": 1146880,
1004
- "num_episodes": 378,
1005
- "mean_reward": 31.50347306251526,
1006
- "mean_length": 3563.53,
1007
- "loss": 0.03238815441727638,
1008
- "sps": 2410.263871749416
1009
  },
1010
  {
1011
  "update": 565,
1012
  "global_step": 1157120,
1013
- "num_episodes": 379,
1014
- "mean_reward": 31.503473825454712,
1015
- "mean_length": 3563.53,
1016
- "loss": 1.2669872045516968,
1017
- "sps": 2435.9597448206396
1018
  },
1019
  {
1020
  "update": 570,
1021
  "global_step": 1167360,
1022
- "num_episodes": 381,
1023
- "mean_reward": 30.14463397026062,
1024
- "mean_length": 3465.82,
1025
- "loss": 1.1442028284072876,
1026
- "sps": 2364.850671069342
1027
  },
1028
  {
1029
  "update": 575,
1030
  "global_step": 1177600,
1031
- "num_episodes": 387,
1032
- "mean_reward": 27.80320453643799,
1033
- "mean_length": 3366.44,
1034
- "loss": 0.08564125001430511,
1035
- "sps": 566.4595364452883
1036
  },
1037
  {
1038
  "update": 580,
1039
  "global_step": 1187840,
1040
- "num_episodes": 389,
1041
- "mean_reward": 27.20208529472351,
1042
- "mean_length": 3266.87,
1043
- "loss": 0.14057211577892303,
1044
- "sps": 2297.6598442501095
1045
  },
1046
  {
1047
  "update": 585,
1048
  "global_step": 1198080,
1049
- "num_episodes": 396,
1050
- "mean_reward": 26.080838441848755,
1051
- "mean_length": 3169.09,
1052
- "loss": 0.0875428318977356,
1053
- "sps": 2440.645435446852
1054
  },
1055
  {
1056
  "update": 590,
1057
  "global_step": 1208320,
1058
- "num_episodes": 403,
1059
- "mean_reward": 24.67230319023132,
1060
- "mean_length": 3069.72,
1061
- "loss": -0.005318094044923782,
1062
- "sps": 2415.1756827351583
1063
  },
1064
  {
1065
  "update": 595,
1066
  "global_step": 1218560,
1067
- "num_episodes": 406,
1068
- "mean_reward": 23.778743057250978,
1069
- "mean_length": 3164.03,
1070
- "loss": 0.07221215218305588,
1071
- "sps": 1535.4284659165353
1072
  },
1073
  {
1074
  "update": 600,
1075
  "global_step": 1228800,
1076
- "num_episodes": 413,
1077
- "mean_reward": 22.0106067276001,
1078
- "mean_length": 2967.58,
1079
- "loss": 0.35672813653945923,
1080
- "sps": 2410.0731702663725
1081
  },
1082
  {
1083
  "update": 605,
1084
  "global_step": 1239040,
1085
- "num_episodes": 418,
1086
- "mean_reward": 20.706662797927855,
1087
- "mean_length": 2849.49,
1088
- "loss": 0.1961039900779724,
1089
- "sps": 2444.0300319234293
1090
  },
1091
  {
1092
  "update": 610,
1093
  "global_step": 1249280,
1094
- "num_episodes": 420,
1095
- "mean_reward": 21.058296146392824,
1096
- "mean_length": 2949.06,
1097
- "loss": 0.20069828629493713,
1098
- "sps": 2409.835173874158
1099
  },
1100
  {
1101
  "update": 615,
1102
  "global_step": 1259520,
1103
- "num_episodes": 420,
1104
- "mean_reward": 21.058296146392824,
1105
- "mean_length": 2949.06,
1106
- "loss": 0.6283493041992188,
1107
- "sps": 2351.264188304028
1108
  },
1109
  {
1110
  "update": 620,
1111
  "global_step": 1269760,
1112
- "num_episodes": 436,
1113
- "mean_reward": 16.700364379882814,
1114
- "mean_length": 2538.45,
1115
- "loss": 0.06421943008899689,
1116
- "sps": 581.6206601898414
1117
  },
1118
  {
1119
  "update": 625,
1120
  "global_step": 1280000,
1121
- "num_episodes": 437,
1122
- "mean_reward": 17.554159069061278,
1123
- "mean_length": 2634.62,
1124
- "loss": 0.025755397975444794,
1125
- "sps": 2336.2122156272303
1126
  },
1127
  {
1128
  "update": 630,
1129
  "global_step": 1290240,
1130
- "num_episodes": 438,
1131
- "mean_reward": 17.804676685333252,
1132
- "mean_length": 2634.62,
1133
- "loss": -0.039572782814502716,
1134
- "sps": 2353.5698753913625
1135
  },
1136
  {
1137
  "update": 635,
1138
  "global_step": 1300480,
1139
- "num_episodes": 438,
1140
- "mean_reward": 17.804676685333252,
1141
- "mean_length": 2634.62,
1142
- "loss": -0.0706489086151123,
1143
- "sps": 2283.6836848035246
1144
  },
1145
  {
1146
  "update": 640,
1147
  "global_step": 1310720,
1148
- "num_episodes": 441,
1149
- "mean_reward": 19.15775371551514,
1150
- "mean_length": 2733.68,
1151
- "loss": -0.09373792260885239,
1152
- "sps": 1474.6357445156093
1153
  },
1154
  {
1155
  "update": 645,
1156
  "global_step": 1320960,
1157
- "num_episodes": 442,
1158
- "mean_reward": 20.157576084136963,
1159
- "mean_length": 2733.68,
1160
- "loss": 0.11351752281188965,
1161
- "sps": 2334.5626594322757
1162
  },
1163
  {
1164
  "update": 650,
1165
  "global_step": 1331200,
1166
- "num_episodes": 444,
1167
- "mean_reward": 20.267150926589967,
1168
- "mean_length": 2833.09,
1169
- "loss": -0.18438173830509186,
1170
- "sps": 2236.677306457589
1171
  },
1172
  {
1173
  "update": 655,
1174
  "global_step": 1341440,
1175
- "num_episodes": 444,
1176
- "mean_reward": 20.267150926589967,
1177
- "mean_length": 2833.09,
1178
- "loss": -0.07986398041248322,
1179
- "sps": 2210.239531580035
1180
  },
1181
  {
1182
  "update": 660,
1183
  "global_step": 1351680,
1184
- "num_episodes": 447,
1185
- "mean_reward": 21.873516216278077,
1186
- "mean_length": 2929.77,
1187
- "loss": -0.05535223335027695,
1188
- "sps": 2385.7304437353546
1189
  },
1190
  {
1191
  "update": 665,
1192
  "global_step": 1361920,
1193
- "num_episodes": 449,
1194
- "mean_reward": 21.519568309783935,
1195
- "mean_length": 2830.71,
1196
- "loss": 0.10434924066066742,
1197
- "sps": 2330.8125431773424
1198
  },
1199
  {
1200
  "update": 670,
1201
  "global_step": 1372160,
1202
- "num_episodes": 451,
1203
- "mean_reward": 22.623257703781128,
1204
- "mean_length": 2927.07,
1205
- "loss": 0.29875361919403076,
1206
- "sps": 2388.3964178785764
1207
  },
1208
  {
1209
  "update": 675,
1210
  "global_step": 1382400,
1211
- "num_episodes": 451,
1212
- "mean_reward": 22.623257703781128,
1213
- "mean_length": 2927.07,
1214
- "loss": 0.01194491982460022,
1215
- "sps": 2360.8138719071453
1216
  },
1217
  {
1218
  "update": 680,
1219
  "global_step": 1392640,
1220
- "num_episodes": 454,
1221
- "mean_reward": 23.234625358581543,
1222
- "mean_length": 3026.64,
1223
- "loss": 0.1673501431941986,
1224
- "sps": 1557.6244924988657
1225
  },
1226
  {
1227
  "update": 685,
1228
  "global_step": 1402880,
1229
- "num_episodes": 458,
1230
- "mean_reward": 23.841174821853638,
1231
- "mean_length": 3124.86,
1232
- "loss": 0.026581741869449615,
1233
- "sps": 2296.8415092422506
1234
  },
1235
  {
1236
  "update": 690,
1237
  "global_step": 1413120,
1238
- "num_episodes": 459,
1239
- "mean_reward": 23.949846239089965,
1240
- "mean_length": 3221.03,
1241
- "loss": 1.637216329574585,
1242
- "sps": 413.9001244167228
1243
  },
1244
  {
1245
  "update": 695,
1246
  "global_step": 1423360,
1247
- "num_episodes": 465,
1248
- "mean_reward": 24.109339590072633,
1249
- "mean_length": 3246.44,
1250
- "loss": 0.4041682779788971,
1251
- "sps": 1973.9302703248002
1252
  },
1253
  {
1254
  "update": 700,
1255
  "global_step": 1433600,
1256
- "num_episodes": 466,
1257
- "mean_reward": 24.21024447441101,
1258
- "mean_length": 3344.24,
1259
- "loss": 0.13472315669059753,
1260
- "sps": 2279.0860535321326
1261
  },
1262
  {
1263
  "update": 705,
1264
  "global_step": 1443840,
1265
- "num_episodes": 468,
1266
- "mean_reward": 24.856354351043702,
1267
- "mean_length": 3440.41,
1268
- "loss": 0.6666802763938904,
1269
- "sps": 2315.9586381196755
1270
  },
1271
  {
1272
  "update": 710,
1273
  "global_step": 1454080,
1274
- "num_episodes": 468,
1275
- "mean_reward": 24.856354351043702,
1276
- "mean_length": 3440.41,
1277
- "loss": -0.15654346346855164,
1278
- "sps": 2242.2811278107365
1279
  },
1280
  {
1281
  "update": 715,
1282
  "global_step": 1464320,
1283
- "num_episodes": 477,
1284
- "mean_reward": 24.00254797935486,
1285
- "mean_length": 3342.7,
1286
- "loss": -0.14158941805362701,
1287
- "sps": 2301.1874356888225
1288
  },
1289
  {
1290
  "update": 720,
1291
  "global_step": 1474560,
1292
- "num_episodes": 480,
1293
- "mean_reward": 24.0372780418396,
1294
- "mean_length": 3146.96,
1295
- "loss": 0.10493358224630356,
1296
- "sps": 843.3732587375952
1297
  },
1298
  {
1299
  "update": 725,
1300
  "global_step": 1484800,
1301
- "num_episodes": 481,
1302
- "mean_reward": 23.896179141998292,
1303
- "mean_length": 3244.67,
1304
- "loss": 0.01014188677072525,
1305
- "sps": 2387.5931465826584
1306
  },
1307
  {
1308
  "update": 730,
1309
  "global_step": 1495040,
1310
- "num_episodes": 481,
1311
- "mean_reward": 23.896179141998292,
1312
- "mean_length": 3244.67,
1313
- "loss": 1.5800291299819946,
1314
- "sps": 2199.482155508925
1315
  },
1316
  {
1317
  "update": 735,
1318
  "global_step": 1505280,
1319
- "num_episodes": 486,
1320
- "mean_reward": 23.005251121520995,
1321
- "mean_length": 3344.42,
1322
- "loss": 2.476745128631592,
1323
- "sps": 555.9822817000502
1324
  },
1325
  {
1326
  "update": 740,
1327
  "global_step": 1515520,
1328
- "num_episodes": 487,
1329
- "mean_reward": 22.755260047912596,
1330
- "mean_length": 3344.42,
1331
- "loss": 0.043340131640434265,
1332
- "sps": 2358.713519842451
1333
  },
1334
  {
1335
  "update": 745,
1336
  "global_step": 1525760,
1337
- "num_episodes": 489,
1338
- "mean_reward": 23.35637879371643,
1339
- "mean_length": 3442.64,
1340
- "loss": 0.8548387885093689,
1341
- "sps": 2384.7462230357132
1342
  },
1343
  {
1344
  "update": 750,
1345
  "global_step": 1536000,
1346
- "num_episodes": 489,
1347
- "mean_reward": 23.35637879371643,
1348
- "mean_length": 3442.64,
1349
- "loss": 0.06128331273794174,
1350
- "sps": 2389.02547098786
1351
- },
1352
- {
1353
- "update": 755,
1354
- "global_step": 1546240,
1355
- "num_episodes": 495,
1356
- "mean_reward": 23.45271695137024,
1357
- "mean_length": 3539.52,
1358
- "loss": 0.010654259473085403,
1359
- "sps": 451.1509132304138
1360
- },
1361
- {
1362
- "update": 760,
1363
- "global_step": 1556480,
1364
- "num_episodes": 498,
1365
- "mean_reward": 24.20286971092224,
1366
- "mean_length": 3541.89,
1367
- "loss": -0.015714531764388084,
1368
- "sps": 2272.008455427166
1369
- },
1370
- {
1371
- "update": 765,
1372
- "global_step": 1566720,
1373
- "num_episodes": 503,
1374
- "mean_reward": 24.520383558273316,
1375
- "mean_length": 3642.43,
1376
- "loss": 1.2781355381011963,
1377
- "sps": 2234.132232505493
1378
- },
1379
- {
1380
- "update": 770,
1381
- "global_step": 1576960,
1382
- "num_episodes": 503,
1383
- "mean_reward": 24.520383558273316,
1384
- "mean_length": 3642.43,
1385
- "loss": 0.005558963865041733,
1386
- "sps": 2252.444167541167
1387
- },
1388
- {
1389
- "update": 775,
1390
- "global_step": 1587200,
1391
- "num_episodes": 508,
1392
- "mean_reward": 26.765025606155394,
1393
- "mean_length": 3642.43,
1394
- "loss": 1.5083853006362915,
1395
- "sps": 1001.7898928543148
1396
- },
1397
- {
1398
- "update": 780,
1399
- "global_step": 1597440,
1400
- "num_episodes": 512,
1401
- "mean_reward": 27.61695213317871,
1402
- "mean_length": 3739.96,
1403
- "loss": -0.03274097293615341,
1404
- "sps": 406.1884387297904
1405
- },
1406
- {
1407
- "update": 785,
1408
- "global_step": 1607680,
1409
- "num_episodes": 517,
1410
- "mean_reward": 29.239461097717285,
1411
- "mean_length": 3749.65,
1412
- "loss": 0.056141212582588196,
1413
- "sps": 711.222362259853
1414
- },
1415
- {
1416
- "update": 790,
1417
- "global_step": 1617920,
1418
- "num_episodes": 517,
1419
- "mean_reward": 29.239461097717285,
1420
- "mean_length": 3749.65,
1421
- "loss": 0.08803388476371765,
1422
- "sps": 2290.465987254353
1423
- },
1424
- {
1425
- "update": 795,
1426
- "global_step": 1628160,
1427
- "num_episodes": 524,
1428
- "mean_reward": 28.533955936431884,
1429
- "mean_length": 3547.65,
1430
- "loss": 5.9130449295043945,
1431
- "sps": 1206.367279137959
1432
- },
1433
- {
1434
- "update": 800,
1435
- "global_step": 1638400,
1436
- "num_episodes": 528,
1437
- "mean_reward": 29.898164215087892,
1438
- "mean_length": 3657.29,
1439
- "loss": 0.8671517372131348,
1440
- "sps": 2301.788037202966
1441
  }
1442
  ]
 
2
  {
3
  "update": 5,
4
  "global_step": 10240,
5
+ "num_episodes": 12,
6
+ "mean_reward": -1.8039220174153645,
7
+ "mean_length": 214.16666666666666,
8
+ "loss": 1.5202983617782593,
9
+ "sps": 1350.1393046305259
10
  },
11
  {
12
  "update": 10,
13
  "global_step": 20480,
14
+ "num_episodes": 12,
15
+ "mean_reward": -1.8039220174153645,
16
+ "mean_length": 214.16666666666666,
17
+ "loss": 0.6584598422050476,
18
+ "sps": 2047.2883333230054
19
  },
20
  {
21
  "update": 15,
22
  "global_step": 30720,
23
+ "num_episodes": 12,
24
+ "mean_reward": -1.8039220174153645,
25
+ "mean_length": 214.16666666666666,
26
+ "loss": 0.6658369898796082,
27
+ "sps": 2055.41935946998
28
  },
29
  {
30
  "update": 20,
31
  "global_step": 40960,
32
+ "num_episodes": 15,
33
+ "mean_reward": 2.1853319803873696,
34
+ "mean_length": 1510.9333333333334,
35
+ "loss": 1.3961129188537598,
36
+ "sps": 1412.8079652055148
37
  },
38
  {
39
  "update": 25,
40
  "global_step": 51200,
41
+ "num_episodes": 22,
42
+ "mean_reward": 2.625577276403254,
43
+ "mean_length": 1983.409090909091,
44
+ "loss": 1.2982873916625977,
45
+ "sps": 470.35650512927947
46
  },
47
  {
48
  "update": 30,
49
  "global_step": 61440,
50
+ "num_episodes": 22,
51
+ "mean_reward": 2.625577276403254,
52
+ "mean_length": 1983.409090909091,
53
+ "loss": 1.0302618741989136,
54
+ "sps": 2216.3140512542655
55
  },
56
  {
57
  "update": 35,
58
  "global_step": 71680,
59
+ "num_episodes": 22,
60
+ "mean_reward": 2.625577276403254,
61
+ "mean_length": 1983.409090909091,
62
+ "loss": 0.9787145853042603,
63
+ "sps": 2070.449469801819
64
  },
65
  {
66
  "update": 40,
67
  "global_step": 81920,
68
+ "num_episodes": 27,
69
+ "mean_reward": 3.300225363837348,
70
+ "mean_length": 2365.4074074074074,
71
+ "loss": 0.8183227777481079,
72
+ "sps": 755.8004689166112
73
  },
74
  {
75
  "update": 45,
76
  "global_step": 92160,
77
+ "num_episodes": 33,
78
+ "mean_reward": 3.7748724330555308,
79
+ "mean_length": 2564.090909090909,
80
+ "loss": 0.3192096948623657,
81
+ "sps": 1984.8729329667028
82
  },
83
  {
84
  "update": 50,
85
  "global_step": 102400,
86
+ "num_episodes": 33,
87
+ "mean_reward": 3.7748724330555308,
88
+ "mean_length": 2564.090909090909,
89
+ "loss": 0.8334144949913025,
90
+ "sps": 2085.15978587018
91
  },
92
  {
93
  "update": 55,
94
  "global_step": 112640,
95
+ "num_episodes": 33,
96
+ "mean_reward": 3.7748724330555308,
97
+ "mean_length": 2564.090909090909,
98
+ "loss": 0.5191329121589661,
99
+ "sps": 2075.918435842757
100
  },
101
  {
102
  "update": 60,
103
  "global_step": 122880,
104
+ "num_episodes": 35,
105
+ "mean_reward": 5.517268398829869,
106
+ "mean_length": 2989.0,
107
+ "loss": 0.6924473643302917,
108
+ "sps": 2020.4075872880348
109
  },
110
  {
111
  "update": 65,
112
  "global_step": 133120,
113
+ "num_episodes": 40,
114
+ "mean_reward": 5.594199681282044,
115
+ "mean_length": 3128.125,
116
+ "loss": 1.9699269533157349,
117
+ "sps": 429.94535608180354
118
  },
119
  {
120
  "update": 70,
121
  "global_step": 143360,
122
+ "num_episodes": 41,
123
+ "mean_reward": 5.430626520296422,
124
+ "mean_length": 3061.170731707317,
125
+ "loss": 2.169956684112549,
126
+ "sps": 2246.502749698459
127
  },
128
  {
129
  "update": 75,
130
  "global_step": 153600,
131
+ "num_episodes": 41,
132
+ "mean_reward": 5.430626520296422,
133
+ "mean_length": 3061.170731707317,
134
+ "loss": 2.5021121501922607,
135
+ "sps": 2240.800087024051
136
  },
137
  {
138
  "update": 80,
139
  "global_step": 163840,
140
+ "num_episodes": 46,
141
+ "mean_reward": 10.588201502095098,
142
+ "mean_length": 3177.4565217391305,
143
+ "loss": 2.6493680477142334,
144
+ "sps": 436.6488855176695
145
  },
146
  {
147
  "update": 85,
148
  "global_step": 174080,
149
+ "num_episodes": 52,
150
+ "mean_reward": 13.96443979556744,
151
+ "mean_length": 3211.1923076923076,
152
+ "loss": 2.5603184700012207,
153
+ "sps": 2163.643359340071
154
  },
155
  {
156
  "update": 90,
157
  "global_step": 184320,
158
+ "num_episodes": 52,
159
+ "mean_reward": 13.96443979556744,
160
+ "mean_length": 3211.1923076923076,
161
+ "loss": 2.336817502975464,
162
+ "sps": 2163.4161260603714
163
  },
164
  {
165
  "update": 95,
166
  "global_step": 194560,
167
+ "num_episodes": 52,
168
+ "mean_reward": 13.96443979556744,
169
+ "mean_length": 3211.1923076923076,
170
+ "loss": 3.234126091003418,
171
+ "sps": 2186.3643111360093
172
  },
173
  {
174
  "update": 100,
175
  "global_step": 204800,
176
+ "num_episodes": 55,
177
+ "mean_reward": 17.41634755568071,
178
+ "mean_length": 3220.3454545454547,
179
+ "loss": 5.442279815673828,
180
+ "sps": 1960.8741209669945
181
  },
182
  {
183
  "update": 105,
184
  "global_step": 215040,
185
+ "num_episodes": 62,
186
+ "mean_reward": 27.910769308767012,
187
+ "mean_length": 3345.3387096774195,
188
+ "loss": 2.81072735786438,
189
+ "sps": 1303.1946553744522
190
  },
191
  {
192
  "update": 110,
193
  "global_step": 225280,
194
+ "num_episodes": 66,
195
+ "mean_reward": 26.517480315584123,
196
+ "mean_length": 3151.8939393939395,
197
+ "loss": 0.830104649066925,
198
+ "sps": 1994.5176102850091
199
  },
200
  {
201
  "update": 115,
202
  "global_step": 235520,
203
+ "num_episodes": 66,
204
+ "mean_reward": 26.517480315584123,
205
+ "mean_length": 3151.8939393939395,
206
+ "loss": 0.8076149225234985,
207
+ "sps": 2020.1985152956534
208
  },
209
  {
210
  "update": 120,
211
  "global_step": 245760,
212
+ "num_episodes": 69,
213
+ "mean_reward": 35.54813836968464,
214
+ "mean_length": 3449.6376811594205,
215
+ "loss": 1.7053556442260742,
216
+ "sps": 1392.6768298802306
217
  },
218
  {
219
  "update": 125,
220
  "global_step": 256000,
221
+ "num_episodes": 72,
222
+ "mean_reward": 34.35707297590044,
223
+ "mean_length": 3315.0,
224
+ "loss": 0.5338165163993835,
225
+ "sps": 2218.857641762577
226
  },
227
  {
228
  "update": 130,
229
  "global_step": 266240,
230
+ "num_episodes": 73,
231
+ "mean_reward": 37.437843152921495,
232
+ "mean_length": 3406.5753424657532,
233
+ "loss": 0.31061413884162903,
234
+ "sps": 2123.373275302262
235
  },
236
  {
237
  "update": 135,
238
  "global_step": 276480,
239
+ "num_episodes": 73,
240
+ "mean_reward": 37.437843152921495,
241
+ "mean_length": 3406.5753424657532,
242
+ "loss": 0.370238333940506,
243
+ "sps": 2270.1968695887504
244
  },
245
  {
246
  "update": 140,
247
  "global_step": 286720,
248
+ "num_episodes": 76,
249
+ "mean_reward": 42.764281247791494,
250
+ "mean_length": 3535.8289473684213,
251
+ "loss": 0.4431289732456207,
252
+ "sps": 2235.4805453156696
253
  },
254
  {
255
  "update": 145,
256
  "global_step": 296960,
257
+ "num_episodes": 78,
258
+ "mean_reward": 45.29765256245931,
259
+ "mean_length": 3576.3076923076924,
260
+ "loss": 0.11675499379634857,
261
+ "sps": 2248.214328955258
262
  },
263
  {
264
  "update": 150,
265
  "global_step": 307200,
266
+ "num_episodes": 79,
267
+ "mean_reward": 48.32240367841117,
268
+ "mean_length": 3657.6202531645567,
269
+ "loss": 0.3231047987937927,
270
+ "sps": 2273.3126392381514
271
  },
272
  {
273
  "update": 155,
274
  "global_step": 317440,
275
+ "num_episodes": 79,
276
+ "mean_reward": 48.32240367841117,
277
+ "mean_length": 3657.6202531645567,
278
+ "loss": 0.24974392354488373,
279
+ "sps": 2276.2572089535233
280
  },
281
  {
282
  "update": 160,
283
  "global_step": 327680,
284
+ "num_episodes": 90,
285
+ "mean_reward": 48.88773588604397,
286
+ "mean_length": 3444.8333333333335,
287
+ "loss": 1.1440778970718384,
288
+ "sps": 469.25381312597585
289
  },
290
  {
291
  "update": 165,
292
  "global_step": 337920,
293
+ "num_episodes": 99,
294
+ "mean_reward": 50.101470494511155,
295
+ "mean_length": 3348.060606060606,
296
+ "loss": 0.3425019383430481,
297
+ "sps": 1903.5186981694392
298
  },
299
  {
300
  "update": 170,
301
  "global_step": 348160,
302
+ "num_episodes": 99,
303
+ "mean_reward": 50.101470494511155,
304
+ "mean_length": 3348.060606060606,
305
+ "loss": 0.18799301981925964,
306
+ "sps": 2130.3254740506277
307
  },
308
  {
309
  "update": 175,
310
  "global_step": 358400,
311
+ "num_episodes": 99,
312
+ "mean_reward": 50.101470494511155,
313
+ "mean_length": 3348.060606060606,
314
+ "loss": 0.11751141399145126,
315
+ "sps": 2125.2682193036194
316
  },
317
  {
318
  "update": 180,
319
  "global_step": 368640,
320
+ "num_episodes": 101,
321
+ "mean_reward": 52.442985696792604,
322
+ "mean_length": 3414.58,
323
+ "loss": 0.2516184449195862,
324
+ "sps": 1703.135913858338
325
  },
326
  {
327
  "update": 185,
328
  "global_step": 378880,
329
+ "num_episodes": 114,
330
+ "mean_reward": 61.27311011314392,
331
+ "mean_length": 3506.98,
332
+ "loss": 0.11899760365486145,
333
+ "sps": 2246.894695169401
334
  },
335
  {
336
  "update": 190,
337
  "global_step": 389120,
338
+ "num_episodes": 114,
339
+ "mean_reward": 61.27311011314392,
340
+ "mean_length": 3506.98,
341
+ "loss": 0.30889129638671875,
342
+ "sps": 2289.0664114120555
343
  },
344
  {
345
  "update": 195,
346
  "global_step": 399360,
347
+ "num_episodes": 114,
348
+ "mean_reward": 61.27311011314392,
349
+ "mean_length": 3506.98,
350
+ "loss": -0.06268183887004852,
351
+ "sps": 2277.013858345863
352
  },
353
  {
354
  "update": 200,
355
  "global_step": 409600,
356
+ "num_episodes": 114,
357
+ "mean_reward": 61.27311011314392,
358
+ "mean_length": 3506.98,
359
+ "loss": 0.5904355645179749,
360
+ "sps": 2173.4262065580283
361
  },
362
  {
363
  "update": 205,
364
  "global_step": 419840,
365
+ "num_episodes": 121,
366
+ "mean_reward": 72.71755365371705,
367
+ "mean_length": 3714.22,
368
+ "loss": 1.7067689895629883,
369
+ "sps": 308.3386383776914
370
  },
371
  {
372
  "update": 210,
373
  "global_step": 430080,
374
+ "num_episodes": 123,
375
+ "mean_reward": 72.60844767570495,
376
+ "mean_length": 3634.11,
377
+ "loss": 0.22778448462486267,
378
+ "sps": 2270.040885359684
379
  },
380
  {
381
  "update": 215,
382
  "global_step": 440320,
383
+ "num_episodes": 123,
384
+ "mean_reward": 72.60844767570495,
385
+ "mean_length": 3634.11,
386
+ "loss": 0.4201895296573639,
387
+ "sps": 2286.5998087120915
388
  },
389
  {
390
  "update": 220,
391
  "global_step": 450560,
392
+ "num_episodes": 123,
393
+ "mean_reward": 72.60844767570495,
394
+ "mean_length": 3634.11,
395
+ "loss": 0.08545871078968048,
396
+ "sps": 2301.79050438536
397
  },
398
  {
399
  "update": 225,
400
  "global_step": 460800,
401
+ "num_episodes": 129,
402
+ "mean_reward": 81.06719317436219,
403
+ "mean_length": 3731.63,
404
+ "loss": 0.4734661877155304,
405
+ "sps": 1893.4799410834491
406
  },
407
  {
408
  "update": 230,
409
  "global_step": 471040,
410
+ "num_episodes": 132,
411
+ "mean_reward": 83.9696240234375,
412
+ "mean_length": 3736.94,
413
+ "loss": 2.656114101409912,
414
+ "sps": 1747.8656326711334
415
  },
416
  {
417
  "update": 235,
418
  "global_step": 481280,
419
+ "num_episodes": 132,
420
+ "mean_reward": 83.9696240234375,
421
+ "mean_length": 3736.94,
422
+ "loss": 0.3720909357070923,
423
+ "sps": 2290.767122681247
424
  },
425
  {
426
  "update": 240,
427
  "global_step": 491520,
428
+ "num_episodes": 133,
429
+ "mean_reward": 86.32599678993225,
430
+ "mean_length": 3836.51,
431
+ "loss": 0.28272855281829834,
432
+ "sps": 1660.2388311365764
433
  },
434
  {
435
  "update": 245,
436
  "global_step": 501760,
437
+ "num_episodes": 139,
438
+ "mean_reward": 90.87246092796326,
439
+ "mean_length": 3642.75,
440
+ "loss": 0.7918240427970886,
441
+ "sps": 226.8334199523192
442
  },
443
  {
444
  "update": 250,
445
  "global_step": 512000,
446
+ "num_episodes": 144,
447
+ "mean_reward": 93.14730869293213,
448
+ "mean_length": 3661.34,
449
+ "loss": 0.5329251885414124,
450
+ "sps": 1291.7356099331766
451
  },
452
  {
453
  "update": 255,
454
  "global_step": 522240,
455
+ "num_episodes": 147,
456
+ "mean_reward": 90.92609573364258,
457
+ "mean_length": 3465.71,
458
+ "loss": 0.2208552062511444,
459
+ "sps": 2176.5233668919136
460
  },
461
  {
462
  "update": 260,
463
  "global_step": 532480,
464
+ "num_episodes": 148,
465
+ "mean_reward": 94.03146264076233,
466
+ "mean_length": 3565.28,
467
+ "loss": 0.3058362305164337,
468
+ "sps": 1287.827730215577
469
  },
470
  {
471
  "update": 265,
472
  "global_step": 542720,
473
+ "num_episodes": 153,
474
+ "mean_reward": 93.66601790428162,
475
+ "mean_length": 3469.44,
476
+ "loss": 0.2177593857049942,
477
+ "sps": 2185.2068742705324
478
  },
479
  {
480
  "update": 270,
481
  "global_step": 552960,
482
+ "num_episodes": 154,
483
+ "mean_reward": 96.52238609313964,
484
+ "mean_length": 3569.01,
485
+ "loss": 0.1883123517036438,
486
+ "sps": 2191.858927925312
487
  },
488
  {
489
  "update": 275,
490
  "global_step": 563200,
491
+ "num_episodes": 155,
492
+ "mean_reward": 99.62512998580932,
493
+ "mean_length": 3668.07,
494
+ "loss": 0.27221783995628357,
495
+ "sps": 2123.45043596574
496
  },
497
  {
498
  "update": 280,
499
  "global_step": 573440,
500
+ "num_episodes": 155,
501
+ "mean_reward": 99.62512998580932,
502
+ "mean_length": 3668.07,
503
+ "loss": 0.18106494843959808,
504
+ "sps": 2141.5921280557186
505
  },
506
  {
507
  "update": 285,
508
  "global_step": 583680,
509
+ "num_episodes": 162,
510
+ "mean_reward": 100.8679942893982,
511
+ "mean_length": 3670.27,
512
+ "loss": 0.5695434212684631,
513
+ "sps": 500.88139423117855
514
  },
515
  {
516
  "update": 290,
517
  "global_step": 593920,
518
+ "num_episodes": 162,
519
+ "mean_reward": 100.8679942893982,
520
+ "mean_length": 3670.27,
521
+ "loss": 0.07588323950767517,
522
+ "sps": 2141.039120248053
523
  },
524
  {
525
  "update": 295,
526
  "global_step": 604160,
527
+ "num_episodes": 164,
528
+ "mean_reward": 103.97303117752075,
529
+ "mean_length": 3770.47,
530
+ "loss": 0.3367934823036194,
531
+ "sps": 2232.451302693334
532
  },
533
  {
534
  "update": 300,
535
  "global_step": 614400,
536
+ "num_episodes": 164,
537
+ "mean_reward": 103.97303117752075,
538
+ "mean_length": 3770.47,
539
+ "loss": 0.23896968364715576,
540
+ "sps": 2206.894347275839
541
  },
542
  {
543
  "update": 305,
544
  "global_step": 624640,
545
+ "num_episodes": 168,
546
+ "mean_reward": 108.57997800827026,
547
+ "mean_length": 3866.64,
548
+ "loss": 0.8634886145591736,
549
+ "sps": 2068.617103764695
550
  },
551
  {
552
  "update": 310,
553
  "global_step": 634880,
554
+ "num_episodes": 168,
555
+ "mean_reward": 108.57997800827026,
556
+ "mean_length": 3866.64,
557
+ "loss": 0.05964889004826546,
558
+ "sps": 2227.05463999608
559
  },
560
  {
561
  "update": 315,
562
  "global_step": 645120,
563
+ "num_episodes": 170,
564
+ "mean_reward": 108.82997495651244,
565
+ "mean_length": 3866.64,
566
+ "loss": 0.3584131598472595,
567
+ "sps": 2093.8527505731936
568
  },
569
  {
570
  "update": 320,
571
  "global_step": 655360,
572
+ "num_episodes": 170,
573
+ "mean_reward": 108.82997495651244,
574
+ "mean_length": 3866.64,
575
+ "loss": 0.11968313157558441,
576
+ "sps": 2108.9095828206277
577
  },
578
  {
579
  "update": 325,
580
  "global_step": 665600,
581
+ "num_episodes": 174,
582
+ "mean_reward": 112.43493017196656,
583
+ "mean_length": 3960.91,
584
+ "loss": 1.6604670286178589,
585
+ "sps": 2079.794147719775
586
  },
587
  {
588
  "update": 330,
589
  "global_step": 675840,
590
+ "num_episodes": 175,
591
+ "mean_reward": 115.29129805564881,
592
+ "mean_length": 4060.48,
593
+ "loss": 0.19095765054225922,
594
+ "sps": 2102.571665929904
595
  },
596
  {
597
  "update": 335,
598
  "global_step": 686080,
599
+ "num_episodes": 175,
600
+ "mean_reward": 115.29129805564881,
601
+ "mean_length": 4060.48,
602
+ "loss": 0.14229056239128113,
603
+ "sps": 2107.3760666985922
604
  },
605
  {
606
  "update": 340,
607
  "global_step": 696320,
608
+ "num_episodes": 175,
609
+ "mean_reward": 115.29129805564881,
610
+ "mean_length": 4060.48,
611
+ "loss": 0.36735689640045166,
612
+ "sps": 2157.1693383290976
613
  },
614
  {
615
  "update": 345,
616
  "global_step": 706560,
617
+ "num_episodes": 189,
618
+ "mean_reward": 109.84256043434144,
619
+ "mean_length": 3863.03,
620
+ "loss": 0.7830713987350464,
621
+ "sps": 381.54779046834835
622
  },
623
  {
624
  "update": 350,
625
  "global_step": 716800,
626
+ "num_episodes": 193,
627
+ "mean_reward": 109.65507596015931,
628
+ "mean_length": 3901.01,
629
+ "loss": 9.749893188476562,
630
+ "sps": 258.15120412826286
631
  },
632
  {
633
  "update": 355,
634
  "global_step": 727040,
635
+ "num_episodes": 197,
636
+ "mean_reward": 109.78353757858277,
637
+ "mean_length": 3921.28,
638
+ "loss": 0.27189120650291443,
639
+ "sps": 2234.7686845905673
640
  },
641
  {
642
  "update": 360,
643
  "global_step": 737280,
644
+ "num_episodes": 198,
645
+ "mean_reward": 110.03232297897338,
646
+ "mean_length": 3921.28,
647
+ "loss": 0.27451807260513306,
648
+ "sps": 2208.748818348954
649
  },
650
  {
651
  "update": 365,
652
  "global_step": 747520,
653
+ "num_episodes": 198,
654
+ "mean_reward": 110.03232297897338,
655
+ "mean_length": 3921.28,
656
+ "loss": 0.3887019753456116,
657
+ "sps": 2179.671400488869
658
  },
659
  {
660
  "update": 370,
661
  "global_step": 757760,
662
+ "num_episodes": 200,
663
+ "mean_reward": 110.02969996452332,
664
+ "mean_length": 3920.77,
665
+ "loss": 1.3407719135284424,
666
+ "sps": 190.58035959703773
667
  },
668
  {
669
  "update": 375,
670
  "global_step": 768000,
671
+ "num_episodes": 205,
672
+ "mean_reward": 113.28294358253478,
673
+ "mean_length": 4042.39,
674
+ "loss": 2.234469175338745,
675
+ "sps": 165.74069059867267
676
  },
677
  {
678
  "update": 380,
679
  "global_step": 778240,
680
+ "num_episodes": 209,
681
+ "mean_reward": 112.94422784805298,
682
+ "mean_length": 4057.27,
683
+ "loss": 0.2318531721830368,
684
+ "sps": 2224.56366677181
685
  },
686
  {
687
  "update": 385,
688
  "global_step": 788480,
689
+ "num_episodes": 209,
690
+ "mean_reward": 112.94422784805298,
691
+ "mean_length": 4057.27,
692
+ "loss": 0.2268732637166977,
693
+ "sps": 2253.631967291392
694
  },
695
  {
696
  "update": 390,
697
  "global_step": 798720,
698
+ "num_episodes": 210,
699
+ "mean_reward": 115.53032508850097,
700
+ "mean_length": 4153.44,
701
+ "loss": 0.06943273544311523,
702
+ "sps": 2241.199985702131
703
  },
704
  {
705
  "update": 395,
706
  "global_step": 808960,
707
+ "num_episodes": 211,
708
+ "mean_reward": 118.63075592041015,
709
+ "mean_length": 4252.74,
710
+ "loss": 0.21790897846221924,
711
+ "sps": 2233.8736866140016
712
  },
713
  {
714
  "update": 400,
715
  "global_step": 819200,
716
+ "num_episodes": 218,
717
+ "mean_reward": 112.67819341659546,
718
+ "mean_length": 4053.49,
719
+ "loss": 0.28217822313308716,
720
+ "sps": 2245.8419819824057
721
  },
722
  {
723
  "update": 405,
724
  "global_step": 829440,
725
+ "num_episodes": 218,
726
+ "mean_reward": 112.67819341659546,
727
+ "mean_length": 4053.49,
728
+ "loss": 0.07389844954013824,
729
+ "sps": 2093.830293640606
730
  },
731
  {
732
  "update": 410,
733
  "global_step": 839680,
734
+ "num_episodes": 219,
735
+ "mean_reward": 112.67819341659546,
736
+ "mean_length": 4053.49,
737
+ "loss": 0.19773584604263306,
738
+ "sps": 2089.0640172885346
739
  },
740
  {
741
  "update": 415,
742
  "global_step": 849920,
743
+ "num_episodes": 221,
744
+ "mean_reward": 115.57382575035095,
745
+ "mean_length": 4142.16,
746
+ "loss": 0.2218392789363861,
747
+ "sps": 2037.8682435790647
748
  },
749
  {
750
  "update": 420,
751
  "global_step": 860160,
752
+ "num_episodes": 225,
753
+ "mean_reward": 118.26686740875245,
754
+ "mean_length": 4220.72,
755
+ "loss": 0.16683566570281982,
756
+ "sps": 2062.011715868465
757
  },
758
  {
759
  "update": 425,
760
  "global_step": 870400,
761
+ "num_episodes": 225,
762
+ "mean_reward": 118.26686740875245,
763
+ "mean_length": 4220.72,
764
+ "loss": 0.3665807843208313,
765
+ "sps": 2085.879289076417
766
  },
767
  {
768
  "update": 430,
769
  "global_step": 880640,
770
+ "num_episodes": 227,
771
+ "mean_reward": 115.4131219291687,
772
+ "mean_length": 4123.01,
773
+ "loss": 0.2211420238018036,
774
+ "sps": 1319.1510855667186
775
  },
776
  {
777
  "update": 435,
778
  "global_step": 890880,
779
+ "num_episodes": 228,
780
+ "mean_reward": 118.51452213287354,
781
+ "mean_length": 4220.72,
782
+ "loss": 0.01409757137298584,
783
+ "sps": 2249.26043334859
784
  },
785
  {
786
  "update": 440,
787
  "global_step": 901120,
788
+ "num_episodes": 232,
789
+ "mean_reward": 121.46894996643067,
790
+ "mean_length": 4313.96,
791
+ "loss": 0.34094715118408203,
792
+ "sps": 2235.574796161653
793
  },
794
  {
795
  "update": 445,
796
  "global_step": 911360,
797
+ "num_episodes": 232,
798
+ "mean_reward": 121.46894996643067,
799
+ "mean_length": 4313.96,
800
+ "loss": 0.03650672733783722,
801
+ "sps": 2254.642878384582
802
  },
803
  {
804
  "update": 450,
805
  "global_step": 921600,
806
+ "num_episodes": 233,
807
+ "mean_reward": 122.21637367248535,
808
+ "mean_length": 4313.96,
809
+ "loss": 0.4388778805732727,
810
+ "sps": 2230.3176453482333
811
  },
812
  {
813
  "update": 455,
814
  "global_step": 931840,
815
+ "num_episodes": 235,
816
+ "mean_reward": 122.47082150459289,
817
+ "mean_length": 4315.26,
818
+ "loss": 0.04285623878240585,
819
+ "sps": 2245.5355177671836
820
  },
821
  {
822
  "update": 460,
823
  "global_step": 942080,
824
+ "num_episodes": 241,
825
+ "mean_reward": 125.87507015228272,
826
+ "mean_length": 4387.15,
827
+ "loss": 0.18071885406970978,
828
+ "sps": 2186.650382997658
829
  },
830
  {
831
  "update": 465,
832
  "global_step": 952320,
833
+ "num_episodes": 241,
834
+ "mean_reward": 125.87507015228272,
835
+ "mean_length": 4387.15,
836
+ "loss": 0.15905889868736267,
837
+ "sps": 2246.1825959008293
838
  },
839
  {
840
  "update": 470,
841
  "global_step": 962560,
842
+ "num_episodes": 243,
843
+ "mean_reward": 125.64015349388123,
844
+ "mean_length": 4385.41,
845
+ "loss": 0.2828221917152405,
846
+ "sps": 2237.777989892148
847
  },
848
  {
849
  "update": 475,
850
  "global_step": 972800,
851
+ "num_episodes": 244,
852
+ "mean_reward": 128.7428973865509,
853
+ "mean_length": 4484.47,
854
+ "loss": -0.001385476440191269,
855
+ "sps": 2224.056236169126
856
  },
857
  {
858
  "update": 480,
859
  "global_step": 983040,
860
+ "num_episodes": 247,
861
+ "mean_reward": 134.21521110534667,
862
+ "mean_length": 4676.7,
863
+ "loss": 0.6856433153152466,
864
+ "sps": 166.9800451494931
865
  },
866
  {
867
  "update": 485,
868
  "global_step": 993280,
869
+ "num_episodes": 248,
870
+ "mean_reward": 131.0992687225342,
871
+ "mean_length": 4598.8,
872
+ "loss": 0.22580231726169586,
873
+ "sps": 2205.4840180784313
874
  },
875
  {
876
  "update": 490,
877
  "global_step": 1003520,
878
+ "num_episodes": 253,
879
+ "mean_reward": 130.85596141815185,
880
+ "mean_length": 4590.29,
881
+ "loss": 15.813247680664062,
882
+ "sps": 1554.4720078900461
883
  },
884
  {
885
  "update": 495,
886
  "global_step": 1013760,
887
+ "num_episodes": 262,
888
+ "mean_reward": 118.73081938743591,
889
+ "mean_length": 4225.92,
890
+ "loss": 3.043635368347168,
891
+ "sps": 364.1160359745992
892
  },
893
  {
894
  "update": 500,
895
  "global_step": 1024000,
896
+ "num_episodes": 266,
897
+ "mean_reward": 112.27171317100525,
898
+ "mean_length": 4026.32,
899
+ "loss": 1.5756291151046753,
900
+ "sps": 2216.9506893762605
901
  },
902
  {
903
  "update": 505,
904
  "global_step": 1034240,
905
+ "num_episodes": 271,
906
+ "mean_reward": 106.31812086105347,
907
+ "mean_length": 3831.41,
908
+ "loss": 0.8373554348945618,
909
+ "sps": 2230.0889295621437
910
  },
911
  {
912
  "update": 510,
913
  "global_step": 1044480,
914
+ "num_episodes": 271,
915
+ "mean_reward": 106.31812086105347,
916
+ "mean_length": 3831.41,
917
+ "loss": 0.1685715615749359,
918
+ "sps": 2250.612331563048
919
  },
920
  {
921
  "update": 515,
922
  "global_step": 1054720,
923
+ "num_episodes": 279,
924
+ "mean_reward": 94.40078766822815,
925
+ "mean_length": 3437.38,
926
+ "loss": 0.3164912760257721,
927
+ "sps": 755.1427540116495
928
  },
929
  {
930
  "update": 520,
931
  "global_step": 1064960,
932
+ "num_episodes": 284,
933
+ "mean_reward": 97.14825866699219,
934
+ "mean_length": 3536.28,
935
+ "loss": 2.4807233810424805,
936
+ "sps": 2229.202303336277
937
  },
938
  {
939
  "update": 525,
940
  "global_step": 1075200,
941
+ "num_episodes": 288,
942
+ "mean_reward": 100.25763621330262,
943
+ "mean_length": 3636.97,
944
+ "loss": 7.4268412590026855,
945
+ "sps": 2240.4780501902205
946
  },
947
  {
948
  "update": 530,
949
  "global_step": 1085440,
950
+ "num_episodes": 288,
951
+ "mean_reward": 100.25763621330262,
952
+ "mean_length": 3636.97,
953
+ "loss": 13.631644248962402,
954
+ "sps": 2167.9135230629645
955
  },
956
  {
957
  "update": 535,
958
  "global_step": 1095680,
959
+ "num_episodes": 289,
960
+ "mean_reward": 103.11298480987548,
961
+ "mean_length": 3736.54,
962
+ "loss": 16.333158493041992,
963
+ "sps": 2159.176669252336
964
  },
965
  {
966
  "update": 540,
967
  "global_step": 1105920,
968
+ "num_episodes": 292,
969
+ "mean_reward": 103.19977847099304,
970
+ "mean_length": 3834.19,
971
+ "loss": 1.755424976348877,
972
+ "sps": 2140.8384853415
973
  },
974
  {
975
  "update": 545,
976
  "global_step": 1116160,
977
+ "num_episodes": 293,
978
+ "mean_reward": 106.40395077705384,
979
+ "mean_length": 3909.42,
980
+ "loss": 2.900822639465332,
981
+ "sps": 175.96426214417178
982
  },
983
  {
984
  "update": 550,
985
  "global_step": 1126400,
986
+ "num_episodes": 296,
987
+ "mean_reward": 106.08349679946899,
988
+ "mean_length": 3927.6,
989
+ "loss": 3.329728126525879,
990
+ "sps": 162.40274463106428
991
  },
992
  {
993
  "update": 555,
994
  "global_step": 1136640,
995
+ "num_episodes": 300,
996
+ "mean_reward": 100.18753468513489,
997
+ "mean_length": 3833.08,
998
+ "loss": 0.3283099830150604,
999
+ "sps": 2190.1276229463892
1000
  },
1001
  {
1002
  "update": 560,
1003
  "global_step": 1146880,
1004
+ "num_episodes": 307,
1005
+ "mean_reward": 100.06094589233399,
1006
+ "mean_length": 3798.23,
1007
+ "loss": 3.218208074569702,
1008
+ "sps": 196.6633145580635
1009
  },
1010
  {
1011
  "update": 565,
1012
  "global_step": 1157120,
1013
+ "num_episodes": 308,
1014
+ "mean_reward": 99.96278611183166,
1015
+ "mean_length": 3815.32,
1016
+ "loss": 0.05826599895954132,
1017
+ "sps": 2198.7480074814316
1018
  },
1019
  {
1020
  "update": 570,
1021
  "global_step": 1167360,
1022
+ "num_episodes": 308,
1023
+ "mean_reward": 99.96278611183166,
1024
+ "mean_length": 3815.32,
1025
+ "loss": 0.19391702115535736,
1026
+ "sps": 2247.8319226488716
1027
  },
1028
  {
1029
  "update": 575,
1030
  "global_step": 1177600,
1031
+ "num_episodes": 311,
1032
+ "mean_reward": 97.06219799995422,
1033
+ "mean_length": 3719.15,
1034
+ "loss": 0.5620024800300598,
1035
+ "sps": 2199.015937226961
1036
  },
1037
  {
1038
  "update": 580,
1039
  "global_step": 1187840,
1040
+ "num_episodes": 316,
1041
+ "mean_reward": 96.72605070114136,
1042
+ "mean_length": 3721.99,
1043
+ "loss": 9.720523834228516,
1044
+ "sps": 1486.7675112343536
1045
  },
1046
  {
1047
  "update": 585,
1048
  "global_step": 1198080,
1049
+ "num_episodes": 318,
1050
+ "mean_reward": 96.66609871864318,
1051
+ "mean_length": 3723.94,
1052
+ "loss": 0.6666443943977356,
1053
+ "sps": 2234.009052616905
1054
  },
1055
  {
1056
  "update": 590,
1057
  "global_step": 1208320,
1058
+ "num_episodes": 318,
1059
+ "mean_reward": 96.66609871864318,
1060
+ "mean_length": 3723.94,
1061
+ "loss": 0.2884657084941864,
1062
+ "sps": 2239.9481475435614
1063
  },
1064
  {
1065
  "update": 595,
1066
  "global_step": 1218560,
1067
+ "num_episodes": 320,
1068
+ "mean_reward": 96.41710000038147,
1069
+ "mean_length": 3723.94,
1070
+ "loss": 0.8150730133056641,
1071
+ "sps": 2242.7699730761115
1072
  },
1073
  {
1074
  "update": 600,
1075
  "global_step": 1228800,
1076
+ "num_episodes": 322,
1077
+ "mean_reward": 101.80320601463318,
1078
+ "mean_length": 3725.16,
1079
+ "loss": 0.8642079830169678,
1080
+ "sps": 285.7807108395187
1081
  },
1082
  {
1083
  "update": 605,
1084
  "global_step": 1239040,
1085
+ "num_episodes": 326,
1086
+ "mean_reward": 98.85420690536499,
1087
+ "mean_length": 3650.4,
1088
+ "loss": 0.6339423060417175,
1089
+ "sps": 1850.348648405877
1090
  },
1091
  {
1092
  "update": 610,
1093
  "global_step": 1249280,
1094
+ "num_episodes": 326,
1095
+ "mean_reward": 98.85420690536499,
1096
+ "mean_length": 3650.4,
1097
+ "loss": 0.5616691708564758,
1098
+ "sps": 2222.6578253029397
1099
  },
1100
  {
1101
  "update": 615,
1102
  "global_step": 1259520,
1103
+ "num_episodes": 331,
1104
+ "mean_reward": 90.45196411609649,
1105
+ "mean_length": 3548.02,
1106
+ "loss": 0.2677087187767029,
1107
+ "sps": 2224.9883354292588
1108
  },
1109
  {
1110
  "update": 620,
1111
  "global_step": 1269760,
1112
+ "num_episodes": 331,
1113
+ "mean_reward": 90.45196411609649,
1114
+ "mean_length": 3548.02,
1115
+ "loss": 0.05597818270325661,
1116
+ "sps": 2232.8842122446686
1117
  },
1118
  {
1119
  "update": 625,
1120
  "global_step": 1280000,
1121
+ "num_episodes": 335,
1122
+ "mean_reward": 89.9470157957077,
1123
+ "mean_length": 3546.72,
1124
+ "loss": 0.4655948281288147,
1125
+ "sps": 314.0543075325823
1126
  },
1127
  {
1128
  "update": 630,
1129
  "global_step": 1290240,
1130
+ "num_episodes": 337,
1131
+ "mean_reward": 83.67983005046844,
1132
+ "mean_length": 3359.15,
1133
+ "loss": 0.0969170331954956,
1134
+ "sps": 2181.9774383339013
1135
  },
1136
  {
1137
  "update": 635,
1138
  "global_step": 1300480,
1139
+ "num_episodes": 342,
1140
+ "mean_reward": 86.38446100711822,
1141
+ "mean_length": 3465.84,
1142
+ "loss": 0.7292745113372803,
1143
+ "sps": 2124.75987235513
1144
  },
1145
  {
1146
  "update": 640,
1147
  "global_step": 1310720,
1148
+ "num_episodes": 342,
1149
+ "mean_reward": 86.38446100711822,
1150
+ "mean_length": 3465.84,
1151
+ "loss": 0.04831065982580185,
1152
+ "sps": 2168.1603087847257
1153
  },
1154
  {
1155
  "update": 645,
1156
  "global_step": 1320960,
1157
+ "num_episodes": 343,
1158
+ "mean_reward": 89.47943027019501,
1159
+ "mean_length": 3565.41,
1160
+ "loss": 0.1487177461385727,
1161
+ "sps": 2107.2008167857116
1162
  },
1163
  {
1164
  "update": 650,
1165
  "global_step": 1331200,
1166
+ "num_episodes": 344,
1167
+ "mean_reward": 89.2304312467575,
1168
+ "mean_length": 3565.41,
1169
+ "loss": 0.12002412974834442,
1170
+ "sps": 2127.214484781757
1171
  },
1172
  {
1173
  "update": 655,
1174
  "global_step": 1341440,
1175
+ "num_episodes": 347,
1176
+ "mean_reward": 88.98148257732392,
1177
+ "mean_length": 3565.41,
1178
+ "loss": 0.1547270268201828,
1179
+ "sps": 2165.6753206938283
1180
  },
1181
  {
1182
  "update": 660,
1183
  "global_step": 1351680,
1184
+ "num_episodes": 347,
1185
+ "mean_reward": 88.98148257732392,
1186
+ "mean_length": 3565.41,
1187
+ "loss": 0.1951214224100113,
1188
+ "sps": 2229.5540914965095
1189
  },
1190
  {
1191
  "update": 665,
1192
  "global_step": 1361920,
1193
+ "num_episodes": 350,
1194
+ "mean_reward": 88.69014154911041,
1195
+ "mean_length": 3547.65,
1196
+ "loss": 0.02001141756772995,
1197
+ "sps": 2220.3189922208117
1198
  },
1199
  {
1200
  "update": 670,
1201
  "global_step": 1372160,
1202
+ "num_episodes": 353,
1203
+ "mean_reward": 91.77625680446624,
1204
+ "mean_length": 3645.53,
1205
+ "loss": 0.1334764063358307,
1206
+ "sps": 2217.9524326914734
1207
  },
1208
  {
1209
  "update": 675,
1210
  "global_step": 1382400,
1211
+ "num_episodes": 356,
1212
+ "mean_reward": 94.39934126377106,
1213
+ "mean_length": 3746.85,
1214
+ "loss": 1.491716742515564,
1215
+ "sps": 115.85030478370321
1216
  },
1217
  {
1218
  "update": 680,
1219
  "global_step": 1392640,
1220
+ "num_episodes": 362,
1221
+ "mean_reward": 94.36987939357758,
1222
+ "mean_length": 3759.39,
1223
+ "loss": 0.08460421860218048,
1224
+ "sps": 466.5453273147261
1225
  },
1226
  {
1227
  "update": 685,
1228
  "global_step": 1402880,
1229
+ "num_episodes": 370,
1230
+ "mean_reward": 91.42453453540801,
1231
+ "mean_length": 3664.28,
1232
+ "loss": 0.06896203756332397,
1233
+ "sps": 2211.12877876806
1234
  },
1235
  {
1236
  "update": 690,
1237
  "global_step": 1413120,
1238
+ "num_episodes": 371,
1239
+ "mean_reward": 94.27827970981598,
1240
+ "mean_length": 3763.34,
1241
+ "loss": 0.08465250581502914,
1242
+ "sps": 2224.3649291369943
1243
  },
1244
  {
1245
  "update": 695,
1246
  "global_step": 1423360,
1247
+ "num_episodes": 372,
1248
+ "mean_reward": 94.03031706333161,
1249
+ "mean_length": 3763.34,
1250
+ "loss": 0.12509916722774506,
1251
+ "sps": 2233.8556777814188
1252
  },
1253
  {
1254
  "update": 700,
1255
  "global_step": 1433600,
1256
+ "num_episodes": 372,
1257
+ "mean_reward": 94.03031706333161,
1258
+ "mean_length": 3763.34,
1259
+ "loss": 0.05390219762921333,
1260
+ "sps": 2225.4858130028433
1261
  },
1262
  {
1263
  "update": 705,
1264
  "global_step": 1443840,
1265
+ "num_episodes": 378,
1266
+ "mean_reward": 100.22330937862397,
1267
+ "mean_length": 3957.32,
1268
+ "loss": 1.2421371936798096,
1269
+ "sps": 176.73331940579132
1270
  },
1271
  {
1272
  "update": 710,
1273
  "global_step": 1454080,
1274
+ "num_episodes": 380,
1275
+ "mean_reward": 100.36302034854889,
1276
+ "mean_length": 3980.35,
1277
+ "loss": 0.06020417809486389,
1278
+ "sps": 2151.8459777584108
1279
  },
1280
  {
1281
  "update": 715,
1282
  "global_step": 1464320,
1283
+ "num_episodes": 383,
1284
+ "mean_reward": 103.226567196846,
1285
+ "mean_length": 4079.78,
1286
+ "loss": 0.03555985540151596,
1287
+ "sps": 779.0322633363131
1288
  },
1289
  {
1290
  "update": 720,
1291
  "global_step": 1474560,
1292
+ "num_episodes": 385,
1293
+ "mean_reward": 100.37291463375091,
1294
+ "mean_length": 3983.61,
1295
+ "loss": 0.42947918176651,
1296
+ "sps": 1976.0163143357508
1297
  },
1298
  {
1299
  "update": 725,
1300
  "global_step": 1484800,
1301
+ "num_episodes": 387,
1302
+ "mean_reward": 102.8877759027481,
1303
+ "mean_length": 4082.64,
1304
+ "loss": 0.3529297709465027,
1305
+ "sps": 2149.9991470019922
1306
  },
1307
  {
1308
  "update": 730,
1309
  "global_step": 1495040,
1310
+ "num_episodes": 393,
1311
+ "mean_reward": 96.80677964687348,
1312
+ "mean_length": 3770.28,
1313
+ "loss": 0.5400058627128601,
1314
+ "sps": 182.2636316464271
1315
  },
1316
  {
1317
  "update": 735,
1318
  "global_step": 1505280,
1319
+ "num_episodes": 397,
1320
+ "mean_reward": 99.80784844875336,
1321
+ "mean_length": 3841.74,
1322
+ "loss": 0.07788175344467163,
1323
+ "sps": 1880.7298583574118
1324
  },
1325
  {
1326
  "update": 740,
1327
  "global_step": 1515520,
1328
+ "num_episodes": 400,
1329
+ "mean_reward": 102.611374335289,
1330
+ "mean_length": 3837.4,
1331
+ "loss": 0.12514562904834747,
1332
+ "sps": 2199.6995663818816
1333
  },
1334
  {
1335
  "update": 745,
1336
  "global_step": 1525760,
1337
+ "num_episodes": 402,
1338
+ "mean_reward": 102.8225530385971,
1339
+ "mean_length": 3839.8,
1340
+ "loss": 0.1331540048122406,
1341
+ "sps": 505.88516130939627
1342
  },
1343
  {
1344
  "update": 750,
1345
  "global_step": 1536000,
1346
+ "num_episodes": 402,
1347
+ "mean_reward": 102.8225530385971,
1348
+ "mean_length": 3839.8,
1349
+ "loss": -0.008329648524522781,
1350
+ "sps": 2268.146789029585
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1351
  }
1352
  ]