openfront-rl-agent / training_log.json
JoshuaFreeman's picture
Upload training_log.json with huggingface_hub
41d2b77 verified
[
{
"update": 5,
"global_step": 20480,
"num_episodes": 5,
"mean_reward": -0.4786701202392578,
"mean_length": 232.8,
"survival_pct": 0.023280000000000002,
"max_steps": 10000,
"loss": 0.12873922288417816,
"sps": 2902.7821104130076
},
{
"update": 10,
"global_step": 40960,
"num_episodes": 5,
"mean_reward": -0.4786701202392578,
"mean_length": 232.8,
"survival_pct": 0.023280000000000002,
"max_steps": 10000,
"loss": 1.048768401145935,
"sps": 2989.934249487722
},
{
"update": 15,
"global_step": 61440,
"num_episodes": 5,
"mean_reward": -0.4786701202392578,
"mean_length": 232.8,
"survival_pct": 0.023280000000000002,
"max_steps": 10000,
"loss": 0.4728538990020752,
"sps": 2892.3409931007545
},
{
"update": 20,
"global_step": 81920,
"num_episodes": 13,
"mean_reward": 70.43313554617075,
"mean_length": 4721.461538461538,
"survival_pct": 0.4721461538461538,
"max_steps": 10000,
"loss": 0.8401235342025757,
"sps": 1363.4104809447254
},
{
"update": 25,
"global_step": 102400,
"num_episodes": 25,
"mean_reward": 49.477407665252684,
"mean_length": 3493.36,
"survival_pct": 0.34933600000000004,
"max_steps": 10000,
"loss": 2.6180195808410645,
"sps": 1433.9218398141863
},
{
"update": 30,
"global_step": 122880,
"num_episodes": 25,
"mean_reward": 49.477407665252684,
"mean_length": 3493.36,
"survival_pct": 0.34933600000000004,
"max_steps": 10000,
"loss": -0.17261816561222076,
"sps": 2402.5672121197713
},
{
"update": 35,
"global_step": 143360,
"num_episodes": 25,
"mean_reward": 49.477407665252684,
"mean_length": 3493.36,
"survival_pct": 0.34933600000000004,
"max_steps": 10000,
"loss": -0.09139963984489441,
"sps": 2342.5616398383595
},
{
"update": 40,
"global_step": 163840,
"num_episodes": 29,
"mean_reward": 63.332810089505955,
"mean_length": 4053.3793103448274,
"survival_pct": 0.40533793103448273,
"max_steps": 10000,
"loss": 2.368999719619751,
"sps": 803.2009899550957
},
{
"update": 45,
"global_step": 184320,
"num_episodes": 48,
"mean_reward": 61.90170055627823,
"mean_length": 3634.7916666666665,
"survival_pct": 0.36347916666666663,
"max_steps": 10000,
"loss": 25.510446548461914,
"sps": 876.6005063299407
},
{
"update": 50,
"global_step": 204800,
"num_episodes": 51,
"mean_reward": 58.35679834964228,
"mean_length": 3464.156862745098,
"survival_pct": 0.3464156862745098,
"max_steps": 10000,
"loss": 1.516045331954956,
"sps": 2241.345890706289
},
{
"update": 55,
"global_step": 225280,
"num_episodes": 51,
"mean_reward": 58.35679834964228,
"mean_length": 3464.156862745098,
"survival_pct": 0.3464156862745098,
"max_steps": 10000,
"loss": -0.1645357310771942,
"sps": 2287.8772324784622
},
{
"update": 60,
"global_step": 245760,
"num_episodes": 55,
"mean_reward": 61.13233257640492,
"mean_length": 3583.0363636363636,
"survival_pct": 0.3583036363636364,
"max_steps": 10000,
"loss": 1.9156525135040283,
"sps": 1777.0489161780952
},
{
"update": 65,
"global_step": 266240,
"num_episodes": 65,
"mean_reward": 70.67225723266601,
"mean_length": 3672.6153846153848,
"survival_pct": 0.3672615384615385,
"max_steps": 10000,
"loss": 0.939544141292572,
"sps": 2089.239566218028
},
{
"update": 70,
"global_step": 286720,
"num_episodes": 71,
"mean_reward": 69.33942423404103,
"mean_length": 3655.2535211267605,
"survival_pct": 0.36552535211267606,
"max_steps": 10000,
"loss": 1.1759222745895386,
"sps": 2632.098762980766
},
{
"update": 75,
"global_step": 307200,
"num_episodes": 71,
"mean_reward": 69.33942423404103,
"mean_length": 3655.2535211267605,
"survival_pct": 0.36552535211267606,
"max_steps": 10000,
"loss": 1.520628571510315,
"sps": 2482.8506672459066
},
{
"update": 80,
"global_step": 327680,
"num_episodes": 73,
"mean_reward": 73.5122941879377,
"mean_length": 3829.082191780822,
"survival_pct": 0.3829082191780822,
"max_steps": 10000,
"loss": 0.8090716004371643,
"sps": 1082.2416143589735
},
{
"update": 85,
"global_step": 348160,
"num_episodes": 83,
"mean_reward": 73.65270042993936,
"mean_length": 3811.9518072289156,
"survival_pct": 0.38119518072289155,
"max_steps": 10000,
"loss": 6.820394515991211,
"sps": 823.4620989576031
},
{
"update": 90,
"global_step": 368640,
"num_episodes": 91,
"mean_reward": 75.30516777981768,
"mean_length": 3846.164835164835,
"survival_pct": 0.38461648351648353,
"max_steps": 10000,
"loss": 2.671729803085327,
"sps": 1723.468236780256
},
{
"update": 95,
"global_step": 389120,
"num_episodes": 91,
"mean_reward": 75.30516777981768,
"mean_length": 3846.164835164835,
"survival_pct": 0.38461648351648353,
"max_steps": 10000,
"loss": 15.690156936645508,
"sps": 1463.8510964098139
},
{
"update": 100,
"global_step": 409600,
"num_episodes": 91,
"mean_reward": 75.30516777981768,
"mean_length": 3846.164835164835,
"survival_pct": 0.38461648351648353,
"max_steps": 10000,
"loss": 0.16147078573703766,
"sps": 1544.6687513559787
},
{
"update": 105,
"global_step": 430080,
"num_episodes": 95,
"mean_reward": 91.60696769011648,
"mean_length": 4105.273684210526,
"survival_pct": 0.4105273684210526,
"max_steps": 10000,
"loss": 0.28715068101882935,
"sps": 1491.869871137239
},
{
"update": 110,
"global_step": 450560,
"num_episodes": 103,
"mean_reward": 112.06914116382599,
"mean_length": 4315.97,
"survival_pct": 0.431597,
"max_steps": 10000,
"loss": 0.6831140518188477,
"sps": 1664.9507448966885
},
{
"update": 115,
"global_step": 471040,
"num_episodes": 103,
"mean_reward": 112.06914116382599,
"mean_length": 4315.97,
"survival_pct": 0.431597,
"max_steps": 10000,
"loss": -0.07581882178783417,
"sps": 2436.273752467313
},
{
"update": 120,
"global_step": 491520,
"num_episodes": 103,
"mean_reward": 112.06914116382599,
"mean_length": 4315.97,
"survival_pct": 0.431597,
"max_steps": 10000,
"loss": -0.12825502455234528,
"sps": 2290.0122957801336
},
{
"update": 125,
"global_step": 512000,
"num_episodes": 109,
"mean_reward": 120.03564118385314,
"mean_length": 4411.64,
"survival_pct": 0.44116400000000006,
"max_steps": 10000,
"loss": 0.5430532097816467,
"sps": 2629.093479829129
},
{
"update": 130,
"global_step": 532480,
"num_episodes": 115,
"mean_reward": 123.0210286808014,
"mean_length": 4319.07,
"survival_pct": 0.431907,
"max_steps": 10000,
"loss": 6.376620769500732,
"sps": 960.5544143123027
},
{
"update": 135,
"global_step": 552960,
"num_episodes": 116,
"mean_reward": 123.100150847435,
"mean_length": 4326.72,
"survival_pct": 0.432672,
"max_steps": 10000,
"loss": 0.21949227154254913,
"sps": 2445.2169291143578
},
{
"update": 140,
"global_step": 573440,
"num_episodes": 116,
"mean_reward": 123.100150847435,
"mean_length": 4326.72,
"survival_pct": 0.432672,
"max_steps": 10000,
"loss": -0.2501995265483856,
"sps": 2494.697519833577
},
{
"update": 145,
"global_step": 593920,
"num_episodes": 123,
"mean_reward": 134.99672790527345,
"mean_length": 4791.57,
"survival_pct": 0.47915699999999994,
"max_steps": 10000,
"loss": 31.24036407470703,
"sps": 657.6329559456061
},
{
"update": 150,
"global_step": 614400,
"num_episodes": 129,
"mean_reward": 136.9602340936661,
"mean_length": 4811.37,
"survival_pct": 0.481137,
"max_steps": 10000,
"loss": 0.5206527709960938,
"sps": 2480.224703717002
},
{
"update": 155,
"global_step": 634880,
"num_episodes": 132,
"mean_reward": 133.16078406333924,
"mean_length": 4618.29,
"survival_pct": 0.461829,
"max_steps": 10000,
"loss": 0.5726419687271118,
"sps": 2234.639911831208
},
{
"update": 160,
"global_step": 655360,
"num_episodes": 132,
"mean_reward": 133.16078406333924,
"mean_length": 4618.29,
"survival_pct": 0.461829,
"max_steps": 10000,
"loss": -0.06972374022006989,
"sps": 2060.961397107338
},
{
"update": 165,
"global_step": 675840,
"num_episodes": 139,
"mean_reward": 134.1688402891159,
"mean_length": 4694.54,
"survival_pct": 0.469454,
"max_steps": 10000,
"loss": 1.1694589853286743,
"sps": 2018.626635358524
},
{
"update": 170,
"global_step": 696320,
"num_episodes": 144,
"mean_reward": 142.00250946283342,
"mean_length": 4984.25,
"survival_pct": 0.498425,
"max_steps": 10000,
"loss": 2.366715669631958,
"sps": 1592.928048802198
},
{
"update": 175,
"global_step": 716800,
"num_episodes": 146,
"mean_reward": 144.95669583559035,
"mean_length": 5095.59,
"survival_pct": 0.509559,
"max_steps": 10000,
"loss": 21.034530639648438,
"sps": 1872.9182768017681
},
{
"update": 180,
"global_step": 737280,
"num_episodes": 146,
"mean_reward": 144.95669583559035,
"mean_length": 5095.59,
"survival_pct": 0.509559,
"max_steps": 10000,
"loss": 0.10471776127815247,
"sps": 2076.429778308613
},
{
"update": 185,
"global_step": 757760,
"num_episodes": 152,
"mean_reward": 152.9122651386261,
"mean_length": 5266.79,
"survival_pct": 0.526679,
"max_steps": 10000,
"loss": 28.784088134765625,
"sps": 1428.3776386526256
},
{
"update": 190,
"global_step": 778240,
"num_episodes": 155,
"mean_reward": 159.41734125614167,
"mean_length": 5462.84,
"survival_pct": 0.546284,
"max_steps": 10000,
"loss": 0.5585659742355347,
"sps": 1503.145884656211
},
{
"update": 195,
"global_step": 798720,
"num_episodes": 158,
"mean_reward": 176.945558218956,
"mean_length": 5559.17,
"survival_pct": 0.555917,
"max_steps": 10000,
"loss": 48.71400451660156,
"sps": 522.6239066604847
},
{
"update": 200,
"global_step": 819200,
"num_episodes": 159,
"mean_reward": 170.58766025066376,
"mean_length": 5475.19,
"survival_pct": 0.547519,
"max_steps": 10000,
"loss": 15.522979736328125,
"sps": 671.7228145587918
},
{
"update": 205,
"global_step": 839680,
"num_episodes": 159,
"mean_reward": 170.58766025066376,
"mean_length": 5475.19,
"survival_pct": 0.36501266666666665,
"max_steps": 15000,
"loss": 4.25346040725708,
"sps": 708.7725246983115
},
{
"update": 210,
"global_step": 860160,
"num_episodes": 159,
"mean_reward": 170.58766025066376,
"mean_length": 5475.19,
"survival_pct": 0.36501266666666665,
"max_steps": 15000,
"loss": 0.7638179659843445,
"sps": 658.5550006478268
},
{
"update": 215,
"global_step": 880640,
"num_episodes": 163,
"mean_reward": 192.89881912708282,
"mean_length": 5817.41,
"survival_pct": 0.3878273333333333,
"max_steps": 15000,
"loss": 12.150495529174805,
"sps": 353.88007707970496
},
{
"update": 220,
"global_step": 901120,
"num_episodes": 166,
"mean_reward": 207.2202451276779,
"mean_length": 6161.78,
"survival_pct": 0.41078533333333334,
"max_steps": 15000,
"loss": 39.93988037109375,
"sps": 320.11899245227085
},
{
"update": 225,
"global_step": 921600,
"num_episodes": 170,
"mean_reward": 259.2956739234924,
"mean_length": 6429.44,
"survival_pct": 0.4286293333333333,
"max_steps": 15000,
"loss": 86.16336822509766,
"sps": 269.09665591603243
},
{
"update": 230,
"global_step": 942080,
"num_episodes": 174,
"mean_reward": 253.5440968155861,
"mean_length": 6199.25,
"survival_pct": 0.41328333333333334,
"max_steps": 15000,
"loss": 95.76709747314453,
"sps": 432.8286117914245
},
{
"update": 235,
"global_step": 962560,
"num_episodes": 175,
"mean_reward": 251.54071150064468,
"mean_length": 6138.37,
"survival_pct": 0.4092246666666667,
"max_steps": 15000,
"loss": 0.6741273999214172,
"sps": 2603.814903422436
},
{
"update": 240,
"global_step": 983040,
"num_episodes": 176,
"mean_reward": 253.30119943857193,
"mean_length": 6284.35,
"survival_pct": 0.4189566666666667,
"max_steps": 15000,
"loss": 0.1569591909646988,
"sps": 2278.9720754815094
},
{
"update": 245,
"global_step": 1003520,
"num_episodes": 177,
"mean_reward": 256.35702211141586,
"mean_length": 6425.14,
"survival_pct": 0.4283426666666667,
"max_steps": 15000,
"loss": -0.17721155285835266,
"sps": 2303.773051684429
},
{
"update": 250,
"global_step": 1024000,
"num_episodes": 179,
"mean_reward": 255.5962089705467,
"mean_length": 6525.14,
"survival_pct": 0.43500933333333336,
"max_steps": 15000,
"loss": 0.23589976131916046,
"sps": 2206.427532109994
},
{
"update": 255,
"global_step": 1044480,
"num_episodes": 180,
"mean_reward": 257.4188562893867,
"mean_length": 6668.15,
"survival_pct": 0.4445433333333333,
"max_steps": 15000,
"loss": 0.549183189868927,
"sps": 2211.682715091644
},
{
"update": 260,
"global_step": 1064960,
"num_episodes": 183,
"mean_reward": 269.09694628953935,
"mean_length": 7078.44,
"survival_pct": 0.471896,
"max_steps": 15000,
"loss": 1.171219825744629,
"sps": 2017.4015932661957
},
{
"update": 265,
"global_step": 1085440,
"num_episodes": 183,
"mean_reward": 269.09694628953935,
"mean_length": 7078.44,
"survival_pct": 0.471896,
"max_steps": 15000,
"loss": 0.1275859922170639,
"sps": 1835.885855759612
},
{
"update": 270,
"global_step": 1105920,
"num_episodes": 184,
"mean_reward": 270.90041587114337,
"mean_length": 7220.17,
"survival_pct": 0.4813446666666667,
"max_steps": 15000,
"loss": 0.09780505299568176,
"sps": 1811.136951881794
},
{
"update": 275,
"global_step": 1126400,
"num_episodes": 186,
"mean_reward": 279.4690273213387,
"mean_length": 7320.17,
"survival_pct": 0.48801133333333335,
"max_steps": 15000,
"loss": 15.602858543395996,
"sps": 588.0868439042007
},
{
"update": 280,
"global_step": 1146880,
"num_episodes": 188,
"mean_reward": 288.84978276491165,
"mean_length": 7391.33,
"survival_pct": 0.4927553333333333,
"max_steps": 15000,
"loss": 0.023340240120887756,
"sps": 1795.065224055729
},
{
"update": 285,
"global_step": 1167360,
"num_episodes": 190,
"mean_reward": 304.26770622015,
"mean_length": 7681.49,
"survival_pct": 0.5120993333333334,
"max_steps": 15000,
"loss": -0.13154439628124237,
"sps": 2105.6127488836673
},
{
"update": 290,
"global_step": 1187840,
"num_episodes": 193,
"mean_reward": 299.99041105508803,
"mean_length": 7774.97,
"survival_pct": 0.5183313333333334,
"max_steps": 15000,
"loss": 27.63266944885254,
"sps": 1435.2377599727854
},
{
"update": 295,
"global_step": 1208320,
"num_episodes": 193,
"mean_reward": 299.99041105508803,
"mean_length": 7774.97,
"survival_pct": 0.5183313333333334,
"max_steps": 15000,
"loss": 0.08694343268871307,
"sps": 1579.9578506707744
},
{
"update": 300,
"global_step": 1228800,
"num_episodes": 194,
"mean_reward": 299.24194776773453,
"mean_length": 7824.97,
"survival_pct": 0.5216646666666667,
"max_steps": 15000,
"loss": -0.08208262920379639,
"sps": 1713.483711856869
},
{
"update": 305,
"global_step": 1249280,
"num_episodes": 199,
"mean_reward": 291.81668387651445,
"mean_length": 7825.67,
"survival_pct": 0.5217113333333333,
"max_steps": 15000,
"loss": 5.8720316886901855,
"sps": 1446.1967920114107
},
{
"update": 310,
"global_step": 1269760,
"num_episodes": 201,
"mean_reward": 274.33396270036695,
"mean_length": 7777.49,
"survival_pct": 0.5184993333333333,
"max_steps": 15000,
"loss": 25.04059600830078,
"sps": 611.9018372332281
},
{
"update": 315,
"global_step": 1290240,
"num_episodes": 205,
"mean_reward": 272.47521389484405,
"mean_length": 7893.17,
"survival_pct": 0.5262113333333334,
"max_steps": 15000,
"loss": 35.360877990722656,
"sps": 393.4470006251
},
{
"update": 320,
"global_step": 1310720,
"num_episodes": 212,
"mean_reward": 274.2415503978729,
"mean_length": 7724.58,
"survival_pct": 0.514972,
"max_steps": 15000,
"loss": 17.640727996826172,
"sps": 576.0717870047208
},
{
"update": 325,
"global_step": 1331200,
"num_episodes": 212,
"mean_reward": 274.2415503978729,
"mean_length": 7724.58,
"survival_pct": 0.514972,
"max_steps": 15000,
"loss": 4.197415351867676,
"sps": 763.5424454388547
},
{
"update": 330,
"global_step": 1351680,
"num_episodes": 213,
"mean_reward": 276.0169840526581,
"mean_length": 7871.27,
"survival_pct": 0.5247513333333333,
"max_steps": 15000,
"loss": 3.047353982925415,
"sps": 822.0506273160125
},
{
"update": 335,
"global_step": 1372160,
"num_episodes": 214,
"mean_reward": 277.7569498729706,
"mean_length": 8018.5,
"survival_pct": 0.5345666666666666,
"max_steps": 15000,
"loss": 5.528885364532471,
"sps": 753.0351947572923
},
{
"update": 340,
"global_step": 1392640,
"num_episodes": 215,
"mean_reward": 278.02211222648623,
"mean_length": 8068.5,
"survival_pct": 0.5379,
"max_steps": 15000,
"loss": 1.2490136623382568,
"sps": 815.783836720429
},
{
"update": 345,
"global_step": 1413120,
"num_episodes": 217,
"mean_reward": 314.82606459617614,
"mean_length": 8257.34,
"survival_pct": 0.5504893333333334,
"max_steps": 15000,
"loss": 0.16502337157726288,
"sps": 2263.8703548795343
},
{
"update": 350,
"global_step": 1433600,
"num_episodes": 220,
"mean_reward": 337.1490696239471,
"mean_length": 8503.93,
"survival_pct": 0.5669286666666666,
"max_steps": 15000,
"loss": -0.23407027125358582,
"sps": 2114.7366860740576
},
{
"update": 355,
"global_step": 1454080,
"num_episodes": 220,
"mean_reward": 337.1490696239471,
"mean_length": 8503.93,
"survival_pct": 0.5669286666666666,
"max_steps": 15000,
"loss": -0.08559620380401611,
"sps": 2031.0806393625717
},
{
"update": 360,
"global_step": 1474560,
"num_episodes": 221,
"mean_reward": 339.0253634929657,
"mean_length": 8652.63,
"survival_pct": 0.576842,
"max_steps": 15000,
"loss": -0.06816114485263824,
"sps": 2006.9790581005002
},
{
"update": 365,
"global_step": 1495040,
"num_episodes": 222,
"mean_reward": 343.7125220012665,
"mean_length": 8702.63,
"survival_pct": 0.5801753333333333,
"max_steps": 15000,
"loss": -0.10728916525840759,
"sps": 2019.2784003150005
},
{
"update": 370,
"global_step": 1515520,
"num_episodes": 223,
"mean_reward": 347.4228830242157,
"mean_length": 8752.63,
"survival_pct": 0.5835086666666666,
"max_steps": 15000,
"loss": -0.20886194705963135,
"sps": 1983.558184566299
},
{
"update": 375,
"global_step": 1536000,
"num_episodes": 229,
"mean_reward": 347.23676467895507,
"mean_length": 8733.48,
"survival_pct": 0.582232,
"max_steps": 15000,
"loss": 8.476018905639648,
"sps": 771.0850934786216
},
{
"update": 380,
"global_step": 1556480,
"num_episodes": 234,
"mean_reward": 365.97415913581847,
"mean_length": 8974.59,
"survival_pct": 0.598306,
"max_steps": 15000,
"loss": 12.912026405334473,
"sps": 410.00426579514084
},
{
"update": 385,
"global_step": 1576960,
"num_episodes": 235,
"mean_reward": 364.00330050468443,
"mean_length": 8891.43,
"survival_pct": 0.592762,
"max_steps": 15000,
"loss": -0.11877703666687012,
"sps": 2193.265879978675
},
{
"update": 390,
"global_step": 1597440,
"num_episodes": 236,
"mean_reward": 367.3724857187271,
"mean_length": 9039.65,
"survival_pct": 0.6026433333333333,
"max_steps": 15000,
"loss": -0.11603386700153351,
"sps": 2297.7713967547143
},
{
"update": 395,
"global_step": 1617920,
"num_episodes": 238,
"mean_reward": 372.63827639579773,
"mean_length": 9336.77,
"survival_pct": 0.6224513333333334,
"max_steps": 15000,
"loss": -0.1684369295835495,
"sps": 2389.134775345221
},
{
"update": 400,
"global_step": 1638400,
"num_episodes": 238,
"mean_reward": 372.63827639579773,
"mean_length": 9336.77,
"survival_pct": 0.6224513333333334,
"max_steps": 15000,
"loss": -0.02054491639137268,
"sps": 2482.540323380072
},
{
"update": 405,
"global_step": 1658880,
"num_episodes": 238,
"mean_reward": 372.63827639579773,
"mean_length": 9336.77,
"survival_pct": 0.46683850000000005,
"max_steps": 20000,
"loss": -0.06290135532617569,
"sps": 2344.9434191419
},
{
"update": 410,
"global_step": 1679360,
"num_episodes": 238,
"mean_reward": 372.63827639579773,
"mean_length": 9336.77,
"survival_pct": 0.46683850000000005,
"max_steps": 20000,
"loss": -0.250944048166275,
"sps": 2189.1388557903356
},
{
"update": 415,
"global_step": 1699840,
"num_episodes": 244,
"mean_reward": 370.749574341774,
"mean_length": 9542.11,
"survival_pct": 0.4771055,
"max_steps": 20000,
"loss": 8.776338577270508,
"sps": 833.8081182273202
},
{
"update": 420,
"global_step": 1720320,
"num_episodes": 247,
"mean_reward": 366.3301660585403,
"mean_length": 9730.75,
"survival_pct": 0.4865375,
"max_steps": 20000,
"loss": 258.7391052246094,
"sps": 608.4010837129435
},
{
"update": 425,
"global_step": 1740800,
"num_episodes": 247,
"mean_reward": 366.3301660585403,
"mean_length": 9730.75,
"survival_pct": 0.4865375,
"max_steps": 20000,
"loss": 16.064197540283203,
"sps": 685.7919661773449
},
{
"update": 430,
"global_step": 1761280,
"num_episodes": 249,
"mean_reward": 369.06732979297635,
"mean_length": 9930.93,
"survival_pct": 0.4965465,
"max_steps": 20000,
"loss": 11.074816703796387,
"sps": 775.2822777334978
},
{
"update": 435,
"global_step": 1781760,
"num_episodes": 252,
"mean_reward": 369.86892349243163,
"mean_length": 10135.24,
"survival_pct": 0.5067619999999999,
"max_steps": 20000,
"loss": 8.431387901306152,
"sps": 1088.379163272055
},
{
"update": 440,
"global_step": 1802240,
"num_episodes": 252,
"mean_reward": 369.86892349243163,
"mean_length": 10135.24,
"survival_pct": 0.5067619999999999,
"max_steps": 20000,
"loss": 4.96181583404541,
"sps": 1059.8013951608161
},
{
"update": 445,
"global_step": 1822720,
"num_episodes": 252,
"mean_reward": 369.86892349243163,
"mean_length": 10135.24,
"survival_pct": 0.5067619999999999,
"max_steps": 20000,
"loss": 3.2583541870117188,
"sps": 1049.9750481736253
},
{
"update": 450,
"global_step": 1843200,
"num_episodes": 252,
"mean_reward": 369.86892349243163,
"mean_length": 10135.24,
"survival_pct": 0.5067619999999999,
"max_steps": 20000,
"loss": 1.682092308998108,
"sps": 1042.7284091211875
},
{
"update": 455,
"global_step": 1863680,
"num_episodes": 255,
"mean_reward": 370.1215773010254,
"mean_length": 10435.24,
"survival_pct": 0.521762,
"max_steps": 20000,
"loss": 5.13987398147583,
"sps": 399.81189577443547
},
{
"update": 460,
"global_step": 1884160,
"num_episodes": 257,
"mean_reward": 501.74020595550536,
"mean_length": 10734.18,
"survival_pct": 0.536709,
"max_steps": 20000,
"loss": 19.97075843811035,
"sps": 377.7733693034371
},
{
"update": 465,
"global_step": 1904640,
"num_episodes": 259,
"mean_reward": 499.11380367279054,
"mean_length": 10838.47,
"survival_pct": 0.5419235,
"max_steps": 20000,
"loss": 6.397243022918701,
"sps": 542.2637076266058
},
{
"update": 470,
"global_step": 1925120,
"num_episodes": 260,
"mean_reward": 485.6533655166626,
"mean_length": 10696.07,
"survival_pct": 0.5348035,
"max_steps": 20000,
"loss": 0.6303369998931885,
"sps": 817.7485527810969
},
{
"update": 475,
"global_step": 1945600,
"num_episodes": 263,
"mean_reward": 484.7361448955536,
"mean_length": 10797.13,
"survival_pct": 0.5398565,
"max_steps": 20000,
"loss": 0.7859541773796082,
"sps": 844.7722671839322
},
{
"update": 480,
"global_step": 1966080,
"num_episodes": 263,
"mean_reward": 484.7361448955536,
"mean_length": 10797.13,
"survival_pct": 0.5398565,
"max_steps": 20000,
"loss": 0.6309153437614441,
"sps": 838.773769030313
},
{
"update": 485,
"global_step": 1986560,
"num_episodes": 263,
"mean_reward": 484.7361448955536,
"mean_length": 10797.13,
"survival_pct": 0.5398565,
"max_steps": 20000,
"loss": 0.18543700873851776,
"sps": 820.910360397683
},
{
"update": 490,
"global_step": 2007040,
"num_episodes": 263,
"mean_reward": 484.7361448955536,
"mean_length": 10797.13,
"survival_pct": 0.5398565,
"max_steps": 20000,
"loss": 0.3058473467826843,
"sps": 842.5485957089527
},
{
"update": 495,
"global_step": 2027520,
"num_episodes": 269,
"mean_reward": 425.1001238536835,
"mean_length": 10806.87,
"survival_pct": 0.5403435000000001,
"max_steps": 20000,
"loss": 2.353271245956421,
"sps": 454.6827555011673
},
{
"update": 500,
"global_step": 2048000,
"num_episodes": 269,
"mean_reward": 425.1001238536835,
"mean_length": 10806.87,
"survival_pct": 0.5403435000000001,
"max_steps": 20000,
"loss": 1.0133743286132812,
"sps": 475.15957681047615
},
{
"update": 505,
"global_step": 2068480,
"num_episodes": 270,
"mean_reward": 437.5397934818268,
"mean_length": 10983.17,
"survival_pct": 0.5491585,
"max_steps": 20000,
"loss": 0.6723721027374268,
"sps": 601.483216529865
},
{
"update": 510,
"global_step": 2088960,
"num_episodes": 271,
"mean_reward": 439.91473383665084,
"mean_length": 11180.4,
"survival_pct": 0.55902,
"max_steps": 20000,
"loss": -0.033539168536663055,
"sps": 736.1812387453508
},
{
"update": 515,
"global_step": 2109440,
"num_episodes": 275,
"mean_reward": 531.3333820033073,
"mean_length": 11478.88,
"survival_pct": 0.573944,
"max_steps": 20000,
"loss": 0.6327868700027466,
"sps": 564.0725158498853
},
{
"update": 520,
"global_step": 2129920,
"num_episodes": 275,
"mean_reward": 531.3333820033073,
"mean_length": 11478.88,
"survival_pct": 0.573944,
"max_steps": 20000,
"loss": 0.49002259969711304,
"sps": 658.4478053303787
},
{
"update": 525,
"global_step": 2150400,
"num_episodes": 275,
"mean_reward": 531.3333820033073,
"mean_length": 11478.88,
"survival_pct": 0.573944,
"max_steps": 20000,
"loss": 0.24342404305934906,
"sps": 891.360496999003
},
{
"update": 530,
"global_step": 2170880,
"num_episodes": 275,
"mean_reward": 531.3333820033073,
"mean_length": 11478.88,
"survival_pct": 0.573944,
"max_steps": 20000,
"loss": 0.16686059534549713,
"sps": 1010.9528682298816
},
{
"update": 535,
"global_step": 2191360,
"num_episodes": 284,
"mean_reward": 521.5117145895958,
"mean_length": 10942.83,
"survival_pct": 0.5471415,
"max_steps": 20000,
"loss": 0.2558882236480713,
"sps": 469.7280569365049
},
{
"update": 540,
"global_step": 2211840,
"num_episodes": 284,
"mean_reward": 521.5117145895958,
"mean_length": 10942.83,
"survival_pct": 0.5471415,
"max_steps": 20000,
"loss": 0.1111864298582077,
"sps": 509.6937089521736
},
{
"update": 545,
"global_step": 2232320,
"num_episodes": 286,
"mean_reward": 512.1946889853477,
"mean_length": 11042.83,
"survival_pct": 0.5521415,
"max_steps": 20000,
"loss": 3.9095962047576904,
"sps": 511.8590018524491
},
{
"update": 550,
"global_step": 2252800,
"num_episodes": 287,
"mean_reward": 501.32572416067126,
"mean_length": 10911.73,
"survival_pct": 0.5455865,
"max_steps": 20000,
"loss": -0.03819906711578369,
"sps": 632.8032522356763
},
{
"update": 555,
"global_step": 2273280,
"num_episodes": 292,
"mean_reward": 503.7082317852974,
"mean_length": 10844.31,
"survival_pct": 0.5422155,
"max_steps": 20000,
"loss": 10.365863800048828,
"sps": 429.53248817964095
},
{
"update": 560,
"global_step": 2293760,
"num_episodes": 292,
"mean_reward": 503.7082317852974,
"mean_length": 10844.31,
"survival_pct": 0.5422155,
"max_steps": 20000,
"loss": 2.5824058055877686,
"sps": 835.0918364484975
},
{
"update": 565,
"global_step": 2314240,
"num_episodes": 292,
"mean_reward": 503.7082317852974,
"mean_length": 10844.31,
"survival_pct": 0.5422155,
"max_steps": 20000,
"loss": -0.015099406242370605,
"sps": 823.3139156925881
},
{
"update": 570,
"global_step": 2334720,
"num_episodes": 292,
"mean_reward": 503.7082317852974,
"mean_length": 10844.31,
"survival_pct": 0.5422155,
"max_steps": 20000,
"loss": 0.004335612058639526,
"sps": 847.512127655439
},
{
"update": 575,
"global_step": 2355200,
"num_episodes": 296,
"mean_reward": 506.11397255182266,
"mean_length": 11193.09,
"survival_pct": 0.5596545,
"max_steps": 20000,
"loss": 1.3773071765899658,
"sps": 538.0889941452151
},
{
"update": 580,
"global_step": 2375680,
"num_episodes": 296,
"mean_reward": 506.11397255182266,
"mean_length": 11193.09,
"survival_pct": 0.5596545,
"max_steps": 20000,
"loss": 0.6394574046134949,
"sps": 502.97316152248266
},
{
"update": 585,
"global_step": 2396160,
"num_episodes": 297,
"mean_reward": 508.4939224600792,
"mean_length": 11391.04,
"survival_pct": 0.5695520000000001,
"max_steps": 20000,
"loss": 36.28646469116211,
"sps": 439.92401762976914
},
{
"update": 590,
"global_step": 2416640,
"num_episodes": 298,
"mean_reward": 510.8762067055702,
"mean_length": 11589.3,
"survival_pct": 0.579465,
"max_steps": 20000,
"loss": 13.898605346679688,
"sps": 282.2760969771352
},
{
"update": 595,
"global_step": 2437120,
"num_episodes": 301,
"mean_reward": 526.9176897263527,
"mean_length": 11688.03,
"survival_pct": 0.5844015,
"max_steps": 20000,
"loss": 38.274803161621094,
"sps": 332.3030413271302
},
{
"update": 600,
"global_step": 2457600,
"num_episodes": 303,
"mean_reward": 525.6999688172341,
"mean_length": 11569.45,
"survival_pct": 0.5784725000000001,
"max_steps": 20000,
"loss": 2.9384140968322754,
"sps": 364.8644804633228
},
{
"update": 605,
"global_step": 2478080,
"num_episodes": 304,
"mean_reward": 524.2420133042335,
"mean_length": 11426.62,
"survival_pct": 0.45706480000000005,
"max_steps": 25000,
"loss": 438.5118408203125,
"sps": 420.0558278495418
},
{
"update": 610,
"global_step": 2498560,
"num_episodes": 304,
"mean_reward": 524.2420133042335,
"mean_length": 11426.62,
"survival_pct": 0.45706480000000005,
"max_steps": 25000,
"loss": 6.759511947631836,
"sps": 435.9725569680644
},
{
"update": 615,
"global_step": 2519040,
"num_episodes": 304,
"mean_reward": 524.2420133042335,
"mean_length": 11426.62,
"survival_pct": 0.45706480000000005,
"max_steps": 25000,
"loss": 2.7961549758911133,
"sps": 483.2696887623864
},
{
"update": 620,
"global_step": 2539520,
"num_episodes": 304,
"mean_reward": 524.2420133042335,
"mean_length": 11426.62,
"survival_pct": 0.45706480000000005,
"max_steps": 25000,
"loss": 2.2368013858795166,
"sps": 488.6334457862468
},
{
"update": 625,
"global_step": 2560000,
"num_episodes": 309,
"mean_reward": 559.3683041572571,
"mean_length": 12243.92,
"survival_pct": 0.4897568,
"max_steps": 25000,
"loss": 706.3812255859375,
"sps": 240.30170576065763
},
{
"update": 630,
"global_step": 2580480,
"num_episodes": 311,
"mean_reward": 572.5723537635803,
"mean_length": 12360.26,
"survival_pct": 0.4944104,
"max_steps": 25000,
"loss": 13.363809585571289,
"sps": 266.8083792943256
},
{
"update": 635,
"global_step": 2600960,
"num_episodes": 316,
"mean_reward": 564.724127240181,
"mean_length": 11785.88,
"survival_pct": 0.47143519999999994,
"max_steps": 25000,
"loss": 6.69994592666626,
"sps": 303.78529409460765
},
{
"update": 640,
"global_step": 2621440,
"num_episodes": 316,
"mean_reward": 564.724127240181,
"mean_length": 11785.88,
"survival_pct": 0.47143519999999994,
"max_steps": 25000,
"loss": 629.8490600585938,
"sps": 347.05583673981266
},
{
"update": 645,
"global_step": 2641920,
"num_episodes": 325,
"mean_reward": 599.5753115653991,
"mean_length": 11091.83,
"survival_pct": 0.4436732,
"max_steps": 25000,
"loss": 14.624711990356445,
"sps": 202.6999148028545
},
{
"update": 650,
"global_step": 2662400,
"num_episodes": 326,
"mean_reward": 681.9963491630555,
"mean_length": 11191.83,
"survival_pct": 0.4476732,
"max_steps": 25000,
"loss": 14.961444854736328,
"sps": 324.3719674553881
},
{
"update": 655,
"global_step": 2682880,
"num_episodes": 329,
"mean_reward": 681.6811992406845,
"mean_length": 11184.4,
"survival_pct": 0.447376,
"max_steps": 25000,
"loss": 1098.8870849609375,
"sps": 282.5805927303007
},
{
"update": 660,
"global_step": 2703360,
"num_episodes": 329,
"mean_reward": 681.6811992406845,
"mean_length": 11184.4,
"survival_pct": 0.447376,
"max_steps": 25000,
"loss": 18.618370056152344,
"sps": 328.8874894706864
},
{
"update": 665,
"global_step": 2723840,
"num_episodes": 330,
"mean_reward": 669.1296841955185,
"mean_length": 11166.48,
"survival_pct": 0.4466592,
"max_steps": 25000,
"loss": 0.3438085615634918,
"sps": 350.7381600879147
},
{
"update": 670,
"global_step": 2744320,
"num_episodes": 333,
"mean_reward": 663.7387618637085,
"mean_length": 11120.64,
"survival_pct": 0.4448256,
"max_steps": 25000,
"loss": 1.9492148160934448,
"sps": 336.44611386700046
},
{
"update": 675,
"global_step": 2764800,
"num_episodes": 340,
"mean_reward": 764.0141823387146,
"mean_length": 11231.97,
"survival_pct": 0.4492788,
"max_steps": 25000,
"loss": 11.20479679107666,
"sps": 269.88464376416727
},
{
"update": 680,
"global_step": 2785280,
"num_episodes": 341,
"mean_reward": 763.7054350566864,
"mean_length": 11237.76,
"survival_pct": 0.44951040000000003,
"max_steps": 25000,
"loss": 2.3942978382110596,
"sps": 368.845880053541
},
{
"update": 685,
"global_step": 2805760,
"num_episodes": 341,
"mean_reward": 763.7054350566864,
"mean_length": 11237.76,
"survival_pct": 0.44951040000000003,
"max_steps": 25000,
"loss": 0.6632025241851807,
"sps": 437.7624284451732
},
{
"update": 690,
"global_step": 2826240,
"num_episodes": 341,
"mean_reward": 763.7054350566864,
"mean_length": 11237.76,
"survival_pct": 0.44951040000000003,
"max_steps": 25000,
"loss": 0.12569601833820343,
"sps": 456.53233478658586
},
{
"update": 695,
"global_step": 2846720,
"num_episodes": 343,
"mean_reward": 799.47291888237,
"mean_length": 11532.37,
"survival_pct": 0.4612948,
"max_steps": 25000,
"loss": 32.66535949707031,
"sps": 482.45747353519477
},
{
"update": 700,
"global_step": 2867200,
"num_episodes": 344,
"mean_reward": 900.5657841777802,
"mean_length": 11582.37,
"survival_pct": 0.4632948,
"max_steps": 25000,
"loss": 4.394363880157471,
"sps": 602.707199771208
},
{
"update": 705,
"global_step": 2887680,
"num_episodes": 344,
"mean_reward": 900.5657841777802,
"mean_length": 11582.37,
"survival_pct": 0.4632948,
"max_steps": 25000,
"loss": 1.1503143310546875,
"sps": 592.2907627828818
},
{
"update": 710,
"global_step": 2908160,
"num_episodes": 345,
"mean_reward": 917.1899351406097,
"mean_length": 11829.79,
"survival_pct": 0.47319160000000005,
"max_steps": 25000,
"loss": 20.407194137573242,
"sps": 459.7672415701048
},
{
"update": 715,
"global_step": 2928640,
"num_episodes": 346,
"mean_reward": 915.0635627651214,
"mean_length": 11657.27,
"survival_pct": 0.4662908,
"max_steps": 25000,
"loss": 51.53656768798828,
"sps": 484.2493919142758
},
{
"update": 720,
"global_step": 2949120,
"num_episodes": 347,
"mean_reward": 915.567684469223,
"mean_length": 11707.27,
"survival_pct": 0.4682908,
"max_steps": 25000,
"loss": 2.852640151977539,
"sps": 564.1661889935658
},
{
"update": 725,
"global_step": 2969600,
"num_episodes": 351,
"mean_reward": 933.7657800292968,
"mean_length": 11805.02,
"survival_pct": 0.47220080000000003,
"max_steps": 25000,
"loss": 37.6703987121582,
"sps": 269.4710162736009
},
{
"update": 730,
"global_step": 2990080,
"num_episodes": 356,
"mean_reward": 790.1477946281433,
"mean_length": 11063.86,
"survival_pct": 0.4425544,
"max_steps": 25000,
"loss": 92.7292709350586,
"sps": 249.14561378417451
},
{
"update": 735,
"global_step": 3010560,
"num_episodes": 356,
"mean_reward": 790.1477946281433,
"mean_length": 11063.86,
"survival_pct": 0.4425544,
"max_steps": 25000,
"loss": 3.52268648147583,
"sps": 336.33639220363955
},
{
"update": 740,
"global_step": 3031040,
"num_episodes": 356,
"mean_reward": 790.1477946281433,
"mean_length": 11063.86,
"survival_pct": 0.4425544,
"max_steps": 25000,
"loss": 1.0818921327590942,
"sps": 322.89272707726104
},
{
"update": 745,
"global_step": 3051520,
"num_episodes": 363,
"mean_reward": 836.2594112110138,
"mean_length": 10753.72,
"survival_pct": 0.4301488,
"max_steps": 25000,
"loss": 6.418513774871826,
"sps": 303.95242510324334
},
{
"update": 750,
"global_step": 3072000,
"num_episodes": 369,
"mean_reward": 865.2751739215851,
"mean_length": 10221.97,
"survival_pct": 0.4088788,
"max_steps": 25000,
"loss": 4.221797943115234,
"sps": 550.4274763185425
},
{
"update": 755,
"global_step": 3092480,
"num_episodes": 369,
"mean_reward": 865.2751739215851,
"mean_length": 10221.97,
"survival_pct": 0.4088788,
"max_steps": 25000,
"loss": 1.761741280555725,
"sps": 498.20561049907593
},
{
"update": 760,
"global_step": 3112960,
"num_episodes": 369,
"mean_reward": 865.2751739215851,
"mean_length": 10221.97,
"survival_pct": 0.4088788,
"max_steps": 25000,
"loss": 58.8026008605957,
"sps": 521.3350619859364
},
{
"update": 765,
"global_step": 3133440,
"num_episodes": 370,
"mean_reward": 860.6973748493194,
"mean_length": 10271.97,
"survival_pct": 0.4108788,
"max_steps": 25000,
"loss": 1.9308984279632568,
"sps": 438.4037568068571
},
{
"update": 770,
"global_step": 3153920,
"num_episodes": 373,
"mean_reward": 770.2798270845414,
"mean_length": 9927.86,
"survival_pct": 0.39711440000000003,
"max_steps": 25000,
"loss": 2.7318155765533447,
"sps": 348.45193944086344
},
{
"update": 775,
"global_step": 3174400,
"num_episodes": 377,
"mean_reward": 806.3474672365188,
"mean_length": 10020.65,
"survival_pct": 0.40082599999999996,
"max_steps": 25000,
"loss": 0.32981979846954346,
"sps": 555.3298506322647
},
{
"update": 780,
"global_step": 3194880,
"num_episodes": 383,
"mean_reward": 853.3938677740097,
"mean_length": 10073.78,
"survival_pct": 0.4029512,
"max_steps": 25000,
"loss": 0.16146810352802277,
"sps": 1099.1025857576844
},
{
"update": 785,
"global_step": 3215360,
"num_episodes": 383,
"mean_reward": 853.3938677740097,
"mean_length": 10073.78,
"survival_pct": 0.4029512,
"max_steps": 25000,
"loss": -0.09470260143280029,
"sps": 1762.63191717742
},
{
"update": 790,
"global_step": 3235840,
"num_episodes": 384,
"mean_reward": 853.8934272527695,
"mean_length": 10123.78,
"survival_pct": 0.4049512,
"max_steps": 25000,
"loss": -0.06440502405166626,
"sps": 1195.8323452559932
},
{
"update": 795,
"global_step": 3256320,
"num_episodes": 384,
"mean_reward": 853.8934272527695,
"mean_length": 10123.78,
"survival_pct": 0.4049512,
"max_steps": 25000,
"loss": -0.15972009301185608,
"sps": 888.4678009939021
},
{
"update": 800,
"global_step": 3276800,
"num_episodes": 388,
"mean_reward": 838.5986885023117,
"mean_length": 10010.11,
"survival_pct": 0.40040440000000005,
"max_steps": 25000,
"loss": 0.09807762503623962,
"sps": 471.1430031097595
},
{
"update": 805,
"global_step": 3297280,
"num_episodes": 388,
"mean_reward": 838.5986885023117,
"mean_length": 10010.11,
"survival_pct": 0.33367033333333335,
"max_steps": 30000,
"loss": -0.0691152960062027,
"sps": 1265.7956126806866
},
{
"update": 810,
"global_step": 3317760,
"num_episodes": 388,
"mean_reward": 838.5986885023117,
"mean_length": 10010.11,
"survival_pct": 0.33367033333333335,
"max_steps": 30000,
"loss": -0.1727883517742157,
"sps": 1231.865460337574
},
{
"update": 815,
"global_step": 3338240,
"num_episodes": 388,
"mean_reward": 838.5986885023117,
"mean_length": 10010.11,
"survival_pct": 0.33367033333333335,
"max_steps": 30000,
"loss": -0.11673803627490997,
"sps": 1256.6835286888843
},
{
"update": 820,
"global_step": 3358720,
"num_episodes": 388,
"mean_reward": 838.5986885023117,
"mean_length": 10010.11,
"survival_pct": 0.33367033333333335,
"max_steps": 30000,
"loss": -0.2521955370903015,
"sps": 1137.9313207515677
},
{
"update": 825,
"global_step": 3379200,
"num_episodes": 391,
"mean_reward": 839.7568208217621,
"mean_length": 10100.5,
"survival_pct": 0.33668333333333333,
"max_steps": 30000,
"loss": -0.10739608108997345,
"sps": 1220.8177867257723
},
{
"update": 830,
"global_step": 3399680,
"num_episodes": 395,
"mean_reward": 839.0241325330734,
"mean_length": 10101.66,
"survival_pct": 0.336722,
"max_steps": 30000,
"loss": 18.019046783447266,
"sps": 656.125042039298
},
{
"update": 835,
"global_step": 3420160,
"num_episodes": 402,
"mean_reward": 813.9229806566238,
"mean_length": 9366.46,
"survival_pct": 0.3122153333333333,
"max_steps": 30000,
"loss": 3.2142348289489746,
"sps": 348.9181765210399
},
{
"update": 840,
"global_step": 3440640,
"num_episodes": 408,
"mean_reward": 770.4422649216652,
"mean_length": 8932.2,
"survival_pct": 0.29774,
"max_steps": 30000,
"loss": 29.716121673583984,
"sps": 258.20931803063246
},
{
"update": 845,
"global_step": 3461120,
"num_episodes": 409,
"mean_reward": 767.6314169716835,
"mean_length": 8691.39,
"survival_pct": 0.289713,
"max_steps": 30000,
"loss": 0.5027515888214111,
"sps": 910.4499991149808
},
{
"update": 850,
"global_step": 3481600,
"num_episodes": 412,
"mean_reward": 755.9645525097847,
"mean_length": 8707.21,
"survival_pct": 0.2902403333333333,
"max_steps": 30000,
"loss": 2.889087438583374,
"sps": 492.2319923662775
},
{
"update": 855,
"global_step": 3502080,
"num_episodes": 415,
"mean_reward": 755.2824851679802,
"mean_length": 8717.11,
"survival_pct": 0.2905703333333334,
"max_steps": 30000,
"loss": -0.22625428438186646,
"sps": 1029.9811438873032
},
{
"update": 860,
"global_step": 3522560,
"num_episodes": 420,
"mean_reward": 659.1974053931236,
"mean_length": 9077.37,
"survival_pct": 0.30257900000000004,
"max_steps": 30000,
"loss": 2.288820505142212,
"sps": 1016.411393455018
},
{
"update": 865,
"global_step": 3543040,
"num_episodes": 420,
"mean_reward": 659.1974053931236,
"mean_length": 9077.37,
"survival_pct": 0.30257900000000004,
"max_steps": 30000,
"loss": 0.2482612133026123,
"sps": 1109.4319732205106
},
{
"update": 870,
"global_step": 3563520,
"num_episodes": 420,
"mean_reward": 659.1974053931236,
"mean_length": 9077.37,
"survival_pct": 0.30257900000000004,
"max_steps": 30000,
"loss": 0.09613563120365143,
"sps": 1196.7584168005542
},
{
"update": 875,
"global_step": 3584000,
"num_episodes": 420,
"mean_reward": 659.1974053931236,
"mean_length": 9077.37,
"survival_pct": 0.30257900000000004,
"max_steps": 30000,
"loss": 0.2699776887893677,
"sps": 1128.2554610475702
},
{
"update": 880,
"global_step": 3604480,
"num_episodes": 420,
"mean_reward": 659.1974053931236,
"mean_length": 9077.37,
"survival_pct": 0.30257900000000004,
"max_steps": 30000,
"loss": 0.020249858498573303,
"sps": 1162.0742075936244
},
{
"update": 885,
"global_step": 3624960,
"num_episodes": 421,
"mean_reward": 662.2708982825279,
"mean_length": 9375.82,
"survival_pct": 0.3125273333333333,
"max_steps": 30000,
"loss": 0.4013591408729553,
"sps": 1113.5340322378984
},
{
"update": 890,
"global_step": 3645440,
"num_episodes": 422,
"mean_reward": 665.2683095526695,
"mean_length": 9673.23,
"survival_pct": 0.322441,
"max_steps": 30000,
"loss": 2.35748028755188,
"sps": 803.983802083326
},
{
"update": 895,
"global_step": 3665920,
"num_episodes": 425,
"mean_reward": 654.4653234362602,
"mean_length": 9739.55,
"survival_pct": 0.3246516666666666,
"max_steps": 30000,
"loss": 19.605010986328125,
"sps": 421.5912921054319
},
{
"update": 900,
"global_step": 3686400,
"num_episodes": 428,
"mean_reward": 590.155419728756,
"mean_length": 10093.48,
"survival_pct": 0.3364493333333333,
"max_steps": 30000,
"loss": 38.713653564453125,
"sps": 564.4312985780526
},
{
"update": 905,
"global_step": 3706880,
"num_episodes": 428,
"mean_reward": 590.155419728756,
"mean_length": 10093.48,
"survival_pct": 0.3364493333333333,
"max_steps": 30000,
"loss": 136.64389038085938,
"sps": 613.1553074617179
},
{
"update": 910,
"global_step": 3727360,
"num_episodes": 429,
"mean_reward": 590.9192177844047,
"mean_length": 10167.56,
"survival_pct": 0.33891866666666665,
"max_steps": 30000,
"loss": 8.999781608581543,
"sps": 511.63656492456863
},
{
"update": 915,
"global_step": 3747840,
"num_episodes": 431,
"mean_reward": 590.5451223254204,
"mean_length": 10116.26,
"survival_pct": 0.33720866666666666,
"max_steps": 30000,
"loss": 49.023075103759766,
"sps": 506.30244315776105
},
{
"update": 920,
"global_step": 3768320,
"num_episodes": 434,
"mean_reward": 599.4833398604393,
"mean_length": 10470.43,
"survival_pct": 0.3490143333333333,
"max_steps": 30000,
"loss": 33.072776794433594,
"sps": 346.8145463929963
},
{
"update": 925,
"global_step": 3788800,
"num_episodes": 436,
"mean_reward": 494.2830310034752,
"mean_length": 10279.47,
"survival_pct": 0.342649,
"max_steps": 30000,
"loss": 0.7968235015869141,
"sps": 973.2809780471914
},
{
"update": 930,
"global_step": 3809280,
"num_episodes": 436,
"mean_reward": 494.2830310034752,
"mean_length": 10279.47,
"survival_pct": 0.342649,
"max_steps": 30000,
"loss": -0.05825723707675934,
"sps": 966.3113177290679
},
{
"update": 935,
"global_step": 3829760,
"num_episodes": 436,
"mean_reward": 494.2830310034752,
"mean_length": 10279.47,
"survival_pct": 0.342649,
"max_steps": 30000,
"loss": -0.10611464828252792,
"sps": 1015.3673402421392
},
{
"update": 940,
"global_step": 3850240,
"num_episodes": 437,
"mean_reward": 502.9565402960777,
"mean_length": 10567.82,
"survival_pct": 0.35226066666666667,
"max_steps": 30000,
"loss": -0.08356830477714539,
"sps": 944.3927727566447
},
{
"update": 945,
"global_step": 3870720,
"num_episodes": 439,
"mean_reward": 500.08963894605637,
"mean_length": 10318.88,
"survival_pct": 0.34396266666666664,
"max_steps": 30000,
"loss": 5.465578556060791,
"sps": 529.1770253242089
},
{
"update": 950,
"global_step": 3891200,
"num_episodes": 440,
"mean_reward": 503.0822184062004,
"mean_length": 10614.79,
"survival_pct": 0.35382633333333335,
"max_steps": 30000,
"loss": 3.838916301727295,
"sps": 760.7930670056104
},
{
"update": 955,
"global_step": 3911680,
"num_episodes": 442,
"mean_reward": 467.75900787115097,
"mean_length": 10391.71,
"survival_pct": 0.3463903333333333,
"max_steps": 30000,
"loss": -0.04477877914905548,
"sps": 965.6263182051849
},
{
"update": 960,
"global_step": 3932160,
"num_episodes": 445,
"mean_reward": 385.69126527786256,
"mean_length": 10244.33,
"survival_pct": 0.3414776666666667,
"max_steps": 30000,
"loss": 0.345672070980072,
"sps": 1268.264040705237
},
{
"update": 965,
"global_step": 3952640,
"num_episodes": 445,
"mean_reward": 385.69126527786256,
"mean_length": 10244.33,
"survival_pct": 0.3414776666666667,
"max_steps": 30000,
"loss": -0.09349031746387482,
"sps": 1253.8499203200772
},
{
"update": 970,
"global_step": 3973120,
"num_episodes": 446,
"mean_reward": 388.8185606575012,
"mean_length": 10516.85,
"survival_pct": 0.35056166666666666,
"max_steps": 30000,
"loss": 9.252518653869629,
"sps": 855.3500954683526
},
{
"update": 975,
"global_step": 3993600,
"num_episodes": 447,
"mean_reward": 386.00149038314817,
"mean_length": 10275.2,
"survival_pct": 0.3425066666666667,
"max_steps": 30000,
"loss": 21.241113662719727,
"sps": 950.7120230957267
},
{
"update": 980,
"global_step": 4014080,
"num_episodes": 453,
"mean_reward": 415.23604825496676,
"mean_length": 10424.75,
"survival_pct": 0.34749166666666664,
"max_steps": 30000,
"loss": 6.508986473083496,
"sps": 715.3695692293137
},
{
"update": 985,
"global_step": 4034560,
"num_episodes": 458,
"mean_reward": 411.35455381393433,
"mean_length": 10193.61,
"survival_pct": 0.339787,
"max_steps": 30000,
"loss": 14.101386070251465,
"sps": 436.54294748826067
},
{
"update": 990,
"global_step": 4055040,
"num_episodes": 460,
"mean_reward": 352.4448234796524,
"mean_length": 9962.59,
"survival_pct": 0.3320863333333333,
"max_steps": 30000,
"loss": 5.007307529449463,
"sps": 657.8546600358904
},
{
"update": 995,
"global_step": 4075520,
"num_episodes": 461,
"mean_reward": 352.60661952495576,
"mean_length": 9978.71,
"survival_pct": 0.33262366666666665,
"max_steps": 30000,
"loss": -0.05159700661897659,
"sps": 731.8566720639022
},
{
"update": 1000,
"global_step": 4096000,
"num_episodes": 461,
"mean_reward": 352.60661952495576,
"mean_length": 9978.71,
"survival_pct": 0.33262366666666665,
"max_steps": 30000,
"loss": -0.05063310265541077,
"sps": 1192.9198062233531
},
{
"update": 1005,
"global_step": 4116480,
"num_episodes": 461,
"mean_reward": 352.60661952495576,
"mean_length": 9978.71,
"survival_pct": 0.33262366666666665,
"max_steps": 30000,
"loss": -0.12464120984077454,
"sps": 1133.1887649612727
},
{
"update": 1010,
"global_step": 4136960,
"num_episodes": 465,
"mean_reward": 371.26235566139223,
"mean_length": 10558.13,
"survival_pct": 0.35193766666666665,
"max_steps": 30000,
"loss": 7.669186115264893,
"sps": 701.8889529681794
},
{
"update": 1015,
"global_step": 4157440,
"num_episodes": 466,
"mean_reward": 375.76679421424865,
"mean_length": 10855.0,
"survival_pct": 0.36183333333333334,
"max_steps": 30000,
"loss": 12.382041931152344,
"sps": 489.4857959286267
},
{
"update": 1020,
"global_step": 4177920,
"num_episodes": 469,
"mean_reward": 341.31604763984683,
"mean_length": 10927.42,
"survival_pct": 0.3642473333333333,
"max_steps": 30000,
"loss": 19.83820915222168,
"sps": 590.4760026903639
},
{
"update": 1025,
"global_step": 4198400,
"num_episodes": 473,
"mean_reward": 331.36366960048673,
"mean_length": 10432.91,
"survival_pct": 0.34776366666666664,
"max_steps": 30000,
"loss": 183.8989715576172,
"sps": 434.18935040659056
},
{
"update": 1030,
"global_step": 4218880,
"num_episodes": 475,
"mean_reward": 303.6989562559128,
"mean_length": 10480.3,
"survival_pct": 0.3493433333333333,
"max_steps": 30000,
"loss": 139.46194458007812,
"sps": 590.2434550847422
},
{
"update": 1035,
"global_step": 4239360,
"num_episodes": 475,
"mean_reward": 303.6989562559128,
"mean_length": 10480.3,
"survival_pct": 0.3493433333333333,
"max_steps": 30000,
"loss": 4.460475444793701,
"sps": 539.7108366907394
},
{
"update": 1040,
"global_step": 4259840,
"num_episodes": 477,
"mean_reward": 304.03712359905245,
"mean_length": 10529.43,
"survival_pct": 0.350981,
"max_steps": 30000,
"loss": 41.53654098510742,
"sps": 570.0864654535922
},
{
"update": 1045,
"global_step": 4280320,
"num_episodes": 482,
"mean_reward": 257.8429533290863,
"mean_length": 10591.03,
"survival_pct": 0.35303433333333334,
"max_steps": 30000,
"loss": 12.68658447265625,
"sps": 306.11899074758105
},
{
"update": 1050,
"global_step": 4300800,
"num_episodes": 484,
"mean_reward": 258.245273809433,
"mean_length": 10666.92,
"survival_pct": 0.355564,
"max_steps": 30000,
"loss": 34.416778564453125,
"sps": 343.9275197061659
},
{
"update": 1055,
"global_step": 4321280,
"num_episodes": 484,
"mean_reward": 258.245273809433,
"mean_length": 10666.92,
"survival_pct": 0.355564,
"max_steps": 30000,
"loss": 0.8467625975608826,
"sps": 529.8299357550097
},
{
"update": 1060,
"global_step": 4341760,
"num_episodes": 484,
"mean_reward": 258.245273809433,
"mean_length": 10666.92,
"survival_pct": 0.355564,
"max_steps": 30000,
"loss": 0.5682471990585327,
"sps": 729.0597707102293
},
{
"update": 1065,
"global_step": 4362240,
"num_episodes": 484,
"mean_reward": 258.245273809433,
"mean_length": 10666.92,
"survival_pct": 0.355564,
"max_steps": 30000,
"loss": 0.36413297057151794,
"sps": 703.8506900902968
},
{
"update": 1070,
"global_step": 4382720,
"num_episodes": 491,
"mean_reward": 255.56333970069886,
"mean_length": 10467.79,
"survival_pct": 0.34892633333333334,
"max_steps": 30000,
"loss": 30.412242889404297,
"sps": 257.45270078331555
},
{
"update": 1075,
"global_step": 4403200,
"num_episodes": 491,
"mean_reward": 255.56333970069886,
"mean_length": 10467.79,
"survival_pct": 0.34892633333333334,
"max_steps": 30000,
"loss": 0.4469672739505768,
"sps": 393.6182671059828
},
{
"update": 1080,
"global_step": 4423680,
"num_episodes": 493,
"mean_reward": 270.52365421295167,
"mean_length": 10466.3,
"survival_pct": 0.34887666666666667,
"max_steps": 30000,
"loss": 1.5025949478149414,
"sps": 297.01417371765183
},
{
"update": 1085,
"global_step": 4444160,
"num_episodes": 494,
"mean_reward": 351.42790958404544,
"mean_length": 10466.3,
"survival_pct": 0.34887666666666667,
"max_steps": 30000,
"loss": 713.0115356445312,
"sps": 337.53102630233496
},
{
"update": 1090,
"global_step": 4464640,
"num_episodes": 501,
"mean_reward": 389.6291408967972,
"mean_length": 10484.48,
"survival_pct": 0.34948266666666666,
"max_steps": 30000,
"loss": 376.2599182128906,
"sps": 322.42891915898485
},
{
"update": 1095,
"global_step": 4485120,
"num_episodes": 503,
"mean_reward": 389.9803589296341,
"mean_length": 10507.49,
"survival_pct": 0.3502496666666667,
"max_steps": 30000,
"loss": 1246.39453125,
"sps": 366.59931296887805
},
{
"update": 1100,
"global_step": 4505600,
"num_episodes": 503,
"mean_reward": 389.9803589296341,
"mean_length": 10507.49,
"survival_pct": 0.3502496666666667,
"max_steps": 30000,
"loss": 0.49186083674430847,
"sps": 434.33848528937693
},
{
"update": 1105,
"global_step": 4526080,
"num_episodes": 505,
"mean_reward": 402.4173453474045,
"mean_length": 10496.98,
"survival_pct": 0.34989933333333334,
"max_steps": 30000,
"loss": 17.757164001464844,
"sps": 288.5970778332255
},
{
"update": 1110,
"global_step": 4546560,
"num_episodes": 508,
"mean_reward": 443.8068909239769,
"mean_length": 11091.98,
"survival_pct": 0.36973266666666665,
"max_steps": 30000,
"loss": 69.42852783203125,
"sps": 284.1876581819077
},
{
"update": 1115,
"global_step": 4567040,
"num_episodes": 508,
"mean_reward": 443.8068909239769,
"mean_length": 11091.98,
"survival_pct": 0.36973266666666665,
"max_steps": 30000,
"loss": 1.8417774438858032,
"sps": 318.68963440900745
},
{
"update": 1120,
"global_step": 4587520,
"num_episodes": 511,
"mean_reward": 441.4550348258019,
"mean_length": 10837.05,
"survival_pct": 0.361235,
"max_steps": 30000,
"loss": 255.734619140625,
"sps": 297.64729649475885
},
{
"update": 1125,
"global_step": 4608000,
"num_episodes": 511,
"mean_reward": 441.4550348258019,
"mean_length": 10837.05,
"survival_pct": 0.361235,
"max_steps": 30000,
"loss": 1.7074986696243286,
"sps": 348.6616523408526
},
{
"update": 1130,
"global_step": 4628480,
"num_episodes": 514,
"mean_reward": 492.0692998147011,
"mean_length": 11430.47,
"survival_pct": 0.38101566666666664,
"max_steps": 30000,
"loss": 2.311823844909668,
"sps": 406.9030800405532
},
{
"update": 1135,
"global_step": 4648960,
"num_episodes": 515,
"mean_reward": 563.7019203495979,
"mean_length": 11721.54,
"survival_pct": 0.390718,
"max_steps": 30000,
"loss": 1047.7276611328125,
"sps": 314.10929898406806
},
{
"update": 1140,
"global_step": 4669440,
"num_episodes": 516,
"mean_reward": 560.5978558659554,
"mean_length": 11438.6,
"survival_pct": 0.38128666666666666,
"max_steps": 30000,
"loss": 9.904751777648926,
"sps": 403.9643544594583
},
{
"update": 1145,
"global_step": 4689920,
"num_episodes": 516,
"mean_reward": 560.5978558659554,
"mean_length": 11438.6,
"survival_pct": 0.38128666666666666,
"max_steps": 30000,
"loss": 0.4376460909843445,
"sps": 554.0697388186311
},
{
"update": 1150,
"global_step": 4710400,
"num_episodes": 518,
"mean_reward": 564.0319487595558,
"mean_length": 11749.21,
"survival_pct": 0.3916403333333333,
"max_steps": 30000,
"loss": 0.26738616824150085,
"sps": 579.422583932766
},
{
"update": 1155,
"global_step": 4730880,
"num_episodes": 519,
"mean_reward": 564.0318553757668,
"mean_length": 11749.21,
"survival_pct": 0.3916403333333333,
"max_steps": 30000,
"loss": 2.65568208694458,
"sps": 435.32093187971185
},
{
"update": 1160,
"global_step": 4751360,
"num_episodes": 520,
"mean_reward": 606.8646422314644,
"mean_length": 12037.17,
"survival_pct": 0.401239,
"max_steps": 30000,
"loss": 0.08337657153606415,
"sps": 480.58739931645295
},
{
"update": 1165,
"global_step": 4771840,
"num_episodes": 523,
"mean_reward": 600.3835196709633,
"mean_length": 11435.34,
"survival_pct": 0.381178,
"max_steps": 30000,
"loss": 8.820674896240234,
"sps": 284.63083015732
},
{
"update": 1170,
"global_step": 4792320,
"num_episodes": 524,
"mean_reward": 612.3443924736977,
"mean_length": 11435.34,
"survival_pct": 0.381178,
"max_steps": 30000,
"loss": 34.61191940307617,
"sps": 250.67135617300423
},
{
"update": 1175,
"global_step": 4812800,
"num_episodes": 525,
"mean_reward": 612.3594747567176,
"mean_length": 11474.33,
"survival_pct": 0.38247766666666666,
"max_steps": 30000,
"loss": 1.2452768087387085,
"sps": 607.4645366539573
},
{
"update": 1180,
"global_step": 4833280,
"num_episodes": 527,
"mean_reward": 692.7924189066887,
"mean_length": 11466.93,
"survival_pct": 0.382231,
"max_steps": 30000,
"loss": 0.4541545510292053,
"sps": 527.819759792867
},
{
"update": 1185,
"global_step": 4853760,
"num_episodes": 527,
"mean_reward": 692.7924189066887,
"mean_length": 11466.93,
"survival_pct": 0.382231,
"max_steps": 30000,
"loss": 0.30217307806015015,
"sps": 510.7606822708712
},
{
"update": 1190,
"global_step": 4874240,
"num_episodes": 531,
"mean_reward": 708.1065727066994,
"mean_length": 11363.17,
"survival_pct": 0.3787723333333333,
"max_steps": 30000,
"loss": 0.9803248643875122,
"sps": 410.8110604092221
},
{
"update": 1195,
"global_step": 4894720,
"num_episodes": 531,
"mean_reward": 708.1065727066994,
"mean_length": 11363.17,
"survival_pct": 0.3787723333333333,
"max_steps": 30000,
"loss": 0.13086289167404175,
"sps": 615.3777693911919
},
{
"update": 1200,
"global_step": 4915200,
"num_episodes": 534,
"mean_reward": 699.0353853631019,
"mean_length": 11058.72,
"survival_pct": 0.36862399999999995,
"max_steps": 30000,
"loss": 712.1206665039062,
"sps": 388.8735852210615
},
{
"update": 1205,
"global_step": 4935680,
"num_episodes": 535,
"mean_reward": 698.9299844956398,
"mean_length": 11044.65,
"survival_pct": 0.27611625,
"max_steps": 40000,
"loss": 0.9771831035614014,
"sps": 687.4149876254356
},
{
"update": 1210,
"global_step": 4956160,
"num_episodes": 535,
"mean_reward": 698.9299844956398,
"mean_length": 11044.65,
"survival_pct": 0.27611625,
"max_steps": 40000,
"loss": 0.8561661243438721,
"sps": 675.4253456273739
},
{
"update": 1215,
"global_step": 4976640,
"num_episodes": 535,
"mean_reward": 698.9299844956398,
"mean_length": 11044.65,
"survival_pct": 0.27611625,
"max_steps": 40000,
"loss": 0.5291672945022583,
"sps": 747.9105659323011
},
{
"update": 1220,
"global_step": 4997120,
"num_episodes": 535,
"mean_reward": 698.9299844956398,
"mean_length": 11044.65,
"survival_pct": 0.27611625,
"max_steps": 40000,
"loss": 0.3995021879673004,
"sps": 710.3102224548963
},
{
"update": 1225,
"global_step": 5017600,
"num_episodes": 535,
"mean_reward": 698.9299844956398,
"mean_length": 11044.65,
"survival_pct": 0.27611625,
"max_steps": 40000,
"loss": 0.19824837148189545,
"sps": 714.1081614413822
},
{
"update": 1230,
"global_step": 5038080,
"num_episodes": 537,
"mean_reward": 694.6386151909828,
"mean_length": 11117.3,
"survival_pct": 0.27793249999999997,
"max_steps": 40000,
"loss": 0.6759960651397705,
"sps": 620.8841198425342
},
{
"update": 1235,
"global_step": 5058560,
"num_episodes": 543,
"mean_reward": 676.4015409398079,
"mean_length": 10875.12,
"survival_pct": 0.271878,
"max_steps": 40000,
"loss": 8.773111343383789,
"sps": 504.81741951884067
},
{
"update": 1240,
"global_step": 5079040,
"num_episodes": 544,
"mean_reward": 655.3968575978279,
"mean_length": 10576.9,
"survival_pct": 0.2644225,
"max_steps": 40000,
"loss": 0.13716170191764832,
"sps": 568.9063484934894
},
{
"update": 1245,
"global_step": 5099520,
"num_episodes": 544,
"mean_reward": 655.3968575978279,
"mean_length": 10576.9,
"survival_pct": 0.2644225,
"max_steps": 40000,
"loss": -0.016505300998687744,
"sps": 639.2019983734826
},
{
"update": 1250,
"global_step": 5120000,
"num_episodes": 546,
"mean_reward": 697.8135006427765,
"mean_length": 11074.28,
"survival_pct": 0.276857,
"max_steps": 40000,
"loss": 44.272918701171875,
"sps": 571.4595742161331
},
{
"update": 1255,
"global_step": 5140480,
"num_episodes": 546,
"mean_reward": 697.8135006427765,
"mean_length": 11074.28,
"survival_pct": 0.276857,
"max_steps": 40000,
"loss": 402.4531555175781,
"sps": 630.3028273736365
},
{
"update": 1260,
"global_step": 5160960,
"num_episodes": 551,
"mean_reward": 682.566458747387,
"mean_length": 11177.02,
"survival_pct": 0.2794255,
"max_steps": 40000,
"loss": 59.98308181762695,
"sps": 566.8093758374978
},
{
"update": 1265,
"global_step": 5181440,
"num_episodes": 553,
"mean_reward": 696.4643092989921,
"mean_length": 10984.46,
"survival_pct": 0.27461149999999995,
"max_steps": 40000,
"loss": 1.1551482677459717,
"sps": 808.9894187333804
},
{
"update": 1270,
"global_step": 5201920,
"num_episodes": 556,
"mean_reward": 701.1732182240486,
"mean_length": 11378.53,
"survival_pct": 0.28446325,
"max_steps": 40000,
"loss": 2.668344020843506,
"sps": 746.5326784132175
},
{
"update": 1275,
"global_step": 5222400,
"num_episodes": 556,
"mean_reward": 701.1732182240486,
"mean_length": 11378.53,
"survival_pct": 0.28446325,
"max_steps": 40000,
"loss": 0.8112964630126953,
"sps": 834.4341431680355
},
{
"update": 1280,
"global_step": 5242880,
"num_episodes": 559,
"mean_reward": 707.131958372593,
"mean_length": 11746.87,
"survival_pct": 0.29367175,
"max_steps": 40000,
"loss": 8.622824668884277,
"sps": 541.8830935414046
},
{
"update": 1285,
"global_step": 5263360,
"num_episodes": 560,
"mean_reward": 707.348245446682,
"mean_length": 11769.75,
"survival_pct": 0.29424375,
"max_steps": 40000,
"loss": 6.280955791473389,
"sps": 788.0408244951375
},
{
"update": 1290,
"global_step": 5283840,
"num_episodes": 560,
"mean_reward": 707.348245446682,
"mean_length": 11769.75,
"survival_pct": 0.29424375,
"max_steps": 40000,
"loss": 0.17306624352931976,
"sps": 826.4544197536233
},
{
"update": 1295,
"global_step": 5304320,
"num_episodes": 560,
"mean_reward": 707.348245446682,
"mean_length": 11769.75,
"survival_pct": 0.29424375,
"max_steps": 40000,
"loss": -0.04585009068250656,
"sps": 839.4415230223722
},
{
"update": 1300,
"global_step": 5324800,
"num_episodes": 560,
"mean_reward": 707.348245446682,
"mean_length": 11769.75,
"survival_pct": 0.29424375,
"max_steps": 40000,
"loss": -0.1055583506822586,
"sps": 831.0648597366617
},
{
"update": 1305,
"global_step": 5345280,
"num_episodes": 560,
"mean_reward": 707.348245446682,
"mean_length": 11769.75,
"survival_pct": 0.29424375,
"max_steps": 40000,
"loss": 0.10219299793243408,
"sps": 850.5287420009836
},
{
"update": 1310,
"global_step": 5365760,
"num_episodes": 561,
"mean_reward": 711.5321604895591,
"mean_length": 12147.49,
"survival_pct": 0.30368725,
"max_steps": 40000,
"loss": 1.737269639968872,
"sps": 713.2928399136136
},
{
"update": 1315,
"global_step": 5386240,
"num_episodes": 563,
"mean_reward": 705.7887069511413,
"mean_length": 12247.84,
"survival_pct": 0.306196,
"max_steps": 40000,
"loss": 146.1320037841797,
"sps": 515.8274568639007
},
{
"update": 1320,
"global_step": 5406720,
"num_episodes": 563,
"mean_reward": 705.7887069511413,
"mean_length": 12247.84,
"survival_pct": 0.306196,
"max_steps": 40000,
"loss": 0.19398686289787292,
"sps": 800.8556253753457
},
{
"update": 1325,
"global_step": 5427200,
"num_episodes": 564,
"mean_reward": 722.067927532196,
"mean_length": 12347.84,
"survival_pct": 0.308696,
"max_steps": 40000,
"loss": 72.42268371582031,
"sps": 507.17764221119916
},
{
"update": 1330,
"global_step": 5447680,
"num_episodes": 567,
"mean_reward": 809.5898822021485,
"mean_length": 12150.75,
"survival_pct": 0.30376875,
"max_steps": 40000,
"loss": 0.290306031703949,
"sps": 764.8144386668907
},
{
"update": 1335,
"global_step": 5468160,
"num_episodes": 567,
"mean_reward": 809.5898822021485,
"mean_length": 12150.75,
"survival_pct": 0.30376875,
"max_steps": 40000,
"loss": 0.8032262325286865,
"sps": 799.1206249673232
},
{
"update": 1340,
"global_step": 5488640,
"num_episodes": 568,
"mean_reward": 813.5851877593994,
"mean_length": 12546.64,
"survival_pct": 0.313666,
"max_steps": 40000,
"loss": 60.33620071411133,
"sps": 555.4755949712716
},
{
"update": 1345,
"global_step": 5509120,
"num_episodes": 570,
"mean_reward": 897.157964668274,
"mean_length": 13314.53,
"survival_pct": 0.33286325,
"max_steps": 40000,
"loss": 794.7894897460938,
"sps": 305.32909580240744
},
{
"update": 1350,
"global_step": 5529600,
"num_episodes": 571,
"mean_reward": 897.837958946228,
"mean_length": 13330.75,
"survival_pct": 0.33326875,
"max_steps": 40000,
"loss": -0.03776288032531738,
"sps": 738.9189775623264
},
{
"update": 1355,
"global_step": 5550080,
"num_episodes": 571,
"mean_reward": 897.837958946228,
"mean_length": 13330.75,
"survival_pct": 0.33326875,
"max_steps": 40000,
"loss": 0.003855481743812561,
"sps": 837.3194861665531
},
{
"update": 1360,
"global_step": 5570560,
"num_episodes": 571,
"mean_reward": 897.837958946228,
"mean_length": 13330.75,
"survival_pct": 0.33326875,
"max_steps": 40000,
"loss": -0.0835946649312973,
"sps": 852.7604200993885
},
{
"update": 1365,
"global_step": 5591040,
"num_episodes": 573,
"mean_reward": 901.498504357338,
"mean_length": 13730.89,
"survival_pct": 0.34327225,
"max_steps": 40000,
"loss": 0.6666049957275391,
"sps": 798.1656538062524
},
{
"update": 1370,
"global_step": 5611520,
"num_episodes": 573,
"mean_reward": 901.498504357338,
"mean_length": 13730.89,
"survival_pct": 0.34327225,
"max_steps": 40000,
"loss": -0.11962562799453735,
"sps": 837.7882020187046
},
{
"update": 1375,
"global_step": 5632000,
"num_episodes": 573,
"mean_reward": 901.498504357338,
"mean_length": 13730.89,
"survival_pct": 0.34327225,
"max_steps": 40000,
"loss": 0.004459500312805176,
"sps": 823.1821543483084
},
{
"update": 1380,
"global_step": 5652480,
"num_episodes": 573,
"mean_reward": 901.498504357338,
"mean_length": 13730.89,
"survival_pct": 0.34327225,
"max_steps": 40000,
"loss": -0.17405246198177338,
"sps": 832.9659390943079
},
{
"update": 1385,
"global_step": 5672960,
"num_episodes": 575,
"mean_reward": 940.8680406999588,
"mean_length": 13834.99,
"survival_pct": 0.34587475,
"max_steps": 40000,
"loss": 24.183181762695312,
"sps": 560.738665689664
},
{
"update": 1390,
"global_step": 5693440,
"num_episodes": 575,
"mean_reward": 940.8680406999588,
"mean_length": 13834.99,
"survival_pct": 0.34587475,
"max_steps": 40000,
"loss": 0.48853129148483276,
"sps": 896.2171670714368
},
{
"update": 1395,
"global_step": 5713920,
"num_episodes": 576,
"mean_reward": 982.5036016130448,
"mean_length": 13934.99,
"survival_pct": 0.34837475,
"max_steps": 40000,
"loss": 8.266661643981934,
"sps": 749.770689049747
},
{
"update": 1400,
"global_step": 5734400,
"num_episodes": 576,
"mean_reward": 982.5036016130448,
"mean_length": 13934.99,
"survival_pct": 0.34837475,
"max_steps": 40000,
"loss": -0.013010233640670776,
"sps": 869.1728201417309
},
{
"update": 1405,
"global_step": 5754880,
"num_episodes": 579,
"mean_reward": 1029.220560479164,
"mean_length": 14034.37,
"survival_pct": 0.35085925,
"max_steps": 40000,
"loss": 29.333189010620117,
"sps": 539.5250549272407
},
{
"update": 1410,
"global_step": 5775360,
"num_episodes": 585,
"mean_reward": 1027.995834054947,
"mean_length": 13820.46,
"survival_pct": 0.34551149999999997,
"max_steps": 40000,
"loss": 18.8126220703125,
"sps": 439.6503781444294
},
{
"update": 1415,
"global_step": 5795840,
"num_episodes": 585,
"mean_reward": 1027.995834054947,
"mean_length": 13820.46,
"survival_pct": 0.34551149999999997,
"max_steps": 40000,
"loss": 0.8571314811706543,
"sps": 639.3946228140344
},
{
"update": 1420,
"global_step": 5816320,
"num_episodes": 586,
"mean_reward": 1043.9769775485993,
"mean_length": 13920.46,
"survival_pct": 0.3480115,
"max_steps": 40000,
"loss": 17.923433303833008,
"sps": 384.01858038202676
},
{
"update": 1425,
"global_step": 5836800,
"num_episodes": 587,
"mean_reward": 1106.1342781925202,
"mean_length": 14317.99,
"survival_pct": 0.35794975,
"max_steps": 40000,
"loss": 9.14981460571289,
"sps": 439.45182965039373
},
{
"update": 1430,
"global_step": 5857280,
"num_episodes": 591,
"mean_reward": 1116.412140932083,
"mean_length": 14771.51,
"survival_pct": 0.36928775,
"max_steps": 40000,
"loss": 1.7514079809188843,
"sps": 662.6907944527344
},
{
"update": 1435,
"global_step": 5877760,
"num_episodes": 591,
"mean_reward": 1116.412140932083,
"mean_length": 14771.51,
"survival_pct": 0.36928775,
"max_steps": 40000,
"loss": 0.37396010756492615,
"sps": 903.1148649948647
},
{
"update": 1440,
"global_step": 5898240,
"num_episodes": 591,
"mean_reward": 1116.412140932083,
"mean_length": 14771.51,
"survival_pct": 0.36928775,
"max_steps": 40000,
"loss": 0.057983383536338806,
"sps": 850.523983894577
},
{
"update": 1445,
"global_step": 5918720,
"num_episodes": 593,
"mean_reward": 1102.7952147102355,
"mean_length": 14889.53,
"survival_pct": 0.37223825,
"max_steps": 40000,
"loss": 83.34320831298828,
"sps": 526.5779746081431
},
{
"update": 1450,
"global_step": 5939200,
"num_episodes": 595,
"mean_reward": 1018.396473865509,
"mean_length": 14585.34,
"survival_pct": 0.3646335,
"max_steps": 40000,
"loss": 6.169382572174072,
"sps": 451.6525927001659
},
{
"update": 1455,
"global_step": 5959680,
"num_episodes": 596,
"mean_reward": 1018.2012248802185,
"mean_length": 14603.1,
"survival_pct": 0.3650775,
"max_steps": 40000,
"loss": 1.4308723211288452,
"sps": 619.623956950883
},
{
"update": 1460,
"global_step": 5980160,
"num_episodes": 596,
"mean_reward": 1018.2012248802185,
"mean_length": 14603.1,
"survival_pct": 0.3650775,
"max_steps": 40000,
"loss": 0.18915529549121857,
"sps": 800.6110593469816
},
{
"update": 1465,
"global_step": 6000640,
"num_episodes": 597,
"mean_reward": 1022.199059085846,
"mean_length": 14999.43,
"survival_pct": 0.37498575,
"max_steps": 40000,
"loss": -0.0486217737197876,
"sps": 813.4318788464009
},
{
"update": 1470,
"global_step": 6021120,
"num_episodes": 597,
"mean_reward": 1022.199059085846,
"mean_length": 14999.43,
"survival_pct": 0.37498575,
"max_steps": 40000,
"loss": 0.03587697446346283,
"sps": 825.3421945718204
},
{
"update": 1475,
"global_step": 6041600,
"num_episodes": 598,
"mean_reward": 1041.5796708869934,
"mean_length": 15399.05,
"survival_pct": 0.38497624999999996,
"max_steps": 40000,
"loss": 0.06574638187885284,
"sps": 875.8211361821493
},
{
"update": 1480,
"global_step": 6062080,
"num_episodes": 598,
"mean_reward": 1041.5796708869934,
"mean_length": 15399.05,
"survival_pct": 0.38497624999999996,
"max_steps": 40000,
"loss": -0.11098746955394745,
"sps": 970.9604971491241
},
{
"update": 1485,
"global_step": 6082560,
"num_episodes": 598,
"mean_reward": 1041.5796708869934,
"mean_length": 15399.05,
"survival_pct": 0.38497624999999996,
"max_steps": 40000,
"loss": -0.16564123332500458,
"sps": 941.8806003191344
},
{
"update": 1490,
"global_step": 6103040,
"num_episodes": 601,
"mean_reward": 1025.6624535942078,
"mean_length": 15891.22,
"survival_pct": 0.3972805,
"max_steps": 40000,
"loss": 257.9145202636719,
"sps": 562.7793013045049
},
{
"update": 1495,
"global_step": 6123520,
"num_episodes": 602,
"mean_reward": 1025.6871674919128,
"mean_length": 15904.06,
"survival_pct": 0.3976015,
"max_steps": 40000,
"loss": 8.345800399780273,
"sps": 713.7930964796354
},
{
"update": 1500,
"global_step": 6144000,
"num_episodes": 604,
"mean_reward": 1014.5235131645203,
"mean_length": 16004.56,
"survival_pct": 0.40011399999999997,
"max_steps": 40000,
"loss": 6.72609806060791,
"sps": 535.8752056296396
},
{
"update": 1505,
"global_step": 6164480,
"num_episodes": 606,
"mean_reward": 1021.3157058906555,
"mean_length": 16107.38,
"survival_pct": 0.4026845,
"max_steps": 40000,
"loss": 10.722652435302734,
"sps": 630.4906569968188
},
{
"update": 1510,
"global_step": 6184960,
"num_episodes": 607,
"mean_reward": 1004.4655008125305,
"mean_length": 16207.38,
"survival_pct": 0.4051845,
"max_steps": 40000,
"loss": 2.4828217029571533,
"sps": 838.6584245149437
},
{
"update": 1515,
"global_step": 6205440,
"num_episodes": 607,
"mean_reward": 1004.4655008125305,
"mean_length": 16207.38,
"survival_pct": 0.4051845,
"max_steps": 40000,
"loss": 1.270835280418396,
"sps": 822.396289748599
},
{
"update": 1520,
"global_step": 6225920,
"num_episodes": 607,
"mean_reward": 1004.4655008125305,
"mean_length": 16207.38,
"survival_pct": 0.4051845,
"max_steps": 40000,
"loss": 1.3082380294799805,
"sps": 817.9383133650281
},
{
"update": 1525,
"global_step": 6246400,
"num_episodes": 607,
"mean_reward": 1004.4655008125305,
"mean_length": 16207.38,
"survival_pct": 0.3241476,
"max_steps": 50000,
"loss": 0.6200645565986633,
"sps": 810.1343000695222
},
{
"update": 1530,
"global_step": 6266880,
"num_episodes": 607,
"mean_reward": 1004.4655008125305,
"mean_length": 16207.38,
"survival_pct": 0.3241476,
"max_steps": 50000,
"loss": 0.4365927278995514,
"sps": 815.0941587526099
},
{
"update": 1535,
"global_step": 6287360,
"num_episodes": 607,
"mean_reward": 1004.4655008125305,
"mean_length": 16207.38,
"survival_pct": 0.3241476,
"max_steps": 50000,
"loss": 0.3482319116592407,
"sps": 778.7197208191486
},
{
"update": 1540,
"global_step": 6307840,
"num_episodes": 607,
"mean_reward": 1004.4655008125305,
"mean_length": 16207.38,
"survival_pct": 0.3241476,
"max_steps": 50000,
"loss": 0.30282458662986755,
"sps": 856.1091418900583
},
{
"update": 1545,
"global_step": 6328320,
"num_episodes": 607,
"mean_reward": 1004.4655008125305,
"mean_length": 16207.38,
"survival_pct": 0.3241476,
"max_steps": 50000,
"loss": 0.3217318058013916,
"sps": 780.544661851666
},
{
"update": 1550,
"global_step": 6348800,
"num_episodes": 607,
"mean_reward": 1004.4655008125305,
"mean_length": 16207.38,
"survival_pct": 0.3241476,
"max_steps": 50000,
"loss": 0.012079894542694092,
"sps": 780.2026645895013
},
{
"update": 1555,
"global_step": 6369280,
"num_episodes": 609,
"mean_reward": 1009.2189987373353,
"mean_length": 16649.64,
"survival_pct": 0.3329928,
"max_steps": 50000,
"loss": 0.06771233677864075,
"sps": 827.805833049566
},
{
"update": 1560,
"global_step": 6389760,
"num_episodes": 609,
"mean_reward": 1009.2189987373353,
"mean_length": 16649.64,
"survival_pct": 0.3329928,
"max_steps": 50000,
"loss": 0.09607579559087753,
"sps": 794.12397331076
},
{
"update": 1565,
"global_step": 6410240,
"num_episodes": 612,
"mean_reward": 972.2015235805511,
"mean_length": 16863.26,
"survival_pct": 0.3372652,
"max_steps": 50000,
"loss": 31.345748901367188,
"sps": 575.1634041964817
},
{
"update": 1570,
"global_step": 6430720,
"num_episodes": 613,
"mean_reward": 977.1921964931488,
"mean_length": 17359.13,
"survival_pct": 0.3471826,
"max_steps": 50000,
"loss": 62.981990814208984,
"sps": 467.3951752643231
},
{
"update": 1575,
"global_step": 6451200,
"num_episodes": 613,
"mean_reward": 977.1921964931488,
"mean_length": 17359.13,
"survival_pct": 0.3471826,
"max_steps": 50000,
"loss": 79.36112976074219,
"sps": 427.6025784582037
},
{
"update": 1580,
"global_step": 6471680,
"num_episodes": 613,
"mean_reward": 977.1921964931488,
"mean_length": 17359.13,
"survival_pct": 0.3471826,
"max_steps": 50000,
"loss": 0.3020017147064209,
"sps": 667.9687302715897
},
{
"update": 1585,
"global_step": 6492160,
"num_episodes": 614,
"mean_reward": 1108.4623094081878,
"mean_length": 17559.13,
"survival_pct": 0.3511826,
"max_steps": 50000,
"loss": 30.998226165771484,
"sps": 597.6820954824248
},
{
"update": 1590,
"global_step": 6512640,
"num_episodes": 614,
"mean_reward": 1108.4623094081878,
"mean_length": 17559.13,
"survival_pct": 0.3511826,
"max_steps": 50000,
"loss": 0.0028700977563858032,
"sps": 696.7986031474608
},
{
"update": 1595,
"global_step": 6533120,
"num_episodes": 615,
"mean_reward": 1119.9435447597505,
"mean_length": 17759.13,
"survival_pct": 0.3551826,
"max_steps": 50000,
"loss": 7.728819847106934,
"sps": 618.6918228629329
},
{
"update": 1600,
"global_step": 6553600,
"num_episodes": 617,
"mean_reward": 1121.774534635544,
"mean_length": 17943.08,
"survival_pct": 0.35886160000000006,
"max_steps": 50000,
"loss": 1.3815250396728516,
"sps": 548.2307499735696
},
{
"update": 1605,
"global_step": 6574080,
"num_episodes": 620,
"mean_reward": 1160.0132136058808,
"mean_length": 18332.17,
"survival_pct": 0.36664339999999995,
"max_steps": 50000,
"loss": 53.58154296875,
"sps": 218.35284346944567
},
{
"update": 1610,
"global_step": 6594560,
"num_episodes": 620,
"mean_reward": 1160.0132136058808,
"mean_length": 18332.17,
"survival_pct": 0.36664339999999995,
"max_steps": 50000,
"loss": 198.85508728027344,
"sps": 414.0528781364895
},
{
"update": 1615,
"global_step": 6615040,
"num_episodes": 620,
"mean_reward": 1160.0132136058808,
"mean_length": 18332.17,
"survival_pct": 0.36664339999999995,
"max_steps": 50000,
"loss": 0.6458793878555298,
"sps": 468.4840673845562
},
{
"update": 1620,
"global_step": 6635520,
"num_episodes": 620,
"mean_reward": 1160.0132136058808,
"mean_length": 18332.17,
"survival_pct": 0.36664339999999995,
"max_steps": 50000,
"loss": 4.100710868835449,
"sps": 453.53569760987244
},
{
"update": 1625,
"global_step": 6656000,
"num_episodes": 620,
"mean_reward": 1160.0132136058808,
"mean_length": 18332.17,
"survival_pct": 0.36664339999999995,
"max_steps": 50000,
"loss": 8.949458122253418,
"sps": 445.9539348920468
},
{
"update": 1630,
"global_step": 6676480,
"num_episodes": 620,
"mean_reward": 1160.0132136058808,
"mean_length": 18332.17,
"survival_pct": 0.36664339999999995,
"max_steps": 50000,
"loss": 12.222755432128906,
"sps": 454.9897483249271
},
{
"update": 1635,
"global_step": 6696960,
"num_episodes": 622,
"mean_reward": 1161.4788415908813,
"mean_length": 18470.78,
"survival_pct": 0.36941559999999996,
"max_steps": 50000,
"loss": 0.15580856800079346,
"sps": 433.21338637539577
},
{
"update": 1640,
"global_step": 6717440,
"num_episodes": 622,
"mean_reward": 1161.4788415908813,
"mean_length": 18470.78,
"survival_pct": 0.36941559999999996,
"max_steps": 50000,
"loss": -0.03489271551370621,
"sps": 424.3238296174212
},
{
"update": 1645,
"global_step": 6737920,
"num_episodes": 622,
"mean_reward": 1161.4788415908813,
"mean_length": 18470.78,
"survival_pct": 0.36941559999999996,
"max_steps": 50000,
"loss": -0.022046178579330444,
"sps": 411.0249793122232
},
{
"update": 1650,
"global_step": 6758400,
"num_episodes": 623,
"mean_reward": 1166.8129835891723,
"mean_length": 18963.37,
"survival_pct": 0.3792674,
"max_steps": 50000,
"loss": 147.39236450195312,
"sps": 388.6099849730296
},
{
"update": 1655,
"global_step": 6778880,
"num_episodes": 623,
"mean_reward": 1166.8129835891723,
"mean_length": 18963.37,
"survival_pct": 0.3792674,
"max_steps": 50000,
"loss": 0.3184221386909485,
"sps": 559.058329817769
},
{
"update": 1660,
"global_step": 6799360,
"num_episodes": 623,
"mean_reward": 1166.8129835891723,
"mean_length": 18963.37,
"survival_pct": 0.3792674,
"max_steps": 50000,
"loss": 0.13589444756507874,
"sps": 524.1350206501007
},
{
"update": 1665,
"global_step": 6819840,
"num_episodes": 624,
"mean_reward": 1224.4580519485473,
"mean_length": 19163.37,
"survival_pct": 0.3832674,
"max_steps": 50000,
"loss": 0.28983187675476074,
"sps": 520.1970206412298
},
{
"update": 1670,
"global_step": 6840320,
"num_episodes": 625,
"mean_reward": 1325.5696249008179,
"mean_length": 19621.96,
"survival_pct": 0.3924392,
"max_steps": 50000,
"loss": -0.04390272498130798,
"sps": 600.9722666327863
},
{
"update": 1675,
"global_step": 6860800,
"num_episodes": 625,
"mean_reward": 1325.5696249008179,
"mean_length": 19621.96,
"survival_pct": 0.3924392,
"max_steps": 50000,
"loss": 0.029237419366836548,
"sps": 619.2145461020093
},
{
"update": 1680,
"global_step": 6881280,
"num_episodes": 625,
"mean_reward": 1325.5696249008179,
"mean_length": 19621.96,
"survival_pct": 0.3924392,
"max_steps": 50000,
"loss": 0.28174102306365967,
"sps": 619.1091990093024
},
{
"update": 1685,
"global_step": 6901760,
"num_episodes": 626,
"mean_reward": 1263.1981332015991,
"mean_length": 19821.96,
"survival_pct": 0.3964392,
"max_steps": 50000,
"loss": 0.4367483854293823,
"sps": 626.916632808775
},
{
"update": 1690,
"global_step": 6922240,
"num_episodes": 626,
"mean_reward": 1263.1981332015991,
"mean_length": 19821.96,
"survival_pct": 0.3964392,
"max_steps": 50000,
"loss": 0.1575974076986313,
"sps": 618.4480121190815
},
{
"update": 1695,
"global_step": 6942720,
"num_episodes": 627,
"mean_reward": 1294.6462133026123,
"mean_length": 20320.93,
"survival_pct": 0.4064186,
"max_steps": 50000,
"loss": 9.566240310668945,
"sps": 406.6387424437816
},
{
"update": 1700,
"global_step": 6963200,
"num_episodes": 628,
"mean_reward": 1280.1067363739014,
"mean_length": 20520.93,
"survival_pct": 0.4104186,
"max_steps": 50000,
"loss": 0.709872841835022,
"sps": 442.501654087899
},
{
"update": 1705,
"global_step": 6983680,
"num_episodes": 631,
"mean_reward": 1386.179150118828,
"mean_length": 20721.63,
"survival_pct": 0.41443260000000004,
"max_steps": 50000,
"loss": 19.365644454956055,
"sps": 477.75886736803943
},
{
"update": 1710,
"global_step": 7004160,
"num_episodes": 631,
"mean_reward": 1386.179150118828,
"mean_length": 20721.63,
"survival_pct": 0.41443260000000004,
"max_steps": 50000,
"loss": 108.79446411132812,
"sps": 612.149128502162
},
{
"update": 1715,
"global_step": 7024640,
"num_episodes": 631,
"mean_reward": 1386.179150118828,
"mean_length": 20721.63,
"survival_pct": 0.41443260000000004,
"max_steps": 50000,
"loss": 0.14560824632644653,
"sps": 695.5005180060909
},
{
"update": 1720,
"global_step": 7045120,
"num_episodes": 631,
"mean_reward": 1386.179150118828,
"mean_length": 20721.63,
"survival_pct": 0.41443260000000004,
"max_steps": 50000,
"loss": -0.01733715832233429,
"sps": 691.5803550233451
},
{
"update": 1725,
"global_step": 7065600,
"num_episodes": 631,
"mean_reward": 1386.179150118828,
"mean_length": 20721.63,
"survival_pct": 0.41443260000000004,
"max_steps": 50000,
"loss": 0.15833212435245514,
"sps": 667.4815281045276
},
{
"update": 1730,
"global_step": 7086080,
"num_episodes": 633,
"mean_reward": 1468.0781693506242,
"mean_length": 20922.8,
"survival_pct": 0.418456,
"max_steps": 50000,
"loss": 11.883382797241211,
"sps": 581.8314657800905
},
{
"update": 1735,
"global_step": 7106560,
"num_episodes": 633,
"mean_reward": 1468.0781693506242,
"mean_length": 20922.8,
"survival_pct": 0.418456,
"max_steps": 50000,
"loss": 1.1643352508544922,
"sps": 681.4017891653039
},
{
"update": 1740,
"global_step": 7127040,
"num_episodes": 633,
"mean_reward": 1468.0781693506242,
"mean_length": 20922.8,
"survival_pct": 0.418456,
"max_steps": 50000,
"loss": -0.016278870403766632,
"sps": 777.0781892710015
},
{
"update": 1745,
"global_step": 7147520,
"num_episodes": 633,
"mean_reward": 1468.0781693506242,
"mean_length": 20922.8,
"survival_pct": 0.418456,
"max_steps": 50000,
"loss": -0.061865031719207764,
"sps": 774.9591726476483
},
{
"update": 1750,
"global_step": 7168000,
"num_episodes": 638,
"mean_reward": 1506.0245201086998,
"mean_length": 20610.96,
"survival_pct": 0.4122192,
"max_steps": 50000,
"loss": 2.459982395172119,
"sps": 545.323321230957
},
{
"update": 1755,
"global_step": 7188480,
"num_episodes": 638,
"mean_reward": 1506.0245201086998,
"mean_length": 20610.96,
"survival_pct": 0.4122192,
"max_steps": 50000,
"loss": 0.11832943558692932,
"sps": 817.3067294637037
},
{
"update": 1760,
"global_step": 7208960,
"num_episodes": 640,
"mean_reward": 1516.9915149474143,
"mean_length": 21111.92,
"survival_pct": 0.42223839999999996,
"max_steps": 50000,
"loss": 17.96449851989746,
"sps": 621.1327524456565
},
{
"update": 1765,
"global_step": 7229440,
"num_episodes": 642,
"mean_reward": 1570.5918082213402,
"mean_length": 21605.81,
"survival_pct": 0.4321162,
"max_steps": 50000,
"loss": 886.5294189453125,
"sps": 239.42082440770906
},
{
"update": 1770,
"global_step": 7249920,
"num_episodes": 647,
"mean_reward": 1521.5539734148979,
"mean_length": 20475.3,
"survival_pct": 0.409506,
"max_steps": 50000,
"loss": 179.85931396484375,
"sps": 298.4259022731802
},
{
"update": 1775,
"global_step": 7270400,
"num_episodes": 647,
"mean_reward": 1521.5539734148979,
"mean_length": 20475.3,
"survival_pct": 0.409506,
"max_steps": 50000,
"loss": 966.6091918945312,
"sps": 537.2474721109993
},
{
"update": 1780,
"global_step": 7290880,
"num_episodes": 648,
"mean_reward": 1564.6685786104201,
"mean_length": 20973.13,
"survival_pct": 0.4194626,
"max_steps": 50000,
"loss": 62.704280853271484,
"sps": 509.433704200731
},
{
"update": 1785,
"global_step": 7311360,
"num_episodes": 648,
"mean_reward": 1564.6685786104201,
"mean_length": 20973.13,
"survival_pct": 0.4194626,
"max_steps": 50000,
"loss": -0.05052866041660309,
"sps": 655.9797605411271
},
{
"update": 1790,
"global_step": 7331840,
"num_episodes": 649,
"mean_reward": 1654.446047320366,
"mean_length": 21472.42,
"survival_pct": 0.42944839999999995,
"max_steps": 50000,
"loss": 107.669677734375,
"sps": 447.78139010542594
},
{
"update": 1795,
"global_step": 7352320,
"num_episodes": 650,
"mean_reward": 1661.2120521116258,
"mean_length": 21959.9,
"survival_pct": 0.43919800000000003,
"max_steps": 50000,
"loss": 116.32649993896484,
"sps": 581.3285585073751
},
{
"update": 1800,
"global_step": 7372800,
"num_episodes": 650,
"mean_reward": 1661.2120521116258,
"mean_length": 21959.9,
"survival_pct": 0.43919800000000003,
"max_steps": 50000,
"loss": -0.1900792270898819,
"sps": 738.826346232058
},
{
"update": 1805,
"global_step": 7393280,
"num_episodes": 650,
"mean_reward": 1661.2120521116258,
"mean_length": 21959.9,
"survival_pct": 0.36599833333333337,
"max_steps": 60000,
"loss": -0.12798017263412476,
"sps": 735.1636673358219
},
{
"update": 1810,
"global_step": 7413760,
"num_episodes": 650,
"mean_reward": 1661.2120521116258,
"mean_length": 21959.9,
"survival_pct": 0.36599833333333337,
"max_steps": 60000,
"loss": -0.17347615957260132,
"sps": 710.1339399692507
},
{
"update": 1815,
"global_step": 7434240,
"num_episodes": 650,
"mean_reward": 1661.2120521116258,
"mean_length": 21959.9,
"survival_pct": 0.36599833333333337,
"max_steps": 60000,
"loss": -0.04106990993022919,
"sps": 703.2321739929172
},
{
"update": 1820,
"global_step": 7454720,
"num_episodes": 651,
"mean_reward": 1667.2202544736863,
"mean_length": 22557.46,
"survival_pct": 0.37595766666666663,
"max_steps": 60000,
"loss": 1.203838586807251,
"sps": 608.5787796882346
},
{
"update": 1825,
"global_step": 7475200,
"num_episodes": 651,
"mean_reward": 1667.2202544736863,
"mean_length": 22557.46,
"survival_pct": 0.37595766666666663,
"max_steps": 60000,
"loss": -0.09379199892282486,
"sps": 656.6381880745341
},
{
"update": 1830,
"global_step": 7495680,
"num_episodes": 651,
"mean_reward": 1667.2202544736863,
"mean_length": 22557.46,
"survival_pct": 0.37595766666666663,
"max_steps": 60000,
"loss": 0.08595463633537292,
"sps": 696.5027182462075
},
{
"update": 1835,
"global_step": 7516160,
"num_episodes": 651,
"mean_reward": 1667.2202544736863,
"mean_length": 22557.46,
"survival_pct": 0.37595766666666663,
"max_steps": 60000,
"loss": -0.18902313709259033,
"sps": 686.4702117808695
},
{
"update": 1840,
"global_step": 7536640,
"num_episodes": 651,
"mean_reward": 1667.2202544736863,
"mean_length": 22557.46,
"survival_pct": 0.37595766666666663,
"max_steps": 60000,
"loss": -0.18000459671020508,
"sps": 663.4220467173932
},
{
"update": 1845,
"global_step": 7557120,
"num_episodes": 651,
"mean_reward": 1667.2202544736863,
"mean_length": 22557.46,
"survival_pct": 0.37595766666666663,
"max_steps": 60000,
"loss": -0.09385547041893005,
"sps": 687.3361936552064
},
{
"update": 1850,
"global_step": 7577600,
"num_episodes": 655,
"mean_reward": 1615.1273900747299,
"mean_length": 22353.29,
"survival_pct": 0.37255483333333334,
"max_steps": 60000,
"loss": 0.5962892174720764,
"sps": 577.3992316901965
},
{
"update": 1855,
"global_step": 7598080,
"num_episodes": 655,
"mean_reward": 1615.1273900747299,
"mean_length": 22353.29,
"survival_pct": 0.37255483333333334,
"max_steps": 60000,
"loss": 30.23639488220215,
"sps": 604.6227237743161
},
{
"update": 1860,
"global_step": 7618560,
"num_episodes": 655,
"mean_reward": 1615.1273900747299,
"mean_length": 22353.29,
"survival_pct": 0.37255483333333334,
"max_steps": 60000,
"loss": -0.19098417460918427,
"sps": 683.5282670532649
},
{
"update": 1865,
"global_step": 7639040,
"num_episodes": 655,
"mean_reward": 1615.1273900747299,
"mean_length": 22353.29,
"survival_pct": 0.37255483333333334,
"max_steps": 60000,
"loss": -0.1761397421360016,
"sps": 638.4061361181431
},
{
"update": 1870,
"global_step": 7659520,
"num_episodes": 656,
"mean_reward": 1621.1403862142563,
"mean_length": 22950.87,
"survival_pct": 0.3825145,
"max_steps": 60000,
"loss": 1.2790898084640503,
"sps": 692.9472830895132
},
{
"update": 1875,
"global_step": 7680000,
"num_episodes": 656,
"mean_reward": 1621.1403862142563,
"mean_length": 22950.87,
"survival_pct": 0.3825145,
"max_steps": 60000,
"loss": 0.5525964498519897,
"sps": 708.1301830175981
},
{
"update": 1880,
"global_step": 7700480,
"num_episodes": 656,
"mean_reward": 1621.1403862142563,
"mean_length": 22950.87,
"survival_pct": 0.3825145,
"max_steps": 60000,
"loss": 0.05596184730529785,
"sps": 688.5831713801172
},
{
"update": 1885,
"global_step": 7720960,
"num_episodes": 656,
"mean_reward": 1621.1403862142563,
"mean_length": 22950.87,
"survival_pct": 0.3825145,
"max_steps": 60000,
"loss": -0.039150021970272064,
"sps": 707.8312184620268
},
{
"update": 1890,
"global_step": 7741440,
"num_episodes": 658,
"mean_reward": 1769.352957472801,
"mean_length": 23749.89,
"survival_pct": 0.3958315,
"max_steps": 60000,
"loss": 1.1895029544830322,
"sps": 799.8935632475255
},
{
"update": 1895,
"global_step": 7761920,
"num_episodes": 658,
"mean_reward": 1769.352957472801,
"mean_length": 23749.89,
"survival_pct": 0.3958315,
"max_steps": 60000,
"loss": 1.1538077592849731,
"sps": 762.3956777604958
},
{
"update": 1900,
"global_step": 7782400,
"num_episodes": 659,
"mean_reward": 1825.4085091924667,
"mean_length": 24347.31,
"survival_pct": 0.4057885,
"max_steps": 60000,
"loss": 0.6419044137001038,
"sps": 703.4263875262799
},
{
"update": 1905,
"global_step": 7802880,
"num_episodes": 659,
"mean_reward": 1825.4085091924667,
"mean_length": 24347.31,
"survival_pct": 0.4057885,
"max_steps": 60000,
"loss": 0.15757112205028534,
"sps": 674.3401183927315
},
{
"update": 1910,
"global_step": 7823360,
"num_episodes": 660,
"mean_reward": 1898.589024977684,
"mean_length": 24923.49,
"survival_pct": 0.4153915,
"max_steps": 60000,
"loss": -0.06820769608020782,
"sps": 831.2280725759563
},
{
"update": 1915,
"global_step": 7843840,
"num_episodes": 664,
"mean_reward": 1901.6304970765113,
"mean_length": 24331.52,
"survival_pct": 0.40552533333333335,
"max_steps": 60000,
"loss": -0.050920240581035614,
"sps": 814.7046480629325
},
{
"update": 1920,
"global_step": 7864320,
"num_episodes": 664,
"mean_reward": 1901.6304970765113,
"mean_length": 24331.52,
"survival_pct": 0.40552533333333335,
"max_steps": 60000,
"loss": -0.14338093996047974,
"sps": 799.537306564484
},
{
"update": 1925,
"global_step": 7884800,
"num_episodes": 664,
"mean_reward": 1901.6304970765113,
"mean_length": 24331.52,
"survival_pct": 0.40552533333333335,
"max_steps": 60000,
"loss": -0.11479577422142029,
"sps": 803.4181345293225
},
{
"update": 1930,
"global_step": 7905280,
"num_episodes": 664,
"mean_reward": 1901.6304970765113,
"mean_length": 24331.52,
"survival_pct": 0.40552533333333335,
"max_steps": 60000,
"loss": -0.061470456421375275,
"sps": 778.5318076168905
},
{
"update": 1935,
"global_step": 7925760,
"num_episodes": 664,
"mean_reward": 1901.6304970765113,
"mean_length": 24331.52,
"survival_pct": 0.40552533333333335,
"max_steps": 60000,
"loss": -0.17063158750534058,
"sps": 797.1980171427649
},
{
"update": 1940,
"global_step": 7946240,
"num_episodes": 665,
"mean_reward": 1851.0390957093239,
"mean_length": 24531.52,
"survival_pct": 0.40885866666666665,
"max_steps": 60000,
"loss": 0.14837267994880676,
"sps": 741.7946196855224
},
{
"update": 1945,
"global_step": 7966720,
"num_episodes": 665,
"mean_reward": 1851.0390957093239,
"mean_length": 24531.52,
"survival_pct": 0.40885866666666665,
"max_steps": 60000,
"loss": 0.07289181649684906,
"sps": 799.7640530972891
},
{
"update": 1950,
"global_step": 7987200,
"num_episodes": 665,
"mean_reward": 1851.0390957093239,
"mean_length": 24531.52,
"survival_pct": 0.40885866666666665,
"max_steps": 60000,
"loss": -0.03141336888074875,
"sps": 777.4373642953062
},
{
"update": 1955,
"global_step": 8007680,
"num_episodes": 665,
"mean_reward": 1851.0390957093239,
"mean_length": 24531.52,
"survival_pct": 0.40885866666666665,
"max_steps": 60000,
"loss": -0.1878751814365387,
"sps": 787.4362527685952
},
{
"update": 1960,
"global_step": 8028160,
"num_episodes": 665,
"mean_reward": 1851.0390957093239,
"mean_length": 24531.52,
"survival_pct": 0.40885866666666665,
"max_steps": 60000,
"loss": 0.2337094247341156,
"sps": 836.4509391010391
},
{
"update": 1965,
"global_step": 8048640,
"num_episodes": 665,
"mean_reward": 1851.0390957093239,
"mean_length": 24531.52,
"survival_pct": 0.40885866666666665,
"max_steps": 60000,
"loss": -0.1974252164363861,
"sps": 825.3140040567519
},
{
"update": 1970,
"global_step": 8069120,
"num_episodes": 666,
"mean_reward": 1869.434642584324,
"mean_length": 25128.01,
"survival_pct": 0.41880016666666664,
"max_steps": 60000,
"loss": -0.07606863975524902,
"sps": 811.4969102185196
},
{
"update": 1975,
"global_step": 8089600,
"num_episodes": 666,
"mean_reward": 1869.434642584324,
"mean_length": 25128.01,
"survival_pct": 0.41880016666666664,
"max_steps": 60000,
"loss": -0.14107482135295868,
"sps": 865.2539173939119
},
{
"update": 1980,
"global_step": 8110080,
"num_episodes": 666,
"mean_reward": 1869.434642584324,
"mean_length": 25128.01,
"survival_pct": 0.41880016666666664,
"max_steps": 60000,
"loss": 0.04902653396129608,
"sps": 835.4859345260967
},
{
"update": 1985,
"global_step": 8130560,
"num_episodes": 667,
"mean_reward": 1875.4432358384133,
"mean_length": 25725.3,
"survival_pct": 0.428755,
"max_steps": 60000,
"loss": 235.1592254638672,
"sps": 462.53669720868015
},
{
"update": 1990,
"global_step": 8151040,
"num_episodes": 667,
"mean_reward": 1875.4432358384133,
"mean_length": 25725.3,
"survival_pct": 0.428755,
"max_steps": 60000,
"loss": 4.359109401702881,
"sps": 570.3135079716526
},
{
"update": 1995,
"global_step": 8171520,
"num_episodes": 667,
"mean_reward": 1875.4432358384133,
"mean_length": 25725.3,
"survival_pct": 0.428755,
"max_steps": 60000,
"loss": 0.6510128378868103,
"sps": 774.3925100979434
},
{
"update": 2000,
"global_step": 8192000,
"num_episodes": 667,
"mean_reward": 1875.4432358384133,
"mean_length": 25725.3,
"survival_pct": 0.428755,
"max_steps": 60000,
"loss": 0.2337443083524704,
"sps": 781.7803733771449
},
{
"update": 2005,
"global_step": 8212480,
"num_episodes": 667,
"mean_reward": 1875.4432358384133,
"mean_length": 25725.3,
"survival_pct": 0.32156625,
"max_steps": 80000,
"loss": 0.05558648705482483,
"sps": 760.4769751448292
},
{
"update": 2010,
"global_step": 8232960,
"num_episodes": 667,
"mean_reward": 1875.4432358384133,
"mean_length": 25725.3,
"survival_pct": 0.32156625,
"max_steps": 80000,
"loss": -0.010526187717914581,
"sps": 782.7780404574484
},
{
"update": 2015,
"global_step": 8253440,
"num_episodes": 667,
"mean_reward": 1875.4432358384133,
"mean_length": 25725.3,
"survival_pct": 0.32156625,
"max_steps": 80000,
"loss": -0.0630020946264267,
"sps": 770.7413751342134
},
{
"update": 2020,
"global_step": 8273920,
"num_episodes": 667,
"mean_reward": 1875.4432358384133,
"mean_length": 25725.3,
"survival_pct": 0.32156625,
"max_steps": 80000,
"loss": 0.22121278941631317,
"sps": 791.5506888179068
},
{
"update": 2025,
"global_step": 8294400,
"num_episodes": 667,
"mean_reward": 1875.4432358384133,
"mean_length": 25725.3,
"survival_pct": 0.32156625,
"max_steps": 80000,
"loss": -0.09089811146259308,
"sps": 778.5213294943511
},
{
"update": 2030,
"global_step": 8314880,
"num_episodes": 667,
"mean_reward": 1875.4432358384133,
"mean_length": 25725.3,
"survival_pct": 0.32156625,
"max_steps": 80000,
"loss": -0.16075977683067322,
"sps": 755.6295349772931
},
{
"update": 2035,
"global_step": 8335360,
"num_episodes": 667,
"mean_reward": 1875.4432358384133,
"mean_length": 25725.3,
"survival_pct": 0.32156625,
"max_steps": 80000,
"loss": -0.158203586935997,
"sps": 754.4182628574841
},
{
"update": 2040,
"global_step": 8355840,
"num_episodes": 667,
"mean_reward": 1875.4432358384133,
"mean_length": 25725.3,
"survival_pct": 0.32156625,
"max_steps": 80000,
"loss": -0.21690981090068817,
"sps": 717.6735596436803
},
{
"update": 2045,
"global_step": 8376320,
"num_episodes": 670,
"mean_reward": 1922.8468935227395,
"mean_length": 26134.49,
"survival_pct": 0.326681125,
"max_steps": 80000,
"loss": 156.1393585205078,
"sps": 323.2563608041114
},
{
"update": 2050,
"global_step": 8396800,
"num_episodes": 670,
"mean_reward": 1922.8468935227395,
"mean_length": 26134.49,
"survival_pct": 0.326681125,
"max_steps": 80000,
"loss": 0.16352523863315582,
"sps": 454.8234843620012
},
{
"update": 2055,
"global_step": 8417280,
"num_episodes": 671,
"mean_reward": 1930.1808717942238,
"mean_length": 26915.84,
"survival_pct": 0.336448,
"max_steps": 80000,
"loss": -0.08801546692848206,
"sps": 801.0593266816616
},
{
"update": 2060,
"global_step": 8437760,
"num_episodes": 671,
"mean_reward": 1930.1808717942238,
"mean_length": 26915.84,
"survival_pct": 0.336448,
"max_steps": 80000,
"loss": -0.13592661917209625,
"sps": 779.2904032377721
},
{
"update": 2065,
"global_step": 8458240,
"num_episodes": 672,
"mean_reward": 1975.5371123337745,
"mean_length": 27315.84,
"survival_pct": 0.34144800000000003,
"max_steps": 80000,
"loss": -0.17474070191383362,
"sps": 545.5720531350578
},
{
"update": 2070,
"global_step": 8478720,
"num_episodes": 673,
"mean_reward": 2013.7483372473716,
"mean_length": 28111.01,
"survival_pct": 0.351387625,
"max_steps": 80000,
"loss": 12.530708312988281,
"sps": 499.4637464037537
},
{
"update": 2075,
"global_step": 8499200,
"num_episodes": 673,
"mean_reward": 2013.7483372473716,
"mean_length": 28111.01,
"survival_pct": 0.351387625,
"max_steps": 80000,
"loss": 0.8680031299591064,
"sps": 530.5638999597164
},
{
"update": 2080,
"global_step": 8519680,
"num_episodes": 673,
"mean_reward": 2013.7483372473716,
"mean_length": 28111.01,
"survival_pct": 0.351387625,
"max_steps": 80000,
"loss": 19.798368453979492,
"sps": 656.9070684694709
},
{
"update": 2085,
"global_step": 8540160,
"num_episodes": 673,
"mean_reward": 2013.7483372473716,
"mean_length": 28111.01,
"survival_pct": 0.351387625,
"max_steps": 80000,
"loss": -0.19701889157295227,
"sps": 698.9096675564882
},
{
"update": 2090,
"global_step": 8560640,
"num_episodes": 673,
"mean_reward": 2013.7483372473716,
"mean_length": 28111.01,
"survival_pct": 0.351387625,
"max_steps": 80000,
"loss": -0.0173691064119339,
"sps": 728.1424091253976
},
{
"update": 2095,
"global_step": 8581120,
"num_episodes": 674,
"mean_reward": 1971.775068204403,
"mean_length": 28511.01,
"survival_pct": 0.356387625,
"max_steps": 80000,
"loss": 2452.706787109375,
"sps": 356.8059399544614
},
{
"update": 2100,
"global_step": 8601600,
"num_episodes": 674,
"mean_reward": 1971.775068204403,
"mean_length": 28511.01,
"survival_pct": 0.356387625,
"max_steps": 80000,
"loss": 14.414767265319824,
"sps": 585.3881767861222
},
{
"update": 2105,
"global_step": 8622080,
"num_episodes": 674,
"mean_reward": 1971.775068204403,
"mean_length": 28511.01,
"survival_pct": 0.356387625,
"max_steps": 80000,
"loss": 0.022661790251731873,
"sps": 678.3736040372619
},
{
"update": 2110,
"global_step": 8642560,
"num_episodes": 674,
"mean_reward": 1971.775068204403,
"mean_length": 28511.01,
"survival_pct": 0.356387625,
"max_steps": 80000,
"loss": -0.20638275146484375,
"sps": 665.6932482303778
},
{
"update": 2115,
"global_step": 8663040,
"num_episodes": 674,
"mean_reward": 1971.775068204403,
"mean_length": 28511.01,
"survival_pct": 0.356387625,
"max_steps": 80000,
"loss": -0.1961488574743271,
"sps": 661.1262168442004
},
{
"update": 2120,
"global_step": 8683520,
"num_episodes": 674,
"mean_reward": 1971.775068204403,
"mean_length": 28511.01,
"survival_pct": 0.356387625,
"max_steps": 80000,
"loss": -0.16852501034736633,
"sps": 666.07765454515
},
{
"update": 2125,
"global_step": 8704000,
"num_episodes": 675,
"mean_reward": 1988.447893064022,
"mean_length": 29306.48,
"survival_pct": 0.366331,
"max_steps": 80000,
"loss": 0.14007116854190826,
"sps": 633.2168784322698
},
{
"update": 2130,
"global_step": 8724480,
"num_episodes": 675,
"mean_reward": 1988.447893064022,
"mean_length": 29306.48,
"survival_pct": 0.366331,
"max_steps": 80000,
"loss": 0.08940272033214569,
"sps": 621.5424559601437
},
{
"update": 2135,
"global_step": 8744960,
"num_episodes": 675,
"mean_reward": 1988.447893064022,
"mean_length": 29306.48,
"survival_pct": 0.366331,
"max_steps": 80000,
"loss": -0.16373726725578308,
"sps": 689.1369758818673
},
{
"update": 2140,
"global_step": 8765440,
"num_episodes": 678,
"mean_reward": 2002.1588407492638,
"mean_length": 29307.47,
"survival_pct": 0.366343375,
"max_steps": 80000,
"loss": 9.728021621704102,
"sps": 466.25151106127714
},
{
"update": 2145,
"global_step": 8785920,
"num_episodes": 681,
"mean_reward": 1996.5775413012504,
"mean_length": 28885.87,
"survival_pct": 0.361073375,
"max_steps": 80000,
"loss": 0.4610579311847687,
"sps": 551.8281626137556
},
{
"update": 2150,
"global_step": 8806400,
"num_episodes": 681,
"mean_reward": 1996.5775413012504,
"mean_length": 28885.87,
"survival_pct": 0.361073375,
"max_steps": 80000,
"loss": 0.20792128145694733,
"sps": 708.9972554003779
},
{
"update": 2155,
"global_step": 8826880,
"num_episodes": 681,
"mean_reward": 1996.5775413012504,
"mean_length": 28885.87,
"survival_pct": 0.361073375,
"max_steps": 80000,
"loss": -0.21552087366580963,
"sps": 688.7493020847775
},
{
"update": 2160,
"global_step": 8847360,
"num_episodes": 681,
"mean_reward": 1996.5775413012504,
"mean_length": 28885.87,
"survival_pct": 0.361073375,
"max_steps": 80000,
"loss": -0.16776643693447113,
"sps": 693.0826430290605
},
{
"update": 2165,
"global_step": 8867840,
"num_episodes": 681,
"mean_reward": 1996.5775413012504,
"mean_length": 28885.87,
"survival_pct": 0.361073375,
"max_steps": 80000,
"loss": 0.11363191902637482,
"sps": 709.5181085639529
},
{
"update": 2170,
"global_step": 8888320,
"num_episodes": 681,
"mean_reward": 1996.5775413012504,
"mean_length": 28885.87,
"survival_pct": 0.361073375,
"max_steps": 80000,
"loss": 0.0609743595123291,
"sps": 712.5366166562799
},
{
"update": 2175,
"global_step": 8908800,
"num_episodes": 681,
"mean_reward": 1996.5775413012504,
"mean_length": 28885.87,
"survival_pct": 0.361073375,
"max_steps": 80000,
"loss": 0.7529717087745667,
"sps": 721.4643529853796
},
{
"update": 2180,
"global_step": 8929280,
"num_episodes": 681,
"mean_reward": 1996.5775413012504,
"mean_length": 28885.87,
"survival_pct": 0.361073375,
"max_steps": 80000,
"loss": 0.46089768409729004,
"sps": 729.282506794838
},
{
"update": 2185,
"global_step": 8949760,
"num_episodes": 681,
"mean_reward": 1996.5775413012504,
"mean_length": 28885.87,
"survival_pct": 0.361073375,
"max_steps": 80000,
"loss": -0.04369640350341797,
"sps": 731.0711424448074
},
{
"update": 2190,
"global_step": 8970240,
"num_episodes": 681,
"mean_reward": 1996.5775413012504,
"mean_length": 28885.87,
"survival_pct": 0.361073375,
"max_steps": 80000,
"loss": -0.2550358772277832,
"sps": 741.8354272639722
},
{
"update": 2195,
"global_step": 8990720,
"num_episodes": 681,
"mean_reward": 1996.5775413012504,
"mean_length": 28885.87,
"survival_pct": 0.361073375,
"max_steps": 80000,
"loss": -0.1644577533006668,
"sps": 712.1224425521518
},
{
"update": 2200,
"global_step": 9011200,
"num_episodes": 682,
"mean_reward": 2004.891281106472,
"mean_length": 29684.96,
"survival_pct": 0.371062,
"max_steps": 80000,
"loss": 19.648794174194336,
"sps": 465.00511290277484
},
{
"update": 2205,
"global_step": 9031680,
"num_episodes": 686,
"mean_reward": 2088.85676854372,
"mean_length": 30082.53,
"survival_pct": 0.376031625,
"max_steps": 80000,
"loss": 164.72410583496094,
"sps": 526.5227813400744
},
{
"update": 2210,
"global_step": 9052160,
"num_episodes": 687,
"mean_reward": 2035.9988553357125,
"mean_length": 30482.53,
"survival_pct": 0.381031625,
"max_steps": 80000,
"loss": 0.7927183508872986,
"sps": 546.9580142534905
},
{
"update": 2215,
"global_step": 9072640,
"num_episodes": 687,
"mean_reward": 2035.9988553357125,
"mean_length": 30482.53,
"survival_pct": 0.381031625,
"max_steps": 80000,
"loss": 0.38622233271598816,
"sps": 654.3004159066745
},
{
"update": 2220,
"global_step": 9093120,
"num_episodes": 688,
"mean_reward": 2065.251064198017,
"mean_length": 30882.53,
"survival_pct": 0.386031625,
"max_steps": 80000,
"loss": 0.734276533126831,
"sps": 253.13496237874534
},
{
"update": 2225,
"global_step": 9113600,
"num_episodes": 689,
"mean_reward": 2089.462522289753,
"mean_length": 31681.58,
"survival_pct": 0.39601975,
"max_steps": 80000,
"loss": 5.1840643882751465,
"sps": 407.23576672305194
},
{
"update": 2230,
"global_step": 9134080,
"num_episodes": 689,
"mean_reward": 2089.462522289753,
"mean_length": 31681.58,
"survival_pct": 0.39601975,
"max_steps": 80000,
"loss": 3.3036017417907715,
"sps": 334.98140076678567
},
{
"update": 2235,
"global_step": 9154560,
"num_episodes": 689,
"mean_reward": 2089.462522289753,
"mean_length": 31681.58,
"survival_pct": 0.39601975,
"max_steps": 80000,
"loss": 12.958847045898438,
"sps": 413.60512125042015
},
{
"update": 2240,
"global_step": 9175040,
"num_episodes": 689,
"mean_reward": 2089.462522289753,
"mean_length": 31681.58,
"survival_pct": 0.39601975,
"max_steps": 80000,
"loss": 11.410442352294922,
"sps": 429.40010384163304
},
{
"update": 2245,
"global_step": 9195520,
"num_episodes": 689,
"mean_reward": 2089.462522289753,
"mean_length": 31681.58,
"survival_pct": 0.39601975,
"max_steps": 80000,
"loss": 0.4679904580116272,
"sps": 442.22049673117283
},
{
"update": 2250,
"global_step": 9216000,
"num_episodes": 690,
"mean_reward": 2216.5914781308174,
"mean_length": 32423.39,
"survival_pct": 0.405292375,
"max_steps": 80000,
"loss": 13.81257152557373,
"sps": 503.0567425680048
},
{
"update": 2255,
"global_step": 9236480,
"num_episodes": 690,
"mean_reward": 2216.5914781308174,
"mean_length": 32423.39,
"survival_pct": 0.405292375,
"max_steps": 80000,
"loss": 0.347695916891098,
"sps": 582.6999735714041
},
{
"update": 2260,
"global_step": 9256960,
"num_episodes": 690,
"mean_reward": 2216.5914781308174,
"mean_length": 32423.39,
"survival_pct": 0.405292375,
"max_steps": 80000,
"loss": 0.276647686958313,
"sps": 530.4694388918329
},
{
"update": 2265,
"global_step": 9277440,
"num_episodes": 690,
"mean_reward": 2216.5914781308174,
"mean_length": 32423.39,
"survival_pct": 0.405292375,
"max_steps": 80000,
"loss": 0.12728533148765564,
"sps": 524.460969045791
},
{
"update": 2270,
"global_step": 9297920,
"num_episodes": 690,
"mean_reward": 2216.5914781308174,
"mean_length": 32423.39,
"survival_pct": 0.405292375,
"max_steps": 80000,
"loss": 0.27348071336746216,
"sps": 540.9874205296653
},
{
"update": 2275,
"global_step": 9318400,
"num_episodes": 690,
"mean_reward": 2216.5914781308174,
"mean_length": 32423.39,
"survival_pct": 0.405292375,
"max_steps": 80000,
"loss": 0.07365398108959198,
"sps": 560.7709890786243
},
{
"update": 2280,
"global_step": 9338880,
"num_episodes": 692,
"mean_reward": 2220.5268848729133,
"mean_length": 32823.83,
"survival_pct": 0.41029787500000003,
"max_steps": 80000,
"loss": 0.7585805654525757,
"sps": 369.35670437401643
},
{
"update": 2285,
"global_step": 9359360,
"num_episodes": 692,
"mean_reward": 2220.5268848729133,
"mean_length": 32823.83,
"survival_pct": 0.41029787500000003,
"max_steps": 80000,
"loss": 84.29670715332031,
"sps": 372.3955428292709
},
{
"update": 2290,
"global_step": 9379840,
"num_episodes": 692,
"mean_reward": 2220.5268848729133,
"mean_length": 32823.83,
"survival_pct": 0.41029787500000003,
"max_steps": 80000,
"loss": 0.22566762566566467,
"sps": 391.1882509653406
},
{
"update": 2295,
"global_step": 9400320,
"num_episodes": 692,
"mean_reward": 2220.5268848729133,
"mean_length": 32823.83,
"survival_pct": 0.41029787500000003,
"max_steps": 80000,
"loss": 0.11116530001163483,
"sps": 404.8368327596648
},
{
"update": 2300,
"global_step": 9420800,
"num_episodes": 693,
"mean_reward": 2245.6658289647103,
"mean_length": 33605.52,
"survival_pct": 0.42006899999999997,
"max_steps": 80000,
"loss": 14.594441413879395,
"sps": 319.4636076671403
},
{
"update": 2305,
"global_step": 9441280,
"num_episodes": 693,
"mean_reward": 2245.6658289647103,
"mean_length": 33605.52,
"survival_pct": 0.42006899999999997,
"max_steps": 80000,
"loss": 4.601461410522461,
"sps": 358.98227257652104
},
{
"update": 2310,
"global_step": 9461760,
"num_episodes": 693,
"mean_reward": 2245.6658289647103,
"mean_length": 33605.52,
"survival_pct": 0.42006899999999997,
"max_steps": 80000,
"loss": -0.08196337521076202,
"sps": 397.59174172984365
},
{
"update": 2315,
"global_step": 9482240,
"num_episodes": 693,
"mean_reward": 2245.6658289647103,
"mean_length": 33605.52,
"survival_pct": 0.42006899999999997,
"max_steps": 80000,
"loss": -0.038676969707012177,
"sps": 390.256940583996
},
{
"update": 2320,
"global_step": 9502720,
"num_episodes": 693,
"mean_reward": 2245.6658289647103,
"mean_length": 33605.52,
"survival_pct": 0.42006899999999997,
"max_steps": 80000,
"loss": -0.11568231880664825,
"sps": 386.22805447511666
},
{
"update": 2325,
"global_step": 9523200,
"num_episodes": 693,
"mean_reward": 2245.6658289647103,
"mean_length": 33605.52,
"survival_pct": 0.42006899999999997,
"max_steps": 80000,
"loss": -0.09200012683868408,
"sps": 393.62586076527043
},
{
"update": 2330,
"global_step": 9543680,
"num_episodes": 693,
"mean_reward": 2245.6658289647103,
"mean_length": 33605.52,
"survival_pct": 0.42006899999999997,
"max_steps": 80000,
"loss": -0.14359304308891296,
"sps": 390.66784252973497
},
{
"update": 2335,
"global_step": 9564160,
"num_episodes": 693,
"mean_reward": 2245.6658289647103,
"mean_length": 33605.52,
"survival_pct": 0.42006899999999997,
"max_steps": 80000,
"loss": 0.18879307806491852,
"sps": 395.8483711314571
},
{
"update": 2340,
"global_step": 9584640,
"num_episodes": 693,
"mean_reward": 2245.6658289647103,
"mean_length": 33605.52,
"survival_pct": 0.42006899999999997,
"max_steps": 80000,
"loss": -0.1260424256324768,
"sps": 392.3815903936216
},
{
"update": 2345,
"global_step": 9605120,
"num_episodes": 693,
"mean_reward": 2245.6658289647103,
"mean_length": 33605.52,
"survival_pct": 0.42006899999999997,
"max_steps": 80000,
"loss": -0.09693819284439087,
"sps": 394.7211181932461
},
{
"update": 2350,
"global_step": 9625600,
"num_episodes": 693,
"mean_reward": 2245.6658289647103,
"mean_length": 33605.52,
"survival_pct": 0.42006899999999997,
"max_steps": 80000,
"loss": -0.11026182770729065,
"sps": 396.72162235901294
},
{
"update": 2355,
"global_step": 9646080,
"num_episodes": 693,
"mean_reward": 2245.6658289647103,
"mean_length": 33605.52,
"survival_pct": 0.42006899999999997,
"max_steps": 80000,
"loss": -0.1978517770767212,
"sps": 389.2953386649375
},
{
"update": 2360,
"global_step": 9666560,
"num_episodes": 697,
"mean_reward": 2322.5986268925667,
"mean_length": 34769.99,
"survival_pct": 0.43462487499999997,
"max_steps": 80000,
"loss": 683.0784912109375,
"sps": 168.6861821242579
},
{
"update": 2365,
"global_step": 9687040,
"num_episodes": 698,
"mean_reward": 2311.5085823369027,
"mean_length": 35169.99,
"survival_pct": 0.43962487499999997,
"max_steps": 80000,
"loss": 3170.56884765625,
"sps": 235.4733420148456
},
{
"update": 2370,
"global_step": 9707520,
"num_episodes": 699,
"mean_reward": 2308.040635712147,
"mean_length": 34804.58,
"survival_pct": 0.43505725,
"max_steps": 80000,
"loss": 1.0950156450271606,
"sps": 311.37399850720584
},
{
"update": 2375,
"global_step": 9728000,
"num_episodes": 700,
"mean_reward": 2335.846759979725,
"mean_length": 35204.58,
"survival_pct": 0.44005725,
"max_steps": 80000,
"loss": 1.3819221258163452,
"sps": 322.7681723058591
},
{
"update": 2380,
"global_step": 9748480,
"num_episodes": 700,
"mean_reward": 2335.846759979725,
"mean_length": 35204.58,
"survival_pct": 0.44005725,
"max_steps": 80000,
"loss": 0.11061151325702667,
"sps": 371.7560051115758
},
{
"update": 2385,
"global_step": 9768960,
"num_episodes": 701,
"mean_reward": 2415.715403892994,
"mean_length": 36004.33,
"survival_pct": 0.450054125,
"max_steps": 80000,
"loss": 0.04716165363788605,
"sps": 404.5228919670066
},
{
"update": 2390,
"global_step": 9789440,
"num_episodes": 701,
"mean_reward": 2415.715403892994,
"mean_length": 36004.33,
"survival_pct": 0.450054125,
"max_steps": 80000,
"loss": 0.673524022102356,
"sps": 438.07322870511223
},
{
"update": 2395,
"global_step": 9809920,
"num_episodes": 701,
"mean_reward": 2415.715403892994,
"mean_length": 36004.33,
"survival_pct": 0.450054125,
"max_steps": 80000,
"loss": 0.04936300218105316,
"sps": 447.99792386063456
},
{
"update": 2400,
"global_step": 9830400,
"num_episodes": 701,
"mean_reward": 2415.715403892994,
"mean_length": 36004.33,
"survival_pct": 0.450054125,
"max_steps": 80000,
"loss": -0.05190306156873703,
"sps": 451.9291955717441
},
{
"update": 2405,
"global_step": 9850880,
"num_episodes": 701,
"mean_reward": 2415.715403892994,
"mean_length": 36004.33,
"survival_pct": 0.450054125,
"max_steps": 80000,
"loss": -0.01377500593662262,
"sps": 456.3292063646881
},
{
"update": 2410,
"global_step": 9871360,
"num_episodes": 705,
"mean_reward": 2415.0658296895026,
"mean_length": 35966.34,
"survival_pct": 0.44957924999999993,
"max_steps": 80000,
"loss": 7.559074401855469,
"sps": 317.32403267612773
},
{
"update": 2415,
"global_step": 9891840,
"num_episodes": 705,
"mean_reward": 2415.0658296895026,
"mean_length": 35966.34,
"survival_pct": 0.44957924999999993,
"max_steps": 80000,
"loss": 1.5048213005065918,
"sps": 332.4634487520527
},
{
"update": 2420,
"global_step": 9912320,
"num_episodes": 705,
"mean_reward": 2415.0658296895026,
"mean_length": 35966.34,
"survival_pct": 0.44957924999999993,
"max_steps": 80000,
"loss": 0.2138240784406662,
"sps": 432.40076557163655
},
{
"update": 2425,
"global_step": 9932800,
"num_episodes": 705,
"mean_reward": 2415.0658296895026,
"mean_length": 35966.34,
"survival_pct": 0.44957924999999993,
"max_steps": 80000,
"loss": 0.21876683831214905,
"sps": 448.3816453184398
},
{
"update": 2430,
"global_step": 9953280,
"num_episodes": 705,
"mean_reward": 2415.0658296895026,
"mean_length": 35966.34,
"survival_pct": 0.44957924999999993,
"max_steps": 80000,
"loss": -0.08095875382423401,
"sps": 427.35281803394867
},
{
"update": 2435,
"global_step": 9973760,
"num_episodes": 706,
"mean_reward": 2496.027885582447,
"mean_length": 36761.21,
"survival_pct": 0.45951512499999997,
"max_steps": 80000,
"loss": 1.3263893127441406,
"sps": 489.32343069605076
},
{
"update": 2440,
"global_step": 9994240,
"num_episodes": 706,
"mean_reward": 2496.027885582447,
"mean_length": 36761.21,
"survival_pct": 0.45951512499999997,
"max_steps": 80000,
"loss": 0.8851792812347412,
"sps": 507.1439259148593
},
{
"update": 2445,
"global_step": 10014720,
"num_episodes": 706,
"mean_reward": 2496.027885582447,
"mean_length": 36761.21,
"survival_pct": 0.45951512499999997,
"max_steps": 80000,
"loss": -0.022784769535064697,
"sps": 514.2103065530886
},
{
"update": 2450,
"global_step": 10035200,
"num_episodes": 706,
"mean_reward": 2496.027885582447,
"mean_length": 36761.21,
"survival_pct": 0.45951512499999997,
"max_steps": 80000,
"loss": 0.03437136113643646,
"sps": 498.98534084342003
},
{
"update": 2455,
"global_step": 10055680,
"num_episodes": 708,
"mean_reward": 2566.8018279957773,
"mean_length": 36664.09,
"survival_pct": 0.458301125,
"max_steps": 80000,
"loss": 42.47873306274414,
"sps": 329.38816457132594
},
{
"update": 2460,
"global_step": 10076160,
"num_episodes": 708,
"mean_reward": 2566.8018279957773,
"mean_length": 36664.09,
"survival_pct": 0.458301125,
"max_steps": 80000,
"loss": -0.1173701360821724,
"sps": 513.6460372429699
},
{
"update": 2465,
"global_step": 10096640,
"num_episodes": 708,
"mean_reward": 2566.8018279957773,
"mean_length": 36664.09,
"survival_pct": 0.458301125,
"max_steps": 80000,
"loss": -0.008197635412216187,
"sps": 529.8279586174691
},
{
"update": 2470,
"global_step": 10117120,
"num_episodes": 708,
"mean_reward": 2566.8018279957773,
"mean_length": 36664.09,
"survival_pct": 0.458301125,
"max_steps": 80000,
"loss": -0.09317698329687119,
"sps": 533.5564592749838
},
{
"update": 2475,
"global_step": 10137600,
"num_episodes": 708,
"mean_reward": 2566.8018279957773,
"mean_length": 36664.09,
"survival_pct": 0.458301125,
"max_steps": 80000,
"loss": -0.21753042936325073,
"sps": 523.3857154825558
},
{
"update": 2480,
"global_step": 10158080,
"num_episodes": 708,
"mean_reward": 2566.8018279957773,
"mean_length": 36664.09,
"survival_pct": 0.458301125,
"max_steps": 80000,
"loss": -0.038853421807289124,
"sps": 532.2887541618683
},
{
"update": 2485,
"global_step": 10178560,
"num_episodes": 708,
"mean_reward": 2566.8018279957773,
"mean_length": 36664.09,
"survival_pct": 0.458301125,
"max_steps": 80000,
"loss": -0.13897421956062317,
"sps": 529.4232504246512
},
{
"update": 2490,
"global_step": 10199040,
"num_episodes": 708,
"mean_reward": 2566.8018279957773,
"mean_length": 36664.09,
"survival_pct": 0.458301125,
"max_steps": 80000,
"loss": -0.09001248329877853,
"sps": 527.0291369571413
},
{
"update": 2495,
"global_step": 10219520,
"num_episodes": 708,
"mean_reward": 2566.8018279957773,
"mean_length": 36664.09,
"survival_pct": 0.458301125,
"max_steps": 80000,
"loss": -0.07257804274559021,
"sps": 502.41772632138844
},
{
"update": 2500,
"global_step": 10240000,
"num_episodes": 708,
"mean_reward": 2566.8018279957773,
"mean_length": 36664.09,
"survival_pct": 0.458301125,
"max_steps": 80000,
"loss": -0.09625621140003204,
"sps": 503.481571804067
},
{
"update": 2505,
"global_step": 10260480,
"num_episodes": 708,
"mean_reward": 2566.8018279957773,
"mean_length": 36664.09,
"survival_pct": 0.458301125,
"max_steps": 80000,
"loss": 0.5746378898620605,
"sps": 511.2198490027179
},
{
"update": 2510,
"global_step": 10280960,
"num_episodes": 708,
"mean_reward": 2566.8018279957773,
"mean_length": 36664.09,
"survival_pct": 0.458301125,
"max_steps": 80000,
"loss": 0.2466697096824646,
"sps": 502.732132447925
},
{
"update": 2515,
"global_step": 10301440,
"num_episodes": 711,
"mean_reward": 2659.348615782261,
"mean_length": 36969.03,
"survival_pct": 0.462112875,
"max_steps": 80000,
"loss": 18.992151260375977,
"sps": 423.86791208132917
},
{
"update": 2520,
"global_step": 10321920,
"num_episodes": 711,
"mean_reward": 2659.348615782261,
"mean_length": 36969.03,
"survival_pct": 0.462112875,
"max_steps": 80000,
"loss": 1.12687087059021,
"sps": 561.3134646893078
},
{
"update": 2525,
"global_step": 10342400,
"num_episodes": 713,
"mean_reward": 2761.6309762120245,
"mean_length": 38056.0,
"survival_pct": 0.4757,
"max_steps": 80000,
"loss": 2.7139060497283936,
"sps": 570.6222365340182
},
{
"update": 2530,
"global_step": 10362880,
"num_episodes": 713,
"mean_reward": 2761.6309762120245,
"mean_length": 38056.0,
"survival_pct": 0.4757,
"max_steps": 80000,
"loss": 0.7590304613113403,
"sps": 590.034373507516
},
{
"update": 2535,
"global_step": 10383360,
"num_episodes": 720,
"mean_reward": 2516.4986815714838,
"mean_length": 36373.73,
"survival_pct": 0.454671625,
"max_steps": 80000,
"loss": 6.213076591491699,
"sps": 402.7930412963076
},
{
"update": 2540,
"global_step": 10403840,
"num_episodes": 723,
"mean_reward": 2544.001566827297,
"mean_length": 36533.04,
"survival_pct": 0.456663,
"max_steps": 80000,
"loss": 0.7334296703338623,
"sps": 460.5953936249251
},
{
"update": 2545,
"global_step": 10424320,
"num_episodes": 723,
"mean_reward": 2544.001566827297,
"mean_length": 36533.04,
"survival_pct": 0.456663,
"max_steps": 80000,
"loss": 0.2690082788467407,
"sps": 536.0100630883657
},
{
"update": 2550,
"global_step": 10444800,
"num_episodes": 723,
"mean_reward": 2544.001566827297,
"mean_length": 36533.04,
"survival_pct": 0.456663,
"max_steps": 80000,
"loss": -0.026896320283412933,
"sps": 716.9425490948719
},
{
"update": 2555,
"global_step": 10465280,
"num_episodes": 723,
"mean_reward": 2544.001566827297,
"mean_length": 36533.04,
"survival_pct": 0.456663,
"max_steps": 80000,
"loss": -0.2345649003982544,
"sps": 709.2426825661537
},
{
"update": 2560,
"global_step": 10485760,
"num_episodes": 723,
"mean_reward": 2544.001566827297,
"mean_length": 36533.04,
"survival_pct": 0.456663,
"max_steps": 80000,
"loss": -0.09797754883766174,
"sps": 722.0229903452855
},
{
"update": 2565,
"global_step": 10506240,
"num_episodes": 725,
"mean_reward": 2377.9404914259912,
"mean_length": 36339.3,
"survival_pct": 0.45424125000000004,
"max_steps": 80000,
"loss": 8.174196243286133,
"sps": 380.177542139671
},
{
"update": 2570,
"global_step": 10526720,
"num_episodes": 729,
"mean_reward": 2195.4616722035407,
"mean_length": 34356.45,
"survival_pct": 0.42945562499999995,
"max_steps": 80000,
"loss": -0.04960069805383682,
"sps": 528.86594743839
},
{
"update": 2575,
"global_step": 10547200,
"num_episodes": 729,
"mean_reward": 2195.4616722035407,
"mean_length": 34356.45,
"survival_pct": 0.42945562499999995,
"max_steps": 80000,
"loss": -0.13887368142604828,
"sps": 724.4350769243744
},
{
"update": 2580,
"global_step": 10567680,
"num_episodes": 729,
"mean_reward": 2195.4616722035407,
"mean_length": 34356.45,
"survival_pct": 0.42945562499999995,
"max_steps": 80000,
"loss": -0.08070817589759827,
"sps": 710.3014121255162
},
{
"update": 2585,
"global_step": 10588160,
"num_episodes": 729,
"mean_reward": 2195.4616722035407,
"mean_length": 34356.45,
"survival_pct": 0.42945562499999995,
"max_steps": 80000,
"loss": 0.2522279620170593,
"sps": 722.3160261788157
},
{
"update": 2590,
"global_step": 10608640,
"num_episodes": 729,
"mean_reward": 2195.4616722035407,
"mean_length": 34356.45,
"survival_pct": 0.42945562499999995,
"max_steps": 80000,
"loss": -0.10373173654079437,
"sps": 669.264371724261
},
{
"update": 2595,
"global_step": 10629120,
"num_episodes": 737,
"mean_reward": 2092.385828053951,
"mean_length": 34157.73,
"survival_pct": 0.426971625,
"max_steps": 80000,
"loss": -0.1631798893213272,
"sps": 758.9302511283099
},
{
"update": 2600,
"global_step": 10649600,
"num_episodes": 737,
"mean_reward": 2092.385828053951,
"mean_length": 34157.73,
"survival_pct": 0.426971625,
"max_steps": 80000,
"loss": -0.20927369594573975,
"sps": 740.8227163674813
},
{
"update": 2605,
"global_step": 10670080,
"num_episodes": 737,
"mean_reward": 2092.385828053951,
"mean_length": 34157.73,
"survival_pct": 0.34157730000000003,
"max_steps": 100000,
"loss": -0.19633692502975464,
"sps": 755.8358488017623
},
{
"update": 2610,
"global_step": 10690560,
"num_episodes": 737,
"mean_reward": 2092.385828053951,
"mean_length": 34157.73,
"survival_pct": 0.34157730000000003,
"max_steps": 100000,
"loss": -0.177586629986763,
"sps": 741.6510280859512
},
{
"update": 2615,
"global_step": 10711040,
"num_episodes": 737,
"mean_reward": 2092.385828053951,
"mean_length": 34157.73,
"survival_pct": 0.34157730000000003,
"max_steps": 100000,
"loss": -0.21765384078025818,
"sps": 727.5310686928377
},
{
"update": 2620,
"global_step": 10731520,
"num_episodes": 737,
"mean_reward": 2092.385828053951,
"mean_length": 34157.73,
"survival_pct": 0.34157730000000003,
"max_steps": 100000,
"loss": -0.0905933529138565,
"sps": 741.0285660060694
},
{
"update": 2625,
"global_step": 10752000,
"num_episodes": 737,
"mean_reward": 2092.385828053951,
"mean_length": 34157.73,
"survival_pct": 0.34157730000000003,
"max_steps": 100000,
"loss": -0.0920332819223404,
"sps": 730.4494102532749
},
{
"update": 2630,
"global_step": 10772480,
"num_episodes": 737,
"mean_reward": 2092.385828053951,
"mean_length": 34157.73,
"survival_pct": 0.34157730000000003,
"max_steps": 100000,
"loss": -0.16122817993164062,
"sps": 720.0710211707521
},
{
"update": 2635,
"global_step": 10792960,
"num_episodes": 737,
"mean_reward": 2092.385828053951,
"mean_length": 34157.73,
"survival_pct": 0.34157730000000003,
"max_steps": 100000,
"loss": -0.11723538488149643,
"sps": 703.7882071317721
},
{
"update": 2640,
"global_step": 10813440,
"num_episodes": 737,
"mean_reward": 2092.385828053951,
"mean_length": 34157.73,
"survival_pct": 0.34157730000000003,
"max_steps": 100000,
"loss": -0.18088921904563904,
"sps": 692.1664885002592
},
{
"update": 2645,
"global_step": 10833920,
"num_episodes": 737,
"mean_reward": 2092.385828053951,
"mean_length": 34157.73,
"survival_pct": 0.34157730000000003,
"max_steps": 100000,
"loss": -0.17876286804676056,
"sps": 684.4397604362919
},
{
"update": 2650,
"global_step": 10854400,
"num_episodes": 738,
"mean_reward": 2173.3602819800376,
"mean_length": 35156.66,
"survival_pct": 0.35156660000000006,
"max_steps": 100000,
"loss": 0.5544903874397278,
"sps": 410.99817441737457
},
{
"update": 2655,
"global_step": 10874880,
"num_episodes": 741,
"mean_reward": 2107.5635332036018,
"mean_length": 34163.47,
"survival_pct": 0.3416347,
"max_steps": 100000,
"loss": 544.5499267578125,
"sps": 460.02549342528056
},
{
"update": 2660,
"global_step": 10895360,
"num_episodes": 741,
"mean_reward": 2107.5635332036018,
"mean_length": 34163.47,
"survival_pct": 0.3416347,
"max_steps": 100000,
"loss": 0.021736368536949158,
"sps": 548.2332167398129
},
{
"update": 2665,
"global_step": 10915840,
"num_episodes": 741,
"mean_reward": 2107.5635332036018,
"mean_length": 34163.47,
"survival_pct": 0.3416347,
"max_steps": 100000,
"loss": -0.050015464425086975,
"sps": 527.6499326243032
},
{
"update": 2670,
"global_step": 10936320,
"num_episodes": 741,
"mean_reward": 2107.5635332036018,
"mean_length": 34163.47,
"survival_pct": 0.3416347,
"max_steps": 100000,
"loss": -0.1121983677148819,
"sps": 510.22633473983234
},
{
"update": 2675,
"global_step": 10956800,
"num_episodes": 741,
"mean_reward": 2107.5635332036018,
"mean_length": 34163.47,
"survival_pct": 0.3416347,
"max_steps": 100000,
"loss": 0.46661272644996643,
"sps": 565.3571380995694
},
{
"update": 2680,
"global_step": 10977280,
"num_episodes": 741,
"mean_reward": 2107.5635332036018,
"mean_length": 34163.47,
"survival_pct": 0.3416347,
"max_steps": 100000,
"loss": 0.4648454785346985,
"sps": 572.308466885207
},
{
"update": 2685,
"global_step": 10997760,
"num_episodes": 741,
"mean_reward": 2107.5635332036018,
"mean_length": 34163.47,
"survival_pct": 0.3416347,
"max_steps": 100000,
"loss": 0.2607373595237732,
"sps": 570.2676571961572
},
{
"update": 2690,
"global_step": 11018240,
"num_episodes": 741,
"mean_reward": 2107.5635332036018,
"mean_length": 34163.47,
"survival_pct": 0.3416347,
"max_steps": 100000,
"loss": 0.125162735581398,
"sps": 566.7273297363138
},
{
"update": 2695,
"global_step": 11038720,
"num_episodes": 741,
"mean_reward": 2107.5635332036018,
"mean_length": 34163.47,
"survival_pct": 0.3416347,
"max_steps": 100000,
"loss": 0.059064000844955444,
"sps": 552.6096670437072
},
{
"update": 2700,
"global_step": 11059200,
"num_episodes": 741,
"mean_reward": 2107.5635332036018,
"mean_length": 34163.47,
"survival_pct": 0.3416347,
"max_steps": 100000,
"loss": -0.03592519462108612,
"sps": 605.1661001263743
},
{
"update": 2705,
"global_step": 11079680,
"num_episodes": 741,
"mean_reward": 2107.5635332036018,
"mean_length": 34163.47,
"survival_pct": 0.3416347,
"max_steps": 100000,
"loss": -0.05099225044250488,
"sps": 607.4455279859145
},
{
"update": 2710,
"global_step": 11100160,
"num_episodes": 742,
"mean_reward": 2117.9644485020635,
"mean_length": 35162.27,
"survival_pct": 0.35162269999999995,
"max_steps": 100000,
"loss": -0.046167902648448944,
"sps": 590.2299902607874
},
{
"update": 2715,
"global_step": 11120640,
"num_episodes": 746,
"mean_reward": 2117.223214428425,
"mean_length": 35106.36,
"survival_pct": 0.35106360000000003,
"max_steps": 100000,
"loss": 11.869409561157227,
"sps": 396.9261046886479
},
{
"update": 2720,
"global_step": 11141120,
"num_episodes": 749,
"mean_reward": 2004.7705671191216,
"mean_length": 36107.98,
"survival_pct": 0.3610798,
"max_steps": 100000,
"loss": 156.38876342773438,
"sps": 334.0244695201906
},
{
"update": 2725,
"global_step": 11161600,
"num_episodes": 749,
"mean_reward": 2004.7705671191216,
"mean_length": 36107.98,
"survival_pct": 0.3610798,
"max_steps": 100000,
"loss": 2.8302462100982666,
"sps": 414.56266910509237
},
{
"update": 2730,
"global_step": 11182080,
"num_episodes": 749,
"mean_reward": 2004.7705671191216,
"mean_length": 36107.98,
"survival_pct": 0.3610798,
"max_steps": 100000,
"loss": 2.787081003189087,
"sps": 444.05090888582356
},
{
"update": 2735,
"global_step": 11202560,
"num_episodes": 757,
"mean_reward": 1866.676198823452,
"mean_length": 35228.65,
"survival_pct": 0.3522865,
"max_steps": 100000,
"loss": 20.099119186401367,
"sps": 219.7197521516745
},
{
"update": 2740,
"global_step": 11223040,
"num_episodes": 757,
"mean_reward": 1866.676198823452,
"mean_length": 35228.65,
"survival_pct": 0.3522865,
"max_steps": 100000,
"loss": 24.235774993896484,
"sps": 364.1255592485544
},
{
"update": 2745,
"global_step": 11243520,
"num_episodes": 757,
"mean_reward": 1866.676198823452,
"mean_length": 35228.65,
"survival_pct": 0.3522865,
"max_steps": 100000,
"loss": 10.114006996154785,
"sps": 485.1483243292639
},
{
"update": 2750,
"global_step": 11264000,
"num_episodes": 757,
"mean_reward": 1866.676198823452,
"mean_length": 35228.65,
"survival_pct": 0.3522865,
"max_steps": 100000,
"loss": 17.78866958618164,
"sps": 492.4805186782399
}
]