openfront-rl-agent / training_log.json
JoshuaFreeman's picture
Upload training_log.json with huggingface_hub
49a513e verified
[
{
"update": 5,
"global_step": 10240,
"num_episodes": 12,
"mean_reward": -1.8039220174153645,
"mean_length": 214.16666666666666,
"loss": 1.5202983617782593,
"sps": 1350.1393046305259
},
{
"update": 10,
"global_step": 20480,
"num_episodes": 12,
"mean_reward": -1.8039220174153645,
"mean_length": 214.16666666666666,
"loss": 0.6584598422050476,
"sps": 2047.2883333230054
},
{
"update": 15,
"global_step": 30720,
"num_episodes": 12,
"mean_reward": -1.8039220174153645,
"mean_length": 214.16666666666666,
"loss": 0.6658369898796082,
"sps": 2055.41935946998
},
{
"update": 20,
"global_step": 40960,
"num_episodes": 15,
"mean_reward": 2.1853319803873696,
"mean_length": 1510.9333333333334,
"loss": 1.3961129188537598,
"sps": 1412.8079652055148
},
{
"update": 25,
"global_step": 51200,
"num_episodes": 22,
"mean_reward": 2.625577276403254,
"mean_length": 1983.409090909091,
"loss": 1.2982873916625977,
"sps": 470.35650512927947
},
{
"update": 30,
"global_step": 61440,
"num_episodes": 22,
"mean_reward": 2.625577276403254,
"mean_length": 1983.409090909091,
"loss": 1.0302618741989136,
"sps": 2216.3140512542655
},
{
"update": 35,
"global_step": 71680,
"num_episodes": 22,
"mean_reward": 2.625577276403254,
"mean_length": 1983.409090909091,
"loss": 0.9787145853042603,
"sps": 2070.449469801819
},
{
"update": 40,
"global_step": 81920,
"num_episodes": 27,
"mean_reward": 3.300225363837348,
"mean_length": 2365.4074074074074,
"loss": 0.8183227777481079,
"sps": 755.8004689166112
},
{
"update": 45,
"global_step": 92160,
"num_episodes": 33,
"mean_reward": 3.7748724330555308,
"mean_length": 2564.090909090909,
"loss": 0.3192096948623657,
"sps": 1984.8729329667028
},
{
"update": 50,
"global_step": 102400,
"num_episodes": 33,
"mean_reward": 3.7748724330555308,
"mean_length": 2564.090909090909,
"loss": 0.8334144949913025,
"sps": 2085.15978587018
},
{
"update": 55,
"global_step": 112640,
"num_episodes": 33,
"mean_reward": 3.7748724330555308,
"mean_length": 2564.090909090909,
"loss": 0.5191329121589661,
"sps": 2075.918435842757
},
{
"update": 60,
"global_step": 122880,
"num_episodes": 35,
"mean_reward": 5.517268398829869,
"mean_length": 2989.0,
"loss": 0.6924473643302917,
"sps": 2020.4075872880348
},
{
"update": 65,
"global_step": 133120,
"num_episodes": 40,
"mean_reward": 5.594199681282044,
"mean_length": 3128.125,
"loss": 1.9699269533157349,
"sps": 429.94535608180354
},
{
"update": 70,
"global_step": 143360,
"num_episodes": 41,
"mean_reward": 5.430626520296422,
"mean_length": 3061.170731707317,
"loss": 2.169956684112549,
"sps": 2246.502749698459
},
{
"update": 75,
"global_step": 153600,
"num_episodes": 41,
"mean_reward": 5.430626520296422,
"mean_length": 3061.170731707317,
"loss": 2.5021121501922607,
"sps": 2240.800087024051
},
{
"update": 80,
"global_step": 163840,
"num_episodes": 46,
"mean_reward": 10.588201502095098,
"mean_length": 3177.4565217391305,
"loss": 2.6493680477142334,
"sps": 436.6488855176695
},
{
"update": 85,
"global_step": 174080,
"num_episodes": 52,
"mean_reward": 13.96443979556744,
"mean_length": 3211.1923076923076,
"loss": 2.5603184700012207,
"sps": 2163.643359340071
},
{
"update": 90,
"global_step": 184320,
"num_episodes": 52,
"mean_reward": 13.96443979556744,
"mean_length": 3211.1923076923076,
"loss": 2.336817502975464,
"sps": 2163.4161260603714
},
{
"update": 95,
"global_step": 194560,
"num_episodes": 52,
"mean_reward": 13.96443979556744,
"mean_length": 3211.1923076923076,
"loss": 3.234126091003418,
"sps": 2186.3643111360093
},
{
"update": 100,
"global_step": 204800,
"num_episodes": 55,
"mean_reward": 17.41634755568071,
"mean_length": 3220.3454545454547,
"loss": 5.442279815673828,
"sps": 1960.8741209669945
},
{
"update": 105,
"global_step": 215040,
"num_episodes": 62,
"mean_reward": 27.910769308767012,
"mean_length": 3345.3387096774195,
"loss": 2.81072735786438,
"sps": 1303.1946553744522
},
{
"update": 110,
"global_step": 225280,
"num_episodes": 66,
"mean_reward": 26.517480315584123,
"mean_length": 3151.8939393939395,
"loss": 0.830104649066925,
"sps": 1994.5176102850091
},
{
"update": 115,
"global_step": 235520,
"num_episodes": 66,
"mean_reward": 26.517480315584123,
"mean_length": 3151.8939393939395,
"loss": 0.8076149225234985,
"sps": 2020.1985152956534
},
{
"update": 120,
"global_step": 245760,
"num_episodes": 69,
"mean_reward": 35.54813836968464,
"mean_length": 3449.6376811594205,
"loss": 1.7053556442260742,
"sps": 1392.6768298802306
},
{
"update": 125,
"global_step": 256000,
"num_episodes": 72,
"mean_reward": 34.35707297590044,
"mean_length": 3315.0,
"loss": 0.5338165163993835,
"sps": 2218.857641762577
},
{
"update": 130,
"global_step": 266240,
"num_episodes": 73,
"mean_reward": 37.437843152921495,
"mean_length": 3406.5753424657532,
"loss": 0.31061413884162903,
"sps": 2123.373275302262
},
{
"update": 135,
"global_step": 276480,
"num_episodes": 73,
"mean_reward": 37.437843152921495,
"mean_length": 3406.5753424657532,
"loss": 0.370238333940506,
"sps": 2270.1968695887504
},
{
"update": 140,
"global_step": 286720,
"num_episodes": 76,
"mean_reward": 42.764281247791494,
"mean_length": 3535.8289473684213,
"loss": 0.4431289732456207,
"sps": 2235.4805453156696
},
{
"update": 145,
"global_step": 296960,
"num_episodes": 78,
"mean_reward": 45.29765256245931,
"mean_length": 3576.3076923076924,
"loss": 0.11675499379634857,
"sps": 2248.214328955258
},
{
"update": 150,
"global_step": 307200,
"num_episodes": 79,
"mean_reward": 48.32240367841117,
"mean_length": 3657.6202531645567,
"loss": 0.3231047987937927,
"sps": 2273.3126392381514
},
{
"update": 155,
"global_step": 317440,
"num_episodes": 79,
"mean_reward": 48.32240367841117,
"mean_length": 3657.6202531645567,
"loss": 0.24974392354488373,
"sps": 2276.2572089535233
},
{
"update": 160,
"global_step": 327680,
"num_episodes": 90,
"mean_reward": 48.88773588604397,
"mean_length": 3444.8333333333335,
"loss": 1.1440778970718384,
"sps": 469.25381312597585
},
{
"update": 165,
"global_step": 337920,
"num_episodes": 99,
"mean_reward": 50.101470494511155,
"mean_length": 3348.060606060606,
"loss": 0.3425019383430481,
"sps": 1903.5186981694392
},
{
"update": 170,
"global_step": 348160,
"num_episodes": 99,
"mean_reward": 50.101470494511155,
"mean_length": 3348.060606060606,
"loss": 0.18799301981925964,
"sps": 2130.3254740506277
},
{
"update": 175,
"global_step": 358400,
"num_episodes": 99,
"mean_reward": 50.101470494511155,
"mean_length": 3348.060606060606,
"loss": 0.11751141399145126,
"sps": 2125.2682193036194
},
{
"update": 180,
"global_step": 368640,
"num_episodes": 101,
"mean_reward": 52.442985696792604,
"mean_length": 3414.58,
"loss": 0.2516184449195862,
"sps": 1703.135913858338
},
{
"update": 185,
"global_step": 378880,
"num_episodes": 114,
"mean_reward": 61.27311011314392,
"mean_length": 3506.98,
"loss": 0.11899760365486145,
"sps": 2246.894695169401
},
{
"update": 190,
"global_step": 389120,
"num_episodes": 114,
"mean_reward": 61.27311011314392,
"mean_length": 3506.98,
"loss": 0.30889129638671875,
"sps": 2289.0664114120555
},
{
"update": 195,
"global_step": 399360,
"num_episodes": 114,
"mean_reward": 61.27311011314392,
"mean_length": 3506.98,
"loss": -0.06268183887004852,
"sps": 2277.013858345863
},
{
"update": 200,
"global_step": 409600,
"num_episodes": 114,
"mean_reward": 61.27311011314392,
"mean_length": 3506.98,
"loss": 0.5904355645179749,
"sps": 2173.4262065580283
},
{
"update": 205,
"global_step": 419840,
"num_episodes": 121,
"mean_reward": 72.71755365371705,
"mean_length": 3714.22,
"loss": 1.7067689895629883,
"sps": 308.3386383776914
},
{
"update": 210,
"global_step": 430080,
"num_episodes": 123,
"mean_reward": 72.60844767570495,
"mean_length": 3634.11,
"loss": 0.22778448462486267,
"sps": 2270.040885359684
},
{
"update": 215,
"global_step": 440320,
"num_episodes": 123,
"mean_reward": 72.60844767570495,
"mean_length": 3634.11,
"loss": 0.4201895296573639,
"sps": 2286.5998087120915
},
{
"update": 220,
"global_step": 450560,
"num_episodes": 123,
"mean_reward": 72.60844767570495,
"mean_length": 3634.11,
"loss": 0.08545871078968048,
"sps": 2301.79050438536
},
{
"update": 225,
"global_step": 460800,
"num_episodes": 129,
"mean_reward": 81.06719317436219,
"mean_length": 3731.63,
"loss": 0.4734661877155304,
"sps": 1893.4799410834491
},
{
"update": 230,
"global_step": 471040,
"num_episodes": 132,
"mean_reward": 83.9696240234375,
"mean_length": 3736.94,
"loss": 2.656114101409912,
"sps": 1747.8656326711334
},
{
"update": 235,
"global_step": 481280,
"num_episodes": 132,
"mean_reward": 83.9696240234375,
"mean_length": 3736.94,
"loss": 0.3720909357070923,
"sps": 2290.767122681247
},
{
"update": 240,
"global_step": 491520,
"num_episodes": 133,
"mean_reward": 86.32599678993225,
"mean_length": 3836.51,
"loss": 0.28272855281829834,
"sps": 1660.2388311365764
},
{
"update": 245,
"global_step": 501760,
"num_episodes": 139,
"mean_reward": 90.87246092796326,
"mean_length": 3642.75,
"loss": 0.7918240427970886,
"sps": 226.8334199523192
},
{
"update": 250,
"global_step": 512000,
"num_episodes": 144,
"mean_reward": 93.14730869293213,
"mean_length": 3661.34,
"loss": 0.5329251885414124,
"sps": 1291.7356099331766
},
{
"update": 255,
"global_step": 522240,
"num_episodes": 147,
"mean_reward": 90.92609573364258,
"mean_length": 3465.71,
"loss": 0.2208552062511444,
"sps": 2176.5233668919136
},
{
"update": 260,
"global_step": 532480,
"num_episodes": 148,
"mean_reward": 94.03146264076233,
"mean_length": 3565.28,
"loss": 0.3058362305164337,
"sps": 1287.827730215577
},
{
"update": 265,
"global_step": 542720,
"num_episodes": 153,
"mean_reward": 93.66601790428162,
"mean_length": 3469.44,
"loss": 0.2177593857049942,
"sps": 2185.2068742705324
},
{
"update": 270,
"global_step": 552960,
"num_episodes": 154,
"mean_reward": 96.52238609313964,
"mean_length": 3569.01,
"loss": 0.1883123517036438,
"sps": 2191.858927925312
},
{
"update": 275,
"global_step": 563200,
"num_episodes": 155,
"mean_reward": 99.62512998580932,
"mean_length": 3668.07,
"loss": 0.27221783995628357,
"sps": 2123.45043596574
},
{
"update": 280,
"global_step": 573440,
"num_episodes": 155,
"mean_reward": 99.62512998580932,
"mean_length": 3668.07,
"loss": 0.18106494843959808,
"sps": 2141.5921280557186
},
{
"update": 285,
"global_step": 583680,
"num_episodes": 162,
"mean_reward": 100.8679942893982,
"mean_length": 3670.27,
"loss": 0.5695434212684631,
"sps": 500.88139423117855
},
{
"update": 290,
"global_step": 593920,
"num_episodes": 162,
"mean_reward": 100.8679942893982,
"mean_length": 3670.27,
"loss": 0.07588323950767517,
"sps": 2141.039120248053
},
{
"update": 295,
"global_step": 604160,
"num_episodes": 164,
"mean_reward": 103.97303117752075,
"mean_length": 3770.47,
"loss": 0.3367934823036194,
"sps": 2232.451302693334
},
{
"update": 300,
"global_step": 614400,
"num_episodes": 164,
"mean_reward": 103.97303117752075,
"mean_length": 3770.47,
"loss": 0.23896968364715576,
"sps": 2206.894347275839
},
{
"update": 305,
"global_step": 624640,
"num_episodes": 168,
"mean_reward": 108.57997800827026,
"mean_length": 3866.64,
"loss": 0.8634886145591736,
"sps": 2068.617103764695
},
{
"update": 310,
"global_step": 634880,
"num_episodes": 168,
"mean_reward": 108.57997800827026,
"mean_length": 3866.64,
"loss": 0.05964889004826546,
"sps": 2227.05463999608
},
{
"update": 315,
"global_step": 645120,
"num_episodes": 170,
"mean_reward": 108.82997495651244,
"mean_length": 3866.64,
"loss": 0.3584131598472595,
"sps": 2093.8527505731936
},
{
"update": 320,
"global_step": 655360,
"num_episodes": 170,
"mean_reward": 108.82997495651244,
"mean_length": 3866.64,
"loss": 0.11968313157558441,
"sps": 2108.9095828206277
},
{
"update": 325,
"global_step": 665600,
"num_episodes": 174,
"mean_reward": 112.43493017196656,
"mean_length": 3960.91,
"loss": 1.6604670286178589,
"sps": 2079.794147719775
},
{
"update": 330,
"global_step": 675840,
"num_episodes": 175,
"mean_reward": 115.29129805564881,
"mean_length": 4060.48,
"loss": 0.19095765054225922,
"sps": 2102.571665929904
},
{
"update": 335,
"global_step": 686080,
"num_episodes": 175,
"mean_reward": 115.29129805564881,
"mean_length": 4060.48,
"loss": 0.14229056239128113,
"sps": 2107.3760666985922
},
{
"update": 340,
"global_step": 696320,
"num_episodes": 175,
"mean_reward": 115.29129805564881,
"mean_length": 4060.48,
"loss": 0.36735689640045166,
"sps": 2157.1693383290976
},
{
"update": 345,
"global_step": 706560,
"num_episodes": 189,
"mean_reward": 109.84256043434144,
"mean_length": 3863.03,
"loss": 0.7830713987350464,
"sps": 381.54779046834835
},
{
"update": 350,
"global_step": 716800,
"num_episodes": 193,
"mean_reward": 109.65507596015931,
"mean_length": 3901.01,
"loss": 9.749893188476562,
"sps": 258.15120412826286
},
{
"update": 355,
"global_step": 727040,
"num_episodes": 197,
"mean_reward": 109.78353757858277,
"mean_length": 3921.28,
"loss": 0.27189120650291443,
"sps": 2234.7686845905673
},
{
"update": 360,
"global_step": 737280,
"num_episodes": 198,
"mean_reward": 110.03232297897338,
"mean_length": 3921.28,
"loss": 0.27451807260513306,
"sps": 2208.748818348954
},
{
"update": 365,
"global_step": 747520,
"num_episodes": 198,
"mean_reward": 110.03232297897338,
"mean_length": 3921.28,
"loss": 0.3887019753456116,
"sps": 2179.671400488869
},
{
"update": 370,
"global_step": 757760,
"num_episodes": 200,
"mean_reward": 110.02969996452332,
"mean_length": 3920.77,
"loss": 1.3407719135284424,
"sps": 190.58035959703773
},
{
"update": 375,
"global_step": 768000,
"num_episodes": 205,
"mean_reward": 113.28294358253478,
"mean_length": 4042.39,
"loss": 2.234469175338745,
"sps": 165.74069059867267
},
{
"update": 380,
"global_step": 778240,
"num_episodes": 209,
"mean_reward": 112.94422784805298,
"mean_length": 4057.27,
"loss": 0.2318531721830368,
"sps": 2224.56366677181
},
{
"update": 385,
"global_step": 788480,
"num_episodes": 209,
"mean_reward": 112.94422784805298,
"mean_length": 4057.27,
"loss": 0.2268732637166977,
"sps": 2253.631967291392
},
{
"update": 390,
"global_step": 798720,
"num_episodes": 210,
"mean_reward": 115.53032508850097,
"mean_length": 4153.44,
"loss": 0.06943273544311523,
"sps": 2241.199985702131
},
{
"update": 395,
"global_step": 808960,
"num_episodes": 211,
"mean_reward": 118.63075592041015,
"mean_length": 4252.74,
"loss": 0.21790897846221924,
"sps": 2233.8736866140016
},
{
"update": 400,
"global_step": 819200,
"num_episodes": 218,
"mean_reward": 112.67819341659546,
"mean_length": 4053.49,
"loss": 0.28217822313308716,
"sps": 2245.8419819824057
},
{
"update": 405,
"global_step": 829440,
"num_episodes": 218,
"mean_reward": 112.67819341659546,
"mean_length": 4053.49,
"loss": 0.07389844954013824,
"sps": 2093.830293640606
},
{
"update": 410,
"global_step": 839680,
"num_episodes": 219,
"mean_reward": 112.67819341659546,
"mean_length": 4053.49,
"loss": 0.19773584604263306,
"sps": 2089.0640172885346
},
{
"update": 415,
"global_step": 849920,
"num_episodes": 221,
"mean_reward": 115.57382575035095,
"mean_length": 4142.16,
"loss": 0.2218392789363861,
"sps": 2037.8682435790647
},
{
"update": 420,
"global_step": 860160,
"num_episodes": 225,
"mean_reward": 118.26686740875245,
"mean_length": 4220.72,
"loss": 0.16683566570281982,
"sps": 2062.011715868465
},
{
"update": 425,
"global_step": 870400,
"num_episodes": 225,
"mean_reward": 118.26686740875245,
"mean_length": 4220.72,
"loss": 0.3665807843208313,
"sps": 2085.879289076417
},
{
"update": 430,
"global_step": 880640,
"num_episodes": 227,
"mean_reward": 115.4131219291687,
"mean_length": 4123.01,
"loss": 0.2211420238018036,
"sps": 1319.1510855667186
},
{
"update": 435,
"global_step": 890880,
"num_episodes": 228,
"mean_reward": 118.51452213287354,
"mean_length": 4220.72,
"loss": 0.01409757137298584,
"sps": 2249.26043334859
},
{
"update": 440,
"global_step": 901120,
"num_episodes": 232,
"mean_reward": 121.46894996643067,
"mean_length": 4313.96,
"loss": 0.34094715118408203,
"sps": 2235.574796161653
},
{
"update": 445,
"global_step": 911360,
"num_episodes": 232,
"mean_reward": 121.46894996643067,
"mean_length": 4313.96,
"loss": 0.03650672733783722,
"sps": 2254.642878384582
},
{
"update": 450,
"global_step": 921600,
"num_episodes": 233,
"mean_reward": 122.21637367248535,
"mean_length": 4313.96,
"loss": 0.4388778805732727,
"sps": 2230.3176453482333
},
{
"update": 455,
"global_step": 931840,
"num_episodes": 235,
"mean_reward": 122.47082150459289,
"mean_length": 4315.26,
"loss": 0.04285623878240585,
"sps": 2245.5355177671836
},
{
"update": 460,
"global_step": 942080,
"num_episodes": 241,
"mean_reward": 125.87507015228272,
"mean_length": 4387.15,
"loss": 0.18071885406970978,
"sps": 2186.650382997658
},
{
"update": 465,
"global_step": 952320,
"num_episodes": 241,
"mean_reward": 125.87507015228272,
"mean_length": 4387.15,
"loss": 0.15905889868736267,
"sps": 2246.1825959008293
},
{
"update": 470,
"global_step": 962560,
"num_episodes": 243,
"mean_reward": 125.64015349388123,
"mean_length": 4385.41,
"loss": 0.2828221917152405,
"sps": 2237.777989892148
},
{
"update": 475,
"global_step": 972800,
"num_episodes": 244,
"mean_reward": 128.7428973865509,
"mean_length": 4484.47,
"loss": -0.001385476440191269,
"sps": 2224.056236169126
},
{
"update": 480,
"global_step": 983040,
"num_episodes": 247,
"mean_reward": 134.21521110534667,
"mean_length": 4676.7,
"loss": 0.6856433153152466,
"sps": 166.9800451494931
},
{
"update": 485,
"global_step": 993280,
"num_episodes": 248,
"mean_reward": 131.0992687225342,
"mean_length": 4598.8,
"loss": 0.22580231726169586,
"sps": 2205.4840180784313
},
{
"update": 490,
"global_step": 1003520,
"num_episodes": 253,
"mean_reward": 130.85596141815185,
"mean_length": 4590.29,
"loss": 15.813247680664062,
"sps": 1554.4720078900461
},
{
"update": 495,
"global_step": 1013760,
"num_episodes": 262,
"mean_reward": 118.73081938743591,
"mean_length": 4225.92,
"loss": 3.043635368347168,
"sps": 364.1160359745992
},
{
"update": 500,
"global_step": 1024000,
"num_episodes": 266,
"mean_reward": 112.27171317100525,
"mean_length": 4026.32,
"loss": 1.5756291151046753,
"sps": 2216.9506893762605
},
{
"update": 505,
"global_step": 1034240,
"num_episodes": 271,
"mean_reward": 106.31812086105347,
"mean_length": 3831.41,
"loss": 0.8373554348945618,
"sps": 2230.0889295621437
},
{
"update": 510,
"global_step": 1044480,
"num_episodes": 271,
"mean_reward": 106.31812086105347,
"mean_length": 3831.41,
"loss": 0.1685715615749359,
"sps": 2250.612331563048
},
{
"update": 515,
"global_step": 1054720,
"num_episodes": 279,
"mean_reward": 94.40078766822815,
"mean_length": 3437.38,
"loss": 0.3164912760257721,
"sps": 755.1427540116495
},
{
"update": 520,
"global_step": 1064960,
"num_episodes": 284,
"mean_reward": 97.14825866699219,
"mean_length": 3536.28,
"loss": 2.4807233810424805,
"sps": 2229.202303336277
},
{
"update": 525,
"global_step": 1075200,
"num_episodes": 288,
"mean_reward": 100.25763621330262,
"mean_length": 3636.97,
"loss": 7.4268412590026855,
"sps": 2240.4780501902205
},
{
"update": 530,
"global_step": 1085440,
"num_episodes": 288,
"mean_reward": 100.25763621330262,
"mean_length": 3636.97,
"loss": 13.631644248962402,
"sps": 2167.9135230629645
},
{
"update": 535,
"global_step": 1095680,
"num_episodes": 289,
"mean_reward": 103.11298480987548,
"mean_length": 3736.54,
"loss": 16.333158493041992,
"sps": 2159.176669252336
},
{
"update": 540,
"global_step": 1105920,
"num_episodes": 292,
"mean_reward": 103.19977847099304,
"mean_length": 3834.19,
"loss": 1.755424976348877,
"sps": 2140.8384853415
},
{
"update": 545,
"global_step": 1116160,
"num_episodes": 293,
"mean_reward": 106.40395077705384,
"mean_length": 3909.42,
"loss": 2.900822639465332,
"sps": 175.96426214417178
},
{
"update": 550,
"global_step": 1126400,
"num_episodes": 296,
"mean_reward": 106.08349679946899,
"mean_length": 3927.6,
"loss": 3.329728126525879,
"sps": 162.40274463106428
},
{
"update": 555,
"global_step": 1136640,
"num_episodes": 300,
"mean_reward": 100.18753468513489,
"mean_length": 3833.08,
"loss": 0.3283099830150604,
"sps": 2190.1276229463892
},
{
"update": 560,
"global_step": 1146880,
"num_episodes": 307,
"mean_reward": 100.06094589233399,
"mean_length": 3798.23,
"loss": 3.218208074569702,
"sps": 196.6633145580635
},
{
"update": 565,
"global_step": 1157120,
"num_episodes": 308,
"mean_reward": 99.96278611183166,
"mean_length": 3815.32,
"loss": 0.05826599895954132,
"sps": 2198.7480074814316
},
{
"update": 570,
"global_step": 1167360,
"num_episodes": 308,
"mean_reward": 99.96278611183166,
"mean_length": 3815.32,
"loss": 0.19391702115535736,
"sps": 2247.8319226488716
},
{
"update": 575,
"global_step": 1177600,
"num_episodes": 311,
"mean_reward": 97.06219799995422,
"mean_length": 3719.15,
"loss": 0.5620024800300598,
"sps": 2199.015937226961
},
{
"update": 580,
"global_step": 1187840,
"num_episodes": 316,
"mean_reward": 96.72605070114136,
"mean_length": 3721.99,
"loss": 9.720523834228516,
"sps": 1486.7675112343536
},
{
"update": 585,
"global_step": 1198080,
"num_episodes": 318,
"mean_reward": 96.66609871864318,
"mean_length": 3723.94,
"loss": 0.6666443943977356,
"sps": 2234.009052616905
},
{
"update": 590,
"global_step": 1208320,
"num_episodes": 318,
"mean_reward": 96.66609871864318,
"mean_length": 3723.94,
"loss": 0.2884657084941864,
"sps": 2239.9481475435614
},
{
"update": 595,
"global_step": 1218560,
"num_episodes": 320,
"mean_reward": 96.41710000038147,
"mean_length": 3723.94,
"loss": 0.8150730133056641,
"sps": 2242.7699730761115
},
{
"update": 600,
"global_step": 1228800,
"num_episodes": 322,
"mean_reward": 101.80320601463318,
"mean_length": 3725.16,
"loss": 0.8642079830169678,
"sps": 285.7807108395187
},
{
"update": 605,
"global_step": 1239040,
"num_episodes": 326,
"mean_reward": 98.85420690536499,
"mean_length": 3650.4,
"loss": 0.6339423060417175,
"sps": 1850.348648405877
},
{
"update": 610,
"global_step": 1249280,
"num_episodes": 326,
"mean_reward": 98.85420690536499,
"mean_length": 3650.4,
"loss": 0.5616691708564758,
"sps": 2222.6578253029397
},
{
"update": 615,
"global_step": 1259520,
"num_episodes": 331,
"mean_reward": 90.45196411609649,
"mean_length": 3548.02,
"loss": 0.2677087187767029,
"sps": 2224.9883354292588
},
{
"update": 620,
"global_step": 1269760,
"num_episodes": 331,
"mean_reward": 90.45196411609649,
"mean_length": 3548.02,
"loss": 0.05597818270325661,
"sps": 2232.8842122446686
},
{
"update": 625,
"global_step": 1280000,
"num_episodes": 335,
"mean_reward": 89.9470157957077,
"mean_length": 3546.72,
"loss": 0.4655948281288147,
"sps": 314.0543075325823
},
{
"update": 630,
"global_step": 1290240,
"num_episodes": 337,
"mean_reward": 83.67983005046844,
"mean_length": 3359.15,
"loss": 0.0969170331954956,
"sps": 2181.9774383339013
},
{
"update": 635,
"global_step": 1300480,
"num_episodes": 342,
"mean_reward": 86.38446100711822,
"mean_length": 3465.84,
"loss": 0.7292745113372803,
"sps": 2124.75987235513
},
{
"update": 640,
"global_step": 1310720,
"num_episodes": 342,
"mean_reward": 86.38446100711822,
"mean_length": 3465.84,
"loss": 0.04831065982580185,
"sps": 2168.1603087847257
},
{
"update": 645,
"global_step": 1320960,
"num_episodes": 343,
"mean_reward": 89.47943027019501,
"mean_length": 3565.41,
"loss": 0.1487177461385727,
"sps": 2107.2008167857116
},
{
"update": 650,
"global_step": 1331200,
"num_episodes": 344,
"mean_reward": 89.2304312467575,
"mean_length": 3565.41,
"loss": 0.12002412974834442,
"sps": 2127.214484781757
},
{
"update": 655,
"global_step": 1341440,
"num_episodes": 347,
"mean_reward": 88.98148257732392,
"mean_length": 3565.41,
"loss": 0.1547270268201828,
"sps": 2165.6753206938283
},
{
"update": 660,
"global_step": 1351680,
"num_episodes": 347,
"mean_reward": 88.98148257732392,
"mean_length": 3565.41,
"loss": 0.1951214224100113,
"sps": 2229.5540914965095
},
{
"update": 665,
"global_step": 1361920,
"num_episodes": 350,
"mean_reward": 88.69014154911041,
"mean_length": 3547.65,
"loss": 0.02001141756772995,
"sps": 2220.3189922208117
},
{
"update": 670,
"global_step": 1372160,
"num_episodes": 353,
"mean_reward": 91.77625680446624,
"mean_length": 3645.53,
"loss": 0.1334764063358307,
"sps": 2217.9524326914734
},
{
"update": 675,
"global_step": 1382400,
"num_episodes": 356,
"mean_reward": 94.39934126377106,
"mean_length": 3746.85,
"loss": 1.491716742515564,
"sps": 115.85030478370321
},
{
"update": 680,
"global_step": 1392640,
"num_episodes": 362,
"mean_reward": 94.36987939357758,
"mean_length": 3759.39,
"loss": 0.08460421860218048,
"sps": 466.5453273147261
},
{
"update": 685,
"global_step": 1402880,
"num_episodes": 370,
"mean_reward": 91.42453453540801,
"mean_length": 3664.28,
"loss": 0.06896203756332397,
"sps": 2211.12877876806
},
{
"update": 690,
"global_step": 1413120,
"num_episodes": 371,
"mean_reward": 94.27827970981598,
"mean_length": 3763.34,
"loss": 0.08465250581502914,
"sps": 2224.3649291369943
},
{
"update": 695,
"global_step": 1423360,
"num_episodes": 372,
"mean_reward": 94.03031706333161,
"mean_length": 3763.34,
"loss": 0.12509916722774506,
"sps": 2233.8556777814188
},
{
"update": 700,
"global_step": 1433600,
"num_episodes": 372,
"mean_reward": 94.03031706333161,
"mean_length": 3763.34,
"loss": 0.05390219762921333,
"sps": 2225.4858130028433
},
{
"update": 705,
"global_step": 1443840,
"num_episodes": 378,
"mean_reward": 100.22330937862397,
"mean_length": 3957.32,
"loss": 1.2421371936798096,
"sps": 176.73331940579132
},
{
"update": 710,
"global_step": 1454080,
"num_episodes": 380,
"mean_reward": 100.36302034854889,
"mean_length": 3980.35,
"loss": 0.06020417809486389,
"sps": 2151.8459777584108
},
{
"update": 715,
"global_step": 1464320,
"num_episodes": 383,
"mean_reward": 103.226567196846,
"mean_length": 4079.78,
"loss": 0.03555985540151596,
"sps": 779.0322633363131
},
{
"update": 720,
"global_step": 1474560,
"num_episodes": 385,
"mean_reward": 100.37291463375091,
"mean_length": 3983.61,
"loss": 0.42947918176651,
"sps": 1976.0163143357508
},
{
"update": 725,
"global_step": 1484800,
"num_episodes": 387,
"mean_reward": 102.8877759027481,
"mean_length": 4082.64,
"loss": 0.3529297709465027,
"sps": 2149.9991470019922
},
{
"update": 730,
"global_step": 1495040,
"num_episodes": 393,
"mean_reward": 96.80677964687348,
"mean_length": 3770.28,
"loss": 0.5400058627128601,
"sps": 182.2636316464271
},
{
"update": 735,
"global_step": 1505280,
"num_episodes": 397,
"mean_reward": 99.80784844875336,
"mean_length": 3841.74,
"loss": 0.07788175344467163,
"sps": 1880.7298583574118
},
{
"update": 740,
"global_step": 1515520,
"num_episodes": 400,
"mean_reward": 102.611374335289,
"mean_length": 3837.4,
"loss": 0.12514562904834747,
"sps": 2199.6995663818816
},
{
"update": 745,
"global_step": 1525760,
"num_episodes": 402,
"mean_reward": 102.8225530385971,
"mean_length": 3839.8,
"loss": 0.1331540048122406,
"sps": 505.88516130939627
},
{
"update": 750,
"global_step": 1536000,
"num_episodes": 402,
"mean_reward": 102.8225530385971,
"mean_length": 3839.8,
"loss": -0.008329648524522781,
"sps": 2268.146789029585
}
]