[ { "update": 5, "global_step": 10240, "num_episodes": 12, "mean_reward": -1.8039220174153645, "mean_length": 214.16666666666666, "loss": 1.5202983617782593, "sps": 1350.1393046305259 }, { "update": 10, "global_step": 20480, "num_episodes": 12, "mean_reward": -1.8039220174153645, "mean_length": 214.16666666666666, "loss": 0.6584598422050476, "sps": 2047.2883333230054 }, { "update": 15, "global_step": 30720, "num_episodes": 12, "mean_reward": -1.8039220174153645, "mean_length": 214.16666666666666, "loss": 0.6658369898796082, "sps": 2055.41935946998 }, { "update": 20, "global_step": 40960, "num_episodes": 15, "mean_reward": 2.1853319803873696, "mean_length": 1510.9333333333334, "loss": 1.3961129188537598, "sps": 1412.8079652055148 }, { "update": 25, "global_step": 51200, "num_episodes": 22, "mean_reward": 2.625577276403254, "mean_length": 1983.409090909091, "loss": 1.2982873916625977, "sps": 470.35650512927947 }, { "update": 30, "global_step": 61440, "num_episodes": 22, "mean_reward": 2.625577276403254, "mean_length": 1983.409090909091, "loss": 1.0302618741989136, "sps": 2216.3140512542655 }, { "update": 35, "global_step": 71680, "num_episodes": 22, "mean_reward": 2.625577276403254, "mean_length": 1983.409090909091, "loss": 0.9787145853042603, "sps": 2070.449469801819 }, { "update": 40, "global_step": 81920, "num_episodes": 27, "mean_reward": 3.300225363837348, "mean_length": 2365.4074074074074, "loss": 0.8183227777481079, "sps": 755.8004689166112 }, { "update": 45, "global_step": 92160, "num_episodes": 33, "mean_reward": 3.7748724330555308, "mean_length": 2564.090909090909, "loss": 0.3192096948623657, "sps": 1984.8729329667028 }, { "update": 50, "global_step": 102400, "num_episodes": 33, "mean_reward": 3.7748724330555308, "mean_length": 2564.090909090909, "loss": 0.8334144949913025, "sps": 2085.15978587018 }, { "update": 55, "global_step": 112640, "num_episodes": 33, "mean_reward": 3.7748724330555308, "mean_length": 2564.090909090909, "loss": 0.5191329121589661, "sps": 2075.918435842757 }, { "update": 60, "global_step": 122880, "num_episodes": 35, "mean_reward": 5.517268398829869, "mean_length": 2989.0, "loss": 0.6924473643302917, "sps": 2020.4075872880348 }, { "update": 65, "global_step": 133120, "num_episodes": 40, "mean_reward": 5.594199681282044, "mean_length": 3128.125, "loss": 1.9699269533157349, "sps": 429.94535608180354 }, { "update": 70, "global_step": 143360, "num_episodes": 41, "mean_reward": 5.430626520296422, "mean_length": 3061.170731707317, "loss": 2.169956684112549, "sps": 2246.502749698459 }, { "update": 75, "global_step": 153600, "num_episodes": 41, "mean_reward": 5.430626520296422, "mean_length": 3061.170731707317, "loss": 2.5021121501922607, "sps": 2240.800087024051 }, { "update": 80, "global_step": 163840, "num_episodes": 46, "mean_reward": 10.588201502095098, "mean_length": 3177.4565217391305, "loss": 2.6493680477142334, "sps": 436.6488855176695 }, { "update": 85, "global_step": 174080, "num_episodes": 52, "mean_reward": 13.96443979556744, "mean_length": 3211.1923076923076, "loss": 2.5603184700012207, "sps": 2163.643359340071 }, { "update": 90, "global_step": 184320, "num_episodes": 52, "mean_reward": 13.96443979556744, "mean_length": 3211.1923076923076, "loss": 2.336817502975464, "sps": 2163.4161260603714 }, { "update": 95, "global_step": 194560, "num_episodes": 52, "mean_reward": 13.96443979556744, "mean_length": 3211.1923076923076, "loss": 3.234126091003418, "sps": 2186.3643111360093 }, { "update": 100, "global_step": 204800, "num_episodes": 55, "mean_reward": 17.41634755568071, "mean_length": 3220.3454545454547, "loss": 5.442279815673828, "sps": 1960.8741209669945 }, { "update": 105, "global_step": 215040, "num_episodes": 62, "mean_reward": 27.910769308767012, "mean_length": 3345.3387096774195, "loss": 2.81072735786438, "sps": 1303.1946553744522 }, { "update": 110, "global_step": 225280, "num_episodes": 66, "mean_reward": 26.517480315584123, "mean_length": 3151.8939393939395, "loss": 0.830104649066925, "sps": 1994.5176102850091 }, { "update": 115, "global_step": 235520, "num_episodes": 66, "mean_reward": 26.517480315584123, "mean_length": 3151.8939393939395, "loss": 0.8076149225234985, "sps": 2020.1985152956534 }, { "update": 120, "global_step": 245760, "num_episodes": 69, "mean_reward": 35.54813836968464, "mean_length": 3449.6376811594205, "loss": 1.7053556442260742, "sps": 1392.6768298802306 }, { "update": 125, "global_step": 256000, "num_episodes": 72, "mean_reward": 34.35707297590044, "mean_length": 3315.0, "loss": 0.5338165163993835, "sps": 2218.857641762577 }, { "update": 130, "global_step": 266240, "num_episodes": 73, "mean_reward": 37.437843152921495, "mean_length": 3406.5753424657532, "loss": 0.31061413884162903, "sps": 2123.373275302262 }, { "update": 135, "global_step": 276480, "num_episodes": 73, "mean_reward": 37.437843152921495, "mean_length": 3406.5753424657532, "loss": 0.370238333940506, "sps": 2270.1968695887504 }, { "update": 140, "global_step": 286720, "num_episodes": 76, "mean_reward": 42.764281247791494, "mean_length": 3535.8289473684213, "loss": 0.4431289732456207, "sps": 2235.4805453156696 }, { "update": 145, "global_step": 296960, "num_episodes": 78, "mean_reward": 45.29765256245931, "mean_length": 3576.3076923076924, "loss": 0.11675499379634857, "sps": 2248.214328955258 }, { "update": 150, "global_step": 307200, "num_episodes": 79, "mean_reward": 48.32240367841117, "mean_length": 3657.6202531645567, "loss": 0.3231047987937927, "sps": 2273.3126392381514 }, { "update": 155, "global_step": 317440, "num_episodes": 79, "mean_reward": 48.32240367841117, "mean_length": 3657.6202531645567, "loss": 0.24974392354488373, "sps": 2276.2572089535233 }, { "update": 160, "global_step": 327680, "num_episodes": 90, "mean_reward": 48.88773588604397, "mean_length": 3444.8333333333335, "loss": 1.1440778970718384, "sps": 469.25381312597585 }, { "update": 165, "global_step": 337920, "num_episodes": 99, "mean_reward": 50.101470494511155, "mean_length": 3348.060606060606, "loss": 0.3425019383430481, "sps": 1903.5186981694392 }, { "update": 170, "global_step": 348160, "num_episodes": 99, "mean_reward": 50.101470494511155, "mean_length": 3348.060606060606, "loss": 0.18799301981925964, "sps": 2130.3254740506277 }, { "update": 175, "global_step": 358400, "num_episodes": 99, "mean_reward": 50.101470494511155, "mean_length": 3348.060606060606, "loss": 0.11751141399145126, "sps": 2125.2682193036194 }, { "update": 180, "global_step": 368640, "num_episodes": 101, "mean_reward": 52.442985696792604, "mean_length": 3414.58, "loss": 0.2516184449195862, "sps": 1703.135913858338 }, { "update": 185, "global_step": 378880, "num_episodes": 114, "mean_reward": 61.27311011314392, "mean_length": 3506.98, "loss": 0.11899760365486145, "sps": 2246.894695169401 }, { "update": 190, "global_step": 389120, "num_episodes": 114, "mean_reward": 61.27311011314392, "mean_length": 3506.98, "loss": 0.30889129638671875, "sps": 2289.0664114120555 }, { "update": 195, "global_step": 399360, "num_episodes": 114, "mean_reward": 61.27311011314392, "mean_length": 3506.98, "loss": -0.06268183887004852, "sps": 2277.013858345863 }, { "update": 200, "global_step": 409600, "num_episodes": 114, "mean_reward": 61.27311011314392, "mean_length": 3506.98, "loss": 0.5904355645179749, "sps": 2173.4262065580283 }, { "update": 205, "global_step": 419840, "num_episodes": 121, "mean_reward": 72.71755365371705, "mean_length": 3714.22, "loss": 1.7067689895629883, "sps": 308.3386383776914 }, { "update": 210, "global_step": 430080, "num_episodes": 123, "mean_reward": 72.60844767570495, "mean_length": 3634.11, "loss": 0.22778448462486267, "sps": 2270.040885359684 }, { "update": 215, "global_step": 440320, "num_episodes": 123, "mean_reward": 72.60844767570495, "mean_length": 3634.11, "loss": 0.4201895296573639, "sps": 2286.5998087120915 }, { "update": 220, "global_step": 450560, "num_episodes": 123, "mean_reward": 72.60844767570495, "mean_length": 3634.11, "loss": 0.08545871078968048, "sps": 2301.79050438536 }, { "update": 225, "global_step": 460800, "num_episodes": 129, "mean_reward": 81.06719317436219, "mean_length": 3731.63, "loss": 0.4734661877155304, "sps": 1893.4799410834491 }, { "update": 230, "global_step": 471040, "num_episodes": 132, "mean_reward": 83.9696240234375, "mean_length": 3736.94, "loss": 2.656114101409912, "sps": 1747.8656326711334 }, { "update": 235, "global_step": 481280, "num_episodes": 132, "mean_reward": 83.9696240234375, "mean_length": 3736.94, "loss": 0.3720909357070923, "sps": 2290.767122681247 }, { "update": 240, "global_step": 491520, "num_episodes": 133, "mean_reward": 86.32599678993225, "mean_length": 3836.51, "loss": 0.28272855281829834, "sps": 1660.2388311365764 }, { "update": 245, "global_step": 501760, "num_episodes": 139, "mean_reward": 90.87246092796326, "mean_length": 3642.75, "loss": 0.7918240427970886, "sps": 226.8334199523192 }, { "update": 250, "global_step": 512000, "num_episodes": 144, "mean_reward": 93.14730869293213, "mean_length": 3661.34, "loss": 0.5329251885414124, "sps": 1291.7356099331766 }, { "update": 255, "global_step": 522240, "num_episodes": 147, "mean_reward": 90.92609573364258, "mean_length": 3465.71, "loss": 0.2208552062511444, "sps": 2176.5233668919136 }, { "update": 260, "global_step": 532480, "num_episodes": 148, "mean_reward": 94.03146264076233, "mean_length": 3565.28, "loss": 0.3058362305164337, "sps": 1287.827730215577 }, { "update": 265, "global_step": 542720, "num_episodes": 153, "mean_reward": 93.66601790428162, "mean_length": 3469.44, "loss": 0.2177593857049942, "sps": 2185.2068742705324 }, { "update": 270, "global_step": 552960, "num_episodes": 154, "mean_reward": 96.52238609313964, "mean_length": 3569.01, "loss": 0.1883123517036438, "sps": 2191.858927925312 }, { "update": 275, "global_step": 563200, "num_episodes": 155, "mean_reward": 99.62512998580932, "mean_length": 3668.07, "loss": 0.27221783995628357, "sps": 2123.45043596574 }, { "update": 280, "global_step": 573440, "num_episodes": 155, "mean_reward": 99.62512998580932, "mean_length": 3668.07, "loss": 0.18106494843959808, "sps": 2141.5921280557186 }, { "update": 285, "global_step": 583680, "num_episodes": 162, "mean_reward": 100.8679942893982, "mean_length": 3670.27, "loss": 0.5695434212684631, "sps": 500.88139423117855 }, { "update": 290, "global_step": 593920, "num_episodes": 162, "mean_reward": 100.8679942893982, "mean_length": 3670.27, "loss": 0.07588323950767517, "sps": 2141.039120248053 }, { "update": 295, "global_step": 604160, "num_episodes": 164, "mean_reward": 103.97303117752075, "mean_length": 3770.47, "loss": 0.3367934823036194, "sps": 2232.451302693334 }, { "update": 300, "global_step": 614400, "num_episodes": 164, "mean_reward": 103.97303117752075, "mean_length": 3770.47, "loss": 0.23896968364715576, "sps": 2206.894347275839 }, { "update": 305, "global_step": 624640, "num_episodes": 168, "mean_reward": 108.57997800827026, "mean_length": 3866.64, "loss": 0.8634886145591736, "sps": 2068.617103764695 }, { "update": 310, "global_step": 634880, "num_episodes": 168, "mean_reward": 108.57997800827026, "mean_length": 3866.64, "loss": 0.05964889004826546, "sps": 2227.05463999608 }, { "update": 315, "global_step": 645120, "num_episodes": 170, "mean_reward": 108.82997495651244, "mean_length": 3866.64, "loss": 0.3584131598472595, "sps": 2093.8527505731936 }, { "update": 320, "global_step": 655360, "num_episodes": 170, "mean_reward": 108.82997495651244, "mean_length": 3866.64, "loss": 0.11968313157558441, "sps": 2108.9095828206277 }, { "update": 325, "global_step": 665600, "num_episodes": 174, "mean_reward": 112.43493017196656, "mean_length": 3960.91, "loss": 1.6604670286178589, "sps": 2079.794147719775 }, { "update": 330, "global_step": 675840, "num_episodes": 175, "mean_reward": 115.29129805564881, "mean_length": 4060.48, "loss": 0.19095765054225922, "sps": 2102.571665929904 }, { "update": 335, "global_step": 686080, "num_episodes": 175, "mean_reward": 115.29129805564881, "mean_length": 4060.48, "loss": 0.14229056239128113, "sps": 2107.3760666985922 }, { "update": 340, "global_step": 696320, "num_episodes": 175, "mean_reward": 115.29129805564881, "mean_length": 4060.48, "loss": 0.36735689640045166, "sps": 2157.1693383290976 }, { "update": 345, "global_step": 706560, "num_episodes": 189, "mean_reward": 109.84256043434144, "mean_length": 3863.03, "loss": 0.7830713987350464, "sps": 381.54779046834835 }, { "update": 350, "global_step": 716800, "num_episodes": 193, "mean_reward": 109.65507596015931, "mean_length": 3901.01, "loss": 9.749893188476562, "sps": 258.15120412826286 }, { "update": 355, "global_step": 727040, "num_episodes": 197, "mean_reward": 109.78353757858277, "mean_length": 3921.28, "loss": 0.27189120650291443, "sps": 2234.7686845905673 }, { "update": 360, "global_step": 737280, "num_episodes": 198, "mean_reward": 110.03232297897338, "mean_length": 3921.28, "loss": 0.27451807260513306, "sps": 2208.748818348954 }, { "update": 365, "global_step": 747520, "num_episodes": 198, "mean_reward": 110.03232297897338, "mean_length": 3921.28, "loss": 0.3887019753456116, "sps": 2179.671400488869 }, { "update": 370, "global_step": 757760, "num_episodes": 200, "mean_reward": 110.02969996452332, "mean_length": 3920.77, "loss": 1.3407719135284424, "sps": 190.58035959703773 }, { "update": 375, "global_step": 768000, "num_episodes": 205, "mean_reward": 113.28294358253478, "mean_length": 4042.39, "loss": 2.234469175338745, "sps": 165.74069059867267 }, { "update": 380, "global_step": 778240, "num_episodes": 209, "mean_reward": 112.94422784805298, "mean_length": 4057.27, "loss": 0.2318531721830368, "sps": 2224.56366677181 }, { "update": 385, "global_step": 788480, "num_episodes": 209, "mean_reward": 112.94422784805298, "mean_length": 4057.27, "loss": 0.2268732637166977, "sps": 2253.631967291392 }, { "update": 390, "global_step": 798720, "num_episodes": 210, "mean_reward": 115.53032508850097, "mean_length": 4153.44, "loss": 0.06943273544311523, "sps": 2241.199985702131 }, { "update": 395, "global_step": 808960, "num_episodes": 211, "mean_reward": 118.63075592041015, "mean_length": 4252.74, "loss": 0.21790897846221924, "sps": 2233.8736866140016 }, { "update": 400, "global_step": 819200, "num_episodes": 218, "mean_reward": 112.67819341659546, "mean_length": 4053.49, "loss": 0.28217822313308716, "sps": 2245.8419819824057 }, { "update": 405, "global_step": 829440, "num_episodes": 218, "mean_reward": 112.67819341659546, "mean_length": 4053.49, "loss": 0.07389844954013824, "sps": 2093.830293640606 }, { "update": 410, "global_step": 839680, "num_episodes": 219, "mean_reward": 112.67819341659546, "mean_length": 4053.49, "loss": 0.19773584604263306, "sps": 2089.0640172885346 }, { "update": 415, "global_step": 849920, "num_episodes": 221, "mean_reward": 115.57382575035095, "mean_length": 4142.16, "loss": 0.2218392789363861, "sps": 2037.8682435790647 }, { "update": 420, "global_step": 860160, "num_episodes": 225, "mean_reward": 118.26686740875245, "mean_length": 4220.72, "loss": 0.16683566570281982, "sps": 2062.011715868465 }, { "update": 425, "global_step": 870400, "num_episodes": 225, "mean_reward": 118.26686740875245, "mean_length": 4220.72, "loss": 0.3665807843208313, "sps": 2085.879289076417 }, { "update": 430, "global_step": 880640, "num_episodes": 227, "mean_reward": 115.4131219291687, "mean_length": 4123.01, "loss": 0.2211420238018036, "sps": 1319.1510855667186 }, { "update": 435, "global_step": 890880, "num_episodes": 228, "mean_reward": 118.51452213287354, "mean_length": 4220.72, "loss": 0.01409757137298584, "sps": 2249.26043334859 }, { "update": 440, "global_step": 901120, "num_episodes": 232, "mean_reward": 121.46894996643067, "mean_length": 4313.96, "loss": 0.34094715118408203, "sps": 2235.574796161653 }, { "update": 445, "global_step": 911360, "num_episodes": 232, "mean_reward": 121.46894996643067, "mean_length": 4313.96, "loss": 0.03650672733783722, "sps": 2254.642878384582 }, { "update": 450, "global_step": 921600, "num_episodes": 233, "mean_reward": 122.21637367248535, "mean_length": 4313.96, "loss": 0.4388778805732727, "sps": 2230.3176453482333 }, { "update": 455, "global_step": 931840, "num_episodes": 235, "mean_reward": 122.47082150459289, "mean_length": 4315.26, "loss": 0.04285623878240585, "sps": 2245.5355177671836 }, { "update": 460, "global_step": 942080, "num_episodes": 241, "mean_reward": 125.87507015228272, "mean_length": 4387.15, "loss": 0.18071885406970978, "sps": 2186.650382997658 }, { "update": 465, "global_step": 952320, "num_episodes": 241, "mean_reward": 125.87507015228272, "mean_length": 4387.15, "loss": 0.15905889868736267, "sps": 2246.1825959008293 }, { "update": 470, "global_step": 962560, "num_episodes": 243, "mean_reward": 125.64015349388123, "mean_length": 4385.41, "loss": 0.2828221917152405, "sps": 2237.777989892148 }, { "update": 475, "global_step": 972800, "num_episodes": 244, "mean_reward": 128.7428973865509, "mean_length": 4484.47, "loss": -0.001385476440191269, "sps": 2224.056236169126 }, { "update": 480, "global_step": 983040, "num_episodes": 247, "mean_reward": 134.21521110534667, "mean_length": 4676.7, "loss": 0.6856433153152466, "sps": 166.9800451494931 }, { "update": 485, "global_step": 993280, "num_episodes": 248, "mean_reward": 131.0992687225342, "mean_length": 4598.8, "loss": 0.22580231726169586, "sps": 2205.4840180784313 }, { "update": 490, "global_step": 1003520, "num_episodes": 253, "mean_reward": 130.85596141815185, "mean_length": 4590.29, "loss": 15.813247680664062, "sps": 1554.4720078900461 }, { "update": 495, "global_step": 1013760, "num_episodes": 262, "mean_reward": 118.73081938743591, "mean_length": 4225.92, "loss": 3.043635368347168, "sps": 364.1160359745992 }, { "update": 500, "global_step": 1024000, "num_episodes": 266, "mean_reward": 112.27171317100525, "mean_length": 4026.32, "loss": 1.5756291151046753, "sps": 2216.9506893762605 }, { "update": 505, "global_step": 1034240, "num_episodes": 271, "mean_reward": 106.31812086105347, "mean_length": 3831.41, "loss": 0.8373554348945618, "sps": 2230.0889295621437 }, { "update": 510, "global_step": 1044480, "num_episodes": 271, "mean_reward": 106.31812086105347, "mean_length": 3831.41, "loss": 0.1685715615749359, "sps": 2250.612331563048 }, { "update": 515, "global_step": 1054720, "num_episodes": 279, "mean_reward": 94.40078766822815, "mean_length": 3437.38, "loss": 0.3164912760257721, "sps": 755.1427540116495 }, { "update": 520, "global_step": 1064960, "num_episodes": 284, "mean_reward": 97.14825866699219, "mean_length": 3536.28, "loss": 2.4807233810424805, "sps": 2229.202303336277 }, { "update": 525, "global_step": 1075200, "num_episodes": 288, "mean_reward": 100.25763621330262, "mean_length": 3636.97, "loss": 7.4268412590026855, "sps": 2240.4780501902205 }, { "update": 530, "global_step": 1085440, "num_episodes": 288, "mean_reward": 100.25763621330262, "mean_length": 3636.97, "loss": 13.631644248962402, "sps": 2167.9135230629645 }, { "update": 535, "global_step": 1095680, "num_episodes": 289, "mean_reward": 103.11298480987548, "mean_length": 3736.54, "loss": 16.333158493041992, "sps": 2159.176669252336 }, { "update": 540, "global_step": 1105920, "num_episodes": 292, "mean_reward": 103.19977847099304, "mean_length": 3834.19, "loss": 1.755424976348877, "sps": 2140.8384853415 }, { "update": 545, "global_step": 1116160, "num_episodes": 293, "mean_reward": 106.40395077705384, "mean_length": 3909.42, "loss": 2.900822639465332, "sps": 175.96426214417178 }, { "update": 550, "global_step": 1126400, "num_episodes": 296, "mean_reward": 106.08349679946899, "mean_length": 3927.6, "loss": 3.329728126525879, "sps": 162.40274463106428 }, { "update": 555, "global_step": 1136640, "num_episodes": 300, "mean_reward": 100.18753468513489, "mean_length": 3833.08, "loss": 0.3283099830150604, "sps": 2190.1276229463892 }, { "update": 560, "global_step": 1146880, "num_episodes": 307, "mean_reward": 100.06094589233399, "mean_length": 3798.23, "loss": 3.218208074569702, "sps": 196.6633145580635 }, { "update": 565, "global_step": 1157120, "num_episodes": 308, "mean_reward": 99.96278611183166, "mean_length": 3815.32, "loss": 0.05826599895954132, "sps": 2198.7480074814316 }, { "update": 570, "global_step": 1167360, "num_episodes": 308, "mean_reward": 99.96278611183166, "mean_length": 3815.32, "loss": 0.19391702115535736, "sps": 2247.8319226488716 }, { "update": 575, "global_step": 1177600, "num_episodes": 311, "mean_reward": 97.06219799995422, "mean_length": 3719.15, "loss": 0.5620024800300598, "sps": 2199.015937226961 }, { "update": 580, "global_step": 1187840, "num_episodes": 316, "mean_reward": 96.72605070114136, "mean_length": 3721.99, "loss": 9.720523834228516, "sps": 1486.7675112343536 }, { "update": 585, "global_step": 1198080, "num_episodes": 318, "mean_reward": 96.66609871864318, "mean_length": 3723.94, "loss": 0.6666443943977356, "sps": 2234.009052616905 }, { "update": 590, "global_step": 1208320, "num_episodes": 318, "mean_reward": 96.66609871864318, "mean_length": 3723.94, "loss": 0.2884657084941864, "sps": 2239.9481475435614 }, { "update": 595, "global_step": 1218560, "num_episodes": 320, "mean_reward": 96.41710000038147, "mean_length": 3723.94, "loss": 0.8150730133056641, "sps": 2242.7699730761115 }, { "update": 600, "global_step": 1228800, "num_episodes": 322, "mean_reward": 101.80320601463318, "mean_length": 3725.16, "loss": 0.8642079830169678, "sps": 285.7807108395187 }, { "update": 605, "global_step": 1239040, "num_episodes": 326, "mean_reward": 98.85420690536499, "mean_length": 3650.4, "loss": 0.6339423060417175, "sps": 1850.348648405877 }, { "update": 610, "global_step": 1249280, "num_episodes": 326, "mean_reward": 98.85420690536499, "mean_length": 3650.4, "loss": 0.5616691708564758, "sps": 2222.6578253029397 }, { "update": 615, "global_step": 1259520, "num_episodes": 331, "mean_reward": 90.45196411609649, "mean_length": 3548.02, "loss": 0.2677087187767029, "sps": 2224.9883354292588 }, { "update": 620, "global_step": 1269760, "num_episodes": 331, "mean_reward": 90.45196411609649, "mean_length": 3548.02, "loss": 0.05597818270325661, "sps": 2232.8842122446686 }, { "update": 625, "global_step": 1280000, "num_episodes": 335, "mean_reward": 89.9470157957077, "mean_length": 3546.72, "loss": 0.4655948281288147, "sps": 314.0543075325823 }, { "update": 630, "global_step": 1290240, "num_episodes": 337, "mean_reward": 83.67983005046844, "mean_length": 3359.15, "loss": 0.0969170331954956, "sps": 2181.9774383339013 }, { "update": 635, "global_step": 1300480, "num_episodes": 342, "mean_reward": 86.38446100711822, "mean_length": 3465.84, "loss": 0.7292745113372803, "sps": 2124.75987235513 }, { "update": 640, "global_step": 1310720, "num_episodes": 342, "mean_reward": 86.38446100711822, "mean_length": 3465.84, "loss": 0.04831065982580185, "sps": 2168.1603087847257 }, { "update": 645, "global_step": 1320960, "num_episodes": 343, "mean_reward": 89.47943027019501, "mean_length": 3565.41, "loss": 0.1487177461385727, "sps": 2107.2008167857116 }, { "update": 650, "global_step": 1331200, "num_episodes": 344, "mean_reward": 89.2304312467575, "mean_length": 3565.41, "loss": 0.12002412974834442, "sps": 2127.214484781757 }, { "update": 655, "global_step": 1341440, "num_episodes": 347, "mean_reward": 88.98148257732392, "mean_length": 3565.41, "loss": 0.1547270268201828, "sps": 2165.6753206938283 }, { "update": 660, "global_step": 1351680, "num_episodes": 347, "mean_reward": 88.98148257732392, "mean_length": 3565.41, "loss": 0.1951214224100113, "sps": 2229.5540914965095 }, { "update": 665, "global_step": 1361920, "num_episodes": 350, "mean_reward": 88.69014154911041, "mean_length": 3547.65, "loss": 0.02001141756772995, "sps": 2220.3189922208117 }, { "update": 670, "global_step": 1372160, "num_episodes": 353, "mean_reward": 91.77625680446624, "mean_length": 3645.53, "loss": 0.1334764063358307, "sps": 2217.9524326914734 }, { "update": 675, "global_step": 1382400, "num_episodes": 356, "mean_reward": 94.39934126377106, "mean_length": 3746.85, "loss": 1.491716742515564, "sps": 115.85030478370321 }, { "update": 680, "global_step": 1392640, "num_episodes": 362, "mean_reward": 94.36987939357758, "mean_length": 3759.39, "loss": 0.08460421860218048, "sps": 466.5453273147261 }, { "update": 685, "global_step": 1402880, "num_episodes": 370, "mean_reward": 91.42453453540801, "mean_length": 3664.28, "loss": 0.06896203756332397, "sps": 2211.12877876806 }, { "update": 690, "global_step": 1413120, "num_episodes": 371, "mean_reward": 94.27827970981598, "mean_length": 3763.34, "loss": 0.08465250581502914, "sps": 2224.3649291369943 }, { "update": 695, "global_step": 1423360, "num_episodes": 372, "mean_reward": 94.03031706333161, "mean_length": 3763.34, "loss": 0.12509916722774506, "sps": 2233.8556777814188 }, { "update": 700, "global_step": 1433600, "num_episodes": 372, "mean_reward": 94.03031706333161, "mean_length": 3763.34, "loss": 0.05390219762921333, "sps": 2225.4858130028433 }, { "update": 705, "global_step": 1443840, "num_episodes": 378, "mean_reward": 100.22330937862397, "mean_length": 3957.32, "loss": 1.2421371936798096, "sps": 176.73331940579132 }, { "update": 710, "global_step": 1454080, "num_episodes": 380, "mean_reward": 100.36302034854889, "mean_length": 3980.35, "loss": 0.06020417809486389, "sps": 2151.8459777584108 }, { "update": 715, "global_step": 1464320, "num_episodes": 383, "mean_reward": 103.226567196846, "mean_length": 4079.78, "loss": 0.03555985540151596, "sps": 779.0322633363131 }, { "update": 720, "global_step": 1474560, "num_episodes": 385, "mean_reward": 100.37291463375091, "mean_length": 3983.61, "loss": 0.42947918176651, "sps": 1976.0163143357508 }, { "update": 725, "global_step": 1484800, "num_episodes": 387, "mean_reward": 102.8877759027481, "mean_length": 4082.64, "loss": 0.3529297709465027, "sps": 2149.9991470019922 }, { "update": 730, "global_step": 1495040, "num_episodes": 393, "mean_reward": 96.80677964687348, "mean_length": 3770.28, "loss": 0.5400058627128601, "sps": 182.2636316464271 }, { "update": 735, "global_step": 1505280, "num_episodes": 397, "mean_reward": 99.80784844875336, "mean_length": 3841.74, "loss": 0.07788175344467163, "sps": 1880.7298583574118 }, { "update": 740, "global_step": 1515520, "num_episodes": 400, "mean_reward": 102.611374335289, "mean_length": 3837.4, "loss": 0.12514562904834747, "sps": 2199.6995663818816 }, { "update": 745, "global_step": 1525760, "num_episodes": 402, "mean_reward": 102.8225530385971, "mean_length": 3839.8, "loss": 0.1331540048122406, "sps": 505.88516130939627 }, { "update": 750, "global_step": 1536000, "num_episodes": 402, "mean_reward": 102.8225530385971, "mean_length": 3839.8, "loss": -0.008329648524522781, "sps": 2268.146789029585 } ]