| [ |
| { |
| "update": 5, |
| "global_step": 10240, |
| "num_episodes": 12, |
| "mean_reward": -1.8039220174153645, |
| "mean_length": 214.16666666666666, |
| "loss": 1.5202983617782593, |
| "sps": 1350.1393046305259 |
| }, |
| { |
| "update": 10, |
| "global_step": 20480, |
| "num_episodes": 12, |
| "mean_reward": -1.8039220174153645, |
| "mean_length": 214.16666666666666, |
| "loss": 0.6584598422050476, |
| "sps": 2047.2883333230054 |
| }, |
| { |
| "update": 15, |
| "global_step": 30720, |
| "num_episodes": 12, |
| "mean_reward": -1.8039220174153645, |
| "mean_length": 214.16666666666666, |
| "loss": 0.6658369898796082, |
| "sps": 2055.41935946998 |
| }, |
| { |
| "update": 20, |
| "global_step": 40960, |
| "num_episodes": 15, |
| "mean_reward": 2.1853319803873696, |
| "mean_length": 1510.9333333333334, |
| "loss": 1.3961129188537598, |
| "sps": 1412.8079652055148 |
| }, |
| { |
| "update": 25, |
| "global_step": 51200, |
| "num_episodes": 22, |
| "mean_reward": 2.625577276403254, |
| "mean_length": 1983.409090909091, |
| "loss": 1.2982873916625977, |
| "sps": 470.35650512927947 |
| }, |
| { |
| "update": 30, |
| "global_step": 61440, |
| "num_episodes": 22, |
| "mean_reward": 2.625577276403254, |
| "mean_length": 1983.409090909091, |
| "loss": 1.0302618741989136, |
| "sps": 2216.3140512542655 |
| }, |
| { |
| "update": 35, |
| "global_step": 71680, |
| "num_episodes": 22, |
| "mean_reward": 2.625577276403254, |
| "mean_length": 1983.409090909091, |
| "loss": 0.9787145853042603, |
| "sps": 2070.449469801819 |
| }, |
| { |
| "update": 40, |
| "global_step": 81920, |
| "num_episodes": 27, |
| "mean_reward": 3.300225363837348, |
| "mean_length": 2365.4074074074074, |
| "loss": 0.8183227777481079, |
| "sps": 755.8004689166112 |
| }, |
| { |
| "update": 45, |
| "global_step": 92160, |
| "num_episodes": 33, |
| "mean_reward": 3.7748724330555308, |
| "mean_length": 2564.090909090909, |
| "loss": 0.3192096948623657, |
| "sps": 1984.8729329667028 |
| }, |
| { |
| "update": 50, |
| "global_step": 102400, |
| "num_episodes": 33, |
| "mean_reward": 3.7748724330555308, |
| "mean_length": 2564.090909090909, |
| "loss": 0.8334144949913025, |
| "sps": 2085.15978587018 |
| }, |
| { |
| "update": 55, |
| "global_step": 112640, |
| "num_episodes": 33, |
| "mean_reward": 3.7748724330555308, |
| "mean_length": 2564.090909090909, |
| "loss": 0.5191329121589661, |
| "sps": 2075.918435842757 |
| }, |
| { |
| "update": 60, |
| "global_step": 122880, |
| "num_episodes": 35, |
| "mean_reward": 5.517268398829869, |
| "mean_length": 2989.0, |
| "loss": 0.6924473643302917, |
| "sps": 2020.4075872880348 |
| }, |
| { |
| "update": 65, |
| "global_step": 133120, |
| "num_episodes": 40, |
| "mean_reward": 5.594199681282044, |
| "mean_length": 3128.125, |
| "loss": 1.9699269533157349, |
| "sps": 429.94535608180354 |
| }, |
| { |
| "update": 70, |
| "global_step": 143360, |
| "num_episodes": 41, |
| "mean_reward": 5.430626520296422, |
| "mean_length": 3061.170731707317, |
| "loss": 2.169956684112549, |
| "sps": 2246.502749698459 |
| }, |
| { |
| "update": 75, |
| "global_step": 153600, |
| "num_episodes": 41, |
| "mean_reward": 5.430626520296422, |
| "mean_length": 3061.170731707317, |
| "loss": 2.5021121501922607, |
| "sps": 2240.800087024051 |
| }, |
| { |
| "update": 80, |
| "global_step": 163840, |
| "num_episodes": 46, |
| "mean_reward": 10.588201502095098, |
| "mean_length": 3177.4565217391305, |
| "loss": 2.6493680477142334, |
| "sps": 436.6488855176695 |
| }, |
| { |
| "update": 85, |
| "global_step": 174080, |
| "num_episodes": 52, |
| "mean_reward": 13.96443979556744, |
| "mean_length": 3211.1923076923076, |
| "loss": 2.5603184700012207, |
| "sps": 2163.643359340071 |
| }, |
| { |
| "update": 90, |
| "global_step": 184320, |
| "num_episodes": 52, |
| "mean_reward": 13.96443979556744, |
| "mean_length": 3211.1923076923076, |
| "loss": 2.336817502975464, |
| "sps": 2163.4161260603714 |
| }, |
| { |
| "update": 95, |
| "global_step": 194560, |
| "num_episodes": 52, |
| "mean_reward": 13.96443979556744, |
| "mean_length": 3211.1923076923076, |
| "loss": 3.234126091003418, |
| "sps": 2186.3643111360093 |
| }, |
| { |
| "update": 100, |
| "global_step": 204800, |
| "num_episodes": 55, |
| "mean_reward": 17.41634755568071, |
| "mean_length": 3220.3454545454547, |
| "loss": 5.442279815673828, |
| "sps": 1960.8741209669945 |
| }, |
| { |
| "update": 105, |
| "global_step": 215040, |
| "num_episodes": 62, |
| "mean_reward": 27.910769308767012, |
| "mean_length": 3345.3387096774195, |
| "loss": 2.81072735786438, |
| "sps": 1303.1946553744522 |
| }, |
| { |
| "update": 110, |
| "global_step": 225280, |
| "num_episodes": 66, |
| "mean_reward": 26.517480315584123, |
| "mean_length": 3151.8939393939395, |
| "loss": 0.830104649066925, |
| "sps": 1994.5176102850091 |
| }, |
| { |
| "update": 115, |
| "global_step": 235520, |
| "num_episodes": 66, |
| "mean_reward": 26.517480315584123, |
| "mean_length": 3151.8939393939395, |
| "loss": 0.8076149225234985, |
| "sps": 2020.1985152956534 |
| }, |
| { |
| "update": 120, |
| "global_step": 245760, |
| "num_episodes": 69, |
| "mean_reward": 35.54813836968464, |
| "mean_length": 3449.6376811594205, |
| "loss": 1.7053556442260742, |
| "sps": 1392.6768298802306 |
| }, |
| { |
| "update": 125, |
| "global_step": 256000, |
| "num_episodes": 72, |
| "mean_reward": 34.35707297590044, |
| "mean_length": 3315.0, |
| "loss": 0.5338165163993835, |
| "sps": 2218.857641762577 |
| }, |
| { |
| "update": 130, |
| "global_step": 266240, |
| "num_episodes": 73, |
| "mean_reward": 37.437843152921495, |
| "mean_length": 3406.5753424657532, |
| "loss": 0.31061413884162903, |
| "sps": 2123.373275302262 |
| }, |
| { |
| "update": 135, |
| "global_step": 276480, |
| "num_episodes": 73, |
| "mean_reward": 37.437843152921495, |
| "mean_length": 3406.5753424657532, |
| "loss": 0.370238333940506, |
| "sps": 2270.1968695887504 |
| }, |
| { |
| "update": 140, |
| "global_step": 286720, |
| "num_episodes": 76, |
| "mean_reward": 42.764281247791494, |
| "mean_length": 3535.8289473684213, |
| "loss": 0.4431289732456207, |
| "sps": 2235.4805453156696 |
| }, |
| { |
| "update": 145, |
| "global_step": 296960, |
| "num_episodes": 78, |
| "mean_reward": 45.29765256245931, |
| "mean_length": 3576.3076923076924, |
| "loss": 0.11675499379634857, |
| "sps": 2248.214328955258 |
| }, |
| { |
| "update": 150, |
| "global_step": 307200, |
| "num_episodes": 79, |
| "mean_reward": 48.32240367841117, |
| "mean_length": 3657.6202531645567, |
| "loss": 0.3231047987937927, |
| "sps": 2273.3126392381514 |
| }, |
| { |
| "update": 155, |
| "global_step": 317440, |
| "num_episodes": 79, |
| "mean_reward": 48.32240367841117, |
| "mean_length": 3657.6202531645567, |
| "loss": 0.24974392354488373, |
| "sps": 2276.2572089535233 |
| }, |
| { |
| "update": 160, |
| "global_step": 327680, |
| "num_episodes": 90, |
| "mean_reward": 48.88773588604397, |
| "mean_length": 3444.8333333333335, |
| "loss": 1.1440778970718384, |
| "sps": 469.25381312597585 |
| }, |
| { |
| "update": 165, |
| "global_step": 337920, |
| "num_episodes": 99, |
| "mean_reward": 50.101470494511155, |
| "mean_length": 3348.060606060606, |
| "loss": 0.3425019383430481, |
| "sps": 1903.5186981694392 |
| }, |
| { |
| "update": 170, |
| "global_step": 348160, |
| "num_episodes": 99, |
| "mean_reward": 50.101470494511155, |
| "mean_length": 3348.060606060606, |
| "loss": 0.18799301981925964, |
| "sps": 2130.3254740506277 |
| }, |
| { |
| "update": 175, |
| "global_step": 358400, |
| "num_episodes": 99, |
| "mean_reward": 50.101470494511155, |
| "mean_length": 3348.060606060606, |
| "loss": 0.11751141399145126, |
| "sps": 2125.2682193036194 |
| }, |
| { |
| "update": 180, |
| "global_step": 368640, |
| "num_episodes": 101, |
| "mean_reward": 52.442985696792604, |
| "mean_length": 3414.58, |
| "loss": 0.2516184449195862, |
| "sps": 1703.135913858338 |
| }, |
| { |
| "update": 185, |
| "global_step": 378880, |
| "num_episodes": 114, |
| "mean_reward": 61.27311011314392, |
| "mean_length": 3506.98, |
| "loss": 0.11899760365486145, |
| "sps": 2246.894695169401 |
| }, |
| { |
| "update": 190, |
| "global_step": 389120, |
| "num_episodes": 114, |
| "mean_reward": 61.27311011314392, |
| "mean_length": 3506.98, |
| "loss": 0.30889129638671875, |
| "sps": 2289.0664114120555 |
| }, |
| { |
| "update": 195, |
| "global_step": 399360, |
| "num_episodes": 114, |
| "mean_reward": 61.27311011314392, |
| "mean_length": 3506.98, |
| "loss": -0.06268183887004852, |
| "sps": 2277.013858345863 |
| }, |
| { |
| "update": 200, |
| "global_step": 409600, |
| "num_episodes": 114, |
| "mean_reward": 61.27311011314392, |
| "mean_length": 3506.98, |
| "loss": 0.5904355645179749, |
| "sps": 2173.4262065580283 |
| }, |
| { |
| "update": 205, |
| "global_step": 419840, |
| "num_episodes": 121, |
| "mean_reward": 72.71755365371705, |
| "mean_length": 3714.22, |
| "loss": 1.7067689895629883, |
| "sps": 308.3386383776914 |
| }, |
| { |
| "update": 210, |
| "global_step": 430080, |
| "num_episodes": 123, |
| "mean_reward": 72.60844767570495, |
| "mean_length": 3634.11, |
| "loss": 0.22778448462486267, |
| "sps": 2270.040885359684 |
| }, |
| { |
| "update": 215, |
| "global_step": 440320, |
| "num_episodes": 123, |
| "mean_reward": 72.60844767570495, |
| "mean_length": 3634.11, |
| "loss": 0.4201895296573639, |
| "sps": 2286.5998087120915 |
| }, |
| { |
| "update": 220, |
| "global_step": 450560, |
| "num_episodes": 123, |
| "mean_reward": 72.60844767570495, |
| "mean_length": 3634.11, |
| "loss": 0.08545871078968048, |
| "sps": 2301.79050438536 |
| }, |
| { |
| "update": 225, |
| "global_step": 460800, |
| "num_episodes": 129, |
| "mean_reward": 81.06719317436219, |
| "mean_length": 3731.63, |
| "loss": 0.4734661877155304, |
| "sps": 1893.4799410834491 |
| }, |
| { |
| "update": 230, |
| "global_step": 471040, |
| "num_episodes": 132, |
| "mean_reward": 83.9696240234375, |
| "mean_length": 3736.94, |
| "loss": 2.656114101409912, |
| "sps": 1747.8656326711334 |
| }, |
| { |
| "update": 235, |
| "global_step": 481280, |
| "num_episodes": 132, |
| "mean_reward": 83.9696240234375, |
| "mean_length": 3736.94, |
| "loss": 0.3720909357070923, |
| "sps": 2290.767122681247 |
| }, |
| { |
| "update": 240, |
| "global_step": 491520, |
| "num_episodes": 133, |
| "mean_reward": 86.32599678993225, |
| "mean_length": 3836.51, |
| "loss": 0.28272855281829834, |
| "sps": 1660.2388311365764 |
| }, |
| { |
| "update": 245, |
| "global_step": 501760, |
| "num_episodes": 139, |
| "mean_reward": 90.87246092796326, |
| "mean_length": 3642.75, |
| "loss": 0.7918240427970886, |
| "sps": 226.8334199523192 |
| }, |
| { |
| "update": 250, |
| "global_step": 512000, |
| "num_episodes": 144, |
| "mean_reward": 93.14730869293213, |
| "mean_length": 3661.34, |
| "loss": 0.5329251885414124, |
| "sps": 1291.7356099331766 |
| }, |
| { |
| "update": 255, |
| "global_step": 522240, |
| "num_episodes": 147, |
| "mean_reward": 90.92609573364258, |
| "mean_length": 3465.71, |
| "loss": 0.2208552062511444, |
| "sps": 2176.5233668919136 |
| }, |
| { |
| "update": 260, |
| "global_step": 532480, |
| "num_episodes": 148, |
| "mean_reward": 94.03146264076233, |
| "mean_length": 3565.28, |
| "loss": 0.3058362305164337, |
| "sps": 1287.827730215577 |
| }, |
| { |
| "update": 265, |
| "global_step": 542720, |
| "num_episodes": 153, |
| "mean_reward": 93.66601790428162, |
| "mean_length": 3469.44, |
| "loss": 0.2177593857049942, |
| "sps": 2185.2068742705324 |
| }, |
| { |
| "update": 270, |
| "global_step": 552960, |
| "num_episodes": 154, |
| "mean_reward": 96.52238609313964, |
| "mean_length": 3569.01, |
| "loss": 0.1883123517036438, |
| "sps": 2191.858927925312 |
| }, |
| { |
| "update": 275, |
| "global_step": 563200, |
| "num_episodes": 155, |
| "mean_reward": 99.62512998580932, |
| "mean_length": 3668.07, |
| "loss": 0.27221783995628357, |
| "sps": 2123.45043596574 |
| }, |
| { |
| "update": 280, |
| "global_step": 573440, |
| "num_episodes": 155, |
| "mean_reward": 99.62512998580932, |
| "mean_length": 3668.07, |
| "loss": 0.18106494843959808, |
| "sps": 2141.5921280557186 |
| }, |
| { |
| "update": 285, |
| "global_step": 583680, |
| "num_episodes": 162, |
| "mean_reward": 100.8679942893982, |
| "mean_length": 3670.27, |
| "loss": 0.5695434212684631, |
| "sps": 500.88139423117855 |
| }, |
| { |
| "update": 290, |
| "global_step": 593920, |
| "num_episodes": 162, |
| "mean_reward": 100.8679942893982, |
| "mean_length": 3670.27, |
| "loss": 0.07588323950767517, |
| "sps": 2141.039120248053 |
| }, |
| { |
| "update": 295, |
| "global_step": 604160, |
| "num_episodes": 164, |
| "mean_reward": 103.97303117752075, |
| "mean_length": 3770.47, |
| "loss": 0.3367934823036194, |
| "sps": 2232.451302693334 |
| }, |
| { |
| "update": 300, |
| "global_step": 614400, |
| "num_episodes": 164, |
| "mean_reward": 103.97303117752075, |
| "mean_length": 3770.47, |
| "loss": 0.23896968364715576, |
| "sps": 2206.894347275839 |
| }, |
| { |
| "update": 305, |
| "global_step": 624640, |
| "num_episodes": 168, |
| "mean_reward": 108.57997800827026, |
| "mean_length": 3866.64, |
| "loss": 0.8634886145591736, |
| "sps": 2068.617103764695 |
| }, |
| { |
| "update": 310, |
| "global_step": 634880, |
| "num_episodes": 168, |
| "mean_reward": 108.57997800827026, |
| "mean_length": 3866.64, |
| "loss": 0.05964889004826546, |
| "sps": 2227.05463999608 |
| }, |
| { |
| "update": 315, |
| "global_step": 645120, |
| "num_episodes": 170, |
| "mean_reward": 108.82997495651244, |
| "mean_length": 3866.64, |
| "loss": 0.3584131598472595, |
| "sps": 2093.8527505731936 |
| }, |
| { |
| "update": 320, |
| "global_step": 655360, |
| "num_episodes": 170, |
| "mean_reward": 108.82997495651244, |
| "mean_length": 3866.64, |
| "loss": 0.11968313157558441, |
| "sps": 2108.9095828206277 |
| }, |
| { |
| "update": 325, |
| "global_step": 665600, |
| "num_episodes": 174, |
| "mean_reward": 112.43493017196656, |
| "mean_length": 3960.91, |
| "loss": 1.6604670286178589, |
| "sps": 2079.794147719775 |
| }, |
| { |
| "update": 330, |
| "global_step": 675840, |
| "num_episodes": 175, |
| "mean_reward": 115.29129805564881, |
| "mean_length": 4060.48, |
| "loss": 0.19095765054225922, |
| "sps": 2102.571665929904 |
| }, |
| { |
| "update": 335, |
| "global_step": 686080, |
| "num_episodes": 175, |
| "mean_reward": 115.29129805564881, |
| "mean_length": 4060.48, |
| "loss": 0.14229056239128113, |
| "sps": 2107.3760666985922 |
| }, |
| { |
| "update": 340, |
| "global_step": 696320, |
| "num_episodes": 175, |
| "mean_reward": 115.29129805564881, |
| "mean_length": 4060.48, |
| "loss": 0.36735689640045166, |
| "sps": 2157.1693383290976 |
| }, |
| { |
| "update": 345, |
| "global_step": 706560, |
| "num_episodes": 189, |
| "mean_reward": 109.84256043434144, |
| "mean_length": 3863.03, |
| "loss": 0.7830713987350464, |
| "sps": 381.54779046834835 |
| }, |
| { |
| "update": 350, |
| "global_step": 716800, |
| "num_episodes": 193, |
| "mean_reward": 109.65507596015931, |
| "mean_length": 3901.01, |
| "loss": 9.749893188476562, |
| "sps": 258.15120412826286 |
| }, |
| { |
| "update": 355, |
| "global_step": 727040, |
| "num_episodes": 197, |
| "mean_reward": 109.78353757858277, |
| "mean_length": 3921.28, |
| "loss": 0.27189120650291443, |
| "sps": 2234.7686845905673 |
| }, |
| { |
| "update": 360, |
| "global_step": 737280, |
| "num_episodes": 198, |
| "mean_reward": 110.03232297897338, |
| "mean_length": 3921.28, |
| "loss": 0.27451807260513306, |
| "sps": 2208.748818348954 |
| }, |
| { |
| "update": 365, |
| "global_step": 747520, |
| "num_episodes": 198, |
| "mean_reward": 110.03232297897338, |
| "mean_length": 3921.28, |
| "loss": 0.3887019753456116, |
| "sps": 2179.671400488869 |
| }, |
| { |
| "update": 370, |
| "global_step": 757760, |
| "num_episodes": 200, |
| "mean_reward": 110.02969996452332, |
| "mean_length": 3920.77, |
| "loss": 1.3407719135284424, |
| "sps": 190.58035959703773 |
| }, |
| { |
| "update": 375, |
| "global_step": 768000, |
| "num_episodes": 205, |
| "mean_reward": 113.28294358253478, |
| "mean_length": 4042.39, |
| "loss": 2.234469175338745, |
| "sps": 165.74069059867267 |
| }, |
| { |
| "update": 380, |
| "global_step": 778240, |
| "num_episodes": 209, |
| "mean_reward": 112.94422784805298, |
| "mean_length": 4057.27, |
| "loss": 0.2318531721830368, |
| "sps": 2224.56366677181 |
| }, |
| { |
| "update": 385, |
| "global_step": 788480, |
| "num_episodes": 209, |
| "mean_reward": 112.94422784805298, |
| "mean_length": 4057.27, |
| "loss": 0.2268732637166977, |
| "sps": 2253.631967291392 |
| }, |
| { |
| "update": 390, |
| "global_step": 798720, |
| "num_episodes": 210, |
| "mean_reward": 115.53032508850097, |
| "mean_length": 4153.44, |
| "loss": 0.06943273544311523, |
| "sps": 2241.199985702131 |
| }, |
| { |
| "update": 395, |
| "global_step": 808960, |
| "num_episodes": 211, |
| "mean_reward": 118.63075592041015, |
| "mean_length": 4252.74, |
| "loss": 0.21790897846221924, |
| "sps": 2233.8736866140016 |
| }, |
| { |
| "update": 400, |
| "global_step": 819200, |
| "num_episodes": 218, |
| "mean_reward": 112.67819341659546, |
| "mean_length": 4053.49, |
| "loss": 0.28217822313308716, |
| "sps": 2245.8419819824057 |
| }, |
| { |
| "update": 405, |
| "global_step": 829440, |
| "num_episodes": 218, |
| "mean_reward": 112.67819341659546, |
| "mean_length": 4053.49, |
| "loss": 0.07389844954013824, |
| "sps": 2093.830293640606 |
| }, |
| { |
| "update": 410, |
| "global_step": 839680, |
| "num_episodes": 219, |
| "mean_reward": 112.67819341659546, |
| "mean_length": 4053.49, |
| "loss": 0.19773584604263306, |
| "sps": 2089.0640172885346 |
| }, |
| { |
| "update": 415, |
| "global_step": 849920, |
| "num_episodes": 221, |
| "mean_reward": 115.57382575035095, |
| "mean_length": 4142.16, |
| "loss": 0.2218392789363861, |
| "sps": 2037.8682435790647 |
| }, |
| { |
| "update": 420, |
| "global_step": 860160, |
| "num_episodes": 225, |
| "mean_reward": 118.26686740875245, |
| "mean_length": 4220.72, |
| "loss": 0.16683566570281982, |
| "sps": 2062.011715868465 |
| }, |
| { |
| "update": 425, |
| "global_step": 870400, |
| "num_episodes": 225, |
| "mean_reward": 118.26686740875245, |
| "mean_length": 4220.72, |
| "loss": 0.3665807843208313, |
| "sps": 2085.879289076417 |
| }, |
| { |
| "update": 430, |
| "global_step": 880640, |
| "num_episodes": 227, |
| "mean_reward": 115.4131219291687, |
| "mean_length": 4123.01, |
| "loss": 0.2211420238018036, |
| "sps": 1319.1510855667186 |
| }, |
| { |
| "update": 435, |
| "global_step": 890880, |
| "num_episodes": 228, |
| "mean_reward": 118.51452213287354, |
| "mean_length": 4220.72, |
| "loss": 0.01409757137298584, |
| "sps": 2249.26043334859 |
| }, |
| { |
| "update": 440, |
| "global_step": 901120, |
| "num_episodes": 232, |
| "mean_reward": 121.46894996643067, |
| "mean_length": 4313.96, |
| "loss": 0.34094715118408203, |
| "sps": 2235.574796161653 |
| }, |
| { |
| "update": 445, |
| "global_step": 911360, |
| "num_episodes": 232, |
| "mean_reward": 121.46894996643067, |
| "mean_length": 4313.96, |
| "loss": 0.03650672733783722, |
| "sps": 2254.642878384582 |
| }, |
| { |
| "update": 450, |
| "global_step": 921600, |
| "num_episodes": 233, |
| "mean_reward": 122.21637367248535, |
| "mean_length": 4313.96, |
| "loss": 0.4388778805732727, |
| "sps": 2230.3176453482333 |
| }, |
| { |
| "update": 455, |
| "global_step": 931840, |
| "num_episodes": 235, |
| "mean_reward": 122.47082150459289, |
| "mean_length": 4315.26, |
| "loss": 0.04285623878240585, |
| "sps": 2245.5355177671836 |
| }, |
| { |
| "update": 460, |
| "global_step": 942080, |
| "num_episodes": 241, |
| "mean_reward": 125.87507015228272, |
| "mean_length": 4387.15, |
| "loss": 0.18071885406970978, |
| "sps": 2186.650382997658 |
| }, |
| { |
| "update": 465, |
| "global_step": 952320, |
| "num_episodes": 241, |
| "mean_reward": 125.87507015228272, |
| "mean_length": 4387.15, |
| "loss": 0.15905889868736267, |
| "sps": 2246.1825959008293 |
| }, |
| { |
| "update": 470, |
| "global_step": 962560, |
| "num_episodes": 243, |
| "mean_reward": 125.64015349388123, |
| "mean_length": 4385.41, |
| "loss": 0.2828221917152405, |
| "sps": 2237.777989892148 |
| }, |
| { |
| "update": 475, |
| "global_step": 972800, |
| "num_episodes": 244, |
| "mean_reward": 128.7428973865509, |
| "mean_length": 4484.47, |
| "loss": -0.001385476440191269, |
| "sps": 2224.056236169126 |
| }, |
| { |
| "update": 480, |
| "global_step": 983040, |
| "num_episodes": 247, |
| "mean_reward": 134.21521110534667, |
| "mean_length": 4676.7, |
| "loss": 0.6856433153152466, |
| "sps": 166.9800451494931 |
| }, |
| { |
| "update": 485, |
| "global_step": 993280, |
| "num_episodes": 248, |
| "mean_reward": 131.0992687225342, |
| "mean_length": 4598.8, |
| "loss": 0.22580231726169586, |
| "sps": 2205.4840180784313 |
| }, |
| { |
| "update": 490, |
| "global_step": 1003520, |
| "num_episodes": 253, |
| "mean_reward": 130.85596141815185, |
| "mean_length": 4590.29, |
| "loss": 15.813247680664062, |
| "sps": 1554.4720078900461 |
| }, |
| { |
| "update": 495, |
| "global_step": 1013760, |
| "num_episodes": 262, |
| "mean_reward": 118.73081938743591, |
| "mean_length": 4225.92, |
| "loss": 3.043635368347168, |
| "sps": 364.1160359745992 |
| }, |
| { |
| "update": 500, |
| "global_step": 1024000, |
| "num_episodes": 266, |
| "mean_reward": 112.27171317100525, |
| "mean_length": 4026.32, |
| "loss": 1.5756291151046753, |
| "sps": 2216.9506893762605 |
| }, |
| { |
| "update": 505, |
| "global_step": 1034240, |
| "num_episodes": 271, |
| "mean_reward": 106.31812086105347, |
| "mean_length": 3831.41, |
| "loss": 0.8373554348945618, |
| "sps": 2230.0889295621437 |
| }, |
| { |
| "update": 510, |
| "global_step": 1044480, |
| "num_episodes": 271, |
| "mean_reward": 106.31812086105347, |
| "mean_length": 3831.41, |
| "loss": 0.1685715615749359, |
| "sps": 2250.612331563048 |
| }, |
| { |
| "update": 515, |
| "global_step": 1054720, |
| "num_episodes": 279, |
| "mean_reward": 94.40078766822815, |
| "mean_length": 3437.38, |
| "loss": 0.3164912760257721, |
| "sps": 755.1427540116495 |
| }, |
| { |
| "update": 520, |
| "global_step": 1064960, |
| "num_episodes": 284, |
| "mean_reward": 97.14825866699219, |
| "mean_length": 3536.28, |
| "loss": 2.4807233810424805, |
| "sps": 2229.202303336277 |
| }, |
| { |
| "update": 525, |
| "global_step": 1075200, |
| "num_episodes": 288, |
| "mean_reward": 100.25763621330262, |
| "mean_length": 3636.97, |
| "loss": 7.4268412590026855, |
| "sps": 2240.4780501902205 |
| }, |
| { |
| "update": 530, |
| "global_step": 1085440, |
| "num_episodes": 288, |
| "mean_reward": 100.25763621330262, |
| "mean_length": 3636.97, |
| "loss": 13.631644248962402, |
| "sps": 2167.9135230629645 |
| }, |
| { |
| "update": 535, |
| "global_step": 1095680, |
| "num_episodes": 289, |
| "mean_reward": 103.11298480987548, |
| "mean_length": 3736.54, |
| "loss": 16.333158493041992, |
| "sps": 2159.176669252336 |
| }, |
| { |
| "update": 540, |
| "global_step": 1105920, |
| "num_episodes": 292, |
| "mean_reward": 103.19977847099304, |
| "mean_length": 3834.19, |
| "loss": 1.755424976348877, |
| "sps": 2140.8384853415 |
| }, |
| { |
| "update": 545, |
| "global_step": 1116160, |
| "num_episodes": 293, |
| "mean_reward": 106.40395077705384, |
| "mean_length": 3909.42, |
| "loss": 2.900822639465332, |
| "sps": 175.96426214417178 |
| }, |
| { |
| "update": 550, |
| "global_step": 1126400, |
| "num_episodes": 296, |
| "mean_reward": 106.08349679946899, |
| "mean_length": 3927.6, |
| "loss": 3.329728126525879, |
| "sps": 162.40274463106428 |
| }, |
| { |
| "update": 555, |
| "global_step": 1136640, |
| "num_episodes": 300, |
| "mean_reward": 100.18753468513489, |
| "mean_length": 3833.08, |
| "loss": 0.3283099830150604, |
| "sps": 2190.1276229463892 |
| }, |
| { |
| "update": 560, |
| "global_step": 1146880, |
| "num_episodes": 307, |
| "mean_reward": 100.06094589233399, |
| "mean_length": 3798.23, |
| "loss": 3.218208074569702, |
| "sps": 196.6633145580635 |
| }, |
| { |
| "update": 565, |
| "global_step": 1157120, |
| "num_episodes": 308, |
| "mean_reward": 99.96278611183166, |
| "mean_length": 3815.32, |
| "loss": 0.05826599895954132, |
| "sps": 2198.7480074814316 |
| }, |
| { |
| "update": 570, |
| "global_step": 1167360, |
| "num_episodes": 308, |
| "mean_reward": 99.96278611183166, |
| "mean_length": 3815.32, |
| "loss": 0.19391702115535736, |
| "sps": 2247.8319226488716 |
| }, |
| { |
| "update": 575, |
| "global_step": 1177600, |
| "num_episodes": 311, |
| "mean_reward": 97.06219799995422, |
| "mean_length": 3719.15, |
| "loss": 0.5620024800300598, |
| "sps": 2199.015937226961 |
| }, |
| { |
| "update": 580, |
| "global_step": 1187840, |
| "num_episodes": 316, |
| "mean_reward": 96.72605070114136, |
| "mean_length": 3721.99, |
| "loss": 9.720523834228516, |
| "sps": 1486.7675112343536 |
| }, |
| { |
| "update": 585, |
| "global_step": 1198080, |
| "num_episodes": 318, |
| "mean_reward": 96.66609871864318, |
| "mean_length": 3723.94, |
| "loss": 0.6666443943977356, |
| "sps": 2234.009052616905 |
| }, |
| { |
| "update": 590, |
| "global_step": 1208320, |
| "num_episodes": 318, |
| "mean_reward": 96.66609871864318, |
| "mean_length": 3723.94, |
| "loss": 0.2884657084941864, |
| "sps": 2239.9481475435614 |
| }, |
| { |
| "update": 595, |
| "global_step": 1218560, |
| "num_episodes": 320, |
| "mean_reward": 96.41710000038147, |
| "mean_length": 3723.94, |
| "loss": 0.8150730133056641, |
| "sps": 2242.7699730761115 |
| }, |
| { |
| "update": 600, |
| "global_step": 1228800, |
| "num_episodes": 322, |
| "mean_reward": 101.80320601463318, |
| "mean_length": 3725.16, |
| "loss": 0.8642079830169678, |
| "sps": 285.7807108395187 |
| }, |
| { |
| "update": 605, |
| "global_step": 1239040, |
| "num_episodes": 326, |
| "mean_reward": 98.85420690536499, |
| "mean_length": 3650.4, |
| "loss": 0.6339423060417175, |
| "sps": 1850.348648405877 |
| }, |
| { |
| "update": 610, |
| "global_step": 1249280, |
| "num_episodes": 326, |
| "mean_reward": 98.85420690536499, |
| "mean_length": 3650.4, |
| "loss": 0.5616691708564758, |
| "sps": 2222.6578253029397 |
| }, |
| { |
| "update": 615, |
| "global_step": 1259520, |
| "num_episodes": 331, |
| "mean_reward": 90.45196411609649, |
| "mean_length": 3548.02, |
| "loss": 0.2677087187767029, |
| "sps": 2224.9883354292588 |
| }, |
| { |
| "update": 620, |
| "global_step": 1269760, |
| "num_episodes": 331, |
| "mean_reward": 90.45196411609649, |
| "mean_length": 3548.02, |
| "loss": 0.05597818270325661, |
| "sps": 2232.8842122446686 |
| }, |
| { |
| "update": 625, |
| "global_step": 1280000, |
| "num_episodes": 335, |
| "mean_reward": 89.9470157957077, |
| "mean_length": 3546.72, |
| "loss": 0.4655948281288147, |
| "sps": 314.0543075325823 |
| }, |
| { |
| "update": 630, |
| "global_step": 1290240, |
| "num_episodes": 337, |
| "mean_reward": 83.67983005046844, |
| "mean_length": 3359.15, |
| "loss": 0.0969170331954956, |
| "sps": 2181.9774383339013 |
| }, |
| { |
| "update": 635, |
| "global_step": 1300480, |
| "num_episodes": 342, |
| "mean_reward": 86.38446100711822, |
| "mean_length": 3465.84, |
| "loss": 0.7292745113372803, |
| "sps": 2124.75987235513 |
| }, |
| { |
| "update": 640, |
| "global_step": 1310720, |
| "num_episodes": 342, |
| "mean_reward": 86.38446100711822, |
| "mean_length": 3465.84, |
| "loss": 0.04831065982580185, |
| "sps": 2168.1603087847257 |
| }, |
| { |
| "update": 645, |
| "global_step": 1320960, |
| "num_episodes": 343, |
| "mean_reward": 89.47943027019501, |
| "mean_length": 3565.41, |
| "loss": 0.1487177461385727, |
| "sps": 2107.2008167857116 |
| }, |
| { |
| "update": 650, |
| "global_step": 1331200, |
| "num_episodes": 344, |
| "mean_reward": 89.2304312467575, |
| "mean_length": 3565.41, |
| "loss": 0.12002412974834442, |
| "sps": 2127.214484781757 |
| }, |
| { |
| "update": 655, |
| "global_step": 1341440, |
| "num_episodes": 347, |
| "mean_reward": 88.98148257732392, |
| "mean_length": 3565.41, |
| "loss": 0.1547270268201828, |
| "sps": 2165.6753206938283 |
| }, |
| { |
| "update": 660, |
| "global_step": 1351680, |
| "num_episodes": 347, |
| "mean_reward": 88.98148257732392, |
| "mean_length": 3565.41, |
| "loss": 0.1951214224100113, |
| "sps": 2229.5540914965095 |
| }, |
| { |
| "update": 665, |
| "global_step": 1361920, |
| "num_episodes": 350, |
| "mean_reward": 88.69014154911041, |
| "mean_length": 3547.65, |
| "loss": 0.02001141756772995, |
| "sps": 2220.3189922208117 |
| }, |
| { |
| "update": 670, |
| "global_step": 1372160, |
| "num_episodes": 353, |
| "mean_reward": 91.77625680446624, |
| "mean_length": 3645.53, |
| "loss": 0.1334764063358307, |
| "sps": 2217.9524326914734 |
| }, |
| { |
| "update": 675, |
| "global_step": 1382400, |
| "num_episodes": 356, |
| "mean_reward": 94.39934126377106, |
| "mean_length": 3746.85, |
| "loss": 1.491716742515564, |
| "sps": 115.85030478370321 |
| }, |
| { |
| "update": 680, |
| "global_step": 1392640, |
| "num_episodes": 362, |
| "mean_reward": 94.36987939357758, |
| "mean_length": 3759.39, |
| "loss": 0.08460421860218048, |
| "sps": 466.5453273147261 |
| }, |
| { |
| "update": 685, |
| "global_step": 1402880, |
| "num_episodes": 370, |
| "mean_reward": 91.42453453540801, |
| "mean_length": 3664.28, |
| "loss": 0.06896203756332397, |
| "sps": 2211.12877876806 |
| }, |
| { |
| "update": 690, |
| "global_step": 1413120, |
| "num_episodes": 371, |
| "mean_reward": 94.27827970981598, |
| "mean_length": 3763.34, |
| "loss": 0.08465250581502914, |
| "sps": 2224.3649291369943 |
| }, |
| { |
| "update": 695, |
| "global_step": 1423360, |
| "num_episodes": 372, |
| "mean_reward": 94.03031706333161, |
| "mean_length": 3763.34, |
| "loss": 0.12509916722774506, |
| "sps": 2233.8556777814188 |
| }, |
| { |
| "update": 700, |
| "global_step": 1433600, |
| "num_episodes": 372, |
| "mean_reward": 94.03031706333161, |
| "mean_length": 3763.34, |
| "loss": 0.05390219762921333, |
| "sps": 2225.4858130028433 |
| }, |
| { |
| "update": 705, |
| "global_step": 1443840, |
| "num_episodes": 378, |
| "mean_reward": 100.22330937862397, |
| "mean_length": 3957.32, |
| "loss": 1.2421371936798096, |
| "sps": 176.73331940579132 |
| }, |
| { |
| "update": 710, |
| "global_step": 1454080, |
| "num_episodes": 380, |
| "mean_reward": 100.36302034854889, |
| "mean_length": 3980.35, |
| "loss": 0.06020417809486389, |
| "sps": 2151.8459777584108 |
| }, |
| { |
| "update": 715, |
| "global_step": 1464320, |
| "num_episodes": 383, |
| "mean_reward": 103.226567196846, |
| "mean_length": 4079.78, |
| "loss": 0.03555985540151596, |
| "sps": 779.0322633363131 |
| }, |
| { |
| "update": 720, |
| "global_step": 1474560, |
| "num_episodes": 385, |
| "mean_reward": 100.37291463375091, |
| "mean_length": 3983.61, |
| "loss": 0.42947918176651, |
| "sps": 1976.0163143357508 |
| }, |
| { |
| "update": 725, |
| "global_step": 1484800, |
| "num_episodes": 387, |
| "mean_reward": 102.8877759027481, |
| "mean_length": 4082.64, |
| "loss": 0.3529297709465027, |
| "sps": 2149.9991470019922 |
| }, |
| { |
| "update": 730, |
| "global_step": 1495040, |
| "num_episodes": 393, |
| "mean_reward": 96.80677964687348, |
| "mean_length": 3770.28, |
| "loss": 0.5400058627128601, |
| "sps": 182.2636316464271 |
| }, |
| { |
| "update": 735, |
| "global_step": 1505280, |
| "num_episodes": 397, |
| "mean_reward": 99.80784844875336, |
| "mean_length": 3841.74, |
| "loss": 0.07788175344467163, |
| "sps": 1880.7298583574118 |
| }, |
| { |
| "update": 740, |
| "global_step": 1515520, |
| "num_episodes": 400, |
| "mean_reward": 102.611374335289, |
| "mean_length": 3837.4, |
| "loss": 0.12514562904834747, |
| "sps": 2199.6995663818816 |
| }, |
| { |
| "update": 745, |
| "global_step": 1525760, |
| "num_episodes": 402, |
| "mean_reward": 102.8225530385971, |
| "mean_length": 3839.8, |
| "loss": 0.1331540048122406, |
| "sps": 505.88516130939627 |
| }, |
| { |
| "update": 750, |
| "global_step": 1536000, |
| "num_episodes": 402, |
| "mean_reward": 102.8225530385971, |
| "mean_length": 3839.8, |
| "loss": -0.008329648524522781, |
| "sps": 2268.146789029585 |
| } |
| ] |