Mirror from sadra-barikbin/ppo-LunarLander-v2-unit8

962b47f verified 6 months ago

19.3 kB

	{
	"name": "root",
	"gauges": {
	"Pyramids.Policy.Entropy.mean": {
	"value": 0.20098835229873657,
	"min": 0.1905842274427414,
	"max": 0.3273382782936096,
	"count": 41
	},
	"Pyramids.Policy.Entropy.sum": {
	"value": 6016.787109375,
	"min": 4658.0146484375,
	"max": 9935.37109375,
	"count": 41
	},
	"Pyramids.Environment.EpisodeLength.mean": {
	"value": 242.45,
	"min": 212.58778625954199,
	"max": 347.2087912087912,
	"count": 41
	},
	"Pyramids.Environment.EpisodeLength.sum": {
	"value": 29094.0,
	"min": 9657.0,
	"max": 32101.0,
	"count": 41
	},
	"Pyramids.Step.mean": {
	"value": 2999998.0,
	"min": 1799931.0,
	"max": 2999998.0,
	"count": 41
	},
	"Pyramids.Step.sum": {
	"value": 2999998.0,
	"min": 1799931.0,
	"max": 2999998.0,
	"count": 41
	},
	"Pyramids.Policy.ExtrinsicValueEstimate.mean": {
	"value": 0.7147245407104492,
	"min": 0.5930624604225159,
	"max": 0.8102691173553467,
	"count": 41
	},
	"Pyramids.Policy.ExtrinsicValueEstimate.sum": {
	"value": 207.98483276367188,
	"min": 74.12632751464844,
	"max": 240.64993286132812,
	"count": 41
	},
	"Pyramids.Policy.RndValueEstimate.mean": {
	"value": 0.014350265264511108,
	"min": -0.008211393840610981,
	"max": 0.029343511909246445,
	"count": 41
	},
	"Pyramids.Policy.RndValueEstimate.sum": {
	"value": 4.17592716217041,
	"min": -2.3977270126342773,
	"max": 8.421587944030762,
	"count": 41
	},
	"Pyramids.Environment.CumulativeReward.mean": {
	"value": 1.7242083196838698,
	"min": 1.4989076720161751,
	"max": 1.7874121977627733,
	"count": 41
	},
	"Pyramids.Environment.CumulativeReward.sum": {
	"value": 206.90499836206436,
	"min": 62.34299957752228,
	"max": 234.1509979069233,
	"count": 41
	},
	"Pyramids.Policy.ExtrinsicReward.mean": {
	"value": 1.7242083196838698,
	"min": 1.4989076720161751,
	"max": 1.7874121977627733,
	"count": 41
	},
	"Pyramids.Policy.ExtrinsicReward.sum": {
	"value": 206.90499836206436,
	"min": 62.34299957752228,
	"max": 234.1509979069233,
	"count": 41
	},
	"Pyramids.Policy.RndReward.mean": {
	"value": 0.020705985120730474,
	"min": 0.018862549592183194,
	"max": 0.031432837420119666,
	"count": 41
	},
	"Pyramids.Policy.RndReward.sum": {
	"value": 2.484718214487657,
	"min": 0.7802353762090206,
	"max": 3.111850904591847,
	"count": 41
	},
	"Pyramids.Losses.PolicyLoss.mean": {
	"value": 0.0707786365126681,
	"min": 0.06456055554112286,
	"max": 0.07226113428580848,
	"count": 41
	},
	"Pyramids.Losses.PolicyLoss.sum": {
	"value": 0.9909009111773533,
	"min": 0.4180760901654139,
	"max": 1.0358603657017846,
	"count": 41
	},
	"Pyramids.Losses.ValueLoss.mean": {
	"value": 0.01765750899184717,
	"min": 0.011768245224500007,
	"max": 0.01780292463183327,
	"count": 41
	},
	"Pyramids.Losses.ValueLoss.sum": {
	"value": 0.24720512588586036,
	"min": 0.08460816983521606,
	"max": 0.25633822100159404,
	"count": 41
	},
	"Pyramids.Policy.LearningRate.mean": {
	"value": 1.549992340511902e-06,
	"min": 1.549992340511902e-06,
	"max": 0.00012063059312315555,
	"count": 41
	},
	"Pyramids.Policy.LearningRate.sum": {
	"value": 2.1699892767166628e-05,
	"min": 2.1699892767166628e-05,
	"max": 0.0016875695374771332,
	"count": 41
	},
	"Pyramids.Policy.Epsilon.mean": {
	"value": 0.10051663095238096,
	"min": 0.10051663095238096,
	"max": 0.14021017777777778,
	"count": 41
	},
	"Pyramids.Policy.Epsilon.sum": {
	"value": 1.4072328333333335,
	"min": 0.8412610666666667,
	"max": 2.062522866666667,
	"count": 41
	},
	"Pyramids.Policy.Beta.mean": {
	"value": 6.161143214285705e-05,
	"min": 6.161143214285705e-05,
	"max": 0.004026996759999999,
	"count": 41
	},
	"Pyramids.Policy.Beta.sum": {
	"value": 0.0008625600499999988,
	"min": 0.0008625600499999988,
	"max": 0.05634603438000001,
	"count": 41
	},
	"Pyramids.Losses.RNDLoss.mean": {
	"value": 0.008187584578990936,
	"min": 0.007832600735127926,
	"max": 0.009348301216959953,
	"count": 41
	},
	"Pyramids.Losses.RNDLoss.sum": {
	"value": 0.11462618410587311,
	"min": 0.046995606273412704,
	"max": 0.13564544916152954,
	"count": 41
	},
	"Pyramids.IsTraining.mean": {
	"value": 1.0,
	"min": 1.0,
	"max": 1.0,
	"count": 41
	},
	"Pyramids.IsTraining.sum": {
	"value": 1.0,
	"min": 1.0,
	"max": 1.0,
	"count": 41
	}
	},
	"metadata": {
	"timer_format_version": "0.1.0",
	"start_time_seconds": "1656126923",
	"python_version": "3.8.13 (default, Mar 28 2022, 11:38:47) \n[GCC 7.5.0]",
	"command_line_arguments": "/home/fork/anaconda3/bin/mlagents-learn config/ppo/PyramidsRND.yaml --env=trained-envs-executables/Linux/Pyramids/Pyramids --run-id=First Training --no-graphics --resume --torch-device cuda:0",
	"mlagents_version": "0.29.0.dev0",
	"mlagents_envs_version": "0.29.0.dev0",
	"communication_protocol_version": "1.5.0",
	"pytorch_version": "1.8.1+cu102",
	"numpy_version": "1.23.0",
	"end_time_seconds": "1656128907"
	},
	"total": 1983.833760952999,
	"count": 1,
	"self": 0.3226226089991542,
	"children": {
	"run_training.setup": {
	"total": 0.01962070199988375,
	"count": 1,
	"self": 0.01962070199988375
	},
	"TrainerController.start_learning": {
	"total": 1983.491517642,
	"count": 1,
	"self": 1.7160227289095928,
	"children": {
	"TrainerController._reset_env": {
	"total": 2.8484658879988274,
	"count": 1,
	"self": 2.8484658879988274
	},
	"TrainerController.advance": {
	"total": 1978.8429443240893,
	"count": 79452,
	"self": 1.793405745362179,
	"children": {
	"env_step": {
	"total": 1253.4297236697494,
	"count": 79452,
	"self": 1128.186176941099,
	"children": {
	"SubprocessEnvManager._take_step": {
	"total": 124.21656780874218,
	"count": 79452,
	"self": 4.788859733611389,
	"children": {
	"TorchPolicy.evaluate": {
	"total": 119.4277080751308,
	"count": 75910,
	"self": 44.28934661207131,
	"children": {
	"TorchPolicy.sample_actions": {
	"total": 75.13836146305948,
	"count": 75910,
	"self": 75.13836146305948
	}
	}
	}
	}
	},
	"workers": {
	"total": 1.026978919908288,
	"count": 79452,
	"self": 0.0,
	"children": {
	"worker_root": {
	"total": 1980.6225938820353,
	"count": 79452,
	"is_parallel": true,
	"self": 965.2931823529743,
	"children": {
	"run_training.setup": {
	"total": 0.0,
	"count": 0,
	"is_parallel": true,
	"self": 0.0,
	"children": {
	"steps_from_proto": {
	"total": 0.001265589000468026,
	"count": 1,
	"is_parallel": true,
	"self": 0.00041130900353891775,
	"children": {
	"_process_rank_one_or_two_observation": {
	"total": 0.0008542799969291082,
	"count": 8,
	"is_parallel": true,
	"self": 0.0008542799969291082
	}
	}
	},
	"UnityEnvironment.step": {
	"total": 0.031843670998568996,
	"count": 1,
	"is_parallel": true,
	"self": 0.0002871819979191059,
	"children": {
	"UnityEnvironment._generate_step_input": {
	"total": 0.00023742400117043871,
	"count": 1,
	"is_parallel": true,
	"self": 0.00023742400117043871
	},
	"communicator.exchange": {
	"total": 0.03044660399973509,
	"count": 1,
	"is_parallel": true,
	"self": 0.03044660399973509
	},
	"steps_from_proto": {
	"total": 0.0008724609997443622,
	"count": 1,
	"is_parallel": true,
	"self": 0.0002752759992290521,
	"children": {
	"_process_rank_one_or_two_observation": {
	"total": 0.0005971850005153101,
	"count": 8,
	"is_parallel": true,
	"self": 0.0005971850005153101
	}
	}
	}
	}
	}
	}
	},
	"UnityEnvironment.step": {
	"total": 1015.329411529061,
	"count": 79451,
	"is_parallel": true,
	"self": 24.373401482240297,
	"children": {
	"UnityEnvironment._generate_step_input": {
	"total": 19.148204889042972,
	"count": 79451,
	"is_parallel": true,
	"self": 19.148204889042972
	},
	"communicator.exchange": {
	"total": 893.1059470398413,
	"count": 79451,
	"is_parallel": true,
	"self": 893.1059470398413
	},
	"steps_from_proto": {
	"total": 78.70185811793635,
	"count": 79451,
	"is_parallel": true,
	"self": 19.52036945575128,
	"children": {
	"_process_rank_one_or_two_observation": {
	"total": 59.181488662185075,
	"count": 635608,
	"is_parallel": true,
	"self": 59.181488662185075
	}
	}
	}
	}
	}
	}
	}
	}
	}
	}
	},
	"trainer_advance": {
	"total": 723.6198149089778,
	"count": 79452,
	"self": 3.109344066671838,
	"children": {
	"process_trajectory": {
	"total": 161.9423343703038,
	"count": 79452,
	"self": 161.65118073230406,
	"children": {
	"RLTrainer._checkpoint": {
	"total": 0.2911536379997415,
	"count": 3,
	"self": 0.2911536379997415
	}
	}
	},
	"_update_policy": {
	"total": 558.5681364720022,
	"count": 574,
	"self": 177.202636010099,
	"children": {
	"TorchPPOOptimizer.update": {
	"total": 381.36550046190314,
	"count": 27627,
	"self": 381.36550046190314
	}
	}
	}
	}
	}
	}
	},
	"trainer_threads": {
	"total": 8.040005923248827e-07,
	"count": 1,
	"self": 8.040005923248827e-07
	},
	"TrainerController._save_models": {
	"total": 0.08408389700161933,
	"count": 1,
	"self": 0.0014261760024965042,
	"children": {
	"RLTrainer._checkpoint": {
	"total": 0.08265772099912283,
	"count": 1,
	"self": 0.08265772099912283
	}
	}
	}
	}
	}
	}
	}