{"episode": 0, "scenario": "phase2_core.json", "backend": "local", "agent": "reinforce", "return": -7.166879999999997, "length": 14, "mean_step_reward": -0.5119199999999998} {"episode": 1, "scenario": "phase2_core.json", "backend": "local", "agent": "reinforce", "return": -7.8019199999999955, "length": 14, "mean_step_reward": -0.5572799999999997} {"episode": 2, "scenario": "phase2_core.json", "backend": "local", "agent": "reinforce", "return": -8.641919999999999, "length": 14, "mean_step_reward": -0.6172799999999999} {"episode": 3, "scenario": "phase2_core.json", "backend": "local", "agent": "reinforce", "return": -7.801919999999996, "length": 14, "mean_step_reward": -0.5572799999999998} {"episode": 4, "scenario": "phase2_core.json", "backend": "local", "agent": "reinforce", "return": -8.735999999999997, "length": 14, "mean_step_reward": -0.6239999999999998} {"episode": 5, "scenario": "phase2_core.json", "backend": "local", "agent": "reinforce", "return": -6.961919999999997, "length": 14, "mean_step_reward": -0.49727999999999983} {"episode": 6, "scenario": "phase2_core.json", "backend": "local", "agent": "reinforce", "return": -8.137919999999998, "length": 14, "mean_step_reward": -0.5812799999999998} {"episode": 7, "scenario": "phase2_core.json", "backend": "local", "agent": "reinforce", "return": -7.428959999999997, "length": 14, "mean_step_reward": -0.5306399999999998} {"episode": 8, "scenario": "phase2_core.json", "backend": "local", "agent": "reinforce", "return": -6.830879999999997, "length": 14, "mean_step_reward": -0.4879199999999998} {"episode": 9, "scenario": "phase2_core.json", "backend": "local", "agent": "reinforce", "return": -7.334879999999997, "length": 14, "mean_step_reward": -0.5239199999999998} {"episode": 10, "scenario": "phase2_core.json", "backend": "local", "agent": "reinforce", "return": -6.588959999999998, "length": 14, "mean_step_reward": -0.4706399999999999} {"episode": 11, "scenario": "phase2_core.json", "backend": "local", "agent": "reinforce", "return": -6.625919999999997, "length": 14, "mean_step_reward": -0.4732799999999998} {"episode": 12, "scenario": "phase2_core.json", "backend": "local", "agent": "reinforce", "return": -6.793919999999997, "length": 14, "mean_step_reward": -0.4852799999999998} {"episode": 13, "scenario": "phase2_core.json", "backend": "local", "agent": "reinforce", "return": -7.166879999999997, "length": 14, "mean_step_reward": -0.5119199999999998} {"episode": 14, "scenario": "phase2_core.json", "backend": "local", "agent": "reinforce", "return": -6.793919999999998, "length": 14, "mean_step_reward": -0.4852799999999999} {"episode": 15, "scenario": "phase2_core.json", "backend": "local", "agent": "reinforce", "return": -6.625919999999997, "length": 14, "mean_step_reward": -0.4732799999999998} {"episode": 16, "scenario": "phase2_core.json", "backend": "local", "agent": "reinforce", "return": -7.187039999999998, "length": 14, "mean_step_reward": -0.5133599999999998} {"episode": 17, "scenario": "phase2_core.json", "backend": "local", "agent": "reinforce", "return": -6.719999999999998, "length": 14, "mean_step_reward": -0.47999999999999987} {"episode": 18, "scenario": "phase2_core.json", "backend": "local", "agent": "reinforce", "return": -6.719999999999998, "length": 14, "mean_step_reward": -0.47999999999999987} {"episode": 19, "scenario": "phase2_core.json", "backend": "local", "agent": "reinforce", "return": -6.8678399999999975, "length": 14, "mean_step_reward": -0.49055999999999983} {"episode": 20, "scenario": "phase2_core.json", "backend": "local", "agent": "reinforce", "return": -6.8510399999999985, "length": 14, "mean_step_reward": -0.4893599999999999} {"episode": 21, "scenario": "phase2_core.json", "backend": "local", "agent": "reinforce", "return": -6.719999999999998, "length": 14, "mean_step_reward": -0.47999999999999987} {"episode": 22, "scenario": "phase2_core.json", "backend": "local", "agent": "reinforce", "return": -7.092959999999998, "length": 14, "mean_step_reward": -0.5066399999999999} {"episode": 23, "scenario": "phase2_core.json", "backend": "local", "agent": "reinforce", "return": -6.887999999999998, "length": 14, "mean_step_reward": -0.4919999999999999} {"episode": 24, "scenario": "phase2_core.json", "backend": "local", "agent": "reinforce", "return": -6.662879999999997, "length": 14, "mean_step_reward": -0.4759199999999998}