{ "task_id": "easy_screening", "n_episodes": 30, "hyperparameters": { "lr": 0.0003, "gamma": 0.99, "entropy_coeff": 0.02, "batch_size": 5, "hidden_dim": 128, "state_dim": 16, "action_dim": 166 }, "episode_rewards": [ 0.47, 1.1073118279569893, 1.1486231884057971, 1.1336231884057972, 0.405, 0.395, 1.1296806853582555, 0.38, 0.7283823529411766, 0.0, 0.39, -0.095, 1.137962962962963, 1.1951785714285714, 0.9053636363636364, -0.01754088050314473, -0.04, -0.06, 0.0, 0.22666666666666668, 0.435, 0.45, 0.45, 0.37666666666666665, 0.435, 0.455, 0.5412162162162163, 0.33899999999999997, 0.3416666666666666, 0.42 ], "episode_grader_scores": [ 0.5, 0.8306451612903226, 0.8369565217391305, 0.8369565217391305, 0.5, 0.5, 0.9688473520249221, 0.5, 0.7058823529411765, 0.0, 0.5, 0.0, 0.837962962962963, 0.9776785714285714, 0.8863636363636364, 0.053459119496855306, 0.0, 0.0, 0.0, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5495495495495496, 0.5, 0.27499999999999997, 0.5 ], "episode_steps": [ 5, 4, 3, 4, 6, 6, 5, 7, 4, 1, 7, 8, 4, 3, 10, 6, 3, 7, 1, 4, 4, 3, 3, 5, 4, 6, 4, 7, 2, 5 ], "policy_losses": [ 0.28967130184173584, 0.05730011314153671, -0.06924888491630554, -0.28697478771209717, -0.1783256083726883, -0.12063005566596985 ], "value_losses": [ 0.39626142382621765, 0.24146510660648346, 0.29013994336128235, 0.06388193368911743, 0.02375689707696438, 0.02241377718746662 ], "best_avg_grader_score": 0.6179287909734683, "total_training_time_s": 0.13345718383789062, "eval_stochastic_avg_reward": 0.5792066304974347, "eval_stochastic_avg_grader_score": 0.5784816304974348, "eval_stochastic_avg_steps": 5.6, "eval_stochastic_rewards": [ 1.0402956989247312, 0.485, -0.11333333333333336, 0.455, 0.36250000000000004, 1.0112089552238808, 0.41, 0.21499999999999997, 1.1173118279569894, 0.883621495327103, 0.45, 1.1073118279569893, 0.8680882352941177, 0.405, 0.675031128404669, 0.45, -0.11, 0.415, 0.32, 1.1370967741935485 ], "eval_stochastic_grader_scores": [ 0.8494623655913979, 0.5, 0.0, 0.5, 0.5, 0.9207089552238806, 0.5, 0.5, 0.8306451612903226, 0.8411214953271029, 0.5, 0.8306451612903226, 0.803921568627451, 0.5, 0.6060311284046691, 0.5, 0.0, 0.5, 0.5, 0.8870967741935485 ], "eval_greedy_avg_reward": 0.3627500000000001, "eval_greedy_avg_grader_score": 0.425, "eval_greedy_avg_steps": 6.45, "eval_greedy_rewards": [ 0.44, 0.455, -0.08, 0.455, -0.06, 0.39, 0.455, 0.455, 0.455, -0.06, 0.42, 0.455, 0.41000000000000003, 0.455, 0.44, 0.39, 0.41000000000000003, 0.49, 0.44, 0.44 ], "eval_greedy_grader_scores": [ 0.5, 0.5, 0.0, 0.5, 0.0, 0.5, 0.5, 0.5, 0.5, 0.0, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5 ] }