polypharmacy-env / training_metrics.json
TheJackBright's picture
Version 3
f0ef01d
{
"task_id": "easy_screening",
"n_episodes": 30,
"hyperparameters": {
"lr": 0.0003,
"gamma": 0.99,
"entropy_coeff": 0.02,
"batch_size": 5,
"hidden_dim": 128,
"state_dim": 16,
"action_dim": 166
},
"episode_rewards": [
0.47,
1.1073118279569893,
1.1486231884057971,
1.1336231884057972,
0.405,
0.395,
1.1296806853582555,
0.38,
0.7283823529411766,
0.0,
0.39,
-0.095,
1.137962962962963,
1.1951785714285714,
0.9053636363636364,
-0.01754088050314473,
-0.04,
-0.06,
0.0,
0.22666666666666668,
0.435,
0.45,
0.45,
0.37666666666666665,
0.435,
0.455,
0.5412162162162163,
0.33899999999999997,
0.3416666666666666,
0.42
],
"episode_grader_scores": [
0.5,
0.8306451612903226,
0.8369565217391305,
0.8369565217391305,
0.5,
0.5,
0.9688473520249221,
0.5,
0.7058823529411765,
0.0,
0.5,
0.0,
0.837962962962963,
0.9776785714285714,
0.8863636363636364,
0.053459119496855306,
0.0,
0.0,
0.0,
0.5,
0.5,
0.5,
0.5,
0.5,
0.5,
0.5,
0.5495495495495496,
0.5,
0.27499999999999997,
0.5
],
"episode_steps": [
5,
4,
3,
4,
6,
6,
5,
7,
4,
1,
7,
8,
4,
3,
10,
6,
3,
7,
1,
4,
4,
3,
3,
5,
4,
6,
4,
7,
2,
5
],
"policy_losses": [
0.28967130184173584,
0.05730011314153671,
-0.06924888491630554,
-0.28697478771209717,
-0.1783256083726883,
-0.12063005566596985
],
"value_losses": [
0.39626142382621765,
0.24146510660648346,
0.29013994336128235,
0.06388193368911743,
0.02375689707696438,
0.02241377718746662
],
"best_avg_grader_score": 0.6179287909734683,
"total_training_time_s": 0.13345718383789062,
"eval_stochastic_avg_reward": 0.5792066304974347,
"eval_stochastic_avg_grader_score": 0.5784816304974348,
"eval_stochastic_avg_steps": 5.6,
"eval_stochastic_rewards": [
1.0402956989247312,
0.485,
-0.11333333333333336,
0.455,
0.36250000000000004,
1.0112089552238808,
0.41,
0.21499999999999997,
1.1173118279569894,
0.883621495327103,
0.45,
1.1073118279569893,
0.8680882352941177,
0.405,
0.675031128404669,
0.45,
-0.11,
0.415,
0.32,
1.1370967741935485
],
"eval_stochastic_grader_scores": [
0.8494623655913979,
0.5,
0.0,
0.5,
0.5,
0.9207089552238806,
0.5,
0.5,
0.8306451612903226,
0.8411214953271029,
0.5,
0.8306451612903226,
0.803921568627451,
0.5,
0.6060311284046691,
0.5,
0.0,
0.5,
0.5,
0.8870967741935485
],
"eval_greedy_avg_reward": 0.3627500000000001,
"eval_greedy_avg_grader_score": 0.425,
"eval_greedy_avg_steps": 6.45,
"eval_greedy_rewards": [
0.44,
0.455,
-0.08,
0.455,
-0.06,
0.39,
0.455,
0.455,
0.455,
-0.06,
0.42,
0.455,
0.41000000000000003,
0.455,
0.44,
0.39,
0.41000000000000003,
0.49,
0.44,
0.44
],
"eval_greedy_grader_scores": [
0.5,
0.5,
0.0,
0.5,
0.0,
0.5,
0.5,
0.5,
0.5,
0.0,
0.5,
0.5,
0.5,
0.5,
0.5,
0.5,
0.5,
0.5,
0.5,
0.5
]
}