phase,round,global_step,use_hint,avg_episode_reward,max_episode_reward,min_episode_reward,avg_grader,max_grader,n_training_samples,train_loss phase1_timing,1,1,True,5.127,5.315,4.96,0.9498,1.0,81,2.833 phase1_timing,2,2,False,3.04,3.303,2.6,0.259,0.3614,96,3.1413 phase1_timing,3,3,False,2.867,3.016,2.555,0.2083,0.3042,102,3.1255 phase2_content,1,4,True,3.538,3.837,3.338,0.8697,1.0,77,2.8381 phase2_content,2,5,False,2.15,2.807,1.587,0.3763,0.5979,90,2.9281 phase2_content,3,6,False,1.924,2.609,1.375,0.2855,0.5027,76,2.9184