| { |
| "best_metric": null, |
| "best_model_checkpoint": null, |
| "epoch": 0.15903307888040713, |
| "eval_steps": 0, |
| "global_step": 500, |
| "is_hyper_param_search": false, |
| "is_local_process_zero": true, |
| "is_world_process_zero": true, |
| "log_history": [ |
| { |
| "combined_loss": 0.7037124633789062, |
| "completion_length": 425.0, |
| "epoch": 0.0003180661577608143, |
| "grad_norm": 2.1160361766815186, |
| "kl": 0.0, |
| "learning_rate": 0.0, |
| "loss": 0.7037, |
| "num_samples": 1.0, |
| "reward": 3.90625, |
| "reward_std": 1.062600016593933, |
| "rewards/gpt4o_holistic_reward": 3.90625, |
| "rl_loss": -3.725290298461914e-09, |
| "sft_loss": 2.3457083702087402, |
| "speech_entropy": 2.5810890197753906, |
| "speech_kl": 0.0, |
| "step": 1, |
| "text_entropy": 0.44255462288856506, |
| "text_kl": 0.0, |
| "total_entropy": 1.9978519678115845 |
| }, |
| { |
| "combined_loss": 0.7883188724517822, |
| "completion_length": 347.125, |
| "epoch": 0.0006361323155216285, |
| "grad_norm": 2.1822354793548584, |
| "kl": 0.0, |
| "learning_rate": 2.3137821315975918e-07, |
| "loss": 0.7883, |
| "num_samples": 1.0, |
| "reward": 4.875, |
| "reward_std": 0.2501000165939331, |
| "rewards/gpt4o_holistic_reward": 4.875, |
| "rl_loss": 0.0, |
| "sft_loss": 2.6277294158935547, |
| "speech_entropy": 2.6779050827026367, |
| "speech_kl": 0.0, |
| "step": 2, |
| "text_entropy": 0.528403639793396, |
| "text_kl": 0.0, |
| "total_entropy": 2.251002311706543 |
| }, |
| { |
| "combined_loss": 0.7728084921836853, |
| "completion_length": 490.375, |
| "epoch": 0.0009541984732824427, |
| "grad_norm": 2.1721348762512207, |
| "kl": 0.0, |
| "learning_rate": 3.6672579134208467e-07, |
| "loss": 0.7728, |
| "num_samples": 1.0, |
| "reward": 2.9375, |
| "reward_std": 1.3848260641098022, |
| "rewards/gpt4o_holistic_reward": 2.9375, |
| "rl_loss": -1.862645149230957e-09, |
| "sft_loss": 2.576028347015381, |
| "speech_entropy": 2.6699180603027344, |
| "speech_kl": 0.0, |
| "step": 3, |
| "text_entropy": 0.675686240196228, |
| "text_kl": 0.0, |
| "total_entropy": 2.2666218280792236 |
| }, |
| { |
| "combined_loss": 0.7510870695114136, |
| "completion_length": 396.3125, |
| "epoch": 0.001272264631043257, |
| "grad_norm": 3.0144259929656982, |
| "kl": 0.0, |
| "learning_rate": 4.6275642631951835e-07, |
| "loss": 0.7511, |
| "num_samples": 1.0, |
| "reward": 4.5625, |
| "reward_std": 0.6637751460075378, |
| "rewards/gpt4o_holistic_reward": 4.5625, |
| "rl_loss": 7.450580596923828e-09, |
| "sft_loss": 2.5036234855651855, |
| "speech_entropy": 2.7026796340942383, |
| "speech_kl": 0.0, |
| "step": 4, |
| "text_entropy": 0.6944292783737183, |
| "text_kl": 0.0, |
| "total_entropy": 2.308650493621826 |
| }, |
| { |
| "combined_loss": 0.7991127967834473, |
| "completion_length": 467.625, |
| "epoch": 0.0015903307888040711, |
| "grad_norm": 5.25661039352417, |
| "kl": 0.0, |
| "learning_rate": 5.372435736804816e-07, |
| "loss": 0.7991, |
| "num_samples": 1.0, |
| "reward": 3.8125, |
| "reward_std": 0.329224169254303, |
| "rewards/gpt4o_holistic_reward": 3.8125, |
| "rl_loss": -7.450580596923828e-09, |
| "sft_loss": 2.6637091636657715, |
| "speech_entropy": 2.6493892669677734, |
| "speech_kl": 0.0, |
| "step": 5, |
| "text_entropy": 0.8403033018112183, |
| "text_kl": 0.0, |
| "total_entropy": 2.2811856269836426 |
| }, |
| { |
| "combined_loss": 0.7556890249252319, |
| "completion_length": 261.8125, |
| "epoch": 0.0019083969465648854, |
| "grad_norm": 2.2916808128356934, |
| "kl": 0.0, |
| "learning_rate": 5.981040045018438e-07, |
| "loss": 0.7557, |
| "num_samples": 1.0, |
| "reward": 2.625, |
| "reward_std": 0.7500999569892883, |
| "rewards/gpt4o_holistic_reward": 2.625, |
| "rl_loss": 0.0, |
| "sft_loss": 2.51896333694458, |
| "speech_entropy": 2.7437381744384766, |
| "speech_kl": 0.0, |
| "step": 6, |
| "text_entropy": 0.7866218686103821, |
| "text_kl": 0.0, |
| "total_entropy": 2.3357348442077637 |
| }, |
| { |
| "combined_loss": 0.7672804594039917, |
| "completion_length": 284.625, |
| "epoch": 0.0022264631043256997, |
| "grad_norm": 8.151514053344727, |
| "kl": 0.0, |
| "learning_rate": 6.495607655709434e-07, |
| "loss": 0.7673, |
| "num_samples": 1.0, |
| "reward": 3.3125, |
| "reward_std": 0.36094391345977783, |
| "rewards/gpt4o_holistic_reward": 3.3125, |
| "rl_loss": 0.0, |
| "sft_loss": 2.5576014518737793, |
| "speech_entropy": 1.9880790710449219, |
| "speech_kl": 0.0, |
| "step": 7, |
| "text_entropy": 0.5499787926673889, |
| "text_kl": 0.0, |
| "total_entropy": 1.618296504020691 |
| }, |
| { |
| "combined_loss": 0.7481317520141602, |
| "completion_length": 526.5625, |
| "epoch": 0.002544529262086514, |
| "grad_norm": 2.451380491256714, |
| "kl": 0.0, |
| "learning_rate": 6.941346394792774e-07, |
| "loss": 0.7481, |
| "num_samples": 1.0, |
| "reward": 3.125, |
| "reward_std": 0.2501000165939331, |
| "rewards/gpt4o_holistic_reward": 3.125, |
| "rl_loss": 7.450580596923828e-09, |
| "sft_loss": 2.493772506713867, |
| "speech_entropy": 2.589114189147949, |
| "speech_kl": 0.0, |
| "step": 8, |
| "text_entropy": 0.6615394353866577, |
| "text_kl": 0.0, |
| "total_entropy": 2.1917660236358643 |
| }, |
| { |
| "combined_loss": 0.7818739414215088, |
| "completion_length": 261.625, |
| "epoch": 0.0028625954198473282, |
| "grad_norm": 2.807657480239868, |
| "kl": 0.0, |
| "learning_rate": 7.334515826841693e-07, |
| "loss": 0.7819, |
| "num_samples": 1.0, |
| "reward": 2.9375, |
| "reward_std": 0.46360161900520325, |
| "rewards/gpt4o_holistic_reward": 2.9375, |
| "rl_loss": 0.0, |
| "sft_loss": 2.6062464714050293, |
| "speech_entropy": 2.7798025608062744, |
| "speech_kl": 0.0, |
| "step": 9, |
| "text_entropy": 0.7685192823410034, |
| "text_kl": 0.0, |
| "total_entropy": 2.320415735244751 |
| }, |
| { |
| "combined_loss": 0.74410080909729, |
| "completion_length": 355.375, |
| "epoch": 0.0031806615776081423, |
| "grad_norm": 2.3492045402526855, |
| "kl": 0.0, |
| "learning_rate": 7.686217868402409e-07, |
| "loss": 0.7441, |
| "num_samples": 1.0, |
| "reward": 4.25, |
| "reward_std": 0.6921550035476685, |
| "rewards/gpt4o_holistic_reward": 4.25, |
| "rl_loss": -7.450580596923828e-09, |
| "sft_loss": 2.4803357124328613, |
| "speech_entropy": 2.6616315841674805, |
| "speech_kl": 0.0, |
| "step": 10, |
| "text_entropy": 0.7898290753364563, |
| "text_kl": 0.0, |
| "total_entropy": 2.2620432376861572 |
| }, |
| { |
| "combined_loss": 0.8018359541893005, |
| "completion_length": 417.3125, |
| "epoch": 0.003498727735368957, |
| "grad_norm": 2.31730580329895, |
| "kl": 0.0, |
| "learning_rate": 8.004371064686714e-07, |
| "loss": 0.8018, |
| "num_samples": 1.0, |
| "reward": 4.0, |
| "reward_std": 0.8644567728042603, |
| "rewards/gpt4o_holistic_reward": 4.0, |
| "rl_loss": -7.450580596923828e-09, |
| "sft_loss": 2.6727864742279053, |
| "speech_entropy": 2.7092902660369873, |
| "speech_kl": 0.0, |
| "step": 11, |
| "text_entropy": 0.7865443825721741, |
| "text_kl": 0.0, |
| "total_entropy": 2.3307557106018066 |
| }, |
| { |
| "combined_loss": 0.7841310501098633, |
| "completion_length": 271.0, |
| "epoch": 0.003816793893129771, |
| "grad_norm": 4.778654098510742, |
| "kl": 0.0, |
| "learning_rate": 8.29482217661603e-07, |
| "loss": 0.7841, |
| "num_samples": 1.0, |
| "reward": 2.65625, |
| "reward_std": 1.0673450231552124, |
| "rewards/gpt4o_holistic_reward": 2.65625, |
| "rl_loss": 1.1175870895385742e-08, |
| "sft_loss": 2.613770008087158, |
| "speech_entropy": 2.814521312713623, |
| "speech_kl": 0.0, |
| "step": 12, |
| "text_entropy": 1.110842227935791, |
| "text_kl": 0.0, |
| "total_entropy": 2.2540793418884277 |
| }, |
| { |
| "combined_loss": 0.788222074508667, |
| "completion_length": 318.9375, |
| "epoch": 0.004134860050890585, |
| "grad_norm": 2.6159579753875732, |
| "kl": 0.0, |
| "learning_rate": 8.562011298888888e-07, |
| "loss": 0.7882, |
| "num_samples": 1.0, |
| "reward": 3.9375, |
| "reward_std": 0.8315354585647583, |
| "rewards/gpt4o_holistic_reward": 3.9375, |
| "rl_loss": -1.4901161193847656e-08, |
| "sft_loss": 2.627406597137451, |
| "speech_entropy": 2.816967725753784, |
| "speech_kl": 0.0, |
| "step": 13, |
| "text_entropy": 0.8139775395393372, |
| "text_kl": 0.0, |
| "total_entropy": 2.4237911701202393 |
| }, |
| { |
| "combined_loss": 0.7619365453720093, |
| "completion_length": 598.8125, |
| "epoch": 0.004452926208651399, |
| "grad_norm": 1.9425196647644043, |
| "kl": 0.0, |
| "learning_rate": 8.809389787307026e-07, |
| "loss": 0.7619, |
| "num_samples": 1.0, |
| "reward": 3.5, |
| "reward_std": 1.2196787595748901, |
| "rewards/gpt4o_holistic_reward": 3.5, |
| "rl_loss": 2.60770320892334e-08, |
| "sft_loss": 2.539788246154785, |
| "speech_entropy": 2.607710838317871, |
| "speech_kl": 0.0, |
| "step": 14, |
| "text_entropy": 0.6120049357414246, |
| "text_kl": 0.0, |
| "total_entropy": 2.184032440185547 |
| }, |
| { |
| "combined_loss": 0.8240371942520142, |
| "completion_length": 376.6875, |
| "epoch": 0.004770992366412214, |
| "grad_norm": 2.834174871444702, |
| "kl": 0.0, |
| "learning_rate": 9.039693650225662e-07, |
| "loss": 0.824, |
| "num_samples": 1.0, |
| "reward": 4.3125, |
| "reward_std": 1.0792241096496582, |
| "rewards/gpt4o_holistic_reward": 4.3125, |
| "rl_loss": 7.450580596923828e-09, |
| "sft_loss": 2.7467904090881348, |
| "speech_entropy": 2.7376527786254883, |
| "speech_kl": 0.0, |
| "step": 15, |
| "text_entropy": 1.3147271871566772, |
| "text_kl": 0.0, |
| "total_entropy": 2.462172031402588 |
| }, |
| { |
| "combined_loss": 0.7633702754974365, |
| "completion_length": 241.8125, |
| "epoch": 0.005089058524173028, |
| "grad_norm": 3.2188761234283447, |
| "kl": 0.0, |
| "learning_rate": 9.255128526390367e-07, |
| "loss": 0.7634, |
| "num_samples": 1.0, |
| "reward": 3.75, |
| "reward_std": 0.9856985807418823, |
| "rewards/gpt4o_holistic_reward": 3.75, |
| "rl_loss": 0.0, |
| "sft_loss": 2.544567584991455, |
| "speech_entropy": 2.6439318656921387, |
| "speech_kl": 0.0, |
| "step": 16, |
| "text_entropy": 0.9647745490074158, |
| "text_kl": 0.0, |
| "total_entropy": 2.306708812713623 |
| }, |
| { |
| "combined_loss": 0.8270055651664734, |
| "completion_length": 534.5, |
| "epoch": 0.005407124681933842, |
| "grad_norm": 2.851463556289673, |
| "kl": 0.0, |
| "learning_rate": 9.45749848565416e-07, |
| "loss": 0.827, |
| "num_samples": 1.0, |
| "reward": 2.75, |
| "reward_std": 1.0000998973846436, |
| "rewards/gpt4o_holistic_reward": 2.75, |
| "rl_loss": 0.0, |
| "sft_loss": 2.756685256958008, |
| "speech_entropy": 2.7267394065856934, |
| "speech_kl": 0.0, |
| "step": 17, |
| "text_entropy": 0.9119875431060791, |
| "text_kl": 0.0, |
| "total_entropy": 2.3539938926696777 |
| }, |
| { |
| "combined_loss": 0.8502093553543091, |
| "completion_length": 354.75, |
| "epoch": 0.0057251908396946565, |
| "grad_norm": 2.3651652336120605, |
| "kl": 0.0, |
| "learning_rate": 9.648297958439284e-07, |
| "loss": 0.8502, |
| "num_samples": 1.0, |
| "reward": 4.0, |
| "reward_std": 0.3944375813007355, |
| "rewards/gpt4o_holistic_reward": 4.0, |
| "rl_loss": 3.725290298461914e-09, |
| "sft_loss": 2.834031105041504, |
| "speech_entropy": 2.6965622901916504, |
| "speech_kl": 0.0, |
| "step": 18, |
| "text_entropy": 1.0353615283966064, |
| "text_kl": 0.0, |
| "total_entropy": 2.379913806915283 |
| }, |
| { |
| "combined_loss": 0.8145760297775269, |
| "completion_length": 331.375, |
| "epoch": 0.006043256997455471, |
| "grad_norm": 2.548919916152954, |
| "kl": 0.0, |
| "learning_rate": 9.828778776927557e-07, |
| "loss": 0.8146, |
| "num_samples": 1.0, |
| "reward": 3.5625, |
| "reward_std": 1.6985008716583252, |
| "rewards/gpt4o_holistic_reward": 3.5625, |
| "rl_loss": 2.2351741790771484e-08, |
| "sft_loss": 2.7152533531188965, |
| "speech_entropy": 2.7572903633117676, |
| "speech_kl": 0.0, |
| "step": 19, |
| "text_entropy": 0.8444140553474426, |
| "text_kl": 0.0, |
| "total_entropy": 2.3438541889190674 |
| }, |
| { |
| "combined_loss": 0.7225195169448853, |
| "completion_length": 397.3125, |
| "epoch": 0.006361323155216285, |
| "grad_norm": 2.0722339153289795, |
| "kl": 0.0, |
| "learning_rate": 1e-06, |
| "loss": 0.7225, |
| "num_samples": 1.0, |
| "reward": 3.5, |
| "reward_std": 0.8872368931770325, |
| "rewards/gpt4o_holistic_reward": 3.5, |
| "rl_loss": 1.1175870895385742e-08, |
| "sft_loss": 2.408398151397705, |
| "speech_entropy": 2.698948383331299, |
| "speech_kl": 0.0, |
| "step": 20, |
| "text_entropy": 0.8427600860595703, |
| "text_kl": 0.0, |
| "total_entropy": 2.3258702754974365 |
| }, |
| { |
| "combined_loss": 0.7821958661079407, |
| "completion_length": 306.125, |
| "epoch": 0.006679389312977099, |
| "grad_norm": 2.6868255138397217, |
| "kl": 0.0, |
| "learning_rate": 9.99999433562768e-07, |
| "loss": 0.7822, |
| "num_samples": 1.0, |
| "reward": 3.8125, |
| "reward_std": 1.103813648223877, |
| "rewards/gpt4o_holistic_reward": 3.8125, |
| "rl_loss": 1.862645149230957e-09, |
| "sft_loss": 2.6073193550109863, |
| "speech_entropy": 2.7519540786743164, |
| "speech_kl": 0.0, |
| "step": 21, |
| "text_entropy": 0.9722496271133423, |
| "text_kl": 0.0, |
| "total_entropy": 2.407655715942383 |
| }, |
| { |
| "combined_loss": 0.6645753979682922, |
| "completion_length": 460.1875, |
| "epoch": 0.006997455470737914, |
| "grad_norm": 2.0238919258117676, |
| "kl": 0.0, |
| "learning_rate": 9.99997734252498e-07, |
| "loss": 0.6646, |
| "num_samples": 1.0, |
| "reward": 3.9375, |
| "reward_std": 1.3854628801345825, |
| "rewards/gpt4o_holistic_reward": 3.9375, |
| "rl_loss": 1.1175870895385742e-08, |
| "sft_loss": 2.2152514457702637, |
| "speech_entropy": 2.5998382568359375, |
| "speech_kl": 0.0, |
| "step": 22, |
| "text_entropy": 0.31880295276641846, |
| "text_kl": 0.0, |
| "total_entropy": 2.1282958984375 |
| }, |
| { |
| "combined_loss": 0.7440503239631653, |
| "completion_length": 156.5, |
| "epoch": 0.007315521628498728, |
| "grad_norm": 2.6382734775543213, |
| "kl": 0.0, |
| "learning_rate": 9.999949020734677e-07, |
| "loss": 0.7441, |
| "num_samples": 1.0, |
| "reward": 3.5, |
| "reward_std": 0.9331126809120178, |
| "rewards/gpt4o_holistic_reward": 3.5, |
| "rl_loss": -3.725290298461914e-09, |
| "sft_loss": 2.480167865753174, |
| "speech_entropy": 2.679046630859375, |
| "speech_kl": 0.0, |
| "step": 23, |
| "text_entropy": 0.8172353506088257, |
| "text_kl": 0.0, |
| "total_entropy": 2.290531873703003 |
| }, |
| { |
| "combined_loss": 0.8006659746170044, |
| "completion_length": 273.0, |
| "epoch": 0.007633587786259542, |
| "grad_norm": 2.4343767166137695, |
| "kl": 0.0, |
| "learning_rate": 9.999909370328077e-07, |
| "loss": 0.8007, |
| "num_samples": 1.0, |
| "reward": 3.1875, |
| "reward_std": 1.058112621307373, |
| "rewards/gpt4o_holistic_reward": 3.1875, |
| "rl_loss": 3.725290298461914e-09, |
| "sft_loss": 2.668886423110962, |
| "speech_entropy": 2.74008846282959, |
| "speech_kl": 0.0, |
| "step": 24, |
| "text_entropy": 0.805307149887085, |
| "text_kl": 0.0, |
| "total_entropy": 2.3586955070495605 |
| }, |
| { |
| "combined_loss": 0.74156254529953, |
| "completion_length": 318.4375, |
| "epoch": 0.007951653944020356, |
| "grad_norm": 2.2340893745422363, |
| "kl": 0.0, |
| "learning_rate": 9.999858391404998e-07, |
| "loss": 0.7416, |
| "num_samples": 1.0, |
| "reward": 3.9375, |
| "reward_std": 0.9063550233840942, |
| "rewards/gpt4o_holistic_reward": 3.9375, |
| "rl_loss": -2.2351741790771484e-08, |
| "sft_loss": 2.4718751907348633, |
| "speech_entropy": 2.68562388420105, |
| "speech_kl": 0.0, |
| "step": 25, |
| "text_entropy": 0.8011962175369263, |
| "text_kl": 0.0, |
| "total_entropy": 2.301424503326416 |
| }, |
| { |
| "combined_loss": 0.7775212526321411, |
| "completion_length": 471.375, |
| "epoch": 0.00826972010178117, |
| "grad_norm": 1.9409180879592896, |
| "kl": 0.0, |
| "learning_rate": 9.999796084093777e-07, |
| "loss": 0.7775, |
| "num_samples": 1.0, |
| "reward": 3.90625, |
| "reward_std": 0.4376000165939331, |
| "rewards/gpt4o_holistic_reward": 3.90625, |
| "rl_loss": -2.2351741790771484e-08, |
| "sft_loss": 2.5917372703552246, |
| "speech_entropy": 2.6521334648132324, |
| "speech_kl": 0.0, |
| "step": 26, |
| "text_entropy": 0.755419909954071, |
| "text_kl": 0.0, |
| "total_entropy": 2.224979877471924 |
| }, |
| { |
| "combined_loss": 0.8118987083435059, |
| "completion_length": 381.25, |
| "epoch": 0.008587786259541985, |
| "grad_norm": 2.630601644515991, |
| "kl": 0.0, |
| "learning_rate": 9.999722448551275e-07, |
| "loss": 0.8119, |
| "num_samples": 1.0, |
| "reward": 3.03125, |
| "reward_std": 1.176088809967041, |
| "rewards/gpt4o_holistic_reward": 3.03125, |
| "rl_loss": 0.0, |
| "sft_loss": 2.706328868865967, |
| "speech_entropy": 2.7182955741882324, |
| "speech_kl": 0.0, |
| "step": 27, |
| "text_entropy": 0.9594783186912537, |
| "text_kl": 0.0, |
| "total_entropy": 2.36698055267334 |
| }, |
| { |
| "combined_loss": 0.7367856502532959, |
| "completion_length": 396.8125, |
| "epoch": 0.008905852417302799, |
| "grad_norm": 2.1226205825805664, |
| "kl": 0.0, |
| "learning_rate": 9.999637484962867e-07, |
| "loss": 0.7368, |
| "num_samples": 1.0, |
| "reward": 3.1875, |
| "reward_std": 0.6250999569892883, |
| "rewards/gpt4o_holistic_reward": 3.1875, |
| "rl_loss": -3.725290298461914e-09, |
| "sft_loss": 2.4559521675109863, |
| "speech_entropy": 2.6961069107055664, |
| "speech_kl": 0.0, |
| "step": 28, |
| "text_entropy": 0.5669960975646973, |
| "text_kl": 0.0, |
| "total_entropy": 2.260632038116455 |
| }, |
| { |
| "combined_loss": 0.7663246393203735, |
| "completion_length": 175.125, |
| "epoch": 0.009223918575063612, |
| "grad_norm": 1.594689965248108, |
| "kl": 0.0, |
| "learning_rate": 9.99954119354245e-07, |
| "loss": 0.7663, |
| "num_samples": 1.0, |
| "reward": 4.5, |
| "reward_std": 9.999999747378752e-05, |
| "rewards/gpt4o_holistic_reward": 4.5, |
| "rl_loss": 0.0, |
| "sft_loss": 2.554415225982666, |
| "speech_entropy": 2.819967746734619, |
| "speech_kl": 0.0, |
| "step": 29, |
| "text_entropy": 0.7421623468399048, |
| "text_kl": 0.0, |
| "total_entropy": 2.4235970973968506 |
| }, |
| { |
| "combined_loss": 0.746996283531189, |
| "completion_length": 368.8125, |
| "epoch": 0.009541984732824428, |
| "grad_norm": 3.2165513038635254, |
| "kl": 0.0, |
| "learning_rate": 9.999433574532437e-07, |
| "loss": 0.747, |
| "num_samples": 1.0, |
| "reward": 3.21875, |
| "reward_std": 1.240410566329956, |
| "rewards/gpt4o_holistic_reward": 3.21875, |
| "rl_loss": -2.60770320892334e-08, |
| "sft_loss": 2.48998761177063, |
| "speech_entropy": 2.657895088195801, |
| "speech_kl": 0.0, |
| "step": 30, |
| "text_entropy": 0.9665651321411133, |
| "text_kl": 0.0, |
| "total_entropy": 2.3054580688476562 |
| }, |
| { |
| "combined_loss": 0.7402773499488831, |
| "completion_length": 272.4375, |
| "epoch": 0.009860050890585241, |
| "grad_norm": 1.8161159753799438, |
| "kl": 0.0, |
| "learning_rate": 9.99931462820376e-07, |
| "loss": 0.7403, |
| "num_samples": 1.0, |
| "reward": 4.875, |
| "reward_std": 0.14443756639957428, |
| "rewards/gpt4o_holistic_reward": 4.875, |
| "rl_loss": 1.4901161193847656e-08, |
| "sft_loss": 2.4675910472869873, |
| "speech_entropy": 2.647010326385498, |
| "speech_kl": 0.0, |
| "step": 31, |
| "text_entropy": 1.1171433925628662, |
| "text_kl": 0.0, |
| "total_entropy": 2.3485312461853027 |
| }, |
| { |
| "combined_loss": 0.7246508598327637, |
| "completion_length": 248.5, |
| "epoch": 0.010178117048346057, |
| "grad_norm": 2.0797693729400635, |
| "kl": 0.0, |
| "learning_rate": 9.999184354855866e-07, |
| "loss": 0.7247, |
| "num_samples": 1.0, |
| "reward": 3.875, |
| "reward_std": 0.6144567728042603, |
| "rewards/gpt4o_holistic_reward": 3.875, |
| "rl_loss": 3.725290298461914e-09, |
| "sft_loss": 2.4155025482177734, |
| "speech_entropy": 2.6706910133361816, |
| "speech_kl": 0.0, |
| "step": 32, |
| "text_entropy": 0.736768901348114, |
| "text_kl": 0.0, |
| "total_entropy": 2.322854995727539 |
| }, |
| { |
| "combined_loss": 0.6848204731941223, |
| "completion_length": 422.125, |
| "epoch": 0.01049618320610687, |
| "grad_norm": 2.8221709728240967, |
| "kl": 0.0, |
| "learning_rate": 9.999042754816715e-07, |
| "loss": 0.6848, |
| "num_samples": 1.0, |
| "reward": 2.625, |
| "reward_std": 1.172311544418335, |
| "rewards/gpt4o_holistic_reward": 2.625, |
| "rl_loss": -3.725290298461914e-09, |
| "sft_loss": 2.2827348709106445, |
| "speech_entropy": 2.7249035835266113, |
| "speech_kl": 0.0, |
| "step": 33, |
| "text_entropy": 1.2600326538085938, |
| "text_kl": 0.0, |
| "total_entropy": 2.3216300010681152 |
| }, |
| { |
| "combined_loss": 0.8137314319610596, |
| "completion_length": 450.6875, |
| "epoch": 0.010814249363867684, |
| "grad_norm": 1.7230591773986816, |
| "kl": 0.0, |
| "learning_rate": 9.99888982844279e-07, |
| "loss": 0.8137, |
| "num_samples": 1.0, |
| "reward": 4.1875, |
| "reward_std": 0.42705631256103516, |
| "rewards/gpt4o_holistic_reward": 4.1875, |
| "rl_loss": 0.0, |
| "sft_loss": 2.7124381065368652, |
| "speech_entropy": 2.5579869747161865, |
| "speech_kl": 0.0, |
| "step": 34, |
| "text_entropy": 1.105285406112671, |
| "text_kl": 0.0, |
| "total_entropy": 2.2585811614990234 |
| }, |
| { |
| "combined_loss": 0.6867671012878418, |
| "completion_length": 296.0625, |
| "epoch": 0.0111323155216285, |
| "grad_norm": 2.008439064025879, |
| "kl": 0.0, |
| "learning_rate": 9.99872557611908e-07, |
| "loss": 0.6868, |
| "num_samples": 1.0, |
| "reward": 2.125, |
| "reward_std": 0.8536534309387207, |
| "rewards/gpt4o_holistic_reward": 2.125, |
| "rl_loss": -7.450580596923828e-09, |
| "sft_loss": 2.2892236709594727, |
| "speech_entropy": 2.624180316925049, |
| "speech_kl": 0.0, |
| "step": 35, |
| "text_entropy": 0.7666749358177185, |
| "text_kl": 0.0, |
| "total_entropy": 2.235846996307373 |
| }, |
| { |
| "combined_loss": 0.7378132343292236, |
| "completion_length": 518.0625, |
| "epoch": 0.011450381679389313, |
| "grad_norm": 1.8618037700653076, |
| "kl": 0.0, |
| "learning_rate": 9.99854999825909e-07, |
| "loss": 0.7378, |
| "num_samples": 1.0, |
| "reward": 2.75, |
| "reward_std": 0.8944376111030579, |
| "rewards/gpt4o_holistic_reward": 2.75, |
| "rl_loss": -1.30385160446167e-08, |
| "sft_loss": 2.4593772888183594, |
| "speech_entropy": 2.565446615219116, |
| "speech_kl": 0.0, |
| "step": 36, |
| "text_entropy": 0.864229679107666, |
| "text_kl": 0.0, |
| "total_entropy": 2.1934401988983154 |
| }, |
| { |
| "combined_loss": 0.7986043691635132, |
| "completion_length": 484.375, |
| "epoch": 0.011768447837150127, |
| "grad_norm": 1.954567551612854, |
| "kl": 0.0, |
| "learning_rate": 9.998363095304839e-07, |
| "loss": 0.7986, |
| "num_samples": 1.0, |
| "reward": 3.625, |
| "reward_std": 0.5001000165939331, |
| "rewards/gpt4o_holistic_reward": 3.625, |
| "rl_loss": -9.313225746154785e-09, |
| "sft_loss": 2.6620142459869385, |
| "speech_entropy": 2.6134657859802246, |
| "speech_kl": 0.0, |
| "step": 37, |
| "text_entropy": 0.908026933670044, |
| "text_kl": 0.0, |
| "total_entropy": 2.264120578765869 |
| }, |
| { |
| "combined_loss": 0.7768511176109314, |
| "completion_length": 327.375, |
| "epoch": 0.012086513994910942, |
| "grad_norm": 2.6139323711395264, |
| "kl": 0.0, |
| "learning_rate": 9.99816486772685e-07, |
| "loss": 0.7769, |
| "num_samples": 1.0, |
| "reward": 4.0, |
| "reward_std": 0.9717878103256226, |
| "rewards/gpt4o_holistic_reward": 4.0, |
| "rl_loss": 3.725290298461914e-09, |
| "sft_loss": 2.589503765106201, |
| "speech_entropy": 2.7159786224365234, |
| "speech_kl": 0.0, |
| "step": 38, |
| "text_entropy": 1.0709924697875977, |
| "text_kl": 0.0, |
| "total_entropy": 2.3924551010131836 |
| }, |
| { |
| "combined_loss": 0.690489649772644, |
| "completion_length": 294.0, |
| "epoch": 0.012404580152671756, |
| "grad_norm": 2.4900975227355957, |
| "kl": 0.0, |
| "learning_rate": 9.997955316024167e-07, |
| "loss": 0.6905, |
| "num_samples": 1.0, |
| "reward": 3.75, |
| "reward_std": 0.5774502754211426, |
| "rewards/gpt4o_holistic_reward": 3.75, |
| "rl_loss": 7.450580596923828e-09, |
| "sft_loss": 2.3016321659088135, |
| "speech_entropy": 2.509531021118164, |
| "speech_kl": 0.0, |
| "step": 39, |
| "text_entropy": 0.8610185980796814, |
| "text_kl": 0.0, |
| "total_entropy": 2.159456729888916 |
| }, |
| { |
| "combined_loss": 0.8176020383834839, |
| "completion_length": 299.9375, |
| "epoch": 0.01272264631043257, |
| "grad_norm": 2.29056453704834, |
| "kl": 0.0, |
| "learning_rate": 9.997734440724333e-07, |
| "loss": 0.8176, |
| "num_samples": 1.0, |
| "reward": 2.6875, |
| "reward_std": 0.41377514600753784, |
| "rewards/gpt4o_holistic_reward": 2.6875, |
| "rl_loss": 1.1175870895385742e-08, |
| "sft_loss": 2.7253401279449463, |
| "speech_entropy": 2.6276955604553223, |
| "speech_kl": 0.0, |
| "step": 40, |
| "text_entropy": 0.7860556840896606, |
| "text_kl": 0.0, |
| "total_entropy": 2.2730958461761475 |
| }, |
| { |
| "combined_loss": 0.6987892985343933, |
| "completion_length": 346.4375, |
| "epoch": 0.013040712468193385, |
| "grad_norm": 1.9701205492019653, |
| "kl": 0.0, |
| "learning_rate": 9.9975022423834e-07, |
| "loss": 0.6988, |
| "num_samples": 1.0, |
| "reward": 3.5, |
| "reward_std": 0.36445680260658264, |
| "rewards/gpt4o_holistic_reward": 3.5, |
| "rl_loss": 0.0, |
| "sft_loss": 2.3292975425720215, |
| "speech_entropy": 2.602095365524292, |
| "speech_kl": 0.0, |
| "step": 41, |
| "text_entropy": 0.8417441844940186, |
| "text_kl": 0.0, |
| "total_entropy": 2.2476534843444824 |
| }, |
| { |
| "combined_loss": 0.7409491539001465, |
| "completion_length": 319.1875, |
| "epoch": 0.013358778625954198, |
| "grad_norm": 2.334261894226074, |
| "kl": 0.0, |
| "learning_rate": 9.997258721585931e-07, |
| "loss": 0.7409, |
| "num_samples": 1.0, |
| "reward": 2.9375, |
| "reward_std": 0.8751000165939331, |
| "rewards/gpt4o_holistic_reward": 2.9375, |
| "rl_loss": 1.1175870895385742e-08, |
| "sft_loss": 2.469830274581909, |
| "speech_entropy": 2.437565326690674, |
| "speech_kl": 0.0, |
| "step": 42, |
| "text_entropy": 0.7768386602401733, |
| "text_kl": 0.0, |
| "total_entropy": 2.0847883224487305 |
| }, |
| { |
| "combined_loss": 0.6795743107795715, |
| "completion_length": 245.375, |
| "epoch": 0.013676844783715014, |
| "grad_norm": 2.3061537742614746, |
| "kl": 0.0, |
| "learning_rate": 9.997003878944985e-07, |
| "loss": 0.6796, |
| "num_samples": 1.0, |
| "reward": 3.0, |
| "reward_std": 0.5001000165939331, |
| "rewards/gpt4o_holistic_reward": 3.0, |
| "rl_loss": -1.4901161193847656e-08, |
| "sft_loss": 2.2652478218078613, |
| "speech_entropy": 2.602904796600342, |
| "speech_kl": 0.0, |
| "step": 43, |
| "text_entropy": 0.6788486242294312, |
| "text_kl": 0.0, |
| "total_entropy": 2.2193617820739746 |
| }, |
| { |
| "combined_loss": 0.7629357576370239, |
| "completion_length": 426.9375, |
| "epoch": 0.013994910941475827, |
| "grad_norm": 1.8447140455245972, |
| "kl": 0.0, |
| "learning_rate": 9.996737715102132e-07, |
| "loss": 0.7629, |
| "num_samples": 1.0, |
| "reward": 3.0625, |
| "reward_std": 0.6978486180305481, |
| "rewards/gpt4o_holistic_reward": 3.0625, |
| "rl_loss": 1.6763806343078613e-08, |
| "sft_loss": 2.543118953704834, |
| "speech_entropy": 2.4126617908477783, |
| "speech_kl": 0.0, |
| "step": 44, |
| "text_entropy": 0.933107852935791, |
| "text_kl": 0.0, |
| "total_entropy": 2.1085753440856934 |
| }, |
| { |
| "combined_loss": 0.7338756322860718, |
| "completion_length": 539.75, |
| "epoch": 0.01431297709923664, |
| "grad_norm": 6.8181939125061035, |
| "kl": 0.0, |
| "learning_rate": 9.996460230727435e-07, |
| "loss": 0.7339, |
| "num_samples": 1.0, |
| "reward": 4.0, |
| "reward_std": 0.5728486180305481, |
| "rewards/gpt4o_holistic_reward": 4.0, |
| "rl_loss": 0.0, |
| "sft_loss": 2.44625186920166, |
| "speech_entropy": 2.5158934593200684, |
| "speech_kl": 0.0, |
| "step": 45, |
| "text_entropy": 0.8566389083862305, |
| "text_kl": 0.0, |
| "total_entropy": 2.162264585494995 |
| }, |
| { |
| "combined_loss": 0.8030707240104675, |
| "completion_length": 346.375, |
| "epoch": 0.014631043256997456, |
| "grad_norm": 2.483579397201538, |
| "kl": 0.0, |
| "learning_rate": 9.996171426519463e-07, |
| "loss": 0.8031, |
| "num_samples": 1.0, |
| "reward": 2.9375, |
| "reward_std": 0.7394567728042603, |
| "rewards/gpt4o_holistic_reward": 2.9375, |
| "rl_loss": -2.60770320892334e-08, |
| "sft_loss": 2.6769022941589355, |
| "speech_entropy": 2.4880712032318115, |
| "speech_kl": 0.0, |
| "step": 46, |
| "text_entropy": 0.7922484874725342, |
| "text_kl": 0.0, |
| "total_entropy": 2.1411781311035156 |
| }, |
| { |
| "combined_loss": 0.721707820892334, |
| "completion_length": 403.5625, |
| "epoch": 0.01494910941475827, |
| "grad_norm": 1.8520557880401611, |
| "kl": 0.0, |
| "learning_rate": 9.995871303205279e-07, |
| "loss": 0.7217, |
| "num_samples": 1.0, |
| "reward": 4.5625, |
| "reward_std": 0.8751000165939331, |
| "rewards/gpt4o_holistic_reward": 4.5625, |
| "rl_loss": 3.725290298461914e-09, |
| "sft_loss": 2.4056925773620605, |
| "speech_entropy": 2.421140193939209, |
| "speech_kl": 0.0, |
| "step": 47, |
| "text_entropy": 0.7047562003135681, |
| "text_kl": 0.0, |
| "total_entropy": 2.0640478134155273 |
| }, |
| { |
| "combined_loss": 0.7390530109405518, |
| "completion_length": 239.25, |
| "epoch": 0.015267175572519083, |
| "grad_norm": 2.3096864223480225, |
| "kl": 0.0, |
| "learning_rate": 9.995559861540447e-07, |
| "loss": 0.7391, |
| "num_samples": 1.0, |
| "reward": 2.5625, |
| "reward_std": 0.5194376111030579, |
| "rewards/gpt4o_holistic_reward": 2.5625, |
| "rl_loss": -1.4901161193847656e-08, |
| "sft_loss": 2.463510036468506, |
| "speech_entropy": 2.620394229888916, |
| "speech_kl": 0.0, |
| "step": 48, |
| "text_entropy": 1.4838829040527344, |
| "text_kl": 0.0, |
| "total_entropy": 2.386155605316162 |
| }, |
| { |
| "combined_loss": 0.8287366628646851, |
| "completion_length": 313.125, |
| "epoch": 0.015585241730279899, |
| "grad_norm": 2.5821146965026855, |
| "kl": 0.0, |
| "learning_rate": 9.995237102309018e-07, |
| "loss": 0.8287, |
| "num_samples": 1.0, |
| "reward": 4.0, |
| "reward_std": 1.2807698249816895, |
| "rewards/gpt4o_holistic_reward": 4.0, |
| "rl_loss": -3.725290298461914e-09, |
| "sft_loss": 2.762455463409424, |
| "speech_entropy": 2.669032096862793, |
| "speech_kl": 0.0, |
| "step": 49, |
| "text_entropy": 1.0619488954544067, |
| "text_kl": 0.0, |
| "total_entropy": 2.372556447982788 |
| }, |
| { |
| "combined_loss": 0.6661741733551025, |
| "completion_length": 309.875, |
| "epoch": 0.015903307888040712, |
| "grad_norm": 1.8256869316101074, |
| "kl": 0.0, |
| "learning_rate": 9.994903026323536e-07, |
| "loss": 0.6662, |
| "num_samples": 1.0, |
| "reward": 4.125, |
| "reward_std": 0.14443756639957428, |
| "rewards/gpt4o_holistic_reward": 4.125, |
| "rl_loss": 1.1175870895385742e-08, |
| "sft_loss": 2.220580577850342, |
| "speech_entropy": 2.4839134216308594, |
| "speech_kl": 0.0, |
| "step": 50, |
| "text_entropy": 0.8060861825942993, |
| "text_kl": 0.0, |
| "total_entropy": 2.165754556655884 |
| }, |
| { |
| "combined_loss": 0.7401760220527649, |
| "completion_length": 260.75, |
| "epoch": 0.016221374045801526, |
| "grad_norm": 2.784619092941284, |
| "kl": 0.0, |
| "learning_rate": 9.994557634425038e-07, |
| "loss": 0.7402, |
| "num_samples": 1.0, |
| "reward": 3.375, |
| "reward_std": 1.1831127405166626, |
| "rewards/gpt4o_holistic_reward": 3.375, |
| "rl_loss": 2.2351741790771484e-08, |
| "sft_loss": 2.4672532081604004, |
| "speech_entropy": 1.8801207542419434, |
| "speech_kl": 0.0, |
| "step": 51, |
| "text_entropy": 1.1698064804077148, |
| "text_kl": 0.0, |
| "total_entropy": 1.9170701503753662 |
| }, |
| { |
| "combined_loss": 0.7516125440597534, |
| "completion_length": 388.875, |
| "epoch": 0.01653944020356234, |
| "grad_norm": 1.9916623830795288, |
| "kl": 0.0, |
| "learning_rate": 9.994200927483053e-07, |
| "loss": 0.7516, |
| "num_samples": 1.0, |
| "reward": 2.75, |
| "reward_std": 0.9002986550331116, |
| "rewards/gpt4o_holistic_reward": 2.75, |
| "rl_loss": -7.450580596923828e-09, |
| "sft_loss": 2.5053749084472656, |
| "speech_entropy": 2.393984794616699, |
| "speech_kl": 0.0, |
| "step": 52, |
| "text_entropy": 1.078744649887085, |
| "text_kl": 0.0, |
| "total_entropy": 2.1226654052734375 |
| }, |
| { |
| "combined_loss": 0.6983101963996887, |
| "completion_length": 421.9375, |
| "epoch": 0.016857506361323157, |
| "grad_norm": 2.4226467609405518, |
| "kl": 0.0, |
| "learning_rate": 9.993832906395582e-07, |
| "loss": 0.6983, |
| "num_samples": 1.0, |
| "reward": 3.125, |
| "reward_std": 0.5001000165939331, |
| "rewards/gpt4o_holistic_reward": 3.125, |
| "rl_loss": 0.0, |
| "sft_loss": 2.327700614929199, |
| "speech_entropy": 2.536123752593994, |
| "speech_kl": 0.0, |
| "step": 53, |
| "text_entropy": 1.049363136291504, |
| "text_kl": 0.0, |
| "total_entropy": 2.2463369369506836 |
| }, |
| { |
| "combined_loss": 0.738640308380127, |
| "completion_length": 277.5625, |
| "epoch": 0.01717557251908397, |
| "grad_norm": 2.4906787872314453, |
| "kl": 0.0, |
| "learning_rate": 9.993453572089124e-07, |
| "loss": 0.7386, |
| "num_samples": 1.0, |
| "reward": 4.125, |
| "reward_std": 0.7501000165939331, |
| "rewards/gpt4o_holistic_reward": 4.125, |
| "rl_loss": 2.9802322387695312e-08, |
| "sft_loss": 2.46213436126709, |
| "speech_entropy": 2.3886947631835938, |
| "speech_kl": 0.0, |
| "step": 54, |
| "text_entropy": 1.1047334671020508, |
| "text_kl": 0.0, |
| "total_entropy": 2.162165641784668 |
| }, |
| { |
| "combined_loss": 0.6425143480300903, |
| "completion_length": 306.8125, |
| "epoch": 0.017493638676844784, |
| "grad_norm": 1.8969634771347046, |
| "kl": 0.0, |
| "learning_rate": 9.99306292551865e-07, |
| "loss": 0.6425, |
| "num_samples": 1.0, |
| "reward": 3.375, |
| "reward_std": 0.14443756639957428, |
| "rewards/gpt4o_holistic_reward": 3.375, |
| "rl_loss": 0.0, |
| "sft_loss": 2.141714572906494, |
| "speech_entropy": 2.4277968406677246, |
| "speech_kl": 0.0, |
| "step": 55, |
| "text_entropy": 0.7971184253692627, |
| "text_kl": 0.0, |
| "total_entropy": 2.088435173034668 |
| }, |
| { |
| "combined_loss": 0.5967005491256714, |
| "completion_length": 362.5, |
| "epoch": 0.017811704834605598, |
| "grad_norm": 1.6407321691513062, |
| "kl": 0.0, |
| "learning_rate": 9.99266096766761e-07, |
| "loss": 0.5967, |
| "num_samples": 1.0, |
| "reward": 4.25, |
| "reward_std": 0.7501000165939331, |
| "rewards/gpt4o_holistic_reward": 4.25, |
| "rl_loss": 7.450580596923828e-09, |
| "sft_loss": 1.989001750946045, |
| "speech_entropy": 2.3562631607055664, |
| "speech_kl": 0.0, |
| "step": 56, |
| "text_entropy": 0.650113582611084, |
| "text_kl": 0.0, |
| "total_entropy": 1.9950945377349854 |
| }, |
| { |
| "combined_loss": 0.6596853137016296, |
| "completion_length": 356.0, |
| "epoch": 0.01812977099236641, |
| "grad_norm": 3.1980700492858887, |
| "kl": 0.0, |
| "learning_rate": 9.992247699547936e-07, |
| "loss": 0.6597, |
| "num_samples": 1.0, |
| "reward": 3.0625, |
| "reward_std": 1.536826252937317, |
| "rewards/gpt4o_holistic_reward": 3.0625, |
| "rl_loss": -2.2351741790771484e-08, |
| "sft_loss": 2.198951244354248, |
| "speech_entropy": 2.4273312091827393, |
| "speech_kl": 0.0, |
| "step": 57, |
| "text_entropy": 0.5911531448364258, |
| "text_kl": 0.0, |
| "total_entropy": 2.0504350662231445 |
| }, |
| { |
| "combined_loss": 0.7111167907714844, |
| "completion_length": 328.8125, |
| "epoch": 0.018447837150127225, |
| "grad_norm": 2.5813424587249756, |
| "kl": 0.0, |
| "learning_rate": 9.99182312220003e-07, |
| "loss": 0.7111, |
| "num_samples": 1.0, |
| "reward": 2.5625, |
| "reward_std": 1.2798004150390625, |
| "rewards/gpt4o_holistic_reward": 2.5625, |
| "rl_loss": 0.0, |
| "sft_loss": 2.370388984680176, |
| "speech_entropy": 2.387758731842041, |
| "speech_kl": 0.0, |
| "step": 58, |
| "text_entropy": 1.290541410446167, |
| "text_kl": 0.0, |
| "total_entropy": 2.1883113384246826 |
| }, |
| { |
| "combined_loss": 0.7116686105728149, |
| "completion_length": 483.125, |
| "epoch": 0.018765903307888042, |
| "grad_norm": 1.8333094120025635, |
| "kl": 0.0, |
| "learning_rate": 9.991387236692764e-07, |
| "loss": 0.7117, |
| "num_samples": 1.0, |
| "reward": 4.3125, |
| "reward_std": 0.6038135886192322, |
| "rewards/gpt4o_holistic_reward": 4.3125, |
| "rl_loss": 7.450580596923828e-09, |
| "sft_loss": 2.3722286224365234, |
| "speech_entropy": 2.387399196624756, |
| "speech_kl": 0.0, |
| "step": 59, |
| "text_entropy": 0.8359103798866272, |
| "text_kl": 0.0, |
| "total_entropy": 2.049403429031372 |
| }, |
| { |
| "combined_loss": 0.7444514036178589, |
| "completion_length": 254.6875, |
| "epoch": 0.019083969465648856, |
| "grad_norm": 2.073017120361328, |
| "kl": 0.0, |
| "learning_rate": 9.990940044123479e-07, |
| "loss": 0.7445, |
| "num_samples": 1.0, |
| "reward": 5.0, |
| "reward_std": 9.999999747378752e-05, |
| "rewards/gpt4o_holistic_reward": 5.0, |
| "rl_loss": 0.0, |
| "sft_loss": 2.4815046787261963, |
| "speech_entropy": 2.459671974182129, |
| "speech_kl": 0.0, |
| "step": 60, |
| "text_entropy": 0.8760254383087158, |
| "text_kl": 0.0, |
| "total_entropy": 2.171876907348633 |
| }, |
| { |
| "combined_loss": 0.6521174907684326, |
| "completion_length": 459.6875, |
| "epoch": 0.01940203562340967, |
| "grad_norm": 2.9567947387695312, |
| "kl": 0.0, |
| "learning_rate": 9.990481545617983e-07, |
| "loss": 0.6521, |
| "num_samples": 1.0, |
| "reward": 2.90625, |
| "reward_std": 0.6609638333320618, |
| "rewards/gpt4o_holistic_reward": 2.90625, |
| "rl_loss": 1.1175870895385742e-08, |
| "sft_loss": 2.173725128173828, |
| "speech_entropy": 1.9960131645202637, |
| "speech_kl": 0.0, |
| "step": 61, |
| "text_entropy": 0.9018564820289612, |
| "text_kl": 0.0, |
| "total_entropy": 1.8020646572113037 |
| }, |
| { |
| "combined_loss": 0.7371933460235596, |
| "completion_length": 338.1875, |
| "epoch": 0.019720101781170483, |
| "grad_norm": 1.8295475244522095, |
| "kl": 0.0, |
| "learning_rate": 9.990011742330542e-07, |
| "loss": 0.7372, |
| "num_samples": 1.0, |
| "reward": 4.4375, |
| "reward_std": 0.8081126809120178, |
| "rewards/gpt4o_holistic_reward": 4.4375, |
| "rl_loss": 0.0, |
| "sft_loss": 2.4573111534118652, |
| "speech_entropy": 2.409938097000122, |
| "speech_kl": 0.0, |
| "step": 62, |
| "text_entropy": 1.2029674053192139, |
| "text_kl": 0.0, |
| "total_entropy": 2.186131238937378 |
| }, |
| { |
| "combined_loss": 0.748369574546814, |
| "completion_length": 248.625, |
| "epoch": 0.020038167938931296, |
| "grad_norm": 2.8785228729248047, |
| "kl": 0.0, |
| "learning_rate": 9.98953063544389e-07, |
| "loss": 0.7484, |
| "num_samples": 1.0, |
| "reward": 3.375, |
| "reward_std": 1.7174440622329712, |
| "rewards/gpt4o_holistic_reward": 3.375, |
| "rl_loss": 3.725290298461914e-09, |
| "sft_loss": 2.49456524848938, |
| "speech_entropy": 2.4662866592407227, |
| "speech_kl": 0.0, |
| "step": 63, |
| "text_entropy": 1.4685275554656982, |
| "text_kl": 0.0, |
| "total_entropy": 2.278367280960083 |
| }, |
| { |
| "combined_loss": 0.668720006942749, |
| "completion_length": 369.25, |
| "epoch": 0.020356234096692113, |
| "grad_norm": 2.220876693725586, |
| "kl": 0.0, |
| "learning_rate": 9.989038226169207e-07, |
| "loss": 0.6687, |
| "num_samples": 1.0, |
| "reward": 3.6875, |
| "reward_std": 1.019437551498413, |
| "rewards/gpt4o_holistic_reward": 3.6875, |
| "rl_loss": 2.2351741790771484e-08, |
| "sft_loss": 2.2290663719177246, |
| "speech_entropy": 2.3302509784698486, |
| "speech_kl": 0.0, |
| "step": 64, |
| "text_entropy": 0.9201998710632324, |
| "text_kl": 0.0, |
| "total_entropy": 2.049210786819458 |
| }, |
| { |
| "combined_loss": 0.7535024881362915, |
| "completion_length": 459.625, |
| "epoch": 0.020674300254452927, |
| "grad_norm": 3.4402458667755127, |
| "kl": 0.0, |
| "learning_rate": 9.98853451574614e-07, |
| "loss": 0.7535, |
| "num_samples": 1.0, |
| "reward": 2.625, |
| "reward_std": 1.161826252937317, |
| "rewards/gpt4o_holistic_reward": 2.625, |
| "rl_loss": 3.725290298461914e-09, |
| "sft_loss": 2.5116748809814453, |
| "speech_entropy": 2.1198697090148926, |
| "speech_kl": 0.0, |
| "step": 65, |
| "text_entropy": 0.7827379703521729, |
| "text_kl": 0.0, |
| "total_entropy": 1.855495572090149 |
| }, |
| { |
| "combined_loss": 0.6956632733345032, |
| "completion_length": 267.0, |
| "epoch": 0.02099236641221374, |
| "grad_norm": 2.199934244155884, |
| "kl": 0.0, |
| "learning_rate": 9.988019505442775e-07, |
| "loss": 0.6957, |
| "num_samples": 1.0, |
| "reward": 4.5625, |
| "reward_std": 0.42705631256103516, |
| "rewards/gpt4o_holistic_reward": 4.5625, |
| "rl_loss": -9.313225746154785e-09, |
| "sft_loss": 2.3188774585723877, |
| "speech_entropy": 2.392827033996582, |
| "speech_kl": 0.0, |
| "step": 66, |
| "text_entropy": 1.4710502624511719, |
| "text_kl": 0.0, |
| "total_entropy": 2.220284938812256 |
| }, |
| { |
| "combined_loss": 0.6765873432159424, |
| "completion_length": 389.0, |
| "epoch": 0.021310432569974554, |
| "grad_norm": 2.0207324028015137, |
| "kl": 0.0, |
| "learning_rate": 9.987493196555649e-07, |
| "loss": 0.6766, |
| "num_samples": 1.0, |
| "reward": 4.25, |
| "reward_std": 0.9565354585647583, |
| "rewards/gpt4o_holistic_reward": 4.25, |
| "rl_loss": -3.725290298461914e-09, |
| "sft_loss": 2.255290985107422, |
| "speech_entropy": 2.4497034549713135, |
| "speech_kl": 0.0, |
| "step": 67, |
| "text_entropy": 0.9174681901931763, |
| "text_kl": 0.0, |
| "total_entropy": 2.17930006980896 |
| }, |
| { |
| "combined_loss": 0.7614186406135559, |
| "completion_length": 355.125, |
| "epoch": 0.021628498727735368, |
| "grad_norm": 2.724764585494995, |
| "kl": 0.0, |
| "learning_rate": 9.986955590409747e-07, |
| "loss": 0.7614, |
| "num_samples": 1.0, |
| "reward": 3.0, |
| "reward_std": 0.9788135290145874, |
| "rewards/gpt4o_holistic_reward": 3.0, |
| "rl_loss": 7.450580596923828e-09, |
| "sft_loss": 2.53806209564209, |
| "speech_entropy": 2.3254313468933105, |
| "speech_kl": 0.0, |
| "step": 68, |
| "text_entropy": 0.8083021640777588, |
| "text_kl": 0.0, |
| "total_entropy": 2.063969612121582 |
| }, |
| { |
| "combined_loss": 0.6472816467285156, |
| "completion_length": 407.5, |
| "epoch": 0.02194656488549618, |
| "grad_norm": 1.8889881372451782, |
| "kl": 0.0, |
| "learning_rate": 9.986406688358491e-07, |
| "loss": 0.6473, |
| "num_samples": 1.0, |
| "reward": 3.75, |
| "reward_std": 1.2387304306030273, |
| "rewards/gpt4o_holistic_reward": 3.75, |
| "rl_loss": 3.725290298461914e-09, |
| "sft_loss": 2.1576054096221924, |
| "speech_entropy": 2.329939842224121, |
| "speech_kl": 0.0, |
| "step": 69, |
| "text_entropy": 0.53284752368927, |
| "text_kl": 0.0, |
| "total_entropy": 1.9823434352874756 |
| }, |
| { |
| "combined_loss": 0.8229079842567444, |
| "completion_length": 370.9375, |
| "epoch": 0.022264631043257, |
| "grad_norm": 2.0763661861419678, |
| "kl": 0.0, |
| "learning_rate": 9.98584649178374e-07, |
| "loss": 0.8229, |
| "num_samples": 1.0, |
| "reward": 4.3125, |
| "reward_std": 0.8538135886192322, |
| "rewards/gpt4o_holistic_reward": 4.3125, |
| "rl_loss": -3.725290298461914e-09, |
| "sft_loss": 2.7430264949798584, |
| "speech_entropy": 2.232337474822998, |
| "speech_kl": 0.0, |
| "step": 70, |
| "text_entropy": 1.1738722324371338, |
| "text_kl": 0.0, |
| "total_entropy": 2.027456283569336 |
| }, |
| { |
| "combined_loss": 0.7078136205673218, |
| "completion_length": 402.1875, |
| "epoch": 0.022582697201017812, |
| "grad_norm": 2.2039167881011963, |
| "kl": 0.0, |
| "learning_rate": 9.985275002095789e-07, |
| "loss": 0.7078, |
| "num_samples": 1.0, |
| "reward": 3.5, |
| "reward_std": 1.183112621307373, |
| "rewards/gpt4o_holistic_reward": 3.5, |
| "rl_loss": 0.0, |
| "sft_loss": 2.3593788146972656, |
| "speech_entropy": 2.290937900543213, |
| "speech_kl": 0.0, |
| "step": 71, |
| "text_entropy": 1.0952609777450562, |
| "text_kl": 0.0, |
| "total_entropy": 2.054002285003662 |
| }, |
| { |
| "combined_loss": 0.7774462699890137, |
| "completion_length": 370.0, |
| "epoch": 0.022900763358778626, |
| "grad_norm": 2.0490500926971436, |
| "kl": 0.0, |
| "learning_rate": 9.984692220733363e-07, |
| "loss": 0.7774, |
| "num_samples": 1.0, |
| "reward": 4.0, |
| "reward_std": 0.7126991748809814, |
| "rewards/gpt4o_holistic_reward": 4.0, |
| "rl_loss": 7.450580596923828e-09, |
| "sft_loss": 2.591487407684326, |
| "speech_entropy": 2.2809252738952637, |
| "speech_kl": 0.0, |
| "step": 72, |
| "text_entropy": 1.4932012557983398, |
| "text_kl": 0.0, |
| "total_entropy": 2.1317849159240723 |
| }, |
| { |
| "combined_loss": 0.7310476899147034, |
| "completion_length": 416.125, |
| "epoch": 0.02321882951653944, |
| "grad_norm": 1.7452352046966553, |
| "kl": 0.0, |
| "learning_rate": 9.984098149163612e-07, |
| "loss": 0.731, |
| "num_samples": 1.0, |
| "reward": 4.375, |
| "reward_std": 0.5520563125610352, |
| "rewards/gpt4o_holistic_reward": 4.375, |
| "rl_loss": 0.0, |
| "sft_loss": 2.4368255138397217, |
| "speech_entropy": 2.2939229011535645, |
| "speech_kl": 0.0, |
| "step": 73, |
| "text_entropy": 1.1391348838806152, |
| "text_kl": 0.0, |
| "total_entropy": 2.0588903427124023 |
| }, |
| { |
| "combined_loss": 0.6863812208175659, |
| "completion_length": 426.4375, |
| "epoch": 0.023536895674300253, |
| "grad_norm": 1.8024567365646362, |
| "kl": 0.0, |
| "learning_rate": 9.98349278888211e-07, |
| "loss": 0.6864, |
| "num_samples": 1.0, |
| "reward": 2.5, |
| "reward_std": 0.14443756639957428, |
| "rewards/gpt4o_holistic_reward": 2.5, |
| "rl_loss": 0.0, |
| "sft_loss": 2.2879374027252197, |
| "speech_entropy": 1.6559635400772095, |
| "speech_kl": 0.0, |
| "step": 74, |
| "text_entropy": 0.796265184879303, |
| "text_kl": 0.0, |
| "total_entropy": 1.6575778722763062 |
| }, |
| { |
| "combined_loss": 0.6243323683738708, |
| "completion_length": 371.375, |
| "epoch": 0.02385496183206107, |
| "grad_norm": 1.5667005777359009, |
| "kl": 0.0, |
| "learning_rate": 9.982876141412855e-07, |
| "loss": 0.6243, |
| "num_samples": 1.0, |
| "reward": 4.25, |
| "reward_std": 0.3944375813007355, |
| "rewards/gpt4o_holistic_reward": 4.25, |
| "rl_loss": -3.725290298461914e-09, |
| "sft_loss": 2.0811080932617188, |
| "speech_entropy": 2.248847723007202, |
| "speech_kl": 0.0, |
| "step": 75, |
| "text_entropy": 0.7592964172363281, |
| "text_kl": 0.0, |
| "total_entropy": 1.9378535747528076 |
| }, |
| { |
| "combined_loss": 0.745708703994751, |
| "completion_length": 340.0, |
| "epoch": 0.024173027989821884, |
| "grad_norm": 2.7676212787628174, |
| "kl": 0.0, |
| "learning_rate": 9.982248208308253e-07, |
| "loss": 0.7457, |
| "num_samples": 1.0, |
| "reward": 3.4375, |
| "reward_std": 1.1250998973846436, |
| "rewards/gpt4o_holistic_reward": 3.4375, |
| "rl_loss": 1.30385160446167e-08, |
| "sft_loss": 2.4856958389282227, |
| "speech_entropy": 2.3308167457580566, |
| "speech_kl": 0.0, |
| "step": 76, |
| "text_entropy": 1.140113353729248, |
| "text_kl": 0.0, |
| "total_entropy": 2.1104745864868164 |
| }, |
| { |
| "combined_loss": 0.6474106907844543, |
| "completion_length": 482.3125, |
| "epoch": 0.024491094147582698, |
| "grad_norm": 2.045401096343994, |
| "kl": 0.0, |
| "learning_rate": 9.981608991149123e-07, |
| "loss": 0.6474, |
| "num_samples": 1.0, |
| "reward": 3.9375, |
| "reward_std": 1.375100016593933, |
| "rewards/gpt4o_holistic_reward": 3.9375, |
| "rl_loss": -3.725290298461914e-09, |
| "sft_loss": 2.1580355167388916, |
| "speech_entropy": 2.319225311279297, |
| "speech_kl": 0.0, |
| "step": 77, |
| "text_entropy": 0.8749170899391174, |
| "text_kl": 0.0, |
| "total_entropy": 2.047513723373413 |
| }, |
| { |
| "combined_loss": 0.7499513626098633, |
| "completion_length": 317.375, |
| "epoch": 0.02480916030534351, |
| "grad_norm": 2.194958209991455, |
| "kl": 0.0, |
| "learning_rate": 9.980958491544697e-07, |
| "loss": 0.75, |
| "num_samples": 1.0, |
| "reward": 3.75, |
| "reward_std": 0.6444375514984131, |
| "rewards/gpt4o_holistic_reward": 3.75, |
| "rl_loss": -5.587935447692871e-09, |
| "sft_loss": 2.499837875366211, |
| "speech_entropy": 2.3051891326904297, |
| "speech_kl": 0.0, |
| "step": 78, |
| "text_entropy": 1.2871158123016357, |
| "text_kl": 0.0, |
| "total_entropy": 2.1196823120117188 |
| }, |
| { |
| "combined_loss": 0.686776876449585, |
| "completion_length": 423.0, |
| "epoch": 0.025127226463104325, |
| "grad_norm": 2.2229573726654053, |
| "kl": 0.0, |
| "learning_rate": 9.980296711132606e-07, |
| "loss": 0.6868, |
| "num_samples": 1.0, |
| "reward": 3.1875, |
| "reward_std": 1.2525264024734497, |
| "rewards/gpt4o_holistic_reward": 3.1875, |
| "rl_loss": -1.4901161193847656e-08, |
| "sft_loss": 2.2892560958862305, |
| "speech_entropy": 2.3141417503356934, |
| "speech_kl": 0.0, |
| "step": 79, |
| "text_entropy": 1.1506476402282715, |
| "text_kl": 0.0, |
| "total_entropy": 2.09428071975708 |
| }, |
| { |
| "combined_loss": 0.7100934386253357, |
| "completion_length": 345.375, |
| "epoch": 0.02544529262086514, |
| "grad_norm": 2.2556166648864746, |
| "kl": 0.0, |
| "learning_rate": 9.97962365157888e-07, |
| "loss": 0.7101, |
| "num_samples": 1.0, |
| "reward": 3.3125, |
| "reward_std": 1.7286533117294312, |
| "rewards/gpt4o_holistic_reward": 3.3125, |
| "rl_loss": 1.862645149230957e-08, |
| "sft_loss": 2.366978168487549, |
| "speech_entropy": 2.3218116760253906, |
| "speech_kl": 0.0, |
| "step": 80, |
| "text_entropy": 1.308366060256958, |
| "text_kl": 0.0, |
| "total_entropy": 2.1330385208129883 |
| }, |
| { |
| "combined_loss": 0.7132716774940491, |
| "completion_length": 592.8125, |
| "epoch": 0.025763358778625955, |
| "grad_norm": 2.3514413833618164, |
| "kl": 0.0, |
| "learning_rate": 9.97893931457795e-07, |
| "loss": 0.7133, |
| "num_samples": 1.0, |
| "reward": 3.625, |
| "reward_std": 1.0983424186706543, |
| "rewards/gpt4o_holistic_reward": 3.625, |
| "rl_loss": -1.1175870895385742e-08, |
| "sft_loss": 2.3775720596313477, |
| "speech_entropy": 2.166839838027954, |
| "speech_kl": 0.0, |
| "step": 81, |
| "text_entropy": 0.6873091459274292, |
| "text_kl": 0.0, |
| "total_entropy": 1.8318581581115723 |
| }, |
| { |
| "combined_loss": 0.7247699499130249, |
| "completion_length": 368.875, |
| "epoch": 0.02608142493638677, |
| "grad_norm": 2.5765998363494873, |
| "kl": 0.0, |
| "learning_rate": 9.978243701852625e-07, |
| "loss": 0.7248, |
| "num_samples": 1.0, |
| "reward": 3.875, |
| "reward_std": 1.0000998973846436, |
| "rewards/gpt4o_holistic_reward": 3.875, |
| "rl_loss": 3.725290298461914e-09, |
| "sft_loss": 2.4158997535705566, |
| "speech_entropy": 2.3246705532073975, |
| "speech_kl": 0.0, |
| "step": 82, |
| "text_entropy": 1.0593510866165161, |
| "text_kl": 0.0, |
| "total_entropy": 1.8370463848114014 |
| }, |
| { |
| "combined_loss": 0.7713165879249573, |
| "completion_length": 510.5, |
| "epoch": 0.026399491094147583, |
| "grad_norm": 2.231696844100952, |
| "kl": 0.0, |
| "learning_rate": 9.977536815154117e-07, |
| "loss": 0.7713, |
| "num_samples": 1.0, |
| "reward": 3.125, |
| "reward_std": 0.7180101871490479, |
| "rewards/gpt4o_holistic_reward": 3.125, |
| "rl_loss": 3.725290298461914e-09, |
| "sft_loss": 2.5710554122924805, |
| "speech_entropy": 2.2227771282196045, |
| "speech_kl": 0.0, |
| "step": 83, |
| "text_entropy": 0.9712114334106445, |
| "text_kl": 0.0, |
| "total_entropy": 1.9926376342773438 |
| }, |
| { |
| "combined_loss": 0.66883784532547, |
| "completion_length": 462.8125, |
| "epoch": 0.026717557251908396, |
| "grad_norm": 2.2363080978393555, |
| "kl": 0.0, |
| "learning_rate": 9.97681865626201e-07, |
| "loss": 0.6688, |
| "num_samples": 1.0, |
| "reward": 2.625, |
| "reward_std": 1.017488718032837, |
| "rewards/gpt4o_holistic_reward": 2.625, |
| "rl_loss": 1.4901161193847656e-08, |
| "sft_loss": 2.229459524154663, |
| "speech_entropy": 2.178924083709717, |
| "speech_kl": 0.0, |
| "step": 84, |
| "text_entropy": 0.9698714017868042, |
| "text_kl": 0.0, |
| "total_entropy": 1.9314519166946411 |
| }, |
| { |
| "combined_loss": 0.7741247415542603, |
| "completion_length": 383.75, |
| "epoch": 0.02703562340966921, |
| "grad_norm": 2.050072193145752, |
| "kl": 0.0, |
| "learning_rate": 9.97608922698427e-07, |
| "loss": 0.7741, |
| "num_samples": 1.0, |
| "reward": 3.5, |
| "reward_std": 1.1404881477355957, |
| "rewards/gpt4o_holistic_reward": 3.5, |
| "rl_loss": 1.862645149230957e-08, |
| "sft_loss": 2.580415725708008, |
| "speech_entropy": 2.2788493633270264, |
| "speech_kl": 0.0, |
| "step": 85, |
| "text_entropy": 0.9160740971565247, |
| "text_kl": 0.0, |
| "total_entropy": 2.0253312587738037 |
| }, |
| { |
| "combined_loss": 0.6860091090202332, |
| "completion_length": 373.125, |
| "epoch": 0.027353689567430027, |
| "grad_norm": 3.053793430328369, |
| "kl": 0.0, |
| "learning_rate": 9.975348529157229e-07, |
| "loss": 0.686, |
| "num_samples": 1.0, |
| "reward": 3.875, |
| "reward_std": 0.8944376111030579, |
| "rewards/gpt4o_holistic_reward": 3.875, |
| "rl_loss": -3.725290298461914e-09, |
| "sft_loss": 2.2866969108581543, |
| "speech_entropy": 2.2216386795043945, |
| "speech_kl": 0.0, |
| "step": 86, |
| "text_entropy": 0.9611391425132751, |
| "text_kl": 0.0, |
| "total_entropy": 1.9948720932006836 |
| }, |
| { |
| "combined_loss": 0.7096176147460938, |
| "completion_length": 320.625, |
| "epoch": 0.02767175572519084, |
| "grad_norm": 2.05188250541687, |
| "kl": 0.0, |
| "learning_rate": 9.974596564645598e-07, |
| "loss": 0.7096, |
| "num_samples": 1.0, |
| "reward": 3.75, |
| "reward_std": 0.5520563125610352, |
| "rewards/gpt4o_holistic_reward": 3.75, |
| "rl_loss": -3.725290298461914e-09, |
| "sft_loss": 2.365391969680786, |
| "speech_entropy": 2.3104031085968018, |
| "speech_kl": 0.0, |
| "step": 87, |
| "text_entropy": 1.122492790222168, |
| "text_kl": 0.0, |
| "total_entropy": 2.0940003395080566 |
| }, |
| { |
| "combined_loss": 0.6567751169204712, |
| "completion_length": 530.125, |
| "epoch": 0.027989821882951654, |
| "grad_norm": 1.9356623888015747, |
| "kl": 0.0, |
| "learning_rate": 9.973833335342446e-07, |
| "loss": 0.6568, |
| "num_samples": 1.0, |
| "reward": 3.8125, |
| "reward_std": 0.807937741279602, |
| "rewards/gpt4o_holistic_reward": 3.8125, |
| "rl_loss": -7.450580596923828e-09, |
| "sft_loss": 2.1892502307891846, |
| "speech_entropy": 2.227321147918701, |
| "speech_kl": 0.0, |
| "step": 88, |
| "text_entropy": 1.0134230852127075, |
| "text_kl": 0.0, |
| "total_entropy": 1.9499976634979248 |
| }, |
| { |
| "combined_loss": 0.7411503791809082, |
| "completion_length": 485.9375, |
| "epoch": 0.028307888040712468, |
| "grad_norm": 1.8700544834136963, |
| "kl": 0.0, |
| "learning_rate": 9.9730588431692e-07, |
| "loss": 0.7412, |
| "num_samples": 1.0, |
| "reward": 3.1875, |
| "reward_std": 0.6637751460075378, |
| "rewards/gpt4o_holistic_reward": 3.1875, |
| "rl_loss": 5.587935447692871e-09, |
| "sft_loss": 2.47050142288208, |
| "speech_entropy": 2.357027530670166, |
| "speech_kl": 0.0, |
| "step": 89, |
| "text_entropy": 0.865372896194458, |
| "text_kl": 0.0, |
| "total_entropy": 2.083202600479126 |
| }, |
| { |
| "combined_loss": 0.6864016056060791, |
| "completion_length": 444.875, |
| "epoch": 0.02862595419847328, |
| "grad_norm": 1.8632216453552246, |
| "kl": 0.0, |
| "learning_rate": 9.972273090075645e-07, |
| "loss": 0.6864, |
| "num_samples": 1.0, |
| "reward": 2.90625, |
| "reward_std": 0.7911534309387207, |
| "rewards/gpt4o_holistic_reward": 2.90625, |
| "rl_loss": 3.725290298461914e-09, |
| "sft_loss": 2.2880053520202637, |
| "speech_entropy": 2.133165121078491, |
| "speech_kl": 0.0, |
| "step": 90, |
| "text_entropy": 0.9260656833648682, |
| "text_kl": 0.0, |
| "total_entropy": 1.80254328250885 |
| }, |
| { |
| "combined_loss": 0.6964578628540039, |
| "completion_length": 433.9375, |
| "epoch": 0.028944020356234095, |
| "grad_norm": 1.954607367515564, |
| "kl": 0.0, |
| "learning_rate": 9.97147607803991e-07, |
| "loss": 0.6965, |
| "num_samples": 1.0, |
| "reward": 2.875, |
| "reward_std": 0.6983708143234253, |
| "rewards/gpt4o_holistic_reward": 2.875, |
| "rl_loss": 7.450580596923828e-09, |
| "sft_loss": 2.321526050567627, |
| "speech_entropy": 2.222456455230713, |
| "speech_kl": 0.0, |
| "step": 91, |
| "text_entropy": 1.1704072952270508, |
| "text_kl": 0.0, |
| "total_entropy": 1.9853535890579224 |
| }, |
| { |
| "combined_loss": 0.7411965131759644, |
| "completion_length": 500.125, |
| "epoch": 0.029262086513994912, |
| "grad_norm": 2.046849250793457, |
| "kl": 0.0, |
| "learning_rate": 9.970667809068474e-07, |
| "loss": 0.7412, |
| "num_samples": 1.0, |
| "reward": 2.6875, |
| "reward_std": 0.8678992986679077, |
| "rewards/gpt4o_holistic_reward": 2.6875, |
| "rl_loss": -7.450580596923828e-09, |
| "sft_loss": 2.4706549644470215, |
| "speech_entropy": 2.2015461921691895, |
| "speech_kl": 0.0, |
| "step": 92, |
| "text_entropy": 1.1552786827087402, |
| "text_kl": 0.0, |
| "total_entropy": 2.0065386295318604 |
| }, |
| { |
| "combined_loss": 0.7156788110733032, |
| "completion_length": 403.125, |
| "epoch": 0.029580152671755726, |
| "grad_norm": 2.1912262439727783, |
| "kl": 0.0, |
| "learning_rate": 9.969848285196157e-07, |
| "loss": 0.7157, |
| "num_samples": 1.0, |
| "reward": 3.875, |
| "reward_std": 1.4470233917236328, |
| "rewards/gpt4o_holistic_reward": 3.875, |
| "rl_loss": 7.450580596923828e-09, |
| "sft_loss": 2.3855957984924316, |
| "speech_entropy": 2.2147631645202637, |
| "speech_kl": 0.0, |
| "step": 93, |
| "text_entropy": 1.0130202770233154, |
| "text_kl": 0.0, |
| "total_entropy": 1.9480912685394287 |
| }, |
| { |
| "combined_loss": 0.660508394241333, |
| "completion_length": 303.5, |
| "epoch": 0.02989821882951654, |
| "grad_norm": 2.6118781566619873, |
| "kl": 0.0, |
| "learning_rate": 9.969017508486105e-07, |
| "loss": 0.6605, |
| "num_samples": 1.0, |
| "reward": 3.9375, |
| "reward_std": 1.2233422994613647, |
| "rewards/gpt4o_holistic_reward": 3.9375, |
| "rl_loss": 0.0, |
| "sft_loss": 2.2016944885253906, |
| "speech_entropy": 2.45943284034729, |
| "speech_kl": 0.0, |
| "step": 94, |
| "text_entropy": 1.1381962299346924, |
| "text_kl": 0.0, |
| "total_entropy": 2.197706460952759 |
| }, |
| { |
| "combined_loss": 0.7968100309371948, |
| "completion_length": 326.6875, |
| "epoch": 0.030216284987277353, |
| "grad_norm": 2.501716375350952, |
| "kl": 0.0, |
| "learning_rate": 9.968175481029798e-07, |
| "loss": 0.7968, |
| "num_samples": 1.0, |
| "reward": 3.75, |
| "reward_std": 0.7887751460075378, |
| "rewards/gpt4o_holistic_reward": 3.75, |
| "rl_loss": 1.4901161193847656e-08, |
| "sft_loss": 2.656033515930176, |
| "speech_entropy": 2.389923334121704, |
| "speech_kl": 0.0, |
| "step": 95, |
| "text_entropy": 1.3742587566375732, |
| "text_kl": 0.0, |
| "total_entropy": 2.177299976348877 |
| }, |
| { |
| "combined_loss": 0.7844936847686768, |
| "completion_length": 325.375, |
| "epoch": 0.030534351145038167, |
| "grad_norm": 2.390887498855591, |
| "kl": 0.0, |
| "learning_rate": 9.967322204947038e-07, |
| "loss": 0.7845, |
| "num_samples": 1.0, |
| "reward": 3.5625, |
| "reward_std": 1.3041632175445557, |
| "rewards/gpt4o_holistic_reward": 3.5625, |
| "rl_loss": -1.4901161193847656e-08, |
| "sft_loss": 2.6149790287017822, |
| "speech_entropy": 2.296114206314087, |
| "speech_kl": 0.0, |
| "step": 96, |
| "text_entropy": 1.206712245941162, |
| "text_kl": 0.0, |
| "total_entropy": 2.0752415657043457 |
| }, |
| { |
| "combined_loss": 0.7896230220794678, |
| "completion_length": 317.3125, |
| "epoch": 0.030852417302798984, |
| "grad_norm": 2.363767147064209, |
| "kl": 0.0, |
| "learning_rate": 9.96645768238595e-07, |
| "loss": 0.7896, |
| "num_samples": 1.0, |
| "reward": 4.3125, |
| "reward_std": 1.0173285007476807, |
| "rewards/gpt4o_holistic_reward": 4.3125, |
| "rl_loss": 2.2351741790771484e-08, |
| "sft_loss": 2.6320767402648926, |
| "speech_entropy": 2.3654651641845703, |
| "speech_kl": 0.0, |
| "step": 97, |
| "text_entropy": 1.4019675254821777, |
| "text_kl": 0.0, |
| "total_entropy": 2.174586296081543 |
| }, |
| { |
| "combined_loss": 0.6473546028137207, |
| "completion_length": 494.8125, |
| "epoch": 0.031170483460559797, |
| "grad_norm": 1.823893427848816, |
| "kl": 0.0, |
| "learning_rate": 9.965581915522964e-07, |
| "loss": 0.6474, |
| "num_samples": 1.0, |
| "reward": 3.8125, |
| "reward_std": 0.9981511235237122, |
| "rewards/gpt4o_holistic_reward": 3.8125, |
| "rl_loss": 1.4901161193847656e-08, |
| "sft_loss": 2.157848358154297, |
| "speech_entropy": 2.229156494140625, |
| "speech_kl": 0.0, |
| "step": 98, |
| "text_entropy": 1.1541342735290527, |
| "text_kl": 0.0, |
| "total_entropy": 2.026672840118408 |
| }, |
| { |
| "combined_loss": 0.7282466292381287, |
| "completion_length": 373.5625, |
| "epoch": 0.03148854961832061, |
| "grad_norm": 2.1396214962005615, |
| "kl": 0.0, |
| "learning_rate": 9.964694906562826e-07, |
| "loss": 0.7282, |
| "num_samples": 1.0, |
| "reward": 3.0625, |
| "reward_std": 0.5281319618225098, |
| "rewards/gpt4o_holistic_reward": 3.0625, |
| "rl_loss": 0.0, |
| "sft_loss": 2.4274885654449463, |
| "speech_entropy": 2.2445669174194336, |
| "speech_kl": 0.0, |
| "step": 99, |
| "text_entropy": 1.1222586631774902, |
| "text_kl": 0.0, |
| "total_entropy": 2.014863967895508 |
| }, |
| { |
| "combined_loss": 0.7340562343597412, |
| "completion_length": 360.875, |
| "epoch": 0.031806615776081425, |
| "grad_norm": 2.1381564140319824, |
| "kl": 0.0, |
| "learning_rate": 9.96379665773858e-07, |
| "loss": 0.7341, |
| "num_samples": 1.0, |
| "reward": 4.0625, |
| "reward_std": 0.7394567728042603, |
| "rewards/gpt4o_holistic_reward": 4.0625, |
| "rl_loss": -1.4901161193847656e-08, |
| "sft_loss": 2.4468541145324707, |
| "speech_entropy": 2.292088270187378, |
| "speech_kl": 0.0, |
| "step": 100, |
| "text_entropy": 1.465188980102539, |
| "text_kl": 0.0, |
| "total_entropy": 2.1374363899230957 |
| }, |
| { |
| "combined_loss": 0.7976064682006836, |
| "completion_length": 296.25, |
| "epoch": 0.03212468193384224, |
| "grad_norm": 1.9820961952209473, |
| "kl": 0.0, |
| "learning_rate": 9.962887171311562e-07, |
| "loss": 0.7976, |
| "num_samples": 1.0, |
| "reward": 4.6875, |
| "reward_std": 0.3751000165939331, |
| "rewards/gpt4o_holistic_reward": 4.6875, |
| "rl_loss": 1.862645149230957e-09, |
| "sft_loss": 2.6586880683898926, |
| "speech_entropy": 2.284330368041992, |
| "speech_kl": 0.0, |
| "step": 101, |
| "text_entropy": 1.2345999479293823, |
| "text_kl": 0.0, |
| "total_entropy": 2.0803914070129395 |
| }, |
| { |
| "combined_loss": 0.6758935451507568, |
| "completion_length": 429.5625, |
| "epoch": 0.03244274809160305, |
| "grad_norm": 2.111237049102783, |
| "kl": 0.0, |
| "learning_rate": 9.961966449571407e-07, |
| "loss": 0.6759, |
| "num_samples": 1.0, |
| "reward": 2.875, |
| "reward_std": 1.5787245035171509, |
| "rewards/gpt4o_holistic_reward": 2.875, |
| "rl_loss": -1.862645149230957e-08, |
| "sft_loss": 2.2529783248901367, |
| "speech_entropy": 2.263260841369629, |
| "speech_kl": 0.0, |
| "step": 102, |
| "text_entropy": 0.8939234614372253, |
| "text_kl": 0.0, |
| "total_entropy": 1.9995882511138916 |
| }, |
| { |
| "combined_loss": 0.7361711263656616, |
| "completion_length": 395.9375, |
| "epoch": 0.03276081424936387, |
| "grad_norm": 1.850319743156433, |
| "kl": 0.0, |
| "learning_rate": 9.961034494836029e-07, |
| "loss": 0.7362, |
| "num_samples": 1.0, |
| "reward": 4.5, |
| "reward_std": 0.614456832408905, |
| "rewards/gpt4o_holistic_reward": 4.5, |
| "rl_loss": 7.450580596923828e-09, |
| "sft_loss": 2.4539036750793457, |
| "speech_entropy": 2.2334747314453125, |
| "speech_kl": 0.0, |
| "step": 103, |
| "text_entropy": 1.248520851135254, |
| "text_kl": 0.0, |
| "total_entropy": 2.0419020652770996 |
| }, |
| { |
| "combined_loss": 0.6475083827972412, |
| "completion_length": 475.625, |
| "epoch": 0.03307888040712468, |
| "grad_norm": 1.9739583730697632, |
| "kl": 0.0, |
| "learning_rate": 9.960091309451625e-07, |
| "loss": 0.6475, |
| "num_samples": 1.0, |
| "reward": 2.375, |
| "reward_std": 1.1752138137817383, |
| "rewards/gpt4o_holistic_reward": 2.375, |
| "rl_loss": -1.1175870895385742e-08, |
| "sft_loss": 2.1583614349365234, |
| "speech_entropy": 2.26711368560791, |
| "speech_kl": 0.0, |
| "step": 104, |
| "text_entropy": 0.8396031856536865, |
| "text_kl": 0.0, |
| "total_entropy": 1.9932773113250732 |
| }, |
| { |
| "combined_loss": 0.7815308570861816, |
| "completion_length": 394.0625, |
| "epoch": 0.033396946564885496, |
| "grad_norm": 2.9025793075561523, |
| "kl": 0.0, |
| "learning_rate": 9.95913689579266e-07, |
| "loss": 0.7815, |
| "num_samples": 1.0, |
| "reward": 4.5, |
| "reward_std": 0.704224169254303, |
| "rewards/gpt4o_holistic_reward": 4.5, |
| "rl_loss": 1.1175870895385742e-08, |
| "sft_loss": 2.6051025390625, |
| "speech_entropy": 2.256810188293457, |
| "speech_kl": 0.0, |
| "step": 105, |
| "text_entropy": 1.4908084869384766, |
| "text_kl": 0.0, |
| "total_entropy": 2.115090847015381 |
| }, |
| { |
| "combined_loss": 0.7154955863952637, |
| "completion_length": 475.25, |
| "epoch": 0.03371501272264631, |
| "grad_norm": 1.8907688856124878, |
| "kl": 0.0, |
| "learning_rate": 9.958171256261873e-07, |
| "loss": 0.7155, |
| "num_samples": 1.0, |
| "reward": 3.625, |
| "reward_std": 0.7587944269180298, |
| "rewards/gpt4o_holistic_reward": 3.625, |
| "rl_loss": 5.587935447692871e-09, |
| "sft_loss": 2.3849852085113525, |
| "speech_entropy": 2.25828218460083, |
| "speech_kl": 0.0, |
| "step": 106, |
| "text_entropy": 1.0575703382492065, |
| "text_kl": 0.0, |
| "total_entropy": 2.039116144180298 |
| }, |
| { |
| "combined_loss": 0.6845788955688477, |
| "completion_length": 512.5625, |
| "epoch": 0.034033078880407124, |
| "grad_norm": 1.646696925163269, |
| "kl": 0.0, |
| "learning_rate": 9.957194393290259e-07, |
| "loss": 0.6846, |
| "num_samples": 1.0, |
| "reward": 3.25, |
| "reward_std": 0.28877514600753784, |
| "rewards/gpt4o_holistic_reward": 3.25, |
| "rl_loss": 0.0, |
| "sft_loss": 2.2819294929504395, |
| "speech_entropy": 2.166106700897217, |
| "speech_kl": 0.0, |
| "step": 107, |
| "text_entropy": 0.6757108569145203, |
| "text_kl": 0.0, |
| "total_entropy": 1.8433654308319092 |
| }, |
| { |
| "combined_loss": 0.7150195837020874, |
| "completion_length": 491.6875, |
| "epoch": 0.03435114503816794, |
| "grad_norm": 1.833224892616272, |
| "kl": 0.0, |
| "learning_rate": 9.956206309337066e-07, |
| "loss": 0.715, |
| "num_samples": 1.0, |
| "reward": 3.0625, |
| "reward_std": 0.5581126809120178, |
| "rewards/gpt4o_holistic_reward": 3.0625, |
| "rl_loss": 0.0, |
| "sft_loss": 2.3833985328674316, |
| "speech_entropy": 2.1860873699188232, |
| "speech_kl": 0.0, |
| "step": 108, |
| "text_entropy": 1.2409619092941284, |
| "text_kl": 0.0, |
| "total_entropy": 2.0030808448791504 |
| }, |
| { |
| "combined_loss": 0.6441072821617126, |
| "completion_length": 293.3125, |
| "epoch": 0.03466921119592875, |
| "grad_norm": 2.175922393798828, |
| "kl": 0.0, |
| "learning_rate": 9.9552070068898e-07, |
| "loss": 0.6441, |
| "num_samples": 1.0, |
| "reward": 3.6875, |
| "reward_std": 0.8750999569892883, |
| "rewards/gpt4o_holistic_reward": 3.6875, |
| "rl_loss": -7.450580596923828e-09, |
| "sft_loss": 2.147024154663086, |
| "speech_entropy": 2.2451610565185547, |
| "speech_kl": 0.0, |
| "step": 109, |
| "text_entropy": 1.1438672542572021, |
| "text_kl": 0.0, |
| "total_entropy": 2.044260025024414 |
| }, |
| { |
| "combined_loss": 0.6975011825561523, |
| "completion_length": 398.8125, |
| "epoch": 0.03498727735368957, |
| "grad_norm": 1.7699748277664185, |
| "kl": 0.0, |
| "learning_rate": 9.954196488464196e-07, |
| "loss": 0.6975, |
| "num_samples": 1.0, |
| "reward": 3.3125, |
| "reward_std": 0.2694375813007355, |
| "rewards/gpt4o_holistic_reward": 3.3125, |
| "rl_loss": 3.725290298461914e-09, |
| "sft_loss": 2.3250038623809814, |
| "speech_entropy": 2.3067374229431152, |
| "speech_kl": 0.0, |
| "step": 110, |
| "text_entropy": 1.097214937210083, |
| "text_kl": 0.0, |
| "total_entropy": 2.070528268814087 |
| }, |
| { |
| "combined_loss": 0.6993280649185181, |
| "completion_length": 462.0625, |
| "epoch": 0.035305343511450385, |
| "grad_norm": 1.6435576677322388, |
| "kl": 0.0, |
| "learning_rate": 9.953174756604242e-07, |
| "loss": 0.6993, |
| "num_samples": 1.0, |
| "reward": 4.375, |
| "reward_std": 0.8483423590660095, |
| "rewards/gpt4o_holistic_reward": 4.375, |
| "rl_loss": 1.1175870895385742e-08, |
| "sft_loss": 2.3310935497283936, |
| "speech_entropy": 2.2712697982788086, |
| "speech_kl": 0.0, |
| "step": 111, |
| "text_entropy": 0.8282650113105774, |
| "text_kl": 0.0, |
| "total_entropy": 1.9767301082611084 |
| }, |
| { |
| "combined_loss": 0.6761323809623718, |
| "completion_length": 294.25, |
| "epoch": 0.035623409669211195, |
| "grad_norm": 2.934861183166504, |
| "kl": 0.0, |
| "learning_rate": 9.95214181388214e-07, |
| "loss": 0.6761, |
| "num_samples": 1.0, |
| "reward": 2.9375, |
| "reward_std": 1.14496648311615, |
| "rewards/gpt4o_holistic_reward": 2.9375, |
| "rl_loss": -1.1175870895385742e-08, |
| "sft_loss": 2.253774404525757, |
| "speech_entropy": 2.4185285568237305, |
| "speech_kl": 0.0, |
| "step": 112, |
| "text_entropy": 1.202892541885376, |
| "text_kl": 0.0, |
| "total_entropy": 2.171070098876953 |
| }, |
| { |
| "combined_loss": 0.7770819067955017, |
| "completion_length": 376.625, |
| "epoch": 0.03594147582697201, |
| "grad_norm": 73.62300872802734, |
| "kl": 0.0, |
| "learning_rate": 9.951097662898325e-07, |
| "loss": 0.7771, |
| "num_samples": 1.0, |
| "reward": 3.6875, |
| "reward_std": 0.8750999569892883, |
| "rewards/gpt4o_holistic_reward": 3.6875, |
| "rl_loss": -3.725290298461914e-09, |
| "sft_loss": 2.590272903442383, |
| "speech_entropy": 2.2988834381103516, |
| "speech_kl": 0.0, |
| "step": 113, |
| "text_entropy": 1.125475287437439, |
| "text_kl": 0.0, |
| "total_entropy": 2.0705349445343018 |
| }, |
| { |
| "combined_loss": 0.6613748073577881, |
| "completion_length": 509.4375, |
| "epoch": 0.03625954198473282, |
| "grad_norm": 2.1758439540863037, |
| "kl": 0.0, |
| "learning_rate": 9.950042306281445e-07, |
| "loss": 0.6614, |
| "num_samples": 1.0, |
| "reward": 3.4375, |
| "reward_std": 1.4686723947525024, |
| "rewards/gpt4o_holistic_reward": 3.4375, |
| "rl_loss": -7.450580596923828e-09, |
| "sft_loss": 2.204582691192627, |
| "speech_entropy": 2.27337908744812, |
| "speech_kl": 0.0, |
| "step": 114, |
| "text_entropy": 0.9519417881965637, |
| "text_kl": 0.0, |
| "total_entropy": 2.023104667663574 |
| }, |
| { |
| "combined_loss": 0.6832470893859863, |
| "completion_length": 496.5625, |
| "epoch": 0.03657760814249364, |
| "grad_norm": 9.72231674194336, |
| "kl": 0.0, |
| "learning_rate": 9.94897574668836e-07, |
| "loss": 0.6832, |
| "num_samples": 1.0, |
| "reward": 4.3125, |
| "reward_std": 0.6250999569892883, |
| "rewards/gpt4o_holistic_reward": 4.3125, |
| "rl_loss": -7.450580596923828e-09, |
| "sft_loss": 2.2774901390075684, |
| "speech_entropy": 2.29954195022583, |
| "speech_kl": 0.0, |
| "step": 115, |
| "text_entropy": 1.2875080108642578, |
| "text_kl": 0.0, |
| "total_entropy": 2.0839648246765137 |
| }, |
| { |
| "combined_loss": 0.6530779600143433, |
| "completion_length": 418.875, |
| "epoch": 0.03689567430025445, |
| "grad_norm": 2.1920166015625, |
| "kl": 0.0, |
| "learning_rate": 9.94789798680413e-07, |
| "loss": 0.6531, |
| "num_samples": 1.0, |
| "reward": 4.4375, |
| "reward_std": 0.23945678770542145, |
| "rewards/gpt4o_holistic_reward": 4.4375, |
| "rl_loss": 0.0, |
| "sft_loss": 2.176926612854004, |
| "speech_entropy": 2.3347601890563965, |
| "speech_kl": 0.0, |
| "step": 116, |
| "text_entropy": 1.2426201105117798, |
| "text_kl": 0.0, |
| "total_entropy": 2.1373891830444336 |
| }, |
| { |
| "combined_loss": 0.6800841689109802, |
| "completion_length": 381.5625, |
| "epoch": 0.03721374045801527, |
| "grad_norm": 2.2477047443389893, |
| "kl": 0.0, |
| "learning_rate": 9.94680902934202e-07, |
| "loss": 0.6801, |
| "num_samples": 1.0, |
| "reward": 4.0, |
| "reward_std": 0.9940415620803833, |
| "rewards/gpt4o_holistic_reward": 4.0, |
| "rl_loss": -7.450580596923828e-09, |
| "sft_loss": 2.2669472694396973, |
| "speech_entropy": 2.3144116401672363, |
| "speech_kl": 0.0, |
| "step": 117, |
| "text_entropy": 1.3952128887176514, |
| "text_kl": 0.0, |
| "total_entropy": 2.1378135681152344 |
| }, |
| { |
| "combined_loss": 0.7205807566642761, |
| "completion_length": 266.75, |
| "epoch": 0.037531806615776084, |
| "grad_norm": 2.3268606662750244, |
| "kl": 0.0, |
| "learning_rate": 9.94570887704347e-07, |
| "loss": 0.7206, |
| "num_samples": 1.0, |
| "reward": 4.25, |
| "reward_std": 1.2288135290145874, |
| "rewards/gpt4o_holistic_reward": 4.25, |
| "rl_loss": 7.450580596923828e-09, |
| "sft_loss": 2.4019358158111572, |
| "speech_entropy": 2.3998208045959473, |
| "speech_kl": 0.0, |
| "step": 118, |
| "text_entropy": 1.6290910243988037, |
| "text_kl": 0.0, |
| "total_entropy": 2.2556941509246826 |
| }, |
| { |
| "combined_loss": 0.7098033428192139, |
| "completion_length": 314.0, |
| "epoch": 0.037849872773536894, |
| "grad_norm": 2.0043420791625977, |
| "kl": 0.0, |
| "learning_rate": 9.944597532678119e-07, |
| "loss": 0.7098, |
| "num_samples": 1.0, |
| "reward": 3.0, |
| "reward_std": 0.6444375514984131, |
| "rewards/gpt4o_holistic_reward": 3.0, |
| "rl_loss": -1.862645149230957e-08, |
| "sft_loss": 2.366011142730713, |
| "speech_entropy": 2.3302297592163086, |
| "speech_kl": 0.0, |
| "step": 119, |
| "text_entropy": 1.2116031646728516, |
| "text_kl": 0.0, |
| "total_entropy": 2.1241815090179443 |
| }, |
| { |
| "combined_loss": 0.7127311825752258, |
| "completion_length": 565.0625, |
| "epoch": 0.03816793893129771, |
| "grad_norm": 1.6876592636108398, |
| "kl": 0.0, |
| "learning_rate": 9.943474999043775e-07, |
| "loss": 0.7127, |
| "num_samples": 1.0, |
| "reward": 2.6875, |
| "reward_std": 0.7235617637634277, |
| "rewards/gpt4o_holistic_reward": 2.6875, |
| "rl_loss": 7.450580596923828e-09, |
| "sft_loss": 2.3757705688476562, |
| "speech_entropy": 2.1477317810058594, |
| "speech_kl": 0.0, |
| "step": 120, |
| "text_entropy": 0.8258525133132935, |
| "text_kl": 0.0, |
| "total_entropy": 1.8741722106933594 |
| }, |
| { |
| "combined_loss": 0.6518399715423584, |
| "completion_length": 460.875, |
| "epoch": 0.03848600508905852, |
| "grad_norm": 1.9149906635284424, |
| "kl": 0.0, |
| "learning_rate": 9.94234127896641e-07, |
| "loss": 0.6518, |
| "num_samples": 1.0, |
| "reward": 2.5, |
| "reward_std": 1.2335585355758667, |
| "rewards/gpt4o_holistic_reward": 2.5, |
| "rl_loss": 3.725290298461914e-09, |
| "sft_loss": 2.172799825668335, |
| "speech_entropy": 2.247347593307495, |
| "speech_kl": 0.0, |
| "step": 121, |
| "text_entropy": 0.7082687020301819, |
| "text_kl": 0.0, |
| "total_entropy": 1.955895185470581 |
| }, |
| { |
| "combined_loss": 0.6932583451271057, |
| "completion_length": 415.9375, |
| "epoch": 0.03880407124681934, |
| "grad_norm": 1.803132176399231, |
| "kl": 0.0, |
| "learning_rate": 9.94119637530017e-07, |
| "loss": 0.6933, |
| "num_samples": 1.0, |
| "reward": 2.5625, |
| "reward_std": 0.1251000016927719, |
| "rewards/gpt4o_holistic_reward": 2.5625, |
| "rl_loss": -7.450580596923828e-09, |
| "sft_loss": 2.310861110687256, |
| "speech_entropy": 2.2421672344207764, |
| "speech_kl": 0.0, |
| "step": 122, |
| "text_entropy": 1.031731367111206, |
| "text_kl": 0.0, |
| "total_entropy": 2.0151727199554443 |
| }, |
| { |
| "combined_loss": 0.6039379239082336, |
| "completion_length": 606.0625, |
| "epoch": 0.039122137404580155, |
| "grad_norm": 1.545535683631897, |
| "kl": 0.0, |
| "learning_rate": 9.940040290927343e-07, |
| "loss": 0.6039, |
| "num_samples": 1.0, |
| "reward": 3.6875, |
| "reward_std": 1.2654881477355957, |
| "rewards/gpt4o_holistic_reward": 3.6875, |
| "rl_loss": -1.1175870895385742e-08, |
| "sft_loss": 2.0131263732910156, |
| "speech_entropy": 2.1528759002685547, |
| "speech_kl": 0.0, |
| "step": 123, |
| "text_entropy": 0.7468642592430115, |
| "text_kl": 0.0, |
| "total_entropy": 1.8691860437393188 |
| }, |
| { |
| "combined_loss": 0.637839674949646, |
| "completion_length": 377.375, |
| "epoch": 0.039440203562340966, |
| "grad_norm": 1.8493309020996094, |
| "kl": 0.0, |
| "learning_rate": 9.938873028758374e-07, |
| "loss": 0.6378, |
| "num_samples": 1.0, |
| "reward": 3.8125, |
| "reward_std": 1.0646765232086182, |
| "rewards/gpt4o_holistic_reward": 3.8125, |
| "rl_loss": 1.862645149230957e-08, |
| "sft_loss": 2.126132011413574, |
| "speech_entropy": 2.3589277267456055, |
| "speech_kl": 0.0, |
| "step": 124, |
| "text_entropy": 1.0232412815093994, |
| "text_kl": 0.0, |
| "total_entropy": 2.1352920532226562 |
| }, |
| { |
| "combined_loss": 0.6103811860084534, |
| "completion_length": 330.75, |
| "epoch": 0.03975826972010178, |
| "grad_norm": 1.9315379858016968, |
| "kl": 0.0, |
| "learning_rate": 9.93769459173184e-07, |
| "loss": 0.6104, |
| "num_samples": 1.0, |
| "reward": 3.8125, |
| "reward_std": 0.8751000165939331, |
| "rewards/gpt4o_holistic_reward": 3.8125, |
| "rl_loss": 2.60770320892334e-08, |
| "sft_loss": 2.0346038341522217, |
| "speech_entropy": 2.236910343170166, |
| "speech_kl": 0.0, |
| "step": 125, |
| "text_entropy": 0.7762662172317505, |
| "text_kl": 0.0, |
| "total_entropy": 1.9460570812225342 |
| }, |
| { |
| "combined_loss": 0.6840636730194092, |
| "completion_length": 393.25, |
| "epoch": 0.04007633587786259, |
| "grad_norm": 2.2317352294921875, |
| "kl": 0.0, |
| "learning_rate": 9.936504982814457e-07, |
| "loss": 0.6841, |
| "num_samples": 1.0, |
| "reward": 3.8125, |
| "reward_std": 0.579224169254303, |
| "rewards/gpt4o_holistic_reward": 3.8125, |
| "rl_loss": 7.450580596923828e-09, |
| "sft_loss": 2.28021240234375, |
| "speech_entropy": 2.2817130088806152, |
| "speech_kl": 0.0, |
| "step": 126, |
| "text_entropy": 0.9976484775543213, |
| "text_kl": 0.0, |
| "total_entropy": 2.0223848819732666 |
| }, |
| { |
| "combined_loss": 0.6637799739837646, |
| "completion_length": 422.9375, |
| "epoch": 0.04039440203562341, |
| "grad_norm": 1.987329125404358, |
| "kl": 0.0, |
| "learning_rate": 9.935304205001066e-07, |
| "loss": 0.6638, |
| "num_samples": 1.0, |
| "reward": 3.375, |
| "reward_std": 0.9001989364624023, |
| "rewards/gpt4o_holistic_reward": 3.375, |
| "rl_loss": 1.4901161193847656e-08, |
| "sft_loss": 2.212599754333496, |
| "speech_entropy": 2.2056527137756348, |
| "speech_kl": 0.0, |
| "step": 127, |
| "text_entropy": 0.9696847200393677, |
| "text_kl": 0.0, |
| "total_entropy": 1.965734839439392 |
| }, |
| { |
| "combined_loss": 0.7613767981529236, |
| "completion_length": 356.6875, |
| "epoch": 0.04071246819338423, |
| "grad_norm": 2.8389971256256104, |
| "kl": 0.0, |
| "learning_rate": 9.934092261314617e-07, |
| "loss": 0.7614, |
| "num_samples": 1.0, |
| "reward": 2.375, |
| "reward_std": 1.0087943077087402, |
| "rewards/gpt4o_holistic_reward": 2.375, |
| "rl_loss": 3.725290298461914e-09, |
| "sft_loss": 2.5379223823547363, |
| "speech_entropy": 2.238985538482666, |
| "speech_kl": 0.0, |
| "step": 128, |
| "text_entropy": 0.8710644841194153, |
| "text_kl": 0.0, |
| "total_entropy": 1.9456055164337158 |
| }, |
| { |
| "combined_loss": 0.7112630009651184, |
| "completion_length": 368.5, |
| "epoch": 0.04103053435114504, |
| "grad_norm": 1.772113561630249, |
| "kl": 0.0, |
| "learning_rate": 9.932869154806185e-07, |
| "loss": 0.7113, |
| "num_samples": 1.0, |
| "reward": 4.75, |
| "reward_std": 0.5001000165939331, |
| "rewards/gpt4o_holistic_reward": 4.75, |
| "rl_loss": 7.450580596923828e-09, |
| "sft_loss": 2.3708765506744385, |
| "speech_entropy": 2.2802562713623047, |
| "speech_kl": 0.0, |
| "step": 129, |
| "text_entropy": 1.0544356107711792, |
| "text_kl": 0.0, |
| "total_entropy": 2.0886013507843018 |
| }, |
| { |
| "combined_loss": 0.6580800414085388, |
| "completion_length": 434.4375, |
| "epoch": 0.041348600508905854, |
| "grad_norm": 1.6276386976242065, |
| "kl": 0.0, |
| "learning_rate": 9.931634888554935e-07, |
| "loss": 0.6581, |
| "num_samples": 1.0, |
| "reward": 4.75, |
| "reward_std": 0.28877514600753784, |
| "rewards/gpt4o_holistic_reward": 4.75, |
| "rl_loss": 3.725290298461914e-09, |
| "sft_loss": 2.1935999393463135, |
| "speech_entropy": 2.217723846435547, |
| "speech_kl": 0.0, |
| "step": 130, |
| "text_entropy": 0.9710134267807007, |
| "text_kl": 0.0, |
| "total_entropy": 1.9742225408554077 |
| }, |
| { |
| "combined_loss": 0.7769891023635864, |
| "completion_length": 299.3125, |
| "epoch": 0.041666666666666664, |
| "grad_norm": 2.6481971740722656, |
| "kl": 0.0, |
| "learning_rate": 9.930389465668132e-07, |
| "loss": 0.777, |
| "num_samples": 1.0, |
| "reward": 4.375, |
| "reward_std": 1.250100016593933, |
| "rewards/gpt4o_holistic_reward": 4.375, |
| "rl_loss": -7.450580596923828e-09, |
| "sft_loss": 2.589963436126709, |
| "speech_entropy": 2.583065986633301, |
| "speech_kl": 0.0, |
| "step": 131, |
| "text_entropy": 1.1438889503479004, |
| "text_kl": 0.0, |
| "total_entropy": 2.3853845596313477 |
| }, |
| { |
| "combined_loss": 0.6638728380203247, |
| "completion_length": 330.6875, |
| "epoch": 0.04198473282442748, |
| "grad_norm": 1.9016973972320557, |
| "kl": 0.0, |
| "learning_rate": 9.929132889281126e-07, |
| "loss": 0.6639, |
| "num_samples": 1.0, |
| "reward": 4.1875, |
| "reward_std": 0.1251000016927719, |
| "rewards/gpt4o_holistic_reward": 4.1875, |
| "rl_loss": 0.0, |
| "sft_loss": 2.212909460067749, |
| "speech_entropy": 2.2528860569000244, |
| "speech_kl": 0.0, |
| "step": 132, |
| "text_entropy": 1.0528066158294678, |
| "text_kl": 0.0, |
| "total_entropy": 2.020822525024414 |
| }, |
| { |
| "combined_loss": 0.7390019297599792, |
| "completion_length": 362.9375, |
| "epoch": 0.0423027989821883, |
| "grad_norm": 1.9329123497009277, |
| "kl": 0.0, |
| "learning_rate": 9.927865162557345e-07, |
| "loss": 0.739, |
| "num_samples": 1.0, |
| "reward": 4.0, |
| "reward_std": 0.5001000165939331, |
| "rewards/gpt4o_holistic_reward": 4.0, |
| "rl_loss": 7.450580596923828e-09, |
| "sft_loss": 2.4633398056030273, |
| "speech_entropy": 2.1905789375305176, |
| "speech_kl": 0.0, |
| "step": 133, |
| "text_entropy": 1.1139158010482788, |
| "text_kl": 0.0, |
| "total_entropy": 1.9759125709533691 |
| }, |
| { |
| "combined_loss": 0.8078758716583252, |
| "completion_length": 340.375, |
| "epoch": 0.04262086513994911, |
| "grad_norm": 2.5768580436706543, |
| "kl": 0.0, |
| "learning_rate": 9.926586288688295e-07, |
| "loss": 0.8079, |
| "num_samples": 1.0, |
| "reward": 3.8125, |
| "reward_std": 1.0792241096496582, |
| "rewards/gpt4o_holistic_reward": 3.8125, |
| "rl_loss": -3.725290298461914e-09, |
| "sft_loss": 2.6929194927215576, |
| "speech_entropy": 2.2723331451416016, |
| "speech_kl": 0.0, |
| "step": 134, |
| "text_entropy": 1.9749469757080078, |
| "text_kl": 0.0, |
| "total_entropy": 2.219290256500244 |
| }, |
| { |
| "combined_loss": 0.6676887273788452, |
| "completion_length": 364.0625, |
| "epoch": 0.042938931297709926, |
| "grad_norm": 1.688926339149475, |
| "kl": 0.0, |
| "learning_rate": 9.925296270893531e-07, |
| "loss": 0.6677, |
| "num_samples": 1.0, |
| "reward": 4.4375, |
| "reward_std": 0.8538135886192322, |
| "rewards/gpt4o_holistic_reward": 4.4375, |
| "rl_loss": 2.2351741790771484e-08, |
| "sft_loss": 2.2256290912628174, |
| "speech_entropy": 2.1805365085601807, |
| "speech_kl": 0.0, |
| "step": 135, |
| "text_entropy": 0.9880690574645996, |
| "text_kl": 0.0, |
| "total_entropy": 1.9571375846862793 |
| }, |
| { |
| "combined_loss": 0.7317559123039246, |
| "completion_length": 442.375, |
| "epoch": 0.043256997455470736, |
| "grad_norm": 2.035095691680908, |
| "kl": 0.0, |
| "learning_rate": 9.923995112420679e-07, |
| "loss": 0.7318, |
| "num_samples": 1.0, |
| "reward": 3.375, |
| "reward_std": 0.5001000165939331, |
| "rewards/gpt4o_holistic_reward": 3.375, |
| "rl_loss": 3.725290298461914e-09, |
| "sft_loss": 2.4391863346099854, |
| "speech_entropy": 2.255812644958496, |
| "speech_kl": 0.0, |
| "step": 136, |
| "text_entropy": 1.4666073322296143, |
| "text_kl": 0.0, |
| "total_entropy": 2.107949733734131 |
| }, |
| { |
| "combined_loss": 0.624023973941803, |
| "completion_length": 389.125, |
| "epoch": 0.04357506361323155, |
| "grad_norm": 2.0001988410949707, |
| "kl": 0.0, |
| "learning_rate": 9.922682816545399e-07, |
| "loss": 0.624, |
| "num_samples": 1.0, |
| "reward": 3.375, |
| "reward_std": 0.9396764636039734, |
| "rewards/gpt4o_holistic_reward": 3.375, |
| "rl_loss": -3.725290298461914e-09, |
| "sft_loss": 2.0800797939300537, |
| "speech_entropy": 2.1931605339050293, |
| "speech_kl": 0.0, |
| "step": 137, |
| "text_entropy": 1.0324208736419678, |
| "text_kl": 0.0, |
| "total_entropy": 2.005070924758911 |
| }, |
| { |
| "combined_loss": 0.6598743796348572, |
| "completion_length": 486.0625, |
| "epoch": 0.04389312977099236, |
| "grad_norm": 2.193516731262207, |
| "kl": 0.0, |
| "learning_rate": 9.9213593865714e-07, |
| "loss": 0.6599, |
| "num_samples": 1.0, |
| "reward": 4.125, |
| "reward_std": 1.183112621307373, |
| "rewards/gpt4o_holistic_reward": 4.125, |
| "rl_loss": -7.450580596923828e-09, |
| "sft_loss": 2.1995811462402344, |
| "speech_entropy": 2.289968252182007, |
| "speech_kl": 0.0, |
| "step": 138, |
| "text_entropy": 1.0836520195007324, |
| "text_kl": 0.0, |
| "total_entropy": 2.0700204372406006 |
| }, |
| { |
| "combined_loss": 0.6657881736755371, |
| "completion_length": 326.8125, |
| "epoch": 0.04421119592875318, |
| "grad_norm": 1.7214406728744507, |
| "kl": 0.0, |
| "learning_rate": 9.920024825830406e-07, |
| "loss": 0.6658, |
| "num_samples": 1.0, |
| "reward": 4.5625, |
| "reward_std": 0.3146764636039734, |
| "rewards/gpt4o_holistic_reward": 4.5625, |
| "rl_loss": -7.450580596923828e-09, |
| "sft_loss": 2.2192935943603516, |
| "speech_entropy": 2.253314971923828, |
| "speech_kl": 0.0, |
| "step": 139, |
| "text_entropy": 1.1099263429641724, |
| "text_kl": 0.0, |
| "total_entropy": 2.0267200469970703 |
| }, |
| { |
| "combined_loss": 0.6005537509918213, |
| "completion_length": 402.875, |
| "epoch": 0.044529262086514, |
| "grad_norm": 2.0783207416534424, |
| "kl": 0.0, |
| "learning_rate": 9.91867913768218e-07, |
| "loss": 0.6006, |
| "num_samples": 1.0, |
| "reward": 3.0, |
| "reward_std": 1.0774502754211426, |
| "rewards/gpt4o_holistic_reward": 3.0, |
| "rl_loss": -3.725290298461914e-09, |
| "sft_loss": 2.0018458366394043, |
| "speech_entropy": 2.27315354347229, |
| "speech_kl": 0.0, |
| "step": 140, |
| "text_entropy": 1.074316143989563, |
| "text_kl": 0.0, |
| "total_entropy": 2.044334888458252 |
| }, |
| { |
| "combined_loss": 0.6734490394592285, |
| "completion_length": 382.6875, |
| "epoch": 0.04484732824427481, |
| "grad_norm": 1.4923752546310425, |
| "kl": 0.0, |
| "learning_rate": 9.917322325514487e-07, |
| "loss": 0.6734, |
| "num_samples": 1.0, |
| "reward": 4.5, |
| "reward_std": 0.4565354883670807, |
| "rewards/gpt4o_holistic_reward": 4.5, |
| "rl_loss": 0.0, |
| "sft_loss": 2.2448298931121826, |
| "speech_entropy": 2.2860617637634277, |
| "speech_kl": 0.0, |
| "step": 141, |
| "text_entropy": 0.9306349754333496, |
| "text_kl": 0.0, |
| "total_entropy": 2.0320706367492676 |
| }, |
| { |
| "combined_loss": 0.6445150375366211, |
| "completion_length": 401.875, |
| "epoch": 0.045165394402035625, |
| "grad_norm": 2.9365267753601074, |
| "kl": 0.0, |
| "learning_rate": 9.915954392743102e-07, |
| "loss": 0.6445, |
| "num_samples": 1.0, |
| "reward": 2.25, |
| "reward_std": 0.9002986550331116, |
| "rewards/gpt4o_holistic_reward": 2.25, |
| "rl_loss": -1.862645149230957e-08, |
| "sft_loss": 2.148383617401123, |
| "speech_entropy": 2.395698070526123, |
| "speech_kl": 0.0, |
| "step": 142, |
| "text_entropy": 1.1935876607894897, |
| "text_kl": 0.0, |
| "total_entropy": 2.16239070892334 |
| }, |
| { |
| "combined_loss": 0.6403375864028931, |
| "completion_length": 475.5, |
| "epoch": 0.045483460559796435, |
| "grad_norm": 1.8146846294403076, |
| "kl": 0.0, |
| "learning_rate": 9.914575342811792e-07, |
| "loss": 0.6403, |
| "num_samples": 1.0, |
| "reward": 2.6875, |
| "reward_std": 1.096787929534912, |
| "rewards/gpt4o_holistic_reward": 2.6875, |
| "rl_loss": 0.0, |
| "sft_loss": 2.134458541870117, |
| "speech_entropy": 2.208829641342163, |
| "speech_kl": 0.0, |
| "step": 143, |
| "text_entropy": 0.6652034521102905, |
| "text_kl": 0.0, |
| "total_entropy": 1.8854793310165405 |
| }, |
| { |
| "combined_loss": 0.6461049914360046, |
| "completion_length": 507.3125, |
| "epoch": 0.04580152671755725, |
| "grad_norm": 1.7192074060440063, |
| "kl": 0.0, |
| "learning_rate": 9.913185179192316e-07, |
| "loss": 0.6461, |
| "num_samples": 1.0, |
| "reward": 3.375, |
| "reward_std": 0.8536533713340759, |
| "rewards/gpt4o_holistic_reward": 3.375, |
| "rl_loss": 7.450580596923828e-09, |
| "sft_loss": 2.1536831855773926, |
| "speech_entropy": 2.2175731658935547, |
| "speech_kl": 0.0, |
| "step": 144, |
| "text_entropy": 0.8226357698440552, |
| "text_kl": 0.0, |
| "total_entropy": 1.9434648752212524 |
| }, |
| { |
| "combined_loss": 0.6483294367790222, |
| "completion_length": 542.5, |
| "epoch": 0.04611959287531807, |
| "grad_norm": 1.73550546169281, |
| "kl": 0.0, |
| "learning_rate": 9.911783905384405e-07, |
| "loss": 0.6483, |
| "num_samples": 1.0, |
| "reward": 4.4375, |
| "reward_std": 0.9137751460075378, |
| "rewards/gpt4o_holistic_reward": 4.4375, |
| "rl_loss": -1.862645149230957e-08, |
| "sft_loss": 2.161098003387451, |
| "speech_entropy": 2.1762547492980957, |
| "speech_kl": 0.0, |
| "step": 145, |
| "text_entropy": 1.2476625442504883, |
| "text_kl": 0.0, |
| "total_entropy": 2.0006935596466064 |
| }, |
| { |
| "combined_loss": 0.6429945230484009, |
| "completion_length": 354.8125, |
| "epoch": 0.04643765903307888, |
| "grad_norm": 1.9572664499282837, |
| "kl": 0.0, |
| "learning_rate": 9.910371524915768e-07, |
| "loss": 0.643, |
| "num_samples": 1.0, |
| "reward": 2.6875, |
| "reward_std": 0.6229909658432007, |
| "rewards/gpt4o_holistic_reward": 2.6875, |
| "rl_loss": -3.725290298461914e-09, |
| "sft_loss": 2.143314838409424, |
| "speech_entropy": 2.268470287322998, |
| "speech_kl": 0.0, |
| "step": 146, |
| "text_entropy": 0.9877128601074219, |
| "text_kl": 0.0, |
| "total_entropy": 2.013819694519043 |
| }, |
| { |
| "combined_loss": 0.725261926651001, |
| "completion_length": 361.375, |
| "epoch": 0.046755725190839696, |
| "grad_norm": 2.1626226902008057, |
| "kl": 0.0, |
| "learning_rate": 9.908948041342072e-07, |
| "loss": 0.7253, |
| "num_samples": 1.0, |
| "reward": 3.5, |
| "reward_std": 1.1963939666748047, |
| "rewards/gpt4o_holistic_reward": 3.5, |
| "rl_loss": -3.725290298461914e-09, |
| "sft_loss": 2.417539596557617, |
| "speech_entropy": 2.394669532775879, |
| "speech_kl": 0.0, |
| "step": 147, |
| "text_entropy": 1.3553166389465332, |
| "text_kl": 0.0, |
| "total_entropy": 2.1874074935913086 |
| }, |
| { |
| "combined_loss": 0.7557258009910583, |
| "completion_length": 520.1875, |
| "epoch": 0.047073791348600506, |
| "grad_norm": 1.967831015586853, |
| "kl": 0.0, |
| "learning_rate": 9.907513458246934e-07, |
| "loss": 0.7557, |
| "num_samples": 1.0, |
| "reward": 4.4375, |
| "reward_std": 0.9733423590660095, |
| "rewards/gpt4o_holistic_reward": 4.4375, |
| "rl_loss": 7.450580596923828e-09, |
| "sft_loss": 2.5190858840942383, |
| "speech_entropy": 2.2636446952819824, |
| "speech_kl": 0.0, |
| "step": 148, |
| "text_entropy": 1.526688814163208, |
| "text_kl": 0.0, |
| "total_entropy": 2.1319243907928467 |
| }, |
| { |
| "combined_loss": 0.6749532222747803, |
| "completion_length": 338.5, |
| "epoch": 0.04739185750636132, |
| "grad_norm": 1.4147047996520996, |
| "kl": 0.0, |
| "learning_rate": 9.90606777924191e-07, |
| "loss": 0.675, |
| "num_samples": 1.0, |
| "reward": 3.6875, |
| "reward_std": 0.23945678770542145, |
| "rewards/gpt4o_holistic_reward": 3.6875, |
| "rl_loss": 3.725290298461914e-09, |
| "sft_loss": 2.2498438358306885, |
| "speech_entropy": 2.2806315422058105, |
| "speech_kl": 0.0, |
| "step": 149, |
| "text_entropy": 0.9889430999755859, |
| "text_kl": 0.0, |
| "total_entropy": 2.014362096786499 |
| }, |
| { |
| "combined_loss": 0.7438491582870483, |
| "completion_length": 432.8125, |
| "epoch": 0.04770992366412214, |
| "grad_norm": 1.7533581256866455, |
| "kl": 0.0, |
| "learning_rate": 9.904611007966504e-07, |
| "loss": 0.7438, |
| "num_samples": 1.0, |
| "reward": 4.6875, |
| "reward_std": 0.6251000165939331, |
| "rewards/gpt4o_holistic_reward": 4.6875, |
| "rl_loss": -1.1175870895385742e-08, |
| "sft_loss": 2.479497194290161, |
| "speech_entropy": 2.2352869510650635, |
| "speech_kl": 0.0, |
| "step": 150, |
| "text_entropy": 1.373764991760254, |
| "text_kl": 0.0, |
| "total_entropy": 2.0701396465301514 |
| }, |
| { |
| "combined_loss": 0.6471817493438721, |
| "completion_length": 439.8125, |
| "epoch": 0.04802798982188295, |
| "grad_norm": 1.6037753820419312, |
| "kl": 0.0, |
| "learning_rate": 9.90314314808813e-07, |
| "loss": 0.6472, |
| "num_samples": 1.0, |
| "reward": 3.9375, |
| "reward_std": 0.1251000016927719, |
| "rewards/gpt4o_holistic_reward": 3.9375, |
| "rl_loss": 7.450580596923828e-09, |
| "sft_loss": 2.1572723388671875, |
| "speech_entropy": 2.206624746322632, |
| "speech_kl": 0.0, |
| "step": 151, |
| "text_entropy": 1.0710368156433105, |
| "text_kl": 0.0, |
| "total_entropy": 1.9989676475524902 |
| }, |
| { |
| "combined_loss": 0.713241696357727, |
| "completion_length": 396.6875, |
| "epoch": 0.04834605597964377, |
| "grad_norm": 1.923511266708374, |
| "kl": 0.0, |
| "learning_rate": 9.901664203302124e-07, |
| "loss": 0.7132, |
| "num_samples": 1.0, |
| "reward": 4.25, |
| "reward_std": 0.8536533713340759, |
| "rewards/gpt4o_holistic_reward": 4.25, |
| "rl_loss": 1.1175870895385742e-08, |
| "sft_loss": 2.377472162246704, |
| "speech_entropy": 2.1870193481445312, |
| "speech_kl": 0.0, |
| "step": 152, |
| "text_entropy": 1.3757061958312988, |
| "text_kl": 0.0, |
| "total_entropy": 2.0416908264160156 |
| }, |
| { |
| "combined_loss": 0.671237587928772, |
| "completion_length": 504.0, |
| "epoch": 0.04866412213740458, |
| "grad_norm": 2.226810932159424, |
| "kl": 0.0, |
| "learning_rate": 9.90017417733173e-07, |
| "loss": 0.6712, |
| "num_samples": 1.0, |
| "reward": 3.3125, |
| "reward_std": 1.125100016593933, |
| "rewards/gpt4o_holistic_reward": 3.3125, |
| "rl_loss": 7.450580596923828e-09, |
| "sft_loss": 2.2374584674835205, |
| "speech_entropy": 2.2338881492614746, |
| "speech_kl": 0.0, |
| "step": 153, |
| "text_entropy": 1.1020745038986206, |
| "text_kl": 0.0, |
| "total_entropy": 2.044203758239746 |
| }, |
| { |
| "combined_loss": 0.6561381816864014, |
| "completion_length": 230.1875, |
| "epoch": 0.048982188295165395, |
| "grad_norm": 2.5940654277801514, |
| "kl": 0.0, |
| "learning_rate": 9.898673073928087e-07, |
| "loss": 0.6561, |
| "num_samples": 1.0, |
| "reward": 4.3125, |
| "reward_std": 0.9524502158164978, |
| "rewards/gpt4o_holistic_reward": 4.3125, |
| "rl_loss": 0.0, |
| "sft_loss": 2.187127113342285, |
| "speech_entropy": 2.3985471725463867, |
| "speech_kl": 0.0, |
| "step": 154, |
| "text_entropy": 1.0082178115844727, |
| "text_kl": 0.0, |
| "total_entropy": 2.182006359100342 |
| }, |
| { |
| "combined_loss": 0.73064124584198, |
| "completion_length": 516.375, |
| "epoch": 0.04930025445292621, |
| "grad_norm": 1.78936767578125, |
| "kl": 0.0, |
| "learning_rate": 9.897160896870217e-07, |
| "loss": 0.7306, |
| "num_samples": 1.0, |
| "reward": 4.1875, |
| "reward_std": 0.6637751460075378, |
| "rewards/gpt4o_holistic_reward": 4.1875, |
| "rl_loss": 9.313225746154785e-09, |
| "sft_loss": 2.4354705810546875, |
| "speech_entropy": 2.2467494010925293, |
| "speech_kl": 0.0, |
| "step": 155, |
| "text_entropy": 1.0586869716644287, |
| "text_kl": 0.0, |
| "total_entropy": 2.0154693126678467 |
| }, |
| { |
| "combined_loss": 0.721272349357605, |
| "completion_length": 523.8125, |
| "epoch": 0.04961832061068702, |
| "grad_norm": 2.0883450508117676, |
| "kl": 0.0, |
| "learning_rate": 9.895637649965028e-07, |
| "loss": 0.7213, |
| "num_samples": 1.0, |
| "reward": 3.6875, |
| "reward_std": 1.1250998973846436, |
| "rewards/gpt4o_holistic_reward": 3.6875, |
| "rl_loss": 3.725290298461914e-09, |
| "sft_loss": 2.4042410850524902, |
| "speech_entropy": 2.18511700630188, |
| "speech_kl": 0.0, |
| "step": 156, |
| "text_entropy": 1.0734648704528809, |
| "text_kl": 0.0, |
| "total_entropy": 1.961314082145691 |
| }, |
| { |
| "combined_loss": 0.6512343883514404, |
| "completion_length": 284.4375, |
| "epoch": 0.04993638676844784, |
| "grad_norm": 2.0043251514434814, |
| "kl": 0.0, |
| "learning_rate": 9.89410333704729e-07, |
| "loss": 0.6512, |
| "num_samples": 1.0, |
| "reward": 4.8125, |
| "reward_std": 0.2694375813007355, |
| "rewards/gpt4o_holistic_reward": 4.8125, |
| "rl_loss": 0.0, |
| "sft_loss": 2.170781135559082, |
| "speech_entropy": 2.359740734100342, |
| "speech_kl": 0.0, |
| "step": 157, |
| "text_entropy": 1.0422818660736084, |
| "text_kl": 0.0, |
| "total_entropy": 2.0986175537109375 |
| }, |
| { |
| "combined_loss": 0.6856322884559631, |
| "completion_length": 369.625, |
| "epoch": 0.05025445292620865, |
| "grad_norm": 2.0799102783203125, |
| "kl": 0.0, |
| "learning_rate": 9.892557961979634e-07, |
| "loss": 0.6856, |
| "num_samples": 1.0, |
| "reward": 4.25, |
| "reward_std": 1.183112621307373, |
| "rewards/gpt4o_holistic_reward": 4.25, |
| "rl_loss": 0.0, |
| "sft_loss": 2.2854409217834473, |
| "speech_entropy": 2.2864654064178467, |
| "speech_kl": 0.0, |
| "step": 158, |
| "text_entropy": 1.1000885963439941, |
| "text_kl": 0.0, |
| "total_entropy": 2.0594735145568848 |
| }, |
| { |
| "combined_loss": 0.6959141492843628, |
| "completion_length": 417.0625, |
| "epoch": 0.05057251908396947, |
| "grad_norm": 1.7902482748031616, |
| "kl": 0.0, |
| "learning_rate": 9.891001528652542e-07, |
| "loss": 0.6959, |
| "num_samples": 1.0, |
| "reward": 3.75, |
| "reward_std": 0.7042241096496582, |
| "rewards/gpt4o_holistic_reward": 3.75, |
| "rl_loss": 1.4901161193847656e-08, |
| "sft_loss": 2.319713592529297, |
| "speech_entropy": 2.2585508823394775, |
| "speech_kl": 0.0, |
| "step": 159, |
| "text_entropy": 1.5893566608428955, |
| "text_kl": 0.0, |
| "total_entropy": 2.1322367191314697 |
| }, |
| { |
| "combined_loss": 0.7123146057128906, |
| "completion_length": 478.875, |
| "epoch": 0.05089058524173028, |
| "grad_norm": 1.8146113157272339, |
| "kl": 0.0, |
| "learning_rate": 9.889434040984331e-07, |
| "loss": 0.7123, |
| "num_samples": 1.0, |
| "reward": 3.5, |
| "reward_std": 0.8020563125610352, |
| "rewards/gpt4o_holistic_reward": 3.5, |
| "rl_loss": 1.30385160446167e-08, |
| "sft_loss": 2.3743817806243896, |
| "speech_entropy": 2.307361602783203, |
| "speech_kl": 0.0, |
| "step": 160, |
| "text_entropy": 1.4639774560928345, |
| "text_kl": 0.0, |
| "total_entropy": 2.144629955291748 |
| }, |
| { |
| "combined_loss": 0.6143687963485718, |
| "completion_length": 426.8125, |
| "epoch": 0.051208651399491094, |
| "grad_norm": 1.7007486820220947, |
| "kl": 0.0, |
| "learning_rate": 9.88785550292115e-07, |
| "loss": 0.6144, |
| "num_samples": 1.0, |
| "reward": 3.5625, |
| "reward_std": 1.1308612823486328, |
| "rewards/gpt4o_holistic_reward": 3.5625, |
| "rl_loss": 1.1175870895385742e-08, |
| "sft_loss": 2.047895908355713, |
| "speech_entropy": 2.206613540649414, |
| "speech_kl": 0.0, |
| "step": 161, |
| "text_entropy": 1.1258774995803833, |
| "text_kl": 0.0, |
| "total_entropy": 1.9874699115753174 |
| }, |
| { |
| "combined_loss": 0.7055187821388245, |
| "completion_length": 364.375, |
| "epoch": 0.05152671755725191, |
| "grad_norm": 1.8136767148971558, |
| "kl": 0.0, |
| "learning_rate": 9.886265918436966e-07, |
| "loss": 0.7055, |
| "num_samples": 1.0, |
| "reward": 3.0625, |
| "reward_std": 0.8920267820358276, |
| "rewards/gpt4o_holistic_reward": 3.0625, |
| "rl_loss": 1.862645149230957e-08, |
| "sft_loss": 2.351729154586792, |
| "speech_entropy": 2.2987916469573975, |
| "speech_kl": 0.0, |
| "step": 162, |
| "text_entropy": 1.06236732006073, |
| "text_kl": 0.0, |
| "total_entropy": 2.0641303062438965 |
| }, |
| { |
| "combined_loss": 0.61933434009552, |
| "completion_length": 491.375, |
| "epoch": 0.05184478371501272, |
| "grad_norm": 1.4536187648773193, |
| "kl": 0.0, |
| "learning_rate": 9.88466529153356e-07, |
| "loss": 0.6193, |
| "num_samples": 1.0, |
| "reward": 3.8125, |
| "reward_std": 0.3751000165939331, |
| "rewards/gpt4o_holistic_reward": 3.8125, |
| "rl_loss": 1.1175870895385742e-08, |
| "sft_loss": 2.0644476413726807, |
| "speech_entropy": 2.1868205070495605, |
| "speech_kl": 0.0, |
| "step": 163, |
| "text_entropy": 0.7075154781341553, |
| "text_kl": 0.0, |
| "total_entropy": 1.885782241821289 |
| }, |
| { |
| "combined_loss": 0.6253555417060852, |
| "completion_length": 537.5625, |
| "epoch": 0.05216284987277354, |
| "grad_norm": 1.6687992811203003, |
| "kl": 0.0, |
| "learning_rate": 9.883053626240501e-07, |
| "loss": 0.6254, |
| "num_samples": 1.0, |
| "reward": 2.5625, |
| "reward_std": 0.829224169254303, |
| "rewards/gpt4o_holistic_reward": 2.5625, |
| "rl_loss": -7.450580596923828e-09, |
| "sft_loss": 2.0845184326171875, |
| "speech_entropy": 2.169955015182495, |
| "speech_kl": 0.0, |
| "step": 164, |
| "text_entropy": 0.7777245044708252, |
| "text_kl": 0.0, |
| "total_entropy": 1.884574055671692 |
| }, |
| { |
| "combined_loss": 0.6416522264480591, |
| "completion_length": 443.125, |
| "epoch": 0.05248091603053435, |
| "grad_norm": 2.1316330432891846, |
| "kl": 0.0, |
| "learning_rate": 9.88143092661516e-07, |
| "loss": 0.6417, |
| "num_samples": 1.0, |
| "reward": 3.8125, |
| "reward_std": 0.8750999569892883, |
| "rewards/gpt4o_holistic_reward": 3.8125, |
| "rl_loss": 3.725290298461914e-09, |
| "sft_loss": 2.138840675354004, |
| "speech_entropy": 2.191802501678467, |
| "speech_kl": 0.0, |
| "step": 165, |
| "text_entropy": 0.8017352819442749, |
| "text_kl": 0.0, |
| "total_entropy": 1.9171819686889648 |
| }, |
| { |
| "combined_loss": 0.6333335638046265, |
| "completion_length": 309.625, |
| "epoch": 0.052798982188295165, |
| "grad_norm": 1.602042317390442, |
| "kl": 0.0, |
| "learning_rate": 9.87979719674268e-07, |
| "loss": 0.6333, |
| "num_samples": 1.0, |
| "reward": 4.875, |
| "reward_std": 0.14443756639957428, |
| "rewards/gpt4o_holistic_reward": 4.875, |
| "rl_loss": 0.0, |
| "sft_loss": 2.111111640930176, |
| "speech_entropy": 2.29636549949646, |
| "speech_kl": 0.0, |
| "step": 166, |
| "text_entropy": 1.0871713161468506, |
| "text_kl": 0.0, |
| "total_entropy": 2.0691018104553223 |
| }, |
| { |
| "combined_loss": 0.753852903842926, |
| "completion_length": 504.5625, |
| "epoch": 0.05311704834605598, |
| "grad_norm": 1.7408075332641602, |
| "kl": 0.0, |
| "learning_rate": 9.878152440735971e-07, |
| "loss": 0.7539, |
| "num_samples": 1.0, |
| "reward": 4.5625, |
| "reward_std": 0.7286534309387207, |
| "rewards/gpt4o_holistic_reward": 4.5625, |
| "rl_loss": 7.450580596923828e-09, |
| "sft_loss": 2.512842893600464, |
| "speech_entropy": 2.2807769775390625, |
| "speech_kl": 0.0, |
| "step": 167, |
| "text_entropy": 1.8450149297714233, |
| "text_kl": 0.0, |
| "total_entropy": 2.1998775005340576 |
| }, |
| { |
| "combined_loss": 0.7053524255752563, |
| "completion_length": 346.375, |
| "epoch": 0.05343511450381679, |
| "grad_norm": 1.8231476545333862, |
| "kl": 0.0, |
| "learning_rate": 9.876496662735711e-07, |
| "loss": 0.7054, |
| "num_samples": 1.0, |
| "reward": 4.6875, |
| "reward_std": 0.6250999569892883, |
| "rewards/gpt4o_holistic_reward": 4.6875, |
| "rl_loss": -3.725290298461914e-09, |
| "sft_loss": 2.3511745929718018, |
| "speech_entropy": 2.353151559829712, |
| "speech_kl": 0.0, |
| "step": 168, |
| "text_entropy": 1.3969731330871582, |
| "text_kl": 0.0, |
| "total_entropy": 2.1840548515319824 |
| }, |
| { |
| "combined_loss": 0.6700061559677124, |
| "completion_length": 525.125, |
| "epoch": 0.05375318066157761, |
| "grad_norm": 1.6231902837753296, |
| "kl": 0.0, |
| "learning_rate": 9.874829866910313e-07, |
| "loss": 0.67, |
| "num_samples": 1.0, |
| "reward": 2.8125, |
| "reward_std": 0.6637752056121826, |
| "rewards/gpt4o_holistic_reward": 2.8125, |
| "rl_loss": -7.450580596923828e-09, |
| "sft_loss": 2.233353853225708, |
| "speech_entropy": 2.205387592315674, |
| "speech_kl": 0.0, |
| "step": 169, |
| "text_entropy": 0.9943252801895142, |
| "text_kl": 0.0, |
| "total_entropy": 1.9630248546600342 |
| }, |
| { |
| "combined_loss": 0.6313989162445068, |
| "completion_length": 331.25, |
| "epoch": 0.05407124681933842, |
| "grad_norm": 1.6013562679290771, |
| "kl": 0.0, |
| "learning_rate": 9.873152057455938e-07, |
| "loss": 0.6314, |
| "num_samples": 1.0, |
| "reward": 3.3125, |
| "reward_std": 0.1251000016927719, |
| "rewards/gpt4o_holistic_reward": 3.3125, |
| "rl_loss": -7.450580596923828e-09, |
| "sft_loss": 2.1046628952026367, |
| "speech_entropy": 2.343129873275757, |
| "speech_kl": 0.0, |
| "step": 170, |
| "text_entropy": 0.897804856300354, |
| "text_kl": 0.0, |
| "total_entropy": 2.0444600582122803 |
| }, |
| { |
| "combined_loss": 0.6534004211425781, |
| "completion_length": 617.3125, |
| "epoch": 0.05438931297709924, |
| "grad_norm": 1.932045578956604, |
| "kl": 0.0, |
| "learning_rate": 9.871463238596464e-07, |
| "loss": 0.6534, |
| "num_samples": 1.0, |
| "reward": 3.5625, |
| "reward_std": 0.8751000165939331, |
| "rewards/gpt4o_holistic_reward": 3.5625, |
| "rl_loss": 1.4901161193847656e-08, |
| "sft_loss": 2.1780014038085938, |
| "speech_entropy": 2.1928927898406982, |
| "speech_kl": 0.0, |
| "step": 171, |
| "text_entropy": 0.8436852693557739, |
| "text_kl": 0.0, |
| "total_entropy": 1.9311038255691528 |
| }, |
| { |
| "combined_loss": 0.6976642608642578, |
| "completion_length": 512.0625, |
| "epoch": 0.054707379134860054, |
| "grad_norm": 1.7346135377883911, |
| "kl": 0.0, |
| "learning_rate": 9.869763414583495e-07, |
| "loss": 0.6977, |
| "num_samples": 1.0, |
| "reward": 3.8125, |
| "reward_std": 0.5646764636039734, |
| "rewards/gpt4o_holistic_reward": 3.8125, |
| "rl_loss": -7.450580596923828e-09, |
| "sft_loss": 2.325547218322754, |
| "speech_entropy": 2.1841320991516113, |
| "speech_kl": 0.0, |
| "step": 172, |
| "text_entropy": 1.026000738143921, |
| "text_kl": 0.0, |
| "total_entropy": 1.9609410762786865 |
| }, |
| { |
| "combined_loss": 0.7238253355026245, |
| "completion_length": 598.0625, |
| "epoch": 0.055025445292620864, |
| "grad_norm": 1.9262490272521973, |
| "kl": 0.0, |
| "learning_rate": 9.868052589696336e-07, |
| "loss": 0.7238, |
| "num_samples": 1.0, |
| "reward": 4.5, |
| "reward_std": 0.4928992986679077, |
| "rewards/gpt4o_holistic_reward": 4.5, |
| "rl_loss": 3.725290298461914e-09, |
| "sft_loss": 2.4127509593963623, |
| "speech_entropy": 2.1995720863342285, |
| "speech_kl": 0.0, |
| "step": 173, |
| "text_entropy": 1.5878872871398926, |
| "text_kl": 0.0, |
| "total_entropy": 2.0725741386413574 |
| }, |
| { |
| "combined_loss": 0.6825557947158813, |
| "completion_length": 435.9375, |
| "epoch": 0.05534351145038168, |
| "grad_norm": 1.9291430711746216, |
| "kl": 0.0, |
| "learning_rate": 9.866330768241983e-07, |
| "loss": 0.6826, |
| "num_samples": 1.0, |
| "reward": 4.25, |
| "reward_std": 0.7548449039459229, |
| "rewards/gpt4o_holistic_reward": 4.25, |
| "rl_loss": 7.450580596923828e-09, |
| "sft_loss": 2.2751858234405518, |
| "speech_entropy": 2.231337070465088, |
| "speech_kl": 0.0, |
| "step": 174, |
| "text_entropy": 0.8735387325286865, |
| "text_kl": 0.0, |
| "total_entropy": 1.9939302206039429 |
| }, |
| { |
| "combined_loss": 0.7245073914527893, |
| "completion_length": 432.0, |
| "epoch": 0.05566157760814249, |
| "grad_norm": 2.2042038440704346, |
| "kl": 0.0, |
| "learning_rate": 9.864597954555122e-07, |
| "loss": 0.7245, |
| "num_samples": 1.0, |
| "reward": 3.5625, |
| "reward_std": 1.6403796672821045, |
| "rewards/gpt4o_holistic_reward": 3.5625, |
| "rl_loss": 7.450580596923828e-09, |
| "sft_loss": 2.415024757385254, |
| "speech_entropy": 2.3350276947021484, |
| "speech_kl": 0.0, |
| "step": 175, |
| "text_entropy": 1.081442952156067, |
| "text_kl": 0.0, |
| "total_entropy": 2.1579785346984863 |
| }, |
| { |
| "combined_loss": 0.7414748668670654, |
| "completion_length": 358.9375, |
| "epoch": 0.05597964376590331, |
| "grad_norm": 1.7152729034423828, |
| "kl": 0.0, |
| "learning_rate": 9.86285415299811e-07, |
| "loss": 0.7415, |
| "num_samples": 1.0, |
| "reward": 5.0, |
| "reward_std": 9.999999747378752e-05, |
| "rewards/gpt4o_holistic_reward": 5.0, |
| "rl_loss": 0.0, |
| "sft_loss": 2.4715828895568848, |
| "speech_entropy": 2.273726463317871, |
| "speech_kl": 0.0, |
| "step": 176, |
| "text_entropy": 1.5496562719345093, |
| "text_kl": 0.0, |
| "total_entropy": 2.137073040008545 |
| }, |
| { |
| "combined_loss": 0.6711795330047607, |
| "completion_length": 557.6875, |
| "epoch": 0.05629770992366412, |
| "grad_norm": 1.7282371520996094, |
| "kl": 0.0, |
| "learning_rate": 9.861099367960964e-07, |
| "loss": 0.6712, |
| "num_samples": 1.0, |
| "reward": 3.6875, |
| "reward_std": 1.1298449039459229, |
| "rewards/gpt4o_holistic_reward": 3.6875, |
| "rl_loss": 7.450580596923828e-09, |
| "sft_loss": 2.237265110015869, |
| "speech_entropy": 2.1773886680603027, |
| "speech_kl": 0.0, |
| "step": 177, |
| "text_entropy": 0.8734384775161743, |
| "text_kl": 0.0, |
| "total_entropy": 1.9134694337844849 |
| }, |
| { |
| "combined_loss": 0.7701910138130188, |
| "completion_length": 591.25, |
| "epoch": 0.056615776081424936, |
| "grad_norm": 1.7133898735046387, |
| "kl": 0.0, |
| "learning_rate": 9.859333603861353e-07, |
| "loss": 0.7702, |
| "num_samples": 1.0, |
| "reward": 3.1875, |
| "reward_std": 0.5194375514984131, |
| "rewards/gpt4o_holistic_reward": 3.1875, |
| "rl_loss": -1.1175870895385742e-08, |
| "sft_loss": 2.56730318069458, |
| "speech_entropy": 2.189483165740967, |
| "speech_kl": 0.0, |
| "step": 178, |
| "text_entropy": 1.366645097732544, |
| "text_kl": 0.0, |
| "total_entropy": 2.039158582687378 |
| }, |
| { |
| "combined_loss": 0.5972847938537598, |
| "completion_length": 284.375, |
| "epoch": 0.05693384223918575, |
| "grad_norm": 2.5166637897491455, |
| "kl": 0.0, |
| "learning_rate": 9.857556865144585e-07, |
| "loss": 0.5973, |
| "num_samples": 1.0, |
| "reward": 3.625, |
| "reward_std": 1.7565135955810547, |
| "rewards/gpt4o_holistic_reward": 3.625, |
| "rl_loss": 3.725290298461914e-09, |
| "sft_loss": 1.990949273109436, |
| "speech_entropy": 2.3300843238830566, |
| "speech_kl": 0.0, |
| "step": 179, |
| "text_entropy": 0.771490752696991, |
| "text_kl": 0.0, |
| "total_entropy": 2.0337142944335938 |
| }, |
| { |
| "combined_loss": 0.6814907193183899, |
| "completion_length": 378.6875, |
| "epoch": 0.05725190839694656, |
| "grad_norm": 1.7109546661376953, |
| "kl": 0.0, |
| "learning_rate": 9.855769156283603e-07, |
| "loss": 0.6815, |
| "num_samples": 1.0, |
| "reward": 3.8125, |
| "reward_std": 0.3751000165939331, |
| "rewards/gpt4o_holistic_reward": 3.8125, |
| "rl_loss": 3.725290298461914e-09, |
| "sft_loss": 2.2716357707977295, |
| "speech_entropy": 2.227330207824707, |
| "speech_kl": 0.0, |
| "step": 180, |
| "text_entropy": 1.1663092374801636, |
| "text_kl": 0.0, |
| "total_entropy": 2.016590118408203 |
| }, |
| { |
| "combined_loss": 0.5935865640640259, |
| "completion_length": 368.4375, |
| "epoch": 0.05756997455470738, |
| "grad_norm": 1.8667573928833008, |
| "kl": 0.0, |
| "learning_rate": 9.853970481778956e-07, |
| "loss": 0.5936, |
| "num_samples": 1.0, |
| "reward": 4.0, |
| "reward_std": 0.3944375813007355, |
| "rewards/gpt4o_holistic_reward": 4.0, |
| "rl_loss": -3.725290298461914e-09, |
| "sft_loss": 1.9786219596862793, |
| "speech_entropy": 2.1767773628234863, |
| "speech_kl": 0.0, |
| "step": 181, |
| "text_entropy": 0.6917097568511963, |
| "text_kl": 0.0, |
| "total_entropy": 1.8641599416732788 |
| }, |
| { |
| "combined_loss": 0.716184675693512, |
| "completion_length": 458.625, |
| "epoch": 0.05788804071246819, |
| "grad_norm": 1.799391508102417, |
| "kl": 0.0, |
| "learning_rate": 9.852160846158806e-07, |
| "loss": 0.7162, |
| "num_samples": 1.0, |
| "reward": 3.125, |
| "reward_std": 0.454224169254303, |
| "rewards/gpt4o_holistic_reward": 3.125, |
| "rl_loss": 3.725290298461914e-09, |
| "sft_loss": 2.387282133102417, |
| "speech_entropy": 2.258867025375366, |
| "speech_kl": 0.0, |
| "step": 182, |
| "text_entropy": 1.5640207529067993, |
| "text_kl": 0.0, |
| "total_entropy": 2.1286048889160156 |
| }, |
| { |
| "combined_loss": 0.7314043045043945, |
| "completion_length": 255.3125, |
| "epoch": 0.05820610687022901, |
| "grad_norm": 2.2445545196533203, |
| "kl": 0.0, |
| "learning_rate": 9.850340253978911e-07, |
| "loss": 0.7314, |
| "num_samples": 1.0, |
| "reward": 3.9375, |
| "reward_std": 1.125100016593933, |
| "rewards/gpt4o_holistic_reward": 3.9375, |
| "rl_loss": -3.725290298461914e-09, |
| "sft_loss": 2.438014268875122, |
| "speech_entropy": 2.723639726638794, |
| "speech_kl": 0.0, |
| "step": 183, |
| "text_entropy": 0.9034126400947571, |
| "text_kl": 0.0, |
| "total_entropy": 2.436861991882324 |
| }, |
| { |
| "combined_loss": 0.6868402361869812, |
| "completion_length": 410.8125, |
| "epoch": 0.058524173027989825, |
| "grad_norm": 1.6614896059036255, |
| "kl": 0.0, |
| "learning_rate": 9.848508709822607e-07, |
| "loss": 0.6868, |
| "num_samples": 1.0, |
| "reward": 3.5, |
| "reward_std": 0.3944375813007355, |
| "rewards/gpt4o_holistic_reward": 3.5, |
| "rl_loss": 1.862645149230957e-08, |
| "sft_loss": 2.2894670963287354, |
| "speech_entropy": 2.232311964035034, |
| "speech_kl": 0.0, |
| "step": 184, |
| "text_entropy": 1.3743724822998047, |
| "text_kl": 0.0, |
| "total_entropy": 2.072235584259033 |
| }, |
| { |
| "combined_loss": 0.6055930852890015, |
| "completion_length": 391.9375, |
| "epoch": 0.058842239185750635, |
| "grad_norm": 1.8519771099090576, |
| "kl": 0.0, |
| "learning_rate": 9.846666218300807e-07, |
| "loss": 0.6056, |
| "num_samples": 1.0, |
| "reward": 3.3125, |
| "reward_std": 0.9712333679199219, |
| "rewards/gpt4o_holistic_reward": 3.3125, |
| "rl_loss": 2.2351741790771484e-08, |
| "sft_loss": 2.018643379211426, |
| "speech_entropy": 2.2869997024536133, |
| "speech_kl": 0.0, |
| "step": 185, |
| "text_entropy": 0.664115846157074, |
| "text_kl": 0.0, |
| "total_entropy": 1.9729156494140625 |
| }, |
| { |
| "combined_loss": 0.6875672936439514, |
| "completion_length": 485.3125, |
| "epoch": 0.05916030534351145, |
| "grad_norm": 2.1656157970428467, |
| "kl": 0.0, |
| "learning_rate": 9.844812784051978e-07, |
| "loss": 0.6876, |
| "num_samples": 1.0, |
| "reward": 3.1875, |
| "reward_std": 0.9129188060760498, |
| "rewards/gpt4o_holistic_reward": 3.1875, |
| "rl_loss": -1.1175870895385742e-08, |
| "sft_loss": 2.291890859603882, |
| "speech_entropy": 2.182652473449707, |
| "speech_kl": 0.0, |
| "step": 186, |
| "text_entropy": 0.9604874849319458, |
| "text_kl": 0.0, |
| "total_entropy": 1.9440966844558716 |
| }, |
| { |
| "combined_loss": 0.6967759728431702, |
| "completion_length": 383.75, |
| "epoch": 0.05947837150127226, |
| "grad_norm": 1.8742365837097168, |
| "kl": 0.0, |
| "learning_rate": 9.84294841174214e-07, |
| "loss": 0.6968, |
| "num_samples": 1.0, |
| "reward": 4.125, |
| "reward_std": 1.0308762788772583, |
| "rewards/gpt4o_holistic_reward": 4.125, |
| "rl_loss": 3.725290298461914e-09, |
| "sft_loss": 2.3225862979888916, |
| "speech_entropy": 2.2666873931884766, |
| "speech_kl": 0.0, |
| "step": 187, |
| "text_entropy": 1.5268785953521729, |
| "text_kl": 0.0, |
| "total_entropy": 2.1126818656921387 |
| }, |
| { |
| "combined_loss": 0.6987001895904541, |
| "completion_length": 427.75, |
| "epoch": 0.05979643765903308, |
| "grad_norm": 1.6652710437774658, |
| "kl": 0.0, |
| "learning_rate": 9.841073106064852e-07, |
| "loss": 0.6987, |
| "num_samples": 1.0, |
| "reward": 2.875, |
| "reward_std": 0.5387751460075378, |
| "rewards/gpt4o_holistic_reward": 2.875, |
| "rl_loss": 3.725290298461914e-09, |
| "sft_loss": 2.329000473022461, |
| "speech_entropy": 2.187225103378296, |
| "speech_kl": 0.0, |
| "step": 188, |
| "text_entropy": 1.3647143840789795, |
| "text_kl": 0.0, |
| "total_entropy": 2.0151681900024414 |
| }, |
| { |
| "combined_loss": 0.6457899808883667, |
| "completion_length": 442.75, |
| "epoch": 0.060114503816793896, |
| "grad_norm": 3.32289719581604, |
| "kl": 0.0, |
| "learning_rate": 9.839186871741186e-07, |
| "loss": 0.6458, |
| "num_samples": 1.0, |
| "reward": 3.75, |
| "reward_std": 0.8536533117294312, |
| "rewards/gpt4o_holistic_reward": 3.75, |
| "rl_loss": 1.1175870895385742e-08, |
| "sft_loss": 2.1526331901550293, |
| "speech_entropy": 2.330132007598877, |
| "speech_kl": 0.0, |
| "step": 189, |
| "text_entropy": 1.0898842811584473, |
| "text_kl": 0.0, |
| "total_entropy": 2.0880885124206543 |
| }, |
| { |
| "combined_loss": 0.7040784955024719, |
| "completion_length": 552.4375, |
| "epoch": 0.060432569974554706, |
| "grad_norm": 1.7851852178573608, |
| "kl": 0.0, |
| "learning_rate": 9.83728971351974e-07, |
| "loss": 0.7041, |
| "num_samples": 1.0, |
| "reward": 2.25, |
| "reward_std": 0.6444376111030579, |
| "rewards/gpt4o_holistic_reward": 2.25, |
| "rl_loss": -3.725290298461914e-09, |
| "sft_loss": 2.346928119659424, |
| "speech_entropy": 2.2672841548919678, |
| "speech_kl": 0.0, |
| "step": 190, |
| "text_entropy": 1.2936159372329712, |
| "text_kl": 0.0, |
| "total_entropy": 2.0743160247802734 |
| }, |
| { |
| "combined_loss": 0.6943396329879761, |
| "completion_length": 251.625, |
| "epoch": 0.06075063613231552, |
| "grad_norm": 1.8841829299926758, |
| "kl": 0.0, |
| "learning_rate": 9.835381636176605e-07, |
| "loss": 0.6943, |
| "num_samples": 1.0, |
| "reward": 3.25, |
| "reward_std": 1.0983422994613647, |
| "rewards/gpt4o_holistic_reward": 3.25, |
| "rl_loss": -7.450580596923828e-09, |
| "sft_loss": 2.3144655227661133, |
| "speech_entropy": 2.4599857330322266, |
| "speech_kl": 0.0, |
| "step": 191, |
| "text_entropy": 1.357433557510376, |
| "text_kl": 0.0, |
| "total_entropy": 2.25075101852417 |
| }, |
| { |
| "combined_loss": 0.6926529407501221, |
| "completion_length": 340.9375, |
| "epoch": 0.061068702290076333, |
| "grad_norm": 1.9702279567718506, |
| "kl": 0.0, |
| "learning_rate": 9.833462644515366e-07, |
| "loss": 0.6927, |
| "num_samples": 1.0, |
| "reward": 3.3125, |
| "reward_std": 0.6251000165939331, |
| "rewards/gpt4o_holistic_reward": 3.3125, |
| "rl_loss": -3.725290298461914e-09, |
| "sft_loss": 2.3088431358337402, |
| "speech_entropy": 2.233442783355713, |
| "speech_kl": 0.0, |
| "step": 192, |
| "text_entropy": 1.2242528200149536, |
| "text_kl": 0.0, |
| "total_entropy": 2.0358939170837402 |
| }, |
| { |
| "combined_loss": 0.6026841998100281, |
| "completion_length": 382.8125, |
| "epoch": 0.06138676844783715, |
| "grad_norm": 1.9015692472457886, |
| "kl": 0.0, |
| "learning_rate": 9.83153274336708e-07, |
| "loss": 0.6027, |
| "num_samples": 1.0, |
| "reward": 3.0, |
| "reward_std": 0.6444375514984131, |
| "rewards/gpt4o_holistic_reward": 3.0, |
| "rl_loss": -1.1175870895385742e-08, |
| "sft_loss": 2.0089473724365234, |
| "speech_entropy": 2.1782498359680176, |
| "speech_kl": 0.0, |
| "step": 193, |
| "text_entropy": 0.8885223269462585, |
| "text_kl": 0.0, |
| "total_entropy": 1.9228941202163696 |
| }, |
| { |
| "combined_loss": 0.8226222991943359, |
| "completion_length": 641.9375, |
| "epoch": 0.06170483460559797, |
| "grad_norm": 4.10353946685791, |
| "kl": 0.0, |
| "learning_rate": 9.829591937590273e-07, |
| "loss": 0.8226, |
| "num_samples": 1.0, |
| "reward": 4.25, |
| "reward_std": 1.288775086402893, |
| "rewards/gpt4o_holistic_reward": 4.25, |
| "rl_loss": 3.725290298461914e-09, |
| "sft_loss": 2.7420742511749268, |
| "speech_entropy": 2.501152992248535, |
| "speech_kl": 0.0, |
| "step": 194, |
| "text_entropy": 1.702864646911621, |
| "text_kl": 0.0, |
| "total_entropy": 2.362520217895508 |
| }, |
| { |
| "combined_loss": 0.6262680292129517, |
| "completion_length": 441.6875, |
| "epoch": 0.06202290076335878, |
| "grad_norm": 1.9847943782806396, |
| "kl": 0.0, |
| "learning_rate": 9.82764023207092e-07, |
| "loss": 0.6263, |
| "num_samples": 1.0, |
| "reward": 3.25, |
| "reward_std": 1.1531318426132202, |
| "rewards/gpt4o_holistic_reward": 3.25, |
| "rl_loss": 7.450580596923828e-09, |
| "sft_loss": 2.087559938430786, |
| "speech_entropy": 2.365201473236084, |
| "speech_kl": 0.0, |
| "step": 195, |
| "text_entropy": 1.2258142232894897, |
| "text_kl": 0.0, |
| "total_entropy": 2.138317584991455 |
| }, |
| { |
| "combined_loss": 0.7117223739624023, |
| "completion_length": 181.375, |
| "epoch": 0.062340966921119595, |
| "grad_norm": 1.908617377281189, |
| "kl": 0.0, |
| "learning_rate": 9.825677631722435e-07, |
| "loss": 0.7117, |
| "num_samples": 1.0, |
| "reward": 4.25, |
| "reward_std": 0.20422415435314178, |
| "rewards/gpt4o_holistic_reward": 4.25, |
| "rl_loss": 0.0, |
| "sft_loss": 2.372407913208008, |
| "speech_entropy": 2.5207109451293945, |
| "speech_kl": 0.0, |
| "step": 196, |
| "text_entropy": 0.8852477073669434, |
| "text_kl": 0.0, |
| "total_entropy": 2.2032618522644043 |
| }, |
| { |
| "combined_loss": 0.6184705495834351, |
| "completion_length": 488.5625, |
| "epoch": 0.0626590330788804, |
| "grad_norm": 1.9325580596923828, |
| "kl": 0.0, |
| "learning_rate": 9.823704141485666e-07, |
| "loss": 0.6185, |
| "num_samples": 1.0, |
| "reward": 3.6875, |
| "reward_std": 0.8751000165939331, |
| "rewards/gpt4o_holistic_reward": 3.6875, |
| "rl_loss": 0.0, |
| "sft_loss": 2.061568260192871, |
| "speech_entropy": 2.212630033493042, |
| "speech_kl": 0.0, |
| "step": 197, |
| "text_entropy": 1.0164635181427002, |
| "text_kl": 0.0, |
| "total_entropy": 1.9818271398544312 |
| }, |
| { |
| "combined_loss": 0.6890236139297485, |
| "completion_length": 352.3125, |
| "epoch": 0.06297709923664122, |
| "grad_norm": 2.0383756160736084, |
| "kl": 0.0, |
| "learning_rate": 9.82171976632887e-07, |
| "loss": 0.689, |
| "num_samples": 1.0, |
| "reward": 3.9375, |
| "reward_std": 0.6251000165939331, |
| "rewards/gpt4o_holistic_reward": 3.9375, |
| "rl_loss": 1.1175870895385742e-08, |
| "sft_loss": 2.2967453002929688, |
| "speech_entropy": 2.279099702835083, |
| "speech_kl": 0.0, |
| "step": 198, |
| "text_entropy": 1.1877176761627197, |
| "text_kl": 0.0, |
| "total_entropy": 2.078038215637207 |
| }, |
| { |
| "combined_loss": 0.727849006652832, |
| "completion_length": 459.625, |
| "epoch": 0.06329516539440204, |
| "grad_norm": 4.476938724517822, |
| "kl": 0.0, |
| "learning_rate": 9.81972451124771e-07, |
| "loss": 0.7278, |
| "num_samples": 1.0, |
| "reward": 2.5625, |
| "reward_std": 1.036826252937317, |
| "rewards/gpt4o_holistic_reward": 2.5625, |
| "rl_loss": -7.450580596923828e-09, |
| "sft_loss": 2.4261631965637207, |
| "speech_entropy": 2.485858201980591, |
| "speech_kl": 0.0, |
| "step": 199, |
| "text_entropy": 0.7858097553253174, |
| "text_kl": 0.0, |
| "total_entropy": 2.1179397106170654 |
| }, |
| { |
| "combined_loss": 0.6614863276481628, |
| "completion_length": 468.25, |
| "epoch": 0.06361323155216285, |
| "grad_norm": 1.9755204916000366, |
| "kl": 0.0, |
| "learning_rate": 9.817718381265238e-07, |
| "loss": 0.6615, |
| "num_samples": 1.0, |
| "reward": 3.5, |
| "reward_std": 0.6144567728042603, |
| "rewards/gpt4o_holistic_reward": 3.5, |
| "rl_loss": -3.725290298461914e-09, |
| "sft_loss": 2.2049543857574463, |
| "speech_entropy": 2.3594043254852295, |
| "speech_kl": 0.0, |
| "step": 200, |
| "text_entropy": 1.0148909091949463, |
| "text_kl": 0.0, |
| "total_entropy": 2.100245475769043 |
| }, |
| { |
| "combined_loss": 0.6087247133255005, |
| "completion_length": 624.625, |
| "epoch": 0.06393129770992366, |
| "grad_norm": 1.7758878469467163, |
| "kl": 0.0, |
| "learning_rate": 9.815701381431885e-07, |
| "loss": 0.6087, |
| "num_samples": 1.0, |
| "reward": 2.875, |
| "reward_std": 0.68720543384552, |
| "rewards/gpt4o_holistic_reward": 2.875, |
| "rl_loss": -3.725290298461914e-09, |
| "sft_loss": 2.0290822982788086, |
| "speech_entropy": 2.1871001720428467, |
| "speech_kl": 0.0, |
| "step": 201, |
| "text_entropy": 1.0028703212738037, |
| "text_kl": 0.0, |
| "total_entropy": 1.9624230861663818 |
| }, |
| { |
| "combined_loss": 0.7581361532211304, |
| "completion_length": 428.5625, |
| "epoch": 0.06424936386768448, |
| "grad_norm": 2.9266483783721924, |
| "kl": 0.0, |
| "learning_rate": 9.813673516825443e-07, |
| "loss": 0.7581, |
| "num_samples": 1.0, |
| "reward": 3.4375, |
| "reward_std": 0.8376991748809814, |
| "rewards/gpt4o_holistic_reward": 3.4375, |
| "rl_loss": -3.725290298461914e-09, |
| "sft_loss": 2.527120351791382, |
| "speech_entropy": 2.388430595397949, |
| "speech_kl": 0.0, |
| "step": 202, |
| "text_entropy": 1.3267887830734253, |
| "text_kl": 0.0, |
| "total_entropy": 2.1770803928375244 |
| }, |
| { |
| "combined_loss": 0.679368257522583, |
| "completion_length": 652.9375, |
| "epoch": 0.0645674300254453, |
| "grad_norm": 1.8700424432754517, |
| "kl": 0.0, |
| "learning_rate": 9.81163479255106e-07, |
| "loss": 0.6794, |
| "num_samples": 1.0, |
| "reward": 2.875, |
| "reward_std": 0.7171862125396729, |
| "rewards/gpt4o_holistic_reward": 2.875, |
| "rl_loss": 0.0, |
| "sft_loss": 2.2645606994628906, |
| "speech_entropy": 2.1289772987365723, |
| "speech_kl": 0.0, |
| "step": 203, |
| "text_entropy": 1.0961881875991821, |
| "text_kl": 0.0, |
| "total_entropy": 1.903523325920105 |
| }, |
| { |
| "combined_loss": 0.7537906169891357, |
| "completion_length": 480.3125, |
| "epoch": 0.0648854961832061, |
| "grad_norm": 2.1203622817993164, |
| "kl": 0.0, |
| "learning_rate": 9.809585213741224e-07, |
| "loss": 0.7538, |
| "num_samples": 1.0, |
| "reward": 3.0, |
| "reward_std": 0.7288135886192322, |
| "rewards/gpt4o_holistic_reward": 3.0, |
| "rl_loss": 3.725290298461914e-09, |
| "sft_loss": 2.5126352310180664, |
| "speech_entropy": 2.323160171508789, |
| "speech_kl": 0.0, |
| "step": 204, |
| "text_entropy": 1.5344562530517578, |
| "text_kl": 0.0, |
| "total_entropy": 2.1817140579223633 |
| }, |
| { |
| "combined_loss": 0.7854565978050232, |
| "completion_length": 404.1875, |
| "epoch": 0.06520356234096693, |
| "grad_norm": 6.252560138702393, |
| "kl": 0.0, |
| "learning_rate": 9.807524785555744e-07, |
| "loss": 0.7855, |
| "num_samples": 1.0, |
| "reward": 2.9375, |
| "reward_std": 0.23945678770542145, |
| "rewards/gpt4o_holistic_reward": 2.9375, |
| "rl_loss": 7.450580596923828e-09, |
| "sft_loss": 2.6181886196136475, |
| "speech_entropy": 2.286557674407959, |
| "speech_kl": 0.0, |
| "step": 205, |
| "text_entropy": 1.3187462091445923, |
| "text_kl": 0.0, |
| "total_entropy": 2.110858917236328 |
| }, |
| { |
| "combined_loss": 0.6578105092048645, |
| "completion_length": 464.8125, |
| "epoch": 0.06552162849872774, |
| "grad_norm": 1.7753384113311768, |
| "kl": 0.0, |
| "learning_rate": 9.805453513181746e-07, |
| "loss": 0.6578, |
| "num_samples": 1.0, |
| "reward": 3.625, |
| "reward_std": 0.4331127107143402, |
| "rewards/gpt4o_holistic_reward": 3.625, |
| "rl_loss": -7.450580596923828e-09, |
| "sft_loss": 2.192701816558838, |
| "speech_entropy": 2.21907639503479, |
| "speech_kl": 0.0, |
| "step": 206, |
| "text_entropy": 1.3251252174377441, |
| "text_kl": 0.0, |
| "total_entropy": 2.0617318153381348 |
| }, |
| { |
| "combined_loss": 0.7920005321502686, |
| "completion_length": 623.25, |
| "epoch": 0.06583969465648855, |
| "grad_norm": 1.7488422393798828, |
| "kl": 0.0, |
| "learning_rate": 9.80337140183366e-07, |
| "loss": 0.792, |
| "num_samples": 1.0, |
| "reward": 3.9375, |
| "reward_std": 0.5194375514984131, |
| "rewards/gpt4o_holistic_reward": 3.9375, |
| "rl_loss": 1.1175870895385742e-08, |
| "sft_loss": 2.6400017738342285, |
| "speech_entropy": 2.1968274116516113, |
| "speech_kl": 0.0, |
| "step": 207, |
| "text_entropy": 1.44332754611969, |
| "text_kl": 0.0, |
| "total_entropy": 2.0511021614074707 |
| }, |
| { |
| "combined_loss": 0.6618804931640625, |
| "completion_length": 418.75, |
| "epoch": 0.06615776081424936, |
| "grad_norm": 1.7858362197875977, |
| "kl": 0.0, |
| "learning_rate": 9.801278456753193e-07, |
| "loss": 0.6619, |
| "num_samples": 1.0, |
| "reward": 4.8125, |
| "reward_std": 0.3751000165939331, |
| "rewards/gpt4o_holistic_reward": 4.8125, |
| "rl_loss": -7.450580596923828e-09, |
| "sft_loss": 2.206268310546875, |
| "speech_entropy": 2.241903781890869, |
| "speech_kl": 0.0, |
| "step": 208, |
| "text_entropy": 0.9072421193122864, |
| "text_kl": 0.0, |
| "total_entropy": 1.9820117950439453 |
| }, |
| { |
| "combined_loss": 0.6754240989685059, |
| "completion_length": 353.1875, |
| "epoch": 0.06647582697201018, |
| "grad_norm": 2.1248185634613037, |
| "kl": 0.0, |
| "learning_rate": 9.799174683209336e-07, |
| "loss": 0.6754, |
| "num_samples": 1.0, |
| "reward": 2.9375, |
| "reward_std": 1.2371759414672852, |
| "rewards/gpt4o_holistic_reward": 2.9375, |
| "rl_loss": -3.725290298461914e-09, |
| "sft_loss": 2.251413583755493, |
| "speech_entropy": 2.267850875854492, |
| "speech_kl": 0.0, |
| "step": 209, |
| "text_entropy": 1.2034542560577393, |
| "text_kl": 0.0, |
| "total_entropy": 2.069960594177246 |
| }, |
| { |
| "combined_loss": 0.6498122215270996, |
| "completion_length": 532.8125, |
| "epoch": 0.06679389312977099, |
| "grad_norm": 1.8640666007995605, |
| "kl": 0.0, |
| "learning_rate": 9.797060086498332e-07, |
| "loss": 0.6498, |
| "num_samples": 1.0, |
| "reward": 2.75, |
| "reward_std": 1.0983422994613647, |
| "rewards/gpt4o_holistic_reward": 2.75, |
| "rl_loss": 0.0, |
| "sft_loss": 2.1660404205322266, |
| "speech_entropy": 2.172290563583374, |
| "speech_kl": 0.0, |
| "step": 210, |
| "text_entropy": 1.1589062213897705, |
| "text_kl": 0.0, |
| "total_entropy": 1.9814873933792114 |
| }, |
| { |
| "combined_loss": 0.6711292862892151, |
| "completion_length": 657.9375, |
| "epoch": 0.0671119592875318, |
| "grad_norm": 1.5342586040496826, |
| "kl": 0.0, |
| "learning_rate": 9.79493467194368e-07, |
| "loss": 0.6711, |
| "num_samples": 1.0, |
| "reward": 3.6875, |
| "reward_std": 0.2694375813007355, |
| "rewards/gpt4o_holistic_reward": 3.6875, |
| "rl_loss": 1.1175870895385742e-08, |
| "sft_loss": 2.2370975017547607, |
| "speech_entropy": 2.2588064670562744, |
| "speech_kl": 0.0, |
| "step": 211, |
| "text_entropy": 0.9491192102432251, |
| "text_kl": 0.0, |
| "total_entropy": 2.00757098197937 |
| }, |
| { |
| "combined_loss": 0.7253336906433105, |
| "completion_length": 462.125, |
| "epoch": 0.06743002544529263, |
| "grad_norm": 1.837734341621399, |
| "kl": 0.0, |
| "learning_rate": 9.792798444896107e-07, |
| "loss": 0.7253, |
| "num_samples": 1.0, |
| "reward": 4.5, |
| "reward_std": 0.6831126809120178, |
| "rewards/gpt4o_holistic_reward": 4.5, |
| "rl_loss": 1.1175870895385742e-08, |
| "sft_loss": 2.417778730392456, |
| "speech_entropy": 2.2486274242401123, |
| "speech_kl": 0.0, |
| "step": 212, |
| "text_entropy": 1.3472001552581787, |
| "text_kl": 0.0, |
| "total_entropy": 2.0750081539154053 |
| }, |
| { |
| "combined_loss": 0.7301596999168396, |
| "completion_length": 306.8125, |
| "epoch": 0.06774809160305344, |
| "grad_norm": 2.5512921810150146, |
| "kl": 0.0, |
| "learning_rate": 9.790651410733562e-07, |
| "loss": 0.7302, |
| "num_samples": 1.0, |
| "reward": 4.625, |
| "reward_std": 0.4331127107143402, |
| "rewards/gpt4o_holistic_reward": 4.625, |
| "rl_loss": 0.0, |
| "sft_loss": 2.433865547180176, |
| "speech_entropy": 2.319303512573242, |
| "speech_kl": 0.0, |
| "step": 213, |
| "text_entropy": 0.9959409832954407, |
| "text_kl": 0.0, |
| "total_entropy": 2.0706756114959717 |
| }, |
| { |
| "combined_loss": 0.7285110950469971, |
| "completion_length": 426.6875, |
| "epoch": 0.06806615776081425, |
| "grad_norm": 2.5493366718292236, |
| "kl": 0.0, |
| "learning_rate": 9.788493574861199e-07, |
| "loss": 0.7285, |
| "num_samples": 1.0, |
| "reward": 3.5, |
| "reward_std": 1.0774502754211426, |
| "rewards/gpt4o_holistic_reward": 3.5, |
| "rl_loss": -3.725290298461914e-09, |
| "sft_loss": 2.428370237350464, |
| "speech_entropy": 2.256551742553711, |
| "speech_kl": 0.0, |
| "step": 214, |
| "text_entropy": 0.9938225150108337, |
| "text_kl": 0.0, |
| "total_entropy": 2.015477180480957 |
| }, |
| { |
| "combined_loss": 0.7399945855140686, |
| "completion_length": 371.375, |
| "epoch": 0.06838422391857506, |
| "grad_norm": 1.7615910768508911, |
| "kl": 0.0, |
| "learning_rate": 9.786324942711371e-07, |
| "loss": 0.74, |
| "num_samples": 1.0, |
| "reward": 4.75, |
| "reward_std": 0.5001000165939331, |
| "rewards/gpt4o_holistic_reward": 4.75, |
| "rl_loss": 7.450580596923828e-09, |
| "sft_loss": 2.466648578643799, |
| "speech_entropy": 2.2756552696228027, |
| "speech_kl": 0.0, |
| "step": 215, |
| "text_entropy": 1.2351531982421875, |
| "text_kl": 0.0, |
| "total_entropy": 2.0899648666381836 |
| }, |
| { |
| "combined_loss": 0.6048213243484497, |
| "completion_length": 541.9375, |
| "epoch": 0.06870229007633588, |
| "grad_norm": 1.555175542831421, |
| "kl": 0.0, |
| "learning_rate": 9.784145519743606e-07, |
| "loss": 0.6048, |
| "num_samples": 1.0, |
| "reward": 4.375, |
| "reward_std": 0.4331127107143402, |
| "rewards/gpt4o_holistic_reward": 4.375, |
| "rl_loss": 7.450580596923828e-09, |
| "sft_loss": 2.016071081161499, |
| "speech_entropy": 2.1028213500976562, |
| "speech_kl": 0.0, |
| "step": 216, |
| "text_entropy": 0.6853211522102356, |
| "text_kl": 0.0, |
| "total_entropy": 1.8233641386032104 |
| }, |
| { |
| "combined_loss": 0.6289892196655273, |
| "completion_length": 317.1875, |
| "epoch": 0.06902035623409669, |
| "grad_norm": 1.876192569732666, |
| "kl": 0.0, |
| "learning_rate": 9.781955311444596e-07, |
| "loss": 0.629, |
| "num_samples": 1.0, |
| "reward": 4.25, |
| "reward_std": 0.3944375813007355, |
| "rewards/gpt4o_holistic_reward": 4.25, |
| "rl_loss": 0.0, |
| "sft_loss": 2.096630573272705, |
| "speech_entropy": 2.2578630447387695, |
| "speech_kl": 0.0, |
| "step": 217, |
| "text_entropy": 1.1112196445465088, |
| "text_kl": 0.0, |
| "total_entropy": 2.032492160797119 |
| }, |
| { |
| "combined_loss": 0.6842765808105469, |
| "completion_length": 477.25, |
| "epoch": 0.0693384223918575, |
| "grad_norm": 2.3141274452209473, |
| "kl": 0.0, |
| "learning_rate": 9.779754323328192e-07, |
| "loss": 0.6843, |
| "num_samples": 1.0, |
| "reward": 3.5625, |
| "reward_std": 1.269437551498413, |
| "rewards/gpt4o_holistic_reward": 3.5625, |
| "rl_loss": 0.0, |
| "sft_loss": 2.2809219360351562, |
| "speech_entropy": 2.205554485321045, |
| "speech_kl": 0.0, |
| "step": 218, |
| "text_entropy": 1.3427551984786987, |
| "text_kl": 0.0, |
| "total_entropy": 2.0352625846862793 |
| }, |
| { |
| "combined_loss": 0.6563563346862793, |
| "completion_length": 386.4375, |
| "epoch": 0.06965648854961833, |
| "grad_norm": 1.7859476804733276, |
| "kl": 0.0, |
| "learning_rate": 9.777542560935373e-07, |
| "loss": 0.6564, |
| "num_samples": 1.0, |
| "reward": 4.625, |
| "reward_std": 0.5983423590660095, |
| "rewards/gpt4o_holistic_reward": 4.625, |
| "rl_loss": 2.2351741790771484e-08, |
| "sft_loss": 2.187854290008545, |
| "speech_entropy": 2.2751975059509277, |
| "speech_kl": 0.0, |
| "step": 219, |
| "text_entropy": 0.9894624352455139, |
| "text_kl": 0.0, |
| "total_entropy": 2.025278091430664 |
| }, |
| { |
| "combined_loss": 0.6714828610420227, |
| "completion_length": 397.6875, |
| "epoch": 0.06997455470737914, |
| "grad_norm": 2.1150624752044678, |
| "kl": 0.0, |
| "learning_rate": 9.775320029834254e-07, |
| "loss": 0.6715, |
| "num_samples": 1.0, |
| "reward": 2.75, |
| "reward_std": 0.9565354585647583, |
| "rewards/gpt4o_holistic_reward": 2.75, |
| "rl_loss": 7.450580596923828e-09, |
| "sft_loss": 2.238276243209839, |
| "speech_entropy": 2.2403571605682373, |
| "speech_kl": 0.0, |
| "step": 220, |
| "text_entropy": 1.4980268478393555, |
| "text_kl": 0.0, |
| "total_entropy": 2.0874156951904297 |
| }, |
| { |
| "combined_loss": 0.7247896790504456, |
| "completion_length": 497.375, |
| "epoch": 0.07029262086513995, |
| "grad_norm": 1.8192265033721924, |
| "kl": 0.0, |
| "learning_rate": 9.773086735620053e-07, |
| "loss": 0.7248, |
| "num_samples": 1.0, |
| "reward": 3.8125, |
| "reward_std": 0.4435809552669525, |
| "rewards/gpt4o_holistic_reward": 3.8125, |
| "rl_loss": 0.0, |
| "sft_loss": 2.4159655570983887, |
| "speech_entropy": 2.3147919178009033, |
| "speech_kl": 0.0, |
| "step": 221, |
| "text_entropy": 1.4983744621276855, |
| "text_kl": 0.0, |
| "total_entropy": 2.1691160202026367 |
| }, |
| { |
| "combined_loss": 0.616317868232727, |
| "completion_length": 594.25, |
| "epoch": 0.07061068702290077, |
| "grad_norm": 1.6542634963989258, |
| "kl": 0.0, |
| "learning_rate": 9.770842683915082e-07, |
| "loss": 0.6163, |
| "num_samples": 1.0, |
| "reward": 3.4375, |
| "reward_std": 0.47356173396110535, |
| "rewards/gpt4o_holistic_reward": 3.4375, |
| "rl_loss": -3.725290298461914e-09, |
| "sft_loss": 2.0543928146362305, |
| "speech_entropy": 2.261496067047119, |
| "speech_kl": 0.0, |
| "step": 222, |
| "text_entropy": 0.8812452554702759, |
| "text_kl": 0.0, |
| "total_entropy": 1.996401309967041 |
| }, |
| { |
| "combined_loss": 0.8017942905426025, |
| "completion_length": 413.75, |
| "epoch": 0.07092875318066158, |
| "grad_norm": 2.075211763381958, |
| "kl": 0.0, |
| "learning_rate": 9.768587880368742e-07, |
| "loss": 0.8018, |
| "num_samples": 1.0, |
| "reward": 3.6875, |
| "reward_std": 0.6251000165939331, |
| "rewards/gpt4o_holistic_reward": 3.6875, |
| "rl_loss": 0.0, |
| "sft_loss": 2.672647714614868, |
| "speech_entropy": 2.2792246341705322, |
| "speech_kl": 0.0, |
| "step": 223, |
| "text_entropy": 1.4464610815048218, |
| "text_kl": 0.0, |
| "total_entropy": 2.1342384815216064 |
| }, |
| { |
| "combined_loss": 0.733474612236023, |
| "completion_length": 342.875, |
| "epoch": 0.07124681933842239, |
| "grad_norm": 1.4014809131622314, |
| "kl": 0.0, |
| "learning_rate": 9.766322330657497e-07, |
| "loss": 0.7335, |
| "num_samples": 1.0, |
| "reward": 2.5, |
| "reward_std": 9.999999747378752e-05, |
| "rewards/gpt4o_holistic_reward": 2.5, |
| "rl_loss": 0.0, |
| "sft_loss": 2.444915294647217, |
| "speech_entropy": 2.490835428237915, |
| "speech_kl": 0.0, |
| "step": 224, |
| "text_entropy": 1.531280755996704, |
| "text_kl": 0.0, |
| "total_entropy": 2.3087196350097656 |
| }, |
| { |
| "combined_loss": 0.7604819536209106, |
| "completion_length": 377.125, |
| "epoch": 0.0715648854961832, |
| "grad_norm": 2.309183120727539, |
| "kl": 0.0, |
| "learning_rate": 9.764046040484868e-07, |
| "loss": 0.7605, |
| "num_samples": 1.0, |
| "reward": 3.375, |
| "reward_std": 9.999999747378752e-05, |
| "rewards/gpt4o_holistic_reward": 3.375, |
| "rl_loss": 0.0, |
| "sft_loss": 2.534939765930176, |
| "speech_entropy": 2.4606456756591797, |
| "speech_kl": 0.0, |
| "step": 225, |
| "text_entropy": 0.7890236377716064, |
| "text_kl": 0.0, |
| "total_entropy": 2.0891623497009277 |
| }, |
| { |
| "combined_loss": 0.6990571022033691, |
| "completion_length": 528.3125, |
| "epoch": 0.07188295165394402, |
| "grad_norm": 1.9987963438034058, |
| "kl": 0.0, |
| "learning_rate": 9.76175901558141e-07, |
| "loss": 0.6991, |
| "num_samples": 1.0, |
| "reward": 3.5625, |
| "reward_std": 0.6770563125610352, |
| "rewards/gpt4o_holistic_reward": 3.5625, |
| "rl_loss": 0.0, |
| "sft_loss": 2.3301901817321777, |
| "speech_entropy": 2.320341110229492, |
| "speech_kl": 0.0, |
| "step": 226, |
| "text_entropy": 1.3003857135772705, |
| "text_kl": 0.0, |
| "total_entropy": 2.1371474266052246 |
| }, |
| { |
| "combined_loss": 0.7233133316040039, |
| "completion_length": 355.9375, |
| "epoch": 0.07220101781170483, |
| "grad_norm": 1.6554374694824219, |
| "kl": 0.0, |
| "learning_rate": 9.759461261704705e-07, |
| "loss": 0.7233, |
| "num_samples": 1.0, |
| "reward": 4.375, |
| "reward_std": 0.2501000165939331, |
| "rewards/gpt4o_holistic_reward": 4.375, |
| "rl_loss": -3.725290298461914e-09, |
| "sft_loss": 2.4110443592071533, |
| "speech_entropy": 2.268995761871338, |
| "speech_kl": 0.0, |
| "step": 227, |
| "text_entropy": 1.413590431213379, |
| "text_kl": 0.0, |
| "total_entropy": 2.0987539291381836 |
| }, |
| { |
| "combined_loss": 0.7177764773368835, |
| "completion_length": 507.4375, |
| "epoch": 0.07251908396946564, |
| "grad_norm": 1.8407859802246094, |
| "kl": 0.0, |
| "learning_rate": 9.757152784639347e-07, |
| "loss": 0.7178, |
| "num_samples": 1.0, |
| "reward": 4.3125, |
| "reward_std": 0.7622368335723877, |
| "rewards/gpt4o_holistic_reward": 4.3125, |
| "rl_loss": -3.725290298461914e-09, |
| "sft_loss": 2.3925881385803223, |
| "speech_entropy": 2.314845561981201, |
| "speech_kl": 0.0, |
| "step": 228, |
| "text_entropy": 1.0042717456817627, |
| "text_kl": 0.0, |
| "total_entropy": 2.067537784576416 |
| }, |
| { |
| "combined_loss": 0.7055172324180603, |
| "completion_length": 615.5625, |
| "epoch": 0.07283715012722647, |
| "grad_norm": 1.6229463815689087, |
| "kl": 0.0, |
| "learning_rate": 9.754833590196926e-07, |
| "loss": 0.7055, |
| "num_samples": 1.0, |
| "reward": 4.625, |
| "reward_std": 0.6444375514984131, |
| "rewards/gpt4o_holistic_reward": 4.625, |
| "rl_loss": 7.450580596923828e-09, |
| "sft_loss": 2.3517239093780518, |
| "speech_entropy": 2.1739797592163086, |
| "speech_kl": 0.0, |
| "step": 229, |
| "text_entropy": 1.4586985111236572, |
| "text_kl": 0.0, |
| "total_entropy": 2.04360294342041 |
| }, |
| { |
| "combined_loss": 0.7200095057487488, |
| "completion_length": 461.0, |
| "epoch": 0.07315521628498728, |
| "grad_norm": 2.110440254211426, |
| "kl": 0.0, |
| "learning_rate": 9.752503684216007e-07, |
| "loss": 0.72, |
| "num_samples": 1.0, |
| "reward": 4.5625, |
| "reward_std": 0.8751000165939331, |
| "rewards/gpt4o_holistic_reward": 4.5625, |
| "rl_loss": 0.0, |
| "sft_loss": 2.400031566619873, |
| "speech_entropy": 2.22104811668396, |
| "speech_kl": 0.0, |
| "step": 230, |
| "text_entropy": 1.356811761856079, |
| "text_kl": 0.0, |
| "total_entropy": 2.0665719509124756 |
| }, |
| { |
| "combined_loss": 0.6246628761291504, |
| "completion_length": 591.875, |
| "epoch": 0.07347328244274809, |
| "grad_norm": 2.518479585647583, |
| "kl": 0.0, |
| "learning_rate": 9.75016307256213e-07, |
| "loss": 0.6247, |
| "num_samples": 1.0, |
| "reward": 3.1875, |
| "reward_std": 0.9550646543502808, |
| "rewards/gpt4o_holistic_reward": 3.1875, |
| "rl_loss": 3.725290298461914e-09, |
| "sft_loss": 2.082209587097168, |
| "speech_entropy": 2.3805904388427734, |
| "speech_kl": 0.0, |
| "step": 231, |
| "text_entropy": 0.864353597164154, |
| "text_kl": 0.0, |
| "total_entropy": 2.0375137329101562 |
| }, |
| { |
| "combined_loss": 0.6500042676925659, |
| "completion_length": 494.125, |
| "epoch": 0.0737913486005089, |
| "grad_norm": 1.7023345232009888, |
| "kl": 0.0, |
| "learning_rate": 9.74781176112778e-07, |
| "loss": 0.65, |
| "num_samples": 1.0, |
| "reward": 3.625, |
| "reward_std": 0.7674887180328369, |
| "rewards/gpt4o_holistic_reward": 3.625, |
| "rl_loss": 7.450580596923828e-09, |
| "sft_loss": 2.1666808128356934, |
| "speech_entropy": 2.172966957092285, |
| "speech_kl": 0.0, |
| "step": 232, |
| "text_entropy": 1.2742629051208496, |
| "text_kl": 0.0, |
| "total_entropy": 2.008732318878174 |
| }, |
| { |
| "combined_loss": 0.6174441576004028, |
| "completion_length": 380.1875, |
| "epoch": 0.07410941475826972, |
| "grad_norm": 1.9199665784835815, |
| "kl": 0.0, |
| "learning_rate": 9.74544975583238e-07, |
| "loss": 0.6174, |
| "num_samples": 1.0, |
| "reward": 3.625, |
| "reward_std": 1.6082265377044678, |
| "rewards/gpt4o_holistic_reward": 3.625, |
| "rl_loss": 3.725290298461914e-09, |
| "sft_loss": 2.0581471920013428, |
| "speech_entropy": 2.2164857387542725, |
| "speech_kl": 0.0, |
| "step": 233, |
| "text_entropy": 0.900662899017334, |
| "text_kl": 0.0, |
| "total_entropy": 1.9652466773986816 |
| }, |
| { |
| "combined_loss": 0.7314097285270691, |
| "completion_length": 307.0625, |
| "epoch": 0.07442748091603053, |
| "grad_norm": 2.08520245552063, |
| "kl": 0.0, |
| "learning_rate": 9.743077062622278e-07, |
| "loss": 0.7314, |
| "num_samples": 1.0, |
| "reward": 3.1875, |
| "reward_std": 0.8014019727706909, |
| "rewards/gpt4o_holistic_reward": 3.1875, |
| "rl_loss": -1.1175870895385742e-08, |
| "sft_loss": 2.438032388687134, |
| "speech_entropy": 2.3524227142333984, |
| "speech_kl": 0.0, |
| "step": 234, |
| "text_entropy": 0.8163133859634399, |
| "text_kl": 0.0, |
| "total_entropy": 2.0464541912078857 |
| }, |
| { |
| "combined_loss": 0.6391720175743103, |
| "completion_length": 435.5625, |
| "epoch": 0.07474554707379134, |
| "grad_norm": 1.9668989181518555, |
| "kl": 0.0, |
| "learning_rate": 9.740693687470722e-07, |
| "loss": 0.6392, |
| "num_samples": 1.0, |
| "reward": 3.8125, |
| "reward_std": 0.7694376111030579, |
| "rewards/gpt4o_holistic_reward": 3.8125, |
| "rl_loss": 7.450580596923828e-09, |
| "sft_loss": 2.130573272705078, |
| "speech_entropy": 2.305225372314453, |
| "speech_kl": 0.0, |
| "step": 235, |
| "text_entropy": 0.7647875547409058, |
| "text_kl": 0.0, |
| "total_entropy": 2.0108447074890137 |
| }, |
| { |
| "combined_loss": 0.630230724811554, |
| "completion_length": 401.0625, |
| "epoch": 0.07506361323155217, |
| "grad_norm": 2.1346487998962402, |
| "kl": 0.0, |
| "learning_rate": 9.738299636377862e-07, |
| "loss": 0.6302, |
| "num_samples": 1.0, |
| "reward": 3.125, |
| "reward_std": 0.8859703540802002, |
| "rewards/gpt4o_holistic_reward": 3.125, |
| "rl_loss": 9.313225746154785e-09, |
| "sft_loss": 2.10076904296875, |
| "speech_entropy": 2.27579402923584, |
| "speech_kl": 0.0, |
| "step": 236, |
| "text_entropy": 0.8570226430892944, |
| "text_kl": 0.0, |
| "total_entropy": 2.0057263374328613 |
| }, |
| { |
| "combined_loss": 0.6507729291915894, |
| "completion_length": 424.8125, |
| "epoch": 0.07538167938931298, |
| "grad_norm": 1.7010408639907837, |
| "kl": 0.0, |
| "learning_rate": 9.735894915370712e-07, |
| "loss": 0.6508, |
| "num_samples": 1.0, |
| "reward": 3.9375, |
| "reward_std": 0.989456832408905, |
| "rewards/gpt4o_holistic_reward": 3.9375, |
| "rl_loss": -1.4901161193847656e-08, |
| "sft_loss": 2.169243097305298, |
| "speech_entropy": 2.329667329788208, |
| "speech_kl": 0.0, |
| "step": 237, |
| "text_entropy": 1.0157864093780518, |
| "text_kl": 0.0, |
| "total_entropy": 2.0907399654388428 |
| }, |
| { |
| "combined_loss": 0.6825940608978271, |
| "completion_length": 556.8125, |
| "epoch": 0.07569974554707379, |
| "grad_norm": 2.043261766433716, |
| "kl": 0.0, |
| "learning_rate": 9.73347953050316e-07, |
| "loss": 0.6826, |
| "num_samples": 1.0, |
| "reward": 3.1875, |
| "reward_std": 0.7705972194671631, |
| "rewards/gpt4o_holistic_reward": 3.1875, |
| "rl_loss": 3.725290298461914e-09, |
| "sft_loss": 2.275313377380371, |
| "speech_entropy": 2.336167812347412, |
| "speech_kl": 0.0, |
| "step": 238, |
| "text_entropy": 1.4063137769699097, |
| "text_kl": 0.0, |
| "total_entropy": 2.168437957763672 |
| }, |
| { |
| "combined_loss": 0.6205179691314697, |
| "completion_length": 378.75, |
| "epoch": 0.07601781170483461, |
| "grad_norm": 2.229231595993042, |
| "kl": 0.0, |
| "learning_rate": 9.731053487855932e-07, |
| "loss": 0.6205, |
| "num_samples": 1.0, |
| "reward": 3.0625, |
| "reward_std": 0.8837943077087402, |
| "rewards/gpt4o_holistic_reward": 3.0625, |
| "rl_loss": 1.862645149230957e-08, |
| "sft_loss": 2.0683932304382324, |
| "speech_entropy": 2.4868783950805664, |
| "speech_kl": 0.0, |
| "step": 239, |
| "text_entropy": 1.320270299911499, |
| "text_kl": 0.0, |
| "total_entropy": 2.2788162231445312 |
| }, |
| { |
| "combined_loss": 0.7069031596183777, |
| "completion_length": 469.5625, |
| "epoch": 0.07633587786259542, |
| "grad_norm": 2.225677728652954, |
| "kl": 0.0, |
| "learning_rate": 9.728616793536587e-07, |
| "loss": 0.7069, |
| "num_samples": 1.0, |
| "reward": 3.1875, |
| "reward_std": 1.3122053146362305, |
| "rewards/gpt4o_holistic_reward": 3.1875, |
| "rl_loss": 1.4901161193847656e-08, |
| "sft_loss": 2.3563437461853027, |
| "speech_entropy": 2.316706657409668, |
| "speech_kl": 0.0, |
| "step": 240, |
| "text_entropy": 1.6138790845870972, |
| "text_kl": 0.0, |
| "total_entropy": 2.198500156402588 |
| }, |
| { |
| "combined_loss": 0.6990001797676086, |
| "completion_length": 413.25, |
| "epoch": 0.07665394402035623, |
| "grad_norm": 1.7011535167694092, |
| "kl": 0.0, |
| "learning_rate": 9.726169453679502e-07, |
| "loss": 0.699, |
| "num_samples": 1.0, |
| "reward": 4.125, |
| "reward_std": 0.6764019727706909, |
| "rewards/gpt4o_holistic_reward": 4.125, |
| "rl_loss": 2.60770320892334e-08, |
| "sft_loss": 2.330000400543213, |
| "speech_entropy": 2.288140296936035, |
| "speech_kl": 0.0, |
| "step": 241, |
| "text_entropy": 1.567537546157837, |
| "text_kl": 0.0, |
| "total_entropy": 2.162627696990967 |
| }, |
| { |
| "combined_loss": 0.7738334536552429, |
| "completion_length": 296.8125, |
| "epoch": 0.07697201017811704, |
| "grad_norm": 2.249540328979492, |
| "kl": 0.0, |
| "learning_rate": 9.72371147444585e-07, |
| "loss": 0.7738, |
| "num_samples": 1.0, |
| "reward": 3.25, |
| "reward_std": 1.8020561933517456, |
| "rewards/gpt4o_holistic_reward": 3.25, |
| "rl_loss": -3.725290298461914e-09, |
| "sft_loss": 2.579444646835327, |
| "speech_entropy": 2.2558746337890625, |
| "speech_kl": 0.0, |
| "step": 242, |
| "text_entropy": 1.140458583831787, |
| "text_kl": 0.0, |
| "total_entropy": 2.0467886924743652 |
| }, |
| { |
| "combined_loss": 0.6218153834342957, |
| "completion_length": 417.1875, |
| "epoch": 0.07729007633587787, |
| "grad_norm": 1.888468861579895, |
| "kl": 0.0, |
| "learning_rate": 9.721242862023591e-07, |
| "loss": 0.6218, |
| "num_samples": 1.0, |
| "reward": 4.6875, |
| "reward_std": 0.6251000165939331, |
| "rewards/gpt4o_holistic_reward": 4.6875, |
| "rl_loss": 0.0, |
| "sft_loss": 2.0727176666259766, |
| "speech_entropy": 2.2084412574768066, |
| "speech_kl": 0.0, |
| "step": 243, |
| "text_entropy": 0.5448473691940308, |
| "text_kl": 0.0, |
| "total_entropy": 1.8867276906967163 |
| }, |
| { |
| "combined_loss": 0.6377319097518921, |
| "completion_length": 424.375, |
| "epoch": 0.07760814249363868, |
| "grad_norm": 1.7285096645355225, |
| "kl": 0.0, |
| "learning_rate": 9.718763622627458e-07, |
| "loss": 0.6377, |
| "num_samples": 1.0, |
| "reward": 4.5625, |
| "reward_std": 0.7286534309387207, |
| "rewards/gpt4o_holistic_reward": 4.5625, |
| "rl_loss": -7.450580596923828e-09, |
| "sft_loss": 2.1257729530334473, |
| "speech_entropy": 2.167391538619995, |
| "speech_kl": 0.0, |
| "step": 244, |
| "text_entropy": 1.0646146535873413, |
| "text_kl": 0.0, |
| "total_entropy": 1.956799030303955 |
| }, |
| { |
| "combined_loss": 0.6482560634613037, |
| "completion_length": 407.4375, |
| "epoch": 0.07792620865139949, |
| "grad_norm": 1.660476803779602, |
| "kl": 0.0, |
| "learning_rate": 9.716273762498929e-07, |
| "loss": 0.6483, |
| "num_samples": 1.0, |
| "reward": 4.4375, |
| "reward_std": 0.42705631256103516, |
| "rewards/gpt4o_holistic_reward": 4.4375, |
| "rl_loss": 7.450580596923828e-09, |
| "sft_loss": 2.160853385925293, |
| "speech_entropy": 2.198376417160034, |
| "speech_kl": 0.0, |
| "step": 245, |
| "text_entropy": 1.3145016431808472, |
| "text_kl": 0.0, |
| "total_entropy": 2.031963348388672 |
| }, |
| { |
| "combined_loss": 0.6421551704406738, |
| "completion_length": 586.875, |
| "epoch": 0.07824427480916031, |
| "grad_norm": 1.4380934238433838, |
| "kl": 0.0, |
| "learning_rate": 9.71377328790622e-07, |
| "loss": 0.6422, |
| "num_samples": 1.0, |
| "reward": 3.5, |
| "reward_std": 0.5774502754211426, |
| "rewards/gpt4o_holistic_reward": 3.5, |
| "rl_loss": 0.0, |
| "sft_loss": 2.140516996383667, |
| "speech_entropy": 2.2290802001953125, |
| "speech_kl": 0.0, |
| "step": 246, |
| "text_entropy": 0.9156054258346558, |
| "text_kl": 0.0, |
| "total_entropy": 1.9700312614440918 |
| }, |
| { |
| "combined_loss": 0.7020258903503418, |
| "completion_length": 419.125, |
| "epoch": 0.07856234096692112, |
| "grad_norm": 3.0993080139160156, |
| "kl": 0.0, |
| "learning_rate": 9.711262205144285e-07, |
| "loss": 0.702, |
| "num_samples": 1.0, |
| "reward": 4.1875, |
| "reward_std": 0.6637751460075378, |
| "rewards/gpt4o_holistic_reward": 4.1875, |
| "rl_loss": -2.2351741790771484e-08, |
| "sft_loss": 2.340085983276367, |
| "speech_entropy": 2.3030998706817627, |
| "speech_kl": 0.0, |
| "step": 247, |
| "text_entropy": 1.1675899028778076, |
| "text_kl": 0.0, |
| "total_entropy": 2.082179307937622 |
| }, |
| { |
| "combined_loss": 0.686911940574646, |
| "completion_length": 387.25, |
| "epoch": 0.07888040712468193, |
| "grad_norm": 1.6759203672409058, |
| "kl": 0.0, |
| "learning_rate": 9.70874052053476e-07, |
| "loss": 0.6869, |
| "num_samples": 1.0, |
| "reward": 4.9375, |
| "reward_std": 0.1251000016927719, |
| "rewards/gpt4o_holistic_reward": 4.9375, |
| "rl_loss": 7.450580596923828e-09, |
| "sft_loss": 2.2897064685821533, |
| "speech_entropy": 2.1984457969665527, |
| "speech_kl": 0.0, |
| "step": 248, |
| "text_entropy": 1.1239951848983765, |
| "text_kl": 0.0, |
| "total_entropy": 1.9890978336334229 |
| }, |
| { |
| "combined_loss": 0.6796283721923828, |
| "completion_length": 495.375, |
| "epoch": 0.07919847328244274, |
| "grad_norm": 1.5949386358261108, |
| "kl": 0.0, |
| "learning_rate": 9.706208240425988e-07, |
| "loss": 0.6796, |
| "num_samples": 1.0, |
| "reward": 4.1875, |
| "reward_std": 0.1251000016927719, |
| "rewards/gpt4o_holistic_reward": 4.1875, |
| "rl_loss": 1.1175870895385742e-08, |
| "sft_loss": 2.265427589416504, |
| "speech_entropy": 2.263617992401123, |
| "speech_kl": 0.0, |
| "step": 249, |
| "text_entropy": 1.3017940521240234, |
| "text_kl": 0.0, |
| "total_entropy": 2.0769906044006348 |
| }, |
| { |
| "combined_loss": 0.6568690538406372, |
| "completion_length": 432.3125, |
| "epoch": 0.07951653944020357, |
| "grad_norm": 1.5095460414886475, |
| "kl": 0.0, |
| "learning_rate": 9.70366537119298e-07, |
| "loss": 0.6569, |
| "num_samples": 1.0, |
| "reward": 4.5, |
| "reward_std": 0.3536534011363983, |
| "rewards/gpt4o_holistic_reward": 4.5, |
| "rl_loss": 7.450580596923828e-09, |
| "sft_loss": 2.189563274383545, |
| "speech_entropy": 2.2465767860412598, |
| "speech_kl": 0.0, |
| "step": 250, |
| "text_entropy": 0.8271604776382446, |
| "text_kl": 0.0, |
| "total_entropy": 1.9808815717697144 |
| }, |
| { |
| "combined_loss": 0.691437840461731, |
| "completion_length": 455.625, |
| "epoch": 0.07983460559796438, |
| "grad_norm": 2.107602119445801, |
| "kl": 0.0, |
| "learning_rate": 9.701111919237408e-07, |
| "loss": 0.6914, |
| "num_samples": 1.0, |
| "reward": 2.625, |
| "reward_std": 1.228813648223877, |
| "rewards/gpt4o_holistic_reward": 2.625, |
| "rl_loss": -7.450580596923828e-09, |
| "sft_loss": 2.304792881011963, |
| "speech_entropy": 2.464122772216797, |
| "speech_kl": 0.0, |
| "step": 251, |
| "text_entropy": 1.3184483051300049, |
| "text_kl": 0.0, |
| "total_entropy": 2.2695252895355225 |
| }, |
| { |
| "combined_loss": 0.6778163909912109, |
| "completion_length": 400.5625, |
| "epoch": 0.08015267175572519, |
| "grad_norm": 1.9228991270065308, |
| "kl": 0.0, |
| "learning_rate": 9.698547890987584e-07, |
| "loss": 0.6778, |
| "num_samples": 1.0, |
| "reward": 3.4375, |
| "reward_std": 0.8751000165939331, |
| "rewards/gpt4o_holistic_reward": 3.4375, |
| "rl_loss": 1.862645149230957e-09, |
| "sft_loss": 2.259387969970703, |
| "speech_entropy": 2.1710927486419678, |
| "speech_kl": 0.0, |
| "step": 252, |
| "text_entropy": 1.0632684230804443, |
| "text_kl": 0.0, |
| "total_entropy": 1.970045566558838 |
| }, |
| { |
| "combined_loss": 0.7383031845092773, |
| "completion_length": 435.9375, |
| "epoch": 0.08047073791348601, |
| "grad_norm": 1.9669862985610962, |
| "kl": 0.0, |
| "learning_rate": 9.695973292898442e-07, |
| "loss": 0.7383, |
| "num_samples": 1.0, |
| "reward": 4.3125, |
| "reward_std": 1.0876991748809814, |
| "rewards/gpt4o_holistic_reward": 4.3125, |
| "rl_loss": 1.4901161193847656e-08, |
| "sft_loss": 2.461010456085205, |
| "speech_entropy": 2.2386293411254883, |
| "speech_kl": 0.0, |
| "step": 253, |
| "text_entropy": 1.4104411602020264, |
| "text_kl": 0.0, |
| "total_entropy": 2.0837998390197754 |
| }, |
| { |
| "combined_loss": 0.6110467910766602, |
| "completion_length": 388.125, |
| "epoch": 0.08078880407124682, |
| "grad_norm": 1.7974542379379272, |
| "kl": 0.0, |
| "learning_rate": 9.693388131451536e-07, |
| "loss": 0.611, |
| "num_samples": 1.0, |
| "reward": 4.0, |
| "reward_std": 1.3661253452301025, |
| "rewards/gpt4o_holistic_reward": 4.0, |
| "rl_loss": 7.450580596923828e-09, |
| "sft_loss": 2.036822557449341, |
| "speech_entropy": 2.301140785217285, |
| "speech_kl": 0.0, |
| "step": 254, |
| "text_entropy": 0.9246535897254944, |
| "text_kl": 0.0, |
| "total_entropy": 2.01608943939209 |
| }, |
| { |
| "combined_loss": 0.6638992428779602, |
| "completion_length": 567.8125, |
| "epoch": 0.08110687022900763, |
| "grad_norm": 1.5331536531448364, |
| "kl": 0.0, |
| "learning_rate": 9.690792413155002e-07, |
| "loss": 0.6639, |
| "num_samples": 1.0, |
| "reward": 4.1875, |
| "reward_std": 0.5646764636039734, |
| "rewards/gpt4o_holistic_reward": 4.1875, |
| "rl_loss": 0.0, |
| "sft_loss": 2.2129974365234375, |
| "speech_entropy": 2.180696964263916, |
| "speech_kl": 0.0, |
| "step": 255, |
| "text_entropy": 1.1481618881225586, |
| "text_kl": 0.0, |
| "total_entropy": 1.9820756912231445 |
| }, |
| { |
| "combined_loss": 0.6851294040679932, |
| "completion_length": 547.0, |
| "epoch": 0.08142493638676845, |
| "grad_norm": 1.8251949548721313, |
| "kl": 0.0, |
| "learning_rate": 9.688186144543558e-07, |
| "loss": 0.6851, |
| "num_samples": 1.0, |
| "reward": 3.6875, |
| "reward_std": 0.8081126809120178, |
| "rewards/gpt4o_holistic_reward": 3.6875, |
| "rl_loss": -1.1175870895385742e-08, |
| "sft_loss": 2.2837648391723633, |
| "speech_entropy": 2.2099359035491943, |
| "speech_kl": 0.0, |
| "step": 256, |
| "text_entropy": 1.4614191055297852, |
| "text_kl": 0.0, |
| "total_entropy": 2.0672175884246826 |
| }, |
| { |
| "combined_loss": 0.7618996500968933, |
| "completion_length": 372.0625, |
| "epoch": 0.08174300254452926, |
| "grad_norm": 2.1775362491607666, |
| "kl": 0.0, |
| "learning_rate": 9.685569332178487e-07, |
| "loss": 0.7619, |
| "num_samples": 1.0, |
| "reward": 2.8125, |
| "reward_std": 0.5194376111030579, |
| "rewards/gpt4o_holistic_reward": 2.8125, |
| "rl_loss": -3.725290298461914e-09, |
| "sft_loss": 2.539665460586548, |
| "speech_entropy": 2.2694239616394043, |
| "speech_kl": 0.0, |
| "step": 257, |
| "text_entropy": 0.980126142501831, |
| "text_kl": 0.0, |
| "total_entropy": 2.0079169273376465 |
| }, |
| { |
| "combined_loss": 0.6269736289978027, |
| "completion_length": 546.5, |
| "epoch": 0.08206106870229007, |
| "grad_norm": 1.405476689338684, |
| "kl": 0.0, |
| "learning_rate": 9.682941982647605e-07, |
| "loss": 0.627, |
| "num_samples": 1.0, |
| "reward": 2.9375, |
| "reward_std": 0.6251000165939331, |
| "rewards/gpt4o_holistic_reward": 2.9375, |
| "rl_loss": -3.725290298461914e-09, |
| "sft_loss": 2.089912176132202, |
| "speech_entropy": 2.1415200233459473, |
| "speech_kl": 0.0, |
| "step": 258, |
| "text_entropy": 1.16708242893219, |
| "text_kl": 0.0, |
| "total_entropy": 1.9402116537094116 |
| }, |
| { |
| "combined_loss": 0.6667758226394653, |
| "completion_length": 376.0625, |
| "epoch": 0.08237913486005088, |
| "grad_norm": 1.9283385276794434, |
| "kl": 0.0, |
| "learning_rate": 9.680304102565265e-07, |
| "loss": 0.6668, |
| "num_samples": 1.0, |
| "reward": 3.0625, |
| "reward_std": 0.7394567728042603, |
| "rewards/gpt4o_holistic_reward": 3.0625, |
| "rl_loss": -1.4901161193847656e-08, |
| "sft_loss": 2.222586154937744, |
| "speech_entropy": 2.2938990592956543, |
| "speech_kl": 0.0, |
| "step": 259, |
| "text_entropy": 0.9376010894775391, |
| "text_kl": 0.0, |
| "total_entropy": 2.0546226501464844 |
| }, |
| { |
| "combined_loss": 0.6579493880271912, |
| "completion_length": 224.5, |
| "epoch": 0.08269720101781171, |
| "grad_norm": 2.025418519973755, |
| "kl": 0.0, |
| "learning_rate": 9.677655698572325e-07, |
| "loss": 0.6579, |
| "num_samples": 1.0, |
| "reward": 4.375, |
| "reward_std": 0.3228486180305481, |
| "rewards/gpt4o_holistic_reward": 4.375, |
| "rl_loss": 7.450580596923828e-09, |
| "sft_loss": 2.193164587020874, |
| "speech_entropy": 2.3803346157073975, |
| "speech_kl": 0.0, |
| "step": 260, |
| "text_entropy": 1.0623462200164795, |
| "text_kl": 0.0, |
| "total_entropy": 2.11057186126709 |
| }, |
| { |
| "combined_loss": 0.7478048205375671, |
| "completion_length": 362.0625, |
| "epoch": 0.08301526717557252, |
| "grad_norm": 3.3174071311950684, |
| "kl": 0.0, |
| "learning_rate": 9.674996777336142e-07, |
| "loss": 0.7478, |
| "num_samples": 1.0, |
| "reward": 4.5625, |
| "reward_std": 0.3146764636039734, |
| "rewards/gpt4o_holistic_reward": 4.5625, |
| "rl_loss": 0.0, |
| "sft_loss": 2.492682456970215, |
| "speech_entropy": 2.2766928672790527, |
| "speech_kl": 0.0, |
| "step": 261, |
| "text_entropy": 1.202368140220642, |
| "text_kl": 0.0, |
| "total_entropy": 2.069495677947998 |
| }, |
| { |
| "combined_loss": 0.6093226075172424, |
| "completion_length": 404.625, |
| "epoch": 0.08333333333333333, |
| "grad_norm": 2.024925470352173, |
| "kl": 0.0, |
| "learning_rate": 9.672327345550543e-07, |
| "loss": 0.6093, |
| "num_samples": 1.0, |
| "reward": 3.0, |
| "reward_std": 1.1161253452301025, |
| "rewards/gpt4o_holistic_reward": 3.0, |
| "rl_loss": 3.725290298461914e-09, |
| "sft_loss": 2.0310750007629395, |
| "speech_entropy": 2.2833635807037354, |
| "speech_kl": 0.0, |
| "step": 262, |
| "text_entropy": 0.996048629283905, |
| "text_kl": 0.0, |
| "total_entropy": 2.037139654159546 |
| }, |
| { |
| "combined_loss": 0.6289666891098022, |
| "completion_length": 487.0625, |
| "epoch": 0.08365139949109415, |
| "grad_norm": 2.06803297996521, |
| "kl": 0.0, |
| "learning_rate": 9.669647409935822e-07, |
| "loss": 0.629, |
| "num_samples": 1.0, |
| "reward": 3.625, |
| "reward_std": 1.2440414428710938, |
| "rewards/gpt4o_holistic_reward": 3.625, |
| "rl_loss": -3.725290298461914e-09, |
| "sft_loss": 2.096555709838867, |
| "speech_entropy": 2.283952236175537, |
| "speech_kl": 0.0, |
| "step": 263, |
| "text_entropy": 1.3200794458389282, |
| "text_kl": 0.0, |
| "total_entropy": 2.095597267150879 |
| }, |
| { |
| "combined_loss": 0.6302919387817383, |
| "completion_length": 503.0625, |
| "epoch": 0.08396946564885496, |
| "grad_norm": 1.8565832376480103, |
| "kl": 0.0, |
| "learning_rate": 9.666956977238711e-07, |
| "loss": 0.6303, |
| "num_samples": 1.0, |
| "reward": 3.75, |
| "reward_std": 0.6144567728042603, |
| "rewards/gpt4o_holistic_reward": 3.75, |
| "rl_loss": 0.0, |
| "sft_loss": 2.100973129272461, |
| "speech_entropy": 2.1800198554992676, |
| "speech_kl": 0.0, |
| "step": 264, |
| "text_entropy": 0.9526975154876709, |
| "text_kl": 0.0, |
| "total_entropy": 1.956667184829712 |
| }, |
| { |
| "combined_loss": 0.7350342273712158, |
| "completion_length": 456.9375, |
| "epoch": 0.08428753180661577, |
| "grad_norm": 1.7183167934417725, |
| "kl": 0.0, |
| "learning_rate": 9.664256054232374e-07, |
| "loss": 0.735, |
| "num_samples": 1.0, |
| "reward": 4.875, |
| "reward_std": 0.2501000165939331, |
| "rewards/gpt4o_holistic_reward": 4.875, |
| "rl_loss": 3.725290298461914e-09, |
| "sft_loss": 2.4501140117645264, |
| "speech_entropy": 2.2488903999328613, |
| "speech_kl": 0.0, |
| "step": 265, |
| "text_entropy": 1.4711058139801025, |
| "text_kl": 0.0, |
| "total_entropy": 2.073747396469116 |
| }, |
| { |
| "combined_loss": 0.6489673256874084, |
| "completion_length": 426.8125, |
| "epoch": 0.0846055979643766, |
| "grad_norm": 1.6044869422912598, |
| "kl": 0.0, |
| "learning_rate": 9.66154464771638e-07, |
| "loss": 0.649, |
| "num_samples": 1.0, |
| "reward": 3.0, |
| "reward_std": 0.5001000165939331, |
| "rewards/gpt4o_holistic_reward": 3.0, |
| "rl_loss": 0.0, |
| "sft_loss": 2.163224220275879, |
| "speech_entropy": 2.525622606277466, |
| "speech_kl": 0.0, |
| "step": 266, |
| "text_entropy": 1.200254201889038, |
| "text_kl": 0.0, |
| "total_entropy": 2.274986505508423 |
| }, |
| { |
| "combined_loss": 0.7551906108856201, |
| "completion_length": 445.6875, |
| "epoch": 0.08492366412213741, |
| "grad_norm": 1.658619999885559, |
| "kl": 0.0, |
| "learning_rate": 9.658822764516693e-07, |
| "loss": 0.7552, |
| "num_samples": 1.0, |
| "reward": 4.4375, |
| "reward_std": 0.8081126809120178, |
| "rewards/gpt4o_holistic_reward": 4.4375, |
| "rl_loss": 3.725290298461914e-09, |
| "sft_loss": 2.5173017978668213, |
| "speech_entropy": 2.1905436515808105, |
| "speech_kl": 0.0, |
| "step": 267, |
| "text_entropy": 1.1561870574951172, |
| "text_kl": 0.0, |
| "total_entropy": 1.9924242496490479 |
| }, |
| { |
| "combined_loss": 0.6897430419921875, |
| "completion_length": 678.3125, |
| "epoch": 0.08524173027989822, |
| "grad_norm": 1.7225435972213745, |
| "kl": 0.0, |
| "learning_rate": 9.65609041148565e-07, |
| "loss": 0.6897, |
| "num_samples": 1.0, |
| "reward": 3.5625, |
| "reward_std": 0.9435809850692749, |
| "rewards/gpt4o_holistic_reward": 3.5625, |
| "rl_loss": 1.1175870895385742e-08, |
| "sft_loss": 2.2991433143615723, |
| "speech_entropy": 2.1389427185058594, |
| "speech_kl": 0.0, |
| "step": 268, |
| "text_entropy": 1.3221979141235352, |
| "text_kl": 0.0, |
| "total_entropy": 1.9796137809753418 |
| }, |
| { |
| "combined_loss": 0.5930161476135254, |
| "completion_length": 548.0, |
| "epoch": 0.08555979643765903, |
| "grad_norm": 1.4510716199874878, |
| "kl": 0.0, |
| "learning_rate": 9.653347595501946e-07, |
| "loss": 0.593, |
| "num_samples": 1.0, |
| "reward": 4.125, |
| "reward_std": 0.14443756639957428, |
| "rewards/gpt4o_holistic_reward": 4.125, |
| "rl_loss": 0.0, |
| "sft_loss": 1.9767203330993652, |
| "speech_entropy": 2.1469504833221436, |
| "speech_kl": 0.0, |
| "step": 269, |
| "text_entropy": 0.8947275876998901, |
| "text_kl": 0.0, |
| "total_entropy": 1.9000020027160645 |
| }, |
| { |
| "combined_loss": 0.652092456817627, |
| "completion_length": 484.625, |
| "epoch": 0.08587786259541985, |
| "grad_norm": 1.9512776136398315, |
| "kl": 0.0, |
| "learning_rate": 9.650594323470617e-07, |
| "loss": 0.6521, |
| "num_samples": 1.0, |
| "reward": 2.8125, |
| "reward_std": 0.8808612823486328, |
| "rewards/gpt4o_holistic_reward": 2.8125, |
| "rl_loss": -1.4901161193847656e-08, |
| "sft_loss": 2.1736412048339844, |
| "speech_entropy": 2.1839957237243652, |
| "speech_kl": 0.0, |
| "step": 270, |
| "text_entropy": 1.3368381261825562, |
| "text_kl": 0.0, |
| "total_entropy": 2.01955246925354 |
| }, |
| { |
| "combined_loss": 0.6888371706008911, |
| "completion_length": 471.3125, |
| "epoch": 0.08619592875318066, |
| "grad_norm": 1.8091537952423096, |
| "kl": 0.0, |
| "learning_rate": 9.64783060232302e-07, |
| "loss": 0.6888, |
| "num_samples": 1.0, |
| "reward": 3.875, |
| "reward_std": 1.183112621307373, |
| "rewards/gpt4o_holistic_reward": 3.875, |
| "rl_loss": 2.2351741790771484e-08, |
| "sft_loss": 2.296123743057251, |
| "speech_entropy": 2.1703720092773438, |
| "speech_kl": 0.0, |
| "step": 271, |
| "text_entropy": 1.033645749092102, |
| "text_kl": 0.0, |
| "total_entropy": 1.927350401878357 |
| }, |
| { |
| "combined_loss": 0.6634478569030762, |
| "completion_length": 618.1875, |
| "epoch": 0.08651399491094147, |
| "grad_norm": 1.6202704906463623, |
| "kl": 0.0, |
| "learning_rate": 9.645056439016825e-07, |
| "loss": 0.6634, |
| "num_samples": 1.0, |
| "reward": 3.125, |
| "reward_std": 1.5379188060760498, |
| "rewards/gpt4o_holistic_reward": 3.125, |
| "rl_loss": 3.725290298461914e-09, |
| "sft_loss": 2.2114930152893066, |
| "speech_entropy": 2.063384532928467, |
| "speech_kl": 0.0, |
| "step": 272, |
| "text_entropy": 0.6551350951194763, |
| "text_kl": 0.0, |
| "total_entropy": 1.7709801197052002 |
| }, |
| { |
| "combined_loss": 0.6863433718681335, |
| "completion_length": 390.0625, |
| "epoch": 0.0868320610687023, |
| "grad_norm": 1.5252995491027832, |
| "kl": 0.0, |
| "learning_rate": 9.64227184053598e-07, |
| "loss": 0.6863, |
| "num_samples": 1.0, |
| "reward": 3.875, |
| "reward_std": 0.4788135886192322, |
| "rewards/gpt4o_holistic_reward": 3.875, |
| "rl_loss": 0.0, |
| "sft_loss": 2.287811040878296, |
| "speech_entropy": 2.296483039855957, |
| "speech_kl": 0.0, |
| "step": 273, |
| "text_entropy": 1.0703375339508057, |
| "text_kl": 0.0, |
| "total_entropy": 2.0609984397888184 |
| }, |
| { |
| "combined_loss": 0.7279566526412964, |
| "completion_length": 336.625, |
| "epoch": 0.0871501272264631, |
| "grad_norm": 2.0608808994293213, |
| "kl": 0.0, |
| "learning_rate": 9.639476813890713e-07, |
| "loss": 0.728, |
| "num_samples": 1.0, |
| "reward": 2.875, |
| "reward_std": 0.6444375514984131, |
| "rewards/gpt4o_holistic_reward": 2.875, |
| "rl_loss": 0.0, |
| "sft_loss": 2.4265217781066895, |
| "speech_entropy": 2.2220163345336914, |
| "speech_kl": 0.0, |
| "step": 274, |
| "text_entropy": 1.446899175643921, |
| "text_kl": 0.0, |
| "total_entropy": 2.081493377685547 |
| }, |
| { |
| "combined_loss": 0.7374498248100281, |
| "completion_length": 533.1875, |
| "epoch": 0.08746819338422392, |
| "grad_norm": 1.9891971349716187, |
| "kl": 0.0, |
| "learning_rate": 9.636671366117494e-07, |
| "loss": 0.7374, |
| "num_samples": 1.0, |
| "reward": 2.5, |
| "reward_std": 0.3944375813007355, |
| "rewards/gpt4o_holistic_reward": 2.5, |
| "rl_loss": -7.450580596923828e-09, |
| "sft_loss": 2.4581661224365234, |
| "speech_entropy": 2.133476495742798, |
| "speech_kl": 0.0, |
| "step": 275, |
| "text_entropy": 1.2225779294967651, |
| "text_kl": 0.0, |
| "total_entropy": 1.9578973054885864 |
| }, |
| { |
| "combined_loss": 0.6486621499061584, |
| "completion_length": 366.875, |
| "epoch": 0.08778625954198473, |
| "grad_norm": 3.3881635665893555, |
| "kl": 0.0, |
| "learning_rate": 9.63385550427904e-07, |
| "loss": 0.6487, |
| "num_samples": 1.0, |
| "reward": 4.375, |
| "reward_std": 0.9331126809120178, |
| "rewards/gpt4o_holistic_reward": 4.375, |
| "rl_loss": 7.450580596923828e-09, |
| "sft_loss": 2.1622071266174316, |
| "speech_entropy": 2.344599723815918, |
| "speech_kl": 0.0, |
| "step": 276, |
| "text_entropy": 1.1587448120117188, |
| "text_kl": 0.0, |
| "total_entropy": 2.122767448425293 |
| }, |
| { |
| "combined_loss": 0.6684514284133911, |
| "completion_length": 424.9375, |
| "epoch": 0.08810432569974555, |
| "grad_norm": 1.9074612855911255, |
| "kl": 0.0, |
| "learning_rate": 9.631029235464278e-07, |
| "loss": 0.6685, |
| "num_samples": 1.0, |
| "reward": 4.5, |
| "reward_std": 0.5774502158164978, |
| "rewards/gpt4o_holistic_reward": 4.5, |
| "rl_loss": -3.725290298461914e-09, |
| "sft_loss": 2.2281713485717773, |
| "speech_entropy": 2.4562625885009766, |
| "speech_kl": 0.0, |
| "step": 277, |
| "text_entropy": 1.2592930793762207, |
| "text_kl": 0.0, |
| "total_entropy": 2.2500598430633545 |
| }, |
| { |
| "combined_loss": 0.671592116355896, |
| "completion_length": 438.0, |
| "epoch": 0.08842239185750636, |
| "grad_norm": 1.861733078956604, |
| "kl": 0.0, |
| "learning_rate": 9.628192566788335e-07, |
| "loss": 0.6716, |
| "num_samples": 1.0, |
| "reward": 3.75, |
| "reward_std": 0.6404881477355957, |
| "rewards/gpt4o_holistic_reward": 3.75, |
| "rl_loss": 1.4901161193847656e-08, |
| "sft_loss": 2.238640308380127, |
| "speech_entropy": 2.177516222000122, |
| "speech_kl": 0.0, |
| "step": 278, |
| "text_entropy": 1.346944808959961, |
| "text_kl": 0.0, |
| "total_entropy": 2.020207643508911 |
| }, |
| { |
| "combined_loss": 0.638625979423523, |
| "completion_length": 528.4375, |
| "epoch": 0.08874045801526717, |
| "grad_norm": 1.7982735633850098, |
| "kl": 0.0, |
| "learning_rate": 9.625345505392522e-07, |
| "loss": 0.6386, |
| "num_samples": 1.0, |
| "reward": 2.625, |
| "reward_std": 1.0474694967269897, |
| "rewards/gpt4o_holistic_reward": 2.625, |
| "rl_loss": 3.725290298461914e-09, |
| "sft_loss": 2.128753185272217, |
| "speech_entropy": 2.180100679397583, |
| "speech_kl": 0.0, |
| "step": 279, |
| "text_entropy": 0.8083711862564087, |
| "text_kl": 0.0, |
| "total_entropy": 1.9105725288391113 |
| }, |
| { |
| "combined_loss": 0.7118488550186157, |
| "completion_length": 514.25, |
| "epoch": 0.089058524173028, |
| "grad_norm": 1.5744655132293701, |
| "kl": 0.0, |
| "learning_rate": 9.622488058444313e-07, |
| "loss": 0.7118, |
| "num_samples": 1.0, |
| "reward": 4.375, |
| "reward_std": 0.8228486180305481, |
| "rewards/gpt4o_holistic_reward": 4.375, |
| "rl_loss": 5.587935447692871e-09, |
| "sft_loss": 2.3728294372558594, |
| "speech_entropy": 2.1545000076293945, |
| "speech_kl": 0.0, |
| "step": 280, |
| "text_entropy": 1.114195466041565, |
| "text_kl": 0.0, |
| "total_entropy": 1.9586902856826782 |
| }, |
| { |
| "combined_loss": 0.6681440472602844, |
| "completion_length": 377.4375, |
| "epoch": 0.0893765903307888, |
| "grad_norm": 1.993377447128296, |
| "kl": 0.0, |
| "learning_rate": 9.619620233137326e-07, |
| "loss": 0.6681, |
| "num_samples": 1.0, |
| "reward": 3.4375, |
| "reward_std": 0.6724694967269897, |
| "rewards/gpt4o_holistic_reward": 3.4375, |
| "rl_loss": -3.725290298461914e-09, |
| "sft_loss": 2.227146863937378, |
| "speech_entropy": 2.207726240158081, |
| "speech_kl": 0.0, |
| "step": 281, |
| "text_entropy": 1.0726655721664429, |
| "text_kl": 0.0, |
| "total_entropy": 1.990494728088379 |
| }, |
| { |
| "combined_loss": 0.6539067029953003, |
| "completion_length": 420.5625, |
| "epoch": 0.08969465648854962, |
| "grad_norm": 1.9349361658096313, |
| "kl": 0.0, |
| "learning_rate": 9.61674203669131e-07, |
| "loss": 0.6539, |
| "num_samples": 1.0, |
| "reward": 4.375, |
| "reward_std": 1.250100016593933, |
| "rewards/gpt4o_holistic_reward": 4.375, |
| "rl_loss": 1.1175870895385742e-08, |
| "sft_loss": 2.1796889305114746, |
| "speech_entropy": 2.3027138710021973, |
| "speech_kl": 0.0, |
| "step": 282, |
| "text_entropy": 1.0392296314239502, |
| "text_kl": 0.0, |
| "total_entropy": 2.0374677181243896 |
| }, |
| { |
| "combined_loss": 0.7105068564414978, |
| "completion_length": 469.1875, |
| "epoch": 0.09001272264631044, |
| "grad_norm": 1.9159810543060303, |
| "kl": 0.0, |
| "learning_rate": 9.61385347635212e-07, |
| "loss": 0.7105, |
| "num_samples": 1.0, |
| "reward": 3.5, |
| "reward_std": 0.7288135886192322, |
| "rewards/gpt4o_holistic_reward": 3.5, |
| "rl_loss": 3.725290298461914e-09, |
| "sft_loss": 2.368356227874756, |
| "speech_entropy": 2.4519176483154297, |
| "speech_kl": 0.0, |
| "step": 283, |
| "text_entropy": 2.145763397216797, |
| "text_kl": 0.0, |
| "total_entropy": 2.3996076583862305 |
| }, |
| { |
| "combined_loss": 0.562969982624054, |
| "completion_length": 570.3125, |
| "epoch": 0.09033078880407125, |
| "grad_norm": 1.6282283067703247, |
| "kl": 0.0, |
| "learning_rate": 9.610954559391704e-07, |
| "loss": 0.563, |
| "num_samples": 1.0, |
| "reward": 3.5, |
| "reward_std": 0.9788135886192322, |
| "rewards/gpt4o_holistic_reward": 3.5, |
| "rl_loss": 0.0, |
| "sft_loss": 1.8765664100646973, |
| "speech_entropy": 2.180119514465332, |
| "speech_kl": 0.0, |
| "step": 284, |
| "text_entropy": 0.6460127830505371, |
| "text_kl": 0.0, |
| "total_entropy": 1.8707494735717773 |
| }, |
| { |
| "combined_loss": 0.691946268081665, |
| "completion_length": 618.0625, |
| "epoch": 0.09064885496183206, |
| "grad_norm": 1.7233694791793823, |
| "kl": 0.0, |
| "learning_rate": 9.60804529310808e-07, |
| "loss": 0.6919, |
| "num_samples": 1.0, |
| "reward": 3.5, |
| "reward_std": 1.3536533117294312, |
| "rewards/gpt4o_holistic_reward": 3.5, |
| "rl_loss": -7.450580596923828e-09, |
| "sft_loss": 2.306487560272217, |
| "speech_entropy": 2.1861276626586914, |
| "speech_kl": 0.0, |
| "step": 285, |
| "text_entropy": 0.9004356861114502, |
| "text_kl": 0.0, |
| "total_entropy": 1.9308792352676392 |
| }, |
| { |
| "combined_loss": 0.6357396841049194, |
| "completion_length": 506.5625, |
| "epoch": 0.09096692111959287, |
| "grad_norm": 1.5584510564804077, |
| "kl": 0.0, |
| "learning_rate": 9.605125684825322e-07, |
| "loss": 0.6357, |
| "num_samples": 1.0, |
| "reward": 3.375, |
| "reward_std": 0.5001000165939331, |
| "rewards/gpt4o_holistic_reward": 3.375, |
| "rl_loss": -7.450580596923828e-09, |
| "sft_loss": 2.1191322803497314, |
| "speech_entropy": 2.424971580505371, |
| "speech_kl": 0.0, |
| "step": 286, |
| "text_entropy": 1.4699835777282715, |
| "text_kl": 0.0, |
| "total_entropy": 2.243284225463867 |
| }, |
| { |
| "combined_loss": 0.8120319843292236, |
| "completion_length": 579.6875, |
| "epoch": 0.0912849872773537, |
| "grad_norm": 1.81868577003479, |
| "kl": 0.0, |
| "learning_rate": 9.602195741893546e-07, |
| "loss": 0.812, |
| "num_samples": 1.0, |
| "reward": 4.375, |
| "reward_std": 0.6115237474441528, |
| "rewards/gpt4o_holistic_reward": 4.375, |
| "rl_loss": -1.862645149230957e-09, |
| "sft_loss": 2.706773042678833, |
| "speech_entropy": 2.2252883911132812, |
| "speech_kl": 0.0, |
| "step": 287, |
| "text_entropy": 1.768620491027832, |
| "text_kl": 0.0, |
| "total_entropy": 2.1409504413604736 |
| }, |
| { |
| "combined_loss": 0.7210448384284973, |
| "completion_length": 399.75, |
| "epoch": 0.0916030534351145, |
| "grad_norm": 3.3182334899902344, |
| "kl": 0.0, |
| "learning_rate": 9.59925547168887e-07, |
| "loss": 0.721, |
| "num_samples": 1.0, |
| "reward": 3.4375, |
| "reward_std": 1.0731656551361084, |
| "rewards/gpt4o_holistic_reward": 3.4375, |
| "rl_loss": 3.725290298461914e-09, |
| "sft_loss": 2.403482675552368, |
| "speech_entropy": 2.526559829711914, |
| "speech_kl": 0.0, |
| "step": 288, |
| "text_entropy": 1.5308034420013428, |
| "text_kl": 0.0, |
| "total_entropy": 2.3434336185455322 |
| }, |
| { |
| "combined_loss": 0.7331863641738892, |
| "completion_length": 562.9375, |
| "epoch": 0.09192111959287531, |
| "grad_norm": 1.7665117979049683, |
| "kl": 0.0, |
| "learning_rate": 9.596304881613432e-07, |
| "loss": 0.7332, |
| "num_samples": 1.0, |
| "reward": 3.375, |
| "reward_std": 1.0327467918395996, |
| "rewards/gpt4o_holistic_reward": 3.375, |
| "rl_loss": 7.450580596923828e-09, |
| "sft_loss": 2.4439544677734375, |
| "speech_entropy": 2.499924659729004, |
| "speech_kl": 0.0, |
| "step": 289, |
| "text_entropy": 1.3738188743591309, |
| "text_kl": 0.0, |
| "total_entropy": 2.2846546173095703 |
| }, |
| { |
| "combined_loss": 0.7444977760314941, |
| "completion_length": 483.3125, |
| "epoch": 0.09223918575063614, |
| "grad_norm": 1.8501068353652954, |
| "kl": 0.0, |
| "learning_rate": 9.593343979095332e-07, |
| "loss": 0.7445, |
| "num_samples": 1.0, |
| "reward": 3.75, |
| "reward_std": 0.5000999569892883, |
| "rewards/gpt4o_holistic_reward": 3.75, |
| "rl_loss": 7.450580596923828e-09, |
| "sft_loss": 2.481659173965454, |
| "speech_entropy": 2.394073486328125, |
| "speech_kl": 0.0, |
| "step": 290, |
| "text_entropy": 1.5317790508270264, |
| "text_kl": 0.0, |
| "total_entropy": 2.2343432903289795 |
| }, |
| { |
| "combined_loss": 0.6787593364715576, |
| "completion_length": 557.375, |
| "epoch": 0.09255725190839695, |
| "grad_norm": 2.0699431896209717, |
| "kl": 0.0, |
| "learning_rate": 9.59037277158864e-07, |
| "loss": 0.6788, |
| "num_samples": 1.0, |
| "reward": 2.6875, |
| "reward_std": 0.9478486180305481, |
| "rewards/gpt4o_holistic_reward": 2.6875, |
| "rl_loss": 1.862645149230957e-08, |
| "sft_loss": 2.26253080368042, |
| "speech_entropy": 2.463019609451294, |
| "speech_kl": 0.0, |
| "step": 291, |
| "text_entropy": 1.1903096437454224, |
| "text_kl": 0.0, |
| "total_entropy": 2.2193331718444824 |
| }, |
| { |
| "combined_loss": 0.6866003274917603, |
| "completion_length": 340.25, |
| "epoch": 0.09287531806615776, |
| "grad_norm": 1.7148224115371704, |
| "kl": 0.0, |
| "learning_rate": 9.587391266573366e-07, |
| "loss": 0.6866, |
| "num_samples": 1.0, |
| "reward": 4.875, |
| "reward_std": 0.2501000165939331, |
| "rewards/gpt4o_holistic_reward": 4.875, |
| "rl_loss": -3.725290298461914e-09, |
| "sft_loss": 2.288667678833008, |
| "speech_entropy": 2.2371129989624023, |
| "speech_kl": 0.0, |
| "step": 292, |
| "text_entropy": 1.372521162033081, |
| "text_kl": 0.0, |
| "total_entropy": 2.084916591644287 |
| }, |
| { |
| "combined_loss": 0.6642424464225769, |
| "completion_length": 414.375, |
| "epoch": 0.09319338422391857, |
| "grad_norm": 1.98994779586792, |
| "kl": 0.0, |
| "learning_rate": 9.584399471555449e-07, |
| "loss": 0.6642, |
| "num_samples": 1.0, |
| "reward": 4.375, |
| "reward_std": 1.1036534309387207, |
| "rewards/gpt4o_holistic_reward": 4.375, |
| "rl_loss": 0.0, |
| "sft_loss": 2.214141368865967, |
| "speech_entropy": 2.3391613960266113, |
| "speech_kl": 0.0, |
| "step": 293, |
| "text_entropy": 1.0432538986206055, |
| "text_kl": 0.0, |
| "total_entropy": 2.0753378868103027 |
| }, |
| { |
| "combined_loss": 0.6237722635269165, |
| "completion_length": 518.5, |
| "epoch": 0.09351145038167939, |
| "grad_norm": 1.5434069633483887, |
| "kl": 0.0, |
| "learning_rate": 9.581397394066726e-07, |
| "loss": 0.6238, |
| "num_samples": 1.0, |
| "reward": 3.75, |
| "reward_std": 0.5001000165939331, |
| "rewards/gpt4o_holistic_reward": 3.75, |
| "rl_loss": 1.862645149230957e-09, |
| "sft_loss": 2.0792407989501953, |
| "speech_entropy": 2.4281280040740967, |
| "speech_kl": 0.0, |
| "step": 294, |
| "text_entropy": 1.0986557006835938, |
| "text_kl": 0.0, |
| "total_entropy": 2.148758888244629 |
| }, |
| { |
| "combined_loss": 0.6445981860160828, |
| "completion_length": 476.0625, |
| "epoch": 0.0938295165394402, |
| "grad_norm": 1.9953515529632568, |
| "kl": 0.0, |
| "learning_rate": 9.578385041664925e-07, |
| "loss": 0.6446, |
| "num_samples": 1.0, |
| "reward": 3.0, |
| "reward_std": 0.5774502754211426, |
| "rewards/gpt4o_holistic_reward": 3.0, |
| "rl_loss": 1.1175870895385742e-08, |
| "sft_loss": 2.148660659790039, |
| "speech_entropy": 2.1423118114471436, |
| "speech_kl": 0.0, |
| "step": 295, |
| "text_entropy": 1.1786762475967407, |
| "text_kl": 0.0, |
| "total_entropy": 1.9594595432281494 |
| }, |
| { |
| "combined_loss": 0.698508620262146, |
| "completion_length": 513.6875, |
| "epoch": 0.09414758269720101, |
| "grad_norm": 1.7260363101959229, |
| "kl": 0.0, |
| "learning_rate": 9.575362421933638e-07, |
| "loss": 0.6985, |
| "num_samples": 1.0, |
| "reward": 3.5625, |
| "reward_std": 0.9786533117294312, |
| "rewards/gpt4o_holistic_reward": 3.5625, |
| "rl_loss": -3.725290298461914e-09, |
| "sft_loss": 2.328361988067627, |
| "speech_entropy": 2.1992037296295166, |
| "speech_kl": 0.0, |
| "step": 296, |
| "text_entropy": 1.1559290885925293, |
| "text_kl": 0.0, |
| "total_entropy": 1.9949164390563965 |
| }, |
| { |
| "combined_loss": 0.6678205132484436, |
| "completion_length": 562.1875, |
| "epoch": 0.09446564885496184, |
| "grad_norm": 2.3985586166381836, |
| "kl": 0.0, |
| "learning_rate": 9.572329542482309e-07, |
| "loss": 0.6678, |
| "num_samples": 1.0, |
| "reward": 3.3125, |
| "reward_std": 0.5622053742408752, |
| "rewards/gpt4o_holistic_reward": 3.3125, |
| "rl_loss": 1.4901161193847656e-08, |
| "sft_loss": 2.2260682582855225, |
| "speech_entropy": 2.17387056350708, |
| "speech_kl": 0.0, |
| "step": 297, |
| "text_entropy": 1.249056339263916, |
| "text_kl": 0.0, |
| "total_entropy": 1.9902275800704956 |
| }, |
| { |
| "combined_loss": 0.7157855033874512, |
| "completion_length": 452.0, |
| "epoch": 0.09478371501272265, |
| "grad_norm": 1.746224045753479, |
| "kl": 0.0, |
| "learning_rate": 9.569286410946207e-07, |
| "loss": 0.7158, |
| "num_samples": 1.0, |
| "reward": 4.5, |
| "reward_std": 0.5774502158164978, |
| "rewards/gpt4o_holistic_reward": 4.5, |
| "rl_loss": 3.725290298461914e-09, |
| "sft_loss": 2.385951519012451, |
| "speech_entropy": 2.403526782989502, |
| "speech_kl": 0.0, |
| "step": 298, |
| "text_entropy": 1.6002991199493408, |
| "text_kl": 0.0, |
| "total_entropy": 2.2620410919189453 |
| }, |
| { |
| "combined_loss": 0.7848547101020813, |
| "completion_length": 352.8125, |
| "epoch": 0.09510178117048346, |
| "grad_norm": 2.679422616958618, |
| "kl": 0.0, |
| "learning_rate": 9.566233034986411e-07, |
| "loss": 0.7849, |
| "num_samples": 1.0, |
| "reward": 3.0, |
| "reward_std": 0.7500999569892883, |
| "rewards/gpt4o_holistic_reward": 3.0, |
| "rl_loss": 1.4901161193847656e-08, |
| "sft_loss": 2.6161820888519287, |
| "speech_entropy": 2.4429244995117188, |
| "speech_kl": 0.0, |
| "step": 299, |
| "text_entropy": 1.1291344165802002, |
| "text_kl": 0.0, |
| "total_entropy": 2.160275936126709 |
| }, |
| { |
| "combined_loss": 0.7183820009231567, |
| "completion_length": 509.0, |
| "epoch": 0.09541984732824428, |
| "grad_norm": 2.244758129119873, |
| "kl": 0.0, |
| "learning_rate": 9.563169422289796e-07, |
| "loss": 0.7184, |
| "num_samples": 1.0, |
| "reward": 3.75, |
| "reward_std": 1.478813648223877, |
| "rewards/gpt4o_holistic_reward": 3.75, |
| "rl_loss": 1.862645149230957e-09, |
| "sft_loss": 2.394606590270996, |
| "speech_entropy": 2.7869982719421387, |
| "speech_kl": 0.0, |
| "step": 300, |
| "text_entropy": 1.4828916788101196, |
| "text_kl": 0.0, |
| "total_entropy": 2.5221667289733887 |
| }, |
| { |
| "combined_loss": 0.6260841488838196, |
| "completion_length": 436.25, |
| "epoch": 0.09573791348600509, |
| "grad_norm": 2.182548761367798, |
| "kl": 0.0, |
| "learning_rate": 9.560095580568996e-07, |
| "loss": 0.6261, |
| "num_samples": 1.0, |
| "reward": 3.75, |
| "reward_std": 1.395711898803711, |
| "rewards/gpt4o_holistic_reward": 3.75, |
| "rl_loss": 3.725290298461914e-09, |
| "sft_loss": 2.086947202682495, |
| "speech_entropy": 2.514786720275879, |
| "speech_kl": 0.0, |
| "step": 301, |
| "text_entropy": 1.6524386405944824, |
| "text_kl": 0.0, |
| "total_entropy": 2.2878293991088867 |
| }, |
| { |
| "combined_loss": 0.7308363914489746, |
| "completion_length": 391.375, |
| "epoch": 0.0960559796437659, |
| "grad_norm": 1.8132033348083496, |
| "kl": 0.0, |
| "learning_rate": 9.55701151756241e-07, |
| "loss": 0.7308, |
| "num_samples": 1.0, |
| "reward": 4.375, |
| "reward_std": 0.2501000165939331, |
| "rewards/gpt4o_holistic_reward": 4.375, |
| "rl_loss": -3.725290298461914e-09, |
| "sft_loss": 2.4361209869384766, |
| "speech_entropy": 2.2866296768188477, |
| "speech_kl": 0.0, |
| "step": 302, |
| "text_entropy": 1.3407695293426514, |
| "text_kl": 0.0, |
| "total_entropy": 2.0895495414733887 |
| }, |
| { |
| "combined_loss": 0.6096498966217041, |
| "completion_length": 303.9375, |
| "epoch": 0.09637404580152671, |
| "grad_norm": 2.869903087615967, |
| "kl": 0.0, |
| "learning_rate": 9.55391724103416e-07, |
| "loss": 0.6096, |
| "num_samples": 1.0, |
| "reward": 4.0625, |
| "reward_std": 0.7394567728042603, |
| "rewards/gpt4o_holistic_reward": 4.0625, |
| "rl_loss": 0.0, |
| "sft_loss": 2.0321662425994873, |
| "speech_entropy": 2.8348755836486816, |
| "speech_kl": 0.0, |
| "step": 303, |
| "text_entropy": 1.151402235031128, |
| "text_kl": 0.0, |
| "total_entropy": 2.51552152633667 |
| }, |
| { |
| "combined_loss": 0.6735842227935791, |
| "completion_length": 553.4375, |
| "epoch": 0.09669211195928754, |
| "grad_norm": 1.5486979484558105, |
| "kl": 0.0, |
| "learning_rate": 9.550812758774085e-07, |
| "loss": 0.6736, |
| "num_samples": 1.0, |
| "reward": 3.625, |
| "reward_std": 0.4788135886192322, |
| "rewards/gpt4o_holistic_reward": 3.625, |
| "rl_loss": 0.0, |
| "sft_loss": 2.2452807426452637, |
| "speech_entropy": 2.6690659523010254, |
| "speech_kl": 0.0, |
| "step": 304, |
| "text_entropy": 1.2094488143920898, |
| "text_kl": 0.0, |
| "total_entropy": 2.3848817348480225 |
| }, |
| { |
| "combined_loss": 0.6192010641098022, |
| "completion_length": 465.4375, |
| "epoch": 0.09701017811704835, |
| "grad_norm": 1.6173670291900635, |
| "kl": 0.0, |
| "learning_rate": 9.547698078597713e-07, |
| "loss": 0.6192, |
| "num_samples": 1.0, |
| "reward": 4.0625, |
| "reward_std": 0.6770563125610352, |
| "rewards/gpt4o_holistic_reward": 4.0625, |
| "rl_loss": -3.725290298461914e-09, |
| "sft_loss": 2.0640034675598145, |
| "speech_entropy": 2.154529094696045, |
| "speech_kl": 0.0, |
| "step": 305, |
| "text_entropy": 1.051210641860962, |
| "text_kl": 0.0, |
| "total_entropy": 1.921095848083496 |
| }, |
| { |
| "combined_loss": 0.6843652725219727, |
| "completion_length": 401.125, |
| "epoch": 0.09732824427480916, |
| "grad_norm": 2.191951036453247, |
| "kl": 0.0, |
| "learning_rate": 9.54457320834625e-07, |
| "loss": 0.6844, |
| "num_samples": 1.0, |
| "reward": 4.3125, |
| "reward_std": 0.5581127405166626, |
| "rewards/gpt4o_holistic_reward": 4.3125, |
| "rl_loss": 3.725290298461914e-09, |
| "sft_loss": 2.281217575073242, |
| "speech_entropy": 2.282792091369629, |
| "speech_kl": 0.0, |
| "step": 306, |
| "text_entropy": 1.1090407371520996, |
| "text_kl": 0.0, |
| "total_entropy": 2.0719239711761475 |
| }, |
| { |
| "combined_loss": 0.6455174684524536, |
| "completion_length": 423.625, |
| "epoch": 0.09764631043256998, |
| "grad_norm": 2.207777261734009, |
| "kl": 0.0, |
| "learning_rate": 9.541438155886554e-07, |
| "loss": 0.6455, |
| "num_samples": 1.0, |
| "reward": 2.625, |
| "reward_std": 0.8644567728042603, |
| "rewards/gpt4o_holistic_reward": 2.625, |
| "rl_loss": -3.725290298461914e-09, |
| "sft_loss": 2.1517248153686523, |
| "speech_entropy": 2.6509978771209717, |
| "speech_kl": 0.0, |
| "step": 307, |
| "text_entropy": 1.262976884841919, |
| "text_kl": 0.0, |
| "total_entropy": 2.38793683052063 |
| }, |
| { |
| "combined_loss": 0.7142800092697144, |
| "completion_length": 549.875, |
| "epoch": 0.09796437659033079, |
| "grad_norm": 1.5794309377670288, |
| "kl": 0.0, |
| "learning_rate": 9.538292929111112e-07, |
| "loss": 0.7143, |
| "num_samples": 1.0, |
| "reward": 3.5, |
| "reward_std": 0.2501000165939331, |
| "rewards/gpt4o_holistic_reward": 3.5, |
| "rl_loss": 3.725290298461914e-09, |
| "sft_loss": 2.3809332847595215, |
| "speech_entropy": 2.319915294647217, |
| "speech_kl": 0.0, |
| "step": 308, |
| "text_entropy": 1.5507756471633911, |
| "text_kl": 0.0, |
| "total_entropy": 2.1751065254211426 |
| }, |
| { |
| "combined_loss": 0.6554994583129883, |
| "completion_length": 314.3125, |
| "epoch": 0.0982824427480916, |
| "grad_norm": 1.8395949602127075, |
| "kl": 0.0, |
| "learning_rate": 9.535137535938031e-07, |
| "loss": 0.6555, |
| "num_samples": 1.0, |
| "reward": 4.4375, |
| "reward_std": 0.5646764636039734, |
| "rewards/gpt4o_holistic_reward": 4.4375, |
| "rl_loss": -7.450580596923828e-09, |
| "sft_loss": 2.1849982738494873, |
| "speech_entropy": 2.135890245437622, |
| "speech_kl": 0.0, |
| "step": 309, |
| "text_entropy": 0.8897652626037598, |
| "text_kl": 0.0, |
| "total_entropy": 1.9156931638717651 |
| }, |
| { |
| "combined_loss": 0.738502025604248, |
| "completion_length": 449.25, |
| "epoch": 0.09860050890585242, |
| "grad_norm": 1.810171127319336, |
| "kl": 0.0, |
| "learning_rate": 9.531971984311011e-07, |
| "loss": 0.7385, |
| "num_samples": 1.0, |
| "reward": 3.5625, |
| "reward_std": 0.7654882073402405, |
| "rewards/gpt4o_holistic_reward": 3.5625, |
| "rl_loss": 3.725290298461914e-09, |
| "sft_loss": 2.4616734981536865, |
| "speech_entropy": 2.2290310859680176, |
| "speech_kl": 0.0, |
| "step": 310, |
| "text_entropy": 1.629712462425232, |
| "text_kl": 0.0, |
| "total_entropy": 2.121129035949707 |
| }, |
| { |
| "combined_loss": 0.7250782251358032, |
| "completion_length": 504.125, |
| "epoch": 0.09891857506361323, |
| "grad_norm": 1.8087352514266968, |
| "kl": 0.0, |
| "learning_rate": 9.528796282199321e-07, |
| "loss": 0.7251, |
| "num_samples": 1.0, |
| "reward": 3.6875, |
| "reward_std": 0.8750999569892883, |
| "rewards/gpt4o_holistic_reward": 3.6875, |
| "rl_loss": 7.450580596923828e-09, |
| "sft_loss": 2.4169273376464844, |
| "speech_entropy": 2.1521968841552734, |
| "speech_kl": 0.0, |
| "step": 311, |
| "text_entropy": 1.3652535676956177, |
| "text_kl": 0.0, |
| "total_entropy": 2.007472038269043 |
| }, |
| { |
| "combined_loss": 0.6740373373031616, |
| "completion_length": 274.0625, |
| "epoch": 0.09923664122137404, |
| "grad_norm": 2.0441370010375977, |
| "kl": 0.0, |
| "learning_rate": 9.52561043759779e-07, |
| "loss": 0.674, |
| "num_samples": 1.0, |
| "reward": 4.25, |
| "reward_std": 1.183112621307373, |
| "rewards/gpt4o_holistic_reward": 4.25, |
| "rl_loss": 3.725290298461914e-09, |
| "sft_loss": 2.246790885925293, |
| "speech_entropy": 2.477415084838867, |
| "speech_kl": 0.0, |
| "step": 312, |
| "text_entropy": 0.9889883399009705, |
| "text_kl": 0.0, |
| "total_entropy": 2.1920857429504395 |
| }, |
| { |
| "combined_loss": 0.64084392786026, |
| "completion_length": 396.0625, |
| "epoch": 0.09955470737913485, |
| "grad_norm": 1.5913077592849731, |
| "kl": 0.0, |
| "learning_rate": 9.522414458526778e-07, |
| "loss": 0.6408, |
| "num_samples": 1.0, |
| "reward": 4.1875, |
| "reward_std": 0.1251000016927719, |
| "rewards/gpt4o_holistic_reward": 4.1875, |
| "rl_loss": 7.450580596923828e-09, |
| "sft_loss": 2.1361465454101562, |
| "speech_entropy": 2.176445484161377, |
| "speech_kl": 0.0, |
| "step": 313, |
| "text_entropy": 1.2821910381317139, |
| "text_kl": 0.0, |
| "total_entropy": 2.0197858810424805 |
| }, |
| { |
| "combined_loss": 0.6571515202522278, |
| "completion_length": 449.0625, |
| "epoch": 0.09987277353689568, |
| "grad_norm": 1.8189066648483276, |
| "kl": 0.0, |
| "learning_rate": 9.519208353032158e-07, |
| "loss": 0.6572, |
| "num_samples": 1.0, |
| "reward": 4.3125, |
| "reward_std": 0.4733423590660095, |
| "rewards/gpt4o_holistic_reward": 4.3125, |
| "rl_loss": -7.450580596923828e-09, |
| "sft_loss": 2.190505027770996, |
| "speech_entropy": 2.1687145233154297, |
| "speech_kl": 0.0, |
| "step": 314, |
| "text_entropy": 1.071582555770874, |
| "text_kl": 0.0, |
| "total_entropy": 1.9608268737792969 |
| }, |
| { |
| "combined_loss": 0.5773699283599854, |
| "completion_length": 375.3125, |
| "epoch": 0.10019083969465649, |
| "grad_norm": 1.6602267026901245, |
| "kl": 0.0, |
| "learning_rate": 9.515992129185294e-07, |
| "loss": 0.5774, |
| "num_samples": 1.0, |
| "reward": 4.375, |
| "reward_std": 0.8274502158164978, |
| "rewards/gpt4o_holistic_reward": 4.375, |
| "rl_loss": -7.450580596923828e-09, |
| "sft_loss": 1.924566388130188, |
| "speech_entropy": 2.1676058769226074, |
| "speech_kl": 0.0, |
| "step": 315, |
| "text_entropy": 0.8978569507598877, |
| "text_kl": 0.0, |
| "total_entropy": 1.9022667407989502 |
| }, |
| { |
| "combined_loss": 0.591927170753479, |
| "completion_length": 374.9375, |
| "epoch": 0.1005089058524173, |
| "grad_norm": 1.8265076875686646, |
| "kl": 0.0, |
| "learning_rate": 9.512765795083029e-07, |
| "loss": 0.5919, |
| "num_samples": 1.0, |
| "reward": 4.0625, |
| "reward_std": 0.1251000016927719, |
| "rewards/gpt4o_holistic_reward": 4.0625, |
| "rl_loss": 0.0, |
| "sft_loss": 1.973090410232544, |
| "speech_entropy": 2.2445173263549805, |
| "speech_kl": 0.0, |
| "step": 316, |
| "text_entropy": 0.9660072922706604, |
| "text_kl": 0.0, |
| "total_entropy": 1.9937914609909058 |
| }, |
| { |
| "combined_loss": 0.7159188985824585, |
| "completion_length": 387.875, |
| "epoch": 0.10082697201017812, |
| "grad_norm": 2.4045403003692627, |
| "kl": 0.0, |
| "learning_rate": 9.509529358847654e-07, |
| "loss": 0.7159, |
| "num_samples": 1.0, |
| "reward": 4.625, |
| "reward_std": 0.6444375514984131, |
| "rewards/gpt4o_holistic_reward": 4.625, |
| "rl_loss": 3.725290298461914e-09, |
| "sft_loss": 2.3863961696624756, |
| "speech_entropy": 2.341677665710449, |
| "speech_kl": 0.0, |
| "step": 317, |
| "text_entropy": 1.6361427307128906, |
| "text_kl": 0.0, |
| "total_entropy": 2.214456796646118 |
| }, |
| { |
| "combined_loss": 0.6382442712783813, |
| "completion_length": 391.0625, |
| "epoch": 0.10114503816793893, |
| "grad_norm": 1.9414576292037964, |
| "kl": 0.0, |
| "learning_rate": 9.506282828626894e-07, |
| "loss": 0.6382, |
| "num_samples": 1.0, |
| "reward": 2.75, |
| "reward_std": 0.5001000165939331, |
| "rewards/gpt4o_holistic_reward": 2.75, |
| "rl_loss": 1.1175870895385742e-08, |
| "sft_loss": 2.1274807453155518, |
| "speech_entropy": 2.3970725536346436, |
| "speech_kl": 0.0, |
| "step": 318, |
| "text_entropy": 1.257880449295044, |
| "text_kl": 0.0, |
| "total_entropy": 2.180114269256592 |
| }, |
| { |
| "combined_loss": 0.6456592082977295, |
| "completion_length": 357.625, |
| "epoch": 0.10146310432569974, |
| "grad_norm": 1.8857872486114502, |
| "kl": 0.0, |
| "learning_rate": 9.503026212593886e-07, |
| "loss": 0.6457, |
| "num_samples": 1.0, |
| "reward": 3.3125, |
| "reward_std": 0.6251000165939331, |
| "rewards/gpt4o_holistic_reward": 3.3125, |
| "rl_loss": -1.4901161193847656e-08, |
| "sft_loss": 2.1521973609924316, |
| "speech_entropy": 2.1984481811523438, |
| "speech_kl": 0.0, |
| "step": 319, |
| "text_entropy": 1.3871957063674927, |
| "text_kl": 0.0, |
| "total_entropy": 2.054154634475708 |
| }, |
| { |
| "combined_loss": 0.6763216257095337, |
| "completion_length": 374.75, |
| "epoch": 0.10178117048346055, |
| "grad_norm": 1.619836688041687, |
| "kl": 0.0, |
| "learning_rate": 9.499759518947154e-07, |
| "loss": 0.6763, |
| "num_samples": 1.0, |
| "reward": 4.25, |
| "reward_std": 9.999999747378752e-05, |
| "rewards/gpt4o_holistic_reward": 4.25, |
| "rl_loss": 0.0, |
| "sft_loss": 2.2544054985046387, |
| "speech_entropy": 2.144301414489746, |
| "speech_kl": 0.0, |
| "step": 320, |
| "text_entropy": 0.933074951171875, |
| "text_kl": 0.0, |
| "total_entropy": 1.9263477325439453 |
| }, |
| { |
| "combined_loss": 0.8536103963851929, |
| "completion_length": 433.5625, |
| "epoch": 0.10209923664122138, |
| "grad_norm": 3.0396976470947266, |
| "kl": 0.0, |
| "learning_rate": 9.496482755910599e-07, |
| "loss": 0.8536, |
| "num_samples": 1.0, |
| "reward": 2.875, |
| "reward_std": 0.772705078125, |
| "rewards/gpt4o_holistic_reward": 2.875, |
| "rl_loss": -7.450580596923828e-09, |
| "sft_loss": 2.8453681468963623, |
| "speech_entropy": 2.4692482948303223, |
| "speech_kl": 0.0, |
| "step": 321, |
| "text_entropy": 1.5737974643707275, |
| "text_kl": 0.0, |
| "total_entropy": 2.3104732036590576 |
| }, |
| { |
| "combined_loss": 0.6165826320648193, |
| "completion_length": 455.8125, |
| "epoch": 0.10241730279898219, |
| "grad_norm": 1.8582490682601929, |
| "kl": 0.0, |
| "learning_rate": 9.493195931733465e-07, |
| "loss": 0.6166, |
| "num_samples": 1.0, |
| "reward": 3.8125, |
| "reward_std": 0.8750999569892883, |
| "rewards/gpt4o_holistic_reward": 3.8125, |
| "rl_loss": -3.725290298461914e-09, |
| "sft_loss": 2.0552754402160645, |
| "speech_entropy": 2.3463516235351562, |
| "speech_kl": 0.0, |
| "step": 322, |
| "text_entropy": 0.7413707971572876, |
| "text_kl": 0.0, |
| "total_entropy": 2.0313949584960938 |
| }, |
| { |
| "combined_loss": 0.7083429098129272, |
| "completion_length": 455.0625, |
| "epoch": 0.102735368956743, |
| "grad_norm": 1.5822250843048096, |
| "kl": 0.0, |
| "learning_rate": 9.489899054690329e-07, |
| "loss": 0.7083, |
| "num_samples": 1.0, |
| "reward": 3.5, |
| "reward_std": 0.3536534011363983, |
| "rewards/gpt4o_holistic_reward": 3.5, |
| "rl_loss": 0.0, |
| "sft_loss": 2.361143112182617, |
| "speech_entropy": 2.1958041191101074, |
| "speech_kl": 0.0, |
| "step": 323, |
| "text_entropy": 1.5856623649597168, |
| "text_kl": 0.0, |
| "total_entropy": 2.0761876106262207 |
| }, |
| { |
| "combined_loss": 0.6362742185592651, |
| "completion_length": 430.3125, |
| "epoch": 0.10305343511450382, |
| "grad_norm": 2.0028886795043945, |
| "kl": 0.0, |
| "learning_rate": 9.486592133081075e-07, |
| "loss": 0.6363, |
| "num_samples": 1.0, |
| "reward": 4.3125, |
| "reward_std": 0.4478486180305481, |
| "rewards/gpt4o_holistic_reward": 4.3125, |
| "rl_loss": 7.450580596923828e-09, |
| "sft_loss": 2.1209139823913574, |
| "speech_entropy": 2.1673266887664795, |
| "speech_kl": 0.0, |
| "step": 324, |
| "text_entropy": 1.05299711227417, |
| "text_kl": 0.0, |
| "total_entropy": 1.9691715240478516 |
| }, |
| { |
| "combined_loss": 0.7382842302322388, |
| "completion_length": 476.875, |
| "epoch": 0.10337150127226463, |
| "grad_norm": 2.1232645511627197, |
| "kl": 0.0, |
| "learning_rate": 9.483275175230874e-07, |
| "loss": 0.7383, |
| "num_samples": 1.0, |
| "reward": 3.75, |
| "reward_std": 1.0774502754211426, |
| "rewards/gpt4o_holistic_reward": 3.75, |
| "rl_loss": 1.1175870895385742e-08, |
| "sft_loss": 2.460947275161743, |
| "speech_entropy": 2.690502882003784, |
| "speech_kl": 0.0, |
| "step": 325, |
| "text_entropy": 1.2036354541778564, |
| "text_kl": 0.0, |
| "total_entropy": 2.3894941806793213 |
| }, |
| { |
| "combined_loss": 0.6234292387962341, |
| "completion_length": 601.4375, |
| "epoch": 0.10368956743002544, |
| "grad_norm": 1.537564992904663, |
| "kl": 0.0, |
| "learning_rate": 9.479948189490164e-07, |
| "loss": 0.6234, |
| "num_samples": 1.0, |
| "reward": 4.3125, |
| "reward_std": 0.5581127405166626, |
| "rewards/gpt4o_holistic_reward": 4.3125, |
| "rl_loss": -7.450580596923828e-09, |
| "sft_loss": 2.078097343444824, |
| "speech_entropy": 2.1220028400421143, |
| "speech_kl": 0.0, |
| "step": 326, |
| "text_entropy": 1.139967918395996, |
| "text_kl": 0.0, |
| "total_entropy": 1.9373573064804077 |
| }, |
| { |
| "combined_loss": 0.6593961715698242, |
| "completion_length": 401.125, |
| "epoch": 0.10400763358778627, |
| "grad_norm": 2.1285951137542725, |
| "kl": 0.0, |
| "learning_rate": 9.476611184234627e-07, |
| "loss": 0.6594, |
| "num_samples": 1.0, |
| "reward": 3.3125, |
| "reward_std": 1.019437551498413, |
| "rewards/gpt4o_holistic_reward": 3.3125, |
| "rl_loss": -3.725290298461914e-09, |
| "sft_loss": 2.1979873180389404, |
| "speech_entropy": 2.558565616607666, |
| "speech_kl": 0.0, |
| "step": 327, |
| "text_entropy": 1.2850337028503418, |
| "text_kl": 0.0, |
| "total_entropy": 2.329627513885498 |
| }, |
| { |
| "combined_loss": 0.7995979189872742, |
| "completion_length": 416.5625, |
| "epoch": 0.10432569974554708, |
| "grad_norm": 2.2032155990600586, |
| "kl": 0.0, |
| "learning_rate": 9.473264167865171e-07, |
| "loss": 0.7996, |
| "num_samples": 1.0, |
| "reward": 3.6875, |
| "reward_std": 0.6637751460075378, |
| "rewards/gpt4o_holistic_reward": 3.6875, |
| "rl_loss": 0.0, |
| "sft_loss": 2.6653263568878174, |
| "speech_entropy": 2.289992332458496, |
| "speech_kl": 0.0, |
| "step": 328, |
| "text_entropy": 1.8079229593276978, |
| "text_kl": 0.0, |
| "total_entropy": 2.2008442878723145 |
| }, |
| { |
| "combined_loss": 0.7375897169113159, |
| "completion_length": 515.0625, |
| "epoch": 0.10464376590330789, |
| "grad_norm": 1.9254885911941528, |
| "kl": 0.0, |
| "learning_rate": 9.469907148807904e-07, |
| "loss": 0.7376, |
| "num_samples": 1.0, |
| "reward": 3.125, |
| "reward_std": 0.8515443801879883, |
| "rewards/gpt4o_holistic_reward": 3.125, |
| "rl_loss": 1.4901161193847656e-08, |
| "sft_loss": 2.458632469177246, |
| "speech_entropy": 2.328657627105713, |
| "speech_kl": 0.0, |
| "step": 329, |
| "text_entropy": 1.6102688312530518, |
| "text_kl": 0.0, |
| "total_entropy": 2.207139492034912 |
| }, |
| { |
| "combined_loss": 0.6494673490524292, |
| "completion_length": 605.5, |
| "epoch": 0.1049618320610687, |
| "grad_norm": 1.5243898630142212, |
| "kl": 0.0, |
| "learning_rate": 9.466540135514118e-07, |
| "loss": 0.6495, |
| "num_samples": 1.0, |
| "reward": 3.5625, |
| "reward_std": 0.5581127405166626, |
| "rewards/gpt4o_holistic_reward": 3.5625, |
| "rl_loss": 3.725290298461914e-09, |
| "sft_loss": 2.164891004562378, |
| "speech_entropy": 2.216064929962158, |
| "speech_kl": 0.0, |
| "step": 330, |
| "text_entropy": 1.050743579864502, |
| "text_kl": 0.0, |
| "total_entropy": 1.9922053813934326 |
| }, |
| { |
| "combined_loss": 0.6644630432128906, |
| "completion_length": 465.4375, |
| "epoch": 0.10527989821882952, |
| "grad_norm": 2.0127413272857666, |
| "kl": 0.0, |
| "learning_rate": 9.463163136460267e-07, |
| "loss": 0.6645, |
| "num_samples": 1.0, |
| "reward": 3.625, |
| "reward_std": 1.1372368335723877, |
| "rewards/gpt4o_holistic_reward": 3.625, |
| "rl_loss": 1.4901161193847656e-08, |
| "sft_loss": 2.214876651763916, |
| "speech_entropy": 2.606668472290039, |
| "speech_kl": 0.0, |
| "step": 331, |
| "text_entropy": 1.4162580966949463, |
| "text_kl": 0.0, |
| "total_entropy": 2.3680739402770996 |
| }, |
| { |
| "combined_loss": 0.6894693374633789, |
| "completion_length": 457.5, |
| "epoch": 0.10559796437659033, |
| "grad_norm": 1.5198652744293213, |
| "kl": 0.0, |
| "learning_rate": 9.45977616014794e-07, |
| "loss": 0.6895, |
| "num_samples": 1.0, |
| "reward": 4.8125, |
| "reward_std": 0.3751000165939331, |
| "rewards/gpt4o_holistic_reward": 4.8125, |
| "rl_loss": 0.0, |
| "sft_loss": 2.2982308864593506, |
| "speech_entropy": 2.0877485275268555, |
| "speech_kl": 0.0, |
| "step": 332, |
| "text_entropy": 1.2472989559173584, |
| "text_kl": 0.0, |
| "total_entropy": 1.9227503538131714 |
| }, |
| { |
| "combined_loss": 0.6039384603500366, |
| "completion_length": 533.625, |
| "epoch": 0.10591603053435114, |
| "grad_norm": 1.8885440826416016, |
| "kl": 0.0, |
| "learning_rate": 9.456379215103845e-07, |
| "loss": 0.6039, |
| "num_samples": 1.0, |
| "reward": 3.125, |
| "reward_std": 1.010462999343872, |
| "rewards/gpt4o_holistic_reward": 3.125, |
| "rl_loss": -3.725290298461914e-09, |
| "sft_loss": 2.0131282806396484, |
| "speech_entropy": 2.623091697692871, |
| "speech_kl": 0.0, |
| "step": 333, |
| "text_entropy": 0.8223081827163696, |
| "text_kl": 0.0, |
| "total_entropy": 2.1995439529418945 |
| }, |
| { |
| "combined_loss": 0.7336439490318298, |
| "completion_length": 343.3125, |
| "epoch": 0.10623409669211197, |
| "grad_norm": 1.7900915145874023, |
| "kl": 0.0, |
| "learning_rate": 9.452972309879789e-07, |
| "loss": 0.7336, |
| "num_samples": 1.0, |
| "reward": 3.875, |
| "reward_std": 0.2501000165939331, |
| "rewards/gpt4o_holistic_reward": 3.875, |
| "rl_loss": -1.4901161193847656e-08, |
| "sft_loss": 2.4454798698425293, |
| "speech_entropy": 2.2133467197418213, |
| "speech_kl": 0.0, |
| "step": 334, |
| "text_entropy": 1.487365484237671, |
| "text_kl": 0.0, |
| "total_entropy": 2.088663339614868 |
| }, |
| { |
| "combined_loss": 0.6571398377418518, |
| "completion_length": 472.6875, |
| "epoch": 0.10655216284987278, |
| "grad_norm": 1.6357449293136597, |
| "kl": 0.0, |
| "learning_rate": 9.44955545305265e-07, |
| "loss": 0.6571, |
| "num_samples": 1.0, |
| "reward": 4.75, |
| "reward_std": 0.5000999569892883, |
| "rewards/gpt4o_holistic_reward": 4.75, |
| "rl_loss": 0.0, |
| "sft_loss": 2.1904659271240234, |
| "speech_entropy": 2.30568790435791, |
| "speech_kl": 0.0, |
| "step": 335, |
| "text_entropy": 0.9574769139289856, |
| "text_kl": 0.0, |
| "total_entropy": 2.014315128326416 |
| }, |
| { |
| "combined_loss": 0.663806140422821, |
| "completion_length": 316.8125, |
| "epoch": 0.10687022900763359, |
| "grad_norm": 1.8551936149597168, |
| "kl": 0.0, |
| "learning_rate": 9.446128653224363e-07, |
| "loss": 0.6638, |
| "num_samples": 1.0, |
| "reward": 2.9375, |
| "reward_std": 0.1251000016927719, |
| "rewards/gpt4o_holistic_reward": 2.9375, |
| "rl_loss": 7.450580596923828e-09, |
| "sft_loss": 2.2126870155334473, |
| "speech_entropy": 2.284435749053955, |
| "speech_kl": 0.0, |
| "step": 336, |
| "text_entropy": 1.4010343551635742, |
| "text_kl": 0.0, |
| "total_entropy": 2.119978904724121 |
| }, |
| { |
| "combined_loss": 0.696797788143158, |
| "completion_length": 450.1875, |
| "epoch": 0.1071882951653944, |
| "grad_norm": 1.812302589416504, |
| "kl": 0.0, |
| "learning_rate": 9.442691919021891e-07, |
| "loss": 0.6968, |
| "num_samples": 1.0, |
| "reward": 3.25, |
| "reward_std": 0.7887751460075378, |
| "rewards/gpt4o_holistic_reward": 3.25, |
| "rl_loss": -7.450580596923828e-09, |
| "sft_loss": 2.3226590156555176, |
| "speech_entropy": 2.48870587348938, |
| "speech_kl": 0.0, |
| "step": 337, |
| "text_entropy": 1.0859538316726685, |
| "text_kl": 0.0, |
| "total_entropy": 2.2247252464294434 |
| }, |
| { |
| "combined_loss": 0.6544849276542664, |
| "completion_length": 362.5, |
| "epoch": 0.10750636132315522, |
| "grad_norm": 1.7814265489578247, |
| "kl": 0.0, |
| "learning_rate": 9.43924525909721e-07, |
| "loss": 0.6545, |
| "num_samples": 1.0, |
| "reward": 4.25, |
| "reward_std": 0.6831126809120178, |
| "rewards/gpt4o_holistic_reward": 4.25, |
| "rl_loss": -3.725290298461914e-09, |
| "sft_loss": 2.1816163063049316, |
| "speech_entropy": 2.2594857215881348, |
| "speech_kl": 0.0, |
| "step": 338, |
| "text_entropy": 1.0299164056777954, |
| "text_kl": 0.0, |
| "total_entropy": 2.003577470779419 |
| }, |
| { |
| "combined_loss": 0.5944312214851379, |
| "completion_length": 279.625, |
| "epoch": 0.10782442748091603, |
| "grad_norm": 1.5945285558700562, |
| "kl": 0.0, |
| "learning_rate": 9.43578868212728e-07, |
| "loss": 0.5944, |
| "num_samples": 1.0, |
| "reward": 4.3125, |
| "reward_std": 0.4733423590660095, |
| "rewards/gpt4o_holistic_reward": 4.3125, |
| "rl_loss": -3.725290298461914e-09, |
| "sft_loss": 1.9814373254776, |
| "speech_entropy": 2.3196024894714355, |
| "speech_kl": 0.0, |
| "step": 339, |
| "text_entropy": 0.6264474391937256, |
| "text_kl": 0.0, |
| "total_entropy": 1.9987720251083374 |
| }, |
| { |
| "combined_loss": 0.7080922722816467, |
| "completion_length": 357.75, |
| "epoch": 0.10814249363867684, |
| "grad_norm": 1.8270853757858276, |
| "kl": 0.0, |
| "learning_rate": 9.432322196814032e-07, |
| "loss": 0.7081, |
| "num_samples": 1.0, |
| "reward": 3.0, |
| "reward_std": 0.2501000165939331, |
| "rewards/gpt4o_holistic_reward": 3.0, |
| "rl_loss": -3.725290298461914e-09, |
| "sft_loss": 2.360307455062866, |
| "speech_entropy": 2.377963066101074, |
| "speech_kl": 0.0, |
| "step": 340, |
| "text_entropy": 1.37347412109375, |
| "text_kl": 0.0, |
| "total_entropy": 2.1954591274261475 |
| }, |
| { |
| "combined_loss": 0.6495932340621948, |
| "completion_length": 439.4375, |
| "epoch": 0.10846055979643766, |
| "grad_norm": 1.823044776916504, |
| "kl": 0.0, |
| "learning_rate": 9.428845811884336e-07, |
| "loss": 0.6496, |
| "num_samples": 1.0, |
| "reward": 3.3125, |
| "reward_std": 0.2694375813007355, |
| "rewards/gpt4o_holistic_reward": 3.3125, |
| "rl_loss": -7.450580596923828e-09, |
| "sft_loss": 2.165310859680176, |
| "speech_entropy": 2.547529697418213, |
| "speech_kl": 0.0, |
| "step": 341, |
| "text_entropy": 1.3410420417785645, |
| "text_kl": 0.0, |
| "total_entropy": 2.3126533031463623 |
| }, |
| { |
| "combined_loss": 0.744182825088501, |
| "completion_length": 700.0625, |
| "epoch": 0.10877862595419847, |
| "grad_norm": 1.4928951263427734, |
| "kl": 0.0, |
| "learning_rate": 9.42535953608999e-07, |
| "loss": 0.7442, |
| "num_samples": 1.0, |
| "reward": 3.625, |
| "reward_std": 0.36445680260658264, |
| "rewards/gpt4o_holistic_reward": 3.625, |
| "rl_loss": 0.0, |
| "sft_loss": 2.48060941696167, |
| "speech_entropy": 2.0912342071533203, |
| "speech_kl": 0.0, |
| "step": 342, |
| "text_entropy": 1.2619953155517578, |
| "text_kl": 0.0, |
| "total_entropy": 1.9290344715118408 |
| }, |
| { |
| "combined_loss": 0.6683810353279114, |
| "completion_length": 419.0, |
| "epoch": 0.10909669211195928, |
| "grad_norm": 2.006411075592041, |
| "kl": 0.0, |
| "learning_rate": 9.421863378207685e-07, |
| "loss": 0.6684, |
| "num_samples": 1.0, |
| "reward": 3.5625, |
| "reward_std": 1.0154881477355957, |
| "rewards/gpt4o_holistic_reward": 3.5625, |
| "rl_loss": 2.2351741790771484e-08, |
| "sft_loss": 2.2279367446899414, |
| "speech_entropy": 2.1261141300201416, |
| "speech_kl": 0.0, |
| "step": 343, |
| "text_entropy": 1.138145923614502, |
| "text_kl": 0.0, |
| "total_entropy": 1.941408395767212 |
| }, |
| { |
| "combined_loss": 0.6885769367218018, |
| "completion_length": 448.625, |
| "epoch": 0.10941475826972011, |
| "grad_norm": 1.6280336380004883, |
| "kl": 0.0, |
| "learning_rate": 9.418357347038998e-07, |
| "loss": 0.6886, |
| "num_samples": 1.0, |
| "reward": 3.8125, |
| "reward_std": 0.3751000165939331, |
| "rewards/gpt4o_holistic_reward": 3.8125, |
| "rl_loss": 1.1175870895385742e-08, |
| "sft_loss": 2.2952563762664795, |
| "speech_entropy": 2.533572196960449, |
| "speech_kl": 0.0, |
| "step": 344, |
| "text_entropy": 1.452898621559143, |
| "text_kl": 0.0, |
| "total_entropy": 2.3209335803985596 |
| }, |
| { |
| "combined_loss": 0.6933377981185913, |
| "completion_length": 469.25, |
| "epoch": 0.10973282442748092, |
| "grad_norm": 1.7831571102142334, |
| "kl": 0.0, |
| "learning_rate": 9.414841451410354e-07, |
| "loss": 0.6933, |
| "num_samples": 1.0, |
| "reward": 3.625, |
| "reward_std": 0.7217878103256226, |
| "rewards/gpt4o_holistic_reward": 3.625, |
| "rl_loss": -7.450580596923828e-09, |
| "sft_loss": 2.3111257553100586, |
| "speech_entropy": 2.4832825660705566, |
| "speech_kl": 0.0, |
| "step": 345, |
| "text_entropy": 1.2831058502197266, |
| "text_kl": 0.0, |
| "total_entropy": 2.2077646255493164 |
| }, |
| { |
| "combined_loss": 0.6893813610076904, |
| "completion_length": 457.25, |
| "epoch": 0.11005089058524173, |
| "grad_norm": 1.5565513372421265, |
| "kl": 0.0, |
| "learning_rate": 9.411315700173023e-07, |
| "loss": 0.6894, |
| "num_samples": 1.0, |
| "reward": 4.25, |
| "reward_std": 9.999999747378752e-05, |
| "rewards/gpt4o_holistic_reward": 4.25, |
| "rl_loss": 0.0, |
| "sft_loss": 2.2979378700256348, |
| "speech_entropy": 2.1734261512756348, |
| "speech_kl": 0.0, |
| "step": 346, |
| "text_entropy": 1.4588537216186523, |
| "text_kl": 0.0, |
| "total_entropy": 2.0491230487823486 |
| }, |
| { |
| "combined_loss": 0.6448703408241272, |
| "completion_length": 340.125, |
| "epoch": 0.11036895674300254, |
| "grad_norm": 2.0143826007843018, |
| "kl": 0.0, |
| "learning_rate": 9.407780102203073e-07, |
| "loss": 0.6449, |
| "num_samples": 1.0, |
| "reward": 3.0, |
| "reward_std": 1.0313551425933838, |
| "rewards/gpt4o_holistic_reward": 3.0, |
| "rl_loss": -2.2351741790771484e-08, |
| "sft_loss": 2.1495676040649414, |
| "speech_entropy": 2.1733856201171875, |
| "speech_kl": 0.0, |
| "step": 347, |
| "text_entropy": 1.2910046577453613, |
| "text_kl": 0.0, |
| "total_entropy": 2.0091493129730225 |
| }, |
| { |
| "combined_loss": 0.7653839588165283, |
| "completion_length": 219.375, |
| "epoch": 0.11068702290076336, |
| "grad_norm": 2.6158885955810547, |
| "kl": 0.0, |
| "learning_rate": 9.40423466640137e-07, |
| "loss": 0.7654, |
| "num_samples": 1.0, |
| "reward": 3.4375, |
| "reward_std": 0.4478486180305481, |
| "rewards/gpt4o_holistic_reward": 3.4375, |
| "rl_loss": -3.725290298461914e-09, |
| "sft_loss": 2.5512795448303223, |
| "speech_entropy": 2.3109560012817383, |
| "speech_kl": 0.0, |
| "step": 348, |
| "text_entropy": 1.525007963180542, |
| "text_kl": 0.0, |
| "total_entropy": 2.177790641784668 |
| }, |
| { |
| "combined_loss": 0.7735346555709839, |
| "completion_length": 485.0, |
| "epoch": 0.11100508905852417, |
| "grad_norm": 1.7478300333023071, |
| "kl": 0.0, |
| "learning_rate": 9.400679401693546e-07, |
| "loss": 0.7735, |
| "num_samples": 1.0, |
| "reward": 4.1875, |
| "reward_std": 0.48945680260658264, |
| "rewards/gpt4o_holistic_reward": 4.1875, |
| "rl_loss": 7.450580596923828e-09, |
| "sft_loss": 2.578448534011841, |
| "speech_entropy": 2.2317914962768555, |
| "speech_kl": 0.0, |
| "step": 349, |
| "text_entropy": 1.4859943389892578, |
| "text_kl": 0.0, |
| "total_entropy": 2.102978229522705 |
| }, |
| { |
| "combined_loss": 0.7051453590393066, |
| "completion_length": 413.4375, |
| "epoch": 0.11132315521628498, |
| "grad_norm": 1.8334420919418335, |
| "kl": 0.0, |
| "learning_rate": 9.397114317029974e-07, |
| "loss": 0.7051, |
| "num_samples": 1.0, |
| "reward": 3.125, |
| "reward_std": 0.14443756639957428, |
| "rewards/gpt4o_holistic_reward": 3.125, |
| "rl_loss": 0.0, |
| "sft_loss": 2.3504843711853027, |
| "speech_entropy": 2.3960375785827637, |
| "speech_kl": 0.0, |
| "step": 350, |
| "text_entropy": 1.6391247510910034, |
| "text_kl": 0.0, |
| "total_entropy": 2.2544939517974854 |
| }, |
| { |
| "combined_loss": 0.6969923973083496, |
| "completion_length": 393.25, |
| "epoch": 0.11164122137404581, |
| "grad_norm": 1.7989410161972046, |
| "kl": 0.0, |
| "learning_rate": 9.393539421385749e-07, |
| "loss": 0.697, |
| "num_samples": 1.0, |
| "reward": 3.5625, |
| "reward_std": 0.2694375813007355, |
| "rewards/gpt4o_holistic_reward": 3.5625, |
| "rl_loss": 7.450580596923828e-09, |
| "sft_loss": 2.323307514190674, |
| "speech_entropy": 2.3470993041992188, |
| "speech_kl": 0.0, |
| "step": 351, |
| "text_entropy": 1.3637549877166748, |
| "text_kl": 0.0, |
| "total_entropy": 2.1490964889526367 |
| }, |
| { |
| "combined_loss": 0.6479834318161011, |
| "completion_length": 286.0625, |
| "epoch": 0.11195928753180662, |
| "grad_norm": 1.3780865669250488, |
| "kl": 0.0, |
| "learning_rate": 9.38995472376067e-07, |
| "loss": 0.648, |
| "num_samples": 1.0, |
| "reward": 3.0, |
| "reward_std": 9.999999747378752e-05, |
| "rewards/gpt4o_holistic_reward": 3.0, |
| "rl_loss": 0.0, |
| "sft_loss": 2.159944534301758, |
| "speech_entropy": 2.2307074069976807, |
| "speech_kl": 0.0, |
| "step": 352, |
| "text_entropy": 1.2372667789459229, |
| "text_kl": 0.0, |
| "total_entropy": 2.0377559661865234 |
| }, |
| { |
| "combined_loss": 0.9208307862281799, |
| "completion_length": 562.375, |
| "epoch": 0.11227735368956743, |
| "grad_norm": 2.392199754714966, |
| "kl": 0.0, |
| "learning_rate": 9.386360233179206e-07, |
| "loss": 0.9208, |
| "num_samples": 1.0, |
| "reward": 2.875, |
| "reward_std": 0.7065354585647583, |
| "rewards/gpt4o_holistic_reward": 2.875, |
| "rl_loss": 0.0, |
| "sft_loss": 3.0694358348846436, |
| "speech_entropy": 2.4065301418304443, |
| "speech_kl": 0.0, |
| "step": 353, |
| "text_entropy": 1.0953893661499023, |
| "text_kl": 0.0, |
| "total_entropy": 2.1367077827453613 |
| }, |
| { |
| "combined_loss": 0.6610080599784851, |
| "completion_length": 534.125, |
| "epoch": 0.11259541984732824, |
| "grad_norm": 1.6925069093704224, |
| "kl": 0.0, |
| "learning_rate": 9.382755958690485e-07, |
| "loss": 0.661, |
| "num_samples": 1.0, |
| "reward": 4.625, |
| "reward_std": 0.7500999569892883, |
| "rewards/gpt4o_holistic_reward": 4.625, |
| "rl_loss": -7.450580596923828e-09, |
| "sft_loss": 2.203360080718994, |
| "speech_entropy": 2.2898178100585938, |
| "speech_kl": 0.0, |
| "step": 354, |
| "text_entropy": 1.2608642578125, |
| "text_kl": 0.0, |
| "total_entropy": 2.0855863094329834 |
| }, |
| { |
| "combined_loss": 0.6016900539398193, |
| "completion_length": 431.875, |
| "epoch": 0.11291348600508906, |
| "grad_norm": 2.1892993450164795, |
| "kl": 0.0, |
| "learning_rate": 9.379141909368262e-07, |
| "loss": 0.6017, |
| "num_samples": 1.0, |
| "reward": 3.5625, |
| "reward_std": 1.2394567728042603, |
| "rewards/gpt4o_holistic_reward": 3.5625, |
| "rl_loss": 1.4901161193847656e-08, |
| "sft_loss": 2.0056333541870117, |
| "speech_entropy": 2.3564295768737793, |
| "speech_kl": 0.0, |
| "step": 355, |
| "text_entropy": 1.2530461549758911, |
| "text_kl": 0.0, |
| "total_entropy": 2.141939640045166 |
| }, |
| { |
| "combined_loss": 0.6209220290184021, |
| "completion_length": 342.8125, |
| "epoch": 0.11323155216284987, |
| "grad_norm": 2.095818519592285, |
| "kl": 0.0, |
| "learning_rate": 9.375518094310902e-07, |
| "loss": 0.6209, |
| "num_samples": 1.0, |
| "reward": 4.5, |
| "reward_std": 0.7288135886192322, |
| "rewards/gpt4o_holistic_reward": 4.5, |
| "rl_loss": -7.450580596923828e-09, |
| "sft_loss": 2.069740056991577, |
| "speech_entropy": 2.1869020462036133, |
| "speech_kl": 0.0, |
| "step": 356, |
| "text_entropy": 1.4376100301742554, |
| "text_kl": 0.0, |
| "total_entropy": 2.056838035583496 |
| }, |
| { |
| "combined_loss": 0.6652117967605591, |
| "completion_length": 436.75, |
| "epoch": 0.11354961832061068, |
| "grad_norm": 1.6426494121551514, |
| "kl": 0.0, |
| "learning_rate": 9.371884522641357e-07, |
| "loss": 0.6652, |
| "num_samples": 1.0, |
| "reward": 4.3125, |
| "reward_std": 0.4733423590660095, |
| "rewards/gpt4o_holistic_reward": 4.3125, |
| "rl_loss": -1.4901161193847656e-08, |
| "sft_loss": 2.217372417449951, |
| "speech_entropy": 2.4225001335144043, |
| "speech_kl": 0.0, |
| "step": 357, |
| "text_entropy": 1.0462160110473633, |
| "text_kl": 0.0, |
| "total_entropy": 2.1539487838745117 |
| }, |
| { |
| "combined_loss": 0.6718185544013977, |
| "completion_length": 358.9375, |
| "epoch": 0.1138676844783715, |
| "grad_norm": 2.442697048187256, |
| "kl": 0.0, |
| "learning_rate": 9.368241203507136e-07, |
| "loss": 0.6718, |
| "num_samples": 1.0, |
| "reward": 3.375, |
| "reward_std": 0.6831126809120178, |
| "rewards/gpt4o_holistic_reward": 3.375, |
| "rl_loss": -3.725290298461914e-09, |
| "sft_loss": 2.2393951416015625, |
| "speech_entropy": 2.331137180328369, |
| "speech_kl": 0.0, |
| "step": 358, |
| "text_entropy": 1.2198386192321777, |
| "text_kl": 0.0, |
| "total_entropy": 2.1104815006256104 |
| }, |
| { |
| "combined_loss": 0.6823822259902954, |
| "completion_length": 366.125, |
| "epoch": 0.11418575063613232, |
| "grad_norm": 2.0511248111724854, |
| "kl": 0.0, |
| "learning_rate": 9.364588146080293e-07, |
| "loss": 0.6824, |
| "num_samples": 1.0, |
| "reward": 3.25, |
| "reward_std": 0.7042241096496582, |
| "rewards/gpt4o_holistic_reward": 3.25, |
| "rl_loss": 3.725290298461914e-09, |
| "sft_loss": 2.2746074199676514, |
| "speech_entropy": 2.4417941570281982, |
| "speech_kl": 0.0, |
| "step": 359, |
| "text_entropy": 1.3095173835754395, |
| "text_kl": 0.0, |
| "total_entropy": 2.220010757446289 |
| }, |
| { |
| "combined_loss": 0.8011901378631592, |
| "completion_length": 471.9375, |
| "epoch": 0.11450381679389313, |
| "grad_norm": 2.027939558029175, |
| "kl": 0.0, |
| "learning_rate": 9.360925359557396e-07, |
| "loss": 0.8012, |
| "num_samples": 1.0, |
| "reward": 3.25, |
| "reward_std": 0.6444375514984131, |
| "rewards/gpt4o_holistic_reward": 3.25, |
| "rl_loss": -1.4901161193847656e-08, |
| "sft_loss": 2.6706337928771973, |
| "speech_entropy": 2.198263168334961, |
| "speech_kl": 0.0, |
| "step": 360, |
| "text_entropy": 1.6125402450561523, |
| "text_kl": 0.0, |
| "total_entropy": 2.0894393920898438 |
| }, |
| { |
| "combined_loss": 0.6634936332702637, |
| "completion_length": 622.0625, |
| "epoch": 0.11482188295165395, |
| "grad_norm": 1.804551124572754, |
| "kl": 0.0, |
| "learning_rate": 9.357252853159505e-07, |
| "loss": 0.6635, |
| "num_samples": 1.0, |
| "reward": 3.875, |
| "reward_std": 1.2180101871490479, |
| "rewards/gpt4o_holistic_reward": 3.875, |
| "rl_loss": -1.4901161193847656e-08, |
| "sft_loss": 2.2116456031799316, |
| "speech_entropy": 2.1450726985931396, |
| "speech_kl": 0.0, |
| "step": 361, |
| "text_entropy": 1.2231552600860596, |
| "text_kl": 0.0, |
| "total_entropy": 1.972318410873413 |
| }, |
| { |
| "combined_loss": 0.7286593317985535, |
| "completion_length": 344.0, |
| "epoch": 0.11513994910941476, |
| "grad_norm": 2.2605528831481934, |
| "kl": 0.0, |
| "learning_rate": 9.35357063613215e-07, |
| "loss": 0.7287, |
| "num_samples": 1.0, |
| "reward": 3.625, |
| "reward_std": 0.933112621307373, |
| "rewards/gpt4o_holistic_reward": 3.625, |
| "rl_loss": 7.450580596923828e-09, |
| "sft_loss": 2.4288644790649414, |
| "speech_entropy": 2.370051383972168, |
| "speech_kl": 0.0, |
| "step": 362, |
| "text_entropy": 1.7082545757293701, |
| "text_kl": 0.0, |
| "total_entropy": 2.255542039871216 |
| }, |
| { |
| "combined_loss": 0.7230384349822998, |
| "completion_length": 372.1875, |
| "epoch": 0.11545801526717557, |
| "grad_norm": 1.8940509557724, |
| "kl": 0.0, |
| "learning_rate": 9.349878717745308e-07, |
| "loss": 0.723, |
| "num_samples": 1.0, |
| "reward": 4.875, |
| "reward_std": 0.2501000165939331, |
| "rewards/gpt4o_holistic_reward": 4.875, |
| "rl_loss": 1.4901161193847656e-08, |
| "sft_loss": 2.410127878189087, |
| "speech_entropy": 2.214015245437622, |
| "speech_kl": 0.0, |
| "step": 363, |
| "text_entropy": 1.4911437034606934, |
| "text_kl": 0.0, |
| "total_entropy": 2.0867202281951904 |
| }, |
| { |
| "combined_loss": 0.7651809453964233, |
| "completion_length": 591.6875, |
| "epoch": 0.11577608142493638, |
| "grad_norm": 1.642069697380066, |
| "kl": 0.0, |
| "learning_rate": 9.34617710729338e-07, |
| "loss": 0.7652, |
| "num_samples": 1.0, |
| "reward": 2.6875, |
| "reward_std": 0.8538135886192322, |
| "rewards/gpt4o_holistic_reward": 2.6875, |
| "rl_loss": -3.725290298461914e-09, |
| "sft_loss": 2.550603151321411, |
| "speech_entropy": 2.124917507171631, |
| "speech_kl": 0.0, |
| "step": 364, |
| "text_entropy": 1.3163607120513916, |
| "text_kl": 0.0, |
| "total_entropy": 1.9680266380310059 |
| }, |
| { |
| "combined_loss": 0.6506673693656921, |
| "completion_length": 625.3125, |
| "epoch": 0.1160941475826972, |
| "grad_norm": 2.7186765670776367, |
| "kl": 0.0, |
| "learning_rate": 9.342465814095166e-07, |
| "loss": 0.6507, |
| "num_samples": 1.0, |
| "reward": 2.4375, |
| "reward_std": 0.5194375514984131, |
| "rewards/gpt4o_holistic_reward": 2.4375, |
| "rl_loss": 7.450580596923828e-09, |
| "sft_loss": 2.168890953063965, |
| "speech_entropy": 2.8031604290008545, |
| "speech_kl": 0.0, |
| "step": 365, |
| "text_entropy": 1.175290822982788, |
| "text_kl": 0.0, |
| "total_entropy": 2.457470655441284 |
| }, |
| { |
| "combined_loss": 0.6432278156280518, |
| "completion_length": 494.9375, |
| "epoch": 0.11641221374045801, |
| "grad_norm": 1.8091281652450562, |
| "kl": 0.0, |
| "learning_rate": 9.338744847493842e-07, |
| "loss": 0.6432, |
| "num_samples": 1.0, |
| "reward": 3.875, |
| "reward_std": 0.4788135886192322, |
| "rewards/gpt4o_holistic_reward": 3.875, |
| "rl_loss": 1.4901161193847656e-08, |
| "sft_loss": 2.1440927982330322, |
| "speech_entropy": 2.3679351806640625, |
| "speech_kl": 0.0, |
| "step": 366, |
| "text_entropy": 1.285107135772705, |
| "text_kl": 0.0, |
| "total_entropy": 2.1672964096069336 |
| }, |
| { |
| "combined_loss": 0.6938588619232178, |
| "completion_length": 392.9375, |
| "epoch": 0.11673027989821882, |
| "grad_norm": 2.0217089653015137, |
| "kl": 0.0, |
| "learning_rate": 9.335014216856936e-07, |
| "loss": 0.6939, |
| "num_samples": 1.0, |
| "reward": 4.5625, |
| "reward_std": 0.8750999569892883, |
| "rewards/gpt4o_holistic_reward": 4.5625, |
| "rl_loss": 1.4901161193847656e-08, |
| "sft_loss": 2.3128628730773926, |
| "speech_entropy": 2.1415584087371826, |
| "speech_kl": 0.0, |
| "step": 367, |
| "text_entropy": 1.1758991479873657, |
| "text_kl": 0.0, |
| "total_entropy": 1.9482624530792236 |
| }, |
| { |
| "combined_loss": 0.726897120475769, |
| "completion_length": 487.875, |
| "epoch": 0.11704834605597965, |
| "grad_norm": 1.7421088218688965, |
| "kl": 0.0, |
| "learning_rate": 9.331273931576306e-07, |
| "loss": 0.7269, |
| "num_samples": 1.0, |
| "reward": 4.0625, |
| "reward_std": 0.8146764636039734, |
| "rewards/gpt4o_holistic_reward": 4.0625, |
| "rl_loss": 1.862645149230957e-08, |
| "sft_loss": 2.422990322113037, |
| "speech_entropy": 2.3109121322631836, |
| "speech_kl": 0.0, |
| "step": 368, |
| "text_entropy": 1.0041307210922241, |
| "text_kl": 0.0, |
| "total_entropy": 2.0410120487213135 |
| }, |
| { |
| "combined_loss": 0.6220736503601074, |
| "completion_length": 688.875, |
| "epoch": 0.11736641221374046, |
| "grad_norm": 1.5585706233978271, |
| "kl": 0.0, |
| "learning_rate": 9.327524001068118e-07, |
| "loss": 0.6221, |
| "num_samples": 1.0, |
| "reward": 2.875, |
| "reward_std": 0.8644567728042603, |
| "rewards/gpt4o_holistic_reward": 2.875, |
| "rl_loss": 1.1175870895385742e-08, |
| "sft_loss": 2.0735785961151123, |
| "speech_entropy": 2.154421806335449, |
| "speech_kl": 0.0, |
| "step": 369, |
| "text_entropy": 1.0012127161026, |
| "text_kl": 0.0, |
| "total_entropy": 1.9310146570205688 |
| }, |
| { |
| "combined_loss": 0.6993934512138367, |
| "completion_length": 423.875, |
| "epoch": 0.11768447837150127, |
| "grad_norm": 1.9345406293869019, |
| "kl": 0.0, |
| "learning_rate": 9.323764434772815e-07, |
| "loss": 0.6994, |
| "num_samples": 1.0, |
| "reward": 2.5, |
| "reward_std": 0.6231511831283569, |
| "rewards/gpt4o_holistic_reward": 2.5, |
| "rl_loss": -3.725290298461914e-09, |
| "sft_loss": 2.3313114643096924, |
| "speech_entropy": 2.3823764324188232, |
| "speech_kl": 0.0, |
| "step": 370, |
| "text_entropy": 1.6932473182678223, |
| "text_kl": 0.0, |
| "total_entropy": 2.245680332183838 |
| }, |
| { |
| "combined_loss": 0.6287088394165039, |
| "completion_length": 436.6875, |
| "epoch": 0.1180025445292621, |
| "grad_norm": 2.2497189044952393, |
| "kl": 0.0, |
| "learning_rate": 9.319995242155101e-07, |
| "loss": 0.6287, |
| "num_samples": 1.0, |
| "reward": 3.6875, |
| "reward_std": 0.8750999569892883, |
| "rewards/gpt4o_holistic_reward": 3.6875, |
| "rl_loss": 0.0, |
| "sft_loss": 2.095696210861206, |
| "speech_entropy": 2.30611515045166, |
| "speech_kl": 0.0, |
| "step": 371, |
| "text_entropy": 0.9617571234703064, |
| "text_kl": 0.0, |
| "total_entropy": 2.0462255477905273 |
| }, |
| { |
| "combined_loss": 0.6533428430557251, |
| "completion_length": 536.4375, |
| "epoch": 0.1183206106870229, |
| "grad_norm": 2.011059045791626, |
| "kl": 0.0, |
| "learning_rate": 9.316216432703917e-07, |
| "loss": 0.6533, |
| "num_samples": 1.0, |
| "reward": 3.6875, |
| "reward_std": 1.057937741279602, |
| "rewards/gpt4o_holistic_reward": 3.6875, |
| "rl_loss": -3.725290298461914e-09, |
| "sft_loss": 2.177809238433838, |
| "speech_entropy": 2.581768035888672, |
| "speech_kl": 0.0, |
| "step": 372, |
| "text_entropy": 1.0381070375442505, |
| "text_kl": 0.0, |
| "total_entropy": 2.2744696140289307 |
| }, |
| { |
| "combined_loss": 0.6885940432548523, |
| "completion_length": 485.1875, |
| "epoch": 0.11863867684478371, |
| "grad_norm": 2.2359321117401123, |
| "kl": 0.0, |
| "learning_rate": 9.312428015932407e-07, |
| "loss": 0.6886, |
| "num_samples": 1.0, |
| "reward": 3.3125, |
| "reward_std": 1.226402759552002, |
| "rewards/gpt4o_holistic_reward": 3.3125, |
| "rl_loss": 1.862645149230957e-09, |
| "sft_loss": 2.2953133583068848, |
| "speech_entropy": 3.0134053230285645, |
| "speech_kl": 0.0, |
| "step": 373, |
| "text_entropy": 1.2640031576156616, |
| "text_kl": 0.0, |
| "total_entropy": 2.5999834537506104 |
| }, |
| { |
| "combined_loss": 0.6422200202941895, |
| "completion_length": 451.625, |
| "epoch": 0.11895674300254452, |
| "grad_norm": 1.8345330953598022, |
| "kl": 0.0, |
| "learning_rate": 9.308630001377909e-07, |
| "loss": 0.6422, |
| "num_samples": 1.0, |
| "reward": 2.75, |
| "reward_std": 0.9565354585647583, |
| "rewards/gpt4o_holistic_reward": 2.75, |
| "rl_loss": 0.0, |
| "sft_loss": 2.140733242034912, |
| "speech_entropy": 2.1300010681152344, |
| "speech_kl": 0.0, |
| "step": 374, |
| "text_entropy": 1.349212408065796, |
| "text_kl": 0.0, |
| "total_entropy": 1.9911762475967407 |
| }, |
| { |
| "combined_loss": 0.6511521339416504, |
| "completion_length": 440.8125, |
| "epoch": 0.11927480916030535, |
| "grad_norm": 2.062229871749878, |
| "kl": 0.0, |
| "learning_rate": 9.304822398601919e-07, |
| "loss": 0.6512, |
| "num_samples": 1.0, |
| "reward": 3.3125, |
| "reward_std": 0.2694375813007355, |
| "rewards/gpt4o_holistic_reward": 3.3125, |
| "rl_loss": 0.0, |
| "sft_loss": 2.1705071926116943, |
| "speech_entropy": 2.244220495223999, |
| "speech_kl": 0.0, |
| "step": 375, |
| "text_entropy": 1.1989765167236328, |
| "text_kl": 0.0, |
| "total_entropy": 2.0404000282287598 |
| }, |
| { |
| "combined_loss": 0.7069913148880005, |
| "completion_length": 472.375, |
| "epoch": 0.11959287531806616, |
| "grad_norm": 2.3955235481262207, |
| "kl": 0.0, |
| "learning_rate": 9.301005217190072e-07, |
| "loss": 0.707, |
| "num_samples": 1.0, |
| "reward": 2.625, |
| "reward_std": 1.0000998973846436, |
| "rewards/gpt4o_holistic_reward": 2.625, |
| "rl_loss": -3.725290298461914e-09, |
| "sft_loss": 2.356637477874756, |
| "speech_entropy": 3.064028263092041, |
| "speech_kl": 0.0, |
| "step": 376, |
| "text_entropy": 1.921829104423523, |
| "text_kl": 0.0, |
| "total_entropy": 2.78952693939209 |
| }, |
| { |
| "combined_loss": 0.648313045501709, |
| "completion_length": 499.5625, |
| "epoch": 0.11991094147582697, |
| "grad_norm": 2.091409206390381, |
| "kl": 0.0, |
| "learning_rate": 9.297178466752118e-07, |
| "loss": 0.6483, |
| "num_samples": 1.0, |
| "reward": 3.0, |
| "reward_std": 1.0774502754211426, |
| "rewards/gpt4o_holistic_reward": 3.0, |
| "rl_loss": -3.725290298461914e-09, |
| "sft_loss": 2.161043405532837, |
| "speech_entropy": 3.0961804389953613, |
| "speech_kl": 0.0, |
| "step": 377, |
| "text_entropy": 1.5107793807983398, |
| "text_kl": 0.0, |
| "total_entropy": 2.7813608646392822 |
| }, |
| { |
| "combined_loss": 0.713459849357605, |
| "completion_length": 469.625, |
| "epoch": 0.12022900763358779, |
| "grad_norm": 2.0358638763427734, |
| "kl": 0.0, |
| "learning_rate": 9.293342156921896e-07, |
| "loss": 0.7135, |
| "num_samples": 1.0, |
| "reward": 4.1875, |
| "reward_std": 0.1251000016927719, |
| "rewards/gpt4o_holistic_reward": 4.1875, |
| "rl_loss": 0.0, |
| "sft_loss": 2.378199338912964, |
| "speech_entropy": 2.2007017135620117, |
| "speech_kl": 0.0, |
| "step": 378, |
| "text_entropy": 1.2517070770263672, |
| "text_kl": 0.0, |
| "total_entropy": 2.017704963684082 |
| }, |
| { |
| "combined_loss": 0.6941728591918945, |
| "completion_length": 430.8125, |
| "epoch": 0.1205470737913486, |
| "grad_norm": 1.5506932735443115, |
| "kl": 0.0, |
| "learning_rate": 9.289496297357313e-07, |
| "loss": 0.6942, |
| "num_samples": 1.0, |
| "reward": 5.0, |
| "reward_std": 9.999999747378752e-05, |
| "rewards/gpt4o_holistic_reward": 5.0, |
| "rl_loss": 0.0, |
| "sft_loss": 2.3139095306396484, |
| "speech_entropy": 2.1886677742004395, |
| "speech_kl": 0.0, |
| "step": 379, |
| "text_entropy": 1.5006790161132812, |
| "text_kl": 0.0, |
| "total_entropy": 2.0677237510681152 |
| }, |
| { |
| "combined_loss": 0.7921469211578369, |
| "completion_length": 444.0, |
| "epoch": 0.12086513994910941, |
| "grad_norm": 1.8955799341201782, |
| "kl": 0.0, |
| "learning_rate": 9.285640897740315e-07, |
| "loss": 0.7921, |
| "num_samples": 1.0, |
| "reward": 4.375, |
| "reward_std": 0.4331127107143402, |
| "rewards/gpt4o_holistic_reward": 4.375, |
| "rl_loss": 0.0, |
| "sft_loss": 2.6404895782470703, |
| "speech_entropy": 2.0556373596191406, |
| "speech_kl": 0.0, |
| "step": 380, |
| "text_entropy": 1.4717328548431396, |
| "text_kl": 0.0, |
| "total_entropy": 1.9481353759765625 |
| }, |
| { |
| "combined_loss": 0.7151652574539185, |
| "completion_length": 566.5, |
| "epoch": 0.12118320610687022, |
| "grad_norm": 1.9992356300354004, |
| "kl": 0.0, |
| "learning_rate": 9.281775967776865e-07, |
| "loss": 0.7152, |
| "num_samples": 1.0, |
| "reward": 4.5, |
| "reward_std": 0.864456832408905, |
| "rewards/gpt4o_holistic_reward": 4.5, |
| "rl_loss": 3.725290298461914e-09, |
| "sft_loss": 2.3838841915130615, |
| "speech_entropy": 2.1918318271636963, |
| "speech_kl": 0.0, |
| "step": 381, |
| "text_entropy": 1.32926344871521, |
| "text_kl": 0.0, |
| "total_entropy": 2.039583683013916 |
| }, |
| { |
| "combined_loss": 0.6602721214294434, |
| "completion_length": 361.8125, |
| "epoch": 0.12150127226463105, |
| "grad_norm": 2.4221584796905518, |
| "kl": 0.0, |
| "learning_rate": 9.277901517196921e-07, |
| "loss": 0.6603, |
| "num_samples": 1.0, |
| "reward": 3.25, |
| "reward_std": 0.704224169254303, |
| "rewards/gpt4o_holistic_reward": 3.25, |
| "rl_loss": -3.725290298461914e-09, |
| "sft_loss": 2.2009072303771973, |
| "speech_entropy": 3.4160265922546387, |
| "speech_kl": 0.0, |
| "step": 382, |
| "text_entropy": 1.843759536743164, |
| "text_kl": 0.0, |
| "total_entropy": 3.1454572677612305 |
| }, |
| { |
| "combined_loss": 0.6596149206161499, |
| "completion_length": 469.5625, |
| "epoch": 0.12181933842239186, |
| "grad_norm": 2.0681304931640625, |
| "kl": 0.0, |
| "learning_rate": 9.274017555754407e-07, |
| "loss": 0.6596, |
| "num_samples": 1.0, |
| "reward": 3.3125, |
| "reward_std": 0.8676799535751343, |
| "rewards/gpt4o_holistic_reward": 3.3125, |
| "rl_loss": -1.1175870895385742e-08, |
| "sft_loss": 2.198716402053833, |
| "speech_entropy": 2.4791064262390137, |
| "speech_kl": 0.0, |
| "step": 383, |
| "text_entropy": 1.5029938220977783, |
| "text_kl": 0.0, |
| "total_entropy": 2.3004837036132812 |
| }, |
| { |
| "combined_loss": 0.6301867961883545, |
| "completion_length": 606.3125, |
| "epoch": 0.12213740458015267, |
| "grad_norm": 2.0828793048858643, |
| "kl": 0.0, |
| "learning_rate": 9.270124093227192e-07, |
| "loss": 0.6302, |
| "num_samples": 1.0, |
| "reward": 3.0, |
| "reward_std": 0.8483423590660095, |
| "rewards/gpt4o_holistic_reward": 3.0, |
| "rl_loss": 7.450580596923828e-09, |
| "sft_loss": 2.1006226539611816, |
| "speech_entropy": 2.148000717163086, |
| "speech_kl": 0.0, |
| "step": 384, |
| "text_entropy": 0.49018028378486633, |
| "text_kl": 0.0, |
| "total_entropy": 1.8085635900497437 |
| }, |
| { |
| "combined_loss": 0.589484453201294, |
| "completion_length": 417.5, |
| "epoch": 0.12245547073791349, |
| "grad_norm": 2.000455856323242, |
| "kl": 0.0, |
| "learning_rate": 9.266221139417064e-07, |
| "loss": 0.5895, |
| "num_samples": 1.0, |
| "reward": 4.0, |
| "reward_std": 0.3944375813007355, |
| "rewards/gpt4o_holistic_reward": 4.0, |
| "rl_loss": -7.450580596923828e-09, |
| "sft_loss": 1.9649479389190674, |
| "speech_entropy": 2.3916573524475098, |
| "speech_kl": 0.0, |
| "step": 385, |
| "text_entropy": 1.274303913116455, |
| "text_kl": 0.0, |
| "total_entropy": 2.180541515350342 |
| }, |
| { |
| "combined_loss": 0.6605916023254395, |
| "completion_length": 463.625, |
| "epoch": 0.1227735368956743, |
| "grad_norm": 2.3290114402770996, |
| "kl": 0.0, |
| "learning_rate": 9.262308704149701e-07, |
| "loss": 0.6606, |
| "num_samples": 1.0, |
| "reward": 3.4375, |
| "reward_std": 0.6176798939704895, |
| "rewards/gpt4o_holistic_reward": 3.4375, |
| "rl_loss": -1.4901161193847656e-08, |
| "sft_loss": 2.201972007751465, |
| "speech_entropy": 3.0757741928100586, |
| "speech_kl": 0.0, |
| "step": 386, |
| "text_entropy": 1.367950439453125, |
| "text_kl": 0.0, |
| "total_entropy": 2.818263530731201 |
| }, |
| { |
| "combined_loss": 0.6665786504745483, |
| "completion_length": 479.8125, |
| "epoch": 0.12309160305343511, |
| "grad_norm": 3.4283993244171143, |
| "kl": 0.0, |
| "learning_rate": 9.258386797274658e-07, |
| "loss": 0.6666, |
| "num_samples": 1.0, |
| "reward": 2.8125, |
| "reward_std": 1.2235616445541382, |
| "rewards/gpt4o_holistic_reward": 2.8125, |
| "rl_loss": -5.587935447692871e-09, |
| "sft_loss": 2.221928596496582, |
| "speech_entropy": 3.2466955184936523, |
| "speech_kl": 0.0, |
| "step": 387, |
| "text_entropy": 1.366699457168579, |
| "text_kl": 0.0, |
| "total_entropy": 2.97598934173584 |
| }, |
| { |
| "combined_loss": 0.6852359771728516, |
| "completion_length": 439.5, |
| "epoch": 0.12340966921119594, |
| "grad_norm": 2.255603551864624, |
| "kl": 0.0, |
| "learning_rate": 9.254455428665329e-07, |
| "loss": 0.6852, |
| "num_samples": 1.0, |
| "reward": 3.8125, |
| "reward_std": 0.8751000165939331, |
| "rewards/gpt4o_holistic_reward": 3.8125, |
| "rl_loss": 3.725290298461914e-09, |
| "sft_loss": 2.2841198444366455, |
| "speech_entropy": 2.7042312622070312, |
| "speech_kl": 0.0, |
| "step": 388, |
| "text_entropy": 1.335016131401062, |
| "text_kl": 0.0, |
| "total_entropy": 2.444276809692383 |
| }, |
| { |
| "combined_loss": 0.6944676041603088, |
| "completion_length": 545.0, |
| "epoch": 0.12372773536895675, |
| "grad_norm": 3.5577309131622314, |
| "kl": 0.0, |
| "learning_rate": 9.250514608218928e-07, |
| "loss": 0.6945, |
| "num_samples": 1.0, |
| "reward": 3.375, |
| "reward_std": 0.2501000165939331, |
| "rewards/gpt4o_holistic_reward": 3.375, |
| "rl_loss": 1.4901161193847656e-08, |
| "sft_loss": 2.314892053604126, |
| "speech_entropy": 2.9382314682006836, |
| "speech_kl": 0.0, |
| "step": 389, |
| "text_entropy": 1.2093441486358643, |
| "text_kl": 0.0, |
| "total_entropy": 2.6713953018188477 |
| }, |
| { |
| "combined_loss": 0.6847076416015625, |
| "completion_length": 318.875, |
| "epoch": 0.12404580152671756, |
| "grad_norm": 2.288792610168457, |
| "kl": 0.0, |
| "learning_rate": 9.24656434585647e-07, |
| "loss": 0.6847, |
| "num_samples": 1.0, |
| "reward": 4.0, |
| "reward_std": 0.7042241096496582, |
| "rewards/gpt4o_holistic_reward": 4.0, |
| "rl_loss": 7.450580596923828e-09, |
| "sft_loss": 2.2823588848114014, |
| "speech_entropy": 2.4487361907958984, |
| "speech_kl": 0.0, |
| "step": 390, |
| "text_entropy": 1.1644865274429321, |
| "text_kl": 0.0, |
| "total_entropy": 2.222623348236084 |
| }, |
| { |
| "combined_loss": 0.6812296509742737, |
| "completion_length": 496.5, |
| "epoch": 0.12436386768447837, |
| "grad_norm": 2.5237197875976562, |
| "kl": 0.0, |
| "learning_rate": 9.242604651522735e-07, |
| "loss": 0.6812, |
| "num_samples": 1.0, |
| "reward": 3.0, |
| "reward_std": 1.2178384065628052, |
| "rewards/gpt4o_holistic_reward": 3.0, |
| "rl_loss": 1.1175870895385742e-08, |
| "sft_loss": 2.2707653045654297, |
| "speech_entropy": 3.798156261444092, |
| "speech_kl": 0.0, |
| "step": 391, |
| "text_entropy": 1.453238606452942, |
| "text_kl": 0.0, |
| "total_entropy": 3.4767727851867676 |
| }, |
| { |
| "combined_loss": 0.6517486572265625, |
| "completion_length": 350.5, |
| "epoch": 0.12468193384223919, |
| "grad_norm": 1.931217908859253, |
| "kl": 0.0, |
| "learning_rate": 9.238635535186246e-07, |
| "loss": 0.6517, |
| "num_samples": 1.0, |
| "reward": 1.875, |
| "reward_std": 0.3944375813007355, |
| "rewards/gpt4o_holistic_reward": 1.875, |
| "rl_loss": 3.725290298461914e-09, |
| "sft_loss": 2.1724953651428223, |
| "speech_entropy": 2.189283847808838, |
| "speech_kl": 0.0, |
| "step": 392, |
| "text_entropy": 1.1253941059112549, |
| "text_kl": 0.0, |
| "total_entropy": 1.9858797788619995 |
| }, |
| { |
| "combined_loss": 0.693282961845398, |
| "completion_length": 286.8125, |
| "epoch": 0.125, |
| "grad_norm": 2.019578218460083, |
| "kl": 0.0, |
| "learning_rate": 9.234657006839249e-07, |
| "loss": 0.6933, |
| "num_samples": 1.0, |
| "reward": 3.25, |
| "reward_std": 0.5000999569892883, |
| "rewards/gpt4o_holistic_reward": 3.25, |
| "rl_loss": -7.450580596923828e-09, |
| "sft_loss": 2.310943126678467, |
| "speech_entropy": 2.226673126220703, |
| "speech_kl": 0.0, |
| "step": 393, |
| "text_entropy": 1.3821954727172852, |
| "text_kl": 0.0, |
| "total_entropy": 2.0710020065307617 |
| }, |
| { |
| "combined_loss": 0.6477099657058716, |
| "completion_length": 431.5625, |
| "epoch": 0.1253180661577608, |
| "grad_norm": 3.454335927963257, |
| "kl": 0.0, |
| "learning_rate": 9.230669076497687e-07, |
| "loss": 0.6477, |
| "num_samples": 1.0, |
| "reward": 3.5625, |
| "reward_std": 1.501086711883545, |
| "rewards/gpt4o_holistic_reward": 3.5625, |
| "rl_loss": 3.725290298461914e-09, |
| "sft_loss": 2.1590328216552734, |
| "speech_entropy": 5.795164108276367, |
| "speech_kl": 0.0, |
| "step": 394, |
| "text_entropy": 1.2117631435394287, |
| "text_kl": 0.0, |
| "total_entropy": 5.437085151672363 |
| }, |
| { |
| "combined_loss": 0.6723122596740723, |
| "completion_length": 557.5625, |
| "epoch": 0.12563613231552162, |
| "grad_norm": 1.878871202468872, |
| "kl": 0.0, |
| "learning_rate": 9.226671754201167e-07, |
| "loss": 0.6723, |
| "num_samples": 1.0, |
| "reward": 3.0625, |
| "reward_std": 0.3751000165939331, |
| "rewards/gpt4o_holistic_reward": 3.0625, |
| "rl_loss": 3.725290298461914e-09, |
| "sft_loss": 2.2410407066345215, |
| "speech_entropy": 3.8088769912719727, |
| "speech_kl": 0.0, |
| "step": 395, |
| "text_entropy": 1.0774390697479248, |
| "text_kl": 0.0, |
| "total_entropy": 3.4700815677642822 |
| }, |
| { |
| "combined_loss": 0.7662006616592407, |
| "completion_length": 397.5625, |
| "epoch": 0.12595419847328243, |
| "grad_norm": 2.458489418029785, |
| "kl": 0.0, |
| "learning_rate": 9.222665050012947e-07, |
| "loss": 0.7662, |
| "num_samples": 1.0, |
| "reward": 3.4375, |
| "reward_std": 0.7654881477355957, |
| "rewards/gpt4o_holistic_reward": 3.4375, |
| "rl_loss": 7.450580596923828e-09, |
| "sft_loss": 2.554001808166504, |
| "speech_entropy": 2.176966667175293, |
| "speech_kl": 0.0, |
| "step": 396, |
| "text_entropy": 1.7679321765899658, |
| "text_kl": 0.0, |
| "total_entropy": 2.1059505939483643 |
| }, |
| { |
| "combined_loss": 0.5795345902442932, |
| "completion_length": 308.3125, |
| "epoch": 0.12627226463104327, |
| "grad_norm": 2.792732000350952, |
| "kl": 0.0, |
| "learning_rate": 9.218648974019896e-07, |
| "loss": 0.5795, |
| "num_samples": 1.0, |
| "reward": 4.125, |
| "reward_std": 0.614456832408905, |
| "rewards/gpt4o_holistic_reward": 4.125, |
| "rl_loss": 3.725290298461914e-09, |
| "sft_loss": 1.9317818880081177, |
| "speech_entropy": 2.6999130249023438, |
| "speech_kl": 0.0, |
| "step": 397, |
| "text_entropy": 1.0977978706359863, |
| "text_kl": 0.0, |
| "total_entropy": 2.432361602783203 |
| }, |
| { |
| "combined_loss": 0.8004469275474548, |
| "completion_length": 436.625, |
| "epoch": 0.12659033078880408, |
| "grad_norm": 4.002890586853027, |
| "kl": 0.0, |
| "learning_rate": 9.214623536332482e-07, |
| "loss": 0.8004, |
| "num_samples": 1.0, |
| "reward": 3.0, |
| "reward_std": 0.9788135290145874, |
| "rewards/gpt4o_holistic_reward": 3.0, |
| "rl_loss": -1.1175870895385742e-08, |
| "sft_loss": 2.668156385421753, |
| "speech_entropy": 5.438493728637695, |
| "speech_kl": 0.0, |
| "step": 398, |
| "text_entropy": 1.7521681785583496, |
| "text_kl": 0.0, |
| "total_entropy": 5.220246315002441 |
| }, |
| { |
| "combined_loss": 0.7312483787536621, |
| "completion_length": 681.8125, |
| "epoch": 0.1269083969465649, |
| "grad_norm": 2.7238285541534424, |
| "kl": 0.0, |
| "learning_rate": 9.21058874708474e-07, |
| "loss": 0.7312, |
| "num_samples": 1.0, |
| "reward": 4.0, |
| "reward_std": 0.9463939070701599, |
| "rewards/gpt4o_holistic_reward": 4.0, |
| "rl_loss": -3.725290298461914e-09, |
| "sft_loss": 2.4374947547912598, |
| "speech_entropy": 3.460540294647217, |
| "speech_kl": 0.0, |
| "step": 399, |
| "text_entropy": 1.679933786392212, |
| "text_kl": 0.0, |
| "total_entropy": 3.2443912029266357 |
| }, |
| { |
| "combined_loss": 0.7706436514854431, |
| "completion_length": 449.875, |
| "epoch": 0.1272264631043257, |
| "grad_norm": 1.9544907808303833, |
| "kl": 0.0, |
| "learning_rate": 9.206544616434248e-07, |
| "loss": 0.7706, |
| "num_samples": 1.0, |
| "reward": 4.125, |
| "reward_std": 1.0308762788772583, |
| "rewards/gpt4o_holistic_reward": 4.125, |
| "rl_loss": 7.450580596923828e-09, |
| "sft_loss": 2.568812131881714, |
| "speech_entropy": 2.1668813228607178, |
| "speech_kl": 0.0, |
| "step": 400, |
| "text_entropy": 1.5073027610778809, |
| "text_kl": 0.0, |
| "total_entropy": 2.0442421436309814 |
| }, |
| { |
| "combined_loss": 0.6995494365692139, |
| "completion_length": 447.75, |
| "epoch": 0.1275445292620865, |
| "grad_norm": 1.9926646947860718, |
| "kl": 0.0, |
| "learning_rate": 9.202491154562097e-07, |
| "loss": 0.6995, |
| "num_samples": 1.0, |
| "reward": 3.875, |
| "reward_std": 0.2501000165939331, |
| "rewards/gpt4o_holistic_reward": 3.875, |
| "rl_loss": -3.725290298461914e-09, |
| "sft_loss": 2.331831455230713, |
| "speech_entropy": 3.2041335105895996, |
| "speech_kl": 0.0, |
| "step": 401, |
| "text_entropy": 1.542539119720459, |
| "text_kl": 0.0, |
| "total_entropy": 3.0047965049743652 |
| }, |
| { |
| "combined_loss": 0.674731433391571, |
| "completion_length": 414.75, |
| "epoch": 0.12786259541984732, |
| "grad_norm": 1.9648370742797852, |
| "kl": 0.0, |
| "learning_rate": 9.198428371672874e-07, |
| "loss": 0.6747, |
| "num_samples": 1.0, |
| "reward": 4.1875, |
| "reward_std": 0.6682298183441162, |
| "rewards/gpt4o_holistic_reward": 4.1875, |
| "rl_loss": 1.862645149230957e-09, |
| "sft_loss": 2.2491049766540527, |
| "speech_entropy": 2.0483105182647705, |
| "speech_kl": 0.0, |
| "step": 402, |
| "text_entropy": 1.0541050434112549, |
| "text_kl": 0.0, |
| "total_entropy": 1.8693532943725586 |
| }, |
| { |
| "combined_loss": 0.7094697952270508, |
| "completion_length": 490.75, |
| "epoch": 0.12818066157760813, |
| "grad_norm": 3.759694814682007, |
| "kl": 0.0, |
| "learning_rate": 9.194356277994632e-07, |
| "loss": 0.7095, |
| "num_samples": 1.0, |
| "reward": 3.4375, |
| "reward_std": 0.5713939070701599, |
| "rewards/gpt4o_holistic_reward": 3.4375, |
| "rl_loss": 7.450580596923828e-09, |
| "sft_loss": 2.364899158477783, |
| "speech_entropy": 2.0986576080322266, |
| "speech_kl": 0.0, |
| "step": 403, |
| "text_entropy": 1.6229678392410278, |
| "text_kl": 0.0, |
| "total_entropy": 2.018561363220215 |
| }, |
| { |
| "combined_loss": 0.6625657081604004, |
| "completion_length": 677.5, |
| "epoch": 0.12849872773536897, |
| "grad_norm": 6.927186012268066, |
| "kl": 0.0, |
| "learning_rate": 9.19027488377886e-07, |
| "loss": 0.6626, |
| "num_samples": 1.0, |
| "reward": 3.125, |
| "reward_std": 1.3692017793655396, |
| "rewards/gpt4o_holistic_reward": 3.125, |
| "rl_loss": 1.862645149230957e-08, |
| "sft_loss": 2.208552122116089, |
| "speech_entropy": 3.995192527770996, |
| "speech_kl": 0.0, |
| "step": 404, |
| "text_entropy": 1.7138077020645142, |
| "text_kl": 0.0, |
| "total_entropy": 3.6808578968048096 |
| }, |
| { |
| "combined_loss": 0.6429011225700378, |
| "completion_length": 530.25, |
| "epoch": 0.12881679389312978, |
| "grad_norm": 5.672791004180908, |
| "kl": 0.0, |
| "learning_rate": 9.186184199300463e-07, |
| "loss": 0.6429, |
| "num_samples": 1.0, |
| "reward": 3.5, |
| "reward_std": 1.3904881477355957, |
| "rewards/gpt4o_holistic_reward": 3.5, |
| "rl_loss": 0.0, |
| "sft_loss": 2.1430037021636963, |
| "speech_entropy": 4.908637046813965, |
| "speech_kl": 0.0, |
| "step": 405, |
| "text_entropy": 1.521721363067627, |
| "text_kl": 0.0, |
| "total_entropy": 4.66417932510376 |
| }, |
| { |
| "combined_loss": 0.6549615859985352, |
| "completion_length": 477.375, |
| "epoch": 0.1291348600508906, |
| "grad_norm": 2.929534912109375, |
| "kl": 0.0, |
| "learning_rate": 9.182084234857735e-07, |
| "loss": 0.655, |
| "num_samples": 1.0, |
| "reward": 2.3125, |
| "reward_std": 0.8430101871490479, |
| "rewards/gpt4o_holistic_reward": 2.3125, |
| "rl_loss": -3.725290298461914e-09, |
| "sft_loss": 2.1832053661346436, |
| "speech_entropy": 2.1303212642669678, |
| "speech_kl": 0.0, |
| "step": 406, |
| "text_entropy": 1.1068902015686035, |
| "text_kl": 0.0, |
| "total_entropy": 1.9402127265930176 |
| }, |
| { |
| "combined_loss": 0.6791283488273621, |
| "completion_length": 461.6875, |
| "epoch": 0.1294529262086514, |
| "grad_norm": 2.371797800064087, |
| "kl": 0.0, |
| "learning_rate": 9.17797500077233e-07, |
| "loss": 0.6791, |
| "num_samples": 1.0, |
| "reward": 4.4375, |
| "reward_std": 0.8538135886192322, |
| "rewards/gpt4o_holistic_reward": 4.4375, |
| "rl_loss": -3.725290298461914e-09, |
| "sft_loss": 2.263761043548584, |
| "speech_entropy": 3.0191469192504883, |
| "speech_kl": 0.0, |
| "step": 407, |
| "text_entropy": 2.0163426399230957, |
| "text_kl": 0.0, |
| "total_entropy": 2.867436408996582 |
| }, |
| { |
| "combined_loss": 0.683641791343689, |
| "completion_length": 407.125, |
| "epoch": 0.1297709923664122, |
| "grad_norm": 3.123765468597412, |
| "kl": 0.0, |
| "learning_rate": 9.173856507389244e-07, |
| "loss": 0.6836, |
| "num_samples": 1.0, |
| "reward": 3.0, |
| "reward_std": 1.3274502754211426, |
| "rewards/gpt4o_holistic_reward": 3.0, |
| "rl_loss": 0.0, |
| "sft_loss": 2.278805732727051, |
| "speech_entropy": 4.666620254516602, |
| "speech_kl": 0.0, |
| "step": 408, |
| "text_entropy": 4.294477462768555, |
| "text_kl": 0.0, |
| "total_entropy": 4.622071266174316 |
| }, |
| { |
| "combined_loss": 0.7313430309295654, |
| "completion_length": 254.5625, |
| "epoch": 0.13008905852417302, |
| "grad_norm": 2.3901264667510986, |
| "kl": 0.0, |
| "learning_rate": 9.169728765076774e-07, |
| "loss": 0.7313, |
| "num_samples": 1.0, |
| "reward": 4.0625, |
| "reward_std": 0.2694375813007355, |
| "rewards/gpt4o_holistic_reward": 4.0625, |
| "rl_loss": 7.450580596923828e-09, |
| "sft_loss": 2.437809944152832, |
| "speech_entropy": 2.1548728942871094, |
| "speech_kl": 0.0, |
| "step": 409, |
| "text_entropy": 1.255692958831787, |
| "text_kl": 0.0, |
| "total_entropy": 2.0065855979919434 |
| }, |
| { |
| "combined_loss": 0.6742551326751709, |
| "completion_length": 608.0, |
| "epoch": 0.13040712468193386, |
| "grad_norm": 2.8765196800231934, |
| "kl": 0.0, |
| "learning_rate": 9.165591784226511e-07, |
| "loss": 0.6743, |
| "num_samples": 1.0, |
| "reward": 2.0625, |
| "reward_std": 0.1251000016927719, |
| "rewards/gpt4o_holistic_reward": 2.0625, |
| "rl_loss": -3.725290298461914e-09, |
| "sft_loss": 2.2475171089172363, |
| "speech_entropy": 2.9422497749328613, |
| "speech_kl": 0.0, |
| "step": 410, |
| "text_entropy": 2.1728014945983887, |
| "text_kl": 0.0, |
| "total_entropy": 2.778034210205078 |
| }, |
| { |
| "combined_loss": 0.6012893319129944, |
| "completion_length": 402.5, |
| "epoch": 0.13072519083969467, |
| "grad_norm": 4.226046085357666, |
| "kl": 0.0, |
| "learning_rate": 9.161445575253295e-07, |
| "loss": 0.6013, |
| "num_samples": 1.0, |
| "reward": 3.375, |
| "reward_std": 0.9331126809120178, |
| "rewards/gpt4o_holistic_reward": 3.375, |
| "rl_loss": 0.0, |
| "sft_loss": 2.0042977333068848, |
| "speech_entropy": 3.145230293273926, |
| "speech_kl": 0.0, |
| "step": 411, |
| "text_entropy": 2.9247562885284424, |
| "text_kl": 0.0, |
| "total_entropy": 3.075840473175049 |
| }, |
| { |
| "combined_loss": 0.6655627489089966, |
| "completion_length": 465.5625, |
| "epoch": 0.13104325699745548, |
| "grad_norm": 4.012775421142578, |
| "kl": 0.0, |
| "learning_rate": 9.157290148595206e-07, |
| "loss": 0.6656, |
| "num_samples": 1.0, |
| "reward": 3.8125, |
| "reward_std": 0.6250999569892883, |
| "rewards/gpt4o_holistic_reward": 3.8125, |
| "rl_loss": -7.450580596923828e-09, |
| "sft_loss": 2.2185425758361816, |
| "speech_entropy": 2.861677646636963, |
| "speech_kl": 0.0, |
| "step": 412, |
| "text_entropy": 2.6879758834838867, |
| "text_kl": 0.0, |
| "total_entropy": 2.890303134918213 |
| }, |
| { |
| "combined_loss": 0.7086147665977478, |
| "completion_length": 562.5625, |
| "epoch": 0.1313613231552163, |
| "grad_norm": 4.615509510040283, |
| "kl": 0.0, |
| "learning_rate": 9.153125514713523e-07, |
| "loss": 0.7086, |
| "num_samples": 1.0, |
| "reward": 3.125, |
| "reward_std": 1.1756925582885742, |
| "rewards/gpt4o_holistic_reward": 3.125, |
| "rl_loss": -3.725290298461914e-09, |
| "sft_loss": 2.362049102783203, |
| "speech_entropy": 3.222433090209961, |
| "speech_kl": 0.0, |
| "step": 413, |
| "text_entropy": 2.680953025817871, |
| "text_kl": 0.0, |
| "total_entropy": 3.1390838623046875 |
| }, |
| { |
| "combined_loss": 0.7332242131233215, |
| "completion_length": 579.3125, |
| "epoch": 0.1316793893129771, |
| "grad_norm": 8.893054962158203, |
| "kl": 0.0, |
| "learning_rate": 9.148951684092709e-07, |
| "loss": 0.7332, |
| "num_samples": 1.0, |
| "reward": 3.0625, |
| "reward_std": 1.2498197555541992, |
| "rewards/gpt4o_holistic_reward": 3.0625, |
| "rl_loss": 0.0, |
| "sft_loss": 2.4440808296203613, |
| "speech_entropy": 3.2557754516601562, |
| "speech_kl": 0.0, |
| "step": 414, |
| "text_entropy": 3.8602097034454346, |
| "text_kl": 0.0, |
| "total_entropy": 3.7247118949890137 |
| }, |
| { |
| "combined_loss": 0.6186578273773193, |
| "completion_length": 346.75, |
| "epoch": 0.1319974554707379, |
| "grad_norm": 2.8829197883605957, |
| "kl": 0.0, |
| "learning_rate": 9.144768667240375e-07, |
| "loss": 0.6187, |
| "num_samples": 1.0, |
| "reward": 4.6875, |
| "reward_std": 0.4733423590660095, |
| "rewards/gpt4o_holistic_reward": 4.6875, |
| "rl_loss": 1.4901161193847656e-08, |
| "sft_loss": 2.062192678451538, |
| "speech_entropy": 2.5403671264648438, |
| "speech_kl": 0.0, |
| "step": 415, |
| "text_entropy": 2.09316086769104, |
| "text_kl": 0.0, |
| "total_entropy": 2.482808828353882 |
| }, |
| { |
| "combined_loss": 0.6629255414009094, |
| "completion_length": 498.1875, |
| "epoch": 0.13231552162849872, |
| "grad_norm": 1.7470639944076538, |
| "kl": 0.0, |
| "learning_rate": 9.140576474687263e-07, |
| "loss": 0.6629, |
| "num_samples": 1.0, |
| "reward": 4.625, |
| "reward_std": 0.7501000165939331, |
| "rewards/gpt4o_holistic_reward": 4.625, |
| "rl_loss": 1.862645149230957e-08, |
| "sft_loss": 2.209751605987549, |
| "speech_entropy": 2.040069103240967, |
| "speech_kl": 0.0, |
| "step": 416, |
| "text_entropy": 1.1820553541183472, |
| "text_kl": 0.0, |
| "total_entropy": 1.8843843936920166 |
| }, |
| { |
| "combined_loss": 0.7431026101112366, |
| "completion_length": 490.5, |
| "epoch": 0.13263358778625955, |
| "grad_norm": 1.9392704963684082, |
| "kl": 0.0, |
| "learning_rate": 9.136375116987211e-07, |
| "loss": 0.7431, |
| "num_samples": 1.0, |
| "reward": 3.9375, |
| "reward_std": 0.7394567728042603, |
| "rewards/gpt4o_holistic_reward": 3.9375, |
| "rl_loss": 1.4901161193847656e-08, |
| "sft_loss": 2.477008581161499, |
| "speech_entropy": 2.424851894378662, |
| "speech_kl": 0.0, |
| "step": 417, |
| "text_entropy": 1.5257370471954346, |
| "text_kl": 0.0, |
| "total_entropy": 2.2481653690338135 |
| }, |
| { |
| "combined_loss": 0.6801667809486389, |
| "completion_length": 366.6875, |
| "epoch": 0.13295165394402036, |
| "grad_norm": 2.0551204681396484, |
| "kl": 0.0, |
| "learning_rate": 9.132164604717135e-07, |
| "loss": 0.6802, |
| "num_samples": 1.0, |
| "reward": 4.125, |
| "reward_std": 0.2501000165939331, |
| "rewards/gpt4o_holistic_reward": 4.125, |
| "rl_loss": 7.450580596923828e-09, |
| "sft_loss": 2.2672226428985596, |
| "speech_entropy": 2.0900187492370605, |
| "speech_kl": 0.0, |
| "step": 418, |
| "text_entropy": 1.2720967531204224, |
| "text_kl": 0.0, |
| "total_entropy": 1.9535316228866577 |
| }, |
| { |
| "combined_loss": 0.7415467500686646, |
| "completion_length": 447.4375, |
| "epoch": 0.13326972010178118, |
| "grad_norm": 3.4998650550842285, |
| "kl": 0.0, |
| "learning_rate": 9.127944948476993e-07, |
| "loss": 0.7415, |
| "num_samples": 1.0, |
| "reward": 3.4375, |
| "reward_std": 1.3322067260742188, |
| "rewards/gpt4o_holistic_reward": 3.4375, |
| "rl_loss": -7.450580596923828e-09, |
| "sft_loss": 2.471822500228882, |
| "speech_entropy": 2.6262001991271973, |
| "speech_kl": 0.0, |
| "step": 419, |
| "text_entropy": 2.3739140033721924, |
| "text_kl": 0.0, |
| "total_entropy": 2.5941972732543945 |
| }, |
| { |
| "combined_loss": 0.6638205051422119, |
| "completion_length": 492.5, |
| "epoch": 0.13358778625954199, |
| "grad_norm": 2.7289879322052, |
| "kl": 0.0, |
| "learning_rate": 9.123716158889764e-07, |
| "loss": 0.6638, |
| "num_samples": 1.0, |
| "reward": 4.75, |
| "reward_std": 0.5000999569892883, |
| "rewards/gpt4o_holistic_reward": 4.75, |
| "rl_loss": 7.450580596923828e-09, |
| "sft_loss": 2.212735176086426, |
| "speech_entropy": 2.982739210128784, |
| "speech_kl": 0.0, |
| "step": 420, |
| "text_entropy": 2.626387119293213, |
| "text_kl": 0.0, |
| "total_entropy": 2.923607110977173 |
| }, |
| { |
| "combined_loss": 0.801853358745575, |
| "completion_length": 382.75, |
| "epoch": 0.1339058524173028, |
| "grad_norm": 5.198933124542236, |
| "kl": 0.0, |
| "learning_rate": 9.11947824660142e-07, |
| "loss": 0.8019, |
| "num_samples": 1.0, |
| "reward": 3.4375, |
| "reward_std": 0.41377514600753784, |
| "rewards/gpt4o_holistic_reward": 3.4375, |
| "rl_loss": 7.450580596923828e-09, |
| "sft_loss": 2.672844409942627, |
| "speech_entropy": 2.06710147857666, |
| "speech_kl": 0.0, |
| "step": 421, |
| "text_entropy": 1.2483782768249512, |
| "text_kl": 0.0, |
| "total_entropy": 1.9093949794769287 |
| }, |
| { |
| "combined_loss": 0.6525322198867798, |
| "completion_length": 461.3125, |
| "epoch": 0.1342239185750636, |
| "grad_norm": 2.1027040481567383, |
| "kl": 0.0, |
| "learning_rate": 9.115231222280901e-07, |
| "loss": 0.6525, |
| "num_samples": 1.0, |
| "reward": 3.0625, |
| "reward_std": 0.3751000165939331, |
| "rewards/gpt4o_holistic_reward": 3.0625, |
| "rl_loss": 0.0, |
| "sft_loss": 2.175107479095459, |
| "speech_entropy": 2.58778715133667, |
| "speech_kl": 0.0, |
| "step": 422, |
| "text_entropy": 1.6190730333328247, |
| "text_kl": 0.0, |
| "total_entropy": 2.423537492752075 |
| }, |
| { |
| "combined_loss": 0.6848458051681519, |
| "completion_length": 399.25, |
| "epoch": 0.13454198473282442, |
| "grad_norm": 3.9089579582214355, |
| "kl": 0.0, |
| "learning_rate": 9.110975096620087e-07, |
| "loss": 0.6848, |
| "num_samples": 1.0, |
| "reward": 4.375, |
| "reward_std": 1.2500998973846436, |
| "rewards/gpt4o_holistic_reward": 4.375, |
| "rl_loss": 1.1175870895385742e-08, |
| "sft_loss": 2.2828192710876465, |
| "speech_entropy": 4.557295799255371, |
| "speech_kl": 0.0, |
| "step": 423, |
| "text_entropy": 4.6551923751831055, |
| "text_kl": 0.0, |
| "total_entropy": 4.5974225997924805 |
| }, |
| { |
| "combined_loss": 0.6781374216079712, |
| "completion_length": 464.25, |
| "epoch": 0.13486005089058525, |
| "grad_norm": 2.1914126873016357, |
| "kl": 0.0, |
| "learning_rate": 9.106709880333768e-07, |
| "loss": 0.6781, |
| "num_samples": 1.0, |
| "reward": 3.375, |
| "reward_std": 0.9031319618225098, |
| "rewards/gpt4o_holistic_reward": 3.375, |
| "rl_loss": -1.862645149230957e-08, |
| "sft_loss": 2.260457992553711, |
| "speech_entropy": 2.97335147857666, |
| "speech_kl": 0.0, |
| "step": 424, |
| "text_entropy": 3.013314723968506, |
| "text_kl": 0.0, |
| "total_entropy": 3.1752607822418213 |
| }, |
| { |
| "combined_loss": 0.671977698802948, |
| "completion_length": 338.625, |
| "epoch": 0.13517811704834606, |
| "grad_norm": 3.089296579360962, |
| "kl": 0.0, |
| "learning_rate": 9.102435584159621e-07, |
| "loss": 0.672, |
| "num_samples": 1.0, |
| "reward": 4.25, |
| "reward_std": 1.077450156211853, |
| "rewards/gpt4o_holistic_reward": 4.25, |
| "rl_loss": 0.0, |
| "sft_loss": 2.2399253845214844, |
| "speech_entropy": 3.009519100189209, |
| "speech_kl": 0.0, |
| "step": 425, |
| "text_entropy": 3.133180618286133, |
| "text_kl": 0.0, |
| "total_entropy": 3.1568689346313477 |
| }, |
| { |
| "combined_loss": 0.6367964148521423, |
| "completion_length": 485.375, |
| "epoch": 0.13549618320610687, |
| "grad_norm": 2.3259880542755127, |
| "kl": 0.0, |
| "learning_rate": 9.098152218858182e-07, |
| "loss": 0.6368, |
| "num_samples": 1.0, |
| "reward": 3.875, |
| "reward_std": 0.36445680260658264, |
| "rewards/gpt4o_holistic_reward": 3.875, |
| "rl_loss": 1.1175870895385742e-08, |
| "sft_loss": 2.122654676437378, |
| "speech_entropy": 2.219896078109741, |
| "speech_kl": 0.0, |
| "step": 426, |
| "text_entropy": 2.419771671295166, |
| "text_kl": 0.0, |
| "total_entropy": 2.319455862045288 |
| }, |
| { |
| "combined_loss": 0.7209469079971313, |
| "completion_length": 481.4375, |
| "epoch": 0.13581424936386768, |
| "grad_norm": 3.2517311573028564, |
| "kl": 0.0, |
| "learning_rate": 9.093859795212817e-07, |
| "loss": 0.7209, |
| "num_samples": 1.0, |
| "reward": 3.8125, |
| "reward_std": 0.9894567728042603, |
| "rewards/gpt4o_holistic_reward": 3.8125, |
| "rl_loss": 0.0, |
| "sft_loss": 2.403156280517578, |
| "speech_entropy": 2.589256525039673, |
| "speech_kl": 0.0, |
| "step": 427, |
| "text_entropy": 3.033482074737549, |
| "text_kl": 0.0, |
| "total_entropy": 2.8865761756896973 |
| }, |
| { |
| "combined_loss": 0.6140339970588684, |
| "completion_length": 439.125, |
| "epoch": 0.1361323155216285, |
| "grad_norm": 2.3876121044158936, |
| "kl": 0.0, |
| "learning_rate": 9.089558324029699e-07, |
| "loss": 0.614, |
| "num_samples": 1.0, |
| "reward": 2.75, |
| "reward_std": 1.2350690364837646, |
| "rewards/gpt4o_holistic_reward": 2.75, |
| "rl_loss": 7.450580596923828e-09, |
| "sft_loss": 2.0467801094055176, |
| "speech_entropy": 2.7281534671783447, |
| "speech_kl": 0.0, |
| "step": 428, |
| "text_entropy": 2.7204747200012207, |
| "text_kl": 0.0, |
| "total_entropy": 2.784191846847534 |
| }, |
| { |
| "combined_loss": 0.6906248331069946, |
| "completion_length": 602.125, |
| "epoch": 0.1364503816793893, |
| "grad_norm": 1.9201291799545288, |
| "kl": 0.0, |
| "learning_rate": 9.085247816137775e-07, |
| "loss": 0.6906, |
| "num_samples": 1.0, |
| "reward": 2.6875, |
| "reward_std": 0.45901402831077576, |
| "rewards/gpt4o_holistic_reward": 2.6875, |
| "rl_loss": 7.450580596923828e-09, |
| "sft_loss": 2.3020825386047363, |
| "speech_entropy": 2.1891660690307617, |
| "speech_kl": 0.0, |
| "step": 429, |
| "text_entropy": 1.7951006889343262, |
| "text_kl": 0.0, |
| "total_entropy": 2.125535488128662 |
| }, |
| { |
| "combined_loss": 0.8007920980453491, |
| "completion_length": 364.9375, |
| "epoch": 0.13676844783715011, |
| "grad_norm": 3.1121604442596436, |
| "kl": 0.0, |
| "learning_rate": 9.080928282388745e-07, |
| "loss": 0.8008, |
| "num_samples": 1.0, |
| "reward": 3.5, |
| "reward_std": 0.8024665117263794, |
| "rewards/gpt4o_holistic_reward": 3.5, |
| "rl_loss": -7.450580596923828e-09, |
| "sft_loss": 2.669306755065918, |
| "speech_entropy": 3.015037775039673, |
| "speech_kl": 0.0, |
| "step": 430, |
| "text_entropy": 3.2195496559143066, |
| "text_kl": 0.0, |
| "total_entropy": 3.1852550506591797 |
| }, |
| { |
| "combined_loss": 0.627656102180481, |
| "completion_length": 145.8125, |
| "epoch": 0.13708651399491095, |
| "grad_norm": 1.940796971321106, |
| "kl": 0.0, |
| "learning_rate": 9.076599733657027e-07, |
| "loss": 0.6277, |
| "num_samples": 1.0, |
| "reward": 4.875, |
| "reward_std": 0.2501000165939331, |
| "rewards/gpt4o_holistic_reward": 4.875, |
| "rl_loss": 0.0, |
| "sft_loss": 2.09218692779541, |
| "speech_entropy": 2.180631637573242, |
| "speech_kl": 0.0, |
| "step": 431, |
| "text_entropy": 0.7923494577407837, |
| "text_kl": 0.0, |
| "total_entropy": 1.9149651527404785 |
| }, |
| { |
| "combined_loss": 0.6692330241203308, |
| "completion_length": 400.875, |
| "epoch": 0.13740458015267176, |
| "grad_norm": 2.384009599685669, |
| "kl": 0.0, |
| "learning_rate": 9.072262180839741e-07, |
| "loss": 0.6692, |
| "num_samples": 1.0, |
| "reward": 3.8125, |
| "reward_std": 0.8750999569892883, |
| "rewards/gpt4o_holistic_reward": 3.8125, |
| "rl_loss": 1.1175870895385742e-08, |
| "sft_loss": 2.230776786804199, |
| "speech_entropy": 2.6541309356689453, |
| "speech_kl": 0.0, |
| "step": 432, |
| "text_entropy": 2.617983102798462, |
| "text_kl": 0.0, |
| "total_entropy": 2.7352967262268066 |
| }, |
| { |
| "combined_loss": 0.619999885559082, |
| "completion_length": 364.625, |
| "epoch": 0.13772264631043257, |
| "grad_norm": 1.7166526317596436, |
| "kl": 0.0, |
| "learning_rate": 9.06791563485667e-07, |
| "loss": 0.62, |
| "num_samples": 1.0, |
| "reward": 4.875, |
| "reward_std": 0.2501000165939331, |
| "rewards/gpt4o_holistic_reward": 4.875, |
| "rl_loss": -3.725290298461914e-09, |
| "sft_loss": 2.0666658878326416, |
| "speech_entropy": 2.0609240531921387, |
| "speech_kl": 0.0, |
| "step": 433, |
| "text_entropy": 1.1358726024627686, |
| "text_kl": 0.0, |
| "total_entropy": 1.8963937759399414 |
| }, |
| { |
| "combined_loss": 0.6619538068771362, |
| "completion_length": 455.8125, |
| "epoch": 0.13804071246819338, |
| "grad_norm": 2.4818007946014404, |
| "kl": 0.0, |
| "learning_rate": 9.063560106650238e-07, |
| "loss": 0.662, |
| "num_samples": 1.0, |
| "reward": 3.8125, |
| "reward_std": 1.363730549812317, |
| "rewards/gpt4o_holistic_reward": 3.8125, |
| "rl_loss": -1.862645149230957e-09, |
| "sft_loss": 2.206512451171875, |
| "speech_entropy": 3.38385272026062, |
| "speech_kl": 0.0, |
| "step": 434, |
| "text_entropy": 3.1482739448547363, |
| "text_kl": 0.0, |
| "total_entropy": 3.4985315799713135 |
| }, |
| { |
| "combined_loss": 0.6583099365234375, |
| "completion_length": 388.0, |
| "epoch": 0.1383587786259542, |
| "grad_norm": 1.6376653909683228, |
| "kl": 0.0, |
| "learning_rate": 9.059195607185481e-07, |
| "loss": 0.6583, |
| "num_samples": 1.0, |
| "reward": 5.0, |
| "reward_std": 9.999999747378752e-05, |
| "rewards/gpt4o_holistic_reward": 5.0, |
| "rl_loss": 0.0, |
| "sft_loss": 2.194366455078125, |
| "speech_entropy": 2.094740152359009, |
| "speech_kl": 0.0, |
| "step": 435, |
| "text_entropy": 1.0836068391799927, |
| "text_kl": 0.0, |
| "total_entropy": 1.9306923151016235 |
| }, |
| { |
| "combined_loss": 0.6782636642456055, |
| "completion_length": 487.8125, |
| "epoch": 0.138676844783715, |
| "grad_norm": 2.002826452255249, |
| "kl": 0.0, |
| "learning_rate": 9.054822147450022e-07, |
| "loss": 0.6783, |
| "num_samples": 1.0, |
| "reward": 4.375, |
| "reward_std": 0.8904882073402405, |
| "rewards/gpt4o_holistic_reward": 4.375, |
| "rl_loss": 1.862645149230957e-08, |
| "sft_loss": 2.260878562927246, |
| "speech_entropy": 1.9968137741088867, |
| "speech_kl": 0.0, |
| "step": 436, |
| "text_entropy": 1.1932947635650635, |
| "text_kl": 0.0, |
| "total_entropy": 1.8503947257995605 |
| }, |
| { |
| "combined_loss": 0.7755292654037476, |
| "completion_length": 452.0625, |
| "epoch": 0.1389949109414758, |
| "grad_norm": 1.9055498838424683, |
| "kl": 0.0, |
| "learning_rate": 9.050439738454042e-07, |
| "loss": 0.7755, |
| "num_samples": 1.0, |
| "reward": 3.5625, |
| "reward_std": 1.058112621307373, |
| "rewards/gpt4o_holistic_reward": 3.5625, |
| "rl_loss": 3.725290298461914e-09, |
| "sft_loss": 2.585097551345825, |
| "speech_entropy": 1.9971908330917358, |
| "speech_kl": 0.0, |
| "step": 437, |
| "text_entropy": 1.1110544204711914, |
| "text_kl": 0.0, |
| "total_entropy": 1.8316848278045654 |
| }, |
| { |
| "combined_loss": 0.6731459498405457, |
| "completion_length": 323.3125, |
| "epoch": 0.13931297709923665, |
| "grad_norm": 2.3526909351348877, |
| "kl": 0.0, |
| "learning_rate": 9.046048391230247e-07, |
| "loss": 0.6731, |
| "num_samples": 1.0, |
| "reward": 4.6875, |
| "reward_std": 0.6251000165939331, |
| "rewards/gpt4o_holistic_reward": 4.6875, |
| "rl_loss": 2.2351741790771484e-08, |
| "sft_loss": 2.2438197135925293, |
| "speech_entropy": 2.0683212280273438, |
| "speech_kl": 0.0, |
| "step": 438, |
| "text_entropy": 1.1782288551330566, |
| "text_kl": 0.0, |
| "total_entropy": 1.9051158428192139 |
| }, |
| { |
| "combined_loss": 0.7724200487136841, |
| "completion_length": 532.0, |
| "epoch": 0.13963104325699746, |
| "grad_norm": 1.7427912950515747, |
| "kl": 0.0, |
| "learning_rate": 9.041648116833853e-07, |
| "loss": 0.7724, |
| "num_samples": 1.0, |
| "reward": 3.75, |
| "reward_std": 0.20422415435314178, |
| "rewards/gpt4o_holistic_reward": 3.75, |
| "rl_loss": 0.0, |
| "sft_loss": 2.5747334957122803, |
| "speech_entropy": 2.0349836349487305, |
| "speech_kl": 0.0, |
| "step": 439, |
| "text_entropy": 1.427042841911316, |
| "text_kl": 0.0, |
| "total_entropy": 1.928051233291626 |
| }, |
| { |
| "combined_loss": 0.5910226106643677, |
| "completion_length": 380.6875, |
| "epoch": 0.13994910941475827, |
| "grad_norm": 1.7229056358337402, |
| "kl": 0.0, |
| "learning_rate": 9.037238926342543e-07, |
| "loss": 0.591, |
| "num_samples": 1.0, |
| "reward": 5.0, |
| "reward_std": 9.999999747378752e-05, |
| "rewards/gpt4o_holistic_reward": 5.0, |
| "rl_loss": 0.0, |
| "sft_loss": 1.970075249671936, |
| "speech_entropy": 1.9323487281799316, |
| "speech_kl": 0.0, |
| "step": 440, |
| "text_entropy": 0.8262702226638794, |
| "text_kl": 0.0, |
| "total_entropy": 1.7402493953704834 |
| }, |
| { |
| "combined_loss": 0.6713898777961731, |
| "completion_length": 546.75, |
| "epoch": 0.14026717557251908, |
| "grad_norm": 1.5187904834747314, |
| "kl": 0.0, |
| "learning_rate": 9.032820830856449e-07, |
| "loss": 0.6714, |
| "num_samples": 1.0, |
| "reward": 3.4375, |
| "reward_std": 0.3751000165939331, |
| "rewards/gpt4o_holistic_reward": 3.4375, |
| "rl_loss": -3.725290298461914e-09, |
| "sft_loss": 2.2379660606384277, |
| "speech_entropy": 2.4945101737976074, |
| "speech_kl": 0.0, |
| "step": 441, |
| "text_entropy": 2.5544350147247314, |
| "text_kl": 0.0, |
| "total_entropy": 2.6159682273864746 |
| }, |
| { |
| "combined_loss": 0.6225212812423706, |
| "completion_length": 336.75, |
| "epoch": 0.1405852417302799, |
| "grad_norm": 1.7801567316055298, |
| "kl": 0.0, |
| "learning_rate": 9.028393841498121e-07, |
| "loss": 0.6225, |
| "num_samples": 1.0, |
| "reward": 4.125, |
| "reward_std": 0.14443756639957428, |
| "rewards/gpt4o_holistic_reward": 4.125, |
| "rl_loss": -3.725290298461914e-09, |
| "sft_loss": 2.075070858001709, |
| "speech_entropy": 2.058767080307007, |
| "speech_kl": 0.0, |
| "step": 442, |
| "text_entropy": 0.9935916662216187, |
| "text_kl": 0.0, |
| "total_entropy": 1.8773467540740967 |
| }, |
| { |
| "combined_loss": 0.6718326210975647, |
| "completion_length": 393.75, |
| "epoch": 0.1409033078880407, |
| "grad_norm": 1.7748103141784668, |
| "kl": 0.0, |
| "learning_rate": 9.023957969412499e-07, |
| "loss": 0.6718, |
| "num_samples": 1.0, |
| "reward": 2.375, |
| "reward_std": 0.2501000165939331, |
| "rewards/gpt4o_holistic_reward": 2.375, |
| "rl_loss": 0.0, |
| "sft_loss": 2.2394418716430664, |
| "speech_entropy": 2.0696630477905273, |
| "speech_kl": 0.0, |
| "step": 443, |
| "text_entropy": 1.2485952377319336, |
| "text_kl": 0.0, |
| "total_entropy": 1.9180147647857666 |
| }, |
| { |
| "combined_loss": 0.6381258964538574, |
| "completion_length": 492.1875, |
| "epoch": 0.14122137404580154, |
| "grad_norm": 3.0544703006744385, |
| "kl": 0.0, |
| "learning_rate": 9.019513225766888e-07, |
| "loss": 0.6381, |
| "num_samples": 1.0, |
| "reward": 4.125, |
| "reward_std": 0.8483423590660095, |
| "rewards/gpt4o_holistic_reward": 4.125, |
| "rl_loss": 0.0, |
| "sft_loss": 2.1270861625671387, |
| "speech_entropy": 2.6119399070739746, |
| "speech_kl": 0.0, |
| "step": 444, |
| "text_entropy": 1.2080113887786865, |
| "text_kl": 0.0, |
| "total_entropy": 2.211930990219116 |
| }, |
| { |
| "combined_loss": 0.6671527624130249, |
| "completion_length": 339.375, |
| "epoch": 0.14153944020356235, |
| "grad_norm": 3.0724539756774902, |
| "kl": 0.0, |
| "learning_rate": 9.01505962175092e-07, |
| "loss": 0.6672, |
| "num_samples": 1.0, |
| "reward": 4.3125, |
| "reward_std": 1.125100016593933, |
| "rewards/gpt4o_holistic_reward": 4.3125, |
| "rl_loss": 1.862645149230957e-09, |
| "sft_loss": 2.2238426208496094, |
| "speech_entropy": 2.0776891708374023, |
| "speech_kl": 0.0, |
| "step": 445, |
| "text_entropy": 1.0866782665252686, |
| "text_kl": 0.0, |
| "total_entropy": 1.9007325172424316 |
| }, |
| { |
| "combined_loss": 0.6805820465087891, |
| "completion_length": 565.625, |
| "epoch": 0.14185750636132316, |
| "grad_norm": 2.9971096515655518, |
| "kl": 0.0, |
| "learning_rate": 9.010597168576542e-07, |
| "loss": 0.6806, |
| "num_samples": 1.0, |
| "reward": 3.125, |
| "reward_std": 0.14443756639957428, |
| "rewards/gpt4o_holistic_reward": 3.125, |
| "rl_loss": 7.450580596923828e-09, |
| "sft_loss": 2.268606662750244, |
| "speech_entropy": 3.4692184925079346, |
| "speech_kl": 0.0, |
| "step": 446, |
| "text_entropy": 4.159717082977295, |
| "text_kl": 0.0, |
| "total_entropy": 3.8382232189178467 |
| }, |
| { |
| "combined_loss": 0.6768923401832581, |
| "completion_length": 378.5, |
| "epoch": 0.14217557251908397, |
| "grad_norm": 1.5186785459518433, |
| "kl": 0.0, |
| "learning_rate": 9.006125877477975e-07, |
| "loss": 0.6769, |
| "num_samples": 1.0, |
| "reward": 4.0625, |
| "reward_std": 0.1251000016927719, |
| "rewards/gpt4o_holistic_reward": 4.0625, |
| "rl_loss": -3.725290298461914e-09, |
| "sft_loss": 2.256307601928711, |
| "speech_entropy": 2.0326013565063477, |
| "speech_kl": 0.0, |
| "step": 447, |
| "text_entropy": 1.1156654357910156, |
| "text_kl": 0.0, |
| "total_entropy": 1.8572213649749756 |
| }, |
| { |
| "combined_loss": 0.6424182653427124, |
| "completion_length": 378.5625, |
| "epoch": 0.14249363867684478, |
| "grad_norm": 1.8854279518127441, |
| "kl": 0.0, |
| "learning_rate": 9.001645759711687e-07, |
| "loss": 0.6424, |
| "num_samples": 1.0, |
| "reward": 3.625, |
| "reward_std": 0.6008730530738831, |
| "rewards/gpt4o_holistic_reward": 3.625, |
| "rl_loss": -7.450580596923828e-09, |
| "sft_loss": 2.1413941383361816, |
| "speech_entropy": 2.0020644664764404, |
| "speech_kl": 0.0, |
| "step": 448, |
| "text_entropy": 0.8331085443496704, |
| "text_kl": 0.0, |
| "total_entropy": 1.8029673099517822 |
| }, |
| { |
| "combined_loss": 0.6341080665588379, |
| "completion_length": 501.1875, |
| "epoch": 0.1428117048346056, |
| "grad_norm": 1.8343334197998047, |
| "kl": 0.0, |
| "learning_rate": 8.997156826556369e-07, |
| "loss": 0.6341, |
| "num_samples": 1.0, |
| "reward": 4.0625, |
| "reward_std": 0.8600690364837646, |
| "rewards/gpt4o_holistic_reward": 4.0625, |
| "rl_loss": -1.862645149230957e-09, |
| "sft_loss": 2.1136932373046875, |
| "speech_entropy": 2.640713691711426, |
| "speech_kl": 0.0, |
| "step": 449, |
| "text_entropy": 2.6889724731445312, |
| "text_kl": 0.0, |
| "total_entropy": 2.807018280029297 |
| }, |
| { |
| "combined_loss": 0.6706026792526245, |
| "completion_length": 411.875, |
| "epoch": 0.1431297709923664, |
| "grad_norm": 2.0064008235931396, |
| "kl": 0.0, |
| "learning_rate": 8.992659089312905e-07, |
| "loss": 0.6706, |
| "num_samples": 1.0, |
| "reward": 4.5625, |
| "reward_std": 0.6251000165939331, |
| "rewards/gpt4o_holistic_reward": 4.5625, |
| "rl_loss": 3.725290298461914e-09, |
| "sft_loss": 2.235342025756836, |
| "speech_entropy": 2.0443549156188965, |
| "speech_kl": 0.0, |
| "step": 450, |
| "text_entropy": 1.1791572570800781, |
| "text_kl": 0.0, |
| "total_entropy": 1.8973615169525146 |
| }, |
| { |
| "combined_loss": 0.7649298906326294, |
| "completion_length": 356.4375, |
| "epoch": 0.14344783715012724, |
| "grad_norm": 2.9334285259246826, |
| "kl": 0.0, |
| "learning_rate": 8.988152559304345e-07, |
| "loss": 0.7649, |
| "num_samples": 1.0, |
| "reward": 3.875, |
| "reward_std": 1.4256925582885742, |
| "rewards/gpt4o_holistic_reward": 3.875, |
| "rl_loss": 0.0, |
| "sft_loss": 2.5497660636901855, |
| "speech_entropy": 2.2932775020599365, |
| "speech_kl": 0.0, |
| "step": 451, |
| "text_entropy": 1.6037144660949707, |
| "text_kl": 0.0, |
| "total_entropy": 2.1606814861297607 |
| }, |
| { |
| "combined_loss": 0.8326526880264282, |
| "completion_length": 575.9375, |
| "epoch": 0.14376590330788805, |
| "grad_norm": 2.880450963973999, |
| "kl": 0.0, |
| "learning_rate": 8.983637247875872e-07, |
| "loss": 0.8327, |
| "num_samples": 1.0, |
| "reward": 4.125, |
| "reward_std": 1.1683900356292725, |
| "rewards/gpt4o_holistic_reward": 4.125, |
| "rl_loss": 5.587935447692871e-09, |
| "sft_loss": 2.7755088806152344, |
| "speech_entropy": 2.5213093757629395, |
| "speech_kl": 0.0, |
| "step": 452, |
| "text_entropy": 1.762268304824829, |
| "text_kl": 0.0, |
| "total_entropy": 2.381089448928833 |
| }, |
| { |
| "combined_loss": 0.7429494261741638, |
| "completion_length": 398.0, |
| "epoch": 0.14408396946564886, |
| "grad_norm": 2.2405805587768555, |
| "kl": 0.0, |
| "learning_rate": 8.979113166394775e-07, |
| "loss": 0.7429, |
| "num_samples": 1.0, |
| "reward": 3.75, |
| "reward_std": 0.5000999569892883, |
| "rewards/gpt4o_holistic_reward": 3.75, |
| "rl_loss": 3.725290298461914e-09, |
| "sft_loss": 2.4764981269836426, |
| "speech_entropy": 3.2521719932556152, |
| "speech_kl": 0.0, |
| "step": 453, |
| "text_entropy": 3.165997266769409, |
| "text_kl": 0.0, |
| "total_entropy": 3.449254035949707 |
| }, |
| { |
| "combined_loss": 0.5944318175315857, |
| "completion_length": 424.1875, |
| "epoch": 0.14440203562340967, |
| "grad_norm": 1.6569174528121948, |
| "kl": 0.0, |
| "learning_rate": 8.974580326250424e-07, |
| "loss": 0.5944, |
| "num_samples": 1.0, |
| "reward": 3.3125, |
| "reward_std": 0.3751000165939331, |
| "rewards/gpt4o_holistic_reward": 3.3125, |
| "rl_loss": 1.862645149230957e-09, |
| "sft_loss": 1.9814393520355225, |
| "speech_entropy": 1.9895391464233398, |
| "speech_kl": 0.0, |
| "step": 454, |
| "text_entropy": 0.9808429479598999, |
| "text_kl": 0.0, |
| "total_entropy": 1.808826208114624 |
| }, |
| { |
| "combined_loss": 0.6611145734786987, |
| "completion_length": 466.625, |
| "epoch": 0.14472010178117048, |
| "grad_norm": 1.9934300184249878, |
| "kl": 0.0, |
| "learning_rate": 8.970038738854244e-07, |
| "loss": 0.6611, |
| "num_samples": 1.0, |
| "reward": 3.9375, |
| "reward_std": 0.5194375514984131, |
| "rewards/gpt4o_holistic_reward": 3.9375, |
| "rl_loss": 0.0, |
| "sft_loss": 2.2037153244018555, |
| "speech_entropy": 2.034078359603882, |
| "speech_kl": 0.0, |
| "step": 455, |
| "text_entropy": 1.189087986946106, |
| "text_kl": 0.0, |
| "total_entropy": 1.8778494596481323 |
| }, |
| { |
| "combined_loss": 0.6767491698265076, |
| "completion_length": 517.75, |
| "epoch": 0.1450381679389313, |
| "grad_norm": 1.939832091331482, |
| "kl": 0.0, |
| "learning_rate": 8.965488415639671e-07, |
| "loss": 0.6767, |
| "num_samples": 1.0, |
| "reward": 2.875, |
| "reward_std": 0.7217878103256226, |
| "rewards/gpt4o_holistic_reward": 2.875, |
| "rl_loss": 7.450580596923828e-09, |
| "sft_loss": 2.2558302879333496, |
| "speech_entropy": 2.4079430103302, |
| "speech_kl": 0.0, |
| "step": 456, |
| "text_entropy": 2.5751752853393555, |
| "text_kl": 0.0, |
| "total_entropy": 2.6069483757019043 |
| }, |
| { |
| "combined_loss": 0.6979454159736633, |
| "completion_length": 484.125, |
| "epoch": 0.1453562340966921, |
| "grad_norm": 1.6610511541366577, |
| "kl": 0.0, |
| "learning_rate": 8.960929368062138e-07, |
| "loss": 0.6979, |
| "num_samples": 1.0, |
| "reward": 3.75, |
| "reward_std": 0.5774502754211426, |
| "rewards/gpt4o_holistic_reward": 3.75, |
| "rl_loss": 0.0, |
| "sft_loss": 2.3264846801757812, |
| "speech_entropy": 2.0884976387023926, |
| "speech_kl": 0.0, |
| "step": 457, |
| "text_entropy": 1.078056812286377, |
| "text_kl": 0.0, |
| "total_entropy": 1.8959829807281494 |
| }, |
| { |
| "combined_loss": 0.6162172555923462, |
| "completion_length": 581.4375, |
| "epoch": 0.14567430025445294, |
| "grad_norm": 1.4069401025772095, |
| "kl": 0.0, |
| "learning_rate": 8.956361607599043e-07, |
| "loss": 0.6162, |
| "num_samples": 1.0, |
| "reward": 3.625, |
| "reward_std": 0.4331127107143402, |
| "rewards/gpt4o_holistic_reward": 3.625, |
| "rl_loss": 0.0, |
| "sft_loss": 2.0540573596954346, |
| "speech_entropy": 1.9977447986602783, |
| "speech_kl": 0.0, |
| "step": 458, |
| "text_entropy": 0.7805925607681274, |
| "text_kl": 0.0, |
| "total_entropy": 1.7389767169952393 |
| }, |
| { |
| "combined_loss": 0.677904486656189, |
| "completion_length": 249.4375, |
| "epoch": 0.14599236641221375, |
| "grad_norm": 2.1168832778930664, |
| "kl": 0.0, |
| "learning_rate": 8.951785145749719e-07, |
| "loss": 0.6779, |
| "num_samples": 1.0, |
| "reward": 3.5625, |
| "reward_std": 0.7090140581130981, |
| "rewards/gpt4o_holistic_reward": 3.5625, |
| "rl_loss": 1.1175870895385742e-08, |
| "sft_loss": 2.259681463241577, |
| "speech_entropy": 2.0867042541503906, |
| "speech_kl": 0.0, |
| "step": 459, |
| "text_entropy": 0.5684058666229248, |
| "text_kl": 0.0, |
| "total_entropy": 1.8018689155578613 |
| }, |
| { |
| "combined_loss": 0.6343377828598022, |
| "completion_length": 347.4375, |
| "epoch": 0.14631043256997456, |
| "grad_norm": 2.351423501968384, |
| "kl": 0.0, |
| "learning_rate": 8.9471999940354e-07, |
| "loss": 0.6343, |
| "num_samples": 1.0, |
| "reward": 3.375, |
| "reward_std": 1.1404881477355957, |
| "rewards/gpt4o_holistic_reward": 3.375, |
| "rl_loss": 0.0, |
| "sft_loss": 2.1144590377807617, |
| "speech_entropy": 2.272289752960205, |
| "speech_kl": 0.0, |
| "step": 460, |
| "text_entropy": 1.7216747999191284, |
| "text_kl": 0.0, |
| "total_entropy": 2.1894326210021973 |
| }, |
| { |
| "combined_loss": 0.7236204147338867, |
| "completion_length": 413.1875, |
| "epoch": 0.14662849872773537, |
| "grad_norm": 2.023482322692871, |
| "kl": 0.0, |
| "learning_rate": 8.942606163999204e-07, |
| "loss": 0.7236, |
| "num_samples": 1.0, |
| "reward": 3.6875, |
| "reward_std": 0.6250999569892883, |
| "rewards/gpt4o_holistic_reward": 3.6875, |
| "rl_loss": 7.450580596923828e-09, |
| "sft_loss": 2.4120678901672363, |
| "speech_entropy": 2.0738351345062256, |
| "speech_kl": 0.0, |
| "step": 461, |
| "text_entropy": 1.424910306930542, |
| "text_kl": 0.0, |
| "total_entropy": 1.9624372720718384 |
| }, |
| { |
| "combined_loss": 0.6593168377876282, |
| "completion_length": 440.625, |
| "epoch": 0.14694656488549618, |
| "grad_norm": 2.023776054382324, |
| "kl": 0.0, |
| "learning_rate": 8.93800366720609e-07, |
| "loss": 0.6593, |
| "num_samples": 1.0, |
| "reward": 3.3125, |
| "reward_std": 0.651972770690918, |
| "rewards/gpt4o_holistic_reward": 3.3125, |
| "rl_loss": -3.725290298461914e-09, |
| "sft_loss": 2.197722911834717, |
| "speech_entropy": 2.1023144721984863, |
| "speech_kl": 0.0, |
| "step": 462, |
| "text_entropy": 1.0320696830749512, |
| "text_kl": 0.0, |
| "total_entropy": 1.9018843173980713 |
| }, |
| { |
| "combined_loss": 0.6318823099136353, |
| "completion_length": 373.1875, |
| "epoch": 0.147264631043257, |
| "grad_norm": 2.802096128463745, |
| "kl": 0.0, |
| "learning_rate": 8.933392515242838e-07, |
| "loss": 0.6319, |
| "num_samples": 1.0, |
| "reward": 3.375, |
| "reward_std": 0.8731511235237122, |
| "rewards/gpt4o_holistic_reward": 3.375, |
| "rl_loss": 0.0, |
| "sft_loss": 2.106274366378784, |
| "speech_entropy": 3.7126779556274414, |
| "speech_kl": 0.0, |
| "step": 463, |
| "text_entropy": 3.271230697631836, |
| "text_kl": 0.0, |
| "total_entropy": 3.7780895233154297 |
| }, |
| { |
| "combined_loss": 0.6602550745010376, |
| "completion_length": 463.25, |
| "epoch": 0.1475826972010178, |
| "grad_norm": 2.1378464698791504, |
| "kl": 0.0, |
| "learning_rate": 8.928772719718018e-07, |
| "loss": 0.6603, |
| "num_samples": 1.0, |
| "reward": 4.3125, |
| "reward_std": 0.6477051377296448, |
| "rewards/gpt4o_holistic_reward": 4.3125, |
| "rl_loss": -7.450580596923828e-09, |
| "sft_loss": 2.200850248336792, |
| "speech_entropy": 1.9239245653152466, |
| "speech_kl": 0.0, |
| "step": 464, |
| "text_entropy": 1.3505656719207764, |
| "text_kl": 0.0, |
| "total_entropy": 1.8138980865478516 |
| }, |
| { |
| "combined_loss": 0.6675082445144653, |
| "completion_length": 347.75, |
| "epoch": 0.14790076335877864, |
| "grad_norm": 2.2810842990875244, |
| "kl": 0.0, |
| "learning_rate": 8.924144292261962e-07, |
| "loss": 0.6675, |
| "num_samples": 1.0, |
| "reward": 3.1875, |
| "reward_std": 0.8750999569892883, |
| "rewards/gpt4o_holistic_reward": 3.1875, |
| "rl_loss": -2.2351741790771484e-08, |
| "sft_loss": 2.225027561187744, |
| "speech_entropy": 2.1317272186279297, |
| "speech_kl": 0.0, |
| "step": 465, |
| "text_entropy": 1.1749463081359863, |
| "text_kl": 0.0, |
| "total_entropy": 1.9694660902023315 |
| }, |
| { |
| "combined_loss": 0.7008163928985596, |
| "completion_length": 294.5625, |
| "epoch": 0.14821882951653945, |
| "grad_norm": 2.6175572872161865, |
| "kl": 0.0, |
| "learning_rate": 8.919507244526726e-07, |
| "loss": 0.7008, |
| "num_samples": 1.0, |
| "reward": 4.875, |
| "reward_std": 0.2501000165939331, |
| "rewards/gpt4o_holistic_reward": 4.875, |
| "rl_loss": 3.725290298461914e-09, |
| "sft_loss": 2.336054563522339, |
| "speech_entropy": 2.284971237182617, |
| "speech_kl": 0.0, |
| "step": 466, |
| "text_entropy": 1.1186554431915283, |
| "text_kl": 0.0, |
| "total_entropy": 2.061189889907837 |
| }, |
| { |
| "combined_loss": 0.7040484547615051, |
| "completion_length": 343.25, |
| "epoch": 0.14853689567430026, |
| "grad_norm": 2.2278120517730713, |
| "kl": 0.0, |
| "learning_rate": 8.914861588186076e-07, |
| "loss": 0.704, |
| "num_samples": 1.0, |
| "reward": 4.0625, |
| "reward_std": 0.6038135886192322, |
| "rewards/gpt4o_holistic_reward": 4.0625, |
| "rl_loss": 0.0, |
| "sft_loss": 2.346827983856201, |
| "speech_entropy": 2.906318426132202, |
| "speech_kl": 0.0, |
| "step": 467, |
| "text_entropy": 3.0286753177642822, |
| "text_kl": 0.0, |
| "total_entropy": 3.109015464782715 |
| }, |
| { |
| "combined_loss": 0.6465004682540894, |
| "completion_length": 415.1875, |
| "epoch": 0.14885496183206107, |
| "grad_norm": 2.1413097381591797, |
| "kl": 0.0, |
| "learning_rate": 8.910207334935446e-07, |
| "loss": 0.6465, |
| "num_samples": 1.0, |
| "reward": 4.0, |
| "reward_std": 0.5001000165939331, |
| "rewards/gpt4o_holistic_reward": 4.0, |
| "rl_loss": -3.725290298461914e-09, |
| "sft_loss": 2.155001640319824, |
| "speech_entropy": 2.1206655502319336, |
| "speech_kl": 0.0, |
| "step": 468, |
| "text_entropy": 1.0818634033203125, |
| "text_kl": 0.0, |
| "total_entropy": 1.9281299114227295 |
| }, |
| { |
| "combined_loss": 0.7584635615348816, |
| "completion_length": 545.25, |
| "epoch": 0.14917302798982188, |
| "grad_norm": 1.5538190603256226, |
| "kl": 0.0, |
| "learning_rate": 8.90554449649191e-07, |
| "loss": 0.7585, |
| "num_samples": 1.0, |
| "reward": 5.0, |
| "reward_std": 9.999999747378752e-05, |
| "rewards/gpt4o_holistic_reward": 5.0, |
| "rl_loss": 0.0, |
| "sft_loss": 2.528211832046509, |
| "speech_entropy": 2.115117073059082, |
| "speech_kl": 0.0, |
| "step": 469, |
| "text_entropy": 1.4869662523269653, |
| "text_kl": 0.0, |
| "total_entropy": 2.0077667236328125 |
| }, |
| { |
| "combined_loss": 0.7136243581771851, |
| "completion_length": 401.3125, |
| "epoch": 0.1494910941475827, |
| "grad_norm": 1.8857200145721436, |
| "kl": 0.0, |
| "learning_rate": 8.900873084594161e-07, |
| "loss": 0.7136, |
| "num_samples": 1.0, |
| "reward": 4.8125, |
| "reward_std": 0.2694375813007355, |
| "rewards/gpt4o_holistic_reward": 4.8125, |
| "rl_loss": -3.725290298461914e-09, |
| "sft_loss": 2.3787477016448975, |
| "speech_entropy": 2.175069808959961, |
| "speech_kl": 0.0, |
| "step": 470, |
| "text_entropy": 1.4863579273223877, |
| "text_kl": 0.0, |
| "total_entropy": 2.0467123985290527 |
| }, |
| { |
| "combined_loss": 0.7089301347732544, |
| "completion_length": 378.0625, |
| "epoch": 0.14980916030534353, |
| "grad_norm": 2.2557709217071533, |
| "kl": 0.0, |
| "learning_rate": 8.896193111002475e-07, |
| "loss": 0.7089, |
| "num_samples": 1.0, |
| "reward": 3.8125, |
| "reward_std": 0.739456832408905, |
| "rewards/gpt4o_holistic_reward": 3.8125, |
| "rl_loss": 0.0, |
| "sft_loss": 2.363100528717041, |
| "speech_entropy": 2.1490824222564697, |
| "speech_kl": 0.0, |
| "step": 471, |
| "text_entropy": 0.9428769946098328, |
| "text_kl": 0.0, |
| "total_entropy": 1.9167104959487915 |
| }, |
| { |
| "combined_loss": 0.6535341143608093, |
| "completion_length": 417.375, |
| "epoch": 0.15012722646310434, |
| "grad_norm": 1.727138876914978, |
| "kl": 0.0, |
| "learning_rate": 8.891504587498674e-07, |
| "loss": 0.6535, |
| "num_samples": 1.0, |
| "reward": 3.5625, |
| "reward_std": 0.2694375813007355, |
| "rewards/gpt4o_holistic_reward": 3.5625, |
| "rl_loss": 0.0, |
| "sft_loss": 2.1784467697143555, |
| "speech_entropy": 2.090775489807129, |
| "speech_kl": 0.0, |
| "step": 472, |
| "text_entropy": 1.3160064220428467, |
| "text_kl": 0.0, |
| "total_entropy": 1.9528473615646362 |
| }, |
| { |
| "combined_loss": 0.7152138948440552, |
| "completion_length": 432.625, |
| "epoch": 0.15044529262086515, |
| "grad_norm": 1.9705350399017334, |
| "kl": 0.0, |
| "learning_rate": 8.886807525886113e-07, |
| "loss": 0.7152, |
| "num_samples": 1.0, |
| "reward": 4.75, |
| "reward_std": 0.5001000165939331, |
| "rewards/gpt4o_holistic_reward": 4.75, |
| "rl_loss": 1.1175870895385742e-08, |
| "sft_loss": 2.3840465545654297, |
| "speech_entropy": 2.1602611541748047, |
| "speech_kl": 0.0, |
| "step": 473, |
| "text_entropy": 1.172673225402832, |
| "text_kl": 0.0, |
| "total_entropy": 1.9916683435440063 |
| }, |
| { |
| "combined_loss": 0.6396362781524658, |
| "completion_length": 311.8125, |
| "epoch": 0.15076335877862596, |
| "grad_norm": 1.9259790182113647, |
| "kl": 0.0, |
| "learning_rate": 8.882101937989642e-07, |
| "loss": 0.6396, |
| "num_samples": 1.0, |
| "reward": 4.1875, |
| "reward_std": 0.5194376111030579, |
| "rewards/gpt4o_holistic_reward": 4.1875, |
| "rl_loss": 1.4901161193847656e-08, |
| "sft_loss": 2.1321208477020264, |
| "speech_entropy": 2.147437334060669, |
| "speech_kl": 0.0, |
| "step": 474, |
| "text_entropy": 0.9479535818099976, |
| "text_kl": 0.0, |
| "total_entropy": 1.929750919342041 |
| }, |
| { |
| "combined_loss": 0.6557704210281372, |
| "completion_length": 485.6875, |
| "epoch": 0.15108142493638677, |
| "grad_norm": 2.2060537338256836, |
| "kl": 0.0, |
| "learning_rate": 8.87738783565557e-07, |
| "loss": 0.6558, |
| "num_samples": 1.0, |
| "reward": 3.625, |
| "reward_std": 0.933112621307373, |
| "rewards/gpt4o_holistic_reward": 3.625, |
| "rl_loss": 0.0, |
| "sft_loss": 2.185901165008545, |
| "speech_entropy": 2.608290672302246, |
| "speech_kl": 0.0, |
| "step": 475, |
| "text_entropy": 2.9066762924194336, |
| "text_kl": 0.0, |
| "total_entropy": 2.873114585876465 |
| }, |
| { |
| "combined_loss": 0.6064234972000122, |
| "completion_length": 427.6875, |
| "epoch": 0.15139949109414758, |
| "grad_norm": 1.4221584796905518, |
| "kl": 0.0, |
| "learning_rate": 8.872665230751643e-07, |
| "loss": 0.6064, |
| "num_samples": 1.0, |
| "reward": 4.8125, |
| "reward_std": 0.3751000165939331, |
| "rewards/gpt4o_holistic_reward": 4.8125, |
| "rl_loss": 3.725290298461914e-09, |
| "sft_loss": 2.021411657333374, |
| "speech_entropy": 1.9495731592178345, |
| "speech_kl": 0.0, |
| "step": 476, |
| "text_entropy": 0.6150962114334106, |
| "text_kl": 0.0, |
| "total_entropy": 1.6934847831726074 |
| }, |
| { |
| "combined_loss": 0.6592838168144226, |
| "completion_length": 535.5, |
| "epoch": 0.15171755725190839, |
| "grad_norm": 1.9688835144042969, |
| "kl": 0.0, |
| "learning_rate": 8.867934135167016e-07, |
| "loss": 0.6593, |
| "num_samples": 1.0, |
| "reward": 2.5, |
| "reward_std": 0.8944375514984131, |
| "rewards/gpt4o_holistic_reward": 2.5, |
| "rl_loss": -1.1175870895385742e-08, |
| "sft_loss": 2.197612762451172, |
| "speech_entropy": 2.0396499633789062, |
| "speech_kl": 0.0, |
| "step": 477, |
| "text_entropy": 1.2544045448303223, |
| "text_kl": 0.0, |
| "total_entropy": 1.8939778804779053 |
| }, |
| { |
| "combined_loss": 0.7103521823883057, |
| "completion_length": 476.3125, |
| "epoch": 0.15203562340966922, |
| "grad_norm": 1.6043519973754883, |
| "kl": 0.0, |
| "learning_rate": 8.863194560812214e-07, |
| "loss": 0.7104, |
| "num_samples": 1.0, |
| "reward": 5.0, |
| "reward_std": 9.999999747378752e-05, |
| "rewards/gpt4o_holistic_reward": 5.0, |
| "rl_loss": 0.0, |
| "sft_loss": 2.367840528488159, |
| "speech_entropy": 2.1345765590667725, |
| "speech_kl": 0.0, |
| "step": 478, |
| "text_entropy": 1.4203035831451416, |
| "text_kl": 0.0, |
| "total_entropy": 2.0092082023620605 |
| }, |
| { |
| "combined_loss": 0.6551531553268433, |
| "completion_length": 424.875, |
| "epoch": 0.15235368956743003, |
| "grad_norm": 1.9476300477981567, |
| "kl": 0.0, |
| "learning_rate": 8.858446519619112e-07, |
| "loss": 0.6552, |
| "num_samples": 1.0, |
| "reward": 4.0625, |
| "reward_std": 0.9063550233840942, |
| "rewards/gpt4o_holistic_reward": 4.0625, |
| "rl_loss": 7.450580596923828e-09, |
| "sft_loss": 2.1838438510894775, |
| "speech_entropy": 2.67842435836792, |
| "speech_kl": 0.0, |
| "step": 479, |
| "text_entropy": 2.9070305824279785, |
| "text_kl": 0.0, |
| "total_entropy": 2.8740692138671875 |
| }, |
| { |
| "combined_loss": 0.6260417699813843, |
| "completion_length": 583.6875, |
| "epoch": 0.15267175572519084, |
| "grad_norm": 1.565351128578186, |
| "kl": 0.0, |
| "learning_rate": 8.853690023540895e-07, |
| "loss": 0.626, |
| "num_samples": 1.0, |
| "reward": 4.3125, |
| "reward_std": 0.1251000016927719, |
| "rewards/gpt4o_holistic_reward": 4.3125, |
| "rl_loss": -1.4901161193847656e-08, |
| "sft_loss": 2.086805820465088, |
| "speech_entropy": 1.9153913259506226, |
| "speech_kl": 0.0, |
| "step": 480, |
| "text_entropy": 0.715573251247406, |
| "text_kl": 0.0, |
| "total_entropy": 1.687865972518921 |
| }, |
| { |
| "combined_loss": 0.6601178050041199, |
| "completion_length": 385.25, |
| "epoch": 0.15298982188295165, |
| "grad_norm": 2.2061314582824707, |
| "kl": 0.0, |
| "learning_rate": 8.84892508455204e-07, |
| "loss": 0.6601, |
| "num_samples": 1.0, |
| "reward": 3.75, |
| "reward_std": 1.1313834190368652, |
| "rewards/gpt4o_holistic_reward": 3.75, |
| "rl_loss": 7.450580596923828e-09, |
| "sft_loss": 2.200392484664917, |
| "speech_entropy": 2.007784366607666, |
| "speech_kl": 0.0, |
| "step": 481, |
| "text_entropy": 1.371636986732483, |
| "text_kl": 0.0, |
| "total_entropy": 1.8983948230743408 |
| }, |
| { |
| "combined_loss": 0.6691581010818481, |
| "completion_length": 463.3125, |
| "epoch": 0.15330788804071246, |
| "grad_norm": 1.7773209810256958, |
| "kl": 0.0, |
| "learning_rate": 8.844151714648274e-07, |
| "loss": 0.6692, |
| "num_samples": 1.0, |
| "reward": 3.3125, |
| "reward_std": 0.853813648223877, |
| "rewards/gpt4o_holistic_reward": 3.3125, |
| "rl_loss": 7.450580596923828e-09, |
| "sft_loss": 2.230526924133301, |
| "speech_entropy": 2.0789544582366943, |
| "speech_kl": 0.0, |
| "step": 482, |
| "text_entropy": 1.1371136903762817, |
| "text_kl": 0.0, |
| "total_entropy": 1.8991715908050537 |
| }, |
| { |
| "combined_loss": 0.6369400024414062, |
| "completion_length": 410.6875, |
| "epoch": 0.15362595419847327, |
| "grad_norm": 1.8571422100067139, |
| "kl": 0.0, |
| "learning_rate": 8.839369925846548e-07, |
| "loss": 0.6369, |
| "num_samples": 1.0, |
| "reward": 2.8125, |
| "reward_std": 0.48945680260658264, |
| "rewards/gpt4o_holistic_reward": 2.8125, |
| "rl_loss": 3.725290298461914e-09, |
| "sft_loss": 2.1231331825256348, |
| "speech_entropy": 2.2014362812042236, |
| "speech_kl": 0.0, |
| "step": 483, |
| "text_entropy": 1.6307443380355835, |
| "text_kl": 0.0, |
| "total_entropy": 2.1009421348571777 |
| }, |
| { |
| "combined_loss": 0.7150350213050842, |
| "completion_length": 738.4375, |
| "epoch": 0.15394402035623408, |
| "grad_norm": 1.8518329858779907, |
| "kl": 0.0, |
| "learning_rate": 8.834579730185012e-07, |
| "loss": 0.715, |
| "num_samples": 1.0, |
| "reward": 3.9375, |
| "reward_std": 0.6935809850692749, |
| "rewards/gpt4o_holistic_reward": 3.9375, |
| "rl_loss": -1.1175870895385742e-08, |
| "sft_loss": 2.3834500312805176, |
| "speech_entropy": 2.07857608795166, |
| "speech_kl": 0.0, |
| "step": 484, |
| "text_entropy": 1.5495516061782837, |
| "text_kl": 0.0, |
| "total_entropy": 1.9772846698760986 |
| }, |
| { |
| "combined_loss": 0.7554680109024048, |
| "completion_length": 434.4375, |
| "epoch": 0.15426208651399492, |
| "grad_norm": 1.8048903942108154, |
| "kl": 0.0, |
| "learning_rate": 8.829781139722978e-07, |
| "loss": 0.7555, |
| "num_samples": 1.0, |
| "reward": 3.9375, |
| "reward_std": 0.3751000165939331, |
| "rewards/gpt4o_holistic_reward": 3.9375, |
| "rl_loss": -1.1175870895385742e-08, |
| "sft_loss": 2.5182266235351562, |
| "speech_entropy": 2.2054073810577393, |
| "speech_kl": 0.0, |
| "step": 485, |
| "text_entropy": 1.5983672142028809, |
| "text_kl": 0.0, |
| "total_entropy": 2.0975542068481445 |
| }, |
| { |
| "combined_loss": 0.7759544253349304, |
| "completion_length": 575.25, |
| "epoch": 0.15458015267175573, |
| "grad_norm": 1.7967489957809448, |
| "kl": 0.0, |
| "learning_rate": 8.824974166540889e-07, |
| "loss": 0.776, |
| "num_samples": 1.0, |
| "reward": 3.625, |
| "reward_std": 0.3944375813007355, |
| "rewards/gpt4o_holistic_reward": 3.625, |
| "rl_loss": 1.1175870895385742e-08, |
| "sft_loss": 2.586514711380005, |
| "speech_entropy": 2.0900139808654785, |
| "speech_kl": 0.0, |
| "step": 486, |
| "text_entropy": 1.4879392385482788, |
| "text_kl": 0.0, |
| "total_entropy": 1.9754605293273926 |
| }, |
| { |
| "combined_loss": 0.7049446105957031, |
| "completion_length": 516.3125, |
| "epoch": 0.15489821882951654, |
| "grad_norm": 1.6339975595474243, |
| "kl": 0.0, |
| "learning_rate": 8.820158822740297e-07, |
| "loss": 0.7049, |
| "num_samples": 1.0, |
| "reward": 3.25, |
| "reward_std": 0.2501000165939331, |
| "rewards/gpt4o_holistic_reward": 3.25, |
| "rl_loss": -1.1175870895385742e-08, |
| "sft_loss": 2.3498153686523438, |
| "speech_entropy": 2.0781824588775635, |
| "speech_kl": 0.0, |
| "step": 487, |
| "text_entropy": 1.1251006126403809, |
| "text_kl": 0.0, |
| "total_entropy": 1.9013476371765137 |
| }, |
| { |
| "combined_loss": 0.7730693817138672, |
| "completion_length": 473.25, |
| "epoch": 0.15521628498727735, |
| "grad_norm": 1.7758270502090454, |
| "kl": 0.0, |
| "learning_rate": 8.81533512044382e-07, |
| "loss": 0.7731, |
| "num_samples": 1.0, |
| "reward": 3.875, |
| "reward_std": 1.0387752056121826, |
| "rewards/gpt4o_holistic_reward": 3.875, |
| "rl_loss": 0.0, |
| "sft_loss": 2.5768978595733643, |
| "speech_entropy": 2.2842307090759277, |
| "speech_kl": 0.0, |
| "step": 488, |
| "text_entropy": 1.3962032794952393, |
| "text_kl": 0.0, |
| "total_entropy": 2.119879961013794 |
| }, |
| { |
| "combined_loss": 0.6924104690551758, |
| "completion_length": 437.25, |
| "epoch": 0.15553435114503816, |
| "grad_norm": 2.08705735206604, |
| "kl": 0.0, |
| "learning_rate": 8.810503071795131e-07, |
| "loss": 0.6924, |
| "num_samples": 1.0, |
| "reward": 3.4375, |
| "reward_std": 0.9478486180305481, |
| "rewards/gpt4o_holistic_reward": 3.4375, |
| "rl_loss": 0.0, |
| "sft_loss": 2.308034896850586, |
| "speech_entropy": 2.0420119762420654, |
| "speech_kl": 0.0, |
| "step": 489, |
| "text_entropy": 1.393028736114502, |
| "text_kl": 0.0, |
| "total_entropy": 1.928663730621338 |
| }, |
| { |
| "combined_loss": 0.7534229159355164, |
| "completion_length": 616.9375, |
| "epoch": 0.15585241730279897, |
| "grad_norm": 1.6156688928604126, |
| "kl": 0.0, |
| "learning_rate": 8.805662688958898e-07, |
| "loss": 0.7534, |
| "num_samples": 1.0, |
| "reward": 4.3125, |
| "reward_std": 0.6978486180305481, |
| "rewards/gpt4o_holistic_reward": 4.3125, |
| "rl_loss": -9.313225746154785e-09, |
| "sft_loss": 2.5114095211029053, |
| "speech_entropy": 2.1210994720458984, |
| "speech_kl": 0.0, |
| "step": 490, |
| "text_entropy": 1.2488669157028198, |
| "text_kl": 0.0, |
| "total_entropy": 1.964477300643921 |
| }, |
| { |
| "combined_loss": 0.648138165473938, |
| "completion_length": 361.8125, |
| "epoch": 0.15617048346055978, |
| "grad_norm": 1.8364536762237549, |
| "kl": 0.0, |
| "learning_rate": 8.800813984120786e-07, |
| "loss": 0.6481, |
| "num_samples": 1.0, |
| "reward": 4.0625, |
| "reward_std": 0.3751000165939331, |
| "rewards/gpt4o_holistic_reward": 4.0625, |
| "rl_loss": 3.725290298461914e-09, |
| "sft_loss": 2.1604604721069336, |
| "speech_entropy": 2.1113858222961426, |
| "speech_kl": 0.0, |
| "step": 491, |
| "text_entropy": 1.0654577016830444, |
| "text_kl": 0.0, |
| "total_entropy": 1.9055767059326172 |
| }, |
| { |
| "combined_loss": 0.7758172750473022, |
| "completion_length": 603.0625, |
| "epoch": 0.15648854961832062, |
| "grad_norm": 1.6238081455230713, |
| "kl": 0.0, |
| "learning_rate": 8.795956969487398e-07, |
| "loss": 0.7758, |
| "num_samples": 1.0, |
| "reward": 4.1875, |
| "reward_std": 0.8081127405166626, |
| "rewards/gpt4o_holistic_reward": 4.1875, |
| "rl_loss": 1.862645149230957e-08, |
| "sft_loss": 2.586057662963867, |
| "speech_entropy": 2.1472387313842773, |
| "speech_kl": 0.0, |
| "step": 492, |
| "text_entropy": 1.605583667755127, |
| "text_kl": 0.0, |
| "total_entropy": 2.047147035598755 |
| }, |
| { |
| "combined_loss": 0.6960165500640869, |
| "completion_length": 649.0625, |
| "epoch": 0.15680661577608143, |
| "grad_norm": 6.360669136047363, |
| "kl": 0.0, |
| "learning_rate": 8.791091657286267e-07, |
| "loss": 0.696, |
| "num_samples": 1.0, |
| "reward": 3.3125, |
| "reward_std": 1.0713938474655151, |
| "rewards/gpt4o_holistic_reward": 3.3125, |
| "rl_loss": 1.862645149230957e-09, |
| "sft_loss": 2.3200550079345703, |
| "speech_entropy": 3.068434715270996, |
| "speech_kl": 0.0, |
| "step": 493, |
| "text_entropy": 2.149477005004883, |
| "text_kl": 0.0, |
| "total_entropy": 2.953958034515381 |
| }, |
| { |
| "combined_loss": 0.5907741785049438, |
| "completion_length": 397.0625, |
| "epoch": 0.15712468193384224, |
| "grad_norm": 1.611266016960144, |
| "kl": 0.0, |
| "learning_rate": 8.786218059765809e-07, |
| "loss": 0.5908, |
| "num_samples": 1.0, |
| "reward": 3.9375, |
| "reward_std": 0.6250999569892883, |
| "rewards/gpt4o_holistic_reward": 3.9375, |
| "rl_loss": 1.4901161193847656e-08, |
| "sft_loss": 1.9692471027374268, |
| "speech_entropy": 1.9849560260772705, |
| "speech_kl": 0.0, |
| "step": 494, |
| "text_entropy": 0.6464660167694092, |
| "text_kl": 0.0, |
| "total_entropy": 1.7163063287734985 |
| }, |
| { |
| "combined_loss": 0.6750938892364502, |
| "completion_length": 525.0, |
| "epoch": 0.15744274809160305, |
| "grad_norm": 2.001243829727173, |
| "kl": 0.0, |
| "learning_rate": 8.781336189195296e-07, |
| "loss": 0.6751, |
| "num_samples": 1.0, |
| "reward": 3.75, |
| "reward_std": 1.0048449039459229, |
| "rewards/gpt4o_holistic_reward": 3.75, |
| "rl_loss": -1.1175870895385742e-08, |
| "sft_loss": 2.2503128051757812, |
| "speech_entropy": 2.9428701400756836, |
| "speech_kl": 0.0, |
| "step": 495, |
| "text_entropy": 3.2061309814453125, |
| "text_kl": 0.0, |
| "total_entropy": 3.2631325721740723 |
| }, |
| { |
| "combined_loss": 0.691491961479187, |
| "completion_length": 531.9375, |
| "epoch": 0.15776081424936386, |
| "grad_norm": 1.7672028541564941, |
| "kl": 0.0, |
| "learning_rate": 8.776446057864838e-07, |
| "loss": 0.6915, |
| "num_samples": 1.0, |
| "reward": 3.625, |
| "reward_std": 0.8274502754211426, |
| "rewards/gpt4o_holistic_reward": 3.625, |
| "rl_loss": -7.450580596923828e-09, |
| "sft_loss": 2.3049731254577637, |
| "speech_entropy": 2.1160459518432617, |
| "speech_kl": 0.0, |
| "step": 496, |
| "text_entropy": 1.1704938411712646, |
| "text_kl": 0.0, |
| "total_entropy": 1.9302992820739746 |
| }, |
| { |
| "combined_loss": 0.6495949625968933, |
| "completion_length": 476.375, |
| "epoch": 0.15807888040712467, |
| "grad_norm": 2.018303632736206, |
| "kl": 0.0, |
| "learning_rate": 8.77154767808533e-07, |
| "loss": 0.6496, |
| "num_samples": 1.0, |
| "reward": 3.8125, |
| "reward_std": 0.6637751460075378, |
| "rewards/gpt4o_holistic_reward": 3.8125, |
| "rl_loss": -1.1175870895385742e-08, |
| "sft_loss": 2.165316581726074, |
| "speech_entropy": 2.1160874366760254, |
| "speech_kl": 0.0, |
| "step": 497, |
| "text_entropy": 1.245253086090088, |
| "text_kl": 0.0, |
| "total_entropy": 1.9460238218307495 |
| }, |
| { |
| "combined_loss": 0.6617956161499023, |
| "completion_length": 528.8125, |
| "epoch": 0.15839694656488548, |
| "grad_norm": 1.7395589351654053, |
| "kl": 0.0, |
| "learning_rate": 8.766641062188442e-07, |
| "loss": 0.6618, |
| "num_samples": 1.0, |
| "reward": 4.1875, |
| "reward_std": 0.1251000016927719, |
| "rewards/gpt4o_holistic_reward": 4.1875, |
| "rl_loss": 1.4901161193847656e-08, |
| "sft_loss": 2.2059853076934814, |
| "speech_entropy": 2.0821304321289062, |
| "speech_kl": 0.0, |
| "step": 498, |
| "text_entropy": 1.037213921546936, |
| "text_kl": 0.0, |
| "total_entropy": 1.8841687440872192 |
| }, |
| { |
| "combined_loss": 0.6126347184181213, |
| "completion_length": 466.5, |
| "epoch": 0.15871501272264632, |
| "grad_norm": 1.8486003875732422, |
| "kl": 0.0, |
| "learning_rate": 8.761726222526569e-07, |
| "loss": 0.6126, |
| "num_samples": 1.0, |
| "reward": 3.4375, |
| "reward_std": 1.2024502754211426, |
| "rewards/gpt4o_holistic_reward": 3.4375, |
| "rl_loss": 7.450580596923828e-09, |
| "sft_loss": 2.0421156883239746, |
| "speech_entropy": 2.067373752593994, |
| "speech_kl": 0.0, |
| "step": 499, |
| "text_entropy": 0.8997583389282227, |
| "text_kl": 0.0, |
| "total_entropy": 1.8452403545379639 |
| }, |
| { |
| "combined_loss": 0.7227458953857422, |
| "completion_length": 520.3125, |
| "epoch": 0.15903307888040713, |
| "grad_norm": 1.7850871086120605, |
| "kl": 0.0, |
| "learning_rate": 8.756803171472816e-07, |
| "loss": 0.7227, |
| "num_samples": 1.0, |
| "reward": 3.4375, |
| "reward_std": 0.48945680260658264, |
| "rewards/gpt4o_holistic_reward": 3.4375, |
| "rl_loss": -7.450580596923828e-09, |
| "sft_loss": 2.4091529846191406, |
| "speech_entropy": 2.1188344955444336, |
| "speech_kl": 0.0, |
| "step": 500, |
| "text_entropy": 1.2516241073608398, |
| "text_kl": 0.0, |
| "total_entropy": 1.9562331438064575 |
| } |
| ], |
| "logging_steps": 1, |
| "max_steps": 2000, |
| "num_input_tokens_seen": 0, |
| "num_train_epochs": 1, |
| "save_steps": 20, |
| "stateful_callbacks": { |
| "TrainerControl": { |
| "args": { |
| "should_epoch_stop": false, |
| "should_evaluate": false, |
| "should_log": false, |
| "should_save": true, |
| "should_training_stop": false |
| }, |
| "attributes": {} |
| } |
| }, |
| "total_flos": 0.0, |
| "train_batch_size": 1, |
| "trial_name": null, |
| "trial_params": null |
| } |
|
|