| { |
| "best_global_step": null, |
| "best_metric": null, |
| "best_model_checkpoint": null, |
| "epoch": 0.22857142857142856, |
| "eval_steps": 500, |
| "global_step": 200, |
| "is_hyper_param_search": false, |
| "is_local_process_zero": true, |
| "is_world_process_zero": true, |
| "log_history": [ |
| { |
| "clip_fraction": 0.0, |
| "completion_length": 2216.625045776367, |
| "dapo/avg_reward_std": 0.23920068350331536, |
| "dapo/filter_reward_index": 0.0, |
| "dapo/kept_prompts_ratio": 0.3735632248993578, |
| "dapo/num_sampling_attempts": 3.625, |
| "dapo/sampling_efficiency": 33.86904761904762, |
| "dapo/total_prompts_processed": 21.75, |
| "dapo/valid_prompts_collected": 6.0, |
| "epoch": 0.001142857142857143, |
| "grad_norm": 0.10874509066343307, |
| "kl": 0.0, |
| "learning_rate": 0.0, |
| "loss": 0.0468, |
| "reward": 0.6486758906394243, |
| "reward_std": 0.9342863708734512, |
| "step": 1 |
| }, |
| { |
| "clip_fraction": 0.0, |
| "completion_length": 2926.4757690429688, |
| "dapo/avg_reward_std": 0.24011585204040303, |
| "dapo/filter_reward_index": 0.0, |
| "dapo/kept_prompts_ratio": 0.3284313836518456, |
| "dapo/num_sampling_attempts": 4.25, |
| "dapo/sampling_efficiency": 26.874999999999993, |
| "dapo/total_prompts_processed": 25.5, |
| "dapo/valid_prompts_collected": 6.0, |
| "epoch": 0.002285714285714286, |
| "grad_norm": 0.12814132869243622, |
| "kl": 0.0, |
| "learning_rate": 1e-07, |
| "loss": 0.0508, |
| "reward": 0.2922485675662756, |
| "reward_std": 0.9327598959207535, |
| "step": 2 |
| }, |
| { |
| "clip_fraction": 0.0, |
| "completion_length": 2888.1527709960938, |
| "dapo/avg_reward_std": 0.2903491040070852, |
| "dapo/filter_reward_index": 0.0, |
| "dapo/kept_prompts_ratio": 0.36111111839612325, |
| "dapo/num_sampling_attempts": 3.75, |
| "dapo/sampling_efficiency": 36.875, |
| "dapo/total_prompts_processed": 22.5, |
| "dapo/valid_prompts_collected": 6.0, |
| "epoch": 0.0034285714285714284, |
| "grad_norm": 0.1155443787574768, |
| "kl": 2.9146671295166016e-05, |
| "learning_rate": 2e-07, |
| "loss": 0.0647, |
| "reward": 0.3509849710389972, |
| "reward_std": 0.9315856546163559, |
| "step": 3 |
| }, |
| { |
| "clip_fraction": 0.0, |
| "completion_length": 2535.718734741211, |
| "dapo/avg_reward_std": 0.25628158891642533, |
| "dapo/filter_reward_index": 0.0, |
| "dapo/kept_prompts_ratio": 0.35802469595714853, |
| "dapo/num_sampling_attempts": 3.375, |
| "dapo/sampling_efficiency": 41.56249999999999, |
| "dapo/total_prompts_processed": 20.25, |
| "dapo/valid_prompts_collected": 6.0, |
| "epoch": 0.004571428571428572, |
| "grad_norm": 0.14338600635528564, |
| "kl": 2.1044164896011353e-05, |
| "learning_rate": 3e-07, |
| "loss": 0.0536, |
| "reward": 0.5615630690008402, |
| "reward_std": 0.9670609682798386, |
| "step": 4 |
| }, |
| { |
| "clip_fraction": 0.0, |
| "completion_length": 2548.916702270508, |
| "dapo/avg_reward_std": 0.2889887053391029, |
| "dapo/filter_reward_index": 0.0, |
| "dapo/kept_prompts_ratio": 0.40804598814454573, |
| "dapo/num_sampling_attempts": 3.625, |
| "dapo/sampling_efficiency": 36.875, |
| "dapo/total_prompts_processed": 21.75, |
| "dapo/valid_prompts_collected": 6.0, |
| "epoch": 0.005714285714285714, |
| "grad_norm": 0.10121661424636841, |
| "kl": 2.7820467948913574e-05, |
| "learning_rate": 4e-07, |
| "loss": 0.0263, |
| "reward": 0.5986085031181574, |
| "reward_std": 0.9444186091423035, |
| "step": 5 |
| }, |
| { |
| "clip_fraction": 0.0, |
| "completion_length": 2357.579864501953, |
| "dapo/avg_reward_std": 0.30308351665735245, |
| "dapo/filter_reward_index": 0.0, |
| "dapo/kept_prompts_ratio": 0.36309524306229185, |
| "dapo/num_sampling_attempts": 3.5, |
| "dapo/sampling_efficiency": 37.5, |
| "dapo/total_prompts_processed": 21.0, |
| "dapo/valid_prompts_collected": 6.0, |
| "epoch": 0.006857142857142857, |
| "grad_norm": 0.171969935297966, |
| "kl": 2.6032328605651855e-05, |
| "learning_rate": 5e-07, |
| "loss": 0.0906, |
| "reward": 0.4527070773765445, |
| "reward_std": 0.9109365493059158, |
| "step": 6 |
| }, |
| { |
| "clip_fraction": 0.0, |
| "completion_length": 2404.2534790039062, |
| "dapo/avg_reward_std": 0.3077041815828394, |
| "dapo/filter_reward_index": 0.0, |
| "dapo/kept_prompts_ratio": 0.41975309506610586, |
| "dapo/num_sampling_attempts": 3.375, |
| "dapo/sampling_efficiency": 37.916666666666664, |
| "dapo/total_prompts_processed": 20.25, |
| "dapo/valid_prompts_collected": 6.0, |
| "epoch": 0.008, |
| "grad_norm": 0.12406504899263382, |
| "kl": 1.9066035747528076e-05, |
| "learning_rate": 6e-07, |
| "loss": 0.0645, |
| "reward": 0.5808906648308039, |
| "reward_std": 0.9664968773722649, |
| "step": 7 |
| }, |
| { |
| "clip_fraction": 0.0, |
| "completion_length": 2833.3056030273438, |
| "dapo/avg_reward_std": 0.2214778729023472, |
| "dapo/filter_reward_index": 0.0, |
| "dapo/kept_prompts_ratio": 0.31182796435971416, |
| "dapo/num_sampling_attempts": 3.875, |
| "dapo/sampling_efficiency": 36.577380952380956, |
| "dapo/total_prompts_processed": 23.25, |
| "dapo/valid_prompts_collected": 6.0, |
| "epoch": 0.009142857142857144, |
| "grad_norm": 0.13480524718761444, |
| "kl": 3.4965574741363525e-05, |
| "learning_rate": 7e-07, |
| "loss": 0.0738, |
| "reward": 0.5177570842206478, |
| "reward_std": 0.9147621840238571, |
| "step": 8 |
| }, |
| { |
| "clip_fraction": 0.0, |
| "completion_length": 2965.6736450195312, |
| "dapo/avg_reward_std": 0.2788830002148946, |
| "dapo/filter_reward_index": 0.0, |
| "dapo/kept_prompts_ratio": 0.3888888966154169, |
| "dapo/num_sampling_attempts": 3.375, |
| "dapo/sampling_efficiency": 46.36904761904761, |
| "dapo/total_prompts_processed": 20.25, |
| "dapo/valid_prompts_collected": 6.0, |
| "epoch": 0.010285714285714285, |
| "grad_norm": 0.08226096630096436, |
| "kl": 1.4536082744598389e-05, |
| "learning_rate": 8e-07, |
| "loss": 0.0316, |
| "reward": 0.5644797384738922, |
| "reward_std": 0.9423079788684845, |
| "step": 9 |
| }, |
| { |
| "clip_fraction": 0.0, |
| "completion_length": 2574.461814880371, |
| "dapo/avg_reward_std": 0.3602010520065532, |
| "dapo/filter_reward_index": 0.0, |
| "dapo/kept_prompts_ratio": 0.588235302883036, |
| "dapo/num_sampling_attempts": 2.125, |
| "dapo/sampling_efficiency": 61.45833333333333, |
| "dapo/total_prompts_processed": 12.75, |
| "dapo/valid_prompts_collected": 6.0, |
| "epoch": 0.011428571428571429, |
| "grad_norm": 0.1667146533727646, |
| "kl": 2.9319897294044495e-05, |
| "learning_rate": 9e-07, |
| "loss": 0.0894, |
| "reward": 0.6415909845381975, |
| "reward_std": 0.9869548827409744, |
| "step": 10 |
| }, |
| { |
| "clip_fraction": 0.0, |
| "completion_length": 2798.982666015625, |
| "dapo/avg_reward_std": 0.15393146287117684, |
| "dapo/filter_reward_index": 0.0, |
| "dapo/kept_prompts_ratio": 0.14880952797830105, |
| "dapo/num_sampling_attempts": 7.0, |
| "dapo/sampling_efficiency": 15.882936507936506, |
| "dapo/total_prompts_processed": 42.0, |
| "dapo/valid_prompts_collected": 6.0, |
| "epoch": 0.012571428571428572, |
| "grad_norm": 0.1166534572839737, |
| "kl": 2.0567327737808228e-05, |
| "learning_rate": 1e-06, |
| "loss": 0.0207, |
| "reward": 0.2987014357931912, |
| "reward_std": 0.868266686797142, |
| "step": 11 |
| }, |
| { |
| "clip_fraction": 0.0, |
| "completion_length": 2377.555595397949, |
| "dapo/avg_reward_std": 0.21645361091941595, |
| "dapo/filter_reward_index": 0.0, |
| "dapo/kept_prompts_ratio": 0.2968750037252903, |
| "dapo/num_sampling_attempts": 4.0, |
| "dapo/sampling_efficiency": 38.125, |
| "dapo/total_prompts_processed": 24.0, |
| "dapo/valid_prompts_collected": 6.0, |
| "epoch": 0.013714285714285714, |
| "grad_norm": 0.23483960330486298, |
| "kl": 3.6854296922683716e-05, |
| "learning_rate": 9.997258721585931e-07, |
| "loss": 0.0491, |
| "reward": 0.6348252706229687, |
| "reward_std": 0.9863902181386948, |
| "step": 12 |
| }, |
| { |
| "clip_fraction": 0.0, |
| "completion_length": 2688.1111755371094, |
| "dapo/avg_reward_std": 0.34906478971242905, |
| "dapo/filter_reward_index": 0.0, |
| "dapo/kept_prompts_ratio": 0.49166667461395264, |
| "dapo/num_sampling_attempts": 2.5, |
| "dapo/sampling_efficiency": 52.08333333333333, |
| "dapo/total_prompts_processed": 15.0, |
| "dapo/valid_prompts_collected": 6.0, |
| "epoch": 0.014857142857142857, |
| "grad_norm": 0.09364266693592072, |
| "kl": 3.152713179588318e-05, |
| "learning_rate": 9.989038226169207e-07, |
| "loss": 0.0431, |
| "reward": 0.5878111608326435, |
| "reward_std": 0.9752944633364677, |
| "step": 13 |
| }, |
| { |
| "clip_fraction": 0.0, |
| "completion_length": 2029.9132270812988, |
| "dapo/avg_reward_std": 0.25792322993278505, |
| "dapo/filter_reward_index": 0.0, |
| "dapo/kept_prompts_ratio": 0.36666666984558105, |
| "dapo/num_sampling_attempts": 3.125, |
| "dapo/sampling_efficiency": 57.5, |
| "dapo/total_prompts_processed": 18.75, |
| "dapo/valid_prompts_collected": 6.0, |
| "epoch": 0.016, |
| "grad_norm": 0.13894271850585938, |
| "kl": 4.156678915023804e-05, |
| "learning_rate": 9.975348529157229e-07, |
| "loss": 0.0279, |
| "reward": 0.5834919223561883, |
| "reward_std": 0.9710095003247261, |
| "step": 14 |
| }, |
| { |
| "clip_fraction": 0.0, |
| "completion_length": 2817.8576583862305, |
| "dapo/avg_reward_std": 0.3106007158756256, |
| "dapo/filter_reward_index": 0.0, |
| "dapo/kept_prompts_ratio": 0.5333333484828472, |
| "dapo/num_sampling_attempts": 2.5, |
| "dapo/sampling_efficiency": 52.08333333333333, |
| "dapo/total_prompts_processed": 15.0, |
| "dapo/valid_prompts_collected": 6.0, |
| "epoch": 0.017142857142857144, |
| "grad_norm": 0.08778129518032074, |
| "kl": 3.078579902648926e-05, |
| "learning_rate": 9.956206309337066e-07, |
| "loss": 0.0343, |
| "reward": 0.6716702915728092, |
| "reward_std": 0.99223193526268, |
| "step": 15 |
| }, |
| { |
| "clip_fraction": 0.0, |
| "completion_length": 2570.2500076293945, |
| "dapo/avg_reward_std": 0.244095021715531, |
| "dapo/filter_reward_index": 0.0, |
| "dapo/kept_prompts_ratio": 0.35897436336829114, |
| "dapo/num_sampling_attempts": 3.25, |
| "dapo/sampling_efficiency": 44.49404761904762, |
| "dapo/total_prompts_processed": 19.5, |
| "dapo/valid_prompts_collected": 6.0, |
| "epoch": 0.018285714285714287, |
| "grad_norm": 0.07460447400808334, |
| "kl": 0.00025935471057891846, |
| "learning_rate": 9.931634888554935e-07, |
| "loss": 0.0146, |
| "reward": 0.7213943339884281, |
| "reward_std": 0.9671430364251137, |
| "step": 16 |
| }, |
| { |
| "clip_fraction": 0.0, |
| "completion_length": 2483.413215637207, |
| "dapo/avg_reward_std": 0.2672279636065165, |
| "dapo/filter_reward_index": 0.0, |
| "dapo/kept_prompts_ratio": 0.35000000496705375, |
| "dapo/num_sampling_attempts": 3.75, |
| "dapo/sampling_efficiency": 29.166666666666664, |
| "dapo/total_prompts_processed": 22.5, |
| "dapo/valid_prompts_collected": 6.0, |
| "epoch": 0.019428571428571427, |
| "grad_norm": 0.12397046387195587, |
| "kl": 0.00022289156913757324, |
| "learning_rate": 9.901664203302124e-07, |
| "loss": 0.0624, |
| "reward": 0.4952134042978287, |
| "reward_std": 0.9074268043041229, |
| "step": 17 |
| }, |
| { |
| "clip_fraction": 0.0, |
| "completion_length": 2537.8194580078125, |
| "dapo/avg_reward_std": 0.34170445956681905, |
| "dapo/filter_reward_index": 0.0, |
| "dapo/kept_prompts_ratio": 0.5438596567040995, |
| "dapo/num_sampling_attempts": 2.375, |
| "dapo/sampling_efficiency": 48.95833333333333, |
| "dapo/total_prompts_processed": 14.25, |
| "dapo/valid_prompts_collected": 6.0, |
| "epoch": 0.02057142857142857, |
| "grad_norm": 0.1614188253879547, |
| "kl": 0.0003694295883178711, |
| "learning_rate": 9.866330768241983e-07, |
| "loss": 0.1136, |
| "reward": 0.6263789646327496, |
| "reward_std": 0.9367138147354126, |
| "step": 18 |
| }, |
| { |
| "clip_fraction": 0.0, |
| "completion_length": 2041.2916984558105, |
| "dapo/avg_reward_std": 0.23441629879402393, |
| "dapo/filter_reward_index": 0.0, |
| "dapo/kept_prompts_ratio": 0.31818182224577124, |
| "dapo/num_sampling_attempts": 4.125, |
| "dapo/sampling_efficiency": 38.36805555555556, |
| "dapo/total_prompts_processed": 24.75, |
| "dapo/valid_prompts_collected": 6.0, |
| "epoch": 0.021714285714285714, |
| "grad_norm": 0.2115960717201233, |
| "kl": 0.0005898326635360718, |
| "learning_rate": 9.825677631722435e-07, |
| "loss": 0.0603, |
| "reward": 0.6228582374751568, |
| "reward_std": 0.9455358982086182, |
| "step": 19 |
| }, |
| { |
| "clip_fraction": 0.0, |
| "completion_length": 2392.7882385253906, |
| "dapo/avg_reward_std": 0.22908216629709516, |
| "dapo/filter_reward_index": 0.0, |
| "dapo/kept_prompts_ratio": 0.2952381010566439, |
| "dapo/num_sampling_attempts": 4.375, |
| "dapo/sampling_efficiency": 33.541666666666664, |
| "dapo/total_prompts_processed": 26.25, |
| "dapo/valid_prompts_collected": 6.0, |
| "epoch": 0.022857142857142857, |
| "grad_norm": 0.20383711159229279, |
| "kl": 0.0008958578109741211, |
| "learning_rate": 9.779754323328192e-07, |
| "loss": 0.1313, |
| "reward": 0.41653589624911547, |
| "reward_std": 0.9027180448174477, |
| "step": 20 |
| }, |
| { |
| "clip_fraction": 0.0, |
| "completion_length": 2966.260452270508, |
| "dapo/avg_reward_std": 0.16204138861762152, |
| "dapo/filter_reward_index": 0.0, |
| "dapo/kept_prompts_ratio": 0.25555555986033546, |
| "dapo/num_sampling_attempts": 5.625, |
| "dapo/sampling_efficiency": 22.84722222222222, |
| "dapo/total_prompts_processed": 33.75, |
| "dapo/valid_prompts_collected": 6.0, |
| "epoch": 0.024, |
| "grad_norm": 0.1098903939127922, |
| "kl": 0.0002017766237258911, |
| "learning_rate": 9.728616793536587e-07, |
| "loss": 0.0825, |
| "reward": 0.43902475386857986, |
| "reward_std": 0.9111825451254845, |
| "step": 21 |
| }, |
| { |
| "clip_fraction": 0.0, |
| "completion_length": 3016.357696533203, |
| "dapo/avg_reward_std": 0.28799043401427893, |
| "dapo/filter_reward_index": 0.0, |
| "dapo/kept_prompts_ratio": 0.42028986371081806, |
| "dapo/num_sampling_attempts": 2.875, |
| "dapo/sampling_efficiency": 52.20238095238095, |
| "dapo/total_prompts_processed": 17.25, |
| "dapo/valid_prompts_collected": 6.0, |
| "epoch": 0.025142857142857144, |
| "grad_norm": 0.1315963715314865, |
| "kl": 0.0005468130111694336, |
| "learning_rate": 9.672327345550543e-07, |
| "loss": 0.0657, |
| "reward": 0.5281127206981182, |
| "reward_std": 0.9846171587705612, |
| "step": 22 |
| }, |
| { |
| "clip_fraction": 0.0, |
| "completion_length": 2408.8333282470703, |
| "dapo/avg_reward_std": 0.24506365811383282, |
| "dapo/filter_reward_index": 0.0, |
| "dapo/kept_prompts_ratio": 0.3703703780968984, |
| "dapo/num_sampling_attempts": 3.375, |
| "dapo/sampling_efficiency": 40.74404761904761, |
| "dapo/total_prompts_processed": 20.25, |
| "dapo/valid_prompts_collected": 6.0, |
| "epoch": 0.026285714285714287, |
| "grad_norm": 0.12457310408353806, |
| "kl": 0.001109391450881958, |
| "learning_rate": 9.610954559391704e-07, |
| "loss": 0.0304, |
| "reward": 0.6419337540864944, |
| "reward_std": 0.9689808040857315, |
| "step": 23 |
| }, |
| { |
| "clip_fraction": 0.0, |
| "completion_length": 2891.7777709960938, |
| "dapo/avg_reward_std": 0.2580765459848487, |
| "dapo/filter_reward_index": 0.0, |
| "dapo/kept_prompts_ratio": 0.4420289954413538, |
| "dapo/num_sampling_attempts": 2.875, |
| "dapo/sampling_efficiency": 51.785714285714285, |
| "dapo/total_prompts_processed": 17.25, |
| "dapo/valid_prompts_collected": 6.0, |
| "epoch": 0.027428571428571427, |
| "grad_norm": 0.09673310071229935, |
| "kl": 0.0006018728017807007, |
| "learning_rate": 9.54457320834625e-07, |
| "loss": 0.0143, |
| "reward": 0.4589955974370241, |
| "reward_std": 0.9405186697840691, |
| "step": 24 |
| }, |
| { |
| "clip_fraction": 0.0, |
| "completion_length": 2994.8159790039062, |
| "dapo/avg_reward_std": 0.24148962597052256, |
| "dapo/filter_reward_index": 0.0, |
| "dapo/kept_prompts_ratio": 0.3777777850627899, |
| "dapo/num_sampling_attempts": 3.75, |
| "dapo/sampling_efficiency": 34.99999999999999, |
| "dapo/total_prompts_processed": 22.5, |
| "dapo/valid_prompts_collected": 6.0, |
| "epoch": 0.02857142857142857, |
| "grad_norm": 0.12189235538244247, |
| "kl": 0.0021944642066955566, |
| "learning_rate": 9.473264167865171e-07, |
| "loss": 0.0869, |
| "reward": 0.4214114509522915, |
| "reward_std": 0.918621838092804, |
| "step": 25 |
| }, |
| { |
| "clip_fraction": 0.0, |
| "completion_length": 3106.2743530273438, |
| "dapo/avg_reward_std": 0.21211836412549018, |
| "dapo/filter_reward_index": 0.0, |
| "dapo/kept_prompts_ratio": 0.23750000558793544, |
| "dapo/num_sampling_attempts": 5.0, |
| "dapo/sampling_efficiency": 25.729166666666664, |
| "dapo/total_prompts_processed": 30.0, |
| "dapo/valid_prompts_collected": 6.0, |
| "epoch": 0.029714285714285714, |
| "grad_norm": 0.11006143689155579, |
| "kl": 0.002092994749546051, |
| "learning_rate": 9.397114317029974e-07, |
| "loss": 0.0617, |
| "reward": 0.4296974149765447, |
| "reward_std": 0.9136241301894188, |
| "step": 26 |
| }, |
| { |
| "clip_fraction": 0.0, |
| "completion_length": 2601.388946533203, |
| "dapo/avg_reward_std": 0.24121128850513035, |
| "dapo/filter_reward_index": 0.0, |
| "dapo/kept_prompts_ratio": 0.28240741416811943, |
| "dapo/num_sampling_attempts": 4.5, |
| "dapo/sampling_efficiency": 25.76388888888889, |
| "dapo/total_prompts_processed": 27.0, |
| "dapo/valid_prompts_collected": 6.0, |
| "epoch": 0.030857142857142857, |
| "grad_norm": 0.11345893889665604, |
| "kl": 0.003206908702850342, |
| "learning_rate": 9.316216432703916e-07, |
| "loss": 0.0926, |
| "reward": 0.5876726619899273, |
| "reward_std": 0.9382903277873993, |
| "step": 27 |
| }, |
| { |
| "clip_fraction": 0.0, |
| "completion_length": 2861.6180839538574, |
| "dapo/avg_reward_std": 0.23961352888080809, |
| "dapo/filter_reward_index": 0.0, |
| "dapo/kept_prompts_ratio": 0.3055555605226093, |
| "dapo/num_sampling_attempts": 4.5, |
| "dapo/sampling_efficiency": 27.94642857142857, |
| "dapo/total_prompts_processed": 27.0, |
| "dapo/valid_prompts_collected": 6.0, |
| "epoch": 0.032, |
| "grad_norm": 0.1445908397436142, |
| "kl": 0.0031346678733825684, |
| "learning_rate": 9.230669076497687e-07, |
| "loss": 0.0852, |
| "reward": 0.40619770623743534, |
| "reward_std": 0.9506878778338432, |
| "step": 28 |
| }, |
| { |
| "clip_fraction": 0.0, |
| "completion_length": 2729.1875, |
| "dapo/avg_reward_std": 0.24243796567122142, |
| "dapo/filter_reward_index": 0.0, |
| "dapo/kept_prompts_ratio": 0.35555555919806164, |
| "dapo/num_sampling_attempts": 3.75, |
| "dapo/sampling_efficiency": 39.93055555555555, |
| "dapo/total_prompts_processed": 22.5, |
| "dapo/valid_prompts_collected": 6.0, |
| "epoch": 0.03314285714285714, |
| "grad_norm": 0.11093314737081528, |
| "kl": 0.0027089565992355347, |
| "learning_rate": 9.140576474687263e-07, |
| "loss": 0.0604, |
| "reward": 0.6693072468042374, |
| "reward_std": 0.9926005378365517, |
| "step": 29 |
| }, |
| { |
| "clip_fraction": 0.0, |
| "completion_length": 3155.7083740234375, |
| "dapo/avg_reward_std": 0.222336781601752, |
| "dapo/filter_reward_index": 0.0, |
| "dapo/kept_prompts_ratio": 0.311827961956301, |
| "dapo/num_sampling_attempts": 3.875, |
| "dapo/sampling_efficiency": 42.93154761904761, |
| "dapo/total_prompts_processed": 23.25, |
| "dapo/valid_prompts_collected": 6.0, |
| "epoch": 0.03428571428571429, |
| "grad_norm": 0.08208812773227692, |
| "kl": 0.001552581787109375, |
| "learning_rate": 9.046048391230247e-07, |
| "loss": 0.0268, |
| "reward": 0.521108225453645, |
| "reward_std": 0.9469912871718407, |
| "step": 30 |
| }, |
| { |
| "clip_fraction": 0.0, |
| "completion_length": 2657.559036254883, |
| "dapo/avg_reward_std": 0.1865689324008094, |
| "dapo/filter_reward_index": 0.0, |
| "dapo/kept_prompts_ratio": 0.259259263260497, |
| "dapo/num_sampling_attempts": 4.5, |
| "dapo/sampling_efficiency": 51.076388888888886, |
| "dapo/total_prompts_processed": 27.0, |
| "dapo/valid_prompts_collected": 6.0, |
| "epoch": 0.03542857142857143, |
| "grad_norm": 0.1316680908203125, |
| "kl": 0.009428024291992188, |
| "learning_rate": 8.9471999940354e-07, |
| "loss": 0.0745, |
| "reward": 0.6315789166837931, |
| "reward_std": 0.9327967762947083, |
| "step": 31 |
| }, |
| { |
| "clip_fraction": 0.0, |
| "completion_length": 3071.7535095214844, |
| "dapo/avg_reward_std": 0.3048748767375946, |
| "dapo/filter_reward_index": 0.0, |
| "dapo/kept_prompts_ratio": 0.40000000298023225, |
| "dapo/num_sampling_attempts": 3.125, |
| "dapo/sampling_efficiency": 55.104166666666664, |
| "dapo/total_prompts_processed": 18.75, |
| "dapo/valid_prompts_collected": 6.0, |
| "epoch": 0.036571428571428574, |
| "grad_norm": 0.10442829132080078, |
| "kl": 0.0021753311157226562, |
| "learning_rate": 8.844151714648274e-07, |
| "loss": 0.0567, |
| "reward": 0.5447857324033976, |
| "reward_std": 0.921301856637001, |
| "step": 32 |
| }, |
| { |
| "clip_fraction": 0.0, |
| "completion_length": 3025.826416015625, |
| "dapo/avg_reward_std": 0.23097028769552708, |
| "dapo/filter_reward_index": 0.0, |
| "dapo/kept_prompts_ratio": 0.3385416748933494, |
| "dapo/num_sampling_attempts": 4.0, |
| "dapo/sampling_efficiency": 38.95833333333333, |
| "dapo/total_prompts_processed": 24.0, |
| "dapo/valid_prompts_collected": 6.0, |
| "epoch": 0.037714285714285714, |
| "grad_norm": 0.09167502820491791, |
| "kl": 0.003194093704223633, |
| "learning_rate": 8.737029101523929e-07, |
| "loss": 0.0612, |
| "reward": 0.5547973131760955, |
| "reward_std": 0.9730775579810143, |
| "step": 33 |
| }, |
| { |
| "clip_fraction": 0.0, |
| "completion_length": 2558.7812423706055, |
| "dapo/avg_reward_std": 0.2557758816650936, |
| "dapo/filter_reward_index": 0.0, |
| "dapo/kept_prompts_ratio": 0.3452381007373333, |
| "dapo/num_sampling_attempts": 3.5, |
| "dapo/sampling_efficiency": 42.113095238095234, |
| "dapo/total_prompts_processed": 21.0, |
| "dapo/valid_prompts_collected": 6.0, |
| "epoch": 0.038857142857142854, |
| "grad_norm": 0.11055821925401688, |
| "kl": 0.019285082817077637, |
| "learning_rate": 8.625962667065487e-07, |
| "loss": 0.0831, |
| "reward": 0.5826370492577553, |
| "reward_std": 0.9168377369642258, |
| "step": 34 |
| }, |
| { |
| "clip_fraction": 0.0, |
| "completion_length": 2909.2361602783203, |
| "dapo/avg_reward_std": 0.22593376713414345, |
| "dapo/filter_reward_index": 0.0, |
| "dapo/kept_prompts_ratio": 0.30645161626800416, |
| "dapo/num_sampling_attempts": 3.875, |
| "dapo/sampling_efficiency": 49.598214285714285, |
| "dapo/total_prompts_processed": 23.25, |
| "dapo/valid_prompts_collected": 6.0, |
| "epoch": 0.04, |
| "grad_norm": 0.09941194951534271, |
| "kl": 0.004673004150390625, |
| "learning_rate": 8.511087728614862e-07, |
| "loss": 0.0581, |
| "reward": 0.5392080545425415, |
| "reward_std": 0.9793680757284164, |
| "step": 35 |
| }, |
| { |
| "clip_fraction": 0.0, |
| "completion_length": 2629.3333435058594, |
| "dapo/avg_reward_std": 0.2632370889186859, |
| "dapo/filter_reward_index": 0.0, |
| "dapo/kept_prompts_ratio": 0.338541675824672, |
| "dapo/num_sampling_attempts": 4.0, |
| "dapo/sampling_efficiency": 29.513888888888886, |
| "dapo/total_prompts_processed": 24.0, |
| "dapo/valid_prompts_collected": 6.0, |
| "epoch": 0.04114285714285714, |
| "grad_norm": 0.17353901267051697, |
| "kl": 0.010207176208496094, |
| "learning_rate": 8.392544243589427e-07, |
| "loss": 0.0623, |
| "reward": 0.5811682712519541, |
| "reward_std": 0.9331383407115936, |
| "step": 36 |
| }, |
| { |
| "clip_fraction": 0.0, |
| "completion_length": 3220.9409790039062, |
| "dapo/avg_reward_std": 0.2187359256403787, |
| "dapo/filter_reward_index": 0.0, |
| "dapo/kept_prompts_ratio": 0.29047619913305556, |
| "dapo/num_sampling_attempts": 4.375, |
| "dapo/sampling_efficiency": 36.25, |
| "dapo/total_prompts_processed": 26.25, |
| "dapo/valid_prompts_collected": 6.0, |
| "epoch": 0.04228571428571429, |
| "grad_norm": 0.10708803683519363, |
| "kl": 0.0023801326751708984, |
| "learning_rate": 8.270476638965461e-07, |
| "loss": 0.0657, |
| "reward": 0.48440539091825485, |
| "reward_std": 0.9014616012573242, |
| "step": 37 |
| }, |
| { |
| "clip_fraction": 0.0, |
| "completion_length": 3233.420135498047, |
| "dapo/avg_reward_std": 0.2624325007200241, |
| "dapo/filter_reward_index": 0.0, |
| "dapo/kept_prompts_ratio": 0.30208333721384406, |
| "dapo/num_sampling_attempts": 4.0, |
| "dapo/sampling_efficiency": 30.119047619047617, |
| "dapo/total_prompts_processed": 24.0, |
| "dapo/valid_prompts_collected": 6.0, |
| "epoch": 0.04342857142857143, |
| "grad_norm": 0.0923333689570427, |
| "kl": 0.0031156539916992188, |
| "learning_rate": 8.145033635316128e-07, |
| "loss": 0.053, |
| "reward": 0.45120809972286224, |
| "reward_std": 0.9732232913374901, |
| "step": 38 |
| }, |
| { |
| "clip_fraction": 0.0, |
| "completion_length": 2787.031280517578, |
| "dapo/avg_reward_std": 0.1930955442644301, |
| "dapo/filter_reward_index": 0.0, |
| "dapo/kept_prompts_ratio": 0.23412698933056422, |
| "dapo/num_sampling_attempts": 5.25, |
| "dapo/sampling_efficiency": 23.244047619047617, |
| "dapo/total_prompts_processed": 31.5, |
| "dapo/valid_prompts_collected": 6.0, |
| "epoch": 0.044571428571428574, |
| "grad_norm": 0.12707453966140747, |
| "kl": 0.006325244903564453, |
| "learning_rate": 8.01636806561836e-07, |
| "loss": 0.0905, |
| "reward": 0.5048832832835615, |
| "reward_std": 0.9330806732177734, |
| "step": 39 |
| }, |
| { |
| "clip_fraction": 0.0, |
| "completion_length": 2921.6180572509766, |
| "dapo/avg_reward_std": 0.25906160804960465, |
| "dapo/filter_reward_index": 0.0, |
| "dapo/kept_prompts_ratio": 0.3009259340663751, |
| "dapo/num_sampling_attempts": 4.5, |
| "dapo/sampling_efficiency": 31.562499999999996, |
| "dapo/total_prompts_processed": 27.0, |
| "dapo/valid_prompts_collected": 6.0, |
| "epoch": 0.045714285714285714, |
| "grad_norm": 0.1152920126914978, |
| "kl": 0.004504203796386719, |
| "learning_rate": 7.884636689049422e-07, |
| "loss": 0.0443, |
| "reward": 0.3671413380652666, |
| "reward_std": 0.9126428663730621, |
| "step": 40 |
| }, |
| { |
| "clip_fraction": 0.0, |
| "completion_length": 3100.8194732666016, |
| "dapo/avg_reward_std": 0.26266304695087933, |
| "dapo/filter_reward_index": 0.0, |
| "dapo/kept_prompts_ratio": 0.3985507280930229, |
| "dapo/num_sampling_attempts": 2.875, |
| "dapo/sampling_efficiency": 59.895833333333336, |
| "dapo/total_prompts_processed": 17.25, |
| "dapo/valid_prompts_collected": 6.0, |
| "epoch": 0.046857142857142854, |
| "grad_norm": 0.1462322324514389, |
| "kl": 0.0058536529541015625, |
| "learning_rate": 7.75e-07, |
| "loss": 0.0836, |
| "reward": 0.6537042334675789, |
| "reward_std": 0.9643120691180229, |
| "step": 41 |
| }, |
| { |
| "clip_fraction": 0.0, |
| "completion_length": 3083.8541870117188, |
| "dapo/avg_reward_std": 0.2028282030540354, |
| "dapo/filter_reward_index": 0.0, |
| "dapo/kept_prompts_ratio": 0.27941177215646296, |
| "dapo/num_sampling_attempts": 4.25, |
| "dapo/sampling_efficiency": 34.61309523809524, |
| "dapo/total_prompts_processed": 25.5, |
| "dapo/valid_prompts_collected": 6.0, |
| "epoch": 0.048, |
| "grad_norm": 0.11620575189590454, |
| "kl": 0.005963563919067383, |
| "learning_rate": 7.612622032536507e-07, |
| "loss": 0.0756, |
| "reward": 0.6132493373006582, |
| "reward_std": 0.9271278157830238, |
| "step": 42 |
| }, |
| { |
| "clip_fraction": 0.0, |
| "completion_length": 2860.6840209960938, |
| "dapo/avg_reward_std": 0.2537354379892349, |
| "dapo/filter_reward_index": 0.0, |
| "dapo/kept_prompts_ratio": 0.31666667262713116, |
| "dapo/num_sampling_attempts": 3.75, |
| "dapo/sampling_efficiency": 27.916666666666664, |
| "dapo/total_prompts_processed": 22.5, |
| "dapo/valid_prompts_collected": 6.0, |
| "epoch": 0.04914285714285714, |
| "grad_norm": 0.15706917643547058, |
| "kl": 0.012288570404052734, |
| "learning_rate": 7.472670160550848e-07, |
| "loss": 0.0864, |
| "reward": 0.4896182883530855, |
| "reward_std": 0.9406783953309059, |
| "step": 43 |
| }, |
| { |
| "clip_fraction": 0.0, |
| "completion_length": 3230.951416015625, |
| "dapo/avg_reward_std": 0.2785276919603348, |
| "dapo/filter_reward_index": 0.0, |
| "dapo/kept_prompts_ratio": 0.4047619104385376, |
| "dapo/num_sampling_attempts": 3.5, |
| "dapo/sampling_efficiency": 35.20833333333333, |
| "dapo/total_prompts_processed": 21.0, |
| "dapo/valid_prompts_collected": 6.0, |
| "epoch": 0.05028571428571429, |
| "grad_norm": 0.10281670838594437, |
| "kl": 0.0028905868530273438, |
| "learning_rate": 7.330314893841101e-07, |
| "loss": 0.0474, |
| "reward": 0.5266857808455825, |
| "reward_std": 0.9769049882888794, |
| "step": 44 |
| }, |
| { |
| "clip_fraction": 0.0, |
| "completion_length": 2598.888885498047, |
| "dapo/avg_reward_std": 0.25520460651471066, |
| "dapo/filter_reward_index": 0.0, |
| "dapo/kept_prompts_ratio": 0.3205128231873879, |
| "dapo/num_sampling_attempts": 3.25, |
| "dapo/sampling_efficiency": 54.61309523809524, |
| "dapo/total_prompts_processed": 19.5, |
| "dapo/valid_prompts_collected": 6.0, |
| "epoch": 0.05142857142857143, |
| "grad_norm": 0.20818237960338593, |
| "kl": 0.0046825408935546875, |
| "learning_rate": 7.185729670371604e-07, |
| "loss": 0.111, |
| "reward": 0.8208948634564877, |
| "reward_std": 0.9365335553884506, |
| "step": 45 |
| }, |
| { |
| "clip_fraction": 0.0, |
| "completion_length": 2529.66316986084, |
| "dapo/avg_reward_std": 0.23859836988978916, |
| "dapo/filter_reward_index": 0.0, |
| "dapo/kept_prompts_ratio": 0.2916666753590107, |
| "dapo/num_sampling_attempts": 4.5, |
| "dapo/sampling_efficiency": 25.535714285714285, |
| "dapo/total_prompts_processed": 27.0, |
| "dapo/valid_prompts_collected": 6.0, |
| "epoch": 0.052571428571428575, |
| "grad_norm": 0.12924660742282867, |
| "kl": 0.05440711975097656, |
| "learning_rate": 7.039090644965509e-07, |
| "loss": 0.058, |
| "reward": 0.5307688321918249, |
| "reward_std": 0.9391194358468056, |
| "step": 46 |
| }, |
| { |
| "clip_fraction": 0.0, |
| "completion_length": 2737.288230895996, |
| "dapo/avg_reward_std": 0.25754969901052016, |
| "dapo/filter_reward_index": 0.0, |
| "dapo/kept_prompts_ratio": 0.3678160998327979, |
| "dapo/num_sampling_attempts": 3.625, |
| "dapo/sampling_efficiency": 41.14583333333333, |
| "dapo/total_prompts_processed": 21.75, |
| "dapo/valid_prompts_collected": 6.0, |
| "epoch": 0.053714285714285714, |
| "grad_norm": 0.1452113687992096, |
| "kl": 0.01877737045288086, |
| "learning_rate": 6.890576474687263e-07, |
| "loss": 0.0601, |
| "reward": 0.5596560873091221, |
| "reward_std": 0.9911476969718933, |
| "step": 47 |
| }, |
| { |
| "clip_fraction": 0.0, |
| "completion_length": 2543.0694885253906, |
| "dapo/avg_reward_std": 0.2434165603839434, |
| "dapo/filter_reward_index": 0.0, |
| "dapo/kept_prompts_ratio": 0.3717948794364929, |
| "dapo/num_sampling_attempts": 3.25, |
| "dapo/sampling_efficiency": 34.37499999999999, |
| "dapo/total_prompts_processed": 19.5, |
| "dapo/valid_prompts_collected": 6.0, |
| "epoch": 0.054857142857142854, |
| "grad_norm": 0.15664616227149963, |
| "kl": 0.008816719055175781, |
| "learning_rate": 6.740368101176495e-07, |
| "loss": 0.0783, |
| "reward": 0.7667456082999706, |
| "reward_std": 0.9330208897590637, |
| "step": 48 |
| }, |
| { |
| "clip_fraction": 0.0, |
| "completion_length": 3054.357666015625, |
| "dapo/avg_reward_std": 0.16933719928448016, |
| "dapo/filter_reward_index": 0.0, |
| "dapo/kept_prompts_ratio": 0.22222222693455526, |
| "dapo/num_sampling_attempts": 4.875, |
| "dapo/sampling_efficiency": 26.5625, |
| "dapo/total_prompts_processed": 29.25, |
| "dapo/valid_prompts_collected": 6.0, |
| "epoch": 0.056, |
| "grad_norm": 0.13884593546390533, |
| "kl": 0.00569915771484375, |
| "learning_rate": 6.588648530198504e-07, |
| "loss": 0.0645, |
| "reward": 0.7750914767384529, |
| "reward_std": 0.9781928732991219, |
| "step": 49 |
| }, |
| { |
| "clip_fraction": 0.0, |
| "completion_length": 3030.9652709960938, |
| "dapo/avg_reward_std": 0.2089548914721518, |
| "dapo/filter_reward_index": 0.0, |
| "dapo/kept_prompts_ratio": 0.28282828629016876, |
| "dapo/num_sampling_attempts": 4.125, |
| "dapo/sampling_efficiency": 33.779761904761905, |
| "dapo/total_prompts_processed": 24.75, |
| "dapo/valid_prompts_collected": 6.0, |
| "epoch": 0.05714285714285714, |
| "grad_norm": 0.13095000386238098, |
| "kl": 0.005908966064453125, |
| "learning_rate": 6.435602608679916e-07, |
| "loss": 0.0854, |
| "reward": 0.7626989148557186, |
| "reward_std": 0.9684056863188744, |
| "step": 50 |
| }, |
| { |
| "clip_fraction": 0.0, |
| "completion_length": 3176.8819274902344, |
| "dapo/avg_reward_std": 0.2258962235516972, |
| "dapo/filter_reward_index": 0.0, |
| "dapo/kept_prompts_ratio": 0.29629630057348144, |
| "dapo/num_sampling_attempts": 4.5, |
| "dapo/sampling_efficiency": 33.25892857142857, |
| "dapo/total_prompts_processed": 27.0, |
| "dapo/valid_prompts_collected": 6.0, |
| "epoch": 0.05828571428571429, |
| "grad_norm": 0.11041354387998581, |
| "kl": 0.002262115478515625, |
| "learning_rate": 6.281416799501187e-07, |
| "loss": 0.0892, |
| "reward": 0.6493857521563768, |
| "reward_std": 0.9608959034085274, |
| "step": 51 |
| }, |
| { |
| "clip_fraction": 0.0, |
| "completion_length": 2991.208366394043, |
| "dapo/avg_reward_std": 0.23346692004374095, |
| "dapo/filter_reward_index": 0.0, |
| "dapo/kept_prompts_ratio": 0.3392857201397419, |
| "dapo/num_sampling_attempts": 3.5, |
| "dapo/sampling_efficiency": 52.70833333333333, |
| "dapo/total_prompts_processed": 21.0, |
| "dapo/valid_prompts_collected": 6.0, |
| "epoch": 0.05942857142857143, |
| "grad_norm": 0.13827170431613922, |
| "kl": 0.014558792114257812, |
| "learning_rate": 6.126278954320294e-07, |
| "loss": 0.0435, |
| "reward": 0.5274152141064405, |
| "reward_std": 0.9937505125999451, |
| "step": 52 |
| }, |
| { |
| "clip_fraction": 0.0, |
| "completion_length": 2921.013946533203, |
| "dapo/avg_reward_std": 0.2715419438378564, |
| "dapo/filter_reward_index": 0.0, |
| "dapo/kept_prompts_ratio": 0.3390804626818361, |
| "dapo/num_sampling_attempts": 3.625, |
| "dapo/sampling_efficiency": 51.5625, |
| "dapo/total_prompts_processed": 21.75, |
| "dapo/valid_prompts_collected": 6.0, |
| "epoch": 0.060571428571428575, |
| "grad_norm": 0.09735170006752014, |
| "kl": 0.009172439575195312, |
| "learning_rate": 5.97037808470444e-07, |
| "loss": 0.0541, |
| "reward": 0.7217882052063942, |
| "reward_std": 0.9594404622912407, |
| "step": 53 |
| }, |
| { |
| "clip_fraction": 0.0, |
| "completion_length": 3133.46875, |
| "dapo/avg_reward_std": 0.2624934350068753, |
| "dapo/filter_reward_index": 0.0, |
| "dapo/kept_prompts_ratio": 0.35256411077884525, |
| "dapo/num_sampling_attempts": 3.25, |
| "dapo/sampling_efficiency": 41.041666666666664, |
| "dapo/total_prompts_processed": 19.5, |
| "dapo/valid_prompts_collected": 6.0, |
| "epoch": 0.061714285714285715, |
| "grad_norm": 0.10414379835128784, |
| "kl": 0.010915756225585938, |
| "learning_rate": 5.813904131848564e-07, |
| "loss": 0.061, |
| "reward": 0.5302782151848078, |
| "reward_std": 0.9707583636045456, |
| "step": 54 |
| }, |
| { |
| "clip_fraction": 0.0, |
| "completion_length": 3010.5938110351562, |
| "dapo/avg_reward_std": 0.21664191484451295, |
| "dapo/filter_reward_index": 0.0, |
| "dapo/kept_prompts_ratio": 0.24444444941149818, |
| "dapo/num_sampling_attempts": 5.625, |
| "dapo/sampling_efficiency": 19.791666666666664, |
| "dapo/total_prompts_processed": 33.75, |
| "dapo/valid_prompts_collected": 6.0, |
| "epoch": 0.06285714285714286, |
| "grad_norm": 0.11232081800699234, |
| "kl": 0.012262344360351562, |
| "learning_rate": 5.657047735161255e-07, |
| "loss": 0.0561, |
| "reward": 0.5284321270883083, |
| "reward_std": 0.9165859594941139, |
| "step": 55 |
| }, |
| { |
| "clip_fraction": 0.0, |
| "completion_length": 3144.951416015625, |
| "dapo/avg_reward_std": 0.2279102834207671, |
| "dapo/filter_reward_index": 0.0, |
| "dapo/kept_prompts_ratio": 0.34523809807641165, |
| "dapo/num_sampling_attempts": 3.5, |
| "dapo/sampling_efficiency": 45.32738095238095, |
| "dapo/total_prompts_processed": 21.0, |
| "dapo/valid_prompts_collected": 6.0, |
| "epoch": 0.064, |
| "grad_norm": 0.13161872327327728, |
| "kl": 0.007735252380371094, |
| "learning_rate": 5.5e-07, |
| "loss": 0.0717, |
| "reward": 0.6519734226167202, |
| "reward_std": 0.9642440155148506, |
| "step": 56 |
| }, |
| { |
| "clip_fraction": 0.0, |
| "completion_length": 3222.6111450195312, |
| "dapo/avg_reward_std": 0.2675224413042483, |
| "dapo/filter_reward_index": 0.0, |
| "dapo/kept_prompts_ratio": 0.4492753724689069, |
| "dapo/num_sampling_attempts": 2.875, |
| "dapo/sampling_efficiency": 45.535714285714285, |
| "dapo/total_prompts_processed": 17.25, |
| "dapo/valid_prompts_collected": 6.0, |
| "epoch": 0.06514285714285714, |
| "grad_norm": 0.09332293272018433, |
| "kl": 0.0064525604248046875, |
| "learning_rate": 5.342952264838747e-07, |
| "loss": 0.0302, |
| "reward": 0.5501165799796581, |
| "reward_std": 0.9585564360022545, |
| "step": 57 |
| }, |
| { |
| "clip_fraction": 0.0, |
| "completion_length": 2679.9236907958984, |
| "dapo/avg_reward_std": 0.17708626160254845, |
| "dapo/filter_reward_index": 0.0, |
| "dapo/kept_prompts_ratio": 0.24358974741055414, |
| "dapo/num_sampling_attempts": 4.875, |
| "dapo/sampling_efficiency": 28.91865079365079, |
| "dapo/total_prompts_processed": 29.25, |
| "dapo/valid_prompts_collected": 6.0, |
| "epoch": 0.06628571428571428, |
| "grad_norm": 0.16309793293476105, |
| "kl": 0.01690673828125, |
| "learning_rate": 5.186095868151436e-07, |
| "loss": 0.0846, |
| "reward": 0.8469000309705734, |
| "reward_std": 0.9497043192386627, |
| "step": 58 |
| }, |
| { |
| "clip_fraction": 0.0, |
| "completion_length": 2847.048629760742, |
| "dapo/avg_reward_std": 0.2622834824282548, |
| "dapo/filter_reward_index": 0.0, |
| "dapo/kept_prompts_ratio": 0.3563218476443455, |
| "dapo/num_sampling_attempts": 3.625, |
| "dapo/sampling_efficiency": 29.999999999999993, |
| "dapo/total_prompts_processed": 21.75, |
| "dapo/valid_prompts_collected": 6.0, |
| "epoch": 0.06742857142857143, |
| "grad_norm": 0.09638360142707825, |
| "kl": 0.0057086944580078125, |
| "learning_rate": 5.02962191529556e-07, |
| "loss": 0.0634, |
| "reward": 0.6089529246091843, |
| "reward_std": 0.9450863003730774, |
| "step": 59 |
| }, |
| { |
| "clip_fraction": 0.0, |
| "completion_length": 3119.9132385253906, |
| "dapo/avg_reward_std": 0.19833819533503333, |
| "dapo/filter_reward_index": 0.0, |
| "dapo/kept_prompts_ratio": 0.2674418656631958, |
| "dapo/num_sampling_attempts": 5.375, |
| "dapo/sampling_efficiency": 29.563492063492063, |
| "dapo/total_prompts_processed": 32.25, |
| "dapo/valid_prompts_collected": 6.0, |
| "epoch": 0.06857142857142857, |
| "grad_norm": 0.1252850890159607, |
| "kl": 0.008715629577636719, |
| "learning_rate": 4.873721045679706e-07, |
| "loss": 0.0666, |
| "reward": 0.5249154977500439, |
| "reward_std": 0.947566568851471, |
| "step": 60 |
| }, |
| { |
| "clip_fraction": 0.0, |
| "completion_length": 2844.795181274414, |
| "dapo/avg_reward_std": 0.2648707001373686, |
| "dapo/filter_reward_index": 0.0, |
| "dapo/kept_prompts_ratio": 0.35632184610284606, |
| "dapo/num_sampling_attempts": 3.625, |
| "dapo/sampling_efficiency": 39.791666666666664, |
| "dapo/total_prompts_processed": 21.75, |
| "dapo/valid_prompts_collected": 6.0, |
| "epoch": 0.06971428571428571, |
| "grad_norm": 0.10366301238536835, |
| "kl": 0.056069374084472656, |
| "learning_rate": 4.7185832004988133e-07, |
| "loss": 0.037, |
| "reward": 0.5161248315125704, |
| "reward_std": 0.9692364558577538, |
| "step": 61 |
| }, |
| { |
| "clip_fraction": 0.0, |
| "completion_length": 3053.951446533203, |
| "dapo/avg_reward_std": 0.21576767837679064, |
| "dapo/filter_reward_index": 0.0, |
| "dapo/kept_prompts_ratio": 0.25225225574261434, |
| "dapo/num_sampling_attempts": 4.625, |
| "dapo/sampling_efficiency": 37.013888888888886, |
| "dapo/total_prompts_processed": 27.75, |
| "dapo/valid_prompts_collected": 6.0, |
| "epoch": 0.07085714285714285, |
| "grad_norm": 0.14441759884357452, |
| "kl": 0.009164810180664062, |
| "learning_rate": 4.5643973913200837e-07, |
| "loss": 0.0609, |
| "reward": 0.6510533541440964, |
| "reward_std": 0.9361515268683434, |
| "step": 62 |
| }, |
| { |
| "clip_fraction": 0.0, |
| "completion_length": 3326.781280517578, |
| "dapo/avg_reward_std": 0.2158982500885472, |
| "dapo/filter_reward_index": 0.0, |
| "dapo/kept_prompts_ratio": 0.3181818254066236, |
| "dapo/num_sampling_attempts": 4.125, |
| "dapo/sampling_efficiency": 44.49404761904761, |
| "dapo/total_prompts_processed": 24.75, |
| "dapo/valid_prompts_collected": 6.0, |
| "epoch": 0.072, |
| "grad_norm": 0.12127737700939178, |
| "kl": 0.031108856201171875, |
| "learning_rate": 4.4113514698014953e-07, |
| "loss": 0.0463, |
| "reward": 0.45860649459064007, |
| "reward_std": 0.9209225550293922, |
| "step": 63 |
| }, |
| { |
| "clip_fraction": 0.0, |
| "completion_length": 3208.6319885253906, |
| "dapo/avg_reward_std": 0.28419332668699065, |
| "dapo/filter_reward_index": 0.0, |
| "dapo/kept_prompts_ratio": 0.3563218440475135, |
| "dapo/num_sampling_attempts": 3.625, |
| "dapo/sampling_efficiency": 53.591269841269835, |
| "dapo/total_prompts_processed": 21.75, |
| "dapo/valid_prompts_collected": 6.0, |
| "epoch": 0.07314285714285715, |
| "grad_norm": 0.13326792418956757, |
| "kl": 0.0061321258544921875, |
| "learning_rate": 4.2596318988235037e-07, |
| "loss": 0.0614, |
| "reward": 0.5644803196191788, |
| "reward_std": 0.9919605851173401, |
| "step": 64 |
| }, |
| { |
| "clip_fraction": 0.0, |
| "completion_length": 2597.437530517578, |
| "dapo/avg_reward_std": 0.2766759342380932, |
| "dapo/filter_reward_index": 0.0, |
| "dapo/kept_prompts_ratio": 0.3928571529686451, |
| "dapo/num_sampling_attempts": 3.5, |
| "dapo/sampling_efficiency": 32.08333333333333, |
| "dapo/total_prompts_processed": 21.0, |
| "dapo/valid_prompts_collected": 6.0, |
| "epoch": 0.07428571428571429, |
| "grad_norm": 0.10434358566999435, |
| "kl": 0.049472808837890625, |
| "learning_rate": 4.1094235253127374e-07, |
| "loss": 0.0312, |
| "reward": 0.393868962302804, |
| "reward_std": 0.9459580257534981, |
| "step": 65 |
| }, |
| { |
| "clip_fraction": 0.0, |
| "completion_length": 2630.0833587646484, |
| "dapo/avg_reward_std": 0.25837596147148695, |
| "dapo/filter_reward_index": 0.0, |
| "dapo/kept_prompts_ratio": 0.35802469595714853, |
| "dapo/num_sampling_attempts": 3.375, |
| "dapo/sampling_efficiency": 31.666666666666664, |
| "dapo/total_prompts_processed": 20.25, |
| "dapo/valid_prompts_collected": 6.0, |
| "epoch": 0.07542857142857143, |
| "grad_norm": 0.11327924579381943, |
| "kl": 0.23560714721679688, |
| "learning_rate": 3.9609093550344907e-07, |
| "loss": 0.0563, |
| "reward": 0.674448698759079, |
| "reward_std": 0.9591537117958069, |
| "step": 66 |
| }, |
| { |
| "clip_fraction": 0.0, |
| "completion_length": 3343.3159790039062, |
| "dapo/avg_reward_std": 0.2785816714167595, |
| "dapo/filter_reward_index": 0.0, |
| "dapo/kept_prompts_ratio": 0.41666667101283866, |
| "dapo/num_sampling_attempts": 3.0, |
| "dapo/sampling_efficiency": 42.08333333333333, |
| "dapo/total_prompts_processed": 18.0, |
| "dapo/valid_prompts_collected": 6.0, |
| "epoch": 0.07657142857142857, |
| "grad_norm": 0.10341926664113998, |
| "kl": 0.005463600158691406, |
| "learning_rate": 3.8142703296283953e-07, |
| "loss": 0.0653, |
| "reward": 0.42072685062885284, |
| "reward_std": 0.9649706333875656, |
| "step": 67 |
| }, |
| { |
| "clip_fraction": 0.0, |
| "completion_length": 2880.0590438842773, |
| "dapo/avg_reward_std": 0.2447407204243872, |
| "dapo/filter_reward_index": 0.0, |
| "dapo/kept_prompts_ratio": 0.2916666728754838, |
| "dapo/num_sampling_attempts": 4.5, |
| "dapo/sampling_efficiency": 28.591269841269842, |
| "dapo/total_prompts_processed": 27.0, |
| "dapo/valid_prompts_collected": 6.0, |
| "epoch": 0.07771428571428571, |
| "grad_norm": 0.15764088928699493, |
| "kl": 0.011991500854492188, |
| "learning_rate": 3.6696851061588994e-07, |
| "loss": 0.1004, |
| "reward": 0.537701515480876, |
| "reward_std": 0.9107673466205597, |
| "step": 68 |
| }, |
| { |
| "clip_fraction": 0.0, |
| "completion_length": 2839.0069580078125, |
| "dapo/avg_reward_std": 0.21828406437849388, |
| "dapo/filter_reward_index": 0.0, |
| "dapo/kept_prompts_ratio": 0.26495726865071517, |
| "dapo/num_sampling_attempts": 4.875, |
| "dapo/sampling_efficiency": 32.39583333333333, |
| "dapo/total_prompts_processed": 29.25, |
| "dapo/valid_prompts_collected": 6.0, |
| "epoch": 0.07885714285714286, |
| "grad_norm": 0.1426348239183426, |
| "kl": 0.16588592529296875, |
| "learning_rate": 3.5273298394491515e-07, |
| "loss": 0.065, |
| "reward": 0.5752462260425091, |
| "reward_std": 0.9265653118491173, |
| "step": 69 |
| }, |
| { |
| "clip_fraction": 0.0, |
| "completion_length": 3154.9479370117188, |
| "dapo/avg_reward_std": 0.24686445650600253, |
| "dapo/filter_reward_index": 0.0, |
| "dapo/kept_prompts_ratio": 0.40476191185769583, |
| "dapo/num_sampling_attempts": 2.625, |
| "dapo/sampling_efficiency": 58.75, |
| "dapo/total_prompts_processed": 15.75, |
| "dapo/valid_prompts_collected": 6.0, |
| "epoch": 0.08, |
| "grad_norm": 0.09042708575725555, |
| "kl": 0.015224456787109375, |
| "learning_rate": 3.387377967463493e-07, |
| "loss": 0.0278, |
| "reward": 0.5091124139726162, |
| "reward_std": 0.9951601624488831, |
| "step": 70 |
| }, |
| { |
| "clip_fraction": 0.0, |
| "completion_length": 2558.7118377685547, |
| "dapo/avg_reward_std": 0.24922772922686168, |
| "dapo/filter_reward_index": 0.0, |
| "dapo/kept_prompts_ratio": 0.35119048452803064, |
| "dapo/num_sampling_attempts": 3.5, |
| "dapo/sampling_efficiency": 41.979166666666664, |
| "dapo/total_prompts_processed": 21.0, |
| "dapo/valid_prompts_collected": 6.0, |
| "epoch": 0.08114285714285714, |
| "grad_norm": 0.18424691259860992, |
| "kl": 0.012338638305664062, |
| "learning_rate": 3.250000000000001e-07, |
| "loss": 0.135, |
| "reward": 0.80832345969975, |
| "reward_std": 0.9256910160183907, |
| "step": 71 |
| }, |
| { |
| "clip_fraction": 0.0, |
| "completion_length": 2797.5659790039062, |
| "dapo/avg_reward_std": 0.3421325541677929, |
| "dapo/filter_reward_index": 0.0, |
| "dapo/kept_prompts_ratio": 0.4603174655210404, |
| "dapo/num_sampling_attempts": 2.625, |
| "dapo/sampling_efficiency": 52.916666666666664, |
| "dapo/total_prompts_processed": 15.75, |
| "dapo/valid_prompts_collected": 6.0, |
| "epoch": 0.08228571428571428, |
| "grad_norm": 0.10505988448858261, |
| "kl": 0.027385711669921875, |
| "learning_rate": 3.115363310950578e-07, |
| "loss": 0.0435, |
| "reward": 0.5198174491524696, |
| "reward_std": 0.932801865041256, |
| "step": 72 |
| }, |
| { |
| "clip_fraction": 0.0, |
| "completion_length": 3024.5243225097656, |
| "dapo/avg_reward_std": 0.26287247288611626, |
| "dapo/filter_reward_index": 0.0, |
| "dapo/kept_prompts_ratio": 0.33333334038334506, |
| "dapo/num_sampling_attempts": 3.875, |
| "dapo/sampling_efficiency": 40.0297619047619, |
| "dapo/total_prompts_processed": 23.25, |
| "dapo/valid_prompts_collected": 6.0, |
| "epoch": 0.08342857142857144, |
| "grad_norm": 0.09084703773260117, |
| "kl": 0.09223747253417969, |
| "learning_rate": 2.9836319343816397e-07, |
| "loss": 0.0314, |
| "reward": 0.3449883237481117, |
| "reward_std": 0.9521737843751907, |
| "step": 73 |
| }, |
| { |
| "clip_fraction": 0.0, |
| "completion_length": 2648.7257080078125, |
| "dapo/avg_reward_std": 0.2678213362340574, |
| "dapo/filter_reward_index": 0.0, |
| "dapo/kept_prompts_ratio": 0.38271605582148943, |
| "dapo/num_sampling_attempts": 3.375, |
| "dapo/sampling_efficiency": 40.0, |
| "dapo/total_prompts_processed": 20.25, |
| "dapo/valid_prompts_collected": 6.0, |
| "epoch": 0.08457142857142858, |
| "grad_norm": 0.15155129134655, |
| "kl": 1.0743579864501953, |
| "learning_rate": 2.854966364683872e-07, |
| "loss": 0.0851, |
| "reward": 0.7227161657065153, |
| "reward_std": 0.9239719212055206, |
| "step": 74 |
| }, |
| { |
| "clip_fraction": 0.0, |
| "completion_length": 2659.388900756836, |
| "dapo/avg_reward_std": 0.28101804742106684, |
| "dapo/filter_reward_index": 0.0, |
| "dapo/kept_prompts_ratio": 0.37037037699310865, |
| "dapo/num_sampling_attempts": 3.375, |
| "dapo/sampling_efficiency": 34.791666666666664, |
| "dapo/total_prompts_processed": 20.25, |
| "dapo/valid_prompts_collected": 6.0, |
| "epoch": 0.08571428571428572, |
| "grad_norm": 0.1127755343914032, |
| "kl": 0.02587890625, |
| "learning_rate": 2.729523361034538e-07, |
| "loss": 0.0523, |
| "reward": 0.7372388476505876, |
| "reward_std": 0.918749064207077, |
| "step": 75 |
| }, |
| { |
| "clip_fraction": 0.0, |
| "completion_length": 2402.364585876465, |
| "dapo/avg_reward_std": 0.26893362632164586, |
| "dapo/filter_reward_index": 0.0, |
| "dapo/kept_prompts_ratio": 0.36538462111583125, |
| "dapo/num_sampling_attempts": 3.25, |
| "dapo/sampling_efficiency": 48.854166666666664, |
| "dapo/total_prompts_processed": 19.5, |
| "dapo/valid_prompts_collected": 6.0, |
| "epoch": 0.08685714285714285, |
| "grad_norm": 0.14693324267864227, |
| "kl": 0.12501144409179688, |
| "learning_rate": 2.6074557564105724e-07, |
| "loss": 0.0747, |
| "reward": 0.6182113699615002, |
| "reward_std": 0.9421844929456711, |
| "step": 76 |
| }, |
| { |
| "clip_fraction": 0.0, |
| "completion_length": 2970.1146392822266, |
| "dapo/avg_reward_std": 0.2118390180170536, |
| "dapo/filter_reward_index": 0.0, |
| "dapo/kept_prompts_ratio": 0.25000000521540644, |
| "dapo/num_sampling_attempts": 5.0, |
| "dapo/sampling_efficiency": 30.53571428571428, |
| "dapo/total_prompts_processed": 30.0, |
| "dapo/valid_prompts_collected": 6.0, |
| "epoch": 0.088, |
| "grad_norm": 0.12072475999593735, |
| "kl": 0.05495643615722656, |
| "learning_rate": 2.488912271385139e-07, |
| "loss": 0.0498, |
| "reward": 0.46035338938236237, |
| "reward_std": 0.9146044701337814, |
| "step": 77 |
| }, |
| { |
| "clip_fraction": 0.0, |
| "completion_length": 2959.0972442626953, |
| "dapo/avg_reward_std": 0.13832776496807733, |
| "dapo/filter_reward_index": 0.0, |
| "dapo/kept_prompts_ratio": 0.1631944477558136, |
| "dapo/num_sampling_attempts": 6.0, |
| "dapo/sampling_efficiency": 30.868055555555557, |
| "dapo/total_prompts_processed": 36.0, |
| "dapo/valid_prompts_collected": 6.0, |
| "epoch": 0.08914285714285715, |
| "grad_norm": 0.14289411902427673, |
| "kl": 0.23297691345214844, |
| "learning_rate": 2.374037332934512e-07, |
| "loss": 0.0742, |
| "reward": 0.49553669430315495, |
| "reward_std": 0.9023259580135345, |
| "step": 78 |
| }, |
| { |
| "clip_fraction": 0.0, |
| "completion_length": 2935.8159942626953, |
| "dapo/avg_reward_std": 0.2931290553374724, |
| "dapo/filter_reward_index": 0.0, |
| "dapo/kept_prompts_ratio": 0.44696970080787485, |
| "dapo/num_sampling_attempts": 2.75, |
| "dapo/sampling_efficiency": 58.854166666666664, |
| "dapo/total_prompts_processed": 16.5, |
| "dapo/valid_prompts_collected": 6.0, |
| "epoch": 0.09028571428571429, |
| "grad_norm": 0.13638050854206085, |
| "kl": 0.03482818603515625, |
| "learning_rate": 2.2629708984760706e-07, |
| "loss": 0.0609, |
| "reward": 0.4563083341345191, |
| "reward_std": 0.9425384849309921, |
| "step": 79 |
| }, |
| { |
| "clip_fraction": 0.0, |
| "completion_length": 3111.340301513672, |
| "dapo/avg_reward_std": 0.22562272967518987, |
| "dapo/filter_reward_index": 0.0, |
| "dapo/kept_prompts_ratio": 0.3063063154349456, |
| "dapo/num_sampling_attempts": 4.625, |
| "dapo/sampling_efficiency": 28.819444444444446, |
| "dapo/total_prompts_processed": 27.75, |
| "dapo/valid_prompts_collected": 6.0, |
| "epoch": 0.09142857142857143, |
| "grad_norm": 0.10739335417747498, |
| "kl": 0.008031845092773438, |
| "learning_rate": 2.1558482853517253e-07, |
| "loss": 0.0574, |
| "reward": 0.6980459969490767, |
| "reward_std": 0.9673654958605766, |
| "step": 80 |
| }, |
| { |
| "clip_fraction": 0.0, |
| "completion_length": 2921.6111450195312, |
| "dapo/avg_reward_std": 0.2788313144239886, |
| "dapo/filter_reward_index": 0.0, |
| "dapo/kept_prompts_ratio": 0.3333333386429425, |
| "dapo/num_sampling_attempts": 3.625, |
| "dapo/sampling_efficiency": 34.27083333333333, |
| "dapo/total_prompts_processed": 21.75, |
| "dapo/valid_prompts_collected": 6.0, |
| "epoch": 0.09257142857142857, |
| "grad_norm": 0.18038466572761536, |
| "kl": 0.016963958740234375, |
| "learning_rate": 2.0528000059645995e-07, |
| "loss": 0.0958, |
| "reward": 0.6405055914074183, |
| "reward_std": 0.9560460075736046, |
| "step": 81 |
| }, |
| { |
| "clip_fraction": 0.0, |
| "completion_length": 3220.687530517578, |
| "dapo/avg_reward_std": 0.1744266465688363, |
| "dapo/filter_reward_index": 0.0, |
| "dapo/kept_prompts_ratio": 0.2307692349721224, |
| "dapo/num_sampling_attempts": 4.875, |
| "dapo/sampling_efficiency": 26.666666666666664, |
| "dapo/total_prompts_processed": 29.25, |
| "dapo/valid_prompts_collected": 6.0, |
| "epoch": 0.09371428571428571, |
| "grad_norm": 0.12377161532640457, |
| "kl": 0.009552001953125, |
| "learning_rate": 1.9539516087697517e-07, |
| "loss": 0.061, |
| "reward": 0.5073397234082222, |
| "reward_std": 0.9641925543546677, |
| "step": 82 |
| }, |
| { |
| "clip_fraction": 0.0, |
| "completion_length": 2663.1597442626953, |
| "dapo/avg_reward_std": 0.2496542421079451, |
| "dapo/filter_reward_index": 0.0, |
| "dapo/kept_prompts_ratio": 0.3333333413447103, |
| "dapo/num_sampling_attempts": 3.875, |
| "dapo/sampling_efficiency": 40.451388888888886, |
| "dapo/total_prompts_processed": 23.25, |
| "dapo/valid_prompts_collected": 6.0, |
| "epoch": 0.09485714285714286, |
| "grad_norm": 0.1273493468761444, |
| "kl": 0.04001617431640625, |
| "learning_rate": 1.8594235253127372e-07, |
| "loss": 0.0521, |
| "reward": 0.49824655149132013, |
| "reward_std": 0.9464590474963188, |
| "step": 83 |
| }, |
| { |
| "clip_fraction": 0.0, |
| "completion_length": 3073.2986450195312, |
| "dapo/avg_reward_std": 0.27911247177557513, |
| "dapo/filter_reward_index": 0.0, |
| "dapo/kept_prompts_ratio": 0.4318181892687624, |
| "dapo/num_sampling_attempts": 2.75, |
| "dapo/sampling_efficiency": 60.3125, |
| "dapo/total_prompts_processed": 16.5, |
| "dapo/valid_prompts_collected": 6.0, |
| "epoch": 0.096, |
| "grad_norm": 0.14399568736553192, |
| "kl": 0.010408401489257812, |
| "learning_rate": 1.7693309235023127e-07, |
| "loss": 0.0657, |
| "reward": 0.624765045940876, |
| "reward_std": 0.954634428024292, |
| "step": 84 |
| }, |
| { |
| "clip_fraction": 0.0, |
| "completion_length": 3073.7535095214844, |
| "dapo/avg_reward_std": 0.17655213298024358, |
| "dapo/filter_reward_index": 0.0, |
| "dapo/kept_prompts_ratio": 0.24324324888152046, |
| "dapo/num_sampling_attempts": 4.625, |
| "dapo/sampling_efficiency": 29.82142857142857, |
| "dapo/total_prompts_processed": 27.75, |
| "dapo/valid_prompts_collected": 6.0, |
| "epoch": 0.09714285714285714, |
| "grad_norm": 0.12462300807237625, |
| "kl": 0.007053375244140625, |
| "learning_rate": 1.6837835672960831e-07, |
| "loss": 0.062, |
| "reward": 0.6820014184340835, |
| "reward_std": 0.8695997595787048, |
| "step": 85 |
| }, |
| { |
| "clip_fraction": 0.0, |
| "completion_length": 2741.204849243164, |
| "dapo/avg_reward_std": 0.21997538357973098, |
| "dapo/filter_reward_index": 0.0, |
| "dapo/kept_prompts_ratio": 0.2666666731238365, |
| "dapo/num_sampling_attempts": 5.0, |
| "dapo/sampling_efficiency": 27.896825396825395, |
| "dapo/total_prompts_processed": 30.0, |
| "dapo/valid_prompts_collected": 6.0, |
| "epoch": 0.09828571428571428, |
| "grad_norm": 0.14978615939617157, |
| "kl": 0.025630950927734375, |
| "learning_rate": 1.6028856829700258e-07, |
| "loss": 0.0585, |
| "reward": 0.5304304007440805, |
| "reward_std": 0.9523463025689125, |
| "step": 86 |
| }, |
| { |
| "clip_fraction": 0.0, |
| "completion_length": 3223.7257080078125, |
| "dapo/avg_reward_std": 0.27104776600996655, |
| "dapo/filter_reward_index": 0.0, |
| "dapo/kept_prompts_ratio": 0.3666666716337204, |
| "dapo/num_sampling_attempts": 3.75, |
| "dapo/sampling_efficiency": 43.333333333333336, |
| "dapo/total_prompts_processed": 22.5, |
| "dapo/valid_prompts_collected": 6.0, |
| "epoch": 0.09942857142857142, |
| "grad_norm": 0.1086694523692131, |
| "kl": 0.009660720825195312, |
| "learning_rate": 1.5267358321348285e-07, |
| "loss": 0.058, |
| "reward": 0.5936380252242088, |
| "reward_std": 0.919317290186882, |
| "step": 87 |
| }, |
| { |
| "clip_fraction": 0.0, |
| "completion_length": 2934.5833740234375, |
| "dapo/avg_reward_std": 0.23462909049001232, |
| "dapo/filter_reward_index": 0.0, |
| "dapo/kept_prompts_ratio": 0.3333333376152762, |
| "dapo/num_sampling_attempts": 3.625, |
| "dapo/sampling_efficiency": 52.84722222222222, |
| "dapo/total_prompts_processed": 21.75, |
| "dapo/valid_prompts_collected": 6.0, |
| "epoch": 0.10057142857142858, |
| "grad_norm": 0.14571106433868408, |
| "kl": 0.02588653564453125, |
| "learning_rate": 1.4554267916537495e-07, |
| "loss": 0.0741, |
| "reward": 0.5716092269867659, |
| "reward_std": 0.9475584626197815, |
| "step": 88 |
| }, |
| { |
| "clip_fraction": 0.0, |
| "completion_length": 3017.2673950195312, |
| "dapo/avg_reward_std": 0.22858241697152457, |
| "dapo/filter_reward_index": 0.0, |
| "dapo/kept_prompts_ratio": 0.3333333383003871, |
| "dapo/num_sampling_attempts": 3.75, |
| "dapo/sampling_efficiency": 45.416666666666664, |
| "dapo/total_prompts_processed": 22.5, |
| "dapo/valid_prompts_collected": 6.0, |
| "epoch": 0.10171428571428572, |
| "grad_norm": 0.10647116601467133, |
| "kl": 0.034389495849609375, |
| "learning_rate": 1.3890454406082956e-07, |
| "loss": 0.0586, |
| "reward": 0.5356123449746519, |
| "reward_std": 0.9426311627030373, |
| "step": 89 |
| }, |
| { |
| "clip_fraction": 0.0, |
| "completion_length": 2540.9548950195312, |
| "dapo/avg_reward_std": 0.16863613526026408, |
| "dapo/filter_reward_index": 0.0, |
| "dapo/kept_prompts_ratio": 0.22592593100335862, |
| "dapo/num_sampling_attempts": 5.625, |
| "dapo/sampling_efficiency": 28.75, |
| "dapo/total_prompts_processed": 33.75, |
| "dapo/valid_prompts_collected": 6.0, |
| "epoch": 0.10285714285714286, |
| "grad_norm": 0.1207195371389389, |
| "kl": 0.7361793518066406, |
| "learning_rate": 1.3276726544494571e-07, |
| "loss": 0.0349, |
| "reward": 0.750616230070591, |
| "reward_std": 1.0088519006967545, |
| "step": 90 |
| }, |
| { |
| "clip_fraction": 0.0, |
| "completion_length": 3054.5833435058594, |
| "dapo/avg_reward_std": 0.2058313423767686, |
| "dapo/filter_reward_index": 0.0, |
| "dapo/kept_prompts_ratio": 0.28645834047347307, |
| "dapo/num_sampling_attempts": 4.0, |
| "dapo/sampling_efficiency": 41.36904761904762, |
| "dapo/total_prompts_processed": 24.0, |
| "dapo/valid_prompts_collected": 6.0, |
| "epoch": 0.104, |
| "grad_norm": 0.10659411549568176, |
| "kl": 0.009166717529296875, |
| "learning_rate": 1.2713832064634125e-07, |
| "loss": 0.06, |
| "reward": 0.49192704539746046, |
| "reward_std": 0.8957021087408066, |
| "step": 91 |
| }, |
| { |
| "clip_fraction": 0.0, |
| "completion_length": 2958.43408203125, |
| "dapo/avg_reward_std": 0.317311546076899, |
| "dapo/filter_reward_index": 0.0, |
| "dapo/kept_prompts_ratio": 0.47101450160793634, |
| "dapo/num_sampling_attempts": 2.875, |
| "dapo/sampling_efficiency": 44.166666666666664, |
| "dapo/total_prompts_processed": 17.25, |
| "dapo/valid_prompts_collected": 6.0, |
| "epoch": 0.10514285714285715, |
| "grad_norm": 0.1002211645245552, |
| "kl": 0.00801849365234375, |
| "learning_rate": 1.220245676671809e-07, |
| "loss": 0.0508, |
| "reward": 0.7598672257736325, |
| "reward_std": 0.9218961223959923, |
| "step": 92 |
| }, |
| { |
| "clip_fraction": 0.0, |
| "completion_length": 3257.7881774902344, |
| "dapo/avg_reward_std": 0.2586492033941405, |
| "dapo/filter_reward_index": 0.0, |
| "dapo/kept_prompts_ratio": 0.36309524678758215, |
| "dapo/num_sampling_attempts": 3.5, |
| "dapo/sampling_efficiency": 41.5625, |
| "dapo/total_prompts_processed": 21.0, |
| "dapo/valid_prompts_collected": 6.0, |
| "epoch": 0.10628571428571429, |
| "grad_norm": 0.12036111950874329, |
| "kl": 0.01373291015625, |
| "learning_rate": 1.1743223682775649e-07, |
| "loss": 0.0459, |
| "reward": 0.5575436241924763, |
| "reward_std": 0.9431066736578941, |
| "step": 93 |
| }, |
| { |
| "clip_fraction": 0.0, |
| "completion_length": 2740.1284942626953, |
| "dapo/avg_reward_std": 0.2375115204241968, |
| "dapo/filter_reward_index": 0.0, |
| "dapo/kept_prompts_ratio": 0.35483871688765867, |
| "dapo/num_sampling_attempts": 3.875, |
| "dapo/sampling_efficiency": 29.999999999999996, |
| "dapo/total_prompts_processed": 23.25, |
| "dapo/valid_prompts_collected": 6.0, |
| "epoch": 0.10742857142857143, |
| "grad_norm": 0.14863841235637665, |
| "kl": 0.032642364501953125, |
| "learning_rate": 1.1336692317580158e-07, |
| "loss": 0.0742, |
| "reward": 0.5738632343709469, |
| "reward_std": 0.9468542039394379, |
| "step": 94 |
| }, |
| { |
| "clip_fraction": 0.0, |
| "completion_length": 2899.937515258789, |
| "dapo/avg_reward_std": 0.2901096656208947, |
| "dapo/filter_reward_index": 0.0, |
| "dapo/kept_prompts_ratio": 0.4206349246558689, |
| "dapo/num_sampling_attempts": 2.625, |
| "dapo/sampling_efficiency": 64.58333333333333, |
| "dapo/total_prompts_processed": 15.75, |
| "dapo/valid_prompts_collected": 6.0, |
| "epoch": 0.10857142857142857, |
| "grad_norm": 0.13841120898723602, |
| "kl": 0.012683868408203125, |
| "learning_rate": 1.0983357966978745e-07, |
| "loss": 0.0653, |
| "reward": 0.6555321607738733, |
| "reward_std": 0.9674765914678574, |
| "step": 95 |
| }, |
| { |
| "clip_fraction": 0.0, |
| "completion_length": 2926.1910247802734, |
| "dapo/avg_reward_std": 0.18252932499436772, |
| "dapo/filter_reward_index": 0.0, |
| "dapo/kept_prompts_ratio": 0.2696078485425781, |
| "dapo/num_sampling_attempts": 4.25, |
| "dapo/sampling_efficiency": 37.82738095238095, |
| "dapo/total_prompts_processed": 25.5, |
| "dapo/valid_prompts_collected": 6.0, |
| "epoch": 0.10971428571428571, |
| "grad_norm": 0.13530230522155762, |
| "kl": 0.05282402038574219, |
| "learning_rate": 1.068365111445064e-07, |
| "loss": 0.0762, |
| "reward": 0.5449853939935565, |
| "reward_std": 0.952080488204956, |
| "step": 96 |
| }, |
| { |
| "clip_fraction": 0.0, |
| "completion_length": 2798.031280517578, |
| "dapo/avg_reward_std": 0.23633464597738707, |
| "dapo/filter_reward_index": 0.0, |
| "dapo/kept_prompts_ratio": 0.3461538478732109, |
| "dapo/num_sampling_attempts": 3.25, |
| "dapo/sampling_efficiency": 38.541666666666664, |
| "dapo/total_prompts_processed": 19.5, |
| "dapo/valid_prompts_collected": 6.0, |
| "epoch": 0.11085714285714286, |
| "grad_norm": 0.1648494303226471, |
| "kl": 0.025691986083984375, |
| "learning_rate": 1.0437936906629334e-07, |
| "loss": 0.0939, |
| "reward": 0.673285935074091, |
| "reward_std": 0.979133740067482, |
| "step": 97 |
| }, |
| { |
| "clip_fraction": 0.0, |
| "completion_length": 3240.7361450195312, |
| "dapo/avg_reward_std": 0.2805523918225215, |
| "dapo/filter_reward_index": 0.0, |
| "dapo/kept_prompts_ratio": 0.3653846193964665, |
| "dapo/num_sampling_attempts": 3.25, |
| "dapo/sampling_efficiency": 54.513888888888886, |
| "dapo/total_prompts_processed": 19.5, |
| "dapo/valid_prompts_collected": 6.0, |
| "epoch": 0.112, |
| "grad_norm": 0.12132810056209564, |
| "kl": 0.01453399658203125, |
| "learning_rate": 1.0246514708427701e-07, |
| "loss": 0.0557, |
| "reward": 0.5335402796044946, |
| "reward_std": 0.9456770345568657, |
| "step": 98 |
| }, |
| { |
| "clip_fraction": 0.0, |
| "completion_length": 2497.9132499694824, |
| "dapo/avg_reward_std": 0.2488528937101364, |
| "dapo/filter_reward_index": 0.0, |
| "dapo/kept_prompts_ratio": 0.3888888942698638, |
| "dapo/num_sampling_attempts": 3.0, |
| "dapo/sampling_efficiency": 58.05555555555555, |
| "dapo/total_prompts_processed": 18.0, |
| "dapo/valid_prompts_collected": 6.0, |
| "epoch": 0.11314285714285714, |
| "grad_norm": 0.24999241530895233, |
| "kl": 0.028301239013671875, |
| "learning_rate": 1.0109617738307911e-07, |
| "loss": 0.1037, |
| "reward": 0.785055335611105, |
| "reward_std": 0.9553829357028008, |
| "step": 99 |
| }, |
| { |
| "clip_fraction": 0.0, |
| "completion_length": 3039.6284790039062, |
| "dapo/avg_reward_std": 0.2903642791012923, |
| "dapo/filter_reward_index": 0.0, |
| "dapo/kept_prompts_ratio": 0.38194445086022216, |
| "dapo/num_sampling_attempts": 3.0, |
| "dapo/sampling_efficiency": 46.24999999999999, |
| "dapo/total_prompts_processed": 18.0, |
| "dapo/valid_prompts_collected": 6.0, |
| "epoch": 0.11428571428571428, |
| "grad_norm": 0.14126254618167877, |
| "kl": 0.014410018920898438, |
| "learning_rate": 1.002741278414069e-07, |
| "loss": 0.0643, |
| "reward": 0.4948624651879072, |
| "reward_std": 0.9704382866621017, |
| "step": 100 |
| }, |
| { |
| "clip_fraction": 0.0, |
| "completion_length": 3318.513916015625, |
| "dapo/avg_reward_std": 0.22042016812733242, |
| "dapo/filter_reward_index": 0.0, |
| "dapo/kept_prompts_ratio": 0.29523810063089645, |
| "dapo/num_sampling_attempts": 4.375, |
| "dapo/sampling_efficiency": 28.645833333333332, |
| "dapo/total_prompts_processed": 26.25, |
| "dapo/valid_prompts_collected": 6.0, |
| "epoch": 0.11542857142857142, |
| "grad_norm": 0.22150926291942596, |
| "kl": 0.011791229248046875, |
| "learning_rate": 1e-07, |
| "loss": 0.0631, |
| "reward": 0.46524661034345627, |
| "reward_std": 0.9665903598070145, |
| "step": 101 |
| }, |
| { |
| "clip_fraction": 0.0, |
| "completion_length": 3083.875, |
| "dapo/avg_reward_std": 0.21663353669232335, |
| "dapo/filter_reward_index": 0.0, |
| "dapo/kept_prompts_ratio": 0.3390804637095024, |
| "dapo/num_sampling_attempts": 3.625, |
| "dapo/sampling_efficiency": 39.93055555555555, |
| "dapo/total_prompts_processed": 21.75, |
| "dapo/valid_prompts_collected": 6.0, |
| "epoch": 0.11657142857142858, |
| "grad_norm": 0.16289636492729187, |
| "kl": 0.008695602416992188, |
| "learning_rate": 6.203955092681039e-07, |
| "loss": 0.098, |
| "reward": 0.8642945289611816, |
| "reward_std": 1.031830094754696, |
| "step": 102 |
| }, |
| { |
| "clip_fraction": 0.0, |
| "completion_length": 3364.701446533203, |
| "dapo/avg_reward_std": 0.24887267331923207, |
| "dapo/filter_reward_index": 0.0, |
| "dapo/kept_prompts_ratio": 0.3172043090866458, |
| "dapo/num_sampling_attempts": 3.875, |
| "dapo/sampling_efficiency": 31.69642857142857, |
| "dapo/total_prompts_processed": 23.25, |
| "dapo/valid_prompts_collected": 6.0, |
| "epoch": 0.11771428571428572, |
| "grad_norm": 0.08825232833623886, |
| "kl": 0.009820938110351562, |
| "learning_rate": 6.126278954320294e-07, |
| "loss": 0.0178, |
| "reward": 0.3627179069444537, |
| "reward_std": 0.8941863179206848, |
| "step": 103 |
| }, |
| { |
| "clip_fraction": 0.0, |
| "completion_length": 3255.3055725097656, |
| "dapo/avg_reward_std": 0.24808817549988074, |
| "dapo/filter_reward_index": 0.0, |
| "dapo/kept_prompts_ratio": 0.33950618074999916, |
| "dapo/num_sampling_attempts": 3.375, |
| "dapo/sampling_efficiency": 38.95833333333333, |
| "dapo/total_prompts_processed": 20.25, |
| "dapo/valid_prompts_collected": 6.0, |
| "epoch": 0.11885714285714286, |
| "grad_norm": 0.13638561964035034, |
| "kl": 0.011318206787109375, |
| "learning_rate": 6.048412045323164e-07, |
| "loss": 0.0643, |
| "reward": 0.5508436523377895, |
| "reward_std": 0.9409585371613503, |
| "step": 104 |
| }, |
| { |
| "clip_fraction": 0.0, |
| "completion_length": 3270.4930419921875, |
| "dapo/avg_reward_std": 0.23700118958950042, |
| "dapo/filter_reward_index": 0.0, |
| "dapo/kept_prompts_ratio": 0.3166666706403097, |
| "dapo/num_sampling_attempts": 3.75, |
| "dapo/sampling_efficiency": 61.07142857142857, |
| "dapo/total_prompts_processed": 22.5, |
| "dapo/valid_prompts_collected": 6.0, |
| "epoch": 0.12, |
| "grad_norm": 0.10357476025819778, |
| "kl": 0.0117034912109375, |
| "learning_rate": 5.97037808470444e-07, |
| "loss": 0.0278, |
| "reward": 0.4137148158624768, |
| "reward_std": 0.9205853268504143, |
| "step": 105 |
| }, |
| { |
| "clip_fraction": 0.0, |
| "completion_length": 3118.9584045410156, |
| "dapo/avg_reward_std": 0.22452521603554487, |
| "dapo/filter_reward_index": 0.0, |
| "dapo/kept_prompts_ratio": 0.3333333395421505, |
| "dapo/num_sampling_attempts": 4.0, |
| "dapo/sampling_efficiency": 28.869047619047613, |
| "dapo/total_prompts_processed": 24.0, |
| "dapo/valid_prompts_collected": 6.0, |
| "epoch": 0.12114285714285715, |
| "grad_norm": 0.11885393410921097, |
| "kl": 0.011783599853515625, |
| "learning_rate": 5.892200842364462e-07, |
| "loss": 0.0786, |
| "reward": 0.673494272865355, |
| "reward_std": 0.9388571679592133, |
| "step": 106 |
| }, |
| { |
| "clip_fraction": 0.0, |
| "completion_length": 3183.666717529297, |
| "dapo/avg_reward_std": 0.23609773551716523, |
| "dapo/filter_reward_index": 0.0, |
| "dapo/kept_prompts_ratio": 0.30882353467099805, |
| "dapo/num_sampling_attempts": 4.25, |
| "dapo/sampling_efficiency": 37.74305555555556, |
| "dapo/total_prompts_processed": 25.5, |
| "dapo/valid_prompts_collected": 6.0, |
| "epoch": 0.12228571428571429, |
| "grad_norm": 0.13629400730133057, |
| "kl": 0.0092010498046875, |
| "learning_rate": 5.813904131848564e-07, |
| "loss": 0.0615, |
| "reward": 0.5680118557065725, |
| "reward_std": 0.8982010260224342, |
| "step": 107 |
| }, |
| { |
| "clip_fraction": 0.0, |
| "completion_length": 3170.263916015625, |
| "dapo/avg_reward_std": 0.21017570431168014, |
| "dapo/filter_reward_index": 0.0, |
| "dapo/kept_prompts_ratio": 0.3018018079770578, |
| "dapo/num_sampling_attempts": 4.625, |
| "dapo/sampling_efficiency": 30.625, |
| "dapo/total_prompts_processed": 27.75, |
| "dapo/valid_prompts_collected": 6.0, |
| "epoch": 0.12342857142857143, |
| "grad_norm": 0.1134539544582367, |
| "kl": 0.010692596435546875, |
| "learning_rate": 5.735511803093248e-07, |
| "loss": 0.0433, |
| "reward": 0.6368884779512882, |
| "reward_std": 0.9655679985880852, |
| "step": 108 |
| }, |
| { |
| "clip_fraction": 0.0, |
| "completion_length": 2938.5243530273438, |
| "dapo/avg_reward_std": 0.30796096875117374, |
| "dapo/filter_reward_index": 0.0, |
| "dapo/kept_prompts_ratio": 0.3974359052685591, |
| "dapo/num_sampling_attempts": 3.25, |
| "dapo/sampling_efficiency": 38.95833333333333, |
| "dapo/total_prompts_processed": 19.5, |
| "dapo/valid_prompts_collected": 6.0, |
| "epoch": 0.12457142857142857, |
| "grad_norm": 0.16064728796482086, |
| "kl": 0.014812469482421875, |
| "learning_rate": 5.657047735161255e-07, |
| "loss": 0.0874, |
| "reward": 0.4405923653393984, |
| "reward_std": 0.899710550904274, |
| "step": 109 |
| }, |
| { |
| "clip_fraction": 0.0, |
| "completion_length": 3333.5556030273438, |
| "dapo/avg_reward_std": 0.17683410130698105, |
| "dapo/filter_reward_index": 0.0, |
| "dapo/kept_prompts_ratio": 0.28735632475080164, |
| "dapo/num_sampling_attempts": 3.625, |
| "dapo/sampling_efficiency": 40.104166666666664, |
| "dapo/total_prompts_processed": 21.75, |
| "dapo/valid_prompts_collected": 6.0, |
| "epoch": 0.12571428571428572, |
| "grad_norm": 0.1374766230583191, |
| "kl": 0.00823211669921875, |
| "learning_rate": 5.578535828967777e-07, |
| "loss": 0.0525, |
| "reward": 0.6373127717524767, |
| "reward_std": 0.949370414018631, |
| "step": 110 |
| }, |
| { |
| "clip_fraction": 0.0, |
| "completion_length": 3404.166717529297, |
| "dapo/avg_reward_std": 0.2707539377734065, |
| "dapo/filter_reward_index": 0.0, |
| "dapo/kept_prompts_ratio": 0.3437500074505806, |
| "dapo/num_sampling_attempts": 4.0, |
| "dapo/sampling_efficiency": 28.124999999999996, |
| "dapo/total_prompts_processed": 24.0, |
| "dapo/valid_prompts_collected": 6.0, |
| "epoch": 0.12685714285714286, |
| "grad_norm": 0.09096160531044006, |
| "kl": 0.0152435302734375, |
| "learning_rate": 5.5e-07, |
| "loss": 0.0286, |
| "reward": 0.4166172882542014, |
| "reward_std": 0.9417606145143509, |
| "step": 111 |
| }, |
| { |
| "clip_fraction": 0.0, |
| "completion_length": 3306.263946533203, |
| "dapo/avg_reward_std": 0.17227381931410896, |
| "dapo/filter_reward_index": 0.0, |
| "dapo/kept_prompts_ratio": 0.21481482055452134, |
| "dapo/num_sampling_attempts": 5.625, |
| "dapo/sampling_efficiency": 27.395833333333332, |
| "dapo/total_prompts_processed": 33.75, |
| "dapo/valid_prompts_collected": 6.0, |
| "epoch": 0.128, |
| "grad_norm": 0.11950567364692688, |
| "kl": 0.01320648193359375, |
| "learning_rate": 5.421464171032224e-07, |
| "loss": 0.0449, |
| "reward": 0.4937558462843299, |
| "reward_std": 0.9720155894756317, |
| "step": 112 |
| }, |
| { |
| "clip_fraction": 0.0, |
| "completion_length": 3117.1979064941406, |
| "dapo/avg_reward_std": 0.30339551545106447, |
| "dapo/filter_reward_index": 0.0, |
| "dapo/kept_prompts_ratio": 0.3846153886272357, |
| "dapo/num_sampling_attempts": 3.25, |
| "dapo/sampling_efficiency": 38.95833333333333, |
| "dapo/total_prompts_processed": 19.5, |
| "dapo/valid_prompts_collected": 6.0, |
| "epoch": 0.12914285714285714, |
| "grad_norm": 0.15823398530483246, |
| "kl": 0.01418304443359375, |
| "learning_rate": 5.342952264838747e-07, |
| "loss": 0.0743, |
| "reward": 0.5596551271155477, |
| "reward_std": 0.8979872986674309, |
| "step": 113 |
| }, |
| { |
| "clip_fraction": 0.0, |
| "completion_length": 3239.031280517578, |
| "dapo/avg_reward_std": 0.24120492219924927, |
| "dapo/filter_reward_index": 0.0, |
| "dapo/kept_prompts_ratio": 0.34000000298023225, |
| "dapo/num_sampling_attempts": 3.125, |
| "dapo/sampling_efficiency": 56.770833333333336, |
| "dapo/total_prompts_processed": 18.75, |
| "dapo/valid_prompts_collected": 6.0, |
| "epoch": 0.13028571428571428, |
| "grad_norm": 0.20106364786624908, |
| "kl": 0.01206207275390625, |
| "learning_rate": 5.264488196906752e-07, |
| "loss": 0.0817, |
| "reward": 0.697497084736824, |
| "reward_std": 0.9489930346608162, |
| "step": 114 |
| }, |
| { |
| "clip_fraction": 0.0, |
| "completion_length": 3197.2430725097656, |
| "dapo/avg_reward_std": 0.20663932577157632, |
| "dapo/filter_reward_index": 0.0, |
| "dapo/kept_prompts_ratio": 0.26495727056112045, |
| "dapo/num_sampling_attempts": 4.875, |
| "dapo/sampling_efficiency": 38.4375, |
| "dapo/total_prompts_processed": 29.25, |
| "dapo/valid_prompts_collected": 6.0, |
| "epoch": 0.13142857142857142, |
| "grad_norm": 0.15399962663650513, |
| "kl": 0.015567779541015625, |
| "learning_rate": 5.186095868151436e-07, |
| "loss": 0.0667, |
| "reward": 0.5802914081141353, |
| "reward_std": 0.9295158162713051, |
| "step": 115 |
| }, |
| { |
| "clip_fraction": 0.0, |
| "completion_length": 3272.6007080078125, |
| "dapo/avg_reward_std": 0.22710687816143035, |
| "dapo/filter_reward_index": 0.0, |
| "dapo/kept_prompts_ratio": 0.3166666701436043, |
| "dapo/num_sampling_attempts": 3.75, |
| "dapo/sampling_efficiency": 37.61904761904762, |
| "dapo/total_prompts_processed": 22.5, |
| "dapo/valid_prompts_collected": 6.0, |
| "epoch": 0.13257142857142856, |
| "grad_norm": 0.140142023563385, |
| "kl": 0.01934814453125, |
| "learning_rate": 5.107799157635538e-07, |
| "loss": 0.0611, |
| "reward": 0.6176847349852324, |
| "reward_std": 0.944318100810051, |
| "step": 116 |
| }, |
| { |
| "clip_fraction": 0.0, |
| "completion_length": 3268.4305725097656, |
| "dapo/avg_reward_std": 0.23266587586238466, |
| "dapo/filter_reward_index": 0.0, |
| "dapo/kept_prompts_ratio": 0.344827591345228, |
| "dapo/num_sampling_attempts": 3.625, |
| "dapo/sampling_efficiency": 38.125, |
| "dapo/total_prompts_processed": 21.75, |
| "dapo/valid_prompts_collected": 6.0, |
| "epoch": 0.1337142857142857, |
| "grad_norm": 0.1582440286874771, |
| "kl": 0.01198577880859375, |
| "learning_rate": 5.02962191529556e-07, |
| "loss": 0.0556, |
| "reward": 0.5785031230188906, |
| "reward_std": 0.954645112156868, |
| "step": 117 |
| }, |
| { |
| "clip_fraction": 0.0, |
| "completion_length": 2941.9722595214844, |
| "dapo/avg_reward_std": 0.24969401342027328, |
| "dapo/filter_reward_index": 0.0, |
| "dapo/kept_prompts_ratio": 0.3284313814604984, |
| "dapo/num_sampling_attempts": 4.25, |
| "dapo/sampling_efficiency": 27.20238095238095, |
| "dapo/total_prompts_processed": 25.5, |
| "dapo/valid_prompts_collected": 6.0, |
| "epoch": 0.13485714285714287, |
| "grad_norm": 0.1869765818119049, |
| "kl": 0.01676177978515625, |
| "learning_rate": 4.951587954676837e-07, |
| "loss": 0.1063, |
| "reward": 0.6486848145723343, |
| "reward_std": 0.9332743212580681, |
| "step": 118 |
| }, |
| { |
| "clip_fraction": 0.0, |
| "completion_length": 3206.982635498047, |
| "dapo/avg_reward_std": 0.20580977627209254, |
| "dapo/filter_reward_index": 0.0, |
| "dapo/kept_prompts_ratio": 0.26666667333671024, |
| "dapo/num_sampling_attempts": 4.375, |
| "dapo/sampling_efficiency": 41.28472222222222, |
| "dapo/total_prompts_processed": 26.25, |
| "dapo/valid_prompts_collected": 6.0, |
| "epoch": 0.136, |
| "grad_norm": 0.13004696369171143, |
| "kl": 0.015842437744140625, |
| "learning_rate": 4.873721045679706e-07, |
| "loss": 0.0453, |
| "reward": 0.4798949249088764, |
| "reward_std": 0.9390313774347305, |
| "step": 119 |
| }, |
| { |
| "clip_fraction": 0.0, |
| "completion_length": 3015.545135498047, |
| "dapo/avg_reward_std": 0.22217401381461852, |
| "dapo/filter_reward_index": 0.0, |
| "dapo/kept_prompts_ratio": 0.3548387149649282, |
| "dapo/num_sampling_attempts": 3.875, |
| "dapo/sampling_efficiency": 28.95833333333333, |
| "dapo/total_prompts_processed": 23.25, |
| "dapo/valid_prompts_collected": 6.0, |
| "epoch": 0.13714285714285715, |
| "grad_norm": 0.229897141456604, |
| "kl": 0.02198028564453125, |
| "learning_rate": 4.79604490731896e-07, |
| "loss": 0.0749, |
| "reward": 0.7311479561030865, |
| "reward_std": 0.9607837572693825, |
| "step": 120 |
| }, |
| { |
| "clip_fraction": 0.0, |
| "completion_length": 3098.656280517578, |
| "dapo/avg_reward_std": 0.22588159143924713, |
| "dapo/filter_reward_index": 0.0, |
| "dapo/kept_prompts_ratio": 0.32777778506278993, |
| "dapo/num_sampling_attempts": 3.75, |
| "dapo/sampling_efficiency": 44.613095238095234, |
| "dapo/total_prompts_processed": 22.5, |
| "dapo/valid_prompts_collected": 6.0, |
| "epoch": 0.1382857142857143, |
| "grad_norm": 0.13800247013568878, |
| "kl": 0.014202117919921875, |
| "learning_rate": 4.7185832004988133e-07, |
| "loss": 0.0814, |
| "reward": 0.8461479842662811, |
| "reward_std": 0.9660850539803505, |
| "step": 121 |
| }, |
| { |
| "clip_fraction": 0.0, |
| "completion_length": 3064.3924255371094, |
| "dapo/avg_reward_std": 0.16500467896461488, |
| "dapo/filter_reward_index": 0.0, |
| "dapo/kept_prompts_ratio": 0.19666667193174361, |
| "dapo/num_sampling_attempts": 6.25, |
| "dapo/sampling_efficiency": 21.07142857142857, |
| "dapo/total_prompts_processed": 37.5, |
| "dapo/valid_prompts_collected": 6.0, |
| "epoch": 0.13942857142857143, |
| "grad_norm": 0.1680934727191925, |
| "kl": 0.01361083984375, |
| "learning_rate": 4.641359520805548e-07, |
| "loss": 0.066, |
| "reward": 0.7812346797436476, |
| "reward_std": 0.9529108256101608, |
| "step": 122 |
| }, |
| { |
| "clip_fraction": 0.0, |
| "completion_length": 3097.4861755371094, |
| "dapo/avg_reward_std": 0.22939075000824466, |
| "dapo/filter_reward_index": 0.0, |
| "dapo/kept_prompts_ratio": 0.33333334038334506, |
| "dapo/num_sampling_attempts": 3.875, |
| "dapo/sampling_efficiency": 33.75, |
| "dapo/total_prompts_processed": 23.25, |
| "dapo/valid_prompts_collected": 6.0, |
| "epoch": 0.14057142857142857, |
| "grad_norm": 0.18081900477409363, |
| "kl": 0.014842987060546875, |
| "learning_rate": 4.5643973913200837e-07, |
| "loss": 0.0877, |
| "reward": 0.7531900368630886, |
| "reward_std": 0.9868133068084717, |
| "step": 123 |
| }, |
| { |
| "clip_fraction": 0.0, |
| "completion_length": 3203.888885498047, |
| "dapo/avg_reward_std": 0.24352495979379724, |
| "dapo/filter_reward_index": 0.0, |
| "dapo/kept_prompts_ratio": 0.35185185737080044, |
| "dapo/num_sampling_attempts": 3.375, |
| "dapo/sampling_efficiency": 43.05555555555556, |
| "dapo/total_prompts_processed": 20.25, |
| "dapo/valid_prompts_collected": 6.0, |
| "epoch": 0.1417142857142857, |
| "grad_norm": 0.16807734966278076, |
| "kl": 0.0139007568359375, |
| "learning_rate": 4.4877202554526084e-07, |
| "loss": 0.0612, |
| "reward": 0.715996683575213, |
| "reward_std": 0.9595553278923035, |
| "step": 124 |
| }, |
| { |
| "clip_fraction": 0.0, |
| "completion_length": 2885.5625610351562, |
| "dapo/avg_reward_std": 0.2548297820612788, |
| "dapo/filter_reward_index": 0.0, |
| "dapo/kept_prompts_ratio": 0.31770833814516664, |
| "dapo/num_sampling_attempts": 4.0, |
| "dapo/sampling_efficiency": 27.20238095238095, |
| "dapo/total_prompts_processed": 24.0, |
| "dapo/valid_prompts_collected": 6.0, |
| "epoch": 0.14285714285714285, |
| "grad_norm": 0.16355834901332855, |
| "kl": 0.02027130126953125, |
| "learning_rate": 4.4113514698014953e-07, |
| "loss": 0.0597, |
| "reward": 0.8311022147536278, |
| "reward_std": 0.9600836709141731, |
| "step": 125 |
| }, |
| { |
| "clip_fraction": 0.0, |
| "completion_length": 3250.843780517578, |
| "dapo/avg_reward_std": 0.2203440727858708, |
| "dapo/filter_reward_index": 0.0, |
| "dapo/kept_prompts_ratio": 0.32758621152105005, |
| "dapo/num_sampling_attempts": 3.625, |
| "dapo/sampling_efficiency": 46.770833333333336, |
| "dapo/total_prompts_processed": 21.75, |
| "dapo/valid_prompts_collected": 6.0, |
| "epoch": 0.144, |
| "grad_norm": 0.18190248310565948, |
| "kl": 0.0158843994140625, |
| "learning_rate": 4.3353142970386557e-07, |
| "loss": 0.068, |
| "reward": 0.7400151332840323, |
| "reward_std": 0.9569809287786484, |
| "step": 126 |
| }, |
| { |
| "clip_fraction": 0.0, |
| "completion_length": 3264.420166015625, |
| "dapo/avg_reward_std": 0.25137073759521755, |
| "dapo/filter_reward_index": 0.0, |
| "dapo/kept_prompts_ratio": 0.41666667429464205, |
| "dapo/num_sampling_attempts": 3.5, |
| "dapo/sampling_efficiency": 40.11904761904761, |
| "dapo/total_prompts_processed": 21.0, |
| "dapo/valid_prompts_collected": 6.0, |
| "epoch": 0.14514285714285713, |
| "grad_norm": 0.17950685322284698, |
| "kl": 0.0223236083984375, |
| "learning_rate": 4.2596318988235037e-07, |
| "loss": 0.0528, |
| "reward": 0.5194851458072662, |
| "reward_std": 0.9414050430059433, |
| "step": 127 |
| }, |
| { |
| "clip_fraction": 0.0, |
| "completion_length": 2892.9132690429688, |
| "dapo/avg_reward_std": 0.2416491061449051, |
| "dapo/filter_reward_index": 0.0, |
| "dapo/kept_prompts_ratio": 0.2631579002267436, |
| "dapo/num_sampling_attempts": 4.75, |
| "dapo/sampling_efficiency": 26.9047619047619, |
| "dapo/total_prompts_processed": 28.5, |
| "dapo/valid_prompts_collected": 6.0, |
| "epoch": 0.1462857142857143, |
| "grad_norm": 0.25602471828460693, |
| "kl": 0.02016448974609375, |
| "learning_rate": 4.1843273287476854e-07, |
| "loss": 0.0933, |
| "reward": 0.8592288717627525, |
| "reward_std": 0.9212958365678787, |
| "step": 128 |
| }, |
| { |
| "clip_fraction": 0.0, |
| "completion_length": 3146.6944580078125, |
| "dapo/avg_reward_std": 0.22558308675371366, |
| "dapo/filter_reward_index": 0.0, |
| "dapo/kept_prompts_ratio": 0.3218390854268238, |
| "dapo/num_sampling_attempts": 3.625, |
| "dapo/sampling_efficiency": 54.07738095238095, |
| "dapo/total_prompts_processed": 21.75, |
| "dapo/valid_prompts_collected": 6.0, |
| "epoch": 0.14742857142857144, |
| "grad_norm": 0.21352027356624603, |
| "kl": 0.0198211669921875, |
| "learning_rate": 4.1094235253127374e-07, |
| "loss": 0.0679, |
| "reward": 0.5732525363564491, |
| "reward_std": 0.9645283669233322, |
| "step": 129 |
| }, |
| { |
| "clip_fraction": 0.0, |
| "completion_length": 3248.4236450195312, |
| "dapo/avg_reward_std": 0.35807471639580196, |
| "dapo/filter_reward_index": 0.0, |
| "dapo/kept_prompts_ratio": 0.5000000066227384, |
| "dapo/num_sampling_attempts": 2.25, |
| "dapo/sampling_efficiency": 51.041666666666664, |
| "dapo/total_prompts_processed": 13.5, |
| "dapo/valid_prompts_collected": 6.0, |
| "epoch": 0.14857142857142858, |
| "grad_norm": 0.1599435657262802, |
| "kl": 0.0216827392578125, |
| "learning_rate": 4.034943304942796e-07, |
| "loss": 0.0443, |
| "reward": 0.5955070666968822, |
| "reward_std": 0.9924386888742447, |
| "step": 130 |
| }, |
| { |
| "clip_fraction": 0.0, |
| "completion_length": 2958.5347595214844, |
| "dapo/avg_reward_std": 0.18185590389298228, |
| "dapo/filter_reward_index": 0.0, |
| "dapo/kept_prompts_ratio": 0.23170731998071437, |
| "dapo/num_sampling_attempts": 5.125, |
| "dapo/sampling_efficiency": 24.945436507936506, |
| "dapo/total_prompts_processed": 30.75, |
| "dapo/valid_prompts_collected": 6.0, |
| "epoch": 0.14971428571428572, |
| "grad_norm": 0.21188445389270782, |
| "kl": 0.02074432373046875, |
| "learning_rate": 3.9609093550344907e-07, |
| "loss": 0.0628, |
| "reward": 0.8608505353331566, |
| "reward_std": 0.9059992283582687, |
| "step": 131 |
| }, |
| { |
| "clip_fraction": 0.0, |
| "completion_length": 3019.888931274414, |
| "dapo/avg_reward_std": 0.3038036392794715, |
| "dapo/filter_reward_index": 0.0, |
| "dapo/kept_prompts_ratio": 0.36419753785486575, |
| "dapo/num_sampling_attempts": 3.375, |
| "dapo/sampling_efficiency": 38.33333333333333, |
| "dapo/total_prompts_processed": 20.25, |
| "dapo/valid_prompts_collected": 6.0, |
| "epoch": 0.15085714285714286, |
| "grad_norm": 0.19752100110054016, |
| "kl": 0.024078369140625, |
| "learning_rate": 3.8873442270461485e-07, |
| "loss": 0.0698, |
| "reward": 0.7191393785178661, |
| "reward_std": 0.9548436179757118, |
| "step": 132 |
| }, |
| { |
| "clip_fraction": 0.0, |
| "completion_length": 3251.6909790039062, |
| "dapo/avg_reward_std": 0.17617152915114448, |
| "dapo/filter_reward_index": 0.0, |
| "dapo/kept_prompts_ratio": 0.22222222494227545, |
| "dapo/num_sampling_attempts": 5.25, |
| "dapo/sampling_efficiency": 31.369047619047613, |
| "dapo/total_prompts_processed": 31.5, |
| "dapo/valid_prompts_collected": 6.0, |
| "epoch": 0.152, |
| "grad_norm": 0.1220565065741539, |
| "kl": 0.01824951171875, |
| "learning_rate": 3.8142703296283953e-07, |
| "loss": 0.0249, |
| "reward": 0.3546891317819245, |
| "reward_std": 0.9377138167619705, |
| "step": 133 |
| }, |
| { |
| "clip_fraction": 0.0, |
| "completion_length": 3146.545196533203, |
| "dapo/avg_reward_std": 0.2565364229679108, |
| "dapo/filter_reward_index": 0.0, |
| "dapo/kept_prompts_ratio": 0.32666667103767394, |
| "dapo/num_sampling_attempts": 3.125, |
| "dapo/sampling_efficiency": 47.08333333333333, |
| "dapo/total_prompts_processed": 18.75, |
| "dapo/valid_prompts_collected": 6.0, |
| "epoch": 0.15314285714285714, |
| "grad_norm": 0.15810362994670868, |
| "kl": 0.03081512451171875, |
| "learning_rate": 3.7417099217982686e-07, |
| "loss": 0.0306, |
| "reward": 0.5206232005730271, |
| "reward_std": 0.9619846642017365, |
| "step": 134 |
| }, |
| { |
| "clip_fraction": 0.0, |
| "completion_length": 3085.5972900390625, |
| "dapo/avg_reward_std": 0.30491976333515985, |
| "dapo/filter_reward_index": 0.0, |
| "dapo/kept_prompts_ratio": 0.40476191469601225, |
| "dapo/num_sampling_attempts": 3.5, |
| "dapo/sampling_efficiency": 31.666666666666664, |
| "dapo/total_prompts_processed": 21.0, |
| "dapo/valid_prompts_collected": 6.0, |
| "epoch": 0.15428571428571428, |
| "grad_norm": 0.2133372277021408, |
| "kl": 0.0204620361328125, |
| "learning_rate": 3.6696851061588994e-07, |
| "loss": 0.0681, |
| "reward": 0.7713347226381302, |
| "reward_std": 0.9403144493699074, |
| "step": 135 |
| }, |
| { |
| "clip_fraction": 0.0, |
| "completion_length": 3326.295196533203, |
| "dapo/avg_reward_std": 0.22884555886953306, |
| "dapo/filter_reward_index": 0.0, |
| "dapo/kept_prompts_ratio": 0.24358974817471626, |
| "dapo/num_sampling_attempts": 4.875, |
| "dapo/sampling_efficiency": 25.868055555555557, |
| "dapo/total_prompts_processed": 29.25, |
| "dapo/valid_prompts_collected": 6.0, |
| "epoch": 0.15542857142857142, |
| "grad_norm": 0.18792302906513214, |
| "kl": 0.029754638671875, |
| "learning_rate": 3.5982178221668533e-07, |
| "loss": 0.0468, |
| "reward": 0.5651950668543577, |
| "reward_std": 0.9934203922748566, |
| "step": 136 |
| }, |
| { |
| "clip_fraction": 0.0, |
| "completion_length": 3265.2882080078125, |
| "dapo/avg_reward_std": 0.304972759137551, |
| "dapo/filter_reward_index": 0.0, |
| "dapo/kept_prompts_ratio": 0.43055556155741215, |
| "dapo/num_sampling_attempts": 3.0, |
| "dapo/sampling_efficiency": 54.375, |
| "dapo/total_prompts_processed": 18.0, |
| "dapo/valid_prompts_collected": 6.0, |
| "epoch": 0.15657142857142858, |
| "grad_norm": 0.13081717491149902, |
| "kl": 0.0223846435546875, |
| "learning_rate": 3.5273298394491515e-07, |
| "loss": 0.0443, |
| "reward": 0.5535581167787313, |
| "reward_std": 0.9467164501547813, |
| "step": 137 |
| }, |
| { |
| "clip_fraction": 0.0, |
| "completion_length": 2895.8646545410156, |
| "dapo/avg_reward_std": 0.2690910736719767, |
| "dapo/filter_reward_index": 0.0, |
| "dapo/kept_prompts_ratio": 0.3333333387970924, |
| "dapo/num_sampling_attempts": 3.75, |
| "dapo/sampling_efficiency": 32.82738095238095, |
| "dapo/total_prompts_processed": 22.5, |
| "dapo/valid_prompts_collected": 6.0, |
| "epoch": 0.15771428571428572, |
| "grad_norm": 0.18165208399295807, |
| "kl": 0.032073974609375, |
| "learning_rate": 3.45704275117204e-07, |
| "loss": 0.0288, |
| "reward": 0.5253790076822042, |
| "reward_std": 0.9247673749923706, |
| "step": 138 |
| }, |
| { |
| "clip_fraction": 0.0, |
| "completion_length": 3049.8507080078125, |
| "dapo/avg_reward_std": 0.2440622321196965, |
| "dapo/filter_reward_index": 0.0, |
| "dapo/kept_prompts_ratio": 0.33928572067192625, |
| "dapo/num_sampling_attempts": 3.5, |
| "dapo/sampling_efficiency": 40.11904761904761, |
| "dapo/total_prompts_processed": 21.0, |
| "dapo/valid_prompts_collected": 6.0, |
| "epoch": 0.15885714285714286, |
| "grad_norm": 0.19676071405410767, |
| "kl": 0.03052520751953125, |
| "learning_rate": 3.387377967463493e-07, |
| "loss": 0.0477, |
| "reward": 0.6778539270162582, |
| "reward_std": 0.9344745948910713, |
| "step": 139 |
| }, |
| { |
| "clip_fraction": 0.0, |
| "completion_length": 3029.0486450195312, |
| "dapo/avg_reward_std": 0.3111469969153404, |
| "dapo/filter_reward_index": 0.0, |
| "dapo/kept_prompts_ratio": 0.4916666768491268, |
| "dapo/num_sampling_attempts": 2.5, |
| "dapo/sampling_efficiency": 41.666666666666664, |
| "dapo/total_prompts_processed": 15.0, |
| "dapo/valid_prompts_collected": 6.0, |
| "epoch": 0.16, |
| "grad_norm": 0.18594416975975037, |
| "kl": 0.0277557373046875, |
| "learning_rate": 3.3183567088914833e-07, |
| "loss": 0.0431, |
| "reward": 0.5210836753249168, |
| "reward_std": 0.9851464107632637, |
| "step": 140 |
| }, |
| { |
| "clip_fraction": 0.0, |
| "completion_length": 3151.5486755371094, |
| "dapo/avg_reward_std": 0.23511080997330802, |
| "dapo/filter_reward_index": 0.0, |
| "dapo/kept_prompts_ratio": 0.3095238127878734, |
| "dapo/num_sampling_attempts": 4.375, |
| "dapo/sampling_efficiency": 26.18055555555555, |
| "dapo/total_prompts_processed": 26.25, |
| "dapo/valid_prompts_collected": 6.0, |
| "epoch": 0.16114285714285714, |
| "grad_norm": 0.17807213962078094, |
| "kl": 0.0266265869140625, |
| "learning_rate": 3.250000000000001e-07, |
| "loss": 0.0498, |
| "reward": 0.5591800361871719, |
| "reward_std": 0.9730060175061226, |
| "step": 141 |
| }, |
| { |
| "clip_fraction": 0.0, |
| "completion_length": 2963.59033203125, |
| "dapo/avg_reward_std": 0.19928012508898973, |
| "dapo/filter_reward_index": 0.0, |
| "dapo/kept_prompts_ratio": 0.2812500069849193, |
| "dapo/num_sampling_attempts": 4.0, |
| "dapo/sampling_efficiency": 38.02083333333333, |
| "dapo/total_prompts_processed": 24.0, |
| "dapo/valid_prompts_collected": 6.0, |
| "epoch": 0.16228571428571428, |
| "grad_norm": 0.24388359487056732, |
| "kl": 0.0318603515625, |
| "learning_rate": 3.182328662904756e-07, |
| "loss": 0.0567, |
| "reward": 0.7148469444364309, |
| "reward_std": 0.9495278596878052, |
| "step": 142 |
| }, |
| { |
| "clip_fraction": 0.0, |
| "completion_length": 3157.791717529297, |
| "dapo/avg_reward_std": 0.23966079843895777, |
| "dapo/filter_reward_index": 0.0, |
| "dapo/kept_prompts_ratio": 0.3214285767504147, |
| "dapo/num_sampling_attempts": 3.5, |
| "dapo/sampling_efficiency": 39.166666666666664, |
| "dapo/total_prompts_processed": 21.0, |
| "dapo/valid_prompts_collected": 6.0, |
| "epoch": 0.16342857142857142, |
| "grad_norm": 0.20528583228588104, |
| "kl": 0.041290283203125, |
| "learning_rate": 3.115363310950578e-07, |
| "loss": 0.0443, |
| "reward": 0.5249591246247292, |
| "reward_std": 0.9509934857487679, |
| "step": 143 |
| }, |
| { |
| "clip_fraction": 0.0, |
| "completion_length": 3030.187530517578, |
| "dapo/avg_reward_std": 0.30880050485332805, |
| "dapo/filter_reward_index": 0.0, |
| "dapo/kept_prompts_ratio": 0.4375000099341075, |
| "dapo/num_sampling_attempts": 3.0, |
| "dapo/sampling_efficiency": 41.04166666666666, |
| "dapo/total_prompts_processed": 18.0, |
| "dapo/valid_prompts_collected": 6.0, |
| "epoch": 0.16457142857142856, |
| "grad_norm": 0.15082307159900665, |
| "kl": 0.02729034423828125, |
| "learning_rate": 3.0491243424323783e-07, |
| "loss": 0.0511, |
| "reward": 0.5894143544137478, |
| "reward_std": 0.954010546207428, |
| "step": 144 |
| }, |
| { |
| "clip_fraction": 0.0, |
| "completion_length": 2973.3993225097656, |
| "dapo/avg_reward_std": 0.32683228328824043, |
| "dapo/filter_reward_index": 0.0, |
| "dapo/kept_prompts_ratio": 0.4236111181477706, |
| "dapo/num_sampling_attempts": 3.0, |
| "dapo/sampling_efficiency": 48.66071428571428, |
| "dapo/total_prompts_processed": 18.0, |
| "dapo/valid_prompts_collected": 6.0, |
| "epoch": 0.1657142857142857, |
| "grad_norm": 0.2588576078414917, |
| "kl": 0.038238525390625, |
| "learning_rate": 2.9836319343816397e-07, |
| "loss": 0.0611, |
| "reward": 0.6702784113585949, |
| "reward_std": 0.9678368121385574, |
| "step": 145 |
| }, |
| { |
| "clip_fraction": 0.0, |
| "completion_length": 3289.8368530273438, |
| "dapo/avg_reward_std": 0.29686578666722335, |
| "dapo/filter_reward_index": 0.0, |
| "dapo/kept_prompts_ratio": 0.34567901823255753, |
| "dapo/num_sampling_attempts": 3.375, |
| "dapo/sampling_efficiency": 51.57738095238095, |
| "dapo/total_prompts_processed": 20.25, |
| "dapo/valid_prompts_collected": 6.0, |
| "epoch": 0.16685714285714287, |
| "grad_norm": 0.2035798877477646, |
| "kl": 0.0394744873046875, |
| "learning_rate": 2.918906036420294e-07, |
| "loss": 0.0576, |
| "reward": 0.4602743685245514, |
| "reward_std": 0.9194413796067238, |
| "step": 146 |
| }, |
| { |
| "clip_fraction": 0.0, |
| "completion_length": 3068.7604064941406, |
| "dapo/avg_reward_std": 0.27814541943371296, |
| "dapo/filter_reward_index": 0.0, |
| "dapo/kept_prompts_ratio": 0.3437500069849193, |
| "dapo/num_sampling_attempts": 4.0, |
| "dapo/sampling_efficiency": 36.666666666666664, |
| "dapo/total_prompts_processed": 24.0, |
| "dapo/valid_prompts_collected": 6.0, |
| "epoch": 0.168, |
| "grad_norm": 0.22469140589237213, |
| "kl": 0.030426025390625, |
| "learning_rate": 2.854966364683872e-07, |
| "loss": 0.0696, |
| "reward": 0.6243265215307474, |
| "reward_std": 0.9174878597259521, |
| "step": 147 |
| }, |
| { |
| "clip_fraction": 0.0, |
| "completion_length": 3041.357635498047, |
| "dapo/avg_reward_std": 0.2907161459326744, |
| "dapo/filter_reward_index": 0.0, |
| "dapo/kept_prompts_ratio": 0.458333346247673, |
| "dapo/num_sampling_attempts": 2.5, |
| "dapo/sampling_efficiency": 57.70833333333333, |
| "dapo/total_prompts_processed": 15.0, |
| "dapo/valid_prompts_collected": 6.0, |
| "epoch": 0.16914285714285715, |
| "grad_norm": 0.3123789429664612, |
| "kl": 0.0328521728515625, |
| "learning_rate": 2.791832395815782e-07, |
| "loss": 0.0819, |
| "reward": 0.8250775411725044, |
| "reward_std": 0.9233218431472778, |
| "step": 148 |
| }, |
| { |
| "clip_fraction": 0.0, |
| "completion_length": 2433.0694732666016, |
| "dapo/avg_reward_std": 0.22243764168686336, |
| "dapo/filter_reward_index": 0.0, |
| "dapo/kept_prompts_ratio": 0.2777777839865949, |
| "dapo/num_sampling_attempts": 4.5, |
| "dapo/sampling_efficiency": 35.75892857142857, |
| "dapo/total_prompts_processed": 27.0, |
| "dapo/valid_prompts_collected": 6.0, |
| "epoch": 0.1702857142857143, |
| "grad_norm": 0.2827485203742981, |
| "kl": 0.0386505126953125, |
| "learning_rate": 2.729523361034538e-07, |
| "loss": 0.0784, |
| "reward": 0.6995697831735015, |
| "reward_std": 0.9434132054448128, |
| "step": 149 |
| }, |
| { |
| "clip_fraction": 0.0, |
| "completion_length": 3096.59033203125, |
| "dapo/avg_reward_std": 0.347408726811409, |
| "dapo/filter_reward_index": 0.0, |
| "dapo/kept_prompts_ratio": 0.541666672565043, |
| "dapo/num_sampling_attempts": 2.0, |
| "dapo/sampling_efficiency": 63.541666666666664, |
| "dapo/total_prompts_processed": 12.0, |
| "dapo/valid_prompts_collected": 6.0, |
| "epoch": 0.17142857142857143, |
| "grad_norm": 0.30529579520225525, |
| "kl": 0.03045654296875, |
| "learning_rate": 2.6680582402757324e-07, |
| "loss": 0.0868, |
| "reward": 0.7112221932038665, |
| "reward_std": 0.9602288007736206, |
| "step": 150 |
| }, |
| { |
| "clip_fraction": 0.0, |
| "completion_length": 3184.611083984375, |
| "dapo/avg_reward_std": 0.1674806038115887, |
| "dapo/filter_reward_index": 0.0, |
| "dapo/kept_prompts_ratio": 0.20212766528129578, |
| "dapo/num_sampling_attempts": 5.875, |
| "dapo/sampling_efficiency": 23.749999999999996, |
| "dapo/total_prompts_processed": 35.25, |
| "dapo/valid_prompts_collected": 6.0, |
| "epoch": 0.17257142857142857, |
| "grad_norm": 0.19142813980579376, |
| "kl": 0.037353515625, |
| "learning_rate": 2.6074557564105724e-07, |
| "loss": 0.045, |
| "reward": 0.41017685225233436, |
| "reward_std": 0.9152907580137253, |
| "step": 151 |
| }, |
| { |
| "clip_fraction": 0.0, |
| "completion_length": 3437.3541564941406, |
| "dapo/avg_reward_std": 0.208841644014631, |
| "dapo/filter_reward_index": 0.0, |
| "dapo/kept_prompts_ratio": 0.2571428622518267, |
| "dapo/num_sampling_attempts": 4.375, |
| "dapo/sampling_efficiency": 40.416666666666664, |
| "dapo/total_prompts_processed": 26.25, |
| "dapo/valid_prompts_collected": 6.0, |
| "epoch": 0.1737142857142857, |
| "grad_norm": 0.15321692824363708, |
| "kl": 0.03997802734375, |
| "learning_rate": 2.547734369542718e-07, |
| "loss": 0.0346, |
| "reward": 0.34562894329428673, |
| "reward_std": 0.856454074382782, |
| "step": 152 |
| }, |
| { |
| "clip_fraction": 0.0, |
| "completion_length": 3008.1285095214844, |
| "dapo/avg_reward_std": 0.3009934023022652, |
| "dapo/filter_reward_index": 0.0, |
| "dapo/kept_prompts_ratio": 0.5000000096857548, |
| "dapo/num_sampling_attempts": 2.5, |
| "dapo/sampling_efficiency": 43.75, |
| "dapo/total_prompts_processed": 15.0, |
| "dapo/valid_prompts_collected": 6.0, |
| "epoch": 0.17485714285714285, |
| "grad_norm": 0.20332548022270203, |
| "kl": 0.0509033203125, |
| "learning_rate": 2.488912271385139e-07, |
| "loss": 0.0536, |
| "reward": 0.7641689777374268, |
| "reward_std": 0.95648343116045, |
| "step": 153 |
| }, |
| { |
| "clip_fraction": 0.0, |
| "completion_length": 3165.52783203125, |
| "dapo/avg_reward_std": 0.2268627045246271, |
| "dapo/filter_reward_index": 0.0, |
| "dapo/kept_prompts_ratio": 0.35256410905948055, |
| "dapo/num_sampling_attempts": 3.25, |
| "dapo/sampling_efficiency": 40.625, |
| "dapo/total_prompts_processed": 19.5, |
| "dapo/valid_prompts_collected": 6.0, |
| "epoch": 0.176, |
| "grad_norm": 0.2415708601474762, |
| "kl": 0.032623291015625, |
| "learning_rate": 2.4310073797187573e-07, |
| "loss": 0.0658, |
| "reward": 0.6375892572104931, |
| "reward_std": 0.9544621706008911, |
| "step": 154 |
| }, |
| { |
| "clip_fraction": 0.0, |
| "completion_length": 3226.4652709960938, |
| "dapo/avg_reward_std": 0.2563069482644399, |
| "dapo/filter_reward_index": 0.0, |
| "dapo/kept_prompts_ratio": 0.38333334078391396, |
| "dapo/num_sampling_attempts": 3.75, |
| "dapo/sampling_efficiency": 31.249999999999996, |
| "dapo/total_prompts_processed": 22.5, |
| "dapo/valid_prompts_collected": 6.0, |
| "epoch": 0.17714285714285713, |
| "grad_norm": 0.2137623131275177, |
| "kl": 0.0427093505859375, |
| "learning_rate": 2.374037332934512e-07, |
| "loss": 0.0533, |
| "reward": 0.537381574511528, |
| "reward_std": 0.9281218275427818, |
| "step": 155 |
| }, |
| { |
| "clip_fraction": 0.0, |
| "completion_length": 2680.3090209960938, |
| "dapo/avg_reward_std": 0.22888225678241614, |
| "dapo/filter_reward_index": 0.0, |
| "dapo/kept_prompts_ratio": 0.3181818226973216, |
| "dapo/num_sampling_attempts": 4.125, |
| "dapo/sampling_efficiency": 31.29960317460317, |
| "dapo/total_prompts_processed": 24.75, |
| "dapo/valid_prompts_collected": 6.0, |
| "epoch": 0.1782857142857143, |
| "grad_norm": 0.3409210443496704, |
| "kl": 0.03851318359375, |
| "learning_rate": 2.3180194846605364e-07, |
| "loss": 0.0962, |
| "reward": 0.8820424377918243, |
| "reward_std": 0.9246840327978134, |
| "step": 156 |
| }, |
| { |
| "clip_fraction": 0.0, |
| "completion_length": 3045.3299255371094, |
| "dapo/avg_reward_std": 0.2491180575810946, |
| "dapo/filter_reward_index": 0.0, |
| "dapo/kept_prompts_ratio": 0.3653846222620744, |
| "dapo/num_sampling_attempts": 3.25, |
| "dapo/sampling_efficiency": 45.83333333333332, |
| "dapo/total_prompts_processed": 19.5, |
| "dapo/valid_prompts_collected": 6.0, |
| "epoch": 0.17942857142857144, |
| "grad_norm": 0.23701035976409912, |
| "kl": 0.0436248779296875, |
| "learning_rate": 2.2629708984760706e-07, |
| "loss": 0.0414, |
| "reward": 0.6551959328353405, |
| "reward_std": 0.9744707196950912, |
| "step": 157 |
| }, |
| { |
| "clip_fraction": 0.0, |
| "completion_length": 2918.892364501953, |
| "dapo/avg_reward_std": 0.22537656256130764, |
| "dapo/filter_reward_index": 0.0, |
| "dapo/kept_prompts_ratio": 0.33333333730697634, |
| "dapo/num_sampling_attempts": 4.375, |
| "dapo/sampling_efficiency": 39.93055555555556, |
| "dapo/total_prompts_processed": 26.25, |
| "dapo/valid_prompts_collected": 6.0, |
| "epoch": 0.18057142857142858, |
| "grad_norm": 0.3551786541938782, |
| "kl": 0.0572357177734375, |
| "learning_rate": 2.2089083427137329e-07, |
| "loss": 0.0732, |
| "reward": 0.5248121619224548, |
| "reward_std": 0.9334831684827805, |
| "step": 158 |
| }, |
| { |
| "clip_fraction": 0.0, |
| "completion_length": 2874.0729446411133, |
| "dapo/avg_reward_std": 0.18832522351294756, |
| "dapo/filter_reward_index": 0.0, |
| "dapo/kept_prompts_ratio": 0.2812500046566129, |
| "dapo/num_sampling_attempts": 4.0, |
| "dapo/sampling_efficiency": 38.69047619047618, |
| "dapo/total_prompts_processed": 24.0, |
| "dapo/valid_prompts_collected": 6.0, |
| "epoch": 0.18171428571428572, |
| "grad_norm": 0.25500980019569397, |
| "kl": 0.03741455078125, |
| "learning_rate": 2.1558482853517253e-07, |
| "loss": 0.0537, |
| "reward": 0.7963100634515285, |
| "reward_std": 0.987776905298233, |
| "step": 159 |
| }, |
| { |
| "clip_fraction": 0.0, |
| "completion_length": 2940.701385498047, |
| "dapo/avg_reward_std": 0.16297742784023284, |
| "dapo/filter_reward_index": 0.0, |
| "dapo/kept_prompts_ratio": 0.20000000536441803, |
| "dapo/num_sampling_attempts": 6.25, |
| "dapo/sampling_efficiency": 18.368055555555557, |
| "dapo/total_prompts_processed": 37.5, |
| "dapo/valid_prompts_collected": 6.0, |
| "epoch": 0.18285714285714286, |
| "grad_norm": 0.2898014187812805, |
| "kl": 0.058013916015625, |
| "learning_rate": 2.1038068889975259e-07, |
| "loss": 0.037, |
| "reward": 0.5323189618065953, |
| "reward_std": 0.9483579620718956, |
| "step": 160 |
| }, |
| { |
| "clip_fraction": 0.0, |
| "completion_length": 3090.7882385253906, |
| "dapo/avg_reward_std": 0.3046227526664734, |
| "dapo/filter_reward_index": 0.0, |
| "dapo/kept_prompts_ratio": 0.3733333414793015, |
| "dapo/num_sampling_attempts": 3.125, |
| "dapo/sampling_efficiency": 43.45238095238095, |
| "dapo/total_prompts_processed": 18.75, |
| "dapo/valid_prompts_collected": 6.0, |
| "epoch": 0.184, |
| "grad_norm": 0.28573325276374817, |
| "kl": 0.040771484375, |
| "learning_rate": 2.0528000059645995e-07, |
| "loss": 0.0511, |
| "reward": 0.6970310118049383, |
| "reward_std": 0.9432796016335487, |
| "step": 161 |
| }, |
| { |
| "clip_fraction": 0.0, |
| "completion_length": 3205.4270629882812, |
| "dapo/avg_reward_std": 0.36972329020500183, |
| "dapo/filter_reward_index": 0.0, |
| "dapo/kept_prompts_ratio": 0.5438596621940011, |
| "dapo/num_sampling_attempts": 2.375, |
| "dapo/sampling_efficiency": 55.625, |
| "dapo/total_prompts_processed": 14.25, |
| "dapo/valid_prompts_collected": 6.0, |
| "epoch": 0.18514285714285714, |
| "grad_norm": 0.390523225069046, |
| "kl": 0.052459716796875, |
| "learning_rate": 2.0028431734436308e-07, |
| "loss": 0.0818, |
| "reward": 0.6346883065998554, |
| "reward_std": 0.9713371768593788, |
| "step": 162 |
| }, |
| { |
| "clip_fraction": 0.0, |
| "completion_length": 3082.107635498047, |
| "dapo/avg_reward_std": 0.2315557522158469, |
| "dapo/filter_reward_index": 0.0, |
| "dapo/kept_prompts_ratio": 0.3440860264724301, |
| "dapo/num_sampling_attempts": 3.875, |
| "dapo/sampling_efficiency": 44.513888888888886, |
| "dapo/total_prompts_processed": 23.25, |
| "dapo/valid_prompts_collected": 6.0, |
| "epoch": 0.18628571428571428, |
| "grad_norm": 0.31898149847984314, |
| "kl": 0.05328369140625, |
| "learning_rate": 1.9539516087697517e-07, |
| "loss": 0.0722, |
| "reward": 0.6942785531282425, |
| "reward_std": 0.9776681512594223, |
| "step": 163 |
| }, |
| { |
| "clip_fraction": 0.0, |
| "completion_length": 3027.0243530273438, |
| "dapo/avg_reward_std": 0.15836979811255997, |
| "dapo/filter_reward_index": 0.0, |
| "dapo/kept_prompts_ratio": 0.22972973214613424, |
| "dapo/num_sampling_attempts": 4.625, |
| "dapo/sampling_efficiency": 41.69642857142857, |
| "dapo/total_prompts_processed": 27.75, |
| "dapo/valid_prompts_collected": 6.0, |
| "epoch": 0.18742857142857142, |
| "grad_norm": 0.2931766211986542, |
| "kl": 0.033111572265625, |
| "learning_rate": 1.9061402047871833e-07, |
| "loss": 0.0754, |
| "reward": 0.944303285330534, |
| "reward_std": 0.9451126903295517, |
| "step": 164 |
| }, |
| { |
| "clip_fraction": 0.0, |
| "completion_length": 2894.260482788086, |
| "dapo/avg_reward_std": 0.224585828371346, |
| "dapo/filter_reward_index": 0.0, |
| "dapo/kept_prompts_ratio": 0.2916666716337204, |
| "dapo/num_sampling_attempts": 4.0, |
| "dapo/sampling_efficiency": 37.5, |
| "dapo/total_prompts_processed": 24.0, |
| "dapo/valid_prompts_collected": 6.0, |
| "epoch": 0.18857142857142858, |
| "grad_norm": 0.24178634583950043, |
| "kl": 0.0533447265625, |
| "learning_rate": 1.8594235253127372e-07, |
| "loss": 0.0505, |
| "reward": 0.6519163623452187, |
| "reward_std": 0.9615699052810669, |
| "step": 165 |
| }, |
| { |
| "clip_fraction": 0.0, |
| "completion_length": 3002.7882385253906, |
| "dapo/avg_reward_std": 0.29886600477942105, |
| "dapo/filter_reward_index": 0.0, |
| "dapo/kept_prompts_ratio": 0.3160919598464308, |
| "dapo/num_sampling_attempts": 3.625, |
| "dapo/sampling_efficiency": 35.416666666666664, |
| "dapo/total_prompts_processed": 21.75, |
| "dapo/valid_prompts_collected": 6.0, |
| "epoch": 0.18971428571428572, |
| "grad_norm": 0.31221655011177063, |
| "kl": 0.047943115234375, |
| "learning_rate": 1.8138158006995363e-07, |
| "loss": 0.066, |
| "reward": 0.6383479349315166, |
| "reward_std": 0.9029820337891579, |
| "step": 166 |
| }, |
| { |
| "clip_fraction": 0.0, |
| "completion_length": 2927.295150756836, |
| "dapo/avg_reward_std": 0.34752671499001353, |
| "dapo/filter_reward_index": 0.0, |
| "dapo/kept_prompts_ratio": 0.5438596621940011, |
| "dapo/num_sampling_attempts": 2.375, |
| "dapo/sampling_efficiency": 48.95833333333333, |
| "dapo/total_prompts_processed": 14.25, |
| "dapo/valid_prompts_collected": 6.0, |
| "epoch": 0.19085714285714286, |
| "grad_norm": 0.2697528600692749, |
| "kl": 0.045745849609375, |
| "learning_rate": 1.7693309235023127e-07, |
| "loss": 0.0483, |
| "reward": 0.8266985702211969, |
| "reward_std": 0.9544429406523705, |
| "step": 167 |
| }, |
| { |
| "clip_fraction": 0.0, |
| "completion_length": 3212.857666015625, |
| "dapo/avg_reward_std": 0.263968757220677, |
| "dapo/filter_reward_index": 0.0, |
| "dapo/kept_prompts_ratio": 0.3690476247242519, |
| "dapo/num_sampling_attempts": 3.5, |
| "dapo/sampling_efficiency": 41.388888888888886, |
| "dapo/total_prompts_processed": 21.0, |
| "dapo/valid_prompts_collected": 6.0, |
| "epoch": 0.192, |
| "grad_norm": 0.27940821647644043, |
| "kl": 0.05059814453125, |
| "learning_rate": 1.7259824442455923e-07, |
| "loss": 0.0415, |
| "reward": 0.7715255841612816, |
| "reward_std": 0.95072440803051, |
| "step": 168 |
| }, |
| { |
| "clip_fraction": 0.0, |
| "completion_length": 3112.5799255371094, |
| "dapo/avg_reward_std": 0.22730760558231458, |
| "dapo/filter_reward_index": 0.0, |
| "dapo/kept_prompts_ratio": 0.3153153222960395, |
| "dapo/num_sampling_attempts": 4.625, |
| "dapo/sampling_efficiency": 26.249999999999996, |
| "dapo/total_prompts_processed": 27.75, |
| "dapo/valid_prompts_collected": 6.0, |
| "epoch": 0.19314285714285714, |
| "grad_norm": 0.4339730143547058, |
| "kl": 0.06396484375, |
| "learning_rate": 1.6837835672960831e-07, |
| "loss": 0.0777, |
| "reward": 0.5262689627707005, |
| "reward_std": 0.9779800549149513, |
| "step": 169 |
| }, |
| { |
| "clip_fraction": 0.0, |
| "completion_length": 3088.6632385253906, |
| "dapo/avg_reward_std": 0.2333034286275506, |
| "dapo/filter_reward_index": 0.0, |
| "dapo/kept_prompts_ratio": 0.33333333721384406, |
| "dapo/num_sampling_attempts": 4.0, |
| "dapo/sampling_efficiency": 38.263888888888886, |
| "dapo/total_prompts_processed": 24.0, |
| "dapo/valid_prompts_collected": 6.0, |
| "epoch": 0.19428571428571428, |
| "grad_norm": 0.48384836316108704, |
| "kl": 0.0555419921875, |
| "learning_rate": 1.6427471468404952e-07, |
| "loss": 0.0974, |
| "reward": 0.7407102398574352, |
| "reward_std": 0.9568767622113228, |
| "step": 170 |
| }, |
| { |
| "clip_fraction": 0.0, |
| "completion_length": 3099.347198486328, |
| "dapo/avg_reward_std": 0.17301563743282766, |
| "dapo/filter_reward_index": 0.0, |
| "dapo/kept_prompts_ratio": 0.27941177127992406, |
| "dapo/num_sampling_attempts": 4.25, |
| "dapo/sampling_efficiency": 31.874999999999996, |
| "dapo/total_prompts_processed": 25.5, |
| "dapo/valid_prompts_collected": 6.0, |
| "epoch": 0.19542857142857142, |
| "grad_norm": 0.42263394594192505, |
| "kl": 0.0595703125, |
| "learning_rate": 1.6028856829700258e-07, |
| "loss": 0.0812, |
| "reward": 0.4282900430262089, |
| "reward_std": 0.914498083293438, |
| "step": 171 |
| }, |
| { |
| "clip_fraction": 0.0, |
| "completion_length": 3111.232696533203, |
| "dapo/avg_reward_std": 0.2433939976617694, |
| "dapo/filter_reward_index": 0.0, |
| "dapo/kept_prompts_ratio": 0.33333334093913436, |
| "dapo/num_sampling_attempts": 4.0, |
| "dapo/sampling_efficiency": 36.80555555555555, |
| "dapo/total_prompts_processed": 24.0, |
| "dapo/valid_prompts_collected": 6.0, |
| "epoch": 0.19657142857142856, |
| "grad_norm": 0.4814501404762268, |
| "kl": 0.05926513671875, |
| "learning_rate": 1.5642113178727193e-07, |
| "loss": 0.0843, |
| "reward": 0.6843680012971163, |
| "reward_std": 0.8743765726685524, |
| "step": 172 |
| }, |
| { |
| "clip_fraction": 0.0, |
| "completion_length": 3008.6563110351562, |
| "dapo/avg_reward_std": 0.25363275137814606, |
| "dapo/filter_reward_index": 0.0, |
| "dapo/kept_prompts_ratio": 0.31818182811592566, |
| "dapo/num_sampling_attempts": 4.125, |
| "dapo/sampling_efficiency": 38.78472222222222, |
| "dapo/total_prompts_processed": 24.75, |
| "dapo/valid_prompts_collected": 6.0, |
| "epoch": 0.1977142857142857, |
| "grad_norm": 0.285697877407074, |
| "kl": 0.05755615234375, |
| "learning_rate": 1.5267358321348285e-07, |
| "loss": 0.0456, |
| "reward": 0.5798944532871246, |
| "reward_std": 0.984041191637516, |
| "step": 173 |
| }, |
| { |
| "clip_fraction": 0.0, |
| "completion_length": 3067.9791870117188, |
| "dapo/avg_reward_std": 0.3438388824462891, |
| "dapo/filter_reward_index": 0.0, |
| "dapo/kept_prompts_ratio": 0.4500000044703484, |
| "dapo/num_sampling_attempts": 2.5, |
| "dapo/sampling_efficiency": 48.33333333333333, |
| "dapo/total_prompts_processed": 15.0, |
| "dapo/valid_prompts_collected": 6.0, |
| "epoch": 0.19885714285714284, |
| "grad_norm": 0.43520498275756836, |
| "kl": 0.07098388671875, |
| "learning_rate": 1.4904706411523448e-07, |
| "loss": 0.0716, |
| "reward": 0.5646946905180812, |
| "reward_std": 0.9460153579711914, |
| "step": 174 |
| }, |
| { |
| "clip_fraction": 0.0, |
| "completion_length": 3223.2916870117188, |
| "dapo/avg_reward_std": 0.2690600073337555, |
| "dapo/filter_reward_index": 0.0, |
| "dapo/kept_prompts_ratio": 0.4133333420753479, |
| "dapo/num_sampling_attempts": 3.125, |
| "dapo/sampling_efficiency": 41.45833333333333, |
| "dapo/total_prompts_processed": 18.75, |
| "dapo/valid_prompts_collected": 6.0, |
| "epoch": 0.2, |
| "grad_norm": 0.35144945979118347, |
| "kl": 0.06170654296875, |
| "learning_rate": 1.4554267916537495e-07, |
| "loss": 0.0348, |
| "reward": 0.556399748660624, |
| "reward_std": 0.9192204177379608, |
| "step": 175 |
| }, |
| { |
| "clip_fraction": 0.0, |
| "completion_length": 2946.0799102783203, |
| "dapo/avg_reward_std": 0.25316954652468365, |
| "dapo/filter_reward_index": 0.0, |
| "dapo/kept_prompts_ratio": 0.37500000558793545, |
| "dapo/num_sampling_attempts": 3.0, |
| "dapo/sampling_efficiency": 43.75, |
| "dapo/total_prompts_processed": 18.0, |
| "dapo/valid_prompts_collected": 6.0, |
| "epoch": 0.20114285714285715, |
| "grad_norm": 0.46807849407196045, |
| "kl": 0.063018798828125, |
| "learning_rate": 1.4216149583350755e-07, |
| "loss": 0.0796, |
| "reward": 0.6736351866275072, |
| "reward_std": 0.9649264737963676, |
| "step": 176 |
| }, |
| { |
| "clip_fraction": 0.0, |
| "completion_length": 3096.829864501953, |
| "dapo/avg_reward_std": 0.31567848042437907, |
| "dapo/filter_reward_index": 0.0, |
| "dapo/kept_prompts_ratio": 0.482456142965116, |
| "dapo/num_sampling_attempts": 2.375, |
| "dapo/sampling_efficiency": 55.625, |
| "dapo/total_prompts_processed": 14.25, |
| "dapo/valid_prompts_collected": 6.0, |
| "epoch": 0.2022857142857143, |
| "grad_norm": 0.31731271743774414, |
| "kl": 0.055938720703125, |
| "learning_rate": 1.3890454406082956e-07, |
| "loss": 0.0386, |
| "reward": 0.681073285639286, |
| "reward_std": 0.9661536440253258, |
| "step": 177 |
| }, |
| { |
| "clip_fraction": 0.0, |
| "completion_length": 3235.8056030273438, |
| "dapo/avg_reward_std": 0.24198689542967697, |
| "dapo/filter_reward_index": 0.0, |
| "dapo/kept_prompts_ratio": 0.3448275898037286, |
| "dapo/num_sampling_attempts": 3.625, |
| "dapo/sampling_efficiency": 47.08333333333333, |
| "dapo/total_prompts_processed": 21.75, |
| "dapo/valid_prompts_collected": 6.0, |
| "epoch": 0.20342857142857143, |
| "grad_norm": 0.4640950560569763, |
| "kl": 0.072052001953125, |
| "learning_rate": 1.3577281594640182e-07, |
| "loss": 0.0702, |
| "reward": 0.5520291309803724, |
| "reward_std": 0.9967257082462311, |
| "step": 178 |
| }, |
| { |
| "clip_fraction": 0.0, |
| "completion_length": 3237.77783203125, |
| "dapo/avg_reward_std": 0.30828417566689575, |
| "dapo/filter_reward_index": 0.0, |
| "dapo/kept_prompts_ratio": 0.4242424341765317, |
| "dapo/num_sampling_attempts": 2.75, |
| "dapo/sampling_efficiency": 52.82738095238095, |
| "dapo/total_prompts_processed": 16.5, |
| "dapo/valid_prompts_collected": 6.0, |
| "epoch": 0.20457142857142857, |
| "grad_norm": 0.4502318203449249, |
| "kl": 0.07550048828125, |
| "learning_rate": 1.3276726544494571e-07, |
| "loss": 0.0614, |
| "reward": 0.6213867999613285, |
| "reward_std": 0.9431608989834785, |
| "step": 179 |
| }, |
| { |
| "clip_fraction": 0.0, |
| "completion_length": 2887.9236450195312, |
| "dapo/avg_reward_std": 0.2488611958645008, |
| "dapo/filter_reward_index": 0.0, |
| "dapo/kept_prompts_ratio": 0.3518518612340645, |
| "dapo/num_sampling_attempts": 3.375, |
| "dapo/sampling_efficiency": 48.035714285714285, |
| "dapo/total_prompts_processed": 20.25, |
| "dapo/valid_prompts_collected": 6.0, |
| "epoch": 0.2057142857142857, |
| "grad_norm": 0.44646504521369934, |
| "kl": 0.073760986328125, |
| "learning_rate": 1.2988880807625927e-07, |
| "loss": 0.0683, |
| "reward": 0.5839751102030277, |
| "reward_std": 0.9090578481554985, |
| "step": 180 |
| }, |
| { |
| "clip_fraction": 0.0, |
| "completion_length": 3021.2916870117188, |
| "dapo/avg_reward_std": 0.20883248069069602, |
| "dapo/filter_reward_index": 0.0, |
| "dapo/kept_prompts_ratio": 0.2878787942004926, |
| "dapo/num_sampling_attempts": 4.125, |
| "dapo/sampling_efficiency": 39.632936507936506, |
| "dapo/total_prompts_processed": 24.75, |
| "dapo/valid_prompts_collected": 6.0, |
| "epoch": 0.20685714285714285, |
| "grad_norm": 0.36042678356170654, |
| "kl": 0.07421875, |
| "learning_rate": 1.2713832064634125e-07, |
| "loss": 0.054, |
| "reward": 0.5517729418352246, |
| "reward_std": 0.9483400657773018, |
| "step": 181 |
| }, |
| { |
| "clip_fraction": 0.0, |
| "completion_length": 3249.2118530273438, |
| "dapo/avg_reward_std": 0.2615335573043142, |
| "dapo/filter_reward_index": 0.0, |
| "dapo/kept_prompts_ratio": 0.33333333847778185, |
| "dapo/num_sampling_attempts": 3.5, |
| "dapo/sampling_efficiency": 46.785714285714285, |
| "dapo/total_prompts_processed": 21.0, |
| "dapo/valid_prompts_collected": 6.0, |
| "epoch": 0.208, |
| "grad_norm": 0.4518042504787445, |
| "kl": 0.072021484375, |
| "learning_rate": 1.2451664098030743e-07, |
| "loss": 0.0654, |
| "reward": 0.686168298125267, |
| "reward_std": 0.9350233674049377, |
| "step": 182 |
| }, |
| { |
| "clip_fraction": 0.0, |
| "completion_length": 3221.6631774902344, |
| "dapo/avg_reward_std": 0.27866364789731574, |
| "dapo/filter_reward_index": 0.0, |
| "dapo/kept_prompts_ratio": 0.3686868738044392, |
| "dapo/num_sampling_attempts": 4.125, |
| "dapo/sampling_efficiency": 28.4375, |
| "dapo/total_prompts_processed": 24.75, |
| "dapo/valid_prompts_collected": 6.0, |
| "epoch": 0.20914285714285713, |
| "grad_norm": 0.32408109307289124, |
| "kl": 0.062255859375, |
| "learning_rate": 1.220245676671809e-07, |
| "loss": 0.0384, |
| "reward": 0.6384344138205051, |
| "reward_std": 0.9783304929733276, |
| "step": 183 |
| }, |
| { |
| "clip_fraction": 0.0, |
| "completion_length": 3199.1354370117188, |
| "dapo/avg_reward_std": 0.2816663732131322, |
| "dapo/filter_reward_index": 0.0, |
| "dapo/kept_prompts_ratio": 0.316666671137015, |
| "dapo/num_sampling_attempts": 3.75, |
| "dapo/sampling_efficiency": 45.55555555555555, |
| "dapo/total_prompts_processed": 22.5, |
| "dapo/valid_prompts_collected": 6.0, |
| "epoch": 0.2102857142857143, |
| "grad_norm": 0.2197091430425644, |
| "kl": 0.07550048828125, |
| "learning_rate": 1.1966285981663407e-07, |
| "loss": 0.0211, |
| "reward": 0.45471471454948187, |
| "reward_std": 0.9136239141225815, |
| "step": 184 |
| }, |
| { |
| "clip_fraction": 0.0, |
| "completion_length": 3037.420166015625, |
| "dapo/avg_reward_std": 0.17516983683044846, |
| "dapo/filter_reward_index": 0.0, |
| "dapo/kept_prompts_ratio": 0.2657657728807346, |
| "dapo/num_sampling_attempts": 4.625, |
| "dapo/sampling_efficiency": 25.729166666666664, |
| "dapo/total_prompts_processed": 27.75, |
| "dapo/valid_prompts_collected": 6.0, |
| "epoch": 0.21142857142857144, |
| "grad_norm": 0.4012245535850525, |
| "kl": 0.091796875, |
| "learning_rate": 1.1743223682775649e-07, |
| "loss": 0.0442, |
| "reward": 0.7168623730540276, |
| "reward_std": 0.9515729621052742, |
| "step": 185 |
| }, |
| { |
| "clip_fraction": 0.0, |
| "completion_length": 3222.767364501953, |
| "dapo/avg_reward_std": 0.2550514280796051, |
| "dapo/filter_reward_index": 0.0, |
| "dapo/kept_prompts_ratio": 0.3444444512327512, |
| "dapo/num_sampling_attempts": 3.75, |
| "dapo/sampling_efficiency": 43.64583333333333, |
| "dapo/total_prompts_processed": 22.5, |
| "dapo/valid_prompts_collected": 6.0, |
| "epoch": 0.21257142857142858, |
| "grad_norm": 0.4945845305919647, |
| "kl": 0.083465576171875, |
| "learning_rate": 1.1533337816991931e-07, |
| "loss": 0.0667, |
| "reward": 0.5391142014414072, |
| "reward_std": 0.9342528805136681, |
| "step": 186 |
| }, |
| { |
| "clip_fraction": 0.0, |
| "completion_length": 2858.5659942626953, |
| "dapo/avg_reward_std": 0.23423856112264818, |
| "dapo/filter_reward_index": 0.0, |
| "dapo/kept_prompts_ratio": 0.3118279609949358, |
| "dapo/num_sampling_attempts": 3.875, |
| "dapo/sampling_efficiency": 40.0297619047619, |
| "dapo/total_prompts_processed": 23.25, |
| "dapo/valid_prompts_collected": 6.0, |
| "epoch": 0.21371428571428572, |
| "grad_norm": 0.4291866421699524, |
| "kl": 0.091796875, |
| "learning_rate": 1.1336692317580158e-07, |
| "loss": 0.0384, |
| "reward": 0.7481220848858356, |
| "reward_std": 0.9474795907735825, |
| "step": 187 |
| }, |
| { |
| "clip_fraction": 0.0, |
| "completion_length": 3123.170166015625, |
| "dapo/avg_reward_std": 0.1988734739857751, |
| "dapo/filter_reward_index": 0.0, |
| "dapo/kept_prompts_ratio": 0.25675675997862946, |
| "dapo/num_sampling_attempts": 4.625, |
| "dapo/sampling_efficiency": 31.875, |
| "dapo/total_prompts_processed": 27.75, |
| "dapo/valid_prompts_collected": 6.0, |
| "epoch": 0.21485714285714286, |
| "grad_norm": 0.30453264713287354, |
| "kl": 0.080657958984375, |
| "learning_rate": 1.1153347084664419e-07, |
| "loss": 0.0273, |
| "reward": 0.6236942922696471, |
| "reward_std": 0.9715093299746513, |
| "step": 188 |
| }, |
| { |
| "clip_fraction": 0.0, |
| "completion_length": 2872.9618530273438, |
| "dapo/avg_reward_std": 0.21385114904372923, |
| "dapo/filter_reward_index": 0.0, |
| "dapo/kept_prompts_ratio": 0.3333333365378841, |
| "dapo/num_sampling_attempts": 3.875, |
| "dapo/sampling_efficiency": 39.18154761904762, |
| "dapo/total_prompts_processed": 23.25, |
| "dapo/valid_prompts_collected": 6.0, |
| "epoch": 0.216, |
| "grad_norm": 0.5780288577079773, |
| "kl": 0.08612060546875, |
| "learning_rate": 1.0983357966978745e-07, |
| "loss": 0.0607, |
| "reward": 0.7514887787401676, |
| "reward_std": 1.0098591819405556, |
| "step": 189 |
| }, |
| { |
| "clip_fraction": 0.0, |
| "completion_length": 2937.093780517578, |
| "dapo/avg_reward_std": 0.1677520631575117, |
| "dapo/filter_reward_index": 0.0, |
| "dapo/kept_prompts_ratio": 0.21895425284610076, |
| "dapo/num_sampling_attempts": 6.375, |
| "dapo/sampling_efficiency": 20.689484126984123, |
| "dapo/total_prompts_processed": 38.25, |
| "dapo/valid_prompts_collected": 6.0, |
| "epoch": 0.21714285714285714, |
| "grad_norm": 0.3947860896587372, |
| "kl": 0.076263427734375, |
| "learning_rate": 1.0826776744855121e-07, |
| "loss": 0.0487, |
| "reward": 0.6180934552103281, |
| "reward_std": 0.9050487726926804, |
| "step": 190 |
| }, |
| { |
| "clip_fraction": 0.0, |
| "completion_length": 3252.090301513672, |
| "dapo/avg_reward_std": 0.24265852073828378, |
| "dapo/filter_reward_index": 0.0, |
| "dapo/kept_prompts_ratio": 0.3055555621782939, |
| "dapo/num_sampling_attempts": 3.75, |
| "dapo/sampling_efficiency": 38.020833333333336, |
| "dapo/total_prompts_processed": 22.5, |
| "dapo/valid_prompts_collected": 6.0, |
| "epoch": 0.21828571428571428, |
| "grad_norm": 0.48333072662353516, |
| "kl": 0.09661865234375, |
| "learning_rate": 1.068365111445064e-07, |
| "loss": 0.0584, |
| "reward": 0.4759152363985777, |
| "reward_std": 0.9479196071624756, |
| "step": 191 |
| }, |
| { |
| "clip_fraction": 0.0, |
| "completion_length": 3074.420166015625, |
| "dapo/avg_reward_std": 0.2189681170315578, |
| "dapo/filter_reward_index": 0.0, |
| "dapo/kept_prompts_ratio": 0.3333333371014431, |
| "dapo/num_sampling_attempts": 3.625, |
| "dapo/sampling_efficiency": 46.45833333333333, |
| "dapo/total_prompts_processed": 21.75, |
| "dapo/valid_prompts_collected": 6.0, |
| "epoch": 0.21942857142857142, |
| "grad_norm": 0.5536202192306519, |
| "kl": 0.09814453125, |
| "learning_rate": 1.0554024673218806e-07, |
| "loss": 0.0731, |
| "reward": 0.48804986744653434, |
| "reward_std": 0.9367131069302559, |
| "step": 192 |
| }, |
| { |
| "clip_fraction": 0.0, |
| "completion_length": 3026.9097595214844, |
| "dapo/avg_reward_std": 0.21337791310774312, |
| "dapo/filter_reward_index": 0.0, |
| "dapo/kept_prompts_ratio": 0.256756762395034, |
| "dapo/num_sampling_attempts": 4.625, |
| "dapo/sampling_efficiency": 30.3125, |
| "dapo/total_prompts_processed": 27.75, |
| "dapo/valid_prompts_collected": 6.0, |
| "epoch": 0.22057142857142858, |
| "grad_norm": 0.5239105224609375, |
| "kl": 0.0985107421875, |
| "learning_rate": 1.0437936906629334e-07, |
| "loss": 0.0561, |
| "reward": 0.45341441221535206, |
| "reward_std": 0.8912393003702164, |
| "step": 193 |
| }, |
| { |
| "clip_fraction": 0.0, |
| "completion_length": 2896.656280517578, |
| "dapo/avg_reward_std": 0.31374274492263793, |
| "dapo/filter_reward_index": 0.0, |
| "dapo/kept_prompts_ratio": 0.4333333417773247, |
| "dapo/num_sampling_attempts": 2.5, |
| "dapo/sampling_efficiency": 46.875, |
| "dapo/total_prompts_processed": 15.0, |
| "dapo/valid_prompts_collected": 6.0, |
| "epoch": 0.22171428571428572, |
| "grad_norm": 0.6310634016990662, |
| "kl": 0.108062744140625, |
| "learning_rate": 1.0335423176140511e-07, |
| "loss": 0.0809, |
| "reward": 0.6844924800097942, |
| "reward_std": 0.9649646729230881, |
| "step": 194 |
| }, |
| { |
| "clip_fraction": 0.0, |
| "completion_length": 3319.7048950195312, |
| "dapo/avg_reward_std": 0.21983732057340216, |
| "dapo/filter_reward_index": 0.0, |
| "dapo/kept_prompts_ratio": 0.30303031251286017, |
| "dapo/num_sampling_attempts": 4.125, |
| "dapo/sampling_efficiency": 29.479166666666664, |
| "dapo/total_prompts_processed": 24.75, |
| "dapo/valid_prompts_collected": 6.0, |
| "epoch": 0.22285714285714286, |
| "grad_norm": 0.47936248779296875, |
| "kl": 0.0997314453125, |
| "learning_rate": 1.0246514708427701e-07, |
| "loss": 0.0479, |
| "reward": 0.3993752491660416, |
| "reward_std": 0.9481607303023338, |
| "step": 195 |
| }, |
| { |
| "clip_fraction": 0.0, |
| "completion_length": 3298.1736450195312, |
| "dapo/avg_reward_std": 0.2514548934996128, |
| "dapo/filter_reward_index": 0.0, |
| "dapo/kept_prompts_ratio": 0.28333333916962145, |
| "dapo/num_sampling_attempts": 5.0, |
| "dapo/sampling_efficiency": 33.13988095238095, |
| "dapo/total_prompts_processed": 30.0, |
| "dapo/valid_prompts_collected": 6.0, |
| "epoch": 0.224, |
| "grad_norm": 0.36350947618484497, |
| "kl": 0.1043701171875, |
| "learning_rate": 1.017123858587145e-07, |
| "loss": 0.0389, |
| "reward": 0.31427645590156317, |
| "reward_std": 0.8980218172073364, |
| "step": 196 |
| }, |
| { |
| "clip_fraction": 0.0, |
| "completion_length": 3260.4861450195312, |
| "dapo/avg_reward_std": 0.1836753969009106, |
| "dapo/filter_reward_index": 0.0, |
| "dapo/kept_prompts_ratio": 0.24786325486806723, |
| "dapo/num_sampling_attempts": 4.875, |
| "dapo/sampling_efficiency": 28.154761904761905, |
| "dapo/total_prompts_processed": 29.25, |
| "dapo/valid_prompts_collected": 6.0, |
| "epoch": 0.22514285714285714, |
| "grad_norm": 0.3354601562023163, |
| "kl": 0.0946044921875, |
| "learning_rate": 1.0109617738307911e-07, |
| "loss": 0.0301, |
| "reward": 0.5015182960778475, |
| "reward_std": 0.9334053322672844, |
| "step": 197 |
| }, |
| { |
| "clip_fraction": 0.0, |
| "completion_length": 3031.3958129882812, |
| "dapo/avg_reward_std": 0.3008538554696476, |
| "dapo/filter_reward_index": 0.0, |
| "dapo/kept_prompts_ratio": 0.5196078463512308, |
| "dapo/num_sampling_attempts": 2.125, |
| "dapo/sampling_efficiency": 76.5625, |
| "dapo/total_prompts_processed": 12.75, |
| "dapo/valid_prompts_collected": 6.0, |
| "epoch": 0.22628571428571428, |
| "grad_norm": 0.48223650455474854, |
| "kl": 0.10247802734375, |
| "learning_rate": 1.0061670936044178e-07, |
| "loss": 0.0648, |
| "reward": 0.573589576408267, |
| "reward_std": 0.9578919112682343, |
| "step": 198 |
| }, |
| { |
| "clip_fraction": 0.0, |
| "completion_length": 2948.3854064941406, |
| "dapo/avg_reward_std": 0.43072181940078735, |
| "dapo/filter_reward_index": 0.0, |
| "dapo/kept_prompts_ratio": 0.8333333373069763, |
| "dapo/num_sampling_attempts": 1.25, |
| "dapo/sampling_efficiency": 87.5, |
| "dapo/total_prompts_processed": 7.5, |
| "dapo/valid_prompts_collected": 6.0, |
| "epoch": 0.22742857142857142, |
| "grad_norm": 0.6141620874404907, |
| "kl": 0.09808349609375, |
| "learning_rate": 1.002741278414069e-07, |
| "loss": 0.0827, |
| "reward": 0.7053878791630268, |
| "reward_std": 0.9694960787892342, |
| "step": 199 |
| }, |
| { |
| "clip_fraction": 0.0, |
| "completion_length": 2714.482666015625, |
| "dapo/avg_reward_std": 0.26207208441149804, |
| "dapo/filter_reward_index": 0.0, |
| "dapo/kept_prompts_ratio": 0.37096774914572317, |
| "dapo/num_sampling_attempts": 3.875, |
| "dapo/sampling_efficiency": 30.624999999999993, |
| "dapo/total_prompts_processed": 23.25, |
| "dapo/valid_prompts_collected": 6.0, |
| "epoch": 0.22857142857142856, |
| "grad_norm": 0.2072688341140747, |
| "kl": 0.1064453125, |
| "learning_rate": 1.0006853717962393e-07, |
| "loss": 0.0122, |
| "reward": 0.5771910101175308, |
| "reward_std": 0.9156405553221703, |
| "step": 200 |
| }, |
| { |
| "epoch": 0.22857142857142856, |
| "step": 200, |
| "total_flos": 0.0, |
| "train_loss": 0.02940896774176508, |
| "train_runtime": 83918.4654, |
| "train_samples_per_second": 0.114, |
| "train_steps_per_second": 0.002 |
| } |
| ], |
| "logging_steps": 1, |
| "max_steps": 200, |
| "num_input_tokens_seen": 0, |
| "num_train_epochs": 1, |
| "save_steps": 10, |
| "stateful_callbacks": { |
| "TrainerControl": { |
| "args": { |
| "should_epoch_stop": false, |
| "should_evaluate": false, |
| "should_log": false, |
| "should_save": true, |
| "should_training_stop": true |
| }, |
| "attributes": {} |
| } |
| }, |
| "total_flos": 0.0, |
| "train_batch_size": 6, |
| "trial_name": null, |
| "trial_params": null |
| } |
|
|