| { |
| "best_metric": null, |
| "best_model_checkpoint": null, |
| "epoch": 0.9973045822102425, |
| "eval_steps": 500, |
| "global_step": 185, |
| "is_hyper_param_search": false, |
| "is_local_process_zero": true, |
| "is_world_process_zero": true, |
| "log_history": [ |
| { |
| "clip_ratio": 0.0, |
| "completions/clipped_ratio": 0.0, |
| "completions/max_length": 3001.0, |
| "completions/mean_length": 1635.390625, |
| "completions/min_length": 880.0, |
| "epoch": 0.005390835579514825, |
| "grad_norm": 0.07817294615231643, |
| "kl": 0.0, |
| "learning_rate": 2.127659574468085e-08, |
| "loss": 0.01464410312473774, |
| "memory(GiB)": 53.08, |
| "reward": 1.3704201579093933, |
| "reward_std": 0.19254888594150543, |
| "rewards/Table2LatexAcc/mean": 0.5549997389316559, |
| "rewards/Table2LatexAcc/std": 0.2269514873623848, |
| "rewards/Table2Latexform/mean": 0.815420389175415, |
| "rewards/Table2Latexform/std": 0.27713412046432495, |
| "step": 1, |
| "train_speed(iter/s)": 0.003012 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completions/clipped_ratio": 0.0, |
| "completions/max_length": 2923.875, |
| "completions/mean_length": 1629.7890625, |
| "completions/min_length": 886.0, |
| "epoch": 0.026954177897574125, |
| "grad_norm": 0.07213148347345341, |
| "kl": 1.5087425708770752e-05, |
| "learning_rate": 1.0638297872340425e-07, |
| "loss": 0.028215568512678146, |
| "memory(GiB)": 74.0, |
| "reward": 1.3842923939228058, |
| "reward_std": 0.18567332532256842, |
| "rewards/Table2LatexAcc/mean": 0.5712194591760635, |
| "rewards/Table2LatexAcc/std": 0.19849798548966646, |
| "rewards/Table2Latexform/mean": 0.8130729347467422, |
| "rewards/Table2Latexform/std": 0.2439529187977314, |
| "step": 5, |
| "train_speed(iter/s)": 0.003096 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completions/clipped_ratio": 0.0, |
| "completions/max_length": 2889.2, |
| "completions/mean_length": 1723.5890625, |
| "completions/min_length": 962.5, |
| "epoch": 0.05390835579514825, |
| "grad_norm": 0.06916726038351133, |
| "kl": 1.736283302307129e-05, |
| "learning_rate": 2.127659574468085e-07, |
| "loss": 0.019673459231853485, |
| "memory(GiB)": 74.0, |
| "reward": 1.3981751084327698, |
| "reward_std": 0.16928213015198706, |
| "rewards/Table2LatexAcc/mean": 0.573980861902237, |
| "rewards/Table2LatexAcc/std": 0.19604488760232924, |
| "rewards/Table2Latexform/mean": 0.8241942763328552, |
| "rewards/Table2Latexform/std": 0.23927551954984666, |
| "step": 10, |
| "train_speed(iter/s)": 0.003101 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completions/clipped_ratio": 0.0, |
| "completions/max_length": 2915.4, |
| "completions/mean_length": 1641.03125, |
| "completions/min_length": 704.9, |
| "epoch": 0.08086253369272237, |
| "grad_norm": 0.07279863906405569, |
| "kl": 2.1731853485107423e-05, |
| "learning_rate": 3.1914893617021275e-07, |
| "loss": 0.02381864786148071, |
| "memory(GiB)": 74.0, |
| "reward": 1.379032826423645, |
| "reward_std": 0.15062467977404595, |
| "rewards/Table2LatexAcc/mean": 0.5421488165855408, |
| "rewards/Table2LatexAcc/std": 0.19123097956180574, |
| "rewards/Table2Latexform/mean": 0.8368840157985687, |
| "rewards/Table2Latexform/std": 0.21790579557418824, |
| "step": 15, |
| "train_speed(iter/s)": 0.003068 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completions/clipped_ratio": 0.0, |
| "completions/max_length": 2961.5, |
| "completions/mean_length": 1699.853125, |
| "completions/min_length": 861.8, |
| "epoch": 0.1078167115902965, |
| "grad_norm": 0.07154949573308267, |
| "kl": 2.1332502365112303e-05, |
| "learning_rate": 4.25531914893617e-07, |
| "loss": 0.027176868915557862, |
| "memory(GiB)": 74.0, |
| "reward": 1.3628795862197876, |
| "reward_std": 0.19128143787384033, |
| "rewards/Table2LatexAcc/mean": 0.5719542324542999, |
| "rewards/Table2LatexAcc/std": 0.1954931139945984, |
| "rewards/Table2Latexform/mean": 0.7909253478050232, |
| "rewards/Table2Latexform/std": 0.278898648917675, |
| "step": 20, |
| "train_speed(iter/s)": 0.003057 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completions/clipped_ratio": 0.0, |
| "completions/max_length": 2639.0, |
| "completions/mean_length": 1594.8921875, |
| "completions/min_length": 790.5, |
| "epoch": 0.1347708894878706, |
| "grad_norm": 0.13131531927012402, |
| "kl": 2.13623046875e-05, |
| "learning_rate": 5.319148936170212e-07, |
| "loss": 0.01629452407360077, |
| "memory(GiB)": 74.0, |
| "reward": 1.4447253465652465, |
| "reward_std": 0.15758238062262536, |
| "rewards/Table2LatexAcc/mean": 0.6045001387596131, |
| "rewards/Table2LatexAcc/std": 0.18096636980772018, |
| "rewards/Table2Latexform/mean": 0.840225213766098, |
| "rewards/Table2Latexform/std": 0.22630088329315184, |
| "step": 25, |
| "train_speed(iter/s)": 0.003093 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completions/clipped_ratio": 0.0, |
| "completions/max_length": 2905.0, |
| "completions/mean_length": 1642.709375, |
| "completions/min_length": 804.4, |
| "epoch": 0.16172506738544473, |
| "grad_norm": 0.06595082858040417, |
| "kl": 2.499222755432129e-05, |
| "learning_rate": 6.382978723404255e-07, |
| "loss": 0.027088361978530883, |
| "memory(GiB)": 74.0, |
| "reward": 1.3934171557426454, |
| "reward_std": 0.17848547250032426, |
| "rewards/Table2LatexAcc/mean": 0.5714545011520386, |
| "rewards/Table2LatexAcc/std": 0.1947036311030388, |
| "rewards/Table2Latexform/mean": 0.8219626545906067, |
| "rewards/Table2Latexform/std": 0.26306993812322615, |
| "step": 30, |
| "train_speed(iter/s)": 0.003089 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completions/clipped_ratio": 0.0, |
| "completions/max_length": 2934.1, |
| "completions/mean_length": 1610.1421875, |
| "completions/min_length": 755.4, |
| "epoch": 0.18867924528301888, |
| "grad_norm": 0.06925838510518537, |
| "kl": 4.082918167114258e-05, |
| "learning_rate": 7.446808510638297e-07, |
| "loss": 0.026965773105621337, |
| "memory(GiB)": 74.0, |
| "reward": 1.3997071743011475, |
| "reward_std": 0.1628158211708069, |
| "rewards/Table2LatexAcc/mean": 0.5788642525672912, |
| "rewards/Table2LatexAcc/std": 0.1913457229733467, |
| "rewards/Table2Latexform/mean": 0.8208428978919983, |
| "rewards/Table2Latexform/std": 0.24103213250637054, |
| "step": 35, |
| "train_speed(iter/s)": 0.003093 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completions/clipped_ratio": 0.0, |
| "completions/max_length": 2921.4, |
| "completions/mean_length": 1588.996875, |
| "completions/min_length": 877.3, |
| "epoch": 0.215633423180593, |
| "grad_norm": 0.07126416986934427, |
| "kl": 7.665157318115234e-05, |
| "learning_rate": 8.51063829787234e-07, |
| "loss": 0.019620102643966675, |
| "memory(GiB)": 74.0, |
| "reward": 1.3808103442192077, |
| "reward_std": 0.16285659074783326, |
| "rewards/Table2LatexAcc/mean": 0.575913542509079, |
| "rewards/Table2LatexAcc/std": 0.1952654466032982, |
| "rewards/Table2Latexform/mean": 0.8048967957496643, |
| "rewards/Table2Latexform/std": 0.26039574593305587, |
| "step": 40, |
| "train_speed(iter/s)": 0.003098 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completions/clipped_ratio": 0.0, |
| "completions/max_length": 2788.6, |
| "completions/mean_length": 1593.7375, |
| "completions/min_length": 759.5, |
| "epoch": 0.24258760107816713, |
| "grad_norm": 0.08624615635734913, |
| "kl": 0.0001492023468017578, |
| "learning_rate": 9.574468085106384e-07, |
| "loss": 0.015057304501533508, |
| "memory(GiB)": 74.0, |
| "reward": 1.4499380350112916, |
| "reward_std": 0.13701159432530402, |
| "rewards/Table2LatexAcc/mean": 0.6036670506000519, |
| "rewards/Table2LatexAcc/std": 0.19481946676969528, |
| "rewards/Table2Latexform/mean": 0.8462709665298462, |
| "rewards/Table2Latexform/std": 0.2081604614853859, |
| "step": 45, |
| "train_speed(iter/s)": 0.00312 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completions/clipped_ratio": 0.0, |
| "completions/max_length": 2874.3, |
| "completions/mean_length": 1671.671875, |
| "completions/min_length": 803.0, |
| "epoch": 0.2695417789757412, |
| "grad_norm": 0.0716967712923244, |
| "kl": 0.00020017623901367188, |
| "learning_rate": 9.99971193595054e-07, |
| "loss": 0.01770862340927124, |
| "memory(GiB)": 74.0, |
| "reward": 1.4406983852386475, |
| "reward_std": 0.13970830887556077, |
| "rewards/Table2LatexAcc/mean": 0.5882811903953552, |
| "rewards/Table2LatexAcc/std": 0.1866762012243271, |
| "rewards/Table2Latexform/mean": 0.8524171948432923, |
| "rewards/Table2Latexform/std": 0.20919820815324783, |
| "step": 50, |
| "train_speed(iter/s)": 0.003122 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completions/clipped_ratio": 0.0, |
| "completions/max_length": 2952.4, |
| "completions/mean_length": 1598.51875, |
| "completions/min_length": 714.2, |
| "epoch": 0.29649595687331537, |
| "grad_norm": 0.06459857928181523, |
| "kl": 0.000313568115234375, |
| "learning_rate": 9.99795166473852e-07, |
| "loss": 0.028602027893066408, |
| "memory(GiB)": 74.0, |
| "reward": 1.4819077610969544, |
| "reward_std": 0.13568009808659554, |
| "rewards/Table2LatexAcc/mean": 0.6212433338165283, |
| "rewards/Table2LatexAcc/std": 0.2183626562356949, |
| "rewards/Table2Latexform/mean": 0.860664427280426, |
| "rewards/Table2Latexform/std": 0.22936906069517135, |
| "step": 55, |
| "train_speed(iter/s)": 0.003116 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completions/clipped_ratio": 0.0, |
| "completions/max_length": 2717.5, |
| "completions/mean_length": 1575.8484375, |
| "completions/min_length": 791.8, |
| "epoch": 0.32345013477088946, |
| "grad_norm": 0.0685119094996376, |
| "kl": 0.0005132675170898438, |
| "learning_rate": 9.994591720616975e-07, |
| "loss": 0.009688837081193924, |
| "memory(GiB)": 74.0, |
| "reward": 1.4809726119041442, |
| "reward_std": 0.12747596204280853, |
| "rewards/Table2LatexAcc/mean": 0.6219225466251374, |
| "rewards/Table2LatexAcc/std": 0.18778605610132218, |
| "rewards/Table2Latexform/mean": 0.8590500473976135, |
| "rewards/Table2Latexform/std": 0.2048894114792347, |
| "step": 60, |
| "train_speed(iter/s)": 0.003124 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completions/clipped_ratio": 0.0, |
| "completions/max_length": 2847.6, |
| "completions/mean_length": 1657.3984375, |
| "completions/min_length": 848.6, |
| "epoch": 0.3504043126684636, |
| "grad_norm": 0.08157608763386034, |
| "kl": 0.0006221771240234375, |
| "learning_rate": 9.98963317898878e-07, |
| "loss": 0.019288820028305054, |
| "memory(GiB)": 74.0, |
| "reward": 1.5079341650009155, |
| "reward_std": 0.14110046178102492, |
| "rewards/Table2LatexAcc/mean": 0.634680551290512, |
| "rewards/Table2LatexAcc/std": 0.20506853014230728, |
| "rewards/Table2Latexform/mean": 0.8732536375522614, |
| "rewards/Table2Latexform/std": 0.2030529037117958, |
| "step": 65, |
| "train_speed(iter/s)": 0.003131 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completions/clipped_ratio": 0.0, |
| "completions/max_length": 2604.5, |
| "completions/mean_length": 1594.03125, |
| "completions/min_length": 874.2, |
| "epoch": 0.37735849056603776, |
| "grad_norm": 0.08207408816722973, |
| "kl": 0.0008758544921875, |
| "learning_rate": 9.983077626913043e-07, |
| "loss": 0.01205739676952362, |
| "memory(GiB)": 74.0, |
| "reward": 1.507494068145752, |
| "reward_std": 0.11759327277541161, |
| "rewards/Table2LatexAcc/mean": 0.6351809322834014, |
| "rewards/Table2LatexAcc/std": 0.20378359854221345, |
| "rewards/Table2Latexform/mean": 0.8723131835460662, |
| "rewards/Table2Latexform/std": 0.19805027171969414, |
| "step": 70, |
| "train_speed(iter/s)": 0.003146 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completions/clipped_ratio": 0.0, |
| "completions/max_length": 2689.8, |
| "completions/mean_length": 1616.209375, |
| "completions/min_length": 859.7, |
| "epoch": 0.40431266846361186, |
| "grad_norm": 0.07397791319392964, |
| "kl": 0.0009979248046875, |
| "learning_rate": 9.974927162597145e-07, |
| "loss": 0.00553036704659462, |
| "memory(GiB)": 74.0, |
| "reward": 1.4614445567131042, |
| "reward_std": 0.09695540629327297, |
| "rewards/Table2LatexAcc/mean": 0.5970049917697906, |
| "rewards/Table2LatexAcc/std": 0.19226298183202745, |
| "rewards/Table2Latexform/mean": 0.8644395887851715, |
| "rewards/Table2Latexform/std": 0.19864091277122498, |
| "step": 75, |
| "train_speed(iter/s)": 0.003149 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completions/clipped_ratio": 0.0, |
| "completions/max_length": 2567.9, |
| "completions/mean_length": 1566.4984375, |
| "completions/min_length": 893.1, |
| "epoch": 0.431266846361186, |
| "grad_norm": 0.07175621361462335, |
| "kl": 0.0010894775390625, |
| "learning_rate": 9.965184394725169e-07, |
| "loss": 0.0031857024878263474, |
| "memory(GiB)": 74.0, |
| "reward": 1.519572389125824, |
| "reward_std": 0.11443859413266182, |
| "rewards/Table2LatexAcc/mean": 0.6457596719264984, |
| "rewards/Table2LatexAcc/std": 0.19394133985042572, |
| "rewards/Table2Latexform/mean": 0.8738127529621125, |
| "rewards/Table2Latexform/std": 0.2086488611996174, |
| "step": 80, |
| "train_speed(iter/s)": 0.003163 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completions/clipped_ratio": 0.0, |
| "completions/max_length": 2495.6, |
| "completions/mean_length": 1533.6078125, |
| "completions/min_length": 824.4, |
| "epoch": 0.4582210242587601, |
| "grad_norm": 0.07293410493568253, |
| "kl": 0.0012256622314453125, |
| "learning_rate": 9.953852441622956e-07, |
| "loss": 0.010935479402542114, |
| "memory(GiB)": 74.0, |
| "reward": 1.5418180227279663, |
| "reward_std": 0.09861706346273422, |
| "rewards/Table2LatexAcc/mean": 0.6385594129562377, |
| "rewards/Table2LatexAcc/std": 0.20701712965965272, |
| "rewards/Table2Latexform/mean": 0.9032586097717286, |
| "rewards/Table2Latexform/std": 0.13576763048768042, |
| "step": 85, |
| "train_speed(iter/s)": 0.003179 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completions/clipped_ratio": 0.0, |
| "completions/max_length": 2680.3, |
| "completions/mean_length": 1574.1953125, |
| "completions/min_length": 785.9, |
| "epoch": 0.48517520215633425, |
| "grad_norm": 0.06793751628944666, |
| "kl": 0.0012157440185546875, |
| "learning_rate": 9.940934930260036e-07, |
| "loss": 5.354555323719978e-05, |
| "memory(GiB)": 74.0, |
| "reward": 1.4896148085594176, |
| "reward_std": 0.09992180205881596, |
| "rewards/Table2LatexAcc/mean": 0.6215297818183899, |
| "rewards/Table2LatexAcc/std": 0.19945850372314453, |
| "rewards/Table2Latexform/mean": 0.8680850267410278, |
| "rewards/Table2Latexform/std": 0.21053530871868134, |
| "step": 90, |
| "train_speed(iter/s)": 0.003182 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completions/clipped_ratio": 0.0, |
| "completions/max_length": 2833.6, |
| "completions/mean_length": 1614.984375, |
| "completions/min_length": 836.7, |
| "epoch": 0.5121293800539084, |
| "grad_norm": 0.07800355989359384, |
| "kl": 0.001270294189453125, |
| "learning_rate": 9.92643599508875e-07, |
| "loss": 0.01619407832622528, |
| "memory(GiB)": 74.0, |
| "reward": 1.4949531078338623, |
| "reward_std": 0.13312736451625823, |
| "rewards/Table2LatexAcc/mean": 0.6362012684345245, |
| "rewards/Table2LatexAcc/std": 0.20503575205802918, |
| "rewards/Table2Latexform/mean": 0.8587518692016601, |
| "rewards/Table2Latexform/std": 0.21175305247306825, |
| "step": 95, |
| "train_speed(iter/s)": 0.003179 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completions/clipped_ratio": 0.0, |
| "completions/max_length": 2672.2, |
| "completions/mean_length": 1541.909375, |
| "completions/min_length": 850.3, |
| "epoch": 0.5390835579514824, |
| "grad_norm": 0.06742732491254022, |
| "kl": 0.001406097412109375, |
| "learning_rate": 9.910360276720974e-07, |
| "loss": 0.011617515981197358, |
| "memory(GiB)": 74.0, |
| "reward": 1.5225663423538207, |
| "reward_std": 0.12018043175339699, |
| "rewards/Table2LatexAcc/mean": 0.634308785200119, |
| "rewards/Table2LatexAcc/std": 0.19708103239536284, |
| "rewards/Table2Latexform/mean": 0.8882575571537018, |
| "rewards/Table2Latexform/std": 0.1708666443824768, |
| "step": 100, |
| "train_speed(iter/s)": 0.003187 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completions/clipped_ratio": 0.0, |
| "completions/max_length": 2863.4, |
| "completions/mean_length": 1613.728125, |
| "completions/min_length": 945.5, |
| "epoch": 0.5660377358490566, |
| "grad_norm": 0.0656531441071073, |
| "kl": 0.0012493133544921875, |
| "learning_rate": 9.89271292044279e-07, |
| "loss": 0.016812124848365785, |
| "memory(GiB)": 74.0, |
| "reward": 1.494718039035797, |
| "reward_std": 0.13982294127345085, |
| "rewards/Table2LatexAcc/mean": 0.6318223595619201, |
| "rewards/Table2LatexAcc/std": 0.2267067864537239, |
| "rewards/Table2Latexform/mean": 0.862895667552948, |
| "rewards/Table2Latexform/std": 0.21887822449207306, |
| "step": 105, |
| "train_speed(iter/s)": 0.003185 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completions/clipped_ratio": 0.0, |
| "completions/max_length": 2608.4, |
| "completions/mean_length": 1581.09375, |
| "completions/min_length": 790.2, |
| "epoch": 0.5929919137466307, |
| "grad_norm": 0.06582315254735907, |
| "kl": 0.001549530029296875, |
| "learning_rate": 9.873499574567681e-07, |
| "loss": 0.010095475614070893, |
| "memory(GiB)": 74.0, |
| "reward": 1.4990519642829896, |
| "reward_std": 0.10162455774843693, |
| "rewards/Table2LatexAcc/mean": 0.6363059639930725, |
| "rewards/Table2LatexAcc/std": 0.19157345294952394, |
| "rewards/Table2Latexform/mean": 0.862746000289917, |
| "rewards/Table2Latexform/std": 0.20280475318431854, |
| "step": 110, |
| "train_speed(iter/s)": 0.003195 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completions/clipped_ratio": 0.0, |
| "completions/max_length": 2667.1, |
| "completions/mean_length": 1623.0609375, |
| "completions/min_length": 862.5, |
| "epoch": 0.6199460916442049, |
| "grad_norm": 0.0676327840344494, |
| "kl": 0.0012561798095703125, |
| "learning_rate": 9.852726388628688e-07, |
| "loss": 0.009667134284973145, |
| "memory(GiB)": 74.0, |
| "reward": 1.499183714389801, |
| "reward_std": 0.11013109833002091, |
| "rewards/Table2LatexAcc/mean": 0.6425846576690674, |
| "rewards/Table2LatexAcc/std": 0.20786909610033036, |
| "rewards/Table2Latexform/mean": 0.8565990567207337, |
| "rewards/Table2Latexform/std": 0.22757124677300453, |
| "step": 115, |
| "train_speed(iter/s)": 0.003199 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completions/clipped_ratio": 0.0, |
| "completions/max_length": 2648.9, |
| "completions/mean_length": 1617.8234375, |
| "completions/min_length": 878.5, |
| "epoch": 0.6469002695417789, |
| "grad_norm": 0.05968134371530777, |
| "kl": 0.00138702392578125, |
| "learning_rate": 9.830400011410156e-07, |
| "loss": 0.003092067874968052, |
| "memory(GiB)": 74.0, |
| "reward": 1.4849407434463502, |
| "reward_std": 0.08951778598129749, |
| "rewards/Table2LatexAcc/mean": 0.6164660751819611, |
| "rewards/Table2LatexAcc/std": 0.204762826859951, |
| "rewards/Table2Latexform/mean": 0.8684746503829956, |
| "rewards/Table2Latexform/std": 0.20679847225546838, |
| "step": 120, |
| "train_speed(iter/s)": 0.003203 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completions/clipped_ratio": 0.0, |
| "completions/max_length": 2692.2, |
| "completions/mean_length": 1548.7328125, |
| "completions/min_length": 787.5, |
| "epoch": 0.6738544474393531, |
| "grad_norm": 0.08102589945740883, |
| "kl": 0.0015228271484375, |
| "learning_rate": 9.806527588819692e-07, |
| "loss": 0.010635277628898621, |
| "memory(GiB)": 74.0, |
| "reward": 1.4484204292297362, |
| "reward_std": 0.12051350250840187, |
| "rewards/Table2LatexAcc/mean": 0.5983371019363404, |
| "rewards/Table2LatexAcc/std": 0.19683932662010192, |
| "rewards/Table2Latexform/mean": 0.8500832915306091, |
| "rewards/Table2Latexform/std": 0.22092146053910255, |
| "step": 125, |
| "train_speed(iter/s)": 0.003201 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completions/clipped_ratio": 0.0, |
| "completions/max_length": 2778.3, |
| "completions/mean_length": 1625.8203125, |
| "completions/min_length": 975.0, |
| "epoch": 0.7008086253369272, |
| "grad_norm": 0.06581718834736765, |
| "kl": 0.0013751983642578125, |
| "learning_rate": 9.781116761600992e-07, |
| "loss": 0.008332135528326035, |
| "memory(GiB)": 74.0, |
| "reward": 1.4899320960044862, |
| "reward_std": 0.1020436353981495, |
| "rewards/Table2LatexAcc/mean": 0.6282051384449006, |
| "rewards/Table2LatexAcc/std": 0.18501487672328948, |
| "rewards/Table2Latexform/mean": 0.8617269277572632, |
| "rewards/Table2Latexform/std": 0.21871328055858613, |
| "step": 130, |
| "train_speed(iter/s)": 0.003199 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completions/clipped_ratio": 0.0, |
| "completions/max_length": 2638.1, |
| "completions/mean_length": 1558.1703125, |
| "completions/min_length": 627.1, |
| "epoch": 0.7277628032345014, |
| "grad_norm": 0.08042871559451785, |
| "kl": 0.0016143798828125, |
| "learning_rate": 9.75417566288832e-07, |
| "loss": 0.022313964366912842, |
| "memory(GiB)": 74.0, |
| "reward": 1.4969127774238586, |
| "reward_std": 0.09756124764680862, |
| "rewards/Table2LatexAcc/mean": 0.6259892284870148, |
| "rewards/Table2LatexAcc/std": 0.18883997797966004, |
| "rewards/Table2Latexform/mean": 0.8709235429763794, |
| "rewards/Table2Latexform/std": 0.20598914995789527, |
| "step": 135, |
| "train_speed(iter/s)": 0.003199 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completions/clipped_ratio": 0.0, |
| "completions/max_length": 2594.9, |
| "completions/mean_length": 1566.1609375, |
| "completions/min_length": 858.8, |
| "epoch": 0.7547169811320755, |
| "grad_norm": 0.06451190019619368, |
| "kl": 0.00159912109375, |
| "learning_rate": 9.725712915603353e-07, |
| "loss": 0.00471530370414257, |
| "memory(GiB)": 74.0, |
| "reward": 1.4983545541763306, |
| "reward_std": 0.10673168860375881, |
| "rewards/Table2LatexAcc/mean": 0.6402543127536774, |
| "rewards/Table2LatexAcc/std": 0.20193217247724532, |
| "rewards/Table2Latexform/mean": 0.8581002414226532, |
| "rewards/Table2Latexform/std": 0.2166273184120655, |
| "step": 140, |
| "train_speed(iter/s)": 0.003204 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completions/clipped_ratio": 0.0, |
| "completions/max_length": 2585.7, |
| "completions/mean_length": 1566.3171875, |
| "completions/min_length": 783.5, |
| "epoch": 0.7816711590296496, |
| "grad_norm": 0.06842850537031975, |
| "kl": 0.0016510009765625, |
| "learning_rate": 9.69573762969529e-07, |
| "loss": 0.008447134494781494, |
| "memory(GiB)": 74.0, |
| "reward": 1.5043591618537904, |
| "reward_std": 0.10398341864347457, |
| "rewards/Table2LatexAcc/mean": 0.6323516488075256, |
| "rewards/Table2LatexAcc/std": 0.19334442913532257, |
| "rewards/Table2Latexform/mean": 0.8720075249671936, |
| "rewards/Table2Latexform/std": 0.18240121901035308, |
| "step": 145, |
| "train_speed(iter/s)": 0.00321 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completions/clipped_ratio": 0.0, |
| "completions/max_length": 2677.8, |
| "completions/mean_length": 1609.378125, |
| "completions/min_length": 905.7, |
| "epoch": 0.8086253369272237, |
| "grad_norm": 0.06551621158024094, |
| "kl": 0.0015777587890625, |
| "learning_rate": 9.664259399225067e-07, |
| "loss": 0.005352784693241119, |
| "memory(GiB)": 74.0, |
| "reward": 1.5480861902236938, |
| "reward_std": 0.0993690624833107, |
| "rewards/Table2LatexAcc/mean": 0.6449747204780578, |
| "rewards/Table2LatexAcc/std": 0.1948181599378586, |
| "rewards/Table2Latexform/mean": 0.9031114995479583, |
| "rewards/Table2Latexform/std": 0.15625113472342492, |
| "step": 150, |
| "train_speed(iter/s)": 0.003209 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completions/clipped_ratio": 0.0, |
| "completions/max_length": 2549.0, |
| "completions/mean_length": 1590.425, |
| "completions/min_length": 906.1, |
| "epoch": 0.8355795148247979, |
| "grad_norm": 0.062419199774521504, |
| "kl": 0.001617431640625, |
| "learning_rate": 9.631288299294624e-07, |
| "loss": 0.005914273858070374, |
| "memory(GiB)": 74.0, |
| "reward": 1.5300285577774049, |
| "reward_std": 0.07754914276301861, |
| "rewards/Table2LatexAcc/mean": 0.6536247074604035, |
| "rewards/Table2LatexAcc/std": 0.1888583406805992, |
| "rewards/Table2Latexform/mean": 0.8764038562774659, |
| "rewards/Table2Latexform/std": 0.19668345972895623, |
| "step": 155, |
| "train_speed(iter/s)": 0.003214 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completions/clipped_ratio": 0.0, |
| "completions/max_length": 2675.8, |
| "completions/mean_length": 1594.9125, |
| "completions/min_length": 904.8, |
| "epoch": 0.862533692722372, |
| "grad_norm": 0.07381311561133726, |
| "kl": 0.001567840576171875, |
| "learning_rate": 9.596834882822218e-07, |
| "loss": 0.0008831036277115345, |
| "memory(GiB)": 74.0, |
| "reward": 1.5059723734855652, |
| "reward_std": 0.11190913170576096, |
| "rewards/Table2LatexAcc/mean": 0.6299772620201111, |
| "rewards/Table2LatexAcc/std": 0.18921414837241174, |
| "rewards/Table2Latexform/mean": 0.8759951233863831, |
| "rewards/Table2Latexform/std": 0.18999719768762588, |
| "step": 160, |
| "train_speed(iter/s)": 0.003217 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completions/clipped_ratio": 0.0, |
| "completions/max_length": 2556.4, |
| "completions/mean_length": 1544.603125, |
| "completions/min_length": 762.6, |
| "epoch": 0.889487870619946, |
| "grad_norm": 0.05908311708682426, |
| "kl": 0.00150909423828125, |
| "learning_rate": 9.560910177164787e-07, |
| "loss": 0.007628290355205536, |
| "memory(GiB)": 74.0, |
| "reward": 1.5502776145935058, |
| "reward_std": 0.07942587062716484, |
| "rewards/Table2LatexAcc/mean": 0.6583487272262574, |
| "rewards/Table2LatexAcc/std": 0.18620822578668594, |
| "rewards/Table2Latexform/mean": 0.8919288635253906, |
| "rewards/Table2Latexform/std": 0.18032970726490022, |
| "step": 165, |
| "train_speed(iter/s)": 0.003222 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completions/clipped_ratio": 0.0, |
| "completions/max_length": 2511.0, |
| "completions/mean_length": 1593.021875, |
| "completions/min_length": 835.0, |
| "epoch": 0.9164420485175202, |
| "grad_norm": 0.059058153054454235, |
| "kl": 0.00181427001953125, |
| "learning_rate": 9.523525680588476e-07, |
| "loss": 0.008848436921834946, |
| "memory(GiB)": 74.0, |
| "reward": 1.5144242644309998, |
| "reward_std": 0.09105739071965217, |
| "rewards/Table2LatexAcc/mean": 0.6321313917636872, |
| "rewards/Table2LatexAcc/std": 0.18138092905282974, |
| "rewards/Table2Latexform/mean": 0.8822928845882416, |
| "rewards/Table2Latexform/std": 0.19216497614979744, |
| "step": 170, |
| "train_speed(iter/s)": 0.003226 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completions/clipped_ratio": 0.0, |
| "completions/max_length": 2540.9, |
| "completions/mean_length": 1593.3265625, |
| "completions/min_length": 730.9, |
| "epoch": 0.9433962264150944, |
| "grad_norm": 0.060705012485582154, |
| "kl": 0.00139312744140625, |
| "learning_rate": 9.484693358588434e-07, |
| "loss": 0.007192098349332809, |
| "memory(GiB)": 74.0, |
| "reward": 1.5356804728507996, |
| "reward_std": 0.09475091025233269, |
| "rewards/Table2LatexAcc/mean": 0.6415718376636506, |
| "rewards/Table2LatexAcc/std": 0.1903410866856575, |
| "rewards/Table2Latexform/mean": 0.8941086232662201, |
| "rewards/Table2Latexform/std": 0.1649520058184862, |
| "step": 175, |
| "train_speed(iter/s)": 0.003229 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completions/clipped_ratio": 0.0, |
| "completions/max_length": 2650.3, |
| "completions/mean_length": 1586.671875, |
| "completions/min_length": 818.4, |
| "epoch": 0.9703504043126685, |
| "grad_norm": 0.07391935117978014, |
| "kl": 0.001525115966796875, |
| "learning_rate": 9.444425640059076e-07, |
| "loss": 0.007059115171432495, |
| "memory(GiB)": 74.0, |
| "reward": 1.5181043028831482, |
| "reward_std": 0.09545421227812767, |
| "rewards/Table2LatexAcc/mean": 0.638035798072815, |
| "rewards/Table2LatexAcc/std": 0.20127029120922088, |
| "rewards/Table2Latexform/mean": 0.8800684928894043, |
| "rewards/Table2Latexform/std": 0.18569674119353294, |
| "step": 180, |
| "train_speed(iter/s)": 0.00323 |
| }, |
| { |
| "clip_ratio": 0.0, |
| "completions/clipped_ratio": 0.0, |
| "completions/max_length": 2459.1, |
| "completions/mean_length": 1533.0296875, |
| "completions/min_length": 935.5, |
| "epoch": 0.9973045822102425, |
| "grad_norm": 0.07964238807994012, |
| "kl": 0.00167999267578125, |
| "learning_rate": 9.402735413316011e-07, |
| "loss": -0.00023833760060369967, |
| "memory(GiB)": 74.0, |
| "reward": 1.5326952815055848, |
| "reward_std": 0.08919371329247952, |
| "rewards/Table2LatexAcc/mean": 0.6511692225933075, |
| "rewards/Table2LatexAcc/std": 0.1804724305868149, |
| "rewards/Table2Latexform/mean": 0.8815260589122772, |
| "rewards/Table2Latexform/std": 0.19189485386013985, |
| "step": 185, |
| "train_speed(iter/s)": 0.003237 |
| } |
| ], |
| "logging_steps": 5, |
| "max_steps": 925, |
| "num_input_tokens_seen": 0, |
| "num_train_epochs": 5, |
| "save_steps": 500, |
| "stateful_callbacks": { |
| "TrainerControl": { |
| "args": { |
| "should_epoch_stop": false, |
| "should_evaluate": false, |
| "should_log": false, |
| "should_save": true, |
| "should_training_stop": false |
| }, |
| "attributes": {} |
| } |
| }, |
| "total_flos": 0.0, |
| "train_batch_size": 4, |
| "trial_name": null, |
| "trial_params": null |
| } |
|
|