| { |
| "best_metric": null, |
| "best_model_checkpoint": null, |
| "epoch": 0.8806026624471123, |
| "eval_steps": 200, |
| "global_step": 3200, |
| "is_hyper_param_search": false, |
| "is_local_process_zero": true, |
| "is_world_process_zero": true, |
| "log_history": [ |
| { |
| "epoch": 0.0005503766640294451, |
| "grad_norm": 2.2015435695648193, |
| "learning_rate": 1.4775011317868612e-06, |
| "loss": 0.7802, |
| "step": 2 |
| }, |
| { |
| "epoch": 0.0011007533280588903, |
| "grad_norm": 2.0623114109039307, |
| "learning_rate": 2.9550022635737224e-06, |
| "loss": 0.6659, |
| "step": 4 |
| }, |
| { |
| "epoch": 0.0016511299920883354, |
| "grad_norm": 0.8444932699203491, |
| "learning_rate": 3.819285020442103e-06, |
| "loss": 0.6275, |
| "step": 6 |
| }, |
| { |
| "epoch": 0.0022015066561177805, |
| "grad_norm": 0.7291238307952881, |
| "learning_rate": 4.432503395360583e-06, |
| "loss": 0.5955, |
| "step": 8 |
| }, |
| { |
| "epoch": 0.0027518833201472257, |
| "grad_norm": 0.591098427772522, |
| "learning_rate": 4.90815251991065e-06, |
| "loss": 0.5906, |
| "step": 10 |
| }, |
| { |
| "epoch": 0.003302259984176671, |
| "grad_norm": 0.6075527667999268, |
| "learning_rate": 5.2967861522289644e-06, |
| "loss": 0.5451, |
| "step": 12 |
| }, |
| { |
| "epoch": 0.003852636648206116, |
| "grad_norm": 0.5598031878471375, |
| "learning_rate": 5.625371206454386e-06, |
| "loss": 0.5539, |
| "step": 14 |
| }, |
| { |
| "epoch": 0.004403013312235561, |
| "grad_norm": 0.5352339148521423, |
| "learning_rate": 5.910004527147445e-06, |
| "loss": 0.5452, |
| "step": 16 |
| }, |
| { |
| "epoch": 0.004953389976265006, |
| "grad_norm": 0.524741530418396, |
| "learning_rate": 6.161068909097345e-06, |
| "loss": 0.5536, |
| "step": 18 |
| }, |
| { |
| "epoch": 0.005503766640294451, |
| "grad_norm": 0.4852159321308136, |
| "learning_rate": 6.38565365169751e-06, |
| "loss": 0.5439, |
| "step": 20 |
| }, |
| { |
| "epoch": 0.0060541433043238965, |
| "grad_norm": 0.4764852225780487, |
| "learning_rate": 6.5888152636627215e-06, |
| "loss": 0.5468, |
| "step": 22 |
| }, |
| { |
| "epoch": 0.006604519968353342, |
| "grad_norm": 0.463278591632843, |
| "learning_rate": 6.774287284015826e-06, |
| "loss": 0.541, |
| "step": 24 |
| }, |
| { |
| "epoch": 0.007154896632382787, |
| "grad_norm": 0.4566305875778198, |
| "learning_rate": 6.944905003449378e-06, |
| "loss": 0.5258, |
| "step": 26 |
| }, |
| { |
| "epoch": 0.007705273296412232, |
| "grad_norm": 0.4572094678878784, |
| "learning_rate": 7.102872338241248e-06, |
| "loss": 0.5385, |
| "step": 28 |
| }, |
| { |
| "epoch": 0.008255649960441678, |
| "grad_norm": 0.4581094980239868, |
| "learning_rate": 7.2499364085658915e-06, |
| "loss": 0.5258, |
| "step": 30 |
| }, |
| { |
| "epoch": 0.008806026624471122, |
| "grad_norm": 0.4602491557598114, |
| "learning_rate": 7.387505658934305e-06, |
| "loss": 0.5239, |
| "step": 32 |
| }, |
| { |
| "epoch": 0.009356403288500568, |
| "grad_norm": 0.4633028507232666, |
| "learning_rate": 7.516732105870977e-06, |
| "loss": 0.5237, |
| "step": 34 |
| }, |
| { |
| "epoch": 0.009906779952530012, |
| "grad_norm": 0.4267115592956543, |
| "learning_rate": 7.638570040884206e-06, |
| "loss": 0.5467, |
| "step": 36 |
| }, |
| { |
| "epoch": 0.010457156616559458, |
| "grad_norm": 0.4587521255016327, |
| "learning_rate": 7.753818840648305e-06, |
| "loss": 0.5282, |
| "step": 38 |
| }, |
| { |
| "epoch": 0.011007533280588903, |
| "grad_norm": 0.44529175758361816, |
| "learning_rate": 7.863154783484372e-06, |
| "loss": 0.536, |
| "step": 40 |
| }, |
| { |
| "epoch": 0.011557909944618349, |
| "grad_norm": 0.436199814081192, |
| "learning_rate": 7.967155095109629e-06, |
| "loss": 0.5259, |
| "step": 42 |
| }, |
| { |
| "epoch": 0.012108286608647793, |
| "grad_norm": 0.43157511949539185, |
| "learning_rate": 8.066316395449581e-06, |
| "loss": 0.5173, |
| "step": 44 |
| }, |
| { |
| "epoch": 0.012658663272677239, |
| "grad_norm": 0.4393196105957031, |
| "learning_rate": 8.161069041569085e-06, |
| "loss": 0.5037, |
| "step": 46 |
| }, |
| { |
| "epoch": 0.013209039936706683, |
| "grad_norm": 0.5085024237632751, |
| "learning_rate": 8.251788415802687e-06, |
| "loss": 0.5015, |
| "step": 48 |
| }, |
| { |
| "epoch": 0.01375941660073613, |
| "grad_norm": 0.40056705474853516, |
| "learning_rate": 8.338803908034438e-06, |
| "loss": 0.5194, |
| "step": 50 |
| }, |
| { |
| "epoch": 0.014309793264765574, |
| "grad_norm": 0.43617382645606995, |
| "learning_rate": 8.422406135236239e-06, |
| "loss": 0.5327, |
| "step": 52 |
| }, |
| { |
| "epoch": 0.01486016992879502, |
| "grad_norm": 0.42528873682022095, |
| "learning_rate": 8.502852797752587e-06, |
| "loss": 0.5245, |
| "step": 54 |
| }, |
| { |
| "epoch": 0.015410546592824464, |
| "grad_norm": 0.40340831875801086, |
| "learning_rate": 8.58037347002811e-06, |
| "loss": 0.5059, |
| "step": 56 |
| }, |
| { |
| "epoch": 0.015960923256853908, |
| "grad_norm": 0.40859195590019226, |
| "learning_rate": 8.65517355028691e-06, |
| "loss": 0.5136, |
| "step": 58 |
| }, |
| { |
| "epoch": 0.016511299920883356, |
| "grad_norm": 0.42262887954711914, |
| "learning_rate": 8.727437540352753e-06, |
| "loss": 0.5011, |
| "step": 60 |
| }, |
| { |
| "epoch": 0.0170616765849128, |
| "grad_norm": 0.4255228638648987, |
| "learning_rate": 8.79733178747776e-06, |
| "loss": 0.5218, |
| "step": 62 |
| }, |
| { |
| "epoch": 0.017612053248942244, |
| "grad_norm": 0.4350854754447937, |
| "learning_rate": 8.865006790721166e-06, |
| "loss": 0.5334, |
| "step": 64 |
| }, |
| { |
| "epoch": 0.01816242991297169, |
| "grad_norm": 0.41395291686058044, |
| "learning_rate": 8.930599152317962e-06, |
| "loss": 0.5233, |
| "step": 66 |
| }, |
| { |
| "epoch": 0.018712806577001136, |
| "grad_norm": 0.4127484858036041, |
| "learning_rate": 8.99423323765784e-06, |
| "loss": 0.5143, |
| "step": 68 |
| }, |
| { |
| "epoch": 0.01926318324103058, |
| "grad_norm": 0.42464280128479004, |
| "learning_rate": 9.056022594578175e-06, |
| "loss": 0.5164, |
| "step": 70 |
| }, |
| { |
| "epoch": 0.019813559905060025, |
| "grad_norm": 0.4011682868003845, |
| "learning_rate": 9.116071172671068e-06, |
| "loss": 0.5036, |
| "step": 72 |
| }, |
| { |
| "epoch": 0.02036393656908947, |
| "grad_norm": 0.39912551641464233, |
| "learning_rate": 9.174474375494509e-06, |
| "loss": 0.5038, |
| "step": 74 |
| }, |
| { |
| "epoch": 0.020914313233118917, |
| "grad_norm": 0.40526625514030457, |
| "learning_rate": 9.231319972435167e-06, |
| "loss": 0.518, |
| "step": 76 |
| }, |
| { |
| "epoch": 0.02146468989714836, |
| "grad_norm": 0.4195484220981598, |
| "learning_rate": 9.28668889210462e-06, |
| "loss": 0.5065, |
| "step": 78 |
| }, |
| { |
| "epoch": 0.022015066561177805, |
| "grad_norm": 0.4483351409435272, |
| "learning_rate": 9.340655915271231e-06, |
| "loss": 0.5128, |
| "step": 80 |
| }, |
| { |
| "epoch": 0.02256544322520725, |
| "grad_norm": 0.42789894342422485, |
| "learning_rate": 9.393290282217048e-06, |
| "loss": 0.525, |
| "step": 82 |
| }, |
| { |
| "epoch": 0.023115819889236697, |
| "grad_norm": 0.434644490480423, |
| "learning_rate": 9.444656226896488e-06, |
| "loss": 0.5248, |
| "step": 84 |
| }, |
| { |
| "epoch": 0.02366619655326614, |
| "grad_norm": 0.4532856345176697, |
| "learning_rate": 9.494813448234365e-06, |
| "loss": 0.5226, |
| "step": 86 |
| }, |
| { |
| "epoch": 0.024216573217295586, |
| "grad_norm": 0.4103749692440033, |
| "learning_rate": 9.543817527236444e-06, |
| "loss": 0.5034, |
| "step": 88 |
| }, |
| { |
| "epoch": 0.02476694988132503, |
| "grad_norm": 0.4208613336086273, |
| "learning_rate": 9.591720297221133e-06, |
| "loss": 0.5214, |
| "step": 90 |
| }, |
| { |
| "epoch": 0.025317326545354478, |
| "grad_norm": 0.4020327627658844, |
| "learning_rate": 9.638570173355947e-06, |
| "loss": 0.5047, |
| "step": 92 |
| }, |
| { |
| "epoch": 0.025867703209383922, |
| "grad_norm": 0.4074559509754181, |
| "learning_rate": 9.684412446751251e-06, |
| "loss": 0.4999, |
| "step": 94 |
| }, |
| { |
| "epoch": 0.026418079873413366, |
| "grad_norm": 0.43330731987953186, |
| "learning_rate": 9.729289547589548e-06, |
| "loss": 0.5089, |
| "step": 96 |
| }, |
| { |
| "epoch": 0.02696845653744281, |
| "grad_norm": 0.42775431275367737, |
| "learning_rate": 9.773241281121913e-06, |
| "loss": 0.5169, |
| "step": 98 |
| }, |
| { |
| "epoch": 0.02751883320147226, |
| "grad_norm": 0.421403706073761, |
| "learning_rate": 9.8163050398213e-06, |
| "loss": 0.5123, |
| "step": 100 |
| }, |
| { |
| "epoch": 0.028069209865501703, |
| "grad_norm": 0.42337778210639954, |
| "learning_rate": 9.858515994526218e-06, |
| "loss": 0.5116, |
| "step": 102 |
| }, |
| { |
| "epoch": 0.028619586529531147, |
| "grad_norm": 0.4156826138496399, |
| "learning_rate": 9.8999072670231e-06, |
| "loss": 0.5077, |
| "step": 104 |
| }, |
| { |
| "epoch": 0.02916996319356059, |
| "grad_norm": 0.4544354975223541, |
| "learning_rate": 9.9405100861891e-06, |
| "loss": 0.5099, |
| "step": 106 |
| }, |
| { |
| "epoch": 0.02972033985759004, |
| "grad_norm": 0.4015970528125763, |
| "learning_rate": 9.980353929539448e-06, |
| "loss": 0.5049, |
| "step": 108 |
| }, |
| { |
| "epoch": 0.030270716521619483, |
| "grad_norm": 0.3907098174095154, |
| "learning_rate": 1e-05, |
| "loss": 0.5202, |
| "step": 110 |
| }, |
| { |
| "epoch": 0.030821093185648928, |
| "grad_norm": 0.4184499979019165, |
| "learning_rate": 1e-05, |
| "loss": 0.5085, |
| "step": 112 |
| }, |
| { |
| "epoch": 0.031371469849678375, |
| "grad_norm": 0.47195565700531006, |
| "learning_rate": 1e-05, |
| "loss": 0.5161, |
| "step": 114 |
| }, |
| { |
| "epoch": 0.031921846513707816, |
| "grad_norm": 0.43992695212364197, |
| "learning_rate": 1e-05, |
| "loss": 0.4978, |
| "step": 116 |
| }, |
| { |
| "epoch": 0.032472223177737264, |
| "grad_norm": 0.43099331855773926, |
| "learning_rate": 1e-05, |
| "loss": 0.5035, |
| "step": 118 |
| }, |
| { |
| "epoch": 0.03302259984176671, |
| "grad_norm": 0.44256317615509033, |
| "learning_rate": 1e-05, |
| "loss": 0.4991, |
| "step": 120 |
| }, |
| { |
| "epoch": 0.03357297650579615, |
| "grad_norm": 0.42082124948501587, |
| "learning_rate": 1e-05, |
| "loss": 0.5028, |
| "step": 122 |
| }, |
| { |
| "epoch": 0.0341233531698256, |
| "grad_norm": 0.38576358556747437, |
| "learning_rate": 1e-05, |
| "loss": 0.5081, |
| "step": 124 |
| }, |
| { |
| "epoch": 0.03467372983385505, |
| "grad_norm": 0.3880733251571655, |
| "learning_rate": 1e-05, |
| "loss": 0.5001, |
| "step": 126 |
| }, |
| { |
| "epoch": 0.03522410649788449, |
| "grad_norm": 0.41802075505256653, |
| "learning_rate": 1e-05, |
| "loss": 0.5056, |
| "step": 128 |
| }, |
| { |
| "epoch": 0.035774483161913936, |
| "grad_norm": 0.3949527144432068, |
| "learning_rate": 1e-05, |
| "loss": 0.5155, |
| "step": 130 |
| }, |
| { |
| "epoch": 0.03632485982594338, |
| "grad_norm": 0.4038969576358795, |
| "learning_rate": 1e-05, |
| "loss": 0.5056, |
| "step": 132 |
| }, |
| { |
| "epoch": 0.036875236489972825, |
| "grad_norm": 0.40195325016975403, |
| "learning_rate": 1e-05, |
| "loss": 0.4968, |
| "step": 134 |
| }, |
| { |
| "epoch": 0.03742561315400227, |
| "grad_norm": 0.3946043848991394, |
| "learning_rate": 1e-05, |
| "loss": 0.4981, |
| "step": 136 |
| }, |
| { |
| "epoch": 0.037975989818031713, |
| "grad_norm": 0.3914756774902344, |
| "learning_rate": 1e-05, |
| "loss": 0.5, |
| "step": 138 |
| }, |
| { |
| "epoch": 0.03852636648206116, |
| "grad_norm": 0.4295148551464081, |
| "learning_rate": 1e-05, |
| "loss": 0.5147, |
| "step": 140 |
| }, |
| { |
| "epoch": 0.03907674314609061, |
| "grad_norm": 0.40092742443084717, |
| "learning_rate": 1e-05, |
| "loss": 0.5196, |
| "step": 142 |
| }, |
| { |
| "epoch": 0.03962711981012005, |
| "grad_norm": 0.41200628876686096, |
| "learning_rate": 1e-05, |
| "loss": 0.5031, |
| "step": 144 |
| }, |
| { |
| "epoch": 0.0401774964741495, |
| "grad_norm": 0.43834391236305237, |
| "learning_rate": 1e-05, |
| "loss": 0.5047, |
| "step": 146 |
| }, |
| { |
| "epoch": 0.04072787313817894, |
| "grad_norm": 0.3940436542034149, |
| "learning_rate": 1e-05, |
| "loss": 0.4912, |
| "step": 148 |
| }, |
| { |
| "epoch": 0.041278249802208386, |
| "grad_norm": 0.3873765170574188, |
| "learning_rate": 1e-05, |
| "loss": 0.482, |
| "step": 150 |
| }, |
| { |
| "epoch": 0.041828626466237834, |
| "grad_norm": 0.4272858798503876, |
| "learning_rate": 1e-05, |
| "loss": 0.4923, |
| "step": 152 |
| }, |
| { |
| "epoch": 0.042379003130267275, |
| "grad_norm": 0.40542730689048767, |
| "learning_rate": 1e-05, |
| "loss": 0.4892, |
| "step": 154 |
| }, |
| { |
| "epoch": 0.04292937979429672, |
| "grad_norm": 0.38277357816696167, |
| "learning_rate": 1e-05, |
| "loss": 0.517, |
| "step": 156 |
| }, |
| { |
| "epoch": 0.04347975645832617, |
| "grad_norm": 0.39421385526657104, |
| "learning_rate": 1e-05, |
| "loss": 0.503, |
| "step": 158 |
| }, |
| { |
| "epoch": 0.04403013312235561, |
| "grad_norm": 0.3984109163284302, |
| "learning_rate": 1e-05, |
| "loss": 0.5074, |
| "step": 160 |
| }, |
| { |
| "epoch": 0.04458050978638506, |
| "grad_norm": 0.40513876080513, |
| "learning_rate": 1e-05, |
| "loss": 0.5092, |
| "step": 162 |
| }, |
| { |
| "epoch": 0.0451308864504145, |
| "grad_norm": 0.45850449800491333, |
| "learning_rate": 1e-05, |
| "loss": 0.5086, |
| "step": 164 |
| }, |
| { |
| "epoch": 0.04568126311444395, |
| "grad_norm": 0.4050631821155548, |
| "learning_rate": 1e-05, |
| "loss": 0.5073, |
| "step": 166 |
| }, |
| { |
| "epoch": 0.046231639778473395, |
| "grad_norm": 0.41050952672958374, |
| "learning_rate": 1e-05, |
| "loss": 0.5007, |
| "step": 168 |
| }, |
| { |
| "epoch": 0.046782016442502836, |
| "grad_norm": 0.39902788400650024, |
| "learning_rate": 1e-05, |
| "loss": 0.4941, |
| "step": 170 |
| }, |
| { |
| "epoch": 0.04733239310653228, |
| "grad_norm": 0.4421572983264923, |
| "learning_rate": 1e-05, |
| "loss": 0.4988, |
| "step": 172 |
| }, |
| { |
| "epoch": 0.04788276977056173, |
| "grad_norm": 0.4092646837234497, |
| "learning_rate": 1e-05, |
| "loss": 0.5001, |
| "step": 174 |
| }, |
| { |
| "epoch": 0.04843314643459117, |
| "grad_norm": 0.4195966124534607, |
| "learning_rate": 1e-05, |
| "loss": 0.4964, |
| "step": 176 |
| }, |
| { |
| "epoch": 0.04898352309862062, |
| "grad_norm": 0.3937481641769409, |
| "learning_rate": 1e-05, |
| "loss": 0.4977, |
| "step": 178 |
| }, |
| { |
| "epoch": 0.04953389976265006, |
| "grad_norm": 0.434950590133667, |
| "learning_rate": 1e-05, |
| "loss": 0.5054, |
| "step": 180 |
| }, |
| { |
| "epoch": 0.05008427642667951, |
| "grad_norm": 0.40112894773483276, |
| "learning_rate": 1e-05, |
| "loss": 0.494, |
| "step": 182 |
| }, |
| { |
| "epoch": 0.050634653090708956, |
| "grad_norm": 0.42001938819885254, |
| "learning_rate": 1e-05, |
| "loss": 0.4744, |
| "step": 184 |
| }, |
| { |
| "epoch": 0.0511850297547384, |
| "grad_norm": 0.4066455364227295, |
| "learning_rate": 1e-05, |
| "loss": 0.4838, |
| "step": 186 |
| }, |
| { |
| "epoch": 0.051735406418767844, |
| "grad_norm": 0.3934157192707062, |
| "learning_rate": 1e-05, |
| "loss": 0.5017, |
| "step": 188 |
| }, |
| { |
| "epoch": 0.05228578308279729, |
| "grad_norm": 0.38877320289611816, |
| "learning_rate": 1e-05, |
| "loss": 0.5018, |
| "step": 190 |
| }, |
| { |
| "epoch": 0.05283615974682673, |
| "grad_norm": 0.39771756529808044, |
| "learning_rate": 1e-05, |
| "loss": 0.485, |
| "step": 192 |
| }, |
| { |
| "epoch": 0.05338653641085618, |
| "grad_norm": 0.3938674330711365, |
| "learning_rate": 1e-05, |
| "loss": 0.5034, |
| "step": 194 |
| }, |
| { |
| "epoch": 0.05393691307488562, |
| "grad_norm": 0.40473559498786926, |
| "learning_rate": 1e-05, |
| "loss": 0.5082, |
| "step": 196 |
| }, |
| { |
| "epoch": 0.05448728973891507, |
| "grad_norm": 0.3977149426937103, |
| "learning_rate": 1e-05, |
| "loss": 0.4997, |
| "step": 198 |
| }, |
| { |
| "epoch": 0.05503766640294452, |
| "grad_norm": 0.39340054988861084, |
| "learning_rate": 1e-05, |
| "loss": 0.4859, |
| "step": 200 |
| }, |
| { |
| "epoch": 0.05503766640294452, |
| "eval_merge_loss": 0.4414624571800232, |
| "eval_merge_runtime": 600.1539, |
| "eval_merge_samples_per_second": 56.239, |
| "eval_merge_steps_per_second": 2.344, |
| "step": 200 |
| }, |
| { |
| "epoch": 0.05503766640294452, |
| "eval_new_aug_datas_filtered.json_loss": 0.5691156983375549, |
| "eval_new_aug_datas_filtered.json_runtime": 10.6767, |
| "eval_new_aug_datas_filtered.json_samples_per_second": 71.839, |
| "eval_new_aug_datas_filtered.json_steps_per_second": 2.997, |
| "step": 200 |
| }, |
| { |
| "epoch": 0.05503766640294452, |
| "eval_sharegpt_gpt4.json_loss": 0.8223738670349121, |
| "eval_sharegpt_gpt4.json_runtime": 31.6183, |
| "eval_sharegpt_gpt4.json_samples_per_second": 58.858, |
| "eval_sharegpt_gpt4.json_steps_per_second": 2.467, |
| "step": 200 |
| }, |
| { |
| "epoch": 0.05503766640294452, |
| "eval_Table_GPT.json_loss": 0.09253557026386261, |
| "eval_Table_GPT.json_runtime": 24.9748, |
| "eval_Table_GPT.json_samples_per_second": 83.804, |
| "eval_Table_GPT.json_steps_per_second": 3.524, |
| "step": 200 |
| }, |
| { |
| "epoch": 0.05503766640294452, |
| "eval_gpt_4o_200k.json_loss": 0.849287211894989, |
| "eval_gpt_4o_200k.json_runtime": 48.5339, |
| "eval_gpt_4o_200k.json_samples_per_second": 129.415, |
| "eval_gpt_4o_200k.json_steps_per_second": 5.398, |
| "step": 200 |
| }, |
| { |
| "epoch": 0.05503766640294452, |
| "eval_multi_turn_datas.json_loss": 0.3907540738582611, |
| "eval_multi_turn_datas.json_runtime": 75.6133, |
| "eval_multi_turn_datas.json_samples_per_second": 52.927, |
| "eval_multi_turn_datas.json_steps_per_second": 2.209, |
| "step": 200 |
| }, |
| { |
| "epoch": 0.05503766640294452, |
| "eval_table_python_code_datas.json_loss": 0.33119720220565796, |
| "eval_table_python_code_datas.json_runtime": 43.1313, |
| "eval_table_python_code_datas.json_samples_per_second": 50.056, |
| "eval_table_python_code_datas.json_steps_per_second": 2.087, |
| "step": 200 |
| }, |
| { |
| "epoch": 0.05503766640294452, |
| "eval_tabular_llm_data.json_loss": 0.14601922035217285, |
| "eval_tabular_llm_data.json_runtime": 8.7785, |
| "eval_tabular_llm_data.json_samples_per_second": 28.023, |
| "eval_tabular_llm_data.json_steps_per_second": 1.253, |
| "step": 200 |
| }, |
| { |
| "epoch": 0.05503766640294452, |
| "eval_python_code_critic_21k.json_loss": 0.625038743019104, |
| "eval_python_code_critic_21k.json_runtime": 3.237, |
| "eval_python_code_critic_21k.json_samples_per_second": 184.43, |
| "eval_python_code_critic_21k.json_steps_per_second": 7.723, |
| "step": 200 |
| }, |
| { |
| "epoch": 0.05503766640294452, |
| "eval_all_merge_table_dataset.json_loss": 0.09772461652755737, |
| "eval_all_merge_table_dataset.json_runtime": 24.3077, |
| "eval_all_merge_table_dataset.json_samples_per_second": 29.291, |
| "eval_all_merge_table_dataset.json_steps_per_second": 1.234, |
| "step": 200 |
| }, |
| { |
| "epoch": 0.05503766640294452, |
| "eval_code_feedback_multi_turn.json_loss": 0.6093290448188782, |
| "eval_code_feedback_multi_turn.json_runtime": 32.4589, |
| "eval_code_feedback_multi_turn.json_samples_per_second": 67.809, |
| "eval_code_feedback_multi_turn.json_steps_per_second": 2.834, |
| "step": 200 |
| }, |
| { |
| "epoch": 0.05503766640294452, |
| "eval_ultrainteract_sft.json_loss": 0.4469935894012451, |
| "eval_ultrainteract_sft.json_runtime": 8.6702, |
| "eval_ultrainteract_sft.json_samples_per_second": 167.931, |
| "eval_ultrainteract_sft.json_steps_per_second": 7.036, |
| "step": 200 |
| }, |
| { |
| "epoch": 0.05503766640294452, |
| "eval_synthetic_text_to_sql.json_loss": 0.11159003525972366, |
| "eval_synthetic_text_to_sql.json_runtime": 0.1306, |
| "eval_synthetic_text_to_sql.json_samples_per_second": 260.355, |
| "eval_synthetic_text_to_sql.json_steps_per_second": 15.315, |
| "step": 200 |
| }, |
| { |
| "epoch": 0.05503766640294452, |
| "eval_sft_react_sql_datas.json_loss": 0.6847189664840698, |
| "eval_sft_react_sql_datas.json_runtime": 7.8434, |
| "eval_sft_react_sql_datas.json_samples_per_second": 40.034, |
| "eval_sft_react_sql_datas.json_steps_per_second": 1.785, |
| "step": 200 |
| }, |
| { |
| "epoch": 0.05503766640294452, |
| "eval_all_merge_code.json_loss": 0.32269543409347534, |
| "eval_all_merge_code.json_runtime": 0.3287, |
| "eval_all_merge_code.json_samples_per_second": 191.649, |
| "eval_all_merge_code.json_steps_per_second": 9.126, |
| "step": 200 |
| }, |
| { |
| "epoch": 0.05503766640294452, |
| "eval_magpie_datas.json_loss": 0.4600640833377838, |
| "eval_magpie_datas.json_runtime": 2.2095, |
| "eval_magpie_datas.json_samples_per_second": 77.844, |
| "eval_magpie_datas.json_steps_per_second": 3.621, |
| "step": 200 |
| }, |
| { |
| "epoch": 0.05503766640294452, |
| "eval_train_data_for_qwen.json_loss": 0.017207294702529907, |
| "eval_train_data_for_qwen.json_runtime": 0.2494, |
| "eval_train_data_for_qwen.json_samples_per_second": 40.095, |
| "eval_train_data_for_qwen.json_steps_per_second": 4.01, |
| "step": 200 |
| }, |
| { |
| "epoch": 0.05503766640294452, |
| "eval_alpaca_cleaned.json_loss": 0.9374485015869141, |
| "eval_alpaca_cleaned.json_runtime": 0.1149, |
| "eval_alpaca_cleaned.json_samples_per_second": 234.896, |
| "eval_alpaca_cleaned.json_steps_per_second": 17.4, |
| "step": 200 |
| }, |
| { |
| "epoch": 0.05503766640294452, |
| "eval_agent_instruct.json_loss": 0.23996739089488983, |
| "eval_agent_instruct.json_runtime": 0.5126, |
| "eval_agent_instruct.json_samples_per_second": 93.639, |
| "eval_agent_instruct.json_steps_per_second": 3.902, |
| "step": 200 |
| }, |
| { |
| "epoch": 0.05503766640294452, |
| "eval_MathInstruct.json_loss": 0.2269323617219925, |
| "eval_MathInstruct.json_runtime": 0.3472, |
| "eval_MathInstruct.json_samples_per_second": 164.184, |
| "eval_MathInstruct.json_steps_per_second": 8.641, |
| "step": 200 |
| }, |
| { |
| "epoch": 0.05503766640294452, |
| "eval_tested_143k_python_alpaca.json_loss": 0.4513254165649414, |
| "eval_tested_143k_python_alpaca.json_runtime": 0.3017, |
| "eval_tested_143k_python_alpaca.json_samples_per_second": 112.684, |
| "eval_tested_143k_python_alpaca.json_steps_per_second": 6.628, |
| "step": 200 |
| }, |
| { |
| "epoch": 0.05503766640294452, |
| "eval_xlam_function_calling_60k.json_loss": 0.011208846233785152, |
| "eval_xlam_function_calling_60k.json_runtime": 0.1011, |
| "eval_xlam_function_calling_60k.json_samples_per_second": 227.556, |
| "eval_xlam_function_calling_60k.json_steps_per_second": 9.894, |
| "step": 200 |
| }, |
| { |
| "epoch": 0.05503766640294452, |
| "eval_alpaca_data_gpt4_chinese.json_loss": 1.6813441514968872, |
| "eval_alpaca_data_gpt4_chinese.json_runtime": 0.0523, |
| "eval_alpaca_data_gpt4_chinese.json_samples_per_second": 306.208, |
| "eval_alpaca_data_gpt4_chinese.json_steps_per_second": 19.138, |
| "step": 200 |
| }, |
| { |
| "epoch": 0.05503766640294452, |
| "eval_alpaca_gpt4_zh.json_loss": 1.0053786039352417, |
| "eval_alpaca_gpt4_zh.json_runtime": 0.0504, |
| "eval_alpaca_gpt4_zh.json_samples_per_second": 218.451, |
| "eval_alpaca_gpt4_zh.json_steps_per_second": 19.859, |
| "step": 200 |
| }, |
| { |
| "epoch": 0.05503766640294452, |
| "eval_codefeedback_filtered_instruction.json_loss": 0.5965134501457214, |
| "eval_codefeedback_filtered_instruction.json_runtime": 0.4841, |
| "eval_codefeedback_filtered_instruction.json_samples_per_second": 41.316, |
| "eval_codefeedback_filtered_instruction.json_steps_per_second": 2.066, |
| "step": 200 |
| }, |
| { |
| "epoch": 0.05558804306697396, |
| "grad_norm": 0.38687607645988464, |
| "learning_rate": 1e-05, |
| "loss": 0.49, |
| "step": 202 |
| }, |
| { |
| "epoch": 0.056138419731003406, |
| "grad_norm": 0.39803430438041687, |
| "learning_rate": 1e-05, |
| "loss": 0.5047, |
| "step": 204 |
| }, |
| { |
| "epoch": 0.05668879639503285, |
| "grad_norm": 0.41770851612091064, |
| "learning_rate": 1e-05, |
| "loss": 0.4874, |
| "step": 206 |
| }, |
| { |
| "epoch": 0.057239173059062294, |
| "grad_norm": 0.3909968435764313, |
| "learning_rate": 1e-05, |
| "loss": 0.4992, |
| "step": 208 |
| }, |
| { |
| "epoch": 0.05778954972309174, |
| "grad_norm": 0.3818782866001129, |
| "learning_rate": 1e-05, |
| "loss": 0.5006, |
| "step": 210 |
| }, |
| { |
| "epoch": 0.05833992638712118, |
| "grad_norm": 0.4179542362689972, |
| "learning_rate": 1e-05, |
| "loss": 0.4945, |
| "step": 212 |
| }, |
| { |
| "epoch": 0.05889030305115063, |
| "grad_norm": 0.3872973322868347, |
| "learning_rate": 1e-05, |
| "loss": 0.4918, |
| "step": 214 |
| }, |
| { |
| "epoch": 0.05944067971518008, |
| "grad_norm": 0.4249219298362732, |
| "learning_rate": 1e-05, |
| "loss": 0.5039, |
| "step": 216 |
| }, |
| { |
| "epoch": 0.05999105637920952, |
| "grad_norm": 0.43381986021995544, |
| "learning_rate": 1e-05, |
| "loss": 0.4873, |
| "step": 218 |
| }, |
| { |
| "epoch": 0.06054143304323897, |
| "grad_norm": 0.40741005539894104, |
| "learning_rate": 1e-05, |
| "loss": 0.4771, |
| "step": 220 |
| }, |
| { |
| "epoch": 0.061091809707268414, |
| "grad_norm": 0.37800464034080505, |
| "learning_rate": 1e-05, |
| "loss": 0.5015, |
| "step": 222 |
| }, |
| { |
| "epoch": 0.061642186371297855, |
| "grad_norm": 0.42365899682044983, |
| "learning_rate": 1e-05, |
| "loss": 0.4906, |
| "step": 224 |
| }, |
| { |
| "epoch": 0.0621925630353273, |
| "grad_norm": 0.39279666543006897, |
| "learning_rate": 1e-05, |
| "loss": 0.51, |
| "step": 226 |
| }, |
| { |
| "epoch": 0.06274293969935675, |
| "grad_norm": 0.4037010073661804, |
| "learning_rate": 1e-05, |
| "loss": 0.5162, |
| "step": 228 |
| }, |
| { |
| "epoch": 0.0632933163633862, |
| "grad_norm": 0.37650179862976074, |
| "learning_rate": 1e-05, |
| "loss": 0.4984, |
| "step": 230 |
| }, |
| { |
| "epoch": 0.06384369302741563, |
| "grad_norm": 0.42879757285118103, |
| "learning_rate": 1e-05, |
| "loss": 0.492, |
| "step": 232 |
| }, |
| { |
| "epoch": 0.06439406969144508, |
| "grad_norm": 0.42225000262260437, |
| "learning_rate": 1e-05, |
| "loss": 0.5215, |
| "step": 234 |
| }, |
| { |
| "epoch": 0.06494444635547453, |
| "grad_norm": 0.3948579430580139, |
| "learning_rate": 1e-05, |
| "loss": 0.5045, |
| "step": 236 |
| }, |
| { |
| "epoch": 0.06549482301950398, |
| "grad_norm": 0.40142592787742615, |
| "learning_rate": 1e-05, |
| "loss": 0.5083, |
| "step": 238 |
| }, |
| { |
| "epoch": 0.06604519968353342, |
| "grad_norm": 0.41938111186027527, |
| "learning_rate": 1e-05, |
| "loss": 0.5094, |
| "step": 240 |
| }, |
| { |
| "epoch": 0.06659557634756286, |
| "grad_norm": 0.4345923066139221, |
| "learning_rate": 1e-05, |
| "loss": 0.5076, |
| "step": 242 |
| }, |
| { |
| "epoch": 0.0671459530115923, |
| "grad_norm": 0.3985568881034851, |
| "learning_rate": 1e-05, |
| "loss": 0.5007, |
| "step": 244 |
| }, |
| { |
| "epoch": 0.06769632967562175, |
| "grad_norm": 0.37891215085983276, |
| "learning_rate": 1e-05, |
| "loss": 0.513, |
| "step": 246 |
| }, |
| { |
| "epoch": 0.0682467063396512, |
| "grad_norm": 0.413566917181015, |
| "learning_rate": 1e-05, |
| "loss": 0.493, |
| "step": 248 |
| }, |
| { |
| "epoch": 0.06879708300368065, |
| "grad_norm": 0.3980996608734131, |
| "learning_rate": 1e-05, |
| "loss": 0.5161, |
| "step": 250 |
| }, |
| { |
| "epoch": 0.0693474596677101, |
| "grad_norm": 0.4525178372859955, |
| "learning_rate": 1e-05, |
| "loss": 0.5077, |
| "step": 252 |
| }, |
| { |
| "epoch": 0.06989783633173953, |
| "grad_norm": 0.3720250427722931, |
| "learning_rate": 1e-05, |
| "loss": 0.4809, |
| "step": 254 |
| }, |
| { |
| "epoch": 0.07044821299576898, |
| "grad_norm": 0.37366852164268494, |
| "learning_rate": 1e-05, |
| "loss": 0.4724, |
| "step": 256 |
| }, |
| { |
| "epoch": 0.07099858965979843, |
| "grad_norm": 0.38189247250556946, |
| "learning_rate": 1e-05, |
| "loss": 0.5062, |
| "step": 258 |
| }, |
| { |
| "epoch": 0.07154896632382787, |
| "grad_norm": 0.39108410477638245, |
| "learning_rate": 1e-05, |
| "loss": 0.4894, |
| "step": 260 |
| }, |
| { |
| "epoch": 0.07209934298785732, |
| "grad_norm": 0.4071044921875, |
| "learning_rate": 1e-05, |
| "loss": 0.4916, |
| "step": 262 |
| }, |
| { |
| "epoch": 0.07264971965188675, |
| "grad_norm": 0.38570597767829895, |
| "learning_rate": 1e-05, |
| "loss": 0.4925, |
| "step": 264 |
| }, |
| { |
| "epoch": 0.0732000963159162, |
| "grad_norm": 0.409600168466568, |
| "learning_rate": 1e-05, |
| "loss": 0.4987, |
| "step": 266 |
| }, |
| { |
| "epoch": 0.07375047297994565, |
| "grad_norm": 0.3844049274921417, |
| "learning_rate": 1e-05, |
| "loss": 0.5011, |
| "step": 268 |
| }, |
| { |
| "epoch": 0.0743008496439751, |
| "grad_norm": 0.41260388493537903, |
| "learning_rate": 1e-05, |
| "loss": 0.5014, |
| "step": 270 |
| }, |
| { |
| "epoch": 0.07485122630800455, |
| "grad_norm": 0.402567982673645, |
| "learning_rate": 1e-05, |
| "loss": 0.4926, |
| "step": 272 |
| }, |
| { |
| "epoch": 0.07540160297203398, |
| "grad_norm": 0.4058002233505249, |
| "learning_rate": 1e-05, |
| "loss": 0.4879, |
| "step": 274 |
| }, |
| { |
| "epoch": 0.07595197963606343, |
| "grad_norm": 0.42676812410354614, |
| "learning_rate": 1e-05, |
| "loss": 0.5073, |
| "step": 276 |
| }, |
| { |
| "epoch": 0.07650235630009287, |
| "grad_norm": 0.3878956735134125, |
| "learning_rate": 1e-05, |
| "loss": 0.4831, |
| "step": 278 |
| }, |
| { |
| "epoch": 0.07705273296412232, |
| "grad_norm": 0.37560945749282837, |
| "learning_rate": 1e-05, |
| "loss": 0.4705, |
| "step": 280 |
| }, |
| { |
| "epoch": 0.07760310962815177, |
| "grad_norm": 0.4071865379810333, |
| "learning_rate": 1e-05, |
| "loss": 0.489, |
| "step": 282 |
| }, |
| { |
| "epoch": 0.07815348629218122, |
| "grad_norm": 0.3832094073295593, |
| "learning_rate": 1e-05, |
| "loss": 0.4843, |
| "step": 284 |
| }, |
| { |
| "epoch": 0.07870386295621065, |
| "grad_norm": 0.3808830976486206, |
| "learning_rate": 1e-05, |
| "loss": 0.5019, |
| "step": 286 |
| }, |
| { |
| "epoch": 0.0792542396202401, |
| "grad_norm": 0.40182846784591675, |
| "learning_rate": 1e-05, |
| "loss": 0.4921, |
| "step": 288 |
| }, |
| { |
| "epoch": 0.07980461628426955, |
| "grad_norm": 0.4483119249343872, |
| "learning_rate": 1e-05, |
| "loss": 0.5042, |
| "step": 290 |
| }, |
| { |
| "epoch": 0.080354992948299, |
| "grad_norm": 0.3664950132369995, |
| "learning_rate": 1e-05, |
| "loss": 0.4758, |
| "step": 292 |
| }, |
| { |
| "epoch": 0.08090536961232844, |
| "grad_norm": 0.39573603868484497, |
| "learning_rate": 1e-05, |
| "loss": 0.4945, |
| "step": 294 |
| }, |
| { |
| "epoch": 0.08145574627635788, |
| "grad_norm": 0.44645532965660095, |
| "learning_rate": 1e-05, |
| "loss": 0.4964, |
| "step": 296 |
| }, |
| { |
| "epoch": 0.08200612294038732, |
| "grad_norm": 0.39092323184013367, |
| "learning_rate": 1e-05, |
| "loss": 0.4947, |
| "step": 298 |
| }, |
| { |
| "epoch": 0.08255649960441677, |
| "grad_norm": 0.41762229800224304, |
| "learning_rate": 1e-05, |
| "loss": 0.4949, |
| "step": 300 |
| }, |
| { |
| "epoch": 0.08310687626844622, |
| "grad_norm": 0.39803358912467957, |
| "learning_rate": 1e-05, |
| "loss": 0.4822, |
| "step": 302 |
| }, |
| { |
| "epoch": 0.08365725293247567, |
| "grad_norm": 0.39895498752593994, |
| "learning_rate": 1e-05, |
| "loss": 0.4893, |
| "step": 304 |
| }, |
| { |
| "epoch": 0.0842076295965051, |
| "grad_norm": 0.3883228600025177, |
| "learning_rate": 1e-05, |
| "loss": 0.5062, |
| "step": 306 |
| }, |
| { |
| "epoch": 0.08475800626053455, |
| "grad_norm": 0.4112294018268585, |
| "learning_rate": 1e-05, |
| "loss": 0.4979, |
| "step": 308 |
| }, |
| { |
| "epoch": 0.085308382924564, |
| "grad_norm": 0.3851683437824249, |
| "learning_rate": 1e-05, |
| "loss": 0.4934, |
| "step": 310 |
| }, |
| { |
| "epoch": 0.08585875958859344, |
| "grad_norm": 0.39728567004203796, |
| "learning_rate": 1e-05, |
| "loss": 0.4746, |
| "step": 312 |
| }, |
| { |
| "epoch": 0.08640913625262289, |
| "grad_norm": 0.3943733274936676, |
| "learning_rate": 1e-05, |
| "loss": 0.4904, |
| "step": 314 |
| }, |
| { |
| "epoch": 0.08695951291665234, |
| "grad_norm": 0.3954530656337738, |
| "learning_rate": 1e-05, |
| "loss": 0.4796, |
| "step": 316 |
| }, |
| { |
| "epoch": 0.08750988958068177, |
| "grad_norm": 0.41237205266952515, |
| "learning_rate": 1e-05, |
| "loss": 0.4908, |
| "step": 318 |
| }, |
| { |
| "epoch": 0.08806026624471122, |
| "grad_norm": 0.3923771381378174, |
| "learning_rate": 1e-05, |
| "loss": 0.4988, |
| "step": 320 |
| }, |
| { |
| "epoch": 0.08861064290874067, |
| "grad_norm": 0.38542094826698303, |
| "learning_rate": 1e-05, |
| "loss": 0.5027, |
| "step": 322 |
| }, |
| { |
| "epoch": 0.08916101957277012, |
| "grad_norm": 0.41598251461982727, |
| "learning_rate": 1e-05, |
| "loss": 0.4976, |
| "step": 324 |
| }, |
| { |
| "epoch": 0.08971139623679956, |
| "grad_norm": 0.40826794505119324, |
| "learning_rate": 1e-05, |
| "loss": 0.4929, |
| "step": 326 |
| }, |
| { |
| "epoch": 0.090261772900829, |
| "grad_norm": 0.39970022439956665, |
| "learning_rate": 1e-05, |
| "loss": 0.4946, |
| "step": 328 |
| }, |
| { |
| "epoch": 0.09081214956485845, |
| "grad_norm": 0.3739086985588074, |
| "learning_rate": 1e-05, |
| "loss": 0.4678, |
| "step": 330 |
| }, |
| { |
| "epoch": 0.0913625262288879, |
| "grad_norm": 0.3746420741081238, |
| "learning_rate": 1e-05, |
| "loss": 0.4757, |
| "step": 332 |
| }, |
| { |
| "epoch": 0.09191290289291734, |
| "grad_norm": 0.3976924419403076, |
| "learning_rate": 1e-05, |
| "loss": 0.487, |
| "step": 334 |
| }, |
| { |
| "epoch": 0.09246327955694679, |
| "grad_norm": 0.398971289396286, |
| "learning_rate": 1e-05, |
| "loss": 0.5077, |
| "step": 336 |
| }, |
| { |
| "epoch": 0.09301365622097624, |
| "grad_norm": 0.3937431871891022, |
| "learning_rate": 1e-05, |
| "loss": 0.4885, |
| "step": 338 |
| }, |
| { |
| "epoch": 0.09356403288500567, |
| "grad_norm": 0.395084410905838, |
| "learning_rate": 1e-05, |
| "loss": 0.4871, |
| "step": 340 |
| }, |
| { |
| "epoch": 0.09411440954903512, |
| "grad_norm": 0.3677273690700531, |
| "learning_rate": 1e-05, |
| "loss": 0.4813, |
| "step": 342 |
| }, |
| { |
| "epoch": 0.09466478621306457, |
| "grad_norm": 0.39645129442214966, |
| "learning_rate": 1e-05, |
| "loss": 0.4842, |
| "step": 344 |
| }, |
| { |
| "epoch": 0.09521516287709401, |
| "grad_norm": 0.3642916679382324, |
| "learning_rate": 1e-05, |
| "loss": 0.504, |
| "step": 346 |
| }, |
| { |
| "epoch": 0.09576553954112346, |
| "grad_norm": 0.40385907888412476, |
| "learning_rate": 1e-05, |
| "loss": 0.4933, |
| "step": 348 |
| }, |
| { |
| "epoch": 0.0963159162051529, |
| "grad_norm": 0.39063799381256104, |
| "learning_rate": 1e-05, |
| "loss": 0.4856, |
| "step": 350 |
| }, |
| { |
| "epoch": 0.09686629286918234, |
| "grad_norm": 0.38000059127807617, |
| "learning_rate": 1e-05, |
| "loss": 0.5001, |
| "step": 352 |
| }, |
| { |
| "epoch": 0.09741666953321179, |
| "grad_norm": 0.39380577206611633, |
| "learning_rate": 1e-05, |
| "loss": 0.4961, |
| "step": 354 |
| }, |
| { |
| "epoch": 0.09796704619724124, |
| "grad_norm": 0.39326363801956177, |
| "learning_rate": 1e-05, |
| "loss": 0.498, |
| "step": 356 |
| }, |
| { |
| "epoch": 0.09851742286127069, |
| "grad_norm": 0.3775707185268402, |
| "learning_rate": 1e-05, |
| "loss": 0.4792, |
| "step": 358 |
| }, |
| { |
| "epoch": 0.09906779952530012, |
| "grad_norm": 0.3770863115787506, |
| "learning_rate": 1e-05, |
| "loss": 0.4837, |
| "step": 360 |
| }, |
| { |
| "epoch": 0.09961817618932957, |
| "grad_norm": 0.41484272480010986, |
| "learning_rate": 1e-05, |
| "loss": 0.4739, |
| "step": 362 |
| }, |
| { |
| "epoch": 0.10016855285335902, |
| "grad_norm": 0.39758750796318054, |
| "learning_rate": 1e-05, |
| "loss": 0.4957, |
| "step": 364 |
| }, |
| { |
| "epoch": 0.10071892951738846, |
| "grad_norm": 0.43485164642333984, |
| "learning_rate": 1e-05, |
| "loss": 0.492, |
| "step": 366 |
| }, |
| { |
| "epoch": 0.10126930618141791, |
| "grad_norm": 0.40296798944473267, |
| "learning_rate": 1e-05, |
| "loss": 0.4977, |
| "step": 368 |
| }, |
| { |
| "epoch": 0.10181968284544736, |
| "grad_norm": 0.3818409740924835, |
| "learning_rate": 1e-05, |
| "loss": 0.481, |
| "step": 370 |
| }, |
| { |
| "epoch": 0.1023700595094768, |
| "grad_norm": 0.3949006199836731, |
| "learning_rate": 1e-05, |
| "loss": 0.5021, |
| "step": 372 |
| }, |
| { |
| "epoch": 0.10292043617350624, |
| "grad_norm": 0.4327391982078552, |
| "learning_rate": 1e-05, |
| "loss": 0.5036, |
| "step": 374 |
| }, |
| { |
| "epoch": 0.10347081283753569, |
| "grad_norm": 0.4008086025714874, |
| "learning_rate": 1e-05, |
| "loss": 0.4854, |
| "step": 376 |
| }, |
| { |
| "epoch": 0.10402118950156514, |
| "grad_norm": 0.4146427810192108, |
| "learning_rate": 1e-05, |
| "loss": 0.4933, |
| "step": 378 |
| }, |
| { |
| "epoch": 0.10457156616559458, |
| "grad_norm": 0.4073733389377594, |
| "learning_rate": 1e-05, |
| "loss": 0.4923, |
| "step": 380 |
| }, |
| { |
| "epoch": 0.10512194282962402, |
| "grad_norm": 0.40570083260536194, |
| "learning_rate": 1e-05, |
| "loss": 0.4806, |
| "step": 382 |
| }, |
| { |
| "epoch": 0.10567231949365347, |
| "grad_norm": 0.39516401290893555, |
| "learning_rate": 1e-05, |
| "loss": 0.5038, |
| "step": 384 |
| }, |
| { |
| "epoch": 0.10622269615768291, |
| "grad_norm": 0.3886268138885498, |
| "learning_rate": 1e-05, |
| "loss": 0.4737, |
| "step": 386 |
| }, |
| { |
| "epoch": 0.10677307282171236, |
| "grad_norm": 0.3846561014652252, |
| "learning_rate": 1e-05, |
| "loss": 0.4852, |
| "step": 388 |
| }, |
| { |
| "epoch": 0.10732344948574181, |
| "grad_norm": 0.3952987492084503, |
| "learning_rate": 1e-05, |
| "loss": 0.496, |
| "step": 390 |
| }, |
| { |
| "epoch": 0.10787382614977124, |
| "grad_norm": 0.3840448558330536, |
| "learning_rate": 1e-05, |
| "loss": 0.4976, |
| "step": 392 |
| }, |
| { |
| "epoch": 0.10842420281380069, |
| "grad_norm": 0.38074344396591187, |
| "learning_rate": 1e-05, |
| "loss": 0.508, |
| "step": 394 |
| }, |
| { |
| "epoch": 0.10897457947783014, |
| "grad_norm": 0.4216584861278534, |
| "learning_rate": 1e-05, |
| "loss": 0.4841, |
| "step": 396 |
| }, |
| { |
| "epoch": 0.10952495614185959, |
| "grad_norm": 0.39932167530059814, |
| "learning_rate": 1e-05, |
| "loss": 0.4783, |
| "step": 398 |
| }, |
| { |
| "epoch": 0.11007533280588903, |
| "grad_norm": 0.3687106966972351, |
| "learning_rate": 1e-05, |
| "loss": 0.4747, |
| "step": 400 |
| }, |
| { |
| "epoch": 0.11007533280588903, |
| "eval_merge_loss": 0.42975950241088867, |
| "eval_merge_runtime": 600.4283, |
| "eval_merge_samples_per_second": 56.213, |
| "eval_merge_steps_per_second": 2.343, |
| "step": 400 |
| }, |
| { |
| "epoch": 0.11007533280588903, |
| "eval_new_aug_datas_filtered.json_loss": 0.558424174785614, |
| "eval_new_aug_datas_filtered.json_runtime": 10.4015, |
| "eval_new_aug_datas_filtered.json_samples_per_second": 73.74, |
| "eval_new_aug_datas_filtered.json_steps_per_second": 3.076, |
| "step": 400 |
| }, |
| { |
| "epoch": 0.11007533280588903, |
| "eval_sharegpt_gpt4.json_loss": 0.8120941519737244, |
| "eval_sharegpt_gpt4.json_runtime": 31.6378, |
| "eval_sharegpt_gpt4.json_samples_per_second": 58.822, |
| "eval_sharegpt_gpt4.json_steps_per_second": 2.465, |
| "step": 400 |
| }, |
| { |
| "epoch": 0.11007533280588903, |
| "eval_Table_GPT.json_loss": 0.08201506733894348, |
| "eval_Table_GPT.json_runtime": 24.9859, |
| "eval_Table_GPT.json_samples_per_second": 83.767, |
| "eval_Table_GPT.json_steps_per_second": 3.522, |
| "step": 400 |
| }, |
| { |
| "epoch": 0.11007533280588903, |
| "eval_gpt_4o_200k.json_loss": 0.8391836881637573, |
| "eval_gpt_4o_200k.json_runtime": 48.456, |
| "eval_gpt_4o_200k.json_samples_per_second": 129.623, |
| "eval_gpt_4o_200k.json_steps_per_second": 5.407, |
| "step": 400 |
| }, |
| { |
| "epoch": 0.11007533280588903, |
| "eval_multi_turn_datas.json_loss": 0.37471804022789, |
| "eval_multi_turn_datas.json_runtime": 75.4526, |
| "eval_multi_turn_datas.json_samples_per_second": 53.04, |
| "eval_multi_turn_datas.json_steps_per_second": 2.213, |
| "step": 400 |
| }, |
| { |
| "epoch": 0.11007533280588903, |
| "eval_table_python_code_datas.json_loss": 0.3102189898490906, |
| "eval_table_python_code_datas.json_runtime": 42.9961, |
| "eval_table_python_code_datas.json_samples_per_second": 50.214, |
| "eval_table_python_code_datas.json_steps_per_second": 2.093, |
| "step": 400 |
| }, |
| { |
| "epoch": 0.11007533280588903, |
| "eval_tabular_llm_data.json_loss": 0.16565443575382233, |
| "eval_tabular_llm_data.json_runtime": 8.5095, |
| "eval_tabular_llm_data.json_samples_per_second": 28.909, |
| "eval_tabular_llm_data.json_steps_per_second": 1.293, |
| "step": 400 |
| }, |
| { |
| "epoch": 0.11007533280588903, |
| "eval_python_code_critic_21k.json_loss": 0.6095640063285828, |
| "eval_python_code_critic_21k.json_runtime": 3.2106, |
| "eval_python_code_critic_21k.json_samples_per_second": 185.945, |
| "eval_python_code_critic_21k.json_steps_per_second": 7.787, |
| "step": 400 |
| }, |
| { |
| "epoch": 0.11007533280588903, |
| "eval_all_merge_table_dataset.json_loss": 0.09411227703094482, |
| "eval_all_merge_table_dataset.json_runtime": 23.2459, |
| "eval_all_merge_table_dataset.json_samples_per_second": 30.629, |
| "eval_all_merge_table_dataset.json_steps_per_second": 1.291, |
| "step": 400 |
| }, |
| { |
| "epoch": 0.11007533280588903, |
| "eval_code_feedback_multi_turn.json_loss": 0.6033111810684204, |
| "eval_code_feedback_multi_turn.json_runtime": 32.3176, |
| "eval_code_feedback_multi_turn.json_samples_per_second": 68.105, |
| "eval_code_feedback_multi_turn.json_steps_per_second": 2.847, |
| "step": 400 |
| }, |
| { |
| "epoch": 0.11007533280588903, |
| "eval_ultrainteract_sft.json_loss": 0.4417967200279236, |
| "eval_ultrainteract_sft.json_runtime": 8.6225, |
| "eval_ultrainteract_sft.json_samples_per_second": 168.86, |
| "eval_ultrainteract_sft.json_steps_per_second": 7.075, |
| "step": 400 |
| }, |
| { |
| "epoch": 0.11007533280588903, |
| "eval_synthetic_text_to_sql.json_loss": 0.10689640045166016, |
| "eval_synthetic_text_to_sql.json_runtime": 0.1258, |
| "eval_synthetic_text_to_sql.json_samples_per_second": 270.238, |
| "eval_synthetic_text_to_sql.json_steps_per_second": 15.896, |
| "step": 400 |
| }, |
| { |
| "epoch": 0.11007533280588903, |
| "eval_sft_react_sql_datas.json_loss": 0.6745051145553589, |
| "eval_sft_react_sql_datas.json_runtime": 7.8354, |
| "eval_sft_react_sql_datas.json_samples_per_second": 40.074, |
| "eval_sft_react_sql_datas.json_steps_per_second": 1.787, |
| "step": 400 |
| }, |
| { |
| "epoch": 0.11007533280588903, |
| "eval_all_merge_code.json_loss": 0.3035649061203003, |
| "eval_all_merge_code.json_runtime": 0.3282, |
| "eval_all_merge_code.json_samples_per_second": 191.936, |
| "eval_all_merge_code.json_steps_per_second": 9.14, |
| "step": 400 |
| }, |
| { |
| "epoch": 0.11007533280588903, |
| "eval_magpie_datas.json_loss": 0.4511661231517792, |
| "eval_magpie_datas.json_runtime": 2.2095, |
| "eval_magpie_datas.json_samples_per_second": 77.847, |
| "eval_magpie_datas.json_steps_per_second": 3.621, |
| "step": 400 |
| }, |
| { |
| "epoch": 0.11007533280588903, |
| "eval_train_data_for_qwen.json_loss": 0.012529651634395123, |
| "eval_train_data_for_qwen.json_runtime": 0.2431, |
| "eval_train_data_for_qwen.json_samples_per_second": 41.135, |
| "eval_train_data_for_qwen.json_steps_per_second": 4.113, |
| "step": 400 |
| }, |
| { |
| "epoch": 0.11007533280588903, |
| "eval_alpaca_cleaned.json_loss": 0.9377387166023254, |
| "eval_alpaca_cleaned.json_runtime": 0.115, |
| "eval_alpaca_cleaned.json_samples_per_second": 234.777, |
| "eval_alpaca_cleaned.json_steps_per_second": 17.391, |
| "step": 400 |
| }, |
| { |
| "epoch": 0.11007533280588903, |
| "eval_agent_instruct.json_loss": 0.2363067865371704, |
| "eval_agent_instruct.json_runtime": 0.5121, |
| "eval_agent_instruct.json_samples_per_second": 93.728, |
| "eval_agent_instruct.json_steps_per_second": 3.905, |
| "step": 400 |
| }, |
| { |
| "epoch": 0.11007533280588903, |
| "eval_MathInstruct.json_loss": 0.21367128193378448, |
| "eval_MathInstruct.json_runtime": 0.3647, |
| "eval_MathInstruct.json_samples_per_second": 156.291, |
| "eval_MathInstruct.json_steps_per_second": 8.226, |
| "step": 400 |
| }, |
| { |
| "epoch": 0.11007533280588903, |
| "eval_tested_143k_python_alpaca.json_loss": 0.4474259316921234, |
| "eval_tested_143k_python_alpaca.json_runtime": 0.3019, |
| "eval_tested_143k_python_alpaca.json_samples_per_second": 112.631, |
| "eval_tested_143k_python_alpaca.json_steps_per_second": 6.625, |
| "step": 400 |
| }, |
| { |
| "epoch": 0.11007533280588903, |
| "eval_xlam_function_calling_60k.json_loss": 0.011296543292701244, |
| "eval_xlam_function_calling_60k.json_runtime": 0.1, |
| "eval_xlam_function_calling_60k.json_samples_per_second": 230.102, |
| "eval_xlam_function_calling_60k.json_steps_per_second": 10.004, |
| "step": 400 |
| }, |
| { |
| "epoch": 0.11007533280588903, |
| "eval_alpaca_data_gpt4_chinese.json_loss": 1.645748496055603, |
| "eval_alpaca_data_gpt4_chinese.json_runtime": 0.0515, |
| "eval_alpaca_data_gpt4_chinese.json_samples_per_second": 310.905, |
| "eval_alpaca_data_gpt4_chinese.json_steps_per_second": 19.432, |
| "step": 400 |
| }, |
| { |
| "epoch": 0.11007533280588903, |
| "eval_alpaca_gpt4_zh.json_loss": 0.9886136651039124, |
| "eval_alpaca_gpt4_zh.json_runtime": 0.0503, |
| "eval_alpaca_gpt4_zh.json_samples_per_second": 218.827, |
| "eval_alpaca_gpt4_zh.json_steps_per_second": 19.893, |
| "step": 400 |
| }, |
| { |
| "epoch": 0.11007533280588903, |
| "eval_codefeedback_filtered_instruction.json_loss": 0.5969922542572021, |
| "eval_codefeedback_filtered_instruction.json_runtime": 0.4851, |
| "eval_codefeedback_filtered_instruction.json_samples_per_second": 41.226, |
| "eval_codefeedback_filtered_instruction.json_steps_per_second": 2.061, |
| "step": 400 |
| }, |
| { |
| "epoch": 0.11062570946991848, |
| "grad_norm": 0.40487441420555115, |
| "learning_rate": 1e-05, |
| "loss": 0.4811, |
| "step": 402 |
| }, |
| { |
| "epoch": 0.11117608613394792, |
| "grad_norm": 0.39143064618110657, |
| "learning_rate": 1e-05, |
| "loss": 0.4704, |
| "step": 404 |
| }, |
| { |
| "epoch": 0.11172646279797736, |
| "grad_norm": 0.46816787123680115, |
| "learning_rate": 1e-05, |
| "loss": 0.4941, |
| "step": 406 |
| }, |
| { |
| "epoch": 0.11227683946200681, |
| "grad_norm": 0.37707188725471497, |
| "learning_rate": 1e-05, |
| "loss": 0.4839, |
| "step": 408 |
| }, |
| { |
| "epoch": 0.11282721612603626, |
| "grad_norm": 0.3780951201915741, |
| "learning_rate": 1e-05, |
| "loss": 0.4889, |
| "step": 410 |
| }, |
| { |
| "epoch": 0.1133775927900657, |
| "grad_norm": 0.36941519379615784, |
| "learning_rate": 1e-05, |
| "loss": 0.4747, |
| "step": 412 |
| }, |
| { |
| "epoch": 0.11392796945409514, |
| "grad_norm": 0.39626002311706543, |
| "learning_rate": 1e-05, |
| "loss": 0.4872, |
| "step": 414 |
| }, |
| { |
| "epoch": 0.11447834611812459, |
| "grad_norm": 0.38315075635910034, |
| "learning_rate": 1e-05, |
| "loss": 0.471, |
| "step": 416 |
| }, |
| { |
| "epoch": 0.11502872278215404, |
| "grad_norm": 0.37200862169265747, |
| "learning_rate": 1e-05, |
| "loss": 0.4891, |
| "step": 418 |
| }, |
| { |
| "epoch": 0.11557909944618348, |
| "grad_norm": 0.39199399948120117, |
| "learning_rate": 1e-05, |
| "loss": 0.4807, |
| "step": 420 |
| }, |
| { |
| "epoch": 0.11612947611021293, |
| "grad_norm": 0.37726107239723206, |
| "learning_rate": 1e-05, |
| "loss": 0.4834, |
| "step": 422 |
| }, |
| { |
| "epoch": 0.11667985277424237, |
| "grad_norm": 0.38188016414642334, |
| "learning_rate": 1e-05, |
| "loss": 0.4853, |
| "step": 424 |
| }, |
| { |
| "epoch": 0.11723022943827181, |
| "grad_norm": 0.39772850275039673, |
| "learning_rate": 1e-05, |
| "loss": 0.4895, |
| "step": 426 |
| }, |
| { |
| "epoch": 0.11778060610230126, |
| "grad_norm": 0.3797503411769867, |
| "learning_rate": 1e-05, |
| "loss": 0.4818, |
| "step": 428 |
| }, |
| { |
| "epoch": 0.11833098276633071, |
| "grad_norm": 0.39962416887283325, |
| "learning_rate": 1e-05, |
| "loss": 0.4802, |
| "step": 430 |
| }, |
| { |
| "epoch": 0.11888135943036016, |
| "grad_norm": 0.37405237555503845, |
| "learning_rate": 1e-05, |
| "loss": 0.4879, |
| "step": 432 |
| }, |
| { |
| "epoch": 0.1194317360943896, |
| "grad_norm": 0.39297720789909363, |
| "learning_rate": 1e-05, |
| "loss": 0.4853, |
| "step": 434 |
| }, |
| { |
| "epoch": 0.11998211275841904, |
| "grad_norm": 0.3871022164821625, |
| "learning_rate": 1e-05, |
| "loss": 0.4845, |
| "step": 436 |
| }, |
| { |
| "epoch": 0.12053248942244849, |
| "grad_norm": 0.43845734000205994, |
| "learning_rate": 1e-05, |
| "loss": 0.4865, |
| "step": 438 |
| }, |
| { |
| "epoch": 0.12108286608647793, |
| "grad_norm": 0.3888757526874542, |
| "learning_rate": 1e-05, |
| "loss": 0.4862, |
| "step": 440 |
| }, |
| { |
| "epoch": 0.12163324275050738, |
| "grad_norm": 0.3801029920578003, |
| "learning_rate": 1e-05, |
| "loss": 0.4751, |
| "step": 442 |
| }, |
| { |
| "epoch": 0.12218361941453683, |
| "grad_norm": 0.3861992657184601, |
| "learning_rate": 1e-05, |
| "loss": 0.5026, |
| "step": 444 |
| }, |
| { |
| "epoch": 0.12273399607856626, |
| "grad_norm": 0.40307343006134033, |
| "learning_rate": 1e-05, |
| "loss": 0.4901, |
| "step": 446 |
| }, |
| { |
| "epoch": 0.12328437274259571, |
| "grad_norm": 0.36803606152534485, |
| "learning_rate": 1e-05, |
| "loss": 0.4927, |
| "step": 448 |
| }, |
| { |
| "epoch": 0.12383474940662516, |
| "grad_norm": 0.40266790986061096, |
| "learning_rate": 1e-05, |
| "loss": 0.4663, |
| "step": 450 |
| }, |
| { |
| "epoch": 0.1243851260706546, |
| "grad_norm": 0.3870522975921631, |
| "learning_rate": 1e-05, |
| "loss": 0.475, |
| "step": 452 |
| }, |
| { |
| "epoch": 0.12493550273468405, |
| "grad_norm": 0.3978688716888428, |
| "learning_rate": 1e-05, |
| "loss": 0.4979, |
| "step": 454 |
| }, |
| { |
| "epoch": 0.1254858793987135, |
| "grad_norm": 0.3799881935119629, |
| "learning_rate": 1e-05, |
| "loss": 0.4802, |
| "step": 456 |
| }, |
| { |
| "epoch": 0.12603625606274294, |
| "grad_norm": 0.3795452415943146, |
| "learning_rate": 1e-05, |
| "loss": 0.4878, |
| "step": 458 |
| }, |
| { |
| "epoch": 0.1265866327267724, |
| "grad_norm": 0.3865358233451843, |
| "learning_rate": 1e-05, |
| "loss": 0.4825, |
| "step": 460 |
| }, |
| { |
| "epoch": 0.12713700939080183, |
| "grad_norm": 0.3646644353866577, |
| "learning_rate": 1e-05, |
| "loss": 0.4725, |
| "step": 462 |
| }, |
| { |
| "epoch": 0.12768738605483126, |
| "grad_norm": 0.3851023018360138, |
| "learning_rate": 1e-05, |
| "loss": 0.4849, |
| "step": 464 |
| }, |
| { |
| "epoch": 0.12823776271886073, |
| "grad_norm": 0.37587490677833557, |
| "learning_rate": 1e-05, |
| "loss": 0.4729, |
| "step": 466 |
| }, |
| { |
| "epoch": 0.12878813938289016, |
| "grad_norm": 0.3559257686138153, |
| "learning_rate": 1e-05, |
| "loss": 0.4826, |
| "step": 468 |
| }, |
| { |
| "epoch": 0.12933851604691962, |
| "grad_norm": 0.3967975974082947, |
| "learning_rate": 1e-05, |
| "loss": 0.4917, |
| "step": 470 |
| }, |
| { |
| "epoch": 0.12988889271094906, |
| "grad_norm": 0.4064919650554657, |
| "learning_rate": 1e-05, |
| "loss": 0.5018, |
| "step": 472 |
| }, |
| { |
| "epoch": 0.1304392693749785, |
| "grad_norm": 0.3609434962272644, |
| "learning_rate": 1e-05, |
| "loss": 0.4805, |
| "step": 474 |
| }, |
| { |
| "epoch": 0.13098964603900795, |
| "grad_norm": 0.4229820668697357, |
| "learning_rate": 1e-05, |
| "loss": 0.4756, |
| "step": 476 |
| }, |
| { |
| "epoch": 0.13154002270303738, |
| "grad_norm": 0.3882080018520355, |
| "learning_rate": 1e-05, |
| "loss": 0.4946, |
| "step": 478 |
| }, |
| { |
| "epoch": 0.13209039936706685, |
| "grad_norm": 0.37811529636383057, |
| "learning_rate": 1e-05, |
| "loss": 0.495, |
| "step": 480 |
| }, |
| { |
| "epoch": 0.13264077603109628, |
| "grad_norm": 0.4139231741428375, |
| "learning_rate": 1e-05, |
| "loss": 0.4722, |
| "step": 482 |
| }, |
| { |
| "epoch": 0.13319115269512571, |
| "grad_norm": 0.3836536705493927, |
| "learning_rate": 1e-05, |
| "loss": 0.4795, |
| "step": 484 |
| }, |
| { |
| "epoch": 0.13374152935915518, |
| "grad_norm": 0.39434006810188293, |
| "learning_rate": 1e-05, |
| "loss": 0.4783, |
| "step": 486 |
| }, |
| { |
| "epoch": 0.1342919060231846, |
| "grad_norm": 0.3847144544124603, |
| "learning_rate": 1e-05, |
| "loss": 0.4751, |
| "step": 488 |
| }, |
| { |
| "epoch": 0.13484228268721407, |
| "grad_norm": 0.4081107974052429, |
| "learning_rate": 1e-05, |
| "loss": 0.4947, |
| "step": 490 |
| }, |
| { |
| "epoch": 0.1353926593512435, |
| "grad_norm": 0.3780671954154968, |
| "learning_rate": 1e-05, |
| "loss": 0.4932, |
| "step": 492 |
| }, |
| { |
| "epoch": 0.13594303601527294, |
| "grad_norm": 0.39522022008895874, |
| "learning_rate": 1e-05, |
| "loss": 0.4868, |
| "step": 494 |
| }, |
| { |
| "epoch": 0.1364934126793024, |
| "grad_norm": 0.3978594243526459, |
| "learning_rate": 1e-05, |
| "loss": 0.4895, |
| "step": 496 |
| }, |
| { |
| "epoch": 0.13704378934333183, |
| "grad_norm": 0.40067028999328613, |
| "learning_rate": 1e-05, |
| "loss": 0.4841, |
| "step": 498 |
| }, |
| { |
| "epoch": 0.1375941660073613, |
| "grad_norm": 0.38525891304016113, |
| "learning_rate": 1e-05, |
| "loss": 0.4769, |
| "step": 500 |
| }, |
| { |
| "epoch": 0.13814454267139073, |
| "grad_norm": 0.3708615303039551, |
| "learning_rate": 1e-05, |
| "loss": 0.4787, |
| "step": 502 |
| }, |
| { |
| "epoch": 0.1386949193354202, |
| "grad_norm": 0.3583269417285919, |
| "learning_rate": 1e-05, |
| "loss": 0.4905, |
| "step": 504 |
| }, |
| { |
| "epoch": 0.13924529599944963, |
| "grad_norm": 0.4004143178462982, |
| "learning_rate": 1e-05, |
| "loss": 0.4797, |
| "step": 506 |
| }, |
| { |
| "epoch": 0.13979567266347906, |
| "grad_norm": 0.3877711594104767, |
| "learning_rate": 1e-05, |
| "loss": 0.4968, |
| "step": 508 |
| }, |
| { |
| "epoch": 0.14034604932750852, |
| "grad_norm": 0.394502729177475, |
| "learning_rate": 1e-05, |
| "loss": 0.4743, |
| "step": 510 |
| }, |
| { |
| "epoch": 0.14089642599153795, |
| "grad_norm": 0.3829086720943451, |
| "learning_rate": 1e-05, |
| "loss": 0.4769, |
| "step": 512 |
| }, |
| { |
| "epoch": 0.14144680265556742, |
| "grad_norm": 0.3849917948246002, |
| "learning_rate": 1e-05, |
| "loss": 0.4763, |
| "step": 514 |
| }, |
| { |
| "epoch": 0.14199717931959685, |
| "grad_norm": 0.40810078382492065, |
| "learning_rate": 1e-05, |
| "loss": 0.4904, |
| "step": 516 |
| }, |
| { |
| "epoch": 0.14254755598362628, |
| "grad_norm": 0.3982490599155426, |
| "learning_rate": 1e-05, |
| "loss": 0.4762, |
| "step": 518 |
| }, |
| { |
| "epoch": 0.14309793264765575, |
| "grad_norm": 0.36841145157814026, |
| "learning_rate": 1e-05, |
| "loss": 0.4745, |
| "step": 520 |
| }, |
| { |
| "epoch": 0.14364830931168518, |
| "grad_norm": 0.3805830180644989, |
| "learning_rate": 1e-05, |
| "loss": 0.4811, |
| "step": 522 |
| }, |
| { |
| "epoch": 0.14419868597571464, |
| "grad_norm": 0.40074169635772705, |
| "learning_rate": 1e-05, |
| "loss": 0.4923, |
| "step": 524 |
| }, |
| { |
| "epoch": 0.14474906263974407, |
| "grad_norm": 0.42140403389930725, |
| "learning_rate": 1e-05, |
| "loss": 0.4972, |
| "step": 526 |
| }, |
| { |
| "epoch": 0.1452994393037735, |
| "grad_norm": 0.38489535450935364, |
| "learning_rate": 1e-05, |
| "loss": 0.4921, |
| "step": 528 |
| }, |
| { |
| "epoch": 0.14584981596780297, |
| "grad_norm": 0.38449668884277344, |
| "learning_rate": 1e-05, |
| "loss": 0.4883, |
| "step": 530 |
| }, |
| { |
| "epoch": 0.1464001926318324, |
| "grad_norm": 0.38009950518608093, |
| "learning_rate": 1e-05, |
| "loss": 0.4808, |
| "step": 532 |
| }, |
| { |
| "epoch": 0.14695056929586187, |
| "grad_norm": 0.3916541337966919, |
| "learning_rate": 1e-05, |
| "loss": 0.4708, |
| "step": 534 |
| }, |
| { |
| "epoch": 0.1475009459598913, |
| "grad_norm": 0.39856135845184326, |
| "learning_rate": 1e-05, |
| "loss": 0.4933, |
| "step": 536 |
| }, |
| { |
| "epoch": 0.14805132262392073, |
| "grad_norm": 0.3804597556591034, |
| "learning_rate": 1e-05, |
| "loss": 0.4772, |
| "step": 538 |
| }, |
| { |
| "epoch": 0.1486016992879502, |
| "grad_norm": 0.39584964513778687, |
| "learning_rate": 1e-05, |
| "loss": 0.4746, |
| "step": 540 |
| }, |
| { |
| "epoch": 0.14915207595197963, |
| "grad_norm": 0.36922863125801086, |
| "learning_rate": 1e-05, |
| "loss": 0.4911, |
| "step": 542 |
| }, |
| { |
| "epoch": 0.1497024526160091, |
| "grad_norm": 0.38762298226356506, |
| "learning_rate": 1e-05, |
| "loss": 0.4744, |
| "step": 544 |
| }, |
| { |
| "epoch": 0.15025282928003852, |
| "grad_norm": 0.38803887367248535, |
| "learning_rate": 1e-05, |
| "loss": 0.4776, |
| "step": 546 |
| }, |
| { |
| "epoch": 0.15080320594406796, |
| "grad_norm": 0.39409226179122925, |
| "learning_rate": 1e-05, |
| "loss": 0.4789, |
| "step": 548 |
| }, |
| { |
| "epoch": 0.15135358260809742, |
| "grad_norm": 0.4141768217086792, |
| "learning_rate": 1e-05, |
| "loss": 0.4752, |
| "step": 550 |
| }, |
| { |
| "epoch": 0.15190395927212685, |
| "grad_norm": 0.3770216703414917, |
| "learning_rate": 1e-05, |
| "loss": 0.4689, |
| "step": 552 |
| }, |
| { |
| "epoch": 0.15245433593615632, |
| "grad_norm": 0.3929697573184967, |
| "learning_rate": 1e-05, |
| "loss": 0.4861, |
| "step": 554 |
| }, |
| { |
| "epoch": 0.15300471260018575, |
| "grad_norm": 0.3859105706214905, |
| "learning_rate": 1e-05, |
| "loss": 0.4799, |
| "step": 556 |
| }, |
| { |
| "epoch": 0.1535550892642152, |
| "grad_norm": 0.41044744849205017, |
| "learning_rate": 1e-05, |
| "loss": 0.4911, |
| "step": 558 |
| }, |
| { |
| "epoch": 0.15410546592824464, |
| "grad_norm": 0.36859771609306335, |
| "learning_rate": 1e-05, |
| "loss": 0.4653, |
| "step": 560 |
| }, |
| { |
| "epoch": 0.15465584259227408, |
| "grad_norm": 0.39258813858032227, |
| "learning_rate": 1e-05, |
| "loss": 0.4769, |
| "step": 562 |
| }, |
| { |
| "epoch": 0.15520621925630354, |
| "grad_norm": 0.38241100311279297, |
| "learning_rate": 1e-05, |
| "loss": 0.4821, |
| "step": 564 |
| }, |
| { |
| "epoch": 0.15575659592033297, |
| "grad_norm": 0.4107513427734375, |
| "learning_rate": 1e-05, |
| "loss": 0.4746, |
| "step": 566 |
| }, |
| { |
| "epoch": 0.15630697258436244, |
| "grad_norm": 0.3872488737106323, |
| "learning_rate": 1e-05, |
| "loss": 0.4817, |
| "step": 568 |
| }, |
| { |
| "epoch": 0.15685734924839187, |
| "grad_norm": 0.3712390065193176, |
| "learning_rate": 1e-05, |
| "loss": 0.4944, |
| "step": 570 |
| }, |
| { |
| "epoch": 0.1574077259124213, |
| "grad_norm": 0.413503497838974, |
| "learning_rate": 1e-05, |
| "loss": 0.4774, |
| "step": 572 |
| }, |
| { |
| "epoch": 0.15795810257645077, |
| "grad_norm": 0.35706543922424316, |
| "learning_rate": 1e-05, |
| "loss": 0.4743, |
| "step": 574 |
| }, |
| { |
| "epoch": 0.1585084792404802, |
| "grad_norm": 0.39815768599510193, |
| "learning_rate": 1e-05, |
| "loss": 0.4846, |
| "step": 576 |
| }, |
| { |
| "epoch": 0.15905885590450966, |
| "grad_norm": 0.38346678018569946, |
| "learning_rate": 1e-05, |
| "loss": 0.4633, |
| "step": 578 |
| }, |
| { |
| "epoch": 0.1596092325685391, |
| "grad_norm": 0.3905611038208008, |
| "learning_rate": 1e-05, |
| "loss": 0.4776, |
| "step": 580 |
| }, |
| { |
| "epoch": 0.16015960923256853, |
| "grad_norm": 0.3790382742881775, |
| "learning_rate": 1e-05, |
| "loss": 0.4892, |
| "step": 582 |
| }, |
| { |
| "epoch": 0.160709985896598, |
| "grad_norm": 0.37033775448799133, |
| "learning_rate": 1e-05, |
| "loss": 0.4848, |
| "step": 584 |
| }, |
| { |
| "epoch": 0.16126036256062742, |
| "grad_norm": 0.3686079680919647, |
| "learning_rate": 1e-05, |
| "loss": 0.4514, |
| "step": 586 |
| }, |
| { |
| "epoch": 0.16181073922465689, |
| "grad_norm": 0.3836509883403778, |
| "learning_rate": 1e-05, |
| "loss": 0.4859, |
| "step": 588 |
| }, |
| { |
| "epoch": 0.16236111588868632, |
| "grad_norm": 0.40387076139450073, |
| "learning_rate": 1e-05, |
| "loss": 0.485, |
| "step": 590 |
| }, |
| { |
| "epoch": 0.16291149255271575, |
| "grad_norm": 0.3850373327732086, |
| "learning_rate": 1e-05, |
| "loss": 0.4843, |
| "step": 592 |
| }, |
| { |
| "epoch": 0.16346186921674521, |
| "grad_norm": 0.3814505934715271, |
| "learning_rate": 1e-05, |
| "loss": 0.4749, |
| "step": 594 |
| }, |
| { |
| "epoch": 0.16401224588077465, |
| "grad_norm": 0.35501739382743835, |
| "learning_rate": 1e-05, |
| "loss": 0.4645, |
| "step": 596 |
| }, |
| { |
| "epoch": 0.1645626225448041, |
| "grad_norm": 0.34997090697288513, |
| "learning_rate": 1e-05, |
| "loss": 0.4687, |
| "step": 598 |
| }, |
| { |
| "epoch": 0.16511299920883354, |
| "grad_norm": 0.365212619304657, |
| "learning_rate": 1e-05, |
| "loss": 0.4956, |
| "step": 600 |
| }, |
| { |
| "epoch": 0.16511299920883354, |
| "eval_merge_loss": 0.4216049909591675, |
| "eval_merge_runtime": 600.0688, |
| "eval_merge_samples_per_second": 56.247, |
| "eval_merge_steps_per_second": 2.345, |
| "step": 600 |
| }, |
| { |
| "epoch": 0.16511299920883354, |
| "eval_new_aug_datas_filtered.json_loss": 0.5500591397285461, |
| "eval_new_aug_datas_filtered.json_runtime": 10.5295, |
| "eval_new_aug_datas_filtered.json_samples_per_second": 72.843, |
| "eval_new_aug_datas_filtered.json_steps_per_second": 3.039, |
| "step": 600 |
| }, |
| { |
| "epoch": 0.16511299920883354, |
| "eval_sharegpt_gpt4.json_loss": 0.8046284914016724, |
| "eval_sharegpt_gpt4.json_runtime": 31.7366, |
| "eval_sharegpt_gpt4.json_samples_per_second": 58.639, |
| "eval_sharegpt_gpt4.json_steps_per_second": 2.458, |
| "step": 600 |
| }, |
| { |
| "epoch": 0.16511299920883354, |
| "eval_Table_GPT.json_loss": 0.07981107383966446, |
| "eval_Table_GPT.json_runtime": 25.0085, |
| "eval_Table_GPT.json_samples_per_second": 83.691, |
| "eval_Table_GPT.json_steps_per_second": 3.519, |
| "step": 600 |
| }, |
| { |
| "epoch": 0.16511299920883354, |
| "eval_gpt_4o_200k.json_loss": 0.8323716521263123, |
| "eval_gpt_4o_200k.json_runtime": 48.5988, |
| "eval_gpt_4o_200k.json_samples_per_second": 129.242, |
| "eval_gpt_4o_200k.json_steps_per_second": 5.391, |
| "step": 600 |
| }, |
| { |
| "epoch": 0.16511299920883354, |
| "eval_multi_turn_datas.json_loss": 0.36492469906806946, |
| "eval_multi_turn_datas.json_runtime": 75.8696, |
| "eval_multi_turn_datas.json_samples_per_second": 52.748, |
| "eval_multi_turn_datas.json_steps_per_second": 2.201, |
| "step": 600 |
| }, |
| { |
| "epoch": 0.16511299920883354, |
| "eval_table_python_code_datas.json_loss": 0.29984766244888306, |
| "eval_table_python_code_datas.json_runtime": 43.1945, |
| "eval_table_python_code_datas.json_samples_per_second": 49.983, |
| "eval_table_python_code_datas.json_steps_per_second": 2.084, |
| "step": 600 |
| }, |
| { |
| "epoch": 0.16511299920883354, |
| "eval_tabular_llm_data.json_loss": 0.13250145316123962, |
| "eval_tabular_llm_data.json_runtime": 8.5476, |
| "eval_tabular_llm_data.json_samples_per_second": 28.78, |
| "eval_tabular_llm_data.json_steps_per_second": 1.287, |
| "step": 600 |
| }, |
| { |
| "epoch": 0.16511299920883354, |
| "eval_python_code_critic_21k.json_loss": 0.599878191947937, |
| "eval_python_code_critic_21k.json_runtime": 3.2358, |
| "eval_python_code_critic_21k.json_samples_per_second": 184.496, |
| "eval_python_code_critic_21k.json_steps_per_second": 7.726, |
| "step": 600 |
| }, |
| { |
| "epoch": 0.16511299920883354, |
| "eval_all_merge_table_dataset.json_loss": 0.08627181500196457, |
| "eval_all_merge_table_dataset.json_runtime": 23.3808, |
| "eval_all_merge_table_dataset.json_samples_per_second": 30.452, |
| "eval_all_merge_table_dataset.json_steps_per_second": 1.283, |
| "step": 600 |
| }, |
| { |
| "epoch": 0.16511299920883354, |
| "eval_code_feedback_multi_turn.json_loss": 0.5982062220573425, |
| "eval_code_feedback_multi_turn.json_runtime": 32.4617, |
| "eval_code_feedback_multi_turn.json_samples_per_second": 67.803, |
| "eval_code_feedback_multi_turn.json_steps_per_second": 2.834, |
| "step": 600 |
| }, |
| { |
| "epoch": 0.16511299920883354, |
| "eval_ultrainteract_sft.json_loss": 0.4367137849330902, |
| "eval_ultrainteract_sft.json_runtime": 8.672, |
| "eval_ultrainteract_sft.json_samples_per_second": 167.896, |
| "eval_ultrainteract_sft.json_steps_per_second": 7.034, |
| "step": 600 |
| }, |
| { |
| "epoch": 0.16511299920883354, |
| "eval_synthetic_text_to_sql.json_loss": 0.1079096570611, |
| "eval_synthetic_text_to_sql.json_runtime": 0.1265, |
| "eval_synthetic_text_to_sql.json_samples_per_second": 268.769, |
| "eval_synthetic_text_to_sql.json_steps_per_second": 15.81, |
| "step": 600 |
| }, |
| { |
| "epoch": 0.16511299920883354, |
| "eval_sft_react_sql_datas.json_loss": 0.6610473990440369, |
| "eval_sft_react_sql_datas.json_runtime": 7.8536, |
| "eval_sft_react_sql_datas.json_samples_per_second": 39.982, |
| "eval_sft_react_sql_datas.json_steps_per_second": 1.783, |
| "step": 600 |
| }, |
| { |
| "epoch": 0.16511299920883354, |
| "eval_all_merge_code.json_loss": 0.3021065890789032, |
| "eval_all_merge_code.json_runtime": 0.3377, |
| "eval_all_merge_code.json_samples_per_second": 186.571, |
| "eval_all_merge_code.json_steps_per_second": 8.884, |
| "step": 600 |
| }, |
| { |
| "epoch": 0.16511299920883354, |
| "eval_magpie_datas.json_loss": 0.4455747604370117, |
| "eval_magpie_datas.json_runtime": 2.2122, |
| "eval_magpie_datas.json_samples_per_second": 77.751, |
| "eval_magpie_datas.json_steps_per_second": 3.616, |
| "step": 600 |
| }, |
| { |
| "epoch": 0.16511299920883354, |
| "eval_train_data_for_qwen.json_loss": 0.009937227703630924, |
| "eval_train_data_for_qwen.json_runtime": 0.2454, |
| "eval_train_data_for_qwen.json_samples_per_second": 40.745, |
| "eval_train_data_for_qwen.json_steps_per_second": 4.075, |
| "step": 600 |
| }, |
| { |
| "epoch": 0.16511299920883354, |
| "eval_alpaca_cleaned.json_loss": 0.9349167943000793, |
| "eval_alpaca_cleaned.json_runtime": 0.1148, |
| "eval_alpaca_cleaned.json_samples_per_second": 235.251, |
| "eval_alpaca_cleaned.json_steps_per_second": 17.426, |
| "step": 600 |
| }, |
| { |
| "epoch": 0.16511299920883354, |
| "eval_agent_instruct.json_loss": 0.2310038059949875, |
| "eval_agent_instruct.json_runtime": 0.5119, |
| "eval_agent_instruct.json_samples_per_second": 93.766, |
| "eval_agent_instruct.json_steps_per_second": 3.907, |
| "step": 600 |
| }, |
| { |
| "epoch": 0.16511299920883354, |
| "eval_MathInstruct.json_loss": 0.21358835697174072, |
| "eval_MathInstruct.json_runtime": 0.3581, |
| "eval_MathInstruct.json_samples_per_second": 159.182, |
| "eval_MathInstruct.json_steps_per_second": 8.378, |
| "step": 600 |
| }, |
| { |
| "epoch": 0.16511299920883354, |
| "eval_tested_143k_python_alpaca.json_loss": 0.4455429017543793, |
| "eval_tested_143k_python_alpaca.json_runtime": 0.3013, |
| "eval_tested_143k_python_alpaca.json_samples_per_second": 112.849, |
| "eval_tested_143k_python_alpaca.json_steps_per_second": 6.638, |
| "step": 600 |
| }, |
| { |
| "epoch": 0.16511299920883354, |
| "eval_xlam_function_calling_60k.json_loss": 0.00893339328467846, |
| "eval_xlam_function_calling_60k.json_runtime": 0.1004, |
| "eval_xlam_function_calling_60k.json_samples_per_second": 228.974, |
| "eval_xlam_function_calling_60k.json_steps_per_second": 9.955, |
| "step": 600 |
| }, |
| { |
| "epoch": 0.16511299920883354, |
| "eval_alpaca_data_gpt4_chinese.json_loss": 1.6295539140701294, |
| "eval_alpaca_data_gpt4_chinese.json_runtime": 0.0512, |
| "eval_alpaca_data_gpt4_chinese.json_samples_per_second": 312.613, |
| "eval_alpaca_data_gpt4_chinese.json_steps_per_second": 19.538, |
| "step": 600 |
| }, |
| { |
| "epoch": 0.16511299920883354, |
| "eval_alpaca_gpt4_zh.json_loss": 0.9761592745780945, |
| "eval_alpaca_gpt4_zh.json_runtime": 0.0499, |
| "eval_alpaca_gpt4_zh.json_samples_per_second": 220.289, |
| "eval_alpaca_gpt4_zh.json_steps_per_second": 20.026, |
| "step": 600 |
| }, |
| { |
| "epoch": 0.16511299920883354, |
| "eval_codefeedback_filtered_instruction.json_loss": 0.5956905484199524, |
| "eval_codefeedback_filtered_instruction.json_runtime": 0.4851, |
| "eval_codefeedback_filtered_instruction.json_samples_per_second": 41.232, |
| "eval_codefeedback_filtered_instruction.json_steps_per_second": 2.062, |
| "step": 600 |
| }, |
| { |
| "epoch": 0.16566337587286298, |
| "grad_norm": 0.38436150550842285, |
| "learning_rate": 1e-05, |
| "loss": 0.4609, |
| "step": 602 |
| }, |
| { |
| "epoch": 0.16621375253689244, |
| "grad_norm": 0.3946292996406555, |
| "learning_rate": 1e-05, |
| "loss": 0.4699, |
| "step": 604 |
| }, |
| { |
| "epoch": 0.16676412920092187, |
| "grad_norm": 0.4069615304470062, |
| "learning_rate": 1e-05, |
| "loss": 0.4722, |
| "step": 606 |
| }, |
| { |
| "epoch": 0.16731450586495134, |
| "grad_norm": 0.371660977602005, |
| "learning_rate": 1e-05, |
| "loss": 0.4856, |
| "step": 608 |
| }, |
| { |
| "epoch": 0.16786488252898077, |
| "grad_norm": 0.394911527633667, |
| "learning_rate": 1e-05, |
| "loss": 0.4804, |
| "step": 610 |
| }, |
| { |
| "epoch": 0.1684152591930102, |
| "grad_norm": 0.4873884916305542, |
| "learning_rate": 1e-05, |
| "loss": 0.4686, |
| "step": 612 |
| }, |
| { |
| "epoch": 0.16896563585703966, |
| "grad_norm": 0.3943842649459839, |
| "learning_rate": 1e-05, |
| "loss": 0.4887, |
| "step": 614 |
| }, |
| { |
| "epoch": 0.1695160125210691, |
| "grad_norm": 0.3716658055782318, |
| "learning_rate": 1e-05, |
| "loss": 0.4898, |
| "step": 616 |
| }, |
| { |
| "epoch": 0.17006638918509856, |
| "grad_norm": 0.36271047592163086, |
| "learning_rate": 1e-05, |
| "loss": 0.4861, |
| "step": 618 |
| }, |
| { |
| "epoch": 0.170616765849128, |
| "grad_norm": 0.3833015263080597, |
| "learning_rate": 1e-05, |
| "loss": 0.4814, |
| "step": 620 |
| }, |
| { |
| "epoch": 0.17116714251315746, |
| "grad_norm": 0.3661365211009979, |
| "learning_rate": 1e-05, |
| "loss": 0.4873, |
| "step": 622 |
| }, |
| { |
| "epoch": 0.1717175191771869, |
| "grad_norm": 0.3613869845867157, |
| "learning_rate": 1e-05, |
| "loss": 0.4537, |
| "step": 624 |
| }, |
| { |
| "epoch": 0.17226789584121632, |
| "grad_norm": 0.34498724341392517, |
| "learning_rate": 1e-05, |
| "loss": 0.483, |
| "step": 626 |
| }, |
| { |
| "epoch": 0.17281827250524578, |
| "grad_norm": 0.41466256976127625, |
| "learning_rate": 1e-05, |
| "loss": 0.4765, |
| "step": 628 |
| }, |
| { |
| "epoch": 0.17336864916927522, |
| "grad_norm": 0.36220455169677734, |
| "learning_rate": 1e-05, |
| "loss": 0.4842, |
| "step": 630 |
| }, |
| { |
| "epoch": 0.17391902583330468, |
| "grad_norm": 0.38009753823280334, |
| "learning_rate": 1e-05, |
| "loss": 0.482, |
| "step": 632 |
| }, |
| { |
| "epoch": 0.17446940249733411, |
| "grad_norm": 0.3589475452899933, |
| "learning_rate": 1e-05, |
| "loss": 0.4714, |
| "step": 634 |
| }, |
| { |
| "epoch": 0.17501977916136355, |
| "grad_norm": 0.37625178694725037, |
| "learning_rate": 1e-05, |
| "loss": 0.4487, |
| "step": 636 |
| }, |
| { |
| "epoch": 0.175570155825393, |
| "grad_norm": 0.3818652331829071, |
| "learning_rate": 1e-05, |
| "loss": 0.4757, |
| "step": 638 |
| }, |
| { |
| "epoch": 0.17612053248942244, |
| "grad_norm": 0.39498913288116455, |
| "learning_rate": 1e-05, |
| "loss": 0.4879, |
| "step": 640 |
| }, |
| { |
| "epoch": 0.1766709091534519, |
| "grad_norm": 0.3864663243293762, |
| "learning_rate": 1e-05, |
| "loss": 0.4815, |
| "step": 642 |
| }, |
| { |
| "epoch": 0.17722128581748134, |
| "grad_norm": 0.37452608346939087, |
| "learning_rate": 1e-05, |
| "loss": 0.4773, |
| "step": 644 |
| }, |
| { |
| "epoch": 0.17777166248151077, |
| "grad_norm": 0.3754761219024658, |
| "learning_rate": 1e-05, |
| "loss": 0.4916, |
| "step": 646 |
| }, |
| { |
| "epoch": 0.17832203914554023, |
| "grad_norm": 0.3797055780887604, |
| "learning_rate": 1e-05, |
| "loss": 0.4663, |
| "step": 648 |
| }, |
| { |
| "epoch": 0.17887241580956967, |
| "grad_norm": 0.3640367090702057, |
| "learning_rate": 1e-05, |
| "loss": 0.4737, |
| "step": 650 |
| }, |
| { |
| "epoch": 0.17942279247359913, |
| "grad_norm": 0.35961100459098816, |
| "learning_rate": 1e-05, |
| "loss": 0.4757, |
| "step": 652 |
| }, |
| { |
| "epoch": 0.17997316913762856, |
| "grad_norm": 0.40443646907806396, |
| "learning_rate": 1e-05, |
| "loss": 0.4789, |
| "step": 654 |
| }, |
| { |
| "epoch": 0.180523545801658, |
| "grad_norm": 0.35993334650993347, |
| "learning_rate": 1e-05, |
| "loss": 0.4902, |
| "step": 656 |
| }, |
| { |
| "epoch": 0.18107392246568746, |
| "grad_norm": 0.3933318853378296, |
| "learning_rate": 1e-05, |
| "loss": 0.4726, |
| "step": 658 |
| }, |
| { |
| "epoch": 0.1816242991297169, |
| "grad_norm": 0.3923085033893585, |
| "learning_rate": 1e-05, |
| "loss": 0.4714, |
| "step": 660 |
| }, |
| { |
| "epoch": 0.18217467579374635, |
| "grad_norm": 0.37387627363204956, |
| "learning_rate": 1e-05, |
| "loss": 0.478, |
| "step": 662 |
| }, |
| { |
| "epoch": 0.1827250524577758, |
| "grad_norm": 0.3787866532802582, |
| "learning_rate": 1e-05, |
| "loss": 0.4849, |
| "step": 664 |
| }, |
| { |
| "epoch": 0.18327542912180522, |
| "grad_norm": 0.39361730217933655, |
| "learning_rate": 1e-05, |
| "loss": 0.4836, |
| "step": 666 |
| }, |
| { |
| "epoch": 0.18382580578583468, |
| "grad_norm": 0.37430262565612793, |
| "learning_rate": 1e-05, |
| "loss": 0.4876, |
| "step": 668 |
| }, |
| { |
| "epoch": 0.18437618244986412, |
| "grad_norm": 0.3914833068847656, |
| "learning_rate": 1e-05, |
| "loss": 0.48, |
| "step": 670 |
| }, |
| { |
| "epoch": 0.18492655911389358, |
| "grad_norm": 0.36528506875038147, |
| "learning_rate": 1e-05, |
| "loss": 0.4583, |
| "step": 672 |
| }, |
| { |
| "epoch": 0.185476935777923, |
| "grad_norm": 0.3779620826244354, |
| "learning_rate": 1e-05, |
| "loss": 0.483, |
| "step": 674 |
| }, |
| { |
| "epoch": 0.18602731244195247, |
| "grad_norm": 0.3712228834629059, |
| "learning_rate": 1e-05, |
| "loss": 0.4833, |
| "step": 676 |
| }, |
| { |
| "epoch": 0.1865776891059819, |
| "grad_norm": 0.3959150016307831, |
| "learning_rate": 1e-05, |
| "loss": 0.4678, |
| "step": 678 |
| }, |
| { |
| "epoch": 0.18712806577001134, |
| "grad_norm": 0.38113903999328613, |
| "learning_rate": 1e-05, |
| "loss": 0.4794, |
| "step": 680 |
| }, |
| { |
| "epoch": 0.1876784424340408, |
| "grad_norm": 0.3872113525867462, |
| "learning_rate": 1e-05, |
| "loss": 0.4627, |
| "step": 682 |
| }, |
| { |
| "epoch": 0.18822881909807024, |
| "grad_norm": 0.35678407549858093, |
| "learning_rate": 1e-05, |
| "loss": 0.4666, |
| "step": 684 |
| }, |
| { |
| "epoch": 0.1887791957620997, |
| "grad_norm": 0.37833312153816223, |
| "learning_rate": 1e-05, |
| "loss": 0.4734, |
| "step": 686 |
| }, |
| { |
| "epoch": 0.18932957242612913, |
| "grad_norm": 0.3900817930698395, |
| "learning_rate": 1e-05, |
| "loss": 0.4834, |
| "step": 688 |
| }, |
| { |
| "epoch": 0.18987994909015857, |
| "grad_norm": 0.37114864587783813, |
| "learning_rate": 1e-05, |
| "loss": 0.4682, |
| "step": 690 |
| }, |
| { |
| "epoch": 0.19043032575418803, |
| "grad_norm": 0.37264662981033325, |
| "learning_rate": 1e-05, |
| "loss": 0.4815, |
| "step": 692 |
| }, |
| { |
| "epoch": 0.19098070241821746, |
| "grad_norm": 0.3758707344532013, |
| "learning_rate": 1e-05, |
| "loss": 0.4847, |
| "step": 694 |
| }, |
| { |
| "epoch": 0.19153107908224692, |
| "grad_norm": 0.38832512497901917, |
| "learning_rate": 1e-05, |
| "loss": 0.486, |
| "step": 696 |
| }, |
| { |
| "epoch": 0.19208145574627636, |
| "grad_norm": 0.382926344871521, |
| "learning_rate": 1e-05, |
| "loss": 0.4844, |
| "step": 698 |
| }, |
| { |
| "epoch": 0.1926318324103058, |
| "grad_norm": 0.3953557312488556, |
| "learning_rate": 1e-05, |
| "loss": 0.472, |
| "step": 700 |
| }, |
| { |
| "epoch": 0.19318220907433525, |
| "grad_norm": 0.36295419931411743, |
| "learning_rate": 1e-05, |
| "loss": 0.4792, |
| "step": 702 |
| }, |
| { |
| "epoch": 0.1937325857383647, |
| "grad_norm": 0.35859328508377075, |
| "learning_rate": 1e-05, |
| "loss": 0.4665, |
| "step": 704 |
| }, |
| { |
| "epoch": 0.19428296240239415, |
| "grad_norm": 0.3658142685890198, |
| "learning_rate": 1e-05, |
| "loss": 0.4724, |
| "step": 706 |
| }, |
| { |
| "epoch": 0.19483333906642358, |
| "grad_norm": 0.3860156834125519, |
| "learning_rate": 1e-05, |
| "loss": 0.4803, |
| "step": 708 |
| }, |
| { |
| "epoch": 0.19538371573045302, |
| "grad_norm": 0.38030922412872314, |
| "learning_rate": 1e-05, |
| "loss": 0.4692, |
| "step": 710 |
| }, |
| { |
| "epoch": 0.19593409239448248, |
| "grad_norm": 0.417516827583313, |
| "learning_rate": 1e-05, |
| "loss": 0.4833, |
| "step": 712 |
| }, |
| { |
| "epoch": 0.1964844690585119, |
| "grad_norm": 0.39626750349998474, |
| "learning_rate": 1e-05, |
| "loss": 0.4808, |
| "step": 714 |
| }, |
| { |
| "epoch": 0.19703484572254137, |
| "grad_norm": 0.3886042535305023, |
| "learning_rate": 1e-05, |
| "loss": 0.4716, |
| "step": 716 |
| }, |
| { |
| "epoch": 0.1975852223865708, |
| "grad_norm": 0.3816077411174774, |
| "learning_rate": 1e-05, |
| "loss": 0.468, |
| "step": 718 |
| }, |
| { |
| "epoch": 0.19813559905060024, |
| "grad_norm": 0.39385372400283813, |
| "learning_rate": 1e-05, |
| "loss": 0.4671, |
| "step": 720 |
| }, |
| { |
| "epoch": 0.1986859757146297, |
| "grad_norm": 0.35457953810691833, |
| "learning_rate": 1e-05, |
| "loss": 0.4667, |
| "step": 722 |
| }, |
| { |
| "epoch": 0.19923635237865914, |
| "grad_norm": 0.39437657594680786, |
| "learning_rate": 1e-05, |
| "loss": 0.4637, |
| "step": 724 |
| }, |
| { |
| "epoch": 0.1997867290426886, |
| "grad_norm": 0.41132184863090515, |
| "learning_rate": 1e-05, |
| "loss": 0.4723, |
| "step": 726 |
| }, |
| { |
| "epoch": 0.20033710570671803, |
| "grad_norm": 0.3640534281730652, |
| "learning_rate": 1e-05, |
| "loss": 0.4623, |
| "step": 728 |
| }, |
| { |
| "epoch": 0.20088748237074747, |
| "grad_norm": 0.39893659949302673, |
| "learning_rate": 1e-05, |
| "loss": 0.4881, |
| "step": 730 |
| }, |
| { |
| "epoch": 0.20143785903477693, |
| "grad_norm": 0.3677632212638855, |
| "learning_rate": 1e-05, |
| "loss": 0.4572, |
| "step": 732 |
| }, |
| { |
| "epoch": 0.20198823569880636, |
| "grad_norm": 0.40594953298568726, |
| "learning_rate": 1e-05, |
| "loss": 0.4726, |
| "step": 734 |
| }, |
| { |
| "epoch": 0.20253861236283582, |
| "grad_norm": 0.39571645855903625, |
| "learning_rate": 1e-05, |
| "loss": 0.4751, |
| "step": 736 |
| }, |
| { |
| "epoch": 0.20308898902686526, |
| "grad_norm": 0.3569906949996948, |
| "learning_rate": 1e-05, |
| "loss": 0.4855, |
| "step": 738 |
| }, |
| { |
| "epoch": 0.20363936569089472, |
| "grad_norm": 0.39166778326034546, |
| "learning_rate": 1e-05, |
| "loss": 0.4864, |
| "step": 740 |
| }, |
| { |
| "epoch": 0.20418974235492415, |
| "grad_norm": 0.36861687898635864, |
| "learning_rate": 1e-05, |
| "loss": 0.4659, |
| "step": 742 |
| }, |
| { |
| "epoch": 0.2047401190189536, |
| "grad_norm": 0.3691236078739166, |
| "learning_rate": 1e-05, |
| "loss": 0.4688, |
| "step": 744 |
| }, |
| { |
| "epoch": 0.20529049568298305, |
| "grad_norm": 0.41912853717803955, |
| "learning_rate": 1e-05, |
| "loss": 0.4787, |
| "step": 746 |
| }, |
| { |
| "epoch": 0.20584087234701248, |
| "grad_norm": 0.4022221565246582, |
| "learning_rate": 1e-05, |
| "loss": 0.4758, |
| "step": 748 |
| }, |
| { |
| "epoch": 0.20639124901104194, |
| "grad_norm": 0.402567595243454, |
| "learning_rate": 1e-05, |
| "loss": 0.4766, |
| "step": 750 |
| }, |
| { |
| "epoch": 0.20694162567507138, |
| "grad_norm": 0.3741600811481476, |
| "learning_rate": 1e-05, |
| "loss": 0.4833, |
| "step": 752 |
| }, |
| { |
| "epoch": 0.2074920023391008, |
| "grad_norm": 0.3958164155483246, |
| "learning_rate": 1e-05, |
| "loss": 0.4786, |
| "step": 754 |
| }, |
| { |
| "epoch": 0.20804237900313027, |
| "grad_norm": 0.37908801436424255, |
| "learning_rate": 1e-05, |
| "loss": 0.4715, |
| "step": 756 |
| }, |
| { |
| "epoch": 0.2085927556671597, |
| "grad_norm": 0.38426473736763, |
| "learning_rate": 1e-05, |
| "loss": 0.4874, |
| "step": 758 |
| }, |
| { |
| "epoch": 0.20914313233118917, |
| "grad_norm": 0.3873310983181, |
| "learning_rate": 1e-05, |
| "loss": 0.4677, |
| "step": 760 |
| }, |
| { |
| "epoch": 0.2096935089952186, |
| "grad_norm": 0.4033788740634918, |
| "learning_rate": 1e-05, |
| "loss": 0.4815, |
| "step": 762 |
| }, |
| { |
| "epoch": 0.21024388565924804, |
| "grad_norm": 0.40875962376594543, |
| "learning_rate": 1e-05, |
| "loss": 0.4879, |
| "step": 764 |
| }, |
| { |
| "epoch": 0.2107942623232775, |
| "grad_norm": 0.38724496960639954, |
| "learning_rate": 1e-05, |
| "loss": 0.4646, |
| "step": 766 |
| }, |
| { |
| "epoch": 0.21134463898730693, |
| "grad_norm": 0.39307013154029846, |
| "learning_rate": 1e-05, |
| "loss": 0.4762, |
| "step": 768 |
| }, |
| { |
| "epoch": 0.2118950156513364, |
| "grad_norm": 0.37346333265304565, |
| "learning_rate": 1e-05, |
| "loss": 0.4775, |
| "step": 770 |
| }, |
| { |
| "epoch": 0.21244539231536583, |
| "grad_norm": 0.3753449618816376, |
| "learning_rate": 1e-05, |
| "loss": 0.4707, |
| "step": 772 |
| }, |
| { |
| "epoch": 0.21299576897939526, |
| "grad_norm": 0.3829357922077179, |
| "learning_rate": 1e-05, |
| "loss": 0.4825, |
| "step": 774 |
| }, |
| { |
| "epoch": 0.21354614564342472, |
| "grad_norm": 0.35514822602272034, |
| "learning_rate": 1e-05, |
| "loss": 0.4779, |
| "step": 776 |
| }, |
| { |
| "epoch": 0.21409652230745416, |
| "grad_norm": 0.3714098036289215, |
| "learning_rate": 1e-05, |
| "loss": 0.4598, |
| "step": 778 |
| }, |
| { |
| "epoch": 0.21464689897148362, |
| "grad_norm": 0.3754241168498993, |
| "learning_rate": 1e-05, |
| "loss": 0.4736, |
| "step": 780 |
| }, |
| { |
| "epoch": 0.21519727563551305, |
| "grad_norm": 0.36637604236602783, |
| "learning_rate": 1e-05, |
| "loss": 0.4652, |
| "step": 782 |
| }, |
| { |
| "epoch": 0.2157476522995425, |
| "grad_norm": 0.367357075214386, |
| "learning_rate": 1e-05, |
| "loss": 0.466, |
| "step": 784 |
| }, |
| { |
| "epoch": 0.21629802896357195, |
| "grad_norm": 0.3747154176235199, |
| "learning_rate": 1e-05, |
| "loss": 0.4668, |
| "step": 786 |
| }, |
| { |
| "epoch": 0.21684840562760138, |
| "grad_norm": 0.3824009895324707, |
| "learning_rate": 1e-05, |
| "loss": 0.4748, |
| "step": 788 |
| }, |
| { |
| "epoch": 0.21739878229163084, |
| "grad_norm": 0.385030061006546, |
| "learning_rate": 1e-05, |
| "loss": 0.4882, |
| "step": 790 |
| }, |
| { |
| "epoch": 0.21794915895566028, |
| "grad_norm": 0.35460343956947327, |
| "learning_rate": 1e-05, |
| "loss": 0.4664, |
| "step": 792 |
| }, |
| { |
| "epoch": 0.21849953561968974, |
| "grad_norm": 0.3792308270931244, |
| "learning_rate": 1e-05, |
| "loss": 0.4874, |
| "step": 794 |
| }, |
| { |
| "epoch": 0.21904991228371917, |
| "grad_norm": 0.37190011143684387, |
| "learning_rate": 1e-05, |
| "loss": 0.4838, |
| "step": 796 |
| }, |
| { |
| "epoch": 0.2196002889477486, |
| "grad_norm": 0.3757864832878113, |
| "learning_rate": 1e-05, |
| "loss": 0.4538, |
| "step": 798 |
| }, |
| { |
| "epoch": 0.22015066561177807, |
| "grad_norm": 0.3677947223186493, |
| "learning_rate": 1e-05, |
| "loss": 0.4605, |
| "step": 800 |
| }, |
| { |
| "epoch": 0.22015066561177807, |
| "eval_merge_loss": 0.4150216579437256, |
| "eval_merge_runtime": 600.1194, |
| "eval_merge_samples_per_second": 56.242, |
| "eval_merge_steps_per_second": 2.345, |
| "step": 800 |
| }, |
| { |
| "epoch": 0.22015066561177807, |
| "eval_new_aug_datas_filtered.json_loss": 0.5434484481811523, |
| "eval_new_aug_datas_filtered.json_runtime": 10.4424, |
| "eval_new_aug_datas_filtered.json_samples_per_second": 73.45, |
| "eval_new_aug_datas_filtered.json_steps_per_second": 3.064, |
| "step": 800 |
| }, |
| { |
| "epoch": 0.22015066561177807, |
| "eval_sharegpt_gpt4.json_loss": 0.7981637716293335, |
| "eval_sharegpt_gpt4.json_runtime": 31.6015, |
| "eval_sharegpt_gpt4.json_samples_per_second": 58.89, |
| "eval_sharegpt_gpt4.json_steps_per_second": 2.468, |
| "step": 800 |
| }, |
| { |
| "epoch": 0.22015066561177807, |
| "eval_Table_GPT.json_loss": 0.0783885195851326, |
| "eval_Table_GPT.json_runtime": 24.9448, |
| "eval_Table_GPT.json_samples_per_second": 83.905, |
| "eval_Table_GPT.json_steps_per_second": 3.528, |
| "step": 800 |
| }, |
| { |
| "epoch": 0.22015066561177807, |
| "eval_gpt_4o_200k.json_loss": 0.8245088458061218, |
| "eval_gpt_4o_200k.json_runtime": 48.4135, |
| "eval_gpt_4o_200k.json_samples_per_second": 129.737, |
| "eval_gpt_4o_200k.json_steps_per_second": 5.412, |
| "step": 800 |
| }, |
| { |
| "epoch": 0.22015066561177807, |
| "eval_multi_turn_datas.json_loss": 0.35650402307510376, |
| "eval_multi_turn_datas.json_runtime": 75.5012, |
| "eval_multi_turn_datas.json_samples_per_second": 53.006, |
| "eval_multi_turn_datas.json_steps_per_second": 2.212, |
| "step": 800 |
| }, |
| { |
| "epoch": 0.22015066561177807, |
| "eval_table_python_code_datas.json_loss": 0.2912423610687256, |
| "eval_table_python_code_datas.json_runtime": 43.0138, |
| "eval_table_python_code_datas.json_samples_per_second": 50.193, |
| "eval_table_python_code_datas.json_steps_per_second": 2.092, |
| "step": 800 |
| }, |
| { |
| "epoch": 0.22015066561177807, |
| "eval_tabular_llm_data.json_loss": 0.11931464821100235, |
| "eval_tabular_llm_data.json_runtime": 8.524, |
| "eval_tabular_llm_data.json_samples_per_second": 28.86, |
| "eval_tabular_llm_data.json_steps_per_second": 1.29, |
| "step": 800 |
| }, |
| { |
| "epoch": 0.22015066561177807, |
| "eval_python_code_critic_21k.json_loss": 0.5899094343185425, |
| "eval_python_code_critic_21k.json_runtime": 3.2108, |
| "eval_python_code_critic_21k.json_samples_per_second": 185.935, |
| "eval_python_code_critic_21k.json_steps_per_second": 7.786, |
| "step": 800 |
| }, |
| { |
| "epoch": 0.22015066561177807, |
| "eval_all_merge_table_dataset.json_loss": 0.08210163563489914, |
| "eval_all_merge_table_dataset.json_runtime": 23.2334, |
| "eval_all_merge_table_dataset.json_samples_per_second": 30.646, |
| "eval_all_merge_table_dataset.json_steps_per_second": 1.291, |
| "step": 800 |
| }, |
| { |
| "epoch": 0.22015066561177807, |
| "eval_code_feedback_multi_turn.json_loss": 0.5942392349243164, |
| "eval_code_feedback_multi_turn.json_runtime": 32.3672, |
| "eval_code_feedback_multi_turn.json_samples_per_second": 68.001, |
| "eval_code_feedback_multi_turn.json_steps_per_second": 2.842, |
| "step": 800 |
| }, |
| { |
| "epoch": 0.22015066561177807, |
| "eval_ultrainteract_sft.json_loss": 0.43230774998664856, |
| "eval_ultrainteract_sft.json_runtime": 8.6469, |
| "eval_ultrainteract_sft.json_samples_per_second": 168.384, |
| "eval_ultrainteract_sft.json_steps_per_second": 7.055, |
| "step": 800 |
| }, |
| { |
| "epoch": 0.22015066561177807, |
| "eval_synthetic_text_to_sql.json_loss": 0.10562511533498764, |
| "eval_synthetic_text_to_sql.json_runtime": 0.1256, |
| "eval_synthetic_text_to_sql.json_samples_per_second": 270.776, |
| "eval_synthetic_text_to_sql.json_steps_per_second": 15.928, |
| "step": 800 |
| }, |
| { |
| "epoch": 0.22015066561177807, |
| "eval_sft_react_sql_datas.json_loss": 0.6536443829536438, |
| "eval_sft_react_sql_datas.json_runtime": 7.8424, |
| "eval_sft_react_sql_datas.json_samples_per_second": 40.039, |
| "eval_sft_react_sql_datas.json_steps_per_second": 1.785, |
| "step": 800 |
| }, |
| { |
| "epoch": 0.22015066561177807, |
| "eval_all_merge_code.json_loss": 0.2989647090435028, |
| "eval_all_merge_code.json_runtime": 0.3335, |
| "eval_all_merge_code.json_samples_per_second": 188.9, |
| "eval_all_merge_code.json_steps_per_second": 8.995, |
| "step": 800 |
| }, |
| { |
| "epoch": 0.22015066561177807, |
| "eval_magpie_datas.json_loss": 0.4389919340610504, |
| "eval_magpie_datas.json_runtime": 2.209, |
| "eval_magpie_datas.json_samples_per_second": 77.862, |
| "eval_magpie_datas.json_steps_per_second": 3.621, |
| "step": 800 |
| }, |
| { |
| "epoch": 0.22015066561177807, |
| "eval_train_data_for_qwen.json_loss": 0.0057810284197330475, |
| "eval_train_data_for_qwen.json_runtime": 0.2434, |
| "eval_train_data_for_qwen.json_samples_per_second": 41.087, |
| "eval_train_data_for_qwen.json_steps_per_second": 4.109, |
| "step": 800 |
| }, |
| { |
| "epoch": 0.22015066561177807, |
| "eval_alpaca_cleaned.json_loss": 0.9368440508842468, |
| "eval_alpaca_cleaned.json_runtime": 0.1149, |
| "eval_alpaca_cleaned.json_samples_per_second": 234.893, |
| "eval_alpaca_cleaned.json_steps_per_second": 17.399, |
| "step": 800 |
| }, |
| { |
| "epoch": 0.22015066561177807, |
| "eval_agent_instruct.json_loss": 0.2261410802602768, |
| "eval_agent_instruct.json_runtime": 0.5137, |
| "eval_agent_instruct.json_samples_per_second": 93.432, |
| "eval_agent_instruct.json_steps_per_second": 3.893, |
| "step": 800 |
| }, |
| { |
| "epoch": 0.22015066561177807, |
| "eval_MathInstruct.json_loss": 0.208473339676857, |
| "eval_MathInstruct.json_runtime": 0.3639, |
| "eval_MathInstruct.json_samples_per_second": 156.645, |
| "eval_MathInstruct.json_steps_per_second": 8.244, |
| "step": 800 |
| }, |
| { |
| "epoch": 0.22015066561177807, |
| "eval_tested_143k_python_alpaca.json_loss": 0.44293999671936035, |
| "eval_tested_143k_python_alpaca.json_runtime": 0.2997, |
| "eval_tested_143k_python_alpaca.json_samples_per_second": 113.456, |
| "eval_tested_143k_python_alpaca.json_steps_per_second": 6.674, |
| "step": 800 |
| }, |
| { |
| "epoch": 0.22015066561177807, |
| "eval_xlam_function_calling_60k.json_loss": 0.010015022940933704, |
| "eval_xlam_function_calling_60k.json_runtime": 0.1001, |
| "eval_xlam_function_calling_60k.json_samples_per_second": 229.814, |
| "eval_xlam_function_calling_60k.json_steps_per_second": 9.992, |
| "step": 800 |
| }, |
| { |
| "epoch": 0.22015066561177807, |
| "eval_alpaca_data_gpt4_chinese.json_loss": 1.591582179069519, |
| "eval_alpaca_data_gpt4_chinese.json_runtime": 0.0515, |
| "eval_alpaca_data_gpt4_chinese.json_samples_per_second": 310.705, |
| "eval_alpaca_data_gpt4_chinese.json_steps_per_second": 19.419, |
| "step": 800 |
| }, |
| { |
| "epoch": 0.22015066561177807, |
| "eval_alpaca_gpt4_zh.json_loss": 0.9911380410194397, |
| "eval_alpaca_gpt4_zh.json_runtime": 0.0498, |
| "eval_alpaca_gpt4_zh.json_samples_per_second": 221.019, |
| "eval_alpaca_gpt4_zh.json_steps_per_second": 20.093, |
| "step": 800 |
| }, |
| { |
| "epoch": 0.22015066561177807, |
| "eval_codefeedback_filtered_instruction.json_loss": 0.5947377681732178, |
| "eval_codefeedback_filtered_instruction.json_runtime": 0.485, |
| "eval_codefeedback_filtered_instruction.json_samples_per_second": 41.237, |
| "eval_codefeedback_filtered_instruction.json_steps_per_second": 2.062, |
| "step": 800 |
| }, |
| { |
| "epoch": 0.2207010422758075, |
| "grad_norm": 0.36098968982696533, |
| "learning_rate": 1e-05, |
| "loss": 0.4646, |
| "step": 802 |
| }, |
| { |
| "epoch": 0.22125141893983696, |
| "grad_norm": 0.3653786778450012, |
| "learning_rate": 1e-05, |
| "loss": 0.4501, |
| "step": 804 |
| }, |
| { |
| "epoch": 0.2218017956038664, |
| "grad_norm": 0.36442849040031433, |
| "learning_rate": 1e-05, |
| "loss": 0.4686, |
| "step": 806 |
| }, |
| { |
| "epoch": 0.22235217226789583, |
| "grad_norm": 0.3782612383365631, |
| "learning_rate": 1e-05, |
| "loss": 0.4598, |
| "step": 808 |
| }, |
| { |
| "epoch": 0.2229025489319253, |
| "grad_norm": 0.39521896839141846, |
| "learning_rate": 1e-05, |
| "loss": 0.4679, |
| "step": 810 |
| }, |
| { |
| "epoch": 0.22345292559595473, |
| "grad_norm": 0.3727470636367798, |
| "learning_rate": 1e-05, |
| "loss": 0.4803, |
| "step": 812 |
| }, |
| { |
| "epoch": 0.2240033022599842, |
| "grad_norm": 0.3883068263530731, |
| "learning_rate": 1e-05, |
| "loss": 0.4773, |
| "step": 814 |
| }, |
| { |
| "epoch": 0.22455367892401362, |
| "grad_norm": 0.37147605419158936, |
| "learning_rate": 1e-05, |
| "loss": 0.4825, |
| "step": 816 |
| }, |
| { |
| "epoch": 0.22510405558804306, |
| "grad_norm": 0.3924333155155182, |
| "learning_rate": 1e-05, |
| "loss": 0.4698, |
| "step": 818 |
| }, |
| { |
| "epoch": 0.22565443225207252, |
| "grad_norm": 0.38133057951927185, |
| "learning_rate": 1e-05, |
| "loss": 0.4842, |
| "step": 820 |
| }, |
| { |
| "epoch": 0.22620480891610195, |
| "grad_norm": 0.36132821440696716, |
| "learning_rate": 1e-05, |
| "loss": 0.4594, |
| "step": 822 |
| }, |
| { |
| "epoch": 0.2267551855801314, |
| "grad_norm": 0.39988580346107483, |
| "learning_rate": 1e-05, |
| "loss": 0.4795, |
| "step": 824 |
| }, |
| { |
| "epoch": 0.22730556224416085, |
| "grad_norm": 0.38140830397605896, |
| "learning_rate": 1e-05, |
| "loss": 0.4649, |
| "step": 826 |
| }, |
| { |
| "epoch": 0.22785593890819028, |
| "grad_norm": 0.3726978898048401, |
| "learning_rate": 1e-05, |
| "loss": 0.4603, |
| "step": 828 |
| }, |
| { |
| "epoch": 0.22840631557221974, |
| "grad_norm": 0.3880995512008667, |
| "learning_rate": 1e-05, |
| "loss": 0.4739, |
| "step": 830 |
| }, |
| { |
| "epoch": 0.22895669223624918, |
| "grad_norm": 0.4118787944316864, |
| "learning_rate": 1e-05, |
| "loss": 0.4733, |
| "step": 832 |
| }, |
| { |
| "epoch": 0.22950706890027864, |
| "grad_norm": 0.37878745794296265, |
| "learning_rate": 1e-05, |
| "loss": 0.4922, |
| "step": 834 |
| }, |
| { |
| "epoch": 0.23005744556430807, |
| "grad_norm": 0.3838474154472351, |
| "learning_rate": 1e-05, |
| "loss": 0.4646, |
| "step": 836 |
| }, |
| { |
| "epoch": 0.2306078222283375, |
| "grad_norm": 0.37345945835113525, |
| "learning_rate": 1e-05, |
| "loss": 0.4798, |
| "step": 838 |
| }, |
| { |
| "epoch": 0.23115819889236697, |
| "grad_norm": 0.36341801285743713, |
| "learning_rate": 1e-05, |
| "loss": 0.4773, |
| "step": 840 |
| }, |
| { |
| "epoch": 0.2317085755563964, |
| "grad_norm": 0.38800522685050964, |
| "learning_rate": 1e-05, |
| "loss": 0.4806, |
| "step": 842 |
| }, |
| { |
| "epoch": 0.23225895222042586, |
| "grad_norm": 0.38882526755332947, |
| "learning_rate": 1e-05, |
| "loss": 0.4765, |
| "step": 844 |
| }, |
| { |
| "epoch": 0.2328093288844553, |
| "grad_norm": 0.37744489312171936, |
| "learning_rate": 1e-05, |
| "loss": 0.4842, |
| "step": 846 |
| }, |
| { |
| "epoch": 0.23335970554848473, |
| "grad_norm": 0.39916718006134033, |
| "learning_rate": 1e-05, |
| "loss": 0.467, |
| "step": 848 |
| }, |
| { |
| "epoch": 0.2339100822125142, |
| "grad_norm": 0.36556801199913025, |
| "learning_rate": 1e-05, |
| "loss": 0.4711, |
| "step": 850 |
| }, |
| { |
| "epoch": 0.23446045887654363, |
| "grad_norm": 0.3993853032588959, |
| "learning_rate": 1e-05, |
| "loss": 0.4656, |
| "step": 852 |
| }, |
| { |
| "epoch": 0.2350108355405731, |
| "grad_norm": 0.39630356431007385, |
| "learning_rate": 1e-05, |
| "loss": 0.4734, |
| "step": 854 |
| }, |
| { |
| "epoch": 0.23556121220460252, |
| "grad_norm": 0.3797578513622284, |
| "learning_rate": 1e-05, |
| "loss": 0.4718, |
| "step": 856 |
| }, |
| { |
| "epoch": 0.23611158886863198, |
| "grad_norm": 0.38648873567581177, |
| "learning_rate": 1e-05, |
| "loss": 0.4751, |
| "step": 858 |
| }, |
| { |
| "epoch": 0.23666196553266142, |
| "grad_norm": 0.3934420347213745, |
| "learning_rate": 1e-05, |
| "loss": 0.4653, |
| "step": 860 |
| }, |
| { |
| "epoch": 0.23721234219669085, |
| "grad_norm": 0.3899431824684143, |
| "learning_rate": 1e-05, |
| "loss": 0.4644, |
| "step": 862 |
| }, |
| { |
| "epoch": 0.2377627188607203, |
| "grad_norm": 0.3696826696395874, |
| "learning_rate": 1e-05, |
| "loss": 0.482, |
| "step": 864 |
| }, |
| { |
| "epoch": 0.23831309552474975, |
| "grad_norm": 0.352923184633255, |
| "learning_rate": 1e-05, |
| "loss": 0.4707, |
| "step": 866 |
| }, |
| { |
| "epoch": 0.2388634721887792, |
| "grad_norm": 0.36678972840309143, |
| "learning_rate": 1e-05, |
| "loss": 0.4687, |
| "step": 868 |
| }, |
| { |
| "epoch": 0.23941384885280864, |
| "grad_norm": 0.38986021280288696, |
| "learning_rate": 1e-05, |
| "loss": 0.4613, |
| "step": 870 |
| }, |
| { |
| "epoch": 0.23996422551683808, |
| "grad_norm": 0.3684535622596741, |
| "learning_rate": 1e-05, |
| "loss": 0.4734, |
| "step": 872 |
| }, |
| { |
| "epoch": 0.24051460218086754, |
| "grad_norm": 0.36672261357307434, |
| "learning_rate": 1e-05, |
| "loss": 0.4796, |
| "step": 874 |
| }, |
| { |
| "epoch": 0.24106497884489697, |
| "grad_norm": 0.39910420775413513, |
| "learning_rate": 1e-05, |
| "loss": 0.4681, |
| "step": 876 |
| }, |
| { |
| "epoch": 0.24161535550892643, |
| "grad_norm": 0.38694077730178833, |
| "learning_rate": 1e-05, |
| "loss": 0.4821, |
| "step": 878 |
| }, |
| { |
| "epoch": 0.24216573217295587, |
| "grad_norm": 0.4555080831050873, |
| "learning_rate": 1e-05, |
| "loss": 0.4882, |
| "step": 880 |
| }, |
| { |
| "epoch": 0.2427161088369853, |
| "grad_norm": 0.3934450149536133, |
| "learning_rate": 1e-05, |
| "loss": 0.4778, |
| "step": 882 |
| }, |
| { |
| "epoch": 0.24326648550101476, |
| "grad_norm": 0.35743412375450134, |
| "learning_rate": 1e-05, |
| "loss": 0.4793, |
| "step": 884 |
| }, |
| { |
| "epoch": 0.2438168621650442, |
| "grad_norm": 0.3518178462982178, |
| "learning_rate": 1e-05, |
| "loss": 0.472, |
| "step": 886 |
| }, |
| { |
| "epoch": 0.24436723882907366, |
| "grad_norm": 0.35367751121520996, |
| "learning_rate": 1e-05, |
| "loss": 0.4747, |
| "step": 888 |
| }, |
| { |
| "epoch": 0.2449176154931031, |
| "grad_norm": 0.3810805678367615, |
| "learning_rate": 1e-05, |
| "loss": 0.4834, |
| "step": 890 |
| }, |
| { |
| "epoch": 0.24546799215713253, |
| "grad_norm": 0.38103243708610535, |
| "learning_rate": 1e-05, |
| "loss": 0.4763, |
| "step": 892 |
| }, |
| { |
| "epoch": 0.246018368821162, |
| "grad_norm": 0.3839399218559265, |
| "learning_rate": 1e-05, |
| "loss": 0.4696, |
| "step": 894 |
| }, |
| { |
| "epoch": 0.24656874548519142, |
| "grad_norm": 0.41292649507522583, |
| "learning_rate": 1e-05, |
| "loss": 0.4777, |
| "step": 896 |
| }, |
| { |
| "epoch": 0.24711912214922088, |
| "grad_norm": 0.36179229617118835, |
| "learning_rate": 1e-05, |
| "loss": 0.4668, |
| "step": 898 |
| }, |
| { |
| "epoch": 0.24766949881325032, |
| "grad_norm": 0.3638279139995575, |
| "learning_rate": 1e-05, |
| "loss": 0.4645, |
| "step": 900 |
| }, |
| { |
| "epoch": 0.24821987547727975, |
| "grad_norm": 0.3458470106124878, |
| "learning_rate": 1e-05, |
| "loss": 0.4746, |
| "step": 902 |
| }, |
| { |
| "epoch": 0.2487702521413092, |
| "grad_norm": 0.3822806775569916, |
| "learning_rate": 1e-05, |
| "loss": 0.4715, |
| "step": 904 |
| }, |
| { |
| "epoch": 0.24932062880533865, |
| "grad_norm": 0.3655596077442169, |
| "learning_rate": 1e-05, |
| "loss": 0.4659, |
| "step": 906 |
| }, |
| { |
| "epoch": 0.2498710054693681, |
| "grad_norm": 0.3868783116340637, |
| "learning_rate": 1e-05, |
| "loss": 0.4743, |
| "step": 908 |
| }, |
| { |
| "epoch": 0.25042138213339754, |
| "grad_norm": 0.3778232932090759, |
| "learning_rate": 1e-05, |
| "loss": 0.4652, |
| "step": 910 |
| }, |
| { |
| "epoch": 0.250971758797427, |
| "grad_norm": 0.36664894223213196, |
| "learning_rate": 1e-05, |
| "loss": 0.4554, |
| "step": 912 |
| }, |
| { |
| "epoch": 0.2515221354614564, |
| "grad_norm": 0.3995139002799988, |
| "learning_rate": 1e-05, |
| "loss": 0.4683, |
| "step": 914 |
| }, |
| { |
| "epoch": 0.25207251212548587, |
| "grad_norm": 0.40083470940589905, |
| "learning_rate": 1e-05, |
| "loss": 0.4673, |
| "step": 916 |
| }, |
| { |
| "epoch": 0.25262288878951533, |
| "grad_norm": 0.37919968366622925, |
| "learning_rate": 1e-05, |
| "loss": 0.4776, |
| "step": 918 |
| }, |
| { |
| "epoch": 0.2531732654535448, |
| "grad_norm": 0.3586704432964325, |
| "learning_rate": 1e-05, |
| "loss": 0.4792, |
| "step": 920 |
| }, |
| { |
| "epoch": 0.2537236421175742, |
| "grad_norm": 0.3744722902774811, |
| "learning_rate": 1e-05, |
| "loss": 0.463, |
| "step": 922 |
| }, |
| { |
| "epoch": 0.25427401878160366, |
| "grad_norm": 0.37209680676460266, |
| "learning_rate": 1e-05, |
| "loss": 0.4805, |
| "step": 924 |
| }, |
| { |
| "epoch": 0.2548243954456331, |
| "grad_norm": 0.40809133648872375, |
| "learning_rate": 1e-05, |
| "loss": 0.4781, |
| "step": 926 |
| }, |
| { |
| "epoch": 0.25537477210966253, |
| "grad_norm": 0.37261903285980225, |
| "learning_rate": 1e-05, |
| "loss": 0.4617, |
| "step": 928 |
| }, |
| { |
| "epoch": 0.255925148773692, |
| "grad_norm": 0.37391313910484314, |
| "learning_rate": 1e-05, |
| "loss": 0.4617, |
| "step": 930 |
| }, |
| { |
| "epoch": 0.25647552543772145, |
| "grad_norm": 0.36610838770866394, |
| "learning_rate": 1e-05, |
| "loss": 0.4642, |
| "step": 932 |
| }, |
| { |
| "epoch": 0.25702590210175086, |
| "grad_norm": 0.3854142129421234, |
| "learning_rate": 1e-05, |
| "loss": 0.4652, |
| "step": 934 |
| }, |
| { |
| "epoch": 0.2575762787657803, |
| "grad_norm": 0.365159809589386, |
| "learning_rate": 1e-05, |
| "loss": 0.4714, |
| "step": 936 |
| }, |
| { |
| "epoch": 0.2581266554298098, |
| "grad_norm": 0.41678836941719055, |
| "learning_rate": 1e-05, |
| "loss": 0.4854, |
| "step": 938 |
| }, |
| { |
| "epoch": 0.25867703209383924, |
| "grad_norm": 0.380215585231781, |
| "learning_rate": 1e-05, |
| "loss": 0.4785, |
| "step": 940 |
| }, |
| { |
| "epoch": 0.25922740875786865, |
| "grad_norm": 0.3704361617565155, |
| "learning_rate": 1e-05, |
| "loss": 0.4433, |
| "step": 942 |
| }, |
| { |
| "epoch": 0.2597777854218981, |
| "grad_norm": 0.34440556168556213, |
| "learning_rate": 1e-05, |
| "loss": 0.4642, |
| "step": 944 |
| }, |
| { |
| "epoch": 0.2603281620859276, |
| "grad_norm": 0.36701446771621704, |
| "learning_rate": 1e-05, |
| "loss": 0.4533, |
| "step": 946 |
| }, |
| { |
| "epoch": 0.260878538749957, |
| "grad_norm": 0.3694971799850464, |
| "learning_rate": 1e-05, |
| "loss": 0.4942, |
| "step": 948 |
| }, |
| { |
| "epoch": 0.26142891541398644, |
| "grad_norm": 0.3697713017463684, |
| "learning_rate": 1e-05, |
| "loss": 0.4586, |
| "step": 950 |
| }, |
| { |
| "epoch": 0.2619792920780159, |
| "grad_norm": 0.36559173464775085, |
| "learning_rate": 1e-05, |
| "loss": 0.4679, |
| "step": 952 |
| }, |
| { |
| "epoch": 0.26252966874204536, |
| "grad_norm": 0.3704969584941864, |
| "learning_rate": 1e-05, |
| "loss": 0.4624, |
| "step": 954 |
| }, |
| { |
| "epoch": 0.26308004540607477, |
| "grad_norm": 0.3804495334625244, |
| "learning_rate": 1e-05, |
| "loss": 0.4603, |
| "step": 956 |
| }, |
| { |
| "epoch": 0.26363042207010423, |
| "grad_norm": 0.34987303614616394, |
| "learning_rate": 1e-05, |
| "loss": 0.4679, |
| "step": 958 |
| }, |
| { |
| "epoch": 0.2641807987341337, |
| "grad_norm": 0.3723856508731842, |
| "learning_rate": 1e-05, |
| "loss": 0.4631, |
| "step": 960 |
| }, |
| { |
| "epoch": 0.2647311753981631, |
| "grad_norm": 0.35623612999916077, |
| "learning_rate": 1e-05, |
| "loss": 0.4627, |
| "step": 962 |
| }, |
| { |
| "epoch": 0.26528155206219256, |
| "grad_norm": 0.37969711422920227, |
| "learning_rate": 1e-05, |
| "loss": 0.4815, |
| "step": 964 |
| }, |
| { |
| "epoch": 0.265831928726222, |
| "grad_norm": 0.3889734447002411, |
| "learning_rate": 1e-05, |
| "loss": 0.471, |
| "step": 966 |
| }, |
| { |
| "epoch": 0.26638230539025143, |
| "grad_norm": 0.39106228947639465, |
| "learning_rate": 1e-05, |
| "loss": 0.4542, |
| "step": 968 |
| }, |
| { |
| "epoch": 0.2669326820542809, |
| "grad_norm": 0.38163650035858154, |
| "learning_rate": 1e-05, |
| "loss": 0.4604, |
| "step": 970 |
| }, |
| { |
| "epoch": 0.26748305871831035, |
| "grad_norm": 0.3733852505683899, |
| "learning_rate": 1e-05, |
| "loss": 0.4768, |
| "step": 972 |
| }, |
| { |
| "epoch": 0.2680334353823398, |
| "grad_norm": 0.3894038796424866, |
| "learning_rate": 1e-05, |
| "loss": 0.4816, |
| "step": 974 |
| }, |
| { |
| "epoch": 0.2685838120463692, |
| "grad_norm": 0.3697439432144165, |
| "learning_rate": 1e-05, |
| "loss": 0.4731, |
| "step": 976 |
| }, |
| { |
| "epoch": 0.2691341887103987, |
| "grad_norm": 0.39549171924591064, |
| "learning_rate": 1e-05, |
| "loss": 0.473, |
| "step": 978 |
| }, |
| { |
| "epoch": 0.26968456537442814, |
| "grad_norm": 0.38712403178215027, |
| "learning_rate": 1e-05, |
| "loss": 0.4717, |
| "step": 980 |
| }, |
| { |
| "epoch": 0.27023494203845755, |
| "grad_norm": 0.3775619864463806, |
| "learning_rate": 1e-05, |
| "loss": 0.4638, |
| "step": 982 |
| }, |
| { |
| "epoch": 0.270785318702487, |
| "grad_norm": 0.38664135336875916, |
| "learning_rate": 1e-05, |
| "loss": 0.4655, |
| "step": 984 |
| }, |
| { |
| "epoch": 0.27133569536651647, |
| "grad_norm": 0.3730804920196533, |
| "learning_rate": 1e-05, |
| "loss": 0.4672, |
| "step": 986 |
| }, |
| { |
| "epoch": 0.2718860720305459, |
| "grad_norm": 0.36626750230789185, |
| "learning_rate": 1e-05, |
| "loss": 0.4562, |
| "step": 988 |
| }, |
| { |
| "epoch": 0.27243644869457534, |
| "grad_norm": 0.38708406686782837, |
| "learning_rate": 1e-05, |
| "loss": 0.4583, |
| "step": 990 |
| }, |
| { |
| "epoch": 0.2729868253586048, |
| "grad_norm": 0.37348565459251404, |
| "learning_rate": 1e-05, |
| "loss": 0.4709, |
| "step": 992 |
| }, |
| { |
| "epoch": 0.27353720202263426, |
| "grad_norm": 0.39145755767822266, |
| "learning_rate": 1e-05, |
| "loss": 0.4667, |
| "step": 994 |
| }, |
| { |
| "epoch": 0.27408757868666367, |
| "grad_norm": 0.3615020215511322, |
| "learning_rate": 1e-05, |
| "loss": 0.4585, |
| "step": 996 |
| }, |
| { |
| "epoch": 0.27463795535069313, |
| "grad_norm": 0.38545548915863037, |
| "learning_rate": 1e-05, |
| "loss": 0.472, |
| "step": 998 |
| }, |
| { |
| "epoch": 0.2751883320147226, |
| "grad_norm": 0.3605005741119385, |
| "learning_rate": 1e-05, |
| "loss": 0.4575, |
| "step": 1000 |
| }, |
| { |
| "epoch": 0.2751883320147226, |
| "eval_merge_loss": 0.4092504382133484, |
| "eval_merge_runtime": 599.649, |
| "eval_merge_samples_per_second": 56.286, |
| "eval_merge_steps_per_second": 2.346, |
| "step": 1000 |
| }, |
| { |
| "epoch": 0.2751883320147226, |
| "eval_new_aug_datas_filtered.json_loss": 0.534787118434906, |
| "eval_new_aug_datas_filtered.json_runtime": 10.3465, |
| "eval_new_aug_datas_filtered.json_samples_per_second": 74.131, |
| "eval_new_aug_datas_filtered.json_steps_per_second": 3.093, |
| "step": 1000 |
| }, |
| { |
| "epoch": 0.2751883320147226, |
| "eval_sharegpt_gpt4.json_loss": 0.7911589741706848, |
| "eval_sharegpt_gpt4.json_runtime": 31.721, |
| "eval_sharegpt_gpt4.json_samples_per_second": 58.668, |
| "eval_sharegpt_gpt4.json_steps_per_second": 2.459, |
| "step": 1000 |
| }, |
| { |
| "epoch": 0.2751883320147226, |
| "eval_Table_GPT.json_loss": 0.07000603526830673, |
| "eval_Table_GPT.json_runtime": 24.9973, |
| "eval_Table_GPT.json_samples_per_second": 83.729, |
| "eval_Table_GPT.json_steps_per_second": 3.52, |
| "step": 1000 |
| }, |
| { |
| "epoch": 0.2751883320147226, |
| "eval_gpt_4o_200k.json_loss": 0.8180866837501526, |
| "eval_gpt_4o_200k.json_runtime": 48.5388, |
| "eval_gpt_4o_200k.json_samples_per_second": 129.402, |
| "eval_gpt_4o_200k.json_steps_per_second": 5.398, |
| "step": 1000 |
| }, |
| { |
| "epoch": 0.2751883320147226, |
| "eval_multi_turn_datas.json_loss": 0.34955134987831116, |
| "eval_multi_turn_datas.json_runtime": 75.86, |
| "eval_multi_turn_datas.json_samples_per_second": 52.755, |
| "eval_multi_turn_datas.json_steps_per_second": 2.201, |
| "step": 1000 |
| }, |
| { |
| "epoch": 0.2751883320147226, |
| "eval_table_python_code_datas.json_loss": 0.285086989402771, |
| "eval_table_python_code_datas.json_runtime": 43.1585, |
| "eval_table_python_code_datas.json_samples_per_second": 50.025, |
| "eval_table_python_code_datas.json_steps_per_second": 2.085, |
| "step": 1000 |
| }, |
| { |
| "epoch": 0.2751883320147226, |
| "eval_tabular_llm_data.json_loss": 0.12198314070701599, |
| "eval_tabular_llm_data.json_runtime": 8.5654, |
| "eval_tabular_llm_data.json_samples_per_second": 28.72, |
| "eval_tabular_llm_data.json_steps_per_second": 1.284, |
| "step": 1000 |
| }, |
| { |
| "epoch": 0.2751883320147226, |
| "eval_python_code_critic_21k.json_loss": 0.5841899514198303, |
| "eval_python_code_critic_21k.json_runtime": 3.2248, |
| "eval_python_code_critic_21k.json_samples_per_second": 185.125, |
| "eval_python_code_critic_21k.json_steps_per_second": 7.752, |
| "step": 1000 |
| }, |
| { |
| "epoch": 0.2751883320147226, |
| "eval_all_merge_table_dataset.json_loss": 0.08110550791025162, |
| "eval_all_merge_table_dataset.json_runtime": 23.4122, |
| "eval_all_merge_table_dataset.json_samples_per_second": 30.411, |
| "eval_all_merge_table_dataset.json_steps_per_second": 1.281, |
| "step": 1000 |
| }, |
| { |
| "epoch": 0.2751883320147226, |
| "eval_code_feedback_multi_turn.json_loss": 0.5908513069152832, |
| "eval_code_feedback_multi_turn.json_runtime": 32.4627, |
| "eval_code_feedback_multi_turn.json_samples_per_second": 67.801, |
| "eval_code_feedback_multi_turn.json_steps_per_second": 2.834, |
| "step": 1000 |
| }, |
| { |
| "epoch": 0.2751883320147226, |
| "eval_ultrainteract_sft.json_loss": 0.42869675159454346, |
| "eval_ultrainteract_sft.json_runtime": 8.6816, |
| "eval_ultrainteract_sft.json_samples_per_second": 167.711, |
| "eval_ultrainteract_sft.json_steps_per_second": 7.026, |
| "step": 1000 |
| }, |
| { |
| "epoch": 0.2751883320147226, |
| "eval_synthetic_text_to_sql.json_loss": 0.10359195619821548, |
| "eval_synthetic_text_to_sql.json_runtime": 0.1301, |
| "eval_synthetic_text_to_sql.json_samples_per_second": 261.368, |
| "eval_synthetic_text_to_sql.json_steps_per_second": 15.375, |
| "step": 1000 |
| }, |
| { |
| "epoch": 0.2751883320147226, |
| "eval_sft_react_sql_datas.json_loss": 0.6493918895721436, |
| "eval_sft_react_sql_datas.json_runtime": 7.8489, |
| "eval_sft_react_sql_datas.json_samples_per_second": 40.006, |
| "eval_sft_react_sql_datas.json_steps_per_second": 1.784, |
| "step": 1000 |
| }, |
| { |
| "epoch": 0.2751883320147226, |
| "eval_all_merge_code.json_loss": 0.29959577322006226, |
| "eval_all_merge_code.json_runtime": 0.3379, |
| "eval_all_merge_code.json_samples_per_second": 186.458, |
| "eval_all_merge_code.json_steps_per_second": 8.879, |
| "step": 1000 |
| }, |
| { |
| "epoch": 0.2751883320147226, |
| "eval_magpie_datas.json_loss": 0.4377444088459015, |
| "eval_magpie_datas.json_runtime": 2.2091, |
| "eval_magpie_datas.json_samples_per_second": 77.86, |
| "eval_magpie_datas.json_steps_per_second": 3.621, |
| "step": 1000 |
| }, |
| { |
| "epoch": 0.2751883320147226, |
| "eval_train_data_for_qwen.json_loss": 0.003975613508373499, |
| "eval_train_data_for_qwen.json_runtime": 0.2434, |
| "eval_train_data_for_qwen.json_samples_per_second": 41.087, |
| "eval_train_data_for_qwen.json_steps_per_second": 4.109, |
| "step": 1000 |
| }, |
| { |
| "epoch": 0.2751883320147226, |
| "eval_alpaca_cleaned.json_loss": 0.9270830750465393, |
| "eval_alpaca_cleaned.json_runtime": 0.1147, |
| "eval_alpaca_cleaned.json_samples_per_second": 235.404, |
| "eval_alpaca_cleaned.json_steps_per_second": 17.437, |
| "step": 1000 |
| }, |
| { |
| "epoch": 0.2751883320147226, |
| "eval_agent_instruct.json_loss": 0.2235051840543747, |
| "eval_agent_instruct.json_runtime": 0.5147, |
| "eval_agent_instruct.json_samples_per_second": 93.255, |
| "eval_agent_instruct.json_steps_per_second": 3.886, |
| "step": 1000 |
| }, |
| { |
| "epoch": 0.2751883320147226, |
| "eval_MathInstruct.json_loss": 0.20924758911132812, |
| "eval_MathInstruct.json_runtime": 0.3588, |
| "eval_MathInstruct.json_samples_per_second": 158.853, |
| "eval_MathInstruct.json_steps_per_second": 8.361, |
| "step": 1000 |
| }, |
| { |
| "epoch": 0.2751883320147226, |
| "eval_tested_143k_python_alpaca.json_loss": 0.44443246722221375, |
| "eval_tested_143k_python_alpaca.json_runtime": 0.3017, |
| "eval_tested_143k_python_alpaca.json_samples_per_second": 112.684, |
| "eval_tested_143k_python_alpaca.json_steps_per_second": 6.628, |
| "step": 1000 |
| }, |
| { |
| "epoch": 0.2751883320147226, |
| "eval_xlam_function_calling_60k.json_loss": 0.008116651326417923, |
| "eval_xlam_function_calling_60k.json_runtime": 0.1004, |
| "eval_xlam_function_calling_60k.json_samples_per_second": 229.157, |
| "eval_xlam_function_calling_60k.json_steps_per_second": 9.963, |
| "step": 1000 |
| }, |
| { |
| "epoch": 0.2751883320147226, |
| "eval_alpaca_data_gpt4_chinese.json_loss": 1.588812232017517, |
| "eval_alpaca_data_gpt4_chinese.json_runtime": 0.0516, |
| "eval_alpaca_data_gpt4_chinese.json_samples_per_second": 310.032, |
| "eval_alpaca_data_gpt4_chinese.json_steps_per_second": 19.377, |
| "step": 1000 |
| }, |
| { |
| "epoch": 0.2751883320147226, |
| "eval_alpaca_gpt4_zh.json_loss": 0.9696416258811951, |
| "eval_alpaca_gpt4_zh.json_runtime": 0.0501, |
| "eval_alpaca_gpt4_zh.json_samples_per_second": 219.488, |
| "eval_alpaca_gpt4_zh.json_steps_per_second": 19.953, |
| "step": 1000 |
| }, |
| { |
| "epoch": 0.2751883320147226, |
| "eval_codefeedback_filtered_instruction.json_loss": 0.5965829491615295, |
| "eval_codefeedback_filtered_instruction.json_runtime": 0.4872, |
| "eval_codefeedback_filtered_instruction.json_samples_per_second": 41.049, |
| "eval_codefeedback_filtered_instruction.json_steps_per_second": 2.052, |
| "step": 1000 |
| }, |
| { |
| "epoch": 0.275738708678752, |
| "grad_norm": 0.3598334789276123, |
| "learning_rate": 1e-05, |
| "loss": 0.4624, |
| "step": 1002 |
| }, |
| { |
| "epoch": 0.27628908534278146, |
| "grad_norm": 0.3716166615486145, |
| "learning_rate": 1e-05, |
| "loss": 0.4593, |
| "step": 1004 |
| }, |
| { |
| "epoch": 0.2768394620068109, |
| "grad_norm": 0.3814164996147156, |
| "learning_rate": 1e-05, |
| "loss": 0.4581, |
| "step": 1006 |
| }, |
| { |
| "epoch": 0.2773898386708404, |
| "grad_norm": 0.3595026731491089, |
| "learning_rate": 1e-05, |
| "loss": 0.4826, |
| "step": 1008 |
| }, |
| { |
| "epoch": 0.2779402153348698, |
| "grad_norm": 0.3587126135826111, |
| "learning_rate": 1e-05, |
| "loss": 0.4563, |
| "step": 1010 |
| }, |
| { |
| "epoch": 0.27849059199889925, |
| "grad_norm": 0.36048388481140137, |
| "learning_rate": 1e-05, |
| "loss": 0.4646, |
| "step": 1012 |
| }, |
| { |
| "epoch": 0.2790409686629287, |
| "grad_norm": 0.37650784850120544, |
| "learning_rate": 1e-05, |
| "loss": 0.4658, |
| "step": 1014 |
| }, |
| { |
| "epoch": 0.2795913453269581, |
| "grad_norm": 0.34934109449386597, |
| "learning_rate": 1e-05, |
| "loss": 0.4535, |
| "step": 1016 |
| }, |
| { |
| "epoch": 0.2801417219909876, |
| "grad_norm": 0.375130295753479, |
| "learning_rate": 1e-05, |
| "loss": 0.4802, |
| "step": 1018 |
| }, |
| { |
| "epoch": 0.28069209865501704, |
| "grad_norm": 0.3595198094844818, |
| "learning_rate": 1e-05, |
| "loss": 0.4736, |
| "step": 1020 |
| }, |
| { |
| "epoch": 0.28124247531904645, |
| "grad_norm": 0.37816157937049866, |
| "learning_rate": 1e-05, |
| "loss": 0.4639, |
| "step": 1022 |
| }, |
| { |
| "epoch": 0.2817928519830759, |
| "grad_norm": 0.39598193764686584, |
| "learning_rate": 1e-05, |
| "loss": 0.4544, |
| "step": 1024 |
| }, |
| { |
| "epoch": 0.28234322864710537, |
| "grad_norm": 0.35407206416130066, |
| "learning_rate": 1e-05, |
| "loss": 0.4342, |
| "step": 1026 |
| }, |
| { |
| "epoch": 0.28289360531113483, |
| "grad_norm": 0.3630298972129822, |
| "learning_rate": 1e-05, |
| "loss": 0.48, |
| "step": 1028 |
| }, |
| { |
| "epoch": 0.28344398197516424, |
| "grad_norm": 0.35917675495147705, |
| "learning_rate": 1e-05, |
| "loss": 0.4647, |
| "step": 1030 |
| }, |
| { |
| "epoch": 0.2839943586391937, |
| "grad_norm": 0.36868980526924133, |
| "learning_rate": 1e-05, |
| "loss": 0.4633, |
| "step": 1032 |
| }, |
| { |
| "epoch": 0.28454473530322316, |
| "grad_norm": 0.38559168577194214, |
| "learning_rate": 1e-05, |
| "loss": 0.4786, |
| "step": 1034 |
| }, |
| { |
| "epoch": 0.28509511196725257, |
| "grad_norm": 0.3563440442085266, |
| "learning_rate": 1e-05, |
| "loss": 0.4703, |
| "step": 1036 |
| }, |
| { |
| "epoch": 0.28564548863128203, |
| "grad_norm": 0.3761630654335022, |
| "learning_rate": 1e-05, |
| "loss": 0.4712, |
| "step": 1038 |
| }, |
| { |
| "epoch": 0.2861958652953115, |
| "grad_norm": 0.3870238661766052, |
| "learning_rate": 1e-05, |
| "loss": 0.4622, |
| "step": 1040 |
| }, |
| { |
| "epoch": 0.2867462419593409, |
| "grad_norm": 0.36192306876182556, |
| "learning_rate": 1e-05, |
| "loss": 0.4619, |
| "step": 1042 |
| }, |
| { |
| "epoch": 0.28729661862337036, |
| "grad_norm": 0.3688748776912689, |
| "learning_rate": 1e-05, |
| "loss": 0.457, |
| "step": 1044 |
| }, |
| { |
| "epoch": 0.2878469952873998, |
| "grad_norm": 0.38211309909820557, |
| "learning_rate": 1e-05, |
| "loss": 0.4661, |
| "step": 1046 |
| }, |
| { |
| "epoch": 0.2883973719514293, |
| "grad_norm": 0.36421847343444824, |
| "learning_rate": 1e-05, |
| "loss": 0.4647, |
| "step": 1048 |
| }, |
| { |
| "epoch": 0.2889477486154587, |
| "grad_norm": 0.38917919993400574, |
| "learning_rate": 1e-05, |
| "loss": 0.4573, |
| "step": 1050 |
| }, |
| { |
| "epoch": 0.28949812527948815, |
| "grad_norm": 0.3668692111968994, |
| "learning_rate": 1e-05, |
| "loss": 0.4545, |
| "step": 1052 |
| }, |
| { |
| "epoch": 0.2900485019435176, |
| "grad_norm": 0.3869079649448395, |
| "learning_rate": 1e-05, |
| "loss": 0.4569, |
| "step": 1054 |
| }, |
| { |
| "epoch": 0.290598878607547, |
| "grad_norm": 0.3763209283351898, |
| "learning_rate": 1e-05, |
| "loss": 0.451, |
| "step": 1056 |
| }, |
| { |
| "epoch": 0.2911492552715765, |
| "grad_norm": 0.37899014353752136, |
| "learning_rate": 1e-05, |
| "loss": 0.4658, |
| "step": 1058 |
| }, |
| { |
| "epoch": 0.29169963193560594, |
| "grad_norm": 0.38784778118133545, |
| "learning_rate": 1e-05, |
| "loss": 0.4589, |
| "step": 1060 |
| }, |
| { |
| "epoch": 0.2922500085996354, |
| "grad_norm": 0.38340142369270325, |
| "learning_rate": 1e-05, |
| "loss": 0.4644, |
| "step": 1062 |
| }, |
| { |
| "epoch": 0.2928003852636648, |
| "grad_norm": 0.3758372962474823, |
| "learning_rate": 1e-05, |
| "loss": 0.4597, |
| "step": 1064 |
| }, |
| { |
| "epoch": 0.29335076192769427, |
| "grad_norm": 0.36990198493003845, |
| "learning_rate": 1e-05, |
| "loss": 0.4577, |
| "step": 1066 |
| }, |
| { |
| "epoch": 0.29390113859172373, |
| "grad_norm": 0.35997095704078674, |
| "learning_rate": 1e-05, |
| "loss": 0.452, |
| "step": 1068 |
| }, |
| { |
| "epoch": 0.29445151525575314, |
| "grad_norm": 0.3728466331958771, |
| "learning_rate": 1e-05, |
| "loss": 0.4567, |
| "step": 1070 |
| }, |
| { |
| "epoch": 0.2950018919197826, |
| "grad_norm": 0.3471437990665436, |
| "learning_rate": 1e-05, |
| "loss": 0.4661, |
| "step": 1072 |
| }, |
| { |
| "epoch": 0.29555226858381206, |
| "grad_norm": 0.39197105169296265, |
| "learning_rate": 1e-05, |
| "loss": 0.4738, |
| "step": 1074 |
| }, |
| { |
| "epoch": 0.29610264524784147, |
| "grad_norm": 0.366745263338089, |
| "learning_rate": 1e-05, |
| "loss": 0.4555, |
| "step": 1076 |
| }, |
| { |
| "epoch": 0.29665302191187093, |
| "grad_norm": 0.3721451759338379, |
| "learning_rate": 1e-05, |
| "loss": 0.4784, |
| "step": 1078 |
| }, |
| { |
| "epoch": 0.2972033985759004, |
| "grad_norm": 0.3505246341228485, |
| "learning_rate": 1e-05, |
| "loss": 0.4486, |
| "step": 1080 |
| }, |
| { |
| "epoch": 0.29775377523992985, |
| "grad_norm": 0.37022680044174194, |
| "learning_rate": 1e-05, |
| "loss": 0.4631, |
| "step": 1082 |
| }, |
| { |
| "epoch": 0.29830415190395926, |
| "grad_norm": 0.3808286190032959, |
| "learning_rate": 1e-05, |
| "loss": 0.472, |
| "step": 1084 |
| }, |
| { |
| "epoch": 0.2988545285679887, |
| "grad_norm": 0.3860435485839844, |
| "learning_rate": 1e-05, |
| "loss": 0.4541, |
| "step": 1086 |
| }, |
| { |
| "epoch": 0.2994049052320182, |
| "grad_norm": 0.35552406311035156, |
| "learning_rate": 1e-05, |
| "loss": 0.4565, |
| "step": 1088 |
| }, |
| { |
| "epoch": 0.2999552818960476, |
| "grad_norm": 0.3758242428302765, |
| "learning_rate": 1e-05, |
| "loss": 0.4803, |
| "step": 1090 |
| }, |
| { |
| "epoch": 0.30050565856007705, |
| "grad_norm": 0.3900710940361023, |
| "learning_rate": 1e-05, |
| "loss": 0.4658, |
| "step": 1092 |
| }, |
| { |
| "epoch": 0.3010560352241065, |
| "grad_norm": 0.38439512252807617, |
| "learning_rate": 1e-05, |
| "loss": 0.4677, |
| "step": 1094 |
| }, |
| { |
| "epoch": 0.3016064118881359, |
| "grad_norm": 0.3970472812652588, |
| "learning_rate": 1e-05, |
| "loss": 0.4751, |
| "step": 1096 |
| }, |
| { |
| "epoch": 0.3021567885521654, |
| "grad_norm": 0.36555778980255127, |
| "learning_rate": 1e-05, |
| "loss": 0.4556, |
| "step": 1098 |
| }, |
| { |
| "epoch": 0.30270716521619484, |
| "grad_norm": 0.3682638108730316, |
| "learning_rate": 1e-05, |
| "loss": 0.4504, |
| "step": 1100 |
| }, |
| { |
| "epoch": 0.3032575418802243, |
| "grad_norm": 0.4228995442390442, |
| "learning_rate": 1e-05, |
| "loss": 0.4736, |
| "step": 1102 |
| }, |
| { |
| "epoch": 0.3038079185442537, |
| "grad_norm": 0.35070449113845825, |
| "learning_rate": 1e-05, |
| "loss": 0.4589, |
| "step": 1104 |
| }, |
| { |
| "epoch": 0.30435829520828317, |
| "grad_norm": 0.40524446964263916, |
| "learning_rate": 1e-05, |
| "loss": 0.4616, |
| "step": 1106 |
| }, |
| { |
| "epoch": 0.30490867187231263, |
| "grad_norm": 0.3461023271083832, |
| "learning_rate": 1e-05, |
| "loss": 0.4679, |
| "step": 1108 |
| }, |
| { |
| "epoch": 0.30545904853634204, |
| "grad_norm": 0.3741723299026489, |
| "learning_rate": 1e-05, |
| "loss": 0.4618, |
| "step": 1110 |
| }, |
| { |
| "epoch": 0.3060094252003715, |
| "grad_norm": 0.37440451979637146, |
| "learning_rate": 1e-05, |
| "loss": 0.4638, |
| "step": 1112 |
| }, |
| { |
| "epoch": 0.30655980186440096, |
| "grad_norm": 0.34469377994537354, |
| "learning_rate": 1e-05, |
| "loss": 0.4426, |
| "step": 1114 |
| }, |
| { |
| "epoch": 0.3071101785284304, |
| "grad_norm": 0.35499683022499084, |
| "learning_rate": 1e-05, |
| "loss": 0.4548, |
| "step": 1116 |
| }, |
| { |
| "epoch": 0.30766055519245983, |
| "grad_norm": 0.3623688220977783, |
| "learning_rate": 1e-05, |
| "loss": 0.4574, |
| "step": 1118 |
| }, |
| { |
| "epoch": 0.3082109318564893, |
| "grad_norm": 0.3487359583377838, |
| "learning_rate": 1e-05, |
| "loss": 0.4632, |
| "step": 1120 |
| }, |
| { |
| "epoch": 0.30876130852051875, |
| "grad_norm": 0.36232292652130127, |
| "learning_rate": 1e-05, |
| "loss": 0.462, |
| "step": 1122 |
| }, |
| { |
| "epoch": 0.30931168518454816, |
| "grad_norm": 0.38301897048950195, |
| "learning_rate": 1e-05, |
| "loss": 0.4545, |
| "step": 1124 |
| }, |
| { |
| "epoch": 0.3098620618485776, |
| "grad_norm": 0.3788921535015106, |
| "learning_rate": 1e-05, |
| "loss": 0.4614, |
| "step": 1126 |
| }, |
| { |
| "epoch": 0.3104124385126071, |
| "grad_norm": 0.3723096251487732, |
| "learning_rate": 1e-05, |
| "loss": 0.4658, |
| "step": 1128 |
| }, |
| { |
| "epoch": 0.3109628151766365, |
| "grad_norm": 0.3926720917224884, |
| "learning_rate": 1e-05, |
| "loss": 0.4602, |
| "step": 1130 |
| }, |
| { |
| "epoch": 0.31151319184066595, |
| "grad_norm": 0.3565811514854431, |
| "learning_rate": 1e-05, |
| "loss": 0.4692, |
| "step": 1132 |
| }, |
| { |
| "epoch": 0.3120635685046954, |
| "grad_norm": 0.38179391622543335, |
| "learning_rate": 1e-05, |
| "loss": 0.4581, |
| "step": 1134 |
| }, |
| { |
| "epoch": 0.31261394516872487, |
| "grad_norm": 0.3732840418815613, |
| "learning_rate": 1e-05, |
| "loss": 0.4628, |
| "step": 1136 |
| }, |
| { |
| "epoch": 0.3131643218327543, |
| "grad_norm": 0.3934018313884735, |
| "learning_rate": 1e-05, |
| "loss": 0.4634, |
| "step": 1138 |
| }, |
| { |
| "epoch": 0.31371469849678374, |
| "grad_norm": 0.3575834035873413, |
| "learning_rate": 1e-05, |
| "loss": 0.4507, |
| "step": 1140 |
| }, |
| { |
| "epoch": 0.3142650751608132, |
| "grad_norm": 0.3623636066913605, |
| "learning_rate": 1e-05, |
| "loss": 0.4547, |
| "step": 1142 |
| }, |
| { |
| "epoch": 0.3148154518248426, |
| "grad_norm": 0.3794458508491516, |
| "learning_rate": 1e-05, |
| "loss": 0.4661, |
| "step": 1144 |
| }, |
| { |
| "epoch": 0.31536582848887207, |
| "grad_norm": 0.3896718919277191, |
| "learning_rate": 1e-05, |
| "loss": 0.4646, |
| "step": 1146 |
| }, |
| { |
| "epoch": 0.31591620515290153, |
| "grad_norm": 0.3608621060848236, |
| "learning_rate": 1e-05, |
| "loss": 0.4522, |
| "step": 1148 |
| }, |
| { |
| "epoch": 0.31646658181693094, |
| "grad_norm": 0.37019404768943787, |
| "learning_rate": 1e-05, |
| "loss": 0.4548, |
| "step": 1150 |
| }, |
| { |
| "epoch": 0.3170169584809604, |
| "grad_norm": 0.37957248091697693, |
| "learning_rate": 1e-05, |
| "loss": 0.4554, |
| "step": 1152 |
| }, |
| { |
| "epoch": 0.31756733514498986, |
| "grad_norm": 0.3605276048183441, |
| "learning_rate": 1e-05, |
| "loss": 0.4679, |
| "step": 1154 |
| }, |
| { |
| "epoch": 0.3181177118090193, |
| "grad_norm": 0.37218716740608215, |
| "learning_rate": 1e-05, |
| "loss": 0.4686, |
| "step": 1156 |
| }, |
| { |
| "epoch": 0.3186680884730487, |
| "grad_norm": 0.37037035822868347, |
| "learning_rate": 1e-05, |
| "loss": 0.4898, |
| "step": 1158 |
| }, |
| { |
| "epoch": 0.3192184651370782, |
| "grad_norm": 0.3569047749042511, |
| "learning_rate": 1e-05, |
| "loss": 0.4619, |
| "step": 1160 |
| }, |
| { |
| "epoch": 0.31976884180110765, |
| "grad_norm": 0.3728378117084503, |
| "learning_rate": 1e-05, |
| "loss": 0.4544, |
| "step": 1162 |
| }, |
| { |
| "epoch": 0.32031921846513706, |
| "grad_norm": 0.35970696806907654, |
| "learning_rate": 1e-05, |
| "loss": 0.4704, |
| "step": 1164 |
| }, |
| { |
| "epoch": 0.3208695951291665, |
| "grad_norm": 0.36476969718933105, |
| "learning_rate": 1e-05, |
| "loss": 0.4605, |
| "step": 1166 |
| }, |
| { |
| "epoch": 0.321419971793196, |
| "grad_norm": 0.35015928745269775, |
| "learning_rate": 1e-05, |
| "loss": 0.4653, |
| "step": 1168 |
| }, |
| { |
| "epoch": 0.3219703484572254, |
| "grad_norm": 0.3600417375564575, |
| "learning_rate": 1e-05, |
| "loss": 0.4557, |
| "step": 1170 |
| }, |
| { |
| "epoch": 0.32252072512125485, |
| "grad_norm": 0.36994755268096924, |
| "learning_rate": 1e-05, |
| "loss": 0.4601, |
| "step": 1172 |
| }, |
| { |
| "epoch": 0.3230711017852843, |
| "grad_norm": 0.39908286929130554, |
| "learning_rate": 1e-05, |
| "loss": 0.472, |
| "step": 1174 |
| }, |
| { |
| "epoch": 0.32362147844931377, |
| "grad_norm": 0.3717789947986603, |
| "learning_rate": 1e-05, |
| "loss": 0.4646, |
| "step": 1176 |
| }, |
| { |
| "epoch": 0.3241718551133432, |
| "grad_norm": 0.3617453873157501, |
| "learning_rate": 1e-05, |
| "loss": 0.4606, |
| "step": 1178 |
| }, |
| { |
| "epoch": 0.32472223177737264, |
| "grad_norm": 0.35809728503227234, |
| "learning_rate": 1e-05, |
| "loss": 0.4548, |
| "step": 1180 |
| }, |
| { |
| "epoch": 0.3252726084414021, |
| "grad_norm": 0.3767383396625519, |
| "learning_rate": 1e-05, |
| "loss": 0.4785, |
| "step": 1182 |
| }, |
| { |
| "epoch": 0.3258229851054315, |
| "grad_norm": 0.3819461166858673, |
| "learning_rate": 1e-05, |
| "loss": 0.4695, |
| "step": 1184 |
| }, |
| { |
| "epoch": 0.32637336176946097, |
| "grad_norm": 0.3590524196624756, |
| "learning_rate": 1e-05, |
| "loss": 0.468, |
| "step": 1186 |
| }, |
| { |
| "epoch": 0.32692373843349043, |
| "grad_norm": 0.37356823682785034, |
| "learning_rate": 1e-05, |
| "loss": 0.4628, |
| "step": 1188 |
| }, |
| { |
| "epoch": 0.3274741150975199, |
| "grad_norm": 0.39389410614967346, |
| "learning_rate": 1e-05, |
| "loss": 0.4686, |
| "step": 1190 |
| }, |
| { |
| "epoch": 0.3280244917615493, |
| "grad_norm": 0.36901354789733887, |
| "learning_rate": 1e-05, |
| "loss": 0.4623, |
| "step": 1192 |
| }, |
| { |
| "epoch": 0.32857486842557876, |
| "grad_norm": 0.35733821988105774, |
| "learning_rate": 1e-05, |
| "loss": 0.457, |
| "step": 1194 |
| }, |
| { |
| "epoch": 0.3291252450896082, |
| "grad_norm": 0.3803520202636719, |
| "learning_rate": 1e-05, |
| "loss": 0.4661, |
| "step": 1196 |
| }, |
| { |
| "epoch": 0.3296756217536376, |
| "grad_norm": 0.36812326312065125, |
| "learning_rate": 1e-05, |
| "loss": 0.453, |
| "step": 1198 |
| }, |
| { |
| "epoch": 0.3302259984176671, |
| "grad_norm": 0.37463024258613586, |
| "learning_rate": 1e-05, |
| "loss": 0.4611, |
| "step": 1200 |
| }, |
| { |
| "epoch": 0.3302259984176671, |
| "eval_merge_loss": 0.4038620591163635, |
| "eval_merge_runtime": 600.528, |
| "eval_merge_samples_per_second": 56.204, |
| "eval_merge_steps_per_second": 2.343, |
| "step": 1200 |
| }, |
| { |
| "epoch": 0.3302259984176671, |
| "eval_new_aug_datas_filtered.json_loss": 0.5311903953552246, |
| "eval_new_aug_datas_filtered.json_runtime": 10.3899, |
| "eval_new_aug_datas_filtered.json_samples_per_second": 73.822, |
| "eval_new_aug_datas_filtered.json_steps_per_second": 3.08, |
| "step": 1200 |
| }, |
| { |
| "epoch": 0.3302259984176671, |
| "eval_sharegpt_gpt4.json_loss": 0.7848892211914062, |
| "eval_sharegpt_gpt4.json_runtime": 31.7548, |
| "eval_sharegpt_gpt4.json_samples_per_second": 58.605, |
| "eval_sharegpt_gpt4.json_steps_per_second": 2.456, |
| "step": 1200 |
| }, |
| { |
| "epoch": 0.3302259984176671, |
| "eval_Table_GPT.json_loss": 0.07294219732284546, |
| "eval_Table_GPT.json_runtime": 25.0251, |
| "eval_Table_GPT.json_samples_per_second": 83.636, |
| "eval_Table_GPT.json_steps_per_second": 3.516, |
| "step": 1200 |
| }, |
| { |
| "epoch": 0.3302259984176671, |
| "eval_gpt_4o_200k.json_loss": 0.8128483295440674, |
| "eval_gpt_4o_200k.json_runtime": 48.5727, |
| "eval_gpt_4o_200k.json_samples_per_second": 129.311, |
| "eval_gpt_4o_200k.json_steps_per_second": 5.394, |
| "step": 1200 |
| }, |
| { |
| "epoch": 0.3302259984176671, |
| "eval_multi_turn_datas.json_loss": 0.343874990940094, |
| "eval_multi_turn_datas.json_runtime": 75.779, |
| "eval_multi_turn_datas.json_samples_per_second": 52.811, |
| "eval_multi_turn_datas.json_steps_per_second": 2.204, |
| "step": 1200 |
| }, |
| { |
| "epoch": 0.3302259984176671, |
| "eval_table_python_code_datas.json_loss": 0.2791996896266937, |
| "eval_table_python_code_datas.json_runtime": 43.1703, |
| "eval_table_python_code_datas.json_samples_per_second": 50.011, |
| "eval_table_python_code_datas.json_steps_per_second": 2.085, |
| "step": 1200 |
| }, |
| { |
| "epoch": 0.3302259984176671, |
| "eval_tabular_llm_data.json_loss": 0.11510641872882843, |
| "eval_tabular_llm_data.json_runtime": 8.5754, |
| "eval_tabular_llm_data.json_samples_per_second": 28.687, |
| "eval_tabular_llm_data.json_steps_per_second": 1.283, |
| "step": 1200 |
| }, |
| { |
| "epoch": 0.3302259984176671, |
| "eval_python_code_critic_21k.json_loss": 0.5806341171264648, |
| "eval_python_code_critic_21k.json_runtime": 3.2355, |
| "eval_python_code_critic_21k.json_samples_per_second": 184.517, |
| "eval_python_code_critic_21k.json_steps_per_second": 7.727, |
| "step": 1200 |
| }, |
| { |
| "epoch": 0.3302259984176671, |
| "eval_all_merge_table_dataset.json_loss": 0.0781954750418663, |
| "eval_all_merge_table_dataset.json_runtime": 23.3576, |
| "eval_all_merge_table_dataset.json_samples_per_second": 30.483, |
| "eval_all_merge_table_dataset.json_steps_per_second": 1.284, |
| "step": 1200 |
| }, |
| { |
| "epoch": 0.3302259984176671, |
| "eval_code_feedback_multi_turn.json_loss": 0.5880293846130371, |
| "eval_code_feedback_multi_turn.json_runtime": 32.5337, |
| "eval_code_feedback_multi_turn.json_samples_per_second": 67.653, |
| "eval_code_feedback_multi_turn.json_steps_per_second": 2.828, |
| "step": 1200 |
| }, |
| { |
| "epoch": 0.3302259984176671, |
| "eval_ultrainteract_sft.json_loss": 0.42568570375442505, |
| "eval_ultrainteract_sft.json_runtime": 8.665, |
| "eval_ultrainteract_sft.json_samples_per_second": 168.033, |
| "eval_ultrainteract_sft.json_steps_per_second": 7.04, |
| "step": 1200 |
| }, |
| { |
| "epoch": 0.3302259984176671, |
| "eval_synthetic_text_to_sql.json_loss": 0.10025755316019058, |
| "eval_synthetic_text_to_sql.json_runtime": 0.127, |
| "eval_synthetic_text_to_sql.json_samples_per_second": 267.683, |
| "eval_synthetic_text_to_sql.json_steps_per_second": 15.746, |
| "step": 1200 |
| }, |
| { |
| "epoch": 0.3302259984176671, |
| "eval_sft_react_sql_datas.json_loss": 0.6435717344284058, |
| "eval_sft_react_sql_datas.json_runtime": 7.8854, |
| "eval_sft_react_sql_datas.json_samples_per_second": 39.82, |
| "eval_sft_react_sql_datas.json_steps_per_second": 1.775, |
| "step": 1200 |
| }, |
| { |
| "epoch": 0.3302259984176671, |
| "eval_all_merge_code.json_loss": 0.29655295610427856, |
| "eval_all_merge_code.json_runtime": 0.3333, |
| "eval_all_merge_code.json_samples_per_second": 189.039, |
| "eval_all_merge_code.json_steps_per_second": 9.002, |
| "step": 1200 |
| }, |
| { |
| "epoch": 0.3302259984176671, |
| "eval_magpie_datas.json_loss": 0.4353857934474945, |
| "eval_magpie_datas.json_runtime": 2.22, |
| "eval_magpie_datas.json_samples_per_second": 77.478, |
| "eval_magpie_datas.json_steps_per_second": 3.604, |
| "step": 1200 |
| }, |
| { |
| "epoch": 0.3302259984176671, |
| "eval_train_data_for_qwen.json_loss": 0.0036680654156953096, |
| "eval_train_data_for_qwen.json_runtime": 0.2448, |
| "eval_train_data_for_qwen.json_samples_per_second": 40.856, |
| "eval_train_data_for_qwen.json_steps_per_second": 4.086, |
| "step": 1200 |
| }, |
| { |
| "epoch": 0.3302259984176671, |
| "eval_alpaca_cleaned.json_loss": 0.9278478622436523, |
| "eval_alpaca_cleaned.json_runtime": 0.1139, |
| "eval_alpaca_cleaned.json_samples_per_second": 237.139, |
| "eval_alpaca_cleaned.json_steps_per_second": 17.566, |
| "step": 1200 |
| }, |
| { |
| "epoch": 0.3302259984176671, |
| "eval_agent_instruct.json_loss": 0.22283704578876495, |
| "eval_agent_instruct.json_runtime": 0.5129, |
| "eval_agent_instruct.json_samples_per_second": 93.582, |
| "eval_agent_instruct.json_steps_per_second": 3.899, |
| "step": 1200 |
| }, |
| { |
| "epoch": 0.3302259984176671, |
| "eval_MathInstruct.json_loss": 0.20810073614120483, |
| "eval_MathInstruct.json_runtime": 0.3587, |
| "eval_MathInstruct.json_samples_per_second": 158.905, |
| "eval_MathInstruct.json_steps_per_second": 8.363, |
| "step": 1200 |
| }, |
| { |
| "epoch": 0.3302259984176671, |
| "eval_tested_143k_python_alpaca.json_loss": 0.44691047072410583, |
| "eval_tested_143k_python_alpaca.json_runtime": 0.3024, |
| "eval_tested_143k_python_alpaca.json_samples_per_second": 112.419, |
| "eval_tested_143k_python_alpaca.json_steps_per_second": 6.613, |
| "step": 1200 |
| }, |
| { |
| "epoch": 0.3302259984176671, |
| "eval_xlam_function_calling_60k.json_loss": 0.009029570966959, |
| "eval_xlam_function_calling_60k.json_runtime": 0.1005, |
| "eval_xlam_function_calling_60k.json_samples_per_second": 228.948, |
| "eval_xlam_function_calling_60k.json_steps_per_second": 9.954, |
| "step": 1200 |
| }, |
| { |
| "epoch": 0.3302259984176671, |
| "eval_alpaca_data_gpt4_chinese.json_loss": 1.5715256929397583, |
| "eval_alpaca_data_gpt4_chinese.json_runtime": 0.0514, |
| "eval_alpaca_data_gpt4_chinese.json_samples_per_second": 311.088, |
| "eval_alpaca_data_gpt4_chinese.json_steps_per_second": 19.443, |
| "step": 1200 |
| }, |
| { |
| "epoch": 0.3302259984176671, |
| "eval_alpaca_gpt4_zh.json_loss": 0.9568694233894348, |
| "eval_alpaca_gpt4_zh.json_runtime": 0.0501, |
| "eval_alpaca_gpt4_zh.json_samples_per_second": 219.517, |
| "eval_alpaca_gpt4_zh.json_steps_per_second": 19.956, |
| "step": 1200 |
| }, |
| { |
| "epoch": 0.3302259984176671, |
| "eval_codefeedback_filtered_instruction.json_loss": 0.5982481837272644, |
| "eval_codefeedback_filtered_instruction.json_runtime": 0.487, |
| "eval_codefeedback_filtered_instruction.json_samples_per_second": 41.068, |
| "eval_codefeedback_filtered_instruction.json_steps_per_second": 2.053, |
| "step": 1200 |
| }, |
| { |
| "epoch": 0.33077637508169655, |
| "grad_norm": 0.3862474262714386, |
| "learning_rate": 1e-05, |
| "loss": 0.467, |
| "step": 1202 |
| }, |
| { |
| "epoch": 0.33132675174572596, |
| "grad_norm": 0.3586987555027008, |
| "learning_rate": 1e-05, |
| "loss": 0.4586, |
| "step": 1204 |
| }, |
| { |
| "epoch": 0.3318771284097554, |
| "grad_norm": 0.36768838763237, |
| "learning_rate": 1e-05, |
| "loss": 0.4658, |
| "step": 1206 |
| }, |
| { |
| "epoch": 0.3324275050737849, |
| "grad_norm": 0.36789608001708984, |
| "learning_rate": 1e-05, |
| "loss": 0.4479, |
| "step": 1208 |
| }, |
| { |
| "epoch": 0.33297788173781434, |
| "grad_norm": 0.3875747323036194, |
| "learning_rate": 1e-05, |
| "loss": 0.4651, |
| "step": 1210 |
| }, |
| { |
| "epoch": 0.33352825840184375, |
| "grad_norm": 0.37122058868408203, |
| "learning_rate": 1e-05, |
| "loss": 0.4474, |
| "step": 1212 |
| }, |
| { |
| "epoch": 0.3340786350658732, |
| "grad_norm": 0.3785482347011566, |
| "learning_rate": 1e-05, |
| "loss": 0.4573, |
| "step": 1214 |
| }, |
| { |
| "epoch": 0.33462901172990267, |
| "grad_norm": 0.3795594871044159, |
| "learning_rate": 1e-05, |
| "loss": 0.4633, |
| "step": 1216 |
| }, |
| { |
| "epoch": 0.3351793883939321, |
| "grad_norm": 0.35303714871406555, |
| "learning_rate": 1e-05, |
| "loss": 0.4701, |
| "step": 1218 |
| }, |
| { |
| "epoch": 0.33572976505796154, |
| "grad_norm": 0.3473946154117584, |
| "learning_rate": 1e-05, |
| "loss": 0.4565, |
| "step": 1220 |
| }, |
| { |
| "epoch": 0.336280141721991, |
| "grad_norm": 0.36495375633239746, |
| "learning_rate": 1e-05, |
| "loss": 0.4528, |
| "step": 1222 |
| }, |
| { |
| "epoch": 0.3368305183860204, |
| "grad_norm": 0.3617894649505615, |
| "learning_rate": 1e-05, |
| "loss": 0.4756, |
| "step": 1224 |
| }, |
| { |
| "epoch": 0.33738089505004987, |
| "grad_norm": 0.36371487379074097, |
| "learning_rate": 1e-05, |
| "loss": 0.4606, |
| "step": 1226 |
| }, |
| { |
| "epoch": 0.33793127171407933, |
| "grad_norm": 0.39192309975624084, |
| "learning_rate": 1e-05, |
| "loss": 0.4435, |
| "step": 1228 |
| }, |
| { |
| "epoch": 0.3384816483781088, |
| "grad_norm": 0.3902663588523865, |
| "learning_rate": 1e-05, |
| "loss": 0.4699, |
| "step": 1230 |
| }, |
| { |
| "epoch": 0.3390320250421382, |
| "grad_norm": 0.3662269115447998, |
| "learning_rate": 1e-05, |
| "loss": 0.4627, |
| "step": 1232 |
| }, |
| { |
| "epoch": 0.33958240170616766, |
| "grad_norm": 0.3659150004386902, |
| "learning_rate": 1e-05, |
| "loss": 0.4663, |
| "step": 1234 |
| }, |
| { |
| "epoch": 0.3401327783701971, |
| "grad_norm": 0.3632274568080902, |
| "learning_rate": 1e-05, |
| "loss": 0.4499, |
| "step": 1236 |
| }, |
| { |
| "epoch": 0.3406831550342265, |
| "grad_norm": 0.38413625955581665, |
| "learning_rate": 1e-05, |
| "loss": 0.4516, |
| "step": 1238 |
| }, |
| { |
| "epoch": 0.341233531698256, |
| "grad_norm": 0.35747644305229187, |
| "learning_rate": 1e-05, |
| "loss": 0.4718, |
| "step": 1240 |
| }, |
| { |
| "epoch": 0.34178390836228545, |
| "grad_norm": 0.36938604712486267, |
| "learning_rate": 1e-05, |
| "loss": 0.4568, |
| "step": 1242 |
| }, |
| { |
| "epoch": 0.3423342850263149, |
| "grad_norm": 0.38448217511177063, |
| "learning_rate": 1e-05, |
| "loss": 0.474, |
| "step": 1244 |
| }, |
| { |
| "epoch": 0.3428846616903443, |
| "grad_norm": 0.3694998323917389, |
| "learning_rate": 1e-05, |
| "loss": 0.4516, |
| "step": 1246 |
| }, |
| { |
| "epoch": 0.3434350383543738, |
| "grad_norm": 0.41237321496009827, |
| "learning_rate": 1e-05, |
| "loss": 0.4569, |
| "step": 1248 |
| }, |
| { |
| "epoch": 0.34398541501840324, |
| "grad_norm": 0.4058983325958252, |
| "learning_rate": 1e-05, |
| "loss": 0.4657, |
| "step": 1250 |
| }, |
| { |
| "epoch": 0.34453579168243265, |
| "grad_norm": 0.3610474467277527, |
| "learning_rate": 1e-05, |
| "loss": 0.4587, |
| "step": 1252 |
| }, |
| { |
| "epoch": 0.3450861683464621, |
| "grad_norm": 0.3664454221725464, |
| "learning_rate": 1e-05, |
| "loss": 0.4656, |
| "step": 1254 |
| }, |
| { |
| "epoch": 0.34563654501049157, |
| "grad_norm": 0.35148540139198303, |
| "learning_rate": 1e-05, |
| "loss": 0.4471, |
| "step": 1256 |
| }, |
| { |
| "epoch": 0.346186921674521, |
| "grad_norm": 0.35331565141677856, |
| "learning_rate": 1e-05, |
| "loss": 0.4674, |
| "step": 1258 |
| }, |
| { |
| "epoch": 0.34673729833855044, |
| "grad_norm": 0.35367992520332336, |
| "learning_rate": 1e-05, |
| "loss": 0.4572, |
| "step": 1260 |
| }, |
| { |
| "epoch": 0.3472876750025799, |
| "grad_norm": 0.36106035113334656, |
| "learning_rate": 1e-05, |
| "loss": 0.466, |
| "step": 1262 |
| }, |
| { |
| "epoch": 0.34783805166660936, |
| "grad_norm": 0.36034414172172546, |
| "learning_rate": 1e-05, |
| "loss": 0.4412, |
| "step": 1264 |
| }, |
| { |
| "epoch": 0.34838842833063877, |
| "grad_norm": 0.3532898426055908, |
| "learning_rate": 1e-05, |
| "loss": 0.4573, |
| "step": 1266 |
| }, |
| { |
| "epoch": 0.34893880499466823, |
| "grad_norm": 0.35383620858192444, |
| "learning_rate": 1e-05, |
| "loss": 0.4644, |
| "step": 1268 |
| }, |
| { |
| "epoch": 0.3494891816586977, |
| "grad_norm": 0.3757399022579193, |
| "learning_rate": 1e-05, |
| "loss": 0.4548, |
| "step": 1270 |
| }, |
| { |
| "epoch": 0.3500395583227271, |
| "grad_norm": 0.35997340083122253, |
| "learning_rate": 1e-05, |
| "loss": 0.4664, |
| "step": 1272 |
| }, |
| { |
| "epoch": 0.35058993498675656, |
| "grad_norm": 0.3761090636253357, |
| "learning_rate": 1e-05, |
| "loss": 0.4601, |
| "step": 1274 |
| }, |
| { |
| "epoch": 0.351140311650786, |
| "grad_norm": 0.33666959404945374, |
| "learning_rate": 1e-05, |
| "loss": 0.4596, |
| "step": 1276 |
| }, |
| { |
| "epoch": 0.3516906883148154, |
| "grad_norm": 0.36252304911613464, |
| "learning_rate": 1e-05, |
| "loss": 0.4688, |
| "step": 1278 |
| }, |
| { |
| "epoch": 0.3522410649788449, |
| "grad_norm": 0.3987884819507599, |
| "learning_rate": 1e-05, |
| "loss": 0.4444, |
| "step": 1280 |
| }, |
| { |
| "epoch": 0.35279144164287435, |
| "grad_norm": 0.35914021730422974, |
| "learning_rate": 1e-05, |
| "loss": 0.4508, |
| "step": 1282 |
| }, |
| { |
| "epoch": 0.3533418183069038, |
| "grad_norm": 0.36508429050445557, |
| "learning_rate": 1e-05, |
| "loss": 0.4597, |
| "step": 1284 |
| }, |
| { |
| "epoch": 0.3538921949709332, |
| "grad_norm": 0.3923473060131073, |
| "learning_rate": 1e-05, |
| "loss": 0.4594, |
| "step": 1286 |
| }, |
| { |
| "epoch": 0.3544425716349627, |
| "grad_norm": 0.38775792717933655, |
| "learning_rate": 1e-05, |
| "loss": 0.4573, |
| "step": 1288 |
| }, |
| { |
| "epoch": 0.35499294829899214, |
| "grad_norm": 0.4628289043903351, |
| "learning_rate": 1e-05, |
| "loss": 0.4732, |
| "step": 1290 |
| }, |
| { |
| "epoch": 0.35554332496302155, |
| "grad_norm": 0.35442307591438293, |
| "learning_rate": 1e-05, |
| "loss": 0.4621, |
| "step": 1292 |
| }, |
| { |
| "epoch": 0.356093701627051, |
| "grad_norm": 0.3809347152709961, |
| "learning_rate": 1e-05, |
| "loss": 0.4696, |
| "step": 1294 |
| }, |
| { |
| "epoch": 0.35664407829108047, |
| "grad_norm": 0.3683224618434906, |
| "learning_rate": 1e-05, |
| "loss": 0.4649, |
| "step": 1296 |
| }, |
| { |
| "epoch": 0.35719445495510993, |
| "grad_norm": 0.3792459964752197, |
| "learning_rate": 1e-05, |
| "loss": 0.465, |
| "step": 1298 |
| }, |
| { |
| "epoch": 0.35774483161913934, |
| "grad_norm": 0.3704141080379486, |
| "learning_rate": 1e-05, |
| "loss": 0.4572, |
| "step": 1300 |
| }, |
| { |
| "epoch": 0.3582952082831688, |
| "grad_norm": 0.3618161678314209, |
| "learning_rate": 1e-05, |
| "loss": 0.4497, |
| "step": 1302 |
| }, |
| { |
| "epoch": 0.35884558494719826, |
| "grad_norm": 0.36538904905319214, |
| "learning_rate": 1e-05, |
| "loss": 0.4525, |
| "step": 1304 |
| }, |
| { |
| "epoch": 0.35939596161122767, |
| "grad_norm": 0.36815035343170166, |
| "learning_rate": 1e-05, |
| "loss": 0.4767, |
| "step": 1306 |
| }, |
| { |
| "epoch": 0.3599463382752571, |
| "grad_norm": 0.39006996154785156, |
| "learning_rate": 1e-05, |
| "loss": 0.4809, |
| "step": 1308 |
| }, |
| { |
| "epoch": 0.3604967149392866, |
| "grad_norm": 0.3829619288444519, |
| "learning_rate": 1e-05, |
| "loss": 0.4714, |
| "step": 1310 |
| }, |
| { |
| "epoch": 0.361047091603316, |
| "grad_norm": 0.37935730814933777, |
| "learning_rate": 1e-05, |
| "loss": 0.4518, |
| "step": 1312 |
| }, |
| { |
| "epoch": 0.36159746826734546, |
| "grad_norm": 0.371320903301239, |
| "learning_rate": 1e-05, |
| "loss": 0.4437, |
| "step": 1314 |
| }, |
| { |
| "epoch": 0.3621478449313749, |
| "grad_norm": 0.35784757137298584, |
| "learning_rate": 1e-05, |
| "loss": 0.4579, |
| "step": 1316 |
| }, |
| { |
| "epoch": 0.3626982215954044, |
| "grad_norm": 0.36308974027633667, |
| "learning_rate": 1e-05, |
| "loss": 0.4561, |
| "step": 1318 |
| }, |
| { |
| "epoch": 0.3632485982594338, |
| "grad_norm": 0.3538898825645447, |
| "learning_rate": 1e-05, |
| "loss": 0.4574, |
| "step": 1320 |
| }, |
| { |
| "epoch": 0.36379897492346325, |
| "grad_norm": 0.3715920150279999, |
| "learning_rate": 1e-05, |
| "loss": 0.4649, |
| "step": 1322 |
| }, |
| { |
| "epoch": 0.3643493515874927, |
| "grad_norm": 0.3698347806930542, |
| "learning_rate": 1e-05, |
| "loss": 0.4648, |
| "step": 1324 |
| }, |
| { |
| "epoch": 0.3648997282515221, |
| "grad_norm": 0.3725499212741852, |
| "learning_rate": 1e-05, |
| "loss": 0.4669, |
| "step": 1326 |
| }, |
| { |
| "epoch": 0.3654501049155516, |
| "grad_norm": 0.37399542331695557, |
| "learning_rate": 1e-05, |
| "loss": 0.4615, |
| "step": 1328 |
| }, |
| { |
| "epoch": 0.36600048157958104, |
| "grad_norm": 0.35364219546318054, |
| "learning_rate": 1e-05, |
| "loss": 0.4573, |
| "step": 1330 |
| }, |
| { |
| "epoch": 0.36655085824361044, |
| "grad_norm": 0.3651660084724426, |
| "learning_rate": 1e-05, |
| "loss": 0.4485, |
| "step": 1332 |
| }, |
| { |
| "epoch": 0.3671012349076399, |
| "grad_norm": 0.3659324645996094, |
| "learning_rate": 1e-05, |
| "loss": 0.4492, |
| "step": 1334 |
| }, |
| { |
| "epoch": 0.36765161157166937, |
| "grad_norm": 0.35941600799560547, |
| "learning_rate": 1e-05, |
| "loss": 0.4727, |
| "step": 1336 |
| }, |
| { |
| "epoch": 0.36820198823569883, |
| "grad_norm": 0.35083696246147156, |
| "learning_rate": 1e-05, |
| "loss": 0.4453, |
| "step": 1338 |
| }, |
| { |
| "epoch": 0.36875236489972824, |
| "grad_norm": 0.3690749406814575, |
| "learning_rate": 1e-05, |
| "loss": 0.4582, |
| "step": 1340 |
| }, |
| { |
| "epoch": 0.3693027415637577, |
| "grad_norm": 0.3743647038936615, |
| "learning_rate": 1e-05, |
| "loss": 0.4383, |
| "step": 1342 |
| }, |
| { |
| "epoch": 0.36985311822778716, |
| "grad_norm": 0.37354332208633423, |
| "learning_rate": 1e-05, |
| "loss": 0.466, |
| "step": 1344 |
| }, |
| { |
| "epoch": 0.37040349489181656, |
| "grad_norm": 0.3735334575176239, |
| "learning_rate": 1e-05, |
| "loss": 0.4535, |
| "step": 1346 |
| }, |
| { |
| "epoch": 0.370953871555846, |
| "grad_norm": 0.37339311838150024, |
| "learning_rate": 1e-05, |
| "loss": 0.454, |
| "step": 1348 |
| }, |
| { |
| "epoch": 0.3715042482198755, |
| "grad_norm": 0.35196128487586975, |
| "learning_rate": 1e-05, |
| "loss": 0.4685, |
| "step": 1350 |
| }, |
| { |
| "epoch": 0.37205462488390495, |
| "grad_norm": 0.4031345546245575, |
| "learning_rate": 1e-05, |
| "loss": 0.4689, |
| "step": 1352 |
| }, |
| { |
| "epoch": 0.37260500154793436, |
| "grad_norm": 0.363320529460907, |
| "learning_rate": 1e-05, |
| "loss": 0.459, |
| "step": 1354 |
| }, |
| { |
| "epoch": 0.3731553782119638, |
| "grad_norm": 0.36146363615989685, |
| "learning_rate": 1e-05, |
| "loss": 0.4446, |
| "step": 1356 |
| }, |
| { |
| "epoch": 0.3737057548759933, |
| "grad_norm": 0.36425283551216125, |
| "learning_rate": 1e-05, |
| "loss": 0.468, |
| "step": 1358 |
| }, |
| { |
| "epoch": 0.3742561315400227, |
| "grad_norm": 0.3795093894004822, |
| "learning_rate": 1e-05, |
| "loss": 0.4513, |
| "step": 1360 |
| }, |
| { |
| "epoch": 0.37480650820405215, |
| "grad_norm": 0.37901571393013, |
| "learning_rate": 1e-05, |
| "loss": 0.464, |
| "step": 1362 |
| }, |
| { |
| "epoch": 0.3753568848680816, |
| "grad_norm": 0.3682788014411926, |
| "learning_rate": 1e-05, |
| "loss": 0.4535, |
| "step": 1364 |
| }, |
| { |
| "epoch": 0.375907261532111, |
| "grad_norm": 0.38756048679351807, |
| "learning_rate": 1e-05, |
| "loss": 0.4421, |
| "step": 1366 |
| }, |
| { |
| "epoch": 0.3764576381961405, |
| "grad_norm": 0.3859202563762665, |
| "learning_rate": 1e-05, |
| "loss": 0.4601, |
| "step": 1368 |
| }, |
| { |
| "epoch": 0.37700801486016994, |
| "grad_norm": 0.3959304392337799, |
| "learning_rate": 1e-05, |
| "loss": 0.4427, |
| "step": 1370 |
| }, |
| { |
| "epoch": 0.3775583915241994, |
| "grad_norm": 0.3768652379512787, |
| "learning_rate": 1e-05, |
| "loss": 0.4483, |
| "step": 1372 |
| }, |
| { |
| "epoch": 0.3781087681882288, |
| "grad_norm": 0.37339305877685547, |
| "learning_rate": 1e-05, |
| "loss": 0.4605, |
| "step": 1374 |
| }, |
| { |
| "epoch": 0.37865914485225827, |
| "grad_norm": 0.4036271572113037, |
| "learning_rate": 1e-05, |
| "loss": 0.4546, |
| "step": 1376 |
| }, |
| { |
| "epoch": 0.37920952151628773, |
| "grad_norm": 0.35173818469047546, |
| "learning_rate": 1e-05, |
| "loss": 0.4675, |
| "step": 1378 |
| }, |
| { |
| "epoch": 0.37975989818031713, |
| "grad_norm": 0.3682287335395813, |
| "learning_rate": 1e-05, |
| "loss": 0.4676, |
| "step": 1380 |
| }, |
| { |
| "epoch": 0.3803102748443466, |
| "grad_norm": 0.37660422921180725, |
| "learning_rate": 1e-05, |
| "loss": 0.4483, |
| "step": 1382 |
| }, |
| { |
| "epoch": 0.38086065150837606, |
| "grad_norm": 0.37428486347198486, |
| "learning_rate": 1e-05, |
| "loss": 0.4537, |
| "step": 1384 |
| }, |
| { |
| "epoch": 0.38141102817240546, |
| "grad_norm": 0.36140507459640503, |
| "learning_rate": 1e-05, |
| "loss": 0.4542, |
| "step": 1386 |
| }, |
| { |
| "epoch": 0.3819614048364349, |
| "grad_norm": 0.3818880021572113, |
| "learning_rate": 1e-05, |
| "loss": 0.4546, |
| "step": 1388 |
| }, |
| { |
| "epoch": 0.3825117815004644, |
| "grad_norm": 0.3840683698654175, |
| "learning_rate": 1e-05, |
| "loss": 0.4419, |
| "step": 1390 |
| }, |
| { |
| "epoch": 0.38306215816449385, |
| "grad_norm": 0.36933979392051697, |
| "learning_rate": 1e-05, |
| "loss": 0.4632, |
| "step": 1392 |
| }, |
| { |
| "epoch": 0.38361253482852326, |
| "grad_norm": 0.3724002540111542, |
| "learning_rate": 1e-05, |
| "loss": 0.455, |
| "step": 1394 |
| }, |
| { |
| "epoch": 0.3841629114925527, |
| "grad_norm": 0.35783514380455017, |
| "learning_rate": 1e-05, |
| "loss": 0.4652, |
| "step": 1396 |
| }, |
| { |
| "epoch": 0.3847132881565822, |
| "grad_norm": 0.36758366227149963, |
| "learning_rate": 1e-05, |
| "loss": 0.4647, |
| "step": 1398 |
| }, |
| { |
| "epoch": 0.3852636648206116, |
| "grad_norm": 0.3690735995769501, |
| "learning_rate": 1e-05, |
| "loss": 0.4572, |
| "step": 1400 |
| }, |
| { |
| "epoch": 0.3852636648206116, |
| "eval_merge_loss": 0.39922505617141724, |
| "eval_merge_runtime": 600.7214, |
| "eval_merge_samples_per_second": 56.186, |
| "eval_merge_steps_per_second": 2.342, |
| "step": 1400 |
| }, |
| { |
| "epoch": 0.3852636648206116, |
| "eval_new_aug_datas_filtered.json_loss": 0.5239847898483276, |
| "eval_new_aug_datas_filtered.json_runtime": 10.3569, |
| "eval_new_aug_datas_filtered.json_samples_per_second": 74.057, |
| "eval_new_aug_datas_filtered.json_steps_per_second": 3.09, |
| "step": 1400 |
| }, |
| { |
| "epoch": 0.3852636648206116, |
| "eval_sharegpt_gpt4.json_loss": 0.7791606187820435, |
| "eval_sharegpt_gpt4.json_runtime": 31.6746, |
| "eval_sharegpt_gpt4.json_samples_per_second": 58.754, |
| "eval_sharegpt_gpt4.json_steps_per_second": 2.463, |
| "step": 1400 |
| }, |
| { |
| "epoch": 0.3852636648206116, |
| "eval_Table_GPT.json_loss": 0.0626993179321289, |
| "eval_Table_GPT.json_runtime": 24.9542, |
| "eval_Table_GPT.json_samples_per_second": 83.874, |
| "eval_Table_GPT.json_steps_per_second": 3.526, |
| "step": 1400 |
| }, |
| { |
| "epoch": 0.3852636648206116, |
| "eval_gpt_4o_200k.json_loss": 0.8082922101020813, |
| "eval_gpt_4o_200k.json_runtime": 48.466, |
| "eval_gpt_4o_200k.json_samples_per_second": 129.596, |
| "eval_gpt_4o_200k.json_steps_per_second": 5.406, |
| "step": 1400 |
| }, |
| { |
| "epoch": 0.3852636648206116, |
| "eval_multi_turn_datas.json_loss": 0.3381649851799011, |
| "eval_multi_turn_datas.json_runtime": 75.5711, |
| "eval_multi_turn_datas.json_samples_per_second": 52.957, |
| "eval_multi_turn_datas.json_steps_per_second": 2.21, |
| "step": 1400 |
| }, |
| { |
| "epoch": 0.3852636648206116, |
| "eval_table_python_code_datas.json_loss": 0.2752579152584076, |
| "eval_table_python_code_datas.json_runtime": 43.0439, |
| "eval_table_python_code_datas.json_samples_per_second": 50.158, |
| "eval_table_python_code_datas.json_steps_per_second": 2.091, |
| "step": 1400 |
| }, |
| { |
| "epoch": 0.3852636648206116, |
| "eval_tabular_llm_data.json_loss": 0.11023548245429993, |
| "eval_tabular_llm_data.json_runtime": 8.5291, |
| "eval_tabular_llm_data.json_samples_per_second": 28.843, |
| "eval_tabular_llm_data.json_steps_per_second": 1.29, |
| "step": 1400 |
| }, |
| { |
| "epoch": 0.3852636648206116, |
| "eval_python_code_critic_21k.json_loss": 0.5756029486656189, |
| "eval_python_code_critic_21k.json_runtime": 3.2275, |
| "eval_python_code_critic_21k.json_samples_per_second": 184.973, |
| "eval_python_code_critic_21k.json_steps_per_second": 7.746, |
| "step": 1400 |
| }, |
| { |
| "epoch": 0.3852636648206116, |
| "eval_all_merge_table_dataset.json_loss": 0.08007320761680603, |
| "eval_all_merge_table_dataset.json_runtime": 23.3, |
| "eval_all_merge_table_dataset.json_samples_per_second": 30.558, |
| "eval_all_merge_table_dataset.json_steps_per_second": 1.288, |
| "step": 1400 |
| }, |
| { |
| "epoch": 0.3852636648206116, |
| "eval_code_feedback_multi_turn.json_loss": 0.5849318504333496, |
| "eval_code_feedback_multi_turn.json_runtime": 32.4131, |
| "eval_code_feedback_multi_turn.json_samples_per_second": 67.905, |
| "eval_code_feedback_multi_turn.json_steps_per_second": 2.838, |
| "step": 1400 |
| }, |
| { |
| "epoch": 0.3852636648206116, |
| "eval_ultrainteract_sft.json_loss": 0.4235917031764984, |
| "eval_ultrainteract_sft.json_runtime": 8.6815, |
| "eval_ultrainteract_sft.json_samples_per_second": 167.713, |
| "eval_ultrainteract_sft.json_steps_per_second": 7.026, |
| "step": 1400 |
| }, |
| { |
| "epoch": 0.3852636648206116, |
| "eval_synthetic_text_to_sql.json_loss": 0.10058007389307022, |
| "eval_synthetic_text_to_sql.json_runtime": 0.1256, |
| "eval_synthetic_text_to_sql.json_samples_per_second": 270.794, |
| "eval_synthetic_text_to_sql.json_steps_per_second": 15.929, |
| "step": 1400 |
| }, |
| { |
| "epoch": 0.3852636648206116, |
| "eval_sft_react_sql_datas.json_loss": 0.63919597864151, |
| "eval_sft_react_sql_datas.json_runtime": 7.8177, |
| "eval_sft_react_sql_datas.json_samples_per_second": 40.165, |
| "eval_sft_react_sql_datas.json_steps_per_second": 1.791, |
| "step": 1400 |
| }, |
| { |
| "epoch": 0.3852636648206116, |
| "eval_all_merge_code.json_loss": 0.293491929769516, |
| "eval_all_merge_code.json_runtime": 0.3331, |
| "eval_all_merge_code.json_samples_per_second": 189.11, |
| "eval_all_merge_code.json_steps_per_second": 9.005, |
| "step": 1400 |
| }, |
| { |
| "epoch": 0.3852636648206116, |
| "eval_magpie_datas.json_loss": 0.43307721614837646, |
| "eval_magpie_datas.json_runtime": 2.214, |
| "eval_magpie_datas.json_samples_per_second": 77.687, |
| "eval_magpie_datas.json_steps_per_second": 3.613, |
| "step": 1400 |
| }, |
| { |
| "epoch": 0.3852636648206116, |
| "eval_train_data_for_qwen.json_loss": 0.004504092503339052, |
| "eval_train_data_for_qwen.json_runtime": 0.2448, |
| "eval_train_data_for_qwen.json_samples_per_second": 40.845, |
| "eval_train_data_for_qwen.json_steps_per_second": 4.084, |
| "step": 1400 |
| }, |
| { |
| "epoch": 0.3852636648206116, |
| "eval_alpaca_cleaned.json_loss": 0.9073267579078674, |
| "eval_alpaca_cleaned.json_runtime": 0.1148, |
| "eval_alpaca_cleaned.json_samples_per_second": 235.221, |
| "eval_alpaca_cleaned.json_steps_per_second": 17.424, |
| "step": 1400 |
| }, |
| { |
| "epoch": 0.3852636648206116, |
| "eval_agent_instruct.json_loss": 0.22197985649108887, |
| "eval_agent_instruct.json_runtime": 0.5129, |
| "eval_agent_instruct.json_samples_per_second": 93.586, |
| "eval_agent_instruct.json_steps_per_second": 3.899, |
| "step": 1400 |
| }, |
| { |
| "epoch": 0.3852636648206116, |
| "eval_MathInstruct.json_loss": 0.2014550119638443, |
| "eval_MathInstruct.json_runtime": 0.3655, |
| "eval_MathInstruct.json_samples_per_second": 155.953, |
| "eval_MathInstruct.json_steps_per_second": 8.208, |
| "step": 1400 |
| }, |
| { |
| "epoch": 0.3852636648206116, |
| "eval_tested_143k_python_alpaca.json_loss": 0.44645121693611145, |
| "eval_tested_143k_python_alpaca.json_runtime": 0.3016, |
| "eval_tested_143k_python_alpaca.json_samples_per_second": 112.74, |
| "eval_tested_143k_python_alpaca.json_steps_per_second": 6.632, |
| "step": 1400 |
| }, |
| { |
| "epoch": 0.3852636648206116, |
| "eval_xlam_function_calling_60k.json_loss": 0.009633864276111126, |
| "eval_xlam_function_calling_60k.json_runtime": 0.0999, |
| "eval_xlam_function_calling_60k.json_samples_per_second": 230.172, |
| "eval_xlam_function_calling_60k.json_steps_per_second": 10.007, |
| "step": 1400 |
| }, |
| { |
| "epoch": 0.3852636648206116, |
| "eval_alpaca_data_gpt4_chinese.json_loss": 1.5636402368545532, |
| "eval_alpaca_data_gpt4_chinese.json_runtime": 0.0503, |
| "eval_alpaca_data_gpt4_chinese.json_samples_per_second": 318.002, |
| "eval_alpaca_data_gpt4_chinese.json_steps_per_second": 19.875, |
| "step": 1400 |
| }, |
| { |
| "epoch": 0.3852636648206116, |
| "eval_alpaca_gpt4_zh.json_loss": 0.9602435231208801, |
| "eval_alpaca_gpt4_zh.json_runtime": 0.0502, |
| "eval_alpaca_gpt4_zh.json_samples_per_second": 219.067, |
| "eval_alpaca_gpt4_zh.json_steps_per_second": 19.915, |
| "step": 1400 |
| }, |
| { |
| "epoch": 0.3852636648206116, |
| "eval_codefeedback_filtered_instruction.json_loss": 0.5993592143058777, |
| "eval_codefeedback_filtered_instruction.json_runtime": 0.4852, |
| "eval_codefeedback_filtered_instruction.json_samples_per_second": 41.223, |
| "eval_codefeedback_filtered_instruction.json_steps_per_second": 2.061, |
| "step": 1400 |
| }, |
| { |
| "epoch": 0.38581404148464105, |
| "grad_norm": 0.36705121397972107, |
| "learning_rate": 1e-05, |
| "loss": 0.4617, |
| "step": 1402 |
| }, |
| { |
| "epoch": 0.3863644181486705, |
| "grad_norm": 0.3653152883052826, |
| "learning_rate": 1e-05, |
| "loss": 0.4528, |
| "step": 1404 |
| }, |
| { |
| "epoch": 0.38691479481269997, |
| "grad_norm": 0.34426313638687134, |
| "learning_rate": 1e-05, |
| "loss": 0.4464, |
| "step": 1406 |
| }, |
| { |
| "epoch": 0.3874651714767294, |
| "grad_norm": 0.3493911623954773, |
| "learning_rate": 1e-05, |
| "loss": 0.4638, |
| "step": 1408 |
| }, |
| { |
| "epoch": 0.38801554814075884, |
| "grad_norm": 0.3841487765312195, |
| "learning_rate": 1e-05, |
| "loss": 0.4471, |
| "step": 1410 |
| }, |
| { |
| "epoch": 0.3885659248047883, |
| "grad_norm": 0.3770912289619446, |
| "learning_rate": 1e-05, |
| "loss": 0.4623, |
| "step": 1412 |
| }, |
| { |
| "epoch": 0.3891163014688177, |
| "grad_norm": 0.38141822814941406, |
| "learning_rate": 1e-05, |
| "loss": 0.4583, |
| "step": 1414 |
| }, |
| { |
| "epoch": 0.38966667813284717, |
| "grad_norm": 0.3774464726448059, |
| "learning_rate": 1e-05, |
| "loss": 0.4574, |
| "step": 1416 |
| }, |
| { |
| "epoch": 0.39021705479687663, |
| "grad_norm": 0.35681846737861633, |
| "learning_rate": 1e-05, |
| "loss": 0.4443, |
| "step": 1418 |
| }, |
| { |
| "epoch": 0.39076743146090603, |
| "grad_norm": 0.3700469732284546, |
| "learning_rate": 1e-05, |
| "loss": 0.4468, |
| "step": 1420 |
| }, |
| { |
| "epoch": 0.3913178081249355, |
| "grad_norm": 0.35229384899139404, |
| "learning_rate": 1e-05, |
| "loss": 0.456, |
| "step": 1422 |
| }, |
| { |
| "epoch": 0.39186818478896496, |
| "grad_norm": 0.3469116687774658, |
| "learning_rate": 1e-05, |
| "loss": 0.451, |
| "step": 1424 |
| }, |
| { |
| "epoch": 0.3924185614529944, |
| "grad_norm": 0.36313918232917786, |
| "learning_rate": 1e-05, |
| "loss": 0.4679, |
| "step": 1426 |
| }, |
| { |
| "epoch": 0.3929689381170238, |
| "grad_norm": 0.3543436527252197, |
| "learning_rate": 1e-05, |
| "loss": 0.464, |
| "step": 1428 |
| }, |
| { |
| "epoch": 0.3935193147810533, |
| "grad_norm": 0.3992765545845032, |
| "learning_rate": 1e-05, |
| "loss": 0.486, |
| "step": 1430 |
| }, |
| { |
| "epoch": 0.39406969144508275, |
| "grad_norm": 0.36149340867996216, |
| "learning_rate": 1e-05, |
| "loss": 0.4426, |
| "step": 1432 |
| }, |
| { |
| "epoch": 0.39462006810911215, |
| "grad_norm": 0.37118762731552124, |
| "learning_rate": 1e-05, |
| "loss": 0.4531, |
| "step": 1434 |
| }, |
| { |
| "epoch": 0.3951704447731416, |
| "grad_norm": 0.3618330955505371, |
| "learning_rate": 1e-05, |
| "loss": 0.4621, |
| "step": 1436 |
| }, |
| { |
| "epoch": 0.3957208214371711, |
| "grad_norm": 0.37272128462791443, |
| "learning_rate": 1e-05, |
| "loss": 0.4616, |
| "step": 1438 |
| }, |
| { |
| "epoch": 0.3962711981012005, |
| "grad_norm": 0.3678719997406006, |
| "learning_rate": 1e-05, |
| "loss": 0.4477, |
| "step": 1440 |
| }, |
| { |
| "epoch": 0.39682157476522995, |
| "grad_norm": 0.342907190322876, |
| "learning_rate": 1e-05, |
| "loss": 0.4484, |
| "step": 1442 |
| }, |
| { |
| "epoch": 0.3973719514292594, |
| "grad_norm": 0.3722037374973297, |
| "learning_rate": 1e-05, |
| "loss": 0.4576, |
| "step": 1444 |
| }, |
| { |
| "epoch": 0.39792232809328887, |
| "grad_norm": 0.3829335868358612, |
| "learning_rate": 1e-05, |
| "loss": 0.4568, |
| "step": 1446 |
| }, |
| { |
| "epoch": 0.3984727047573183, |
| "grad_norm": 0.36857596039772034, |
| "learning_rate": 1e-05, |
| "loss": 0.4509, |
| "step": 1448 |
| }, |
| { |
| "epoch": 0.39902308142134774, |
| "grad_norm": 0.36784934997558594, |
| "learning_rate": 1e-05, |
| "loss": 0.46, |
| "step": 1450 |
| }, |
| { |
| "epoch": 0.3995734580853772, |
| "grad_norm": 0.36996331810951233, |
| "learning_rate": 1e-05, |
| "loss": 0.4435, |
| "step": 1452 |
| }, |
| { |
| "epoch": 0.4001238347494066, |
| "grad_norm": 0.3608056604862213, |
| "learning_rate": 1e-05, |
| "loss": 0.4467, |
| "step": 1454 |
| }, |
| { |
| "epoch": 0.40067421141343607, |
| "grad_norm": 0.3827229142189026, |
| "learning_rate": 1e-05, |
| "loss": 0.4576, |
| "step": 1456 |
| }, |
| { |
| "epoch": 0.4012245880774655, |
| "grad_norm": 0.38073116540908813, |
| "learning_rate": 1e-05, |
| "loss": 0.4433, |
| "step": 1458 |
| }, |
| { |
| "epoch": 0.40177496474149493, |
| "grad_norm": 0.3861468434333801, |
| "learning_rate": 1e-05, |
| "loss": 0.4466, |
| "step": 1460 |
| }, |
| { |
| "epoch": 0.4023253414055244, |
| "grad_norm": 0.36093631386756897, |
| "learning_rate": 1e-05, |
| "loss": 0.4409, |
| "step": 1462 |
| }, |
| { |
| "epoch": 0.40287571806955386, |
| "grad_norm": 0.34549927711486816, |
| "learning_rate": 1e-05, |
| "loss": 0.4507, |
| "step": 1464 |
| }, |
| { |
| "epoch": 0.4034260947335833, |
| "grad_norm": 0.3782083988189697, |
| "learning_rate": 1e-05, |
| "loss": 0.4648, |
| "step": 1466 |
| }, |
| { |
| "epoch": 0.4039764713976127, |
| "grad_norm": 0.366914302110672, |
| "learning_rate": 1e-05, |
| "loss": 0.462, |
| "step": 1468 |
| }, |
| { |
| "epoch": 0.4045268480616422, |
| "grad_norm": 0.3604414761066437, |
| "learning_rate": 1e-05, |
| "loss": 0.4639, |
| "step": 1470 |
| }, |
| { |
| "epoch": 0.40507722472567165, |
| "grad_norm": 0.3806079924106598, |
| "learning_rate": 1e-05, |
| "loss": 0.452, |
| "step": 1472 |
| }, |
| { |
| "epoch": 0.40562760138970105, |
| "grad_norm": 0.36079150438308716, |
| "learning_rate": 1e-05, |
| "loss": 0.4534, |
| "step": 1474 |
| }, |
| { |
| "epoch": 0.4061779780537305, |
| "grad_norm": 0.3526926040649414, |
| "learning_rate": 1e-05, |
| "loss": 0.4483, |
| "step": 1476 |
| }, |
| { |
| "epoch": 0.40672835471776, |
| "grad_norm": 0.36440181732177734, |
| "learning_rate": 1e-05, |
| "loss": 0.4445, |
| "step": 1478 |
| }, |
| { |
| "epoch": 0.40727873138178944, |
| "grad_norm": 0.3452344238758087, |
| "learning_rate": 1e-05, |
| "loss": 0.4531, |
| "step": 1480 |
| }, |
| { |
| "epoch": 0.40782910804581884, |
| "grad_norm": 0.3774935007095337, |
| "learning_rate": 1e-05, |
| "loss": 0.4644, |
| "step": 1482 |
| }, |
| { |
| "epoch": 0.4083794847098483, |
| "grad_norm": 0.3485760986804962, |
| "learning_rate": 1e-05, |
| "loss": 0.4489, |
| "step": 1484 |
| }, |
| { |
| "epoch": 0.40892986137387777, |
| "grad_norm": 0.3787960708141327, |
| "learning_rate": 1e-05, |
| "loss": 0.4682, |
| "step": 1486 |
| }, |
| { |
| "epoch": 0.4094802380379072, |
| "grad_norm": 0.38031846284866333, |
| "learning_rate": 1e-05, |
| "loss": 0.462, |
| "step": 1488 |
| }, |
| { |
| "epoch": 0.41003061470193664, |
| "grad_norm": 0.3756881654262543, |
| "learning_rate": 1e-05, |
| "loss": 0.4514, |
| "step": 1490 |
| }, |
| { |
| "epoch": 0.4105809913659661, |
| "grad_norm": 0.3663581311702728, |
| "learning_rate": 1e-05, |
| "loss": 0.4482, |
| "step": 1492 |
| }, |
| { |
| "epoch": 0.4111313680299955, |
| "grad_norm": 0.35938966274261475, |
| "learning_rate": 1e-05, |
| "loss": 0.4471, |
| "step": 1494 |
| }, |
| { |
| "epoch": 0.41168174469402496, |
| "grad_norm": 0.3561854064464569, |
| "learning_rate": 1e-05, |
| "loss": 0.4514, |
| "step": 1496 |
| }, |
| { |
| "epoch": 0.4122321213580544, |
| "grad_norm": 0.36052775382995605, |
| "learning_rate": 1e-05, |
| "loss": 0.4564, |
| "step": 1498 |
| }, |
| { |
| "epoch": 0.4127824980220839, |
| "grad_norm": 0.3753555119037628, |
| "learning_rate": 1e-05, |
| "loss": 0.4543, |
| "step": 1500 |
| }, |
| { |
| "epoch": 0.4133328746861133, |
| "grad_norm": 0.3747691810131073, |
| "learning_rate": 1e-05, |
| "loss": 0.4588, |
| "step": 1502 |
| }, |
| { |
| "epoch": 0.41388325135014276, |
| "grad_norm": 0.3654341399669647, |
| "learning_rate": 1e-05, |
| "loss": 0.451, |
| "step": 1504 |
| }, |
| { |
| "epoch": 0.4144336280141722, |
| "grad_norm": 0.3624642491340637, |
| "learning_rate": 1e-05, |
| "loss": 0.4528, |
| "step": 1506 |
| }, |
| { |
| "epoch": 0.4149840046782016, |
| "grad_norm": 0.3465966284275055, |
| "learning_rate": 1e-05, |
| "loss": 0.45, |
| "step": 1508 |
| }, |
| { |
| "epoch": 0.4155343813422311, |
| "grad_norm": 0.38202422857284546, |
| "learning_rate": 1e-05, |
| "loss": 0.4459, |
| "step": 1510 |
| }, |
| { |
| "epoch": 0.41608475800626055, |
| "grad_norm": 0.3562781512737274, |
| "learning_rate": 1e-05, |
| "loss": 0.4375, |
| "step": 1512 |
| }, |
| { |
| "epoch": 0.41663513467028995, |
| "grad_norm": 0.36660805344581604, |
| "learning_rate": 1e-05, |
| "loss": 0.4511, |
| "step": 1514 |
| }, |
| { |
| "epoch": 0.4171855113343194, |
| "grad_norm": 0.36541464924812317, |
| "learning_rate": 1e-05, |
| "loss": 0.4618, |
| "step": 1516 |
| }, |
| { |
| "epoch": 0.4177358879983489, |
| "grad_norm": 0.3570851981639862, |
| "learning_rate": 1e-05, |
| "loss": 0.4568, |
| "step": 1518 |
| }, |
| { |
| "epoch": 0.41828626466237834, |
| "grad_norm": 0.3508870005607605, |
| "learning_rate": 1e-05, |
| "loss": 0.4492, |
| "step": 1520 |
| }, |
| { |
| "epoch": 0.41883664132640774, |
| "grad_norm": 0.35050973296165466, |
| "learning_rate": 1e-05, |
| "loss": 0.4481, |
| "step": 1522 |
| }, |
| { |
| "epoch": 0.4193870179904372, |
| "grad_norm": 0.3564668297767639, |
| "learning_rate": 1e-05, |
| "loss": 0.4461, |
| "step": 1524 |
| }, |
| { |
| "epoch": 0.41993739465446667, |
| "grad_norm": 0.3646043539047241, |
| "learning_rate": 1e-05, |
| "loss": 0.4554, |
| "step": 1526 |
| }, |
| { |
| "epoch": 0.4204877713184961, |
| "grad_norm": 0.3904356360435486, |
| "learning_rate": 1e-05, |
| "loss": 0.4731, |
| "step": 1528 |
| }, |
| { |
| "epoch": 0.42103814798252553, |
| "grad_norm": 0.37373483180999756, |
| "learning_rate": 1e-05, |
| "loss": 0.4679, |
| "step": 1530 |
| }, |
| { |
| "epoch": 0.421588524646555, |
| "grad_norm": 0.3704439699649811, |
| "learning_rate": 1e-05, |
| "loss": 0.4706, |
| "step": 1532 |
| }, |
| { |
| "epoch": 0.42213890131058446, |
| "grad_norm": 0.37894484400749207, |
| "learning_rate": 1e-05, |
| "loss": 0.4515, |
| "step": 1534 |
| }, |
| { |
| "epoch": 0.42268927797461386, |
| "grad_norm": 0.3871210217475891, |
| "learning_rate": 1e-05, |
| "loss": 0.4477, |
| "step": 1536 |
| }, |
| { |
| "epoch": 0.4232396546386433, |
| "grad_norm": 0.3755747079849243, |
| "learning_rate": 1e-05, |
| "loss": 0.4633, |
| "step": 1538 |
| }, |
| { |
| "epoch": 0.4237900313026728, |
| "grad_norm": 0.359764039516449, |
| "learning_rate": 1e-05, |
| "loss": 0.4798, |
| "step": 1540 |
| }, |
| { |
| "epoch": 0.4243404079667022, |
| "grad_norm": 0.37172380089759827, |
| "learning_rate": 1e-05, |
| "loss": 0.4383, |
| "step": 1542 |
| }, |
| { |
| "epoch": 0.42489078463073165, |
| "grad_norm": 0.3501332700252533, |
| "learning_rate": 1e-05, |
| "loss": 0.442, |
| "step": 1544 |
| }, |
| { |
| "epoch": 0.4254411612947611, |
| "grad_norm": 0.3552211821079254, |
| "learning_rate": 1e-05, |
| "loss": 0.4539, |
| "step": 1546 |
| }, |
| { |
| "epoch": 0.4259915379587905, |
| "grad_norm": 0.35052230954170227, |
| "learning_rate": 1e-05, |
| "loss": 0.428, |
| "step": 1548 |
| }, |
| { |
| "epoch": 0.42654191462282, |
| "grad_norm": 0.3710823357105255, |
| "learning_rate": 1e-05, |
| "loss": 0.4297, |
| "step": 1550 |
| }, |
| { |
| "epoch": 0.42709229128684945, |
| "grad_norm": 0.37135034799575806, |
| "learning_rate": 1e-05, |
| "loss": 0.4587, |
| "step": 1552 |
| }, |
| { |
| "epoch": 0.4276426679508789, |
| "grad_norm": 0.3729698061943054, |
| "learning_rate": 1e-05, |
| "loss": 0.4585, |
| "step": 1554 |
| }, |
| { |
| "epoch": 0.4281930446149083, |
| "grad_norm": 0.3525015711784363, |
| "learning_rate": 1e-05, |
| "loss": 0.459, |
| "step": 1556 |
| }, |
| { |
| "epoch": 0.4287434212789378, |
| "grad_norm": 0.38500455021858215, |
| "learning_rate": 1e-05, |
| "loss": 0.4469, |
| "step": 1558 |
| }, |
| { |
| "epoch": 0.42929379794296724, |
| "grad_norm": 0.3852159380912781, |
| "learning_rate": 1e-05, |
| "loss": 0.4421, |
| "step": 1560 |
| }, |
| { |
| "epoch": 0.42984417460699664, |
| "grad_norm": 0.3567640781402588, |
| "learning_rate": 1e-05, |
| "loss": 0.4538, |
| "step": 1562 |
| }, |
| { |
| "epoch": 0.4303945512710261, |
| "grad_norm": 0.36795344948768616, |
| "learning_rate": 1e-05, |
| "loss": 0.4432, |
| "step": 1564 |
| }, |
| { |
| "epoch": 0.43094492793505557, |
| "grad_norm": 0.37614256143569946, |
| "learning_rate": 1e-05, |
| "loss": 0.4631, |
| "step": 1566 |
| }, |
| { |
| "epoch": 0.431495304599085, |
| "grad_norm": 0.356991171836853, |
| "learning_rate": 1e-05, |
| "loss": 0.4389, |
| "step": 1568 |
| }, |
| { |
| "epoch": 0.43204568126311443, |
| "grad_norm": 0.3793700933456421, |
| "learning_rate": 1e-05, |
| "loss": 0.4609, |
| "step": 1570 |
| }, |
| { |
| "epoch": 0.4325960579271439, |
| "grad_norm": 0.36675581336021423, |
| "learning_rate": 1e-05, |
| "loss": 0.4484, |
| "step": 1572 |
| }, |
| { |
| "epoch": 0.43314643459117336, |
| "grad_norm": 0.36404114961624146, |
| "learning_rate": 1e-05, |
| "loss": 0.45, |
| "step": 1574 |
| }, |
| { |
| "epoch": 0.43369681125520276, |
| "grad_norm": 0.3868160843849182, |
| "learning_rate": 1e-05, |
| "loss": 0.4652, |
| "step": 1576 |
| }, |
| { |
| "epoch": 0.4342471879192322, |
| "grad_norm": 0.3898649215698242, |
| "learning_rate": 1e-05, |
| "loss": 0.4612, |
| "step": 1578 |
| }, |
| { |
| "epoch": 0.4347975645832617, |
| "grad_norm": 0.36762335896492004, |
| "learning_rate": 1e-05, |
| "loss": 0.4543, |
| "step": 1580 |
| }, |
| { |
| "epoch": 0.4353479412472911, |
| "grad_norm": 0.3434213101863861, |
| "learning_rate": 1e-05, |
| "loss": 0.4423, |
| "step": 1582 |
| }, |
| { |
| "epoch": 0.43589831791132055, |
| "grad_norm": 0.3741122782230377, |
| "learning_rate": 1e-05, |
| "loss": 0.4638, |
| "step": 1584 |
| }, |
| { |
| "epoch": 0.43644869457535, |
| "grad_norm": 0.38991764187812805, |
| "learning_rate": 1e-05, |
| "loss": 0.438, |
| "step": 1586 |
| }, |
| { |
| "epoch": 0.4369990712393795, |
| "grad_norm": 0.35284510254859924, |
| "learning_rate": 1e-05, |
| "loss": 0.4559, |
| "step": 1588 |
| }, |
| { |
| "epoch": 0.4375494479034089, |
| "grad_norm": 0.36775341629981995, |
| "learning_rate": 1e-05, |
| "loss": 0.4594, |
| "step": 1590 |
| }, |
| { |
| "epoch": 0.43809982456743835, |
| "grad_norm": 0.3677217364311218, |
| "learning_rate": 1e-05, |
| "loss": 0.451, |
| "step": 1592 |
| }, |
| { |
| "epoch": 0.4386502012314678, |
| "grad_norm": 0.35295674204826355, |
| "learning_rate": 1e-05, |
| "loss": 0.4506, |
| "step": 1594 |
| }, |
| { |
| "epoch": 0.4392005778954972, |
| "grad_norm": 0.3770224452018738, |
| "learning_rate": 1e-05, |
| "loss": 0.4506, |
| "step": 1596 |
| }, |
| { |
| "epoch": 0.4397509545595267, |
| "grad_norm": 0.3824670612812042, |
| "learning_rate": 1e-05, |
| "loss": 0.4633, |
| "step": 1598 |
| }, |
| { |
| "epoch": 0.44030133122355614, |
| "grad_norm": 0.38165828585624695, |
| "learning_rate": 1e-05, |
| "loss": 0.4458, |
| "step": 1600 |
| }, |
| { |
| "epoch": 0.44030133122355614, |
| "eval_merge_loss": 0.39449170231819153, |
| "eval_merge_runtime": 599.3899, |
| "eval_merge_samples_per_second": 56.311, |
| "eval_merge_steps_per_second": 2.347, |
| "step": 1600 |
| }, |
| { |
| "epoch": 0.44030133122355614, |
| "eval_new_aug_datas_filtered.json_loss": 0.5198476314544678, |
| "eval_new_aug_datas_filtered.json_runtime": 10.3548, |
| "eval_new_aug_datas_filtered.json_samples_per_second": 74.072, |
| "eval_new_aug_datas_filtered.json_steps_per_second": 3.09, |
| "step": 1600 |
| }, |
| { |
| "epoch": 0.44030133122355614, |
| "eval_sharegpt_gpt4.json_loss": 0.7743993997573853, |
| "eval_sharegpt_gpt4.json_runtime": 31.7173, |
| "eval_sharegpt_gpt4.json_samples_per_second": 58.675, |
| "eval_sharegpt_gpt4.json_steps_per_second": 2.459, |
| "step": 1600 |
| }, |
| { |
| "epoch": 0.44030133122355614, |
| "eval_Table_GPT.json_loss": 0.05817935988306999, |
| "eval_Table_GPT.json_runtime": 25.0301, |
| "eval_Table_GPT.json_samples_per_second": 83.619, |
| "eval_Table_GPT.json_steps_per_second": 3.516, |
| "step": 1600 |
| }, |
| { |
| "epoch": 0.44030133122355614, |
| "eval_gpt_4o_200k.json_loss": 0.8023759126663208, |
| "eval_gpt_4o_200k.json_runtime": 48.5498, |
| "eval_gpt_4o_200k.json_samples_per_second": 129.372, |
| "eval_gpt_4o_200k.json_steps_per_second": 5.397, |
| "step": 1600 |
| }, |
| { |
| "epoch": 0.44030133122355614, |
| "eval_multi_turn_datas.json_loss": 0.3328835964202881, |
| "eval_multi_turn_datas.json_runtime": 75.669, |
| "eval_multi_turn_datas.json_samples_per_second": 52.888, |
| "eval_multi_turn_datas.json_steps_per_second": 2.207, |
| "step": 1600 |
| }, |
| { |
| "epoch": 0.44030133122355614, |
| "eval_table_python_code_datas.json_loss": 0.2713072597980499, |
| "eval_table_python_code_datas.json_runtime": 43.1148, |
| "eval_table_python_code_datas.json_samples_per_second": 50.076, |
| "eval_table_python_code_datas.json_steps_per_second": 2.087, |
| "step": 1600 |
| }, |
| { |
| "epoch": 0.44030133122355614, |
| "eval_tabular_llm_data.json_loss": 0.10233539342880249, |
| "eval_tabular_llm_data.json_runtime": 8.5788, |
| "eval_tabular_llm_data.json_samples_per_second": 28.675, |
| "eval_tabular_llm_data.json_steps_per_second": 1.282, |
| "step": 1600 |
| }, |
| { |
| "epoch": 0.44030133122355614, |
| "eval_python_code_critic_21k.json_loss": 0.5702229142189026, |
| "eval_python_code_critic_21k.json_runtime": 3.2319, |
| "eval_python_code_critic_21k.json_samples_per_second": 184.719, |
| "eval_python_code_critic_21k.json_steps_per_second": 7.735, |
| "step": 1600 |
| }, |
| { |
| "epoch": 0.44030133122355614, |
| "eval_all_merge_table_dataset.json_loss": 0.07606548815965652, |
| "eval_all_merge_table_dataset.json_runtime": 23.3911, |
| "eval_all_merge_table_dataset.json_samples_per_second": 30.439, |
| "eval_all_merge_table_dataset.json_steps_per_second": 1.283, |
| "step": 1600 |
| }, |
| { |
| "epoch": 0.44030133122355614, |
| "eval_code_feedback_multi_turn.json_loss": 0.5824379324913025, |
| "eval_code_feedback_multi_turn.json_runtime": 32.5207, |
| "eval_code_feedback_multi_turn.json_samples_per_second": 67.68, |
| "eval_code_feedback_multi_turn.json_steps_per_second": 2.829, |
| "step": 1600 |
| }, |
| { |
| "epoch": 0.44030133122355614, |
| "eval_ultrainteract_sft.json_loss": 0.42119815945625305, |
| "eval_ultrainteract_sft.json_runtime": 8.677, |
| "eval_ultrainteract_sft.json_samples_per_second": 167.801, |
| "eval_ultrainteract_sft.json_steps_per_second": 7.03, |
| "step": 1600 |
| }, |
| { |
| "epoch": 0.44030133122355614, |
| "eval_synthetic_text_to_sql.json_loss": 0.09474331140518188, |
| "eval_synthetic_text_to_sql.json_runtime": 0.1262, |
| "eval_synthetic_text_to_sql.json_samples_per_second": 269.361, |
| "eval_synthetic_text_to_sql.json_steps_per_second": 15.845, |
| "step": 1600 |
| }, |
| { |
| "epoch": 0.44030133122355614, |
| "eval_sft_react_sql_datas.json_loss": 0.6350359320640564, |
| "eval_sft_react_sql_datas.json_runtime": 7.869, |
| "eval_sft_react_sql_datas.json_samples_per_second": 39.903, |
| "eval_sft_react_sql_datas.json_steps_per_second": 1.779, |
| "step": 1600 |
| }, |
| { |
| "epoch": 0.44030133122355614, |
| "eval_all_merge_code.json_loss": 0.2929154634475708, |
| "eval_all_merge_code.json_runtime": 0.3373, |
| "eval_all_merge_code.json_samples_per_second": 186.752, |
| "eval_all_merge_code.json_steps_per_second": 8.893, |
| "step": 1600 |
| }, |
| { |
| "epoch": 0.44030133122355614, |
| "eval_magpie_datas.json_loss": 0.4318141043186188, |
| "eval_magpie_datas.json_runtime": 2.2195, |
| "eval_magpie_datas.json_samples_per_second": 77.496, |
| "eval_magpie_datas.json_steps_per_second": 3.604, |
| "step": 1600 |
| }, |
| { |
| "epoch": 0.44030133122355614, |
| "eval_train_data_for_qwen.json_loss": 0.00419951044023037, |
| "eval_train_data_for_qwen.json_runtime": 0.2455, |
| "eval_train_data_for_qwen.json_samples_per_second": 40.733, |
| "eval_train_data_for_qwen.json_steps_per_second": 4.073, |
| "step": 1600 |
| }, |
| { |
| "epoch": 0.44030133122355614, |
| "eval_alpaca_cleaned.json_loss": 0.910367488861084, |
| "eval_alpaca_cleaned.json_runtime": 0.1147, |
| "eval_alpaca_cleaned.json_samples_per_second": 235.312, |
| "eval_alpaca_cleaned.json_steps_per_second": 17.431, |
| "step": 1600 |
| }, |
| { |
| "epoch": 0.44030133122355614, |
| "eval_agent_instruct.json_loss": 0.21950356662273407, |
| "eval_agent_instruct.json_runtime": 0.5156, |
| "eval_agent_instruct.json_samples_per_second": 93.094, |
| "eval_agent_instruct.json_steps_per_second": 3.879, |
| "step": 1600 |
| }, |
| { |
| "epoch": 0.44030133122355614, |
| "eval_MathInstruct.json_loss": 0.19855839014053345, |
| "eval_MathInstruct.json_runtime": 0.3654, |
| "eval_MathInstruct.json_samples_per_second": 155.99, |
| "eval_MathInstruct.json_steps_per_second": 8.21, |
| "step": 1600 |
| }, |
| { |
| "epoch": 0.44030133122355614, |
| "eval_tested_143k_python_alpaca.json_loss": 0.4433169662952423, |
| "eval_tested_143k_python_alpaca.json_runtime": 0.3031, |
| "eval_tested_143k_python_alpaca.json_samples_per_second": 112.164, |
| "eval_tested_143k_python_alpaca.json_steps_per_second": 6.598, |
| "step": 1600 |
| }, |
| { |
| "epoch": 0.44030133122355614, |
| "eval_xlam_function_calling_60k.json_loss": 0.008965943939983845, |
| "eval_xlam_function_calling_60k.json_runtime": 0.1008, |
| "eval_xlam_function_calling_60k.json_samples_per_second": 228.26, |
| "eval_xlam_function_calling_60k.json_steps_per_second": 9.924, |
| "step": 1600 |
| }, |
| { |
| "epoch": 0.44030133122355614, |
| "eval_alpaca_data_gpt4_chinese.json_loss": 1.560943603515625, |
| "eval_alpaca_data_gpt4_chinese.json_runtime": 0.0505, |
| "eval_alpaca_data_gpt4_chinese.json_samples_per_second": 316.662, |
| "eval_alpaca_data_gpt4_chinese.json_steps_per_second": 19.791, |
| "step": 1600 |
| }, |
| { |
| "epoch": 0.44030133122355614, |
| "eval_alpaca_gpt4_zh.json_loss": 0.9813264012336731, |
| "eval_alpaca_gpt4_zh.json_runtime": 0.05, |
| "eval_alpaca_gpt4_zh.json_samples_per_second": 219.867, |
| "eval_alpaca_gpt4_zh.json_steps_per_second": 19.988, |
| "step": 1600 |
| }, |
| { |
| "epoch": 0.44030133122355614, |
| "eval_codefeedback_filtered_instruction.json_loss": 0.5885769128799438, |
| "eval_codefeedback_filtered_instruction.json_runtime": 0.4829, |
| "eval_codefeedback_filtered_instruction.json_samples_per_second": 41.42, |
| "eval_codefeedback_filtered_instruction.json_steps_per_second": 2.071, |
| "step": 1600 |
| }, |
| { |
| "epoch": 0.44085170788758554, |
| "grad_norm": 0.36969345808029175, |
| "learning_rate": 1e-05, |
| "loss": 0.4474, |
| "step": 1602 |
| }, |
| { |
| "epoch": 0.441402084551615, |
| "grad_norm": 0.3673281967639923, |
| "learning_rate": 1e-05, |
| "loss": 0.4566, |
| "step": 1604 |
| }, |
| { |
| "epoch": 0.44195246121564447, |
| "grad_norm": 0.3695686459541321, |
| "learning_rate": 1e-05, |
| "loss": 0.4602, |
| "step": 1606 |
| }, |
| { |
| "epoch": 0.4425028378796739, |
| "grad_norm": 0.3653704822063446, |
| "learning_rate": 1e-05, |
| "loss": 0.4489, |
| "step": 1608 |
| }, |
| { |
| "epoch": 0.44305321454370333, |
| "grad_norm": 0.37890321016311646, |
| "learning_rate": 1e-05, |
| "loss": 0.4588, |
| "step": 1610 |
| }, |
| { |
| "epoch": 0.4436035912077328, |
| "grad_norm": 0.34637650847435, |
| "learning_rate": 1e-05, |
| "loss": 0.4554, |
| "step": 1612 |
| }, |
| { |
| "epoch": 0.44415396787176226, |
| "grad_norm": 0.3733616769313812, |
| "learning_rate": 1e-05, |
| "loss": 0.4477, |
| "step": 1614 |
| }, |
| { |
| "epoch": 0.44470434453579166, |
| "grad_norm": 0.3740238547325134, |
| "learning_rate": 1e-05, |
| "loss": 0.4528, |
| "step": 1616 |
| }, |
| { |
| "epoch": 0.4452547211998211, |
| "grad_norm": 0.35610541701316833, |
| "learning_rate": 1e-05, |
| "loss": 0.4487, |
| "step": 1618 |
| }, |
| { |
| "epoch": 0.4458050978638506, |
| "grad_norm": 0.362763911485672, |
| "learning_rate": 1e-05, |
| "loss": 0.4619, |
| "step": 1620 |
| }, |
| { |
| "epoch": 0.44635547452788, |
| "grad_norm": 0.3781318962574005, |
| "learning_rate": 1e-05, |
| "loss": 0.4481, |
| "step": 1622 |
| }, |
| { |
| "epoch": 0.44690585119190945, |
| "grad_norm": 0.40836694836616516, |
| "learning_rate": 1e-05, |
| "loss": 0.4597, |
| "step": 1624 |
| }, |
| { |
| "epoch": 0.4474562278559389, |
| "grad_norm": 0.3662070035934448, |
| "learning_rate": 1e-05, |
| "loss": 0.4466, |
| "step": 1626 |
| }, |
| { |
| "epoch": 0.4480066045199684, |
| "grad_norm": 0.37797635793685913, |
| "learning_rate": 1e-05, |
| "loss": 0.4589, |
| "step": 1628 |
| }, |
| { |
| "epoch": 0.4485569811839978, |
| "grad_norm": 0.3544275462627411, |
| "learning_rate": 1e-05, |
| "loss": 0.4549, |
| "step": 1630 |
| }, |
| { |
| "epoch": 0.44910735784802724, |
| "grad_norm": 0.36321336030960083, |
| "learning_rate": 1e-05, |
| "loss": 0.443, |
| "step": 1632 |
| }, |
| { |
| "epoch": 0.4496577345120567, |
| "grad_norm": 0.45478886365890503, |
| "learning_rate": 1e-05, |
| "loss": 0.4343, |
| "step": 1634 |
| }, |
| { |
| "epoch": 0.4502081111760861, |
| "grad_norm": 0.3670060336589813, |
| "learning_rate": 1e-05, |
| "loss": 0.4463, |
| "step": 1636 |
| }, |
| { |
| "epoch": 0.4507584878401156, |
| "grad_norm": 0.381145715713501, |
| "learning_rate": 1e-05, |
| "loss": 0.4512, |
| "step": 1638 |
| }, |
| { |
| "epoch": 0.45130886450414504, |
| "grad_norm": 0.3729204833507538, |
| "learning_rate": 1e-05, |
| "loss": 0.451, |
| "step": 1640 |
| }, |
| { |
| "epoch": 0.4518592411681745, |
| "grad_norm": 0.36986637115478516, |
| "learning_rate": 1e-05, |
| "loss": 0.4622, |
| "step": 1642 |
| }, |
| { |
| "epoch": 0.4524096178322039, |
| "grad_norm": 0.37230783700942993, |
| "learning_rate": 1e-05, |
| "loss": 0.4377, |
| "step": 1644 |
| }, |
| { |
| "epoch": 0.45295999449623336, |
| "grad_norm": 0.3671816885471344, |
| "learning_rate": 1e-05, |
| "loss": 0.4433, |
| "step": 1646 |
| }, |
| { |
| "epoch": 0.4535103711602628, |
| "grad_norm": 0.359372615814209, |
| "learning_rate": 1e-05, |
| "loss": 0.4512, |
| "step": 1648 |
| }, |
| { |
| "epoch": 0.45406074782429223, |
| "grad_norm": 0.3682217001914978, |
| "learning_rate": 1e-05, |
| "loss": 0.4478, |
| "step": 1650 |
| }, |
| { |
| "epoch": 0.4546111244883217, |
| "grad_norm": 0.3779531419277191, |
| "learning_rate": 1e-05, |
| "loss": 0.4446, |
| "step": 1652 |
| }, |
| { |
| "epoch": 0.45516150115235116, |
| "grad_norm": 0.3579237759113312, |
| "learning_rate": 1e-05, |
| "loss": 0.4432, |
| "step": 1654 |
| }, |
| { |
| "epoch": 0.45571187781638056, |
| "grad_norm": 0.35086673498153687, |
| "learning_rate": 1e-05, |
| "loss": 0.4511, |
| "step": 1656 |
| }, |
| { |
| "epoch": 0.45626225448041, |
| "grad_norm": 0.36263635754585266, |
| "learning_rate": 1e-05, |
| "loss": 0.4552, |
| "step": 1658 |
| }, |
| { |
| "epoch": 0.4568126311444395, |
| "grad_norm": 0.3715769648551941, |
| "learning_rate": 1e-05, |
| "loss": 0.4549, |
| "step": 1660 |
| }, |
| { |
| "epoch": 0.45736300780846895, |
| "grad_norm": 0.36989322304725647, |
| "learning_rate": 1e-05, |
| "loss": 0.4468, |
| "step": 1662 |
| }, |
| { |
| "epoch": 0.45791338447249835, |
| "grad_norm": 0.35716795921325684, |
| "learning_rate": 1e-05, |
| "loss": 0.4506, |
| "step": 1664 |
| }, |
| { |
| "epoch": 0.4584637611365278, |
| "grad_norm": 0.36870133876800537, |
| "learning_rate": 1e-05, |
| "loss": 0.4581, |
| "step": 1666 |
| }, |
| { |
| "epoch": 0.4590141378005573, |
| "grad_norm": 0.36808547377586365, |
| "learning_rate": 1e-05, |
| "loss": 0.4518, |
| "step": 1668 |
| }, |
| { |
| "epoch": 0.4595645144645867, |
| "grad_norm": 0.3777028024196625, |
| "learning_rate": 1e-05, |
| "loss": 0.4526, |
| "step": 1670 |
| }, |
| { |
| "epoch": 0.46011489112861614, |
| "grad_norm": 0.3849789798259735, |
| "learning_rate": 1e-05, |
| "loss": 0.452, |
| "step": 1672 |
| }, |
| { |
| "epoch": 0.4606652677926456, |
| "grad_norm": 0.38168811798095703, |
| "learning_rate": 1e-05, |
| "loss": 0.4408, |
| "step": 1674 |
| }, |
| { |
| "epoch": 0.461215644456675, |
| "grad_norm": 0.3601077198982239, |
| "learning_rate": 1e-05, |
| "loss": 0.4415, |
| "step": 1676 |
| }, |
| { |
| "epoch": 0.4617660211207045, |
| "grad_norm": 0.3658849596977234, |
| "learning_rate": 1e-05, |
| "loss": 0.4461, |
| "step": 1678 |
| }, |
| { |
| "epoch": 0.46231639778473393, |
| "grad_norm": 0.3822179138660431, |
| "learning_rate": 1e-05, |
| "loss": 0.4585, |
| "step": 1680 |
| }, |
| { |
| "epoch": 0.4628667744487634, |
| "grad_norm": 0.38321495056152344, |
| "learning_rate": 1e-05, |
| "loss": 0.4469, |
| "step": 1682 |
| }, |
| { |
| "epoch": 0.4634171511127928, |
| "grad_norm": 0.3911297917366028, |
| "learning_rate": 1e-05, |
| "loss": 0.4522, |
| "step": 1684 |
| }, |
| { |
| "epoch": 0.46396752777682226, |
| "grad_norm": 0.38053110241889954, |
| "learning_rate": 1e-05, |
| "loss": 0.4487, |
| "step": 1686 |
| }, |
| { |
| "epoch": 0.4645179044408517, |
| "grad_norm": 0.3704802691936493, |
| "learning_rate": 1e-05, |
| "loss": 0.4436, |
| "step": 1688 |
| }, |
| { |
| "epoch": 0.46506828110488113, |
| "grad_norm": 0.3804566562175751, |
| "learning_rate": 1e-05, |
| "loss": 0.4419, |
| "step": 1690 |
| }, |
| { |
| "epoch": 0.4656186577689106, |
| "grad_norm": 0.3807014524936676, |
| "learning_rate": 1e-05, |
| "loss": 0.4526, |
| "step": 1692 |
| }, |
| { |
| "epoch": 0.46616903443294005, |
| "grad_norm": 0.3678591549396515, |
| "learning_rate": 1e-05, |
| "loss": 0.4579, |
| "step": 1694 |
| }, |
| { |
| "epoch": 0.46671941109696946, |
| "grad_norm": 0.37586984038352966, |
| "learning_rate": 1e-05, |
| "loss": 0.4404, |
| "step": 1696 |
| }, |
| { |
| "epoch": 0.4672697877609989, |
| "grad_norm": 0.36084264516830444, |
| "learning_rate": 1e-05, |
| "loss": 0.4398, |
| "step": 1698 |
| }, |
| { |
| "epoch": 0.4678201644250284, |
| "grad_norm": 0.36694666743278503, |
| "learning_rate": 1e-05, |
| "loss": 0.4369, |
| "step": 1700 |
| }, |
| { |
| "epoch": 0.46837054108905785, |
| "grad_norm": 0.4061066210269928, |
| "learning_rate": 1e-05, |
| "loss": 0.4495, |
| "step": 1702 |
| }, |
| { |
| "epoch": 0.46892091775308725, |
| "grad_norm": 0.37329551577568054, |
| "learning_rate": 1e-05, |
| "loss": 0.4482, |
| "step": 1704 |
| }, |
| { |
| "epoch": 0.4694712944171167, |
| "grad_norm": 0.39072346687316895, |
| "learning_rate": 1e-05, |
| "loss": 0.4506, |
| "step": 1706 |
| }, |
| { |
| "epoch": 0.4700216710811462, |
| "grad_norm": 0.3565053343772888, |
| "learning_rate": 1e-05, |
| "loss": 0.447, |
| "step": 1708 |
| }, |
| { |
| "epoch": 0.4705720477451756, |
| "grad_norm": 0.39754360914230347, |
| "learning_rate": 1e-05, |
| "loss": 0.4468, |
| "step": 1710 |
| }, |
| { |
| "epoch": 0.47112242440920504, |
| "grad_norm": 0.34416159987449646, |
| "learning_rate": 1e-05, |
| "loss": 0.4509, |
| "step": 1712 |
| }, |
| { |
| "epoch": 0.4716728010732345, |
| "grad_norm": 0.3646188974380493, |
| "learning_rate": 1e-05, |
| "loss": 0.4436, |
| "step": 1714 |
| }, |
| { |
| "epoch": 0.47222317773726397, |
| "grad_norm": 0.372549831867218, |
| "learning_rate": 1e-05, |
| "loss": 0.4622, |
| "step": 1716 |
| }, |
| { |
| "epoch": 0.47277355440129337, |
| "grad_norm": 0.34616753458976746, |
| "learning_rate": 1e-05, |
| "loss": 0.4513, |
| "step": 1718 |
| }, |
| { |
| "epoch": 0.47332393106532283, |
| "grad_norm": 0.39396756887435913, |
| "learning_rate": 1e-05, |
| "loss": 0.4464, |
| "step": 1720 |
| }, |
| { |
| "epoch": 0.4738743077293523, |
| "grad_norm": 0.3681057095527649, |
| "learning_rate": 1e-05, |
| "loss": 0.4514, |
| "step": 1722 |
| }, |
| { |
| "epoch": 0.4744246843933817, |
| "grad_norm": 0.38942328095436096, |
| "learning_rate": 1e-05, |
| "loss": 0.4603, |
| "step": 1724 |
| }, |
| { |
| "epoch": 0.47497506105741116, |
| "grad_norm": 0.380278617143631, |
| "learning_rate": 1e-05, |
| "loss": 0.4463, |
| "step": 1726 |
| }, |
| { |
| "epoch": 0.4755254377214406, |
| "grad_norm": 0.37930282950401306, |
| "learning_rate": 1e-05, |
| "loss": 0.4377, |
| "step": 1728 |
| }, |
| { |
| "epoch": 0.47607581438547003, |
| "grad_norm": 0.36719146370887756, |
| "learning_rate": 1e-05, |
| "loss": 0.4285, |
| "step": 1730 |
| }, |
| { |
| "epoch": 0.4766261910494995, |
| "grad_norm": 0.3802686035633087, |
| "learning_rate": 1e-05, |
| "loss": 0.4346, |
| "step": 1732 |
| }, |
| { |
| "epoch": 0.47717656771352895, |
| "grad_norm": 0.3655955493450165, |
| "learning_rate": 1e-05, |
| "loss": 0.4504, |
| "step": 1734 |
| }, |
| { |
| "epoch": 0.4777269443775584, |
| "grad_norm": 0.34403982758522034, |
| "learning_rate": 1e-05, |
| "loss": 0.4502, |
| "step": 1736 |
| }, |
| { |
| "epoch": 0.4782773210415878, |
| "grad_norm": 0.35954922437667847, |
| "learning_rate": 1e-05, |
| "loss": 0.4313, |
| "step": 1738 |
| }, |
| { |
| "epoch": 0.4788276977056173, |
| "grad_norm": 0.3489810824394226, |
| "learning_rate": 1e-05, |
| "loss": 0.4479, |
| "step": 1740 |
| }, |
| { |
| "epoch": 0.47937807436964675, |
| "grad_norm": 0.3789598047733307, |
| "learning_rate": 1e-05, |
| "loss": 0.4488, |
| "step": 1742 |
| }, |
| { |
| "epoch": 0.47992845103367615, |
| "grad_norm": 0.38226747512817383, |
| "learning_rate": 1e-05, |
| "loss": 0.4612, |
| "step": 1744 |
| }, |
| { |
| "epoch": 0.4804788276977056, |
| "grad_norm": 0.36648547649383545, |
| "learning_rate": 1e-05, |
| "loss": 0.4521, |
| "step": 1746 |
| }, |
| { |
| "epoch": 0.4810292043617351, |
| "grad_norm": 0.36434775590896606, |
| "learning_rate": 1e-05, |
| "loss": 0.4579, |
| "step": 1748 |
| }, |
| { |
| "epoch": 0.4815795810257645, |
| "grad_norm": 0.3805695176124573, |
| "learning_rate": 1e-05, |
| "loss": 0.437, |
| "step": 1750 |
| }, |
| { |
| "epoch": 0.48212995768979394, |
| "grad_norm": 0.34234747290611267, |
| "learning_rate": 1e-05, |
| "loss": 0.4411, |
| "step": 1752 |
| }, |
| { |
| "epoch": 0.4826803343538234, |
| "grad_norm": 0.356953501701355, |
| "learning_rate": 1e-05, |
| "loss": 0.4563, |
| "step": 1754 |
| }, |
| { |
| "epoch": 0.48323071101785287, |
| "grad_norm": 0.35372647643089294, |
| "learning_rate": 1e-05, |
| "loss": 0.4506, |
| "step": 1756 |
| }, |
| { |
| "epoch": 0.48378108768188227, |
| "grad_norm": 0.3776678442955017, |
| "learning_rate": 1e-05, |
| "loss": 0.4517, |
| "step": 1758 |
| }, |
| { |
| "epoch": 0.48433146434591173, |
| "grad_norm": 0.336029052734375, |
| "learning_rate": 1e-05, |
| "loss": 0.4387, |
| "step": 1760 |
| }, |
| { |
| "epoch": 0.4848818410099412, |
| "grad_norm": 0.35482755303382874, |
| "learning_rate": 1e-05, |
| "loss": 0.4456, |
| "step": 1762 |
| }, |
| { |
| "epoch": 0.4854322176739706, |
| "grad_norm": 0.3713533580303192, |
| "learning_rate": 1e-05, |
| "loss": 0.4616, |
| "step": 1764 |
| }, |
| { |
| "epoch": 0.48598259433800006, |
| "grad_norm": 0.348069965839386, |
| "learning_rate": 1e-05, |
| "loss": 0.4504, |
| "step": 1766 |
| }, |
| { |
| "epoch": 0.4865329710020295, |
| "grad_norm": 0.36832061409950256, |
| "learning_rate": 1e-05, |
| "loss": 0.45, |
| "step": 1768 |
| }, |
| { |
| "epoch": 0.487083347666059, |
| "grad_norm": 0.3665439486503601, |
| "learning_rate": 1e-05, |
| "loss": 0.4525, |
| "step": 1770 |
| }, |
| { |
| "epoch": 0.4876337243300884, |
| "grad_norm": 0.39572247862815857, |
| "learning_rate": 1e-05, |
| "loss": 0.4521, |
| "step": 1772 |
| }, |
| { |
| "epoch": 0.48818410099411785, |
| "grad_norm": 0.36583212018013, |
| "learning_rate": 1e-05, |
| "loss": 0.4298, |
| "step": 1774 |
| }, |
| { |
| "epoch": 0.4887344776581473, |
| "grad_norm": 0.35969898104667664, |
| "learning_rate": 1e-05, |
| "loss": 0.4497, |
| "step": 1776 |
| }, |
| { |
| "epoch": 0.4892848543221767, |
| "grad_norm": 0.3651510775089264, |
| "learning_rate": 1e-05, |
| "loss": 0.4355, |
| "step": 1778 |
| }, |
| { |
| "epoch": 0.4898352309862062, |
| "grad_norm": 0.3885847330093384, |
| "learning_rate": 1e-05, |
| "loss": 0.4633, |
| "step": 1780 |
| }, |
| { |
| "epoch": 0.49038560765023564, |
| "grad_norm": 0.357166588306427, |
| "learning_rate": 1e-05, |
| "loss": 0.4512, |
| "step": 1782 |
| }, |
| { |
| "epoch": 0.49093598431426505, |
| "grad_norm": 0.34748879075050354, |
| "learning_rate": 1e-05, |
| "loss": 0.437, |
| "step": 1784 |
| }, |
| { |
| "epoch": 0.4914863609782945, |
| "grad_norm": 0.371999055147171, |
| "learning_rate": 1e-05, |
| "loss": 0.4493, |
| "step": 1786 |
| }, |
| { |
| "epoch": 0.492036737642324, |
| "grad_norm": 0.3602544665336609, |
| "learning_rate": 1e-05, |
| "loss": 0.4413, |
| "step": 1788 |
| }, |
| { |
| "epoch": 0.49258711430635344, |
| "grad_norm": 0.38811835646629333, |
| "learning_rate": 1e-05, |
| "loss": 0.4406, |
| "step": 1790 |
| }, |
| { |
| "epoch": 0.49313749097038284, |
| "grad_norm": 0.366616427898407, |
| "learning_rate": 1e-05, |
| "loss": 0.4587, |
| "step": 1792 |
| }, |
| { |
| "epoch": 0.4936878676344123, |
| "grad_norm": 0.39588844776153564, |
| "learning_rate": 1e-05, |
| "loss": 0.4525, |
| "step": 1794 |
| }, |
| { |
| "epoch": 0.49423824429844176, |
| "grad_norm": 0.3641244173049927, |
| "learning_rate": 1e-05, |
| "loss": 0.4533, |
| "step": 1796 |
| }, |
| { |
| "epoch": 0.49478862096247117, |
| "grad_norm": 0.35738009214401245, |
| "learning_rate": 1e-05, |
| "loss": 0.4542, |
| "step": 1798 |
| }, |
| { |
| "epoch": 0.49533899762650063, |
| "grad_norm": 0.36343181133270264, |
| "learning_rate": 1e-05, |
| "loss": 0.4527, |
| "step": 1800 |
| }, |
| { |
| "epoch": 0.49533899762650063, |
| "eval_merge_loss": 0.3900485932826996, |
| "eval_merge_runtime": 600.246, |
| "eval_merge_samples_per_second": 56.23, |
| "eval_merge_steps_per_second": 2.344, |
| "step": 1800 |
| }, |
| { |
| "epoch": 0.49533899762650063, |
| "eval_new_aug_datas_filtered.json_loss": 0.5161438584327698, |
| "eval_new_aug_datas_filtered.json_runtime": 10.4655, |
| "eval_new_aug_datas_filtered.json_samples_per_second": 73.288, |
| "eval_new_aug_datas_filtered.json_steps_per_second": 3.058, |
| "step": 1800 |
| }, |
| { |
| "epoch": 0.49533899762650063, |
| "eval_sharegpt_gpt4.json_loss": 0.7699668407440186, |
| "eval_sharegpt_gpt4.json_runtime": 31.6447, |
| "eval_sharegpt_gpt4.json_samples_per_second": 58.809, |
| "eval_sharegpt_gpt4.json_steps_per_second": 2.465, |
| "step": 1800 |
| }, |
| { |
| "epoch": 0.49533899762650063, |
| "eval_Table_GPT.json_loss": 0.057397227734327316, |
| "eval_Table_GPT.json_runtime": 24.974, |
| "eval_Table_GPT.json_samples_per_second": 83.807, |
| "eval_Table_GPT.json_steps_per_second": 3.524, |
| "step": 1800 |
| }, |
| { |
| "epoch": 0.49533899762650063, |
| "eval_gpt_4o_200k.json_loss": 0.7959992289543152, |
| "eval_gpt_4o_200k.json_runtime": 48.4474, |
| "eval_gpt_4o_200k.json_samples_per_second": 129.646, |
| "eval_gpt_4o_200k.json_steps_per_second": 5.408, |
| "step": 1800 |
| }, |
| { |
| "epoch": 0.49533899762650063, |
| "eval_multi_turn_datas.json_loss": 0.326607346534729, |
| "eval_multi_turn_datas.json_runtime": 75.6077, |
| "eval_multi_turn_datas.json_samples_per_second": 52.931, |
| "eval_multi_turn_datas.json_steps_per_second": 2.209, |
| "step": 1800 |
| }, |
| { |
| "epoch": 0.49533899762650063, |
| "eval_table_python_code_datas.json_loss": 0.26808008551597595, |
| "eval_table_python_code_datas.json_runtime": 43.0557, |
| "eval_table_python_code_datas.json_samples_per_second": 50.144, |
| "eval_table_python_code_datas.json_steps_per_second": 2.09, |
| "step": 1800 |
| }, |
| { |
| "epoch": 0.49533899762650063, |
| "eval_tabular_llm_data.json_loss": 0.1004142090678215, |
| "eval_tabular_llm_data.json_runtime": 8.5429, |
| "eval_tabular_llm_data.json_samples_per_second": 28.796, |
| "eval_tabular_llm_data.json_steps_per_second": 1.288, |
| "step": 1800 |
| }, |
| { |
| "epoch": 0.49533899762650063, |
| "eval_python_code_critic_21k.json_loss": 0.5654606223106384, |
| "eval_python_code_critic_21k.json_runtime": 3.2351, |
| "eval_python_code_critic_21k.json_samples_per_second": 184.538, |
| "eval_python_code_critic_21k.json_steps_per_second": 7.728, |
| "step": 1800 |
| }, |
| { |
| "epoch": 0.49533899762650063, |
| "eval_all_merge_table_dataset.json_loss": 0.07576768845319748, |
| "eval_all_merge_table_dataset.json_runtime": 23.2598, |
| "eval_all_merge_table_dataset.json_samples_per_second": 30.611, |
| "eval_all_merge_table_dataset.json_steps_per_second": 1.29, |
| "step": 1800 |
| }, |
| { |
| "epoch": 0.49533899762650063, |
| "eval_code_feedback_multi_turn.json_loss": 0.579846203327179, |
| "eval_code_feedback_multi_turn.json_runtime": 32.4188, |
| "eval_code_feedback_multi_turn.json_samples_per_second": 67.893, |
| "eval_code_feedback_multi_turn.json_steps_per_second": 2.838, |
| "step": 1800 |
| }, |
| { |
| "epoch": 0.49533899762650063, |
| "eval_ultrainteract_sft.json_loss": 0.4181068241596222, |
| "eval_ultrainteract_sft.json_runtime": 8.6461, |
| "eval_ultrainteract_sft.json_samples_per_second": 168.4, |
| "eval_ultrainteract_sft.json_steps_per_second": 7.055, |
| "step": 1800 |
| }, |
| { |
| "epoch": 0.49533899762650063, |
| "eval_synthetic_text_to_sql.json_loss": 0.09818249940872192, |
| "eval_synthetic_text_to_sql.json_runtime": 0.1264, |
| "eval_synthetic_text_to_sql.json_samples_per_second": 269.092, |
| "eval_synthetic_text_to_sql.json_steps_per_second": 15.829, |
| "step": 1800 |
| }, |
| { |
| "epoch": 0.49533899762650063, |
| "eval_sft_react_sql_datas.json_loss": 0.6291559338569641, |
| "eval_sft_react_sql_datas.json_runtime": 7.8451, |
| "eval_sft_react_sql_datas.json_samples_per_second": 40.025, |
| "eval_sft_react_sql_datas.json_steps_per_second": 1.785, |
| "step": 1800 |
| }, |
| { |
| "epoch": 0.49533899762650063, |
| "eval_all_merge_code.json_loss": 0.29108163714408875, |
| "eval_all_merge_code.json_runtime": 0.3447, |
| "eval_all_merge_code.json_samples_per_second": 182.771, |
| "eval_all_merge_code.json_steps_per_second": 8.703, |
| "step": 1800 |
| }, |
| { |
| "epoch": 0.49533899762650063, |
| "eval_magpie_datas.json_loss": 0.43020525574684143, |
| "eval_magpie_datas.json_runtime": 2.2179, |
| "eval_magpie_datas.json_samples_per_second": 77.551, |
| "eval_magpie_datas.json_steps_per_second": 3.607, |
| "step": 1800 |
| }, |
| { |
| "epoch": 0.49533899762650063, |
| "eval_train_data_for_qwen.json_loss": 0.0027856978122144938, |
| "eval_train_data_for_qwen.json_runtime": 0.2444, |
| "eval_train_data_for_qwen.json_samples_per_second": 40.919, |
| "eval_train_data_for_qwen.json_steps_per_second": 4.092, |
| "step": 1800 |
| }, |
| { |
| "epoch": 0.49533899762650063, |
| "eval_alpaca_cleaned.json_loss": 0.9129724502563477, |
| "eval_alpaca_cleaned.json_runtime": 0.1153, |
| "eval_alpaca_cleaned.json_samples_per_second": 234.093, |
| "eval_alpaca_cleaned.json_steps_per_second": 17.34, |
| "step": 1800 |
| }, |
| { |
| "epoch": 0.49533899762650063, |
| "eval_agent_instruct.json_loss": 0.22024483978748322, |
| "eval_agent_instruct.json_runtime": 0.5149, |
| "eval_agent_instruct.json_samples_per_second": 93.222, |
| "eval_agent_instruct.json_steps_per_second": 3.884, |
| "step": 1800 |
| }, |
| { |
| "epoch": 0.49533899762650063, |
| "eval_MathInstruct.json_loss": 0.20060402154922485, |
| "eval_MathInstruct.json_runtime": 0.3648, |
| "eval_MathInstruct.json_samples_per_second": 156.23, |
| "eval_MathInstruct.json_steps_per_second": 8.223, |
| "step": 1800 |
| }, |
| { |
| "epoch": 0.49533899762650063, |
| "eval_tested_143k_python_alpaca.json_loss": 0.44536128640174866, |
| "eval_tested_143k_python_alpaca.json_runtime": 0.3002, |
| "eval_tested_143k_python_alpaca.json_samples_per_second": 113.24, |
| "eval_tested_143k_python_alpaca.json_steps_per_second": 6.661, |
| "step": 1800 |
| }, |
| { |
| "epoch": 0.49533899762650063, |
| "eval_xlam_function_calling_60k.json_loss": 0.00967579148709774, |
| "eval_xlam_function_calling_60k.json_runtime": 0.1002, |
| "eval_xlam_function_calling_60k.json_samples_per_second": 229.452, |
| "eval_xlam_function_calling_60k.json_steps_per_second": 9.976, |
| "step": 1800 |
| }, |
| { |
| "epoch": 0.49533899762650063, |
| "eval_alpaca_data_gpt4_chinese.json_loss": 1.5544477701187134, |
| "eval_alpaca_data_gpt4_chinese.json_runtime": 0.0511, |
| "eval_alpaca_data_gpt4_chinese.json_samples_per_second": 313.214, |
| "eval_alpaca_data_gpt4_chinese.json_steps_per_second": 19.576, |
| "step": 1800 |
| }, |
| { |
| "epoch": 0.49533899762650063, |
| "eval_alpaca_gpt4_zh.json_loss": 0.977000892162323, |
| "eval_alpaca_gpt4_zh.json_runtime": 0.0508, |
| "eval_alpaca_gpt4_zh.json_samples_per_second": 216.666, |
| "eval_alpaca_gpt4_zh.json_steps_per_second": 19.697, |
| "step": 1800 |
| }, |
| { |
| "epoch": 0.49533899762650063, |
| "eval_codefeedback_filtered_instruction.json_loss": 0.5895399451255798, |
| "eval_codefeedback_filtered_instruction.json_runtime": 0.4883, |
| "eval_codefeedback_filtered_instruction.json_samples_per_second": 40.957, |
| "eval_codefeedback_filtered_instruction.json_steps_per_second": 2.048, |
| "step": 1800 |
| }, |
| { |
| "epoch": 0.4958893742905301, |
| "grad_norm": 0.36430442333221436, |
| "learning_rate": 1e-05, |
| "loss": 0.4418, |
| "step": 1802 |
| }, |
| { |
| "epoch": 0.4964397509545595, |
| "grad_norm": 0.35012543201446533, |
| "learning_rate": 1e-05, |
| "loss": 0.4437, |
| "step": 1804 |
| }, |
| { |
| "epoch": 0.49699012761858896, |
| "grad_norm": 0.3726542294025421, |
| "learning_rate": 1e-05, |
| "loss": 0.4332, |
| "step": 1806 |
| }, |
| { |
| "epoch": 0.4975405042826184, |
| "grad_norm": 0.3564360439777374, |
| "learning_rate": 1e-05, |
| "loss": 0.4378, |
| "step": 1808 |
| }, |
| { |
| "epoch": 0.4980908809466479, |
| "grad_norm": 0.3730456233024597, |
| "learning_rate": 1e-05, |
| "loss": 0.443, |
| "step": 1810 |
| }, |
| { |
| "epoch": 0.4986412576106773, |
| "grad_norm": 0.3588622212409973, |
| "learning_rate": 1e-05, |
| "loss": 0.4387, |
| "step": 1812 |
| }, |
| { |
| "epoch": 0.49919163427470675, |
| "grad_norm": 0.36861783266067505, |
| "learning_rate": 1e-05, |
| "loss": 0.4392, |
| "step": 1814 |
| }, |
| { |
| "epoch": 0.4997420109387362, |
| "grad_norm": 0.3537515699863434, |
| "learning_rate": 1e-05, |
| "loss": 0.4331, |
| "step": 1816 |
| }, |
| { |
| "epoch": 0.5002923876027656, |
| "grad_norm": 0.3723071813583374, |
| "learning_rate": 1e-05, |
| "loss": 0.4429, |
| "step": 1818 |
| }, |
| { |
| "epoch": 0.5008427642667951, |
| "grad_norm": 0.37015634775161743, |
| "learning_rate": 1e-05, |
| "loss": 0.4687, |
| "step": 1820 |
| }, |
| { |
| "epoch": 0.5013931409308245, |
| "grad_norm": 0.3528953790664673, |
| "learning_rate": 1e-05, |
| "loss": 0.4315, |
| "step": 1822 |
| }, |
| { |
| "epoch": 0.501943517594854, |
| "grad_norm": 0.357120543718338, |
| "learning_rate": 1e-05, |
| "loss": 0.4423, |
| "step": 1824 |
| }, |
| { |
| "epoch": 0.5024938942588835, |
| "grad_norm": 0.3655802607536316, |
| "learning_rate": 1e-05, |
| "loss": 0.4475, |
| "step": 1826 |
| }, |
| { |
| "epoch": 0.5030442709229128, |
| "grad_norm": 0.3676040470600128, |
| "learning_rate": 1e-05, |
| "loss": 0.4345, |
| "step": 1828 |
| }, |
| { |
| "epoch": 0.5035946475869423, |
| "grad_norm": 0.3427799940109253, |
| "learning_rate": 1e-05, |
| "loss": 0.4422, |
| "step": 1830 |
| }, |
| { |
| "epoch": 0.5041450242509717, |
| "grad_norm": 0.3482607305049896, |
| "learning_rate": 1e-05, |
| "loss": 0.4347, |
| "step": 1832 |
| }, |
| { |
| "epoch": 0.5046954009150012, |
| "grad_norm": 0.3690313398838043, |
| "learning_rate": 1e-05, |
| "loss": 0.4572, |
| "step": 1834 |
| }, |
| { |
| "epoch": 0.5052457775790307, |
| "grad_norm": 0.351601243019104, |
| "learning_rate": 1e-05, |
| "loss": 0.4445, |
| "step": 1836 |
| }, |
| { |
| "epoch": 0.5057961542430601, |
| "grad_norm": 0.3506658971309662, |
| "learning_rate": 1e-05, |
| "loss": 0.4482, |
| "step": 1838 |
| }, |
| { |
| "epoch": 0.5063465309070896, |
| "grad_norm": 0.36706456542015076, |
| "learning_rate": 1e-05, |
| "loss": 0.4503, |
| "step": 1840 |
| }, |
| { |
| "epoch": 0.5068969075711189, |
| "grad_norm": 0.36632585525512695, |
| "learning_rate": 1e-05, |
| "loss": 0.4385, |
| "step": 1842 |
| }, |
| { |
| "epoch": 0.5074472842351484, |
| "grad_norm": 0.3675621747970581, |
| "learning_rate": 1e-05, |
| "loss": 0.4391, |
| "step": 1844 |
| }, |
| { |
| "epoch": 0.5079976608991779, |
| "grad_norm": 0.3883734941482544, |
| "learning_rate": 1e-05, |
| "loss": 0.4435, |
| "step": 1846 |
| }, |
| { |
| "epoch": 0.5085480375632073, |
| "grad_norm": 0.34348422288894653, |
| "learning_rate": 1e-05, |
| "loss": 0.4388, |
| "step": 1848 |
| }, |
| { |
| "epoch": 0.5090984142272368, |
| "grad_norm": 0.36695536971092224, |
| "learning_rate": 1e-05, |
| "loss": 0.4473, |
| "step": 1850 |
| }, |
| { |
| "epoch": 0.5096487908912662, |
| "grad_norm": 0.36929944157600403, |
| "learning_rate": 1e-05, |
| "loss": 0.4542, |
| "step": 1852 |
| }, |
| { |
| "epoch": 0.5101991675552957, |
| "grad_norm": 0.3946716785430908, |
| "learning_rate": 1e-05, |
| "loss": 0.4399, |
| "step": 1854 |
| }, |
| { |
| "epoch": 0.5107495442193251, |
| "grad_norm": 0.3619132936000824, |
| "learning_rate": 1e-05, |
| "loss": 0.4471, |
| "step": 1856 |
| }, |
| { |
| "epoch": 0.5112999208833545, |
| "grad_norm": 0.34836745262145996, |
| "learning_rate": 1e-05, |
| "loss": 0.4392, |
| "step": 1858 |
| }, |
| { |
| "epoch": 0.511850297547384, |
| "grad_norm": 0.37516769766807556, |
| "learning_rate": 1e-05, |
| "loss": 0.4579, |
| "step": 1860 |
| }, |
| { |
| "epoch": 0.5124006742114134, |
| "grad_norm": 0.35800984501838684, |
| "learning_rate": 1e-05, |
| "loss": 0.4479, |
| "step": 1862 |
| }, |
| { |
| "epoch": 0.5129510508754429, |
| "grad_norm": 0.3664796054363251, |
| "learning_rate": 1e-05, |
| "loss": 0.4556, |
| "step": 1864 |
| }, |
| { |
| "epoch": 0.5135014275394724, |
| "grad_norm": 0.3633113503456116, |
| "learning_rate": 1e-05, |
| "loss": 0.4405, |
| "step": 1866 |
| }, |
| { |
| "epoch": 0.5140518042035017, |
| "grad_norm": 0.3655359447002411, |
| "learning_rate": 1e-05, |
| "loss": 0.4486, |
| "step": 1868 |
| }, |
| { |
| "epoch": 0.5146021808675312, |
| "grad_norm": 0.36135318875312805, |
| "learning_rate": 1e-05, |
| "loss": 0.4473, |
| "step": 1870 |
| }, |
| { |
| "epoch": 0.5151525575315606, |
| "grad_norm": 0.4725627601146698, |
| "learning_rate": 1e-05, |
| "loss": 0.4579, |
| "step": 1872 |
| }, |
| { |
| "epoch": 0.5157029341955901, |
| "grad_norm": 0.37844300270080566, |
| "learning_rate": 1e-05, |
| "loss": 0.4502, |
| "step": 1874 |
| }, |
| { |
| "epoch": 0.5162533108596196, |
| "grad_norm": 0.35601717233657837, |
| "learning_rate": 1e-05, |
| "loss": 0.4392, |
| "step": 1876 |
| }, |
| { |
| "epoch": 0.516803687523649, |
| "grad_norm": 0.3960351049900055, |
| "learning_rate": 1e-05, |
| "loss": 0.4519, |
| "step": 1878 |
| }, |
| { |
| "epoch": 0.5173540641876785, |
| "grad_norm": 0.3775772154331207, |
| "learning_rate": 1e-05, |
| "loss": 0.4553, |
| "step": 1880 |
| }, |
| { |
| "epoch": 0.5179044408517078, |
| "grad_norm": 0.3815532624721527, |
| "learning_rate": 1e-05, |
| "loss": 0.4479, |
| "step": 1882 |
| }, |
| { |
| "epoch": 0.5184548175157373, |
| "grad_norm": 0.3661166727542877, |
| "learning_rate": 1e-05, |
| "loss": 0.4423, |
| "step": 1884 |
| }, |
| { |
| "epoch": 0.5190051941797668, |
| "grad_norm": 0.3378327786922455, |
| "learning_rate": 1e-05, |
| "loss": 0.4419, |
| "step": 1886 |
| }, |
| { |
| "epoch": 0.5195555708437962, |
| "grad_norm": 0.34638261795043945, |
| "learning_rate": 1e-05, |
| "loss": 0.4379, |
| "step": 1888 |
| }, |
| { |
| "epoch": 0.5201059475078257, |
| "grad_norm": 0.35764721035957336, |
| "learning_rate": 1e-05, |
| "loss": 0.4389, |
| "step": 1890 |
| }, |
| { |
| "epoch": 0.5206563241718551, |
| "grad_norm": 0.3674796223640442, |
| "learning_rate": 1e-05, |
| "loss": 0.4438, |
| "step": 1892 |
| }, |
| { |
| "epoch": 0.5212067008358846, |
| "grad_norm": 0.34744736552238464, |
| "learning_rate": 1e-05, |
| "loss": 0.4317, |
| "step": 1894 |
| }, |
| { |
| "epoch": 0.521757077499914, |
| "grad_norm": 0.39198940992355347, |
| "learning_rate": 1e-05, |
| "loss": 0.4406, |
| "step": 1896 |
| }, |
| { |
| "epoch": 0.5223074541639434, |
| "grad_norm": 0.3545363247394562, |
| "learning_rate": 1e-05, |
| "loss": 0.4255, |
| "step": 1898 |
| }, |
| { |
| "epoch": 0.5228578308279729, |
| "grad_norm": 0.3635193407535553, |
| "learning_rate": 1e-05, |
| "loss": 0.4521, |
| "step": 1900 |
| }, |
| { |
| "epoch": 0.5234082074920023, |
| "grad_norm": 0.33844560384750366, |
| "learning_rate": 1e-05, |
| "loss": 0.4371, |
| "step": 1902 |
| }, |
| { |
| "epoch": 0.5239585841560318, |
| "grad_norm": 0.34886521100997925, |
| "learning_rate": 1e-05, |
| "loss": 0.4328, |
| "step": 1904 |
| }, |
| { |
| "epoch": 0.5245089608200613, |
| "grad_norm": 0.34973517060279846, |
| "learning_rate": 1e-05, |
| "loss": 0.4442, |
| "step": 1906 |
| }, |
| { |
| "epoch": 0.5250593374840907, |
| "grad_norm": 0.35180777311325073, |
| "learning_rate": 1e-05, |
| "loss": 0.4575, |
| "step": 1908 |
| }, |
| { |
| "epoch": 0.5256097141481201, |
| "grad_norm": 0.36237335205078125, |
| "learning_rate": 1e-05, |
| "loss": 0.4357, |
| "step": 1910 |
| }, |
| { |
| "epoch": 0.5261600908121495, |
| "grad_norm": 0.3784085512161255, |
| "learning_rate": 1e-05, |
| "loss": 0.4559, |
| "step": 1912 |
| }, |
| { |
| "epoch": 0.526710467476179, |
| "grad_norm": 0.3556850254535675, |
| "learning_rate": 1e-05, |
| "loss": 0.4563, |
| "step": 1914 |
| }, |
| { |
| "epoch": 0.5272608441402085, |
| "grad_norm": 0.3620041310787201, |
| "learning_rate": 1e-05, |
| "loss": 0.4458, |
| "step": 1916 |
| }, |
| { |
| "epoch": 0.5278112208042379, |
| "grad_norm": 0.3616819679737091, |
| "learning_rate": 1e-05, |
| "loss": 0.4304, |
| "step": 1918 |
| }, |
| { |
| "epoch": 0.5283615974682674, |
| "grad_norm": 0.3651537597179413, |
| "learning_rate": 1e-05, |
| "loss": 0.4463, |
| "step": 1920 |
| }, |
| { |
| "epoch": 0.5289119741322967, |
| "grad_norm": 0.3924584686756134, |
| "learning_rate": 1e-05, |
| "loss": 0.4418, |
| "step": 1922 |
| }, |
| { |
| "epoch": 0.5294623507963262, |
| "grad_norm": 0.353217214345932, |
| "learning_rate": 1e-05, |
| "loss": 0.4437, |
| "step": 1924 |
| }, |
| { |
| "epoch": 0.5300127274603557, |
| "grad_norm": 0.3897522985935211, |
| "learning_rate": 1e-05, |
| "loss": 0.4549, |
| "step": 1926 |
| }, |
| { |
| "epoch": 0.5305631041243851, |
| "grad_norm": 0.36462587118148804, |
| "learning_rate": 1e-05, |
| "loss": 0.4247, |
| "step": 1928 |
| }, |
| { |
| "epoch": 0.5311134807884146, |
| "grad_norm": 0.3874776363372803, |
| "learning_rate": 1e-05, |
| "loss": 0.4502, |
| "step": 1930 |
| }, |
| { |
| "epoch": 0.531663857452444, |
| "grad_norm": 0.3533260226249695, |
| "learning_rate": 1e-05, |
| "loss": 0.4515, |
| "step": 1932 |
| }, |
| { |
| "epoch": 0.5322142341164735, |
| "grad_norm": 0.3668268024921417, |
| "learning_rate": 1e-05, |
| "loss": 0.4474, |
| "step": 1934 |
| }, |
| { |
| "epoch": 0.5327646107805029, |
| "grad_norm": 0.3501083254814148, |
| "learning_rate": 1e-05, |
| "loss": 0.4344, |
| "step": 1936 |
| }, |
| { |
| "epoch": 0.5333149874445323, |
| "grad_norm": 0.3565337657928467, |
| "learning_rate": 1e-05, |
| "loss": 0.4412, |
| "step": 1938 |
| }, |
| { |
| "epoch": 0.5338653641085618, |
| "grad_norm": 0.34048742055892944, |
| "learning_rate": 1e-05, |
| "loss": 0.4502, |
| "step": 1940 |
| }, |
| { |
| "epoch": 0.5344157407725912, |
| "grad_norm": 0.35694393515586853, |
| "learning_rate": 1e-05, |
| "loss": 0.4532, |
| "step": 1942 |
| }, |
| { |
| "epoch": 0.5349661174366207, |
| "grad_norm": 0.3527338206768036, |
| "learning_rate": 1e-05, |
| "loss": 0.4378, |
| "step": 1944 |
| }, |
| { |
| "epoch": 0.5355164941006502, |
| "grad_norm": 0.3684084117412567, |
| "learning_rate": 1e-05, |
| "loss": 0.4562, |
| "step": 1946 |
| }, |
| { |
| "epoch": 0.5360668707646796, |
| "grad_norm": 0.3584345281124115, |
| "learning_rate": 1e-05, |
| "loss": 0.4561, |
| "step": 1948 |
| }, |
| { |
| "epoch": 0.536617247428709, |
| "grad_norm": 0.35685622692108154, |
| "learning_rate": 1e-05, |
| "loss": 0.4532, |
| "step": 1950 |
| }, |
| { |
| "epoch": 0.5371676240927384, |
| "grad_norm": 0.36560460925102234, |
| "learning_rate": 1e-05, |
| "loss": 0.4529, |
| "step": 1952 |
| }, |
| { |
| "epoch": 0.5377180007567679, |
| "grad_norm": 0.36613890528678894, |
| "learning_rate": 1e-05, |
| "loss": 0.4536, |
| "step": 1954 |
| }, |
| { |
| "epoch": 0.5382683774207974, |
| "grad_norm": 0.3513580858707428, |
| "learning_rate": 1e-05, |
| "loss": 0.4496, |
| "step": 1956 |
| }, |
| { |
| "epoch": 0.5388187540848268, |
| "grad_norm": 0.38372403383255005, |
| "learning_rate": 1e-05, |
| "loss": 0.4506, |
| "step": 1958 |
| }, |
| { |
| "epoch": 0.5393691307488563, |
| "grad_norm": 0.35690757632255554, |
| "learning_rate": 1e-05, |
| "loss": 0.4371, |
| "step": 1960 |
| }, |
| { |
| "epoch": 0.5399195074128857, |
| "grad_norm": 0.36706483364105225, |
| "learning_rate": 1e-05, |
| "loss": 0.4292, |
| "step": 1962 |
| }, |
| { |
| "epoch": 0.5404698840769151, |
| "grad_norm": 0.35754841566085815, |
| "learning_rate": 1e-05, |
| "loss": 0.4543, |
| "step": 1964 |
| }, |
| { |
| "epoch": 0.5410202607409446, |
| "grad_norm": 0.35544702410697937, |
| "learning_rate": 1e-05, |
| "loss": 0.4522, |
| "step": 1966 |
| }, |
| { |
| "epoch": 0.541570637404974, |
| "grad_norm": 0.3689357042312622, |
| "learning_rate": 1e-05, |
| "loss": 0.4447, |
| "step": 1968 |
| }, |
| { |
| "epoch": 0.5421210140690035, |
| "grad_norm": 0.35911116003990173, |
| "learning_rate": 1e-05, |
| "loss": 0.4253, |
| "step": 1970 |
| }, |
| { |
| "epoch": 0.5426713907330329, |
| "grad_norm": 0.3458103537559509, |
| "learning_rate": 1e-05, |
| "loss": 0.4398, |
| "step": 1972 |
| }, |
| { |
| "epoch": 0.5432217673970624, |
| "grad_norm": 0.3606932759284973, |
| "learning_rate": 1e-05, |
| "loss": 0.4486, |
| "step": 1974 |
| }, |
| { |
| "epoch": 0.5437721440610918, |
| "grad_norm": 0.3759188652038574, |
| "learning_rate": 1e-05, |
| "loss": 0.4339, |
| "step": 1976 |
| }, |
| { |
| "epoch": 0.5443225207251212, |
| "grad_norm": 0.3803597390651703, |
| "learning_rate": 1e-05, |
| "loss": 0.4575, |
| "step": 1978 |
| }, |
| { |
| "epoch": 0.5448728973891507, |
| "grad_norm": 0.36220523715019226, |
| "learning_rate": 1e-05, |
| "loss": 0.4427, |
| "step": 1980 |
| }, |
| { |
| "epoch": 0.5454232740531801, |
| "grad_norm": 0.36756813526153564, |
| "learning_rate": 1e-05, |
| "loss": 0.4297, |
| "step": 1982 |
| }, |
| { |
| "epoch": 0.5459736507172096, |
| "grad_norm": 0.35930246114730835, |
| "learning_rate": 1e-05, |
| "loss": 0.4375, |
| "step": 1984 |
| }, |
| { |
| "epoch": 0.5465240273812391, |
| "grad_norm": 0.38998985290527344, |
| "learning_rate": 1e-05, |
| "loss": 0.4331, |
| "step": 1986 |
| }, |
| { |
| "epoch": 0.5470744040452685, |
| "grad_norm": 0.35975074768066406, |
| "learning_rate": 1e-05, |
| "loss": 0.4493, |
| "step": 1988 |
| }, |
| { |
| "epoch": 0.5476247807092979, |
| "grad_norm": 0.3618590533733368, |
| "learning_rate": 1e-05, |
| "loss": 0.4431, |
| "step": 1990 |
| }, |
| { |
| "epoch": 0.5481751573733273, |
| "grad_norm": 0.3768090009689331, |
| "learning_rate": 1e-05, |
| "loss": 0.4414, |
| "step": 1992 |
| }, |
| { |
| "epoch": 0.5487255340373568, |
| "grad_norm": 0.3526524305343628, |
| "learning_rate": 1e-05, |
| "loss": 0.4349, |
| "step": 1994 |
| }, |
| { |
| "epoch": 0.5492759107013863, |
| "grad_norm": 0.3426629900932312, |
| "learning_rate": 1e-05, |
| "loss": 0.4345, |
| "step": 1996 |
| }, |
| { |
| "epoch": 0.5498262873654157, |
| "grad_norm": 0.3500785529613495, |
| "learning_rate": 1e-05, |
| "loss": 0.4415, |
| "step": 1998 |
| }, |
| { |
| "epoch": 0.5503766640294452, |
| "grad_norm": 0.3602929413318634, |
| "learning_rate": 1e-05, |
| "loss": 0.4454, |
| "step": 2000 |
| }, |
| { |
| "epoch": 0.5503766640294452, |
| "eval_merge_loss": 0.3855894207954407, |
| "eval_merge_runtime": 600.0048, |
| "eval_merge_samples_per_second": 56.253, |
| "eval_merge_steps_per_second": 2.345, |
| "step": 2000 |
| }, |
| { |
| "epoch": 0.5503766640294452, |
| "eval_new_aug_datas_filtered.json_loss": 0.5099759697914124, |
| "eval_new_aug_datas_filtered.json_runtime": 10.3782, |
| "eval_new_aug_datas_filtered.json_samples_per_second": 73.905, |
| "eval_new_aug_datas_filtered.json_steps_per_second": 3.083, |
| "step": 2000 |
| }, |
| { |
| "epoch": 0.5503766640294452, |
| "eval_sharegpt_gpt4.json_loss": 0.763576865196228, |
| "eval_sharegpt_gpt4.json_runtime": 31.7204, |
| "eval_sharegpt_gpt4.json_samples_per_second": 58.669, |
| "eval_sharegpt_gpt4.json_steps_per_second": 2.459, |
| "step": 2000 |
| }, |
| { |
| "epoch": 0.5503766640294452, |
| "eval_Table_GPT.json_loss": 0.055675260722637177, |
| "eval_Table_GPT.json_runtime": 24.9781, |
| "eval_Table_GPT.json_samples_per_second": 83.793, |
| "eval_Table_GPT.json_steps_per_second": 3.523, |
| "step": 2000 |
| }, |
| { |
| "epoch": 0.5503766640294452, |
| "eval_gpt_4o_200k.json_loss": 0.7919400334358215, |
| "eval_gpt_4o_200k.json_runtime": 48.5207, |
| "eval_gpt_4o_200k.json_samples_per_second": 129.45, |
| "eval_gpt_4o_200k.json_steps_per_second": 5.4, |
| "step": 2000 |
| }, |
| { |
| "epoch": 0.5503766640294452, |
| "eval_multi_turn_datas.json_loss": 0.321598082780838, |
| "eval_multi_turn_datas.json_runtime": 75.7401, |
| "eval_multi_turn_datas.json_samples_per_second": 52.839, |
| "eval_multi_turn_datas.json_steps_per_second": 2.205, |
| "step": 2000 |
| }, |
| { |
| "epoch": 0.5503766640294452, |
| "eval_table_python_code_datas.json_loss": 0.26337531208992004, |
| "eval_table_python_code_datas.json_runtime": 43.1695, |
| "eval_table_python_code_datas.json_samples_per_second": 50.012, |
| "eval_table_python_code_datas.json_steps_per_second": 2.085, |
| "step": 2000 |
| }, |
| { |
| "epoch": 0.5503766640294452, |
| "eval_tabular_llm_data.json_loss": 0.09393570572137833, |
| "eval_tabular_llm_data.json_runtime": 8.5822, |
| "eval_tabular_llm_data.json_samples_per_second": 28.664, |
| "eval_tabular_llm_data.json_steps_per_second": 1.282, |
| "step": 2000 |
| }, |
| { |
| "epoch": 0.5503766640294452, |
| "eval_python_code_critic_21k.json_loss": 0.5615730285644531, |
| "eval_python_code_critic_21k.json_runtime": 3.2332, |
| "eval_python_code_critic_21k.json_samples_per_second": 184.645, |
| "eval_python_code_critic_21k.json_steps_per_second": 7.732, |
| "step": 2000 |
| }, |
| { |
| "epoch": 0.5503766640294452, |
| "eval_all_merge_table_dataset.json_loss": 0.07384855300188065, |
| "eval_all_merge_table_dataset.json_runtime": 23.3929, |
| "eval_all_merge_table_dataset.json_samples_per_second": 30.437, |
| "eval_all_merge_table_dataset.json_steps_per_second": 1.282, |
| "step": 2000 |
| }, |
| { |
| "epoch": 0.5503766640294452, |
| "eval_code_feedback_multi_turn.json_loss": 0.5769618153572083, |
| "eval_code_feedback_multi_turn.json_runtime": 32.4541, |
| "eval_code_feedback_multi_turn.json_samples_per_second": 67.819, |
| "eval_code_feedback_multi_turn.json_steps_per_second": 2.835, |
| "step": 2000 |
| }, |
| { |
| "epoch": 0.5503766640294452, |
| "eval_ultrainteract_sft.json_loss": 0.41532665491104126, |
| "eval_ultrainteract_sft.json_runtime": 8.6954, |
| "eval_ultrainteract_sft.json_samples_per_second": 167.445, |
| "eval_ultrainteract_sft.json_steps_per_second": 7.015, |
| "step": 2000 |
| }, |
| { |
| "epoch": 0.5503766640294452, |
| "eval_synthetic_text_to_sql.json_loss": 0.09223779290914536, |
| "eval_synthetic_text_to_sql.json_runtime": 0.1265, |
| "eval_synthetic_text_to_sql.json_samples_per_second": 268.84, |
| "eval_synthetic_text_to_sql.json_steps_per_second": 15.814, |
| "step": 2000 |
| }, |
| { |
| "epoch": 0.5503766640294452, |
| "eval_sft_react_sql_datas.json_loss": 0.6254591941833496, |
| "eval_sft_react_sql_datas.json_runtime": 7.8542, |
| "eval_sft_react_sql_datas.json_samples_per_second": 39.979, |
| "eval_sft_react_sql_datas.json_steps_per_second": 1.782, |
| "step": 2000 |
| }, |
| { |
| "epoch": 0.5503766640294452, |
| "eval_all_merge_code.json_loss": 0.2845838665962219, |
| "eval_all_merge_code.json_runtime": 0.3345, |
| "eval_all_merge_code.json_samples_per_second": 188.319, |
| "eval_all_merge_code.json_steps_per_second": 8.968, |
| "step": 2000 |
| }, |
| { |
| "epoch": 0.5503766640294452, |
| "eval_magpie_datas.json_loss": 0.4300972521305084, |
| "eval_magpie_datas.json_runtime": 2.2166, |
| "eval_magpie_datas.json_samples_per_second": 77.598, |
| "eval_magpie_datas.json_steps_per_second": 3.609, |
| "step": 2000 |
| }, |
| { |
| "epoch": 0.5503766640294452, |
| "eval_train_data_for_qwen.json_loss": 0.0036769520957022905, |
| "eval_train_data_for_qwen.json_runtime": 0.2431, |
| "eval_train_data_for_qwen.json_samples_per_second": 41.14, |
| "eval_train_data_for_qwen.json_steps_per_second": 4.114, |
| "step": 2000 |
| }, |
| { |
| "epoch": 0.5503766640294452, |
| "eval_alpaca_cleaned.json_loss": 0.9104709625244141, |
| "eval_alpaca_cleaned.json_runtime": 0.1148, |
| "eval_alpaca_cleaned.json_samples_per_second": 235.266, |
| "eval_alpaca_cleaned.json_steps_per_second": 17.427, |
| "step": 2000 |
| }, |
| { |
| "epoch": 0.5503766640294452, |
| "eval_agent_instruct.json_loss": 0.220087930560112, |
| "eval_agent_instruct.json_runtime": 0.5143, |
| "eval_agent_instruct.json_samples_per_second": 93.334, |
| "eval_agent_instruct.json_steps_per_second": 3.889, |
| "step": 2000 |
| }, |
| { |
| "epoch": 0.5503766640294452, |
| "eval_MathInstruct.json_loss": 0.1989249587059021, |
| "eval_MathInstruct.json_runtime": 0.3499, |
| "eval_MathInstruct.json_samples_per_second": 162.904, |
| "eval_MathInstruct.json_steps_per_second": 8.574, |
| "step": 2000 |
| }, |
| { |
| "epoch": 0.5503766640294452, |
| "eval_tested_143k_python_alpaca.json_loss": 0.4425477683544159, |
| "eval_tested_143k_python_alpaca.json_runtime": 0.3008, |
| "eval_tested_143k_python_alpaca.json_samples_per_second": 113.029, |
| "eval_tested_143k_python_alpaca.json_steps_per_second": 6.649, |
| "step": 2000 |
| }, |
| { |
| "epoch": 0.5503766640294452, |
| "eval_xlam_function_calling_60k.json_loss": 0.008927595801651478, |
| "eval_xlam_function_calling_60k.json_runtime": 0.1003, |
| "eval_xlam_function_calling_60k.json_samples_per_second": 229.301, |
| "eval_xlam_function_calling_60k.json_steps_per_second": 9.97, |
| "step": 2000 |
| }, |
| { |
| "epoch": 0.5503766640294452, |
| "eval_alpaca_data_gpt4_chinese.json_loss": 1.5485728979110718, |
| "eval_alpaca_data_gpt4_chinese.json_runtime": 0.0512, |
| "eval_alpaca_data_gpt4_chinese.json_samples_per_second": 312.726, |
| "eval_alpaca_data_gpt4_chinese.json_steps_per_second": 19.545, |
| "step": 2000 |
| }, |
| { |
| "epoch": 0.5503766640294452, |
| "eval_alpaca_gpt4_zh.json_loss": 0.9768400192260742, |
| "eval_alpaca_gpt4_zh.json_runtime": 0.0505, |
| "eval_alpaca_gpt4_zh.json_samples_per_second": 217.931, |
| "eval_alpaca_gpt4_zh.json_steps_per_second": 19.812, |
| "step": 2000 |
| }, |
| { |
| "epoch": 0.5503766640294452, |
| "eval_codefeedback_filtered_instruction.json_loss": 0.587010383605957, |
| "eval_codefeedback_filtered_instruction.json_runtime": 0.4876, |
| "eval_codefeedback_filtered_instruction.json_samples_per_second": 41.015, |
| "eval_codefeedback_filtered_instruction.json_steps_per_second": 2.051, |
| "step": 2000 |
| }, |
| { |
| "epoch": 0.5509270406934746, |
| "grad_norm": 0.3626772463321686, |
| "learning_rate": 1e-05, |
| "loss": 0.4442, |
| "step": 2002 |
| }, |
| { |
| "epoch": 0.551477417357504, |
| "grad_norm": 0.34878280758857727, |
| "learning_rate": 1e-05, |
| "loss": 0.4458, |
| "step": 2004 |
| }, |
| { |
| "epoch": 0.5520277940215335, |
| "grad_norm": 0.35377946496009827, |
| "learning_rate": 1e-05, |
| "loss": 0.4273, |
| "step": 2006 |
| }, |
| { |
| "epoch": 0.5525781706855629, |
| "grad_norm": 0.3649701774120331, |
| "learning_rate": 1e-05, |
| "loss": 0.4342, |
| "step": 2008 |
| }, |
| { |
| "epoch": 0.5531285473495924, |
| "grad_norm": 0.34736165404319763, |
| "learning_rate": 1e-05, |
| "loss": 0.4298, |
| "step": 2010 |
| }, |
| { |
| "epoch": 0.5536789240136218, |
| "grad_norm": 0.3697884678840637, |
| "learning_rate": 1e-05, |
| "loss": 0.4424, |
| "step": 2012 |
| }, |
| { |
| "epoch": 0.5542293006776513, |
| "grad_norm": 0.40290403366088867, |
| "learning_rate": 1e-05, |
| "loss": 0.4388, |
| "step": 2014 |
| }, |
| { |
| "epoch": 0.5547796773416808, |
| "grad_norm": 0.36797061562538147, |
| "learning_rate": 1e-05, |
| "loss": 0.4648, |
| "step": 2016 |
| }, |
| { |
| "epoch": 0.5553300540057101, |
| "grad_norm": 0.35621124505996704, |
| "learning_rate": 1e-05, |
| "loss": 0.433, |
| "step": 2018 |
| }, |
| { |
| "epoch": 0.5558804306697396, |
| "grad_norm": 0.3625437915325165, |
| "learning_rate": 1e-05, |
| "loss": 0.441, |
| "step": 2020 |
| }, |
| { |
| "epoch": 0.556430807333769, |
| "grad_norm": 0.3642013370990753, |
| "learning_rate": 1e-05, |
| "loss": 0.4425, |
| "step": 2022 |
| }, |
| { |
| "epoch": 0.5569811839977985, |
| "grad_norm": 0.36053115129470825, |
| "learning_rate": 1e-05, |
| "loss": 0.4422, |
| "step": 2024 |
| }, |
| { |
| "epoch": 0.557531560661828, |
| "grad_norm": 0.36283549666404724, |
| "learning_rate": 1e-05, |
| "loss": 0.4338, |
| "step": 2026 |
| }, |
| { |
| "epoch": 0.5580819373258574, |
| "grad_norm": 0.3758421540260315, |
| "learning_rate": 1e-05, |
| "loss": 0.439, |
| "step": 2028 |
| }, |
| { |
| "epoch": 0.5586323139898868, |
| "grad_norm": 0.33730989694595337, |
| "learning_rate": 1e-05, |
| "loss": 0.4446, |
| "step": 2030 |
| }, |
| { |
| "epoch": 0.5591826906539162, |
| "grad_norm": 0.36297255754470825, |
| "learning_rate": 1e-05, |
| "loss": 0.4358, |
| "step": 2032 |
| }, |
| { |
| "epoch": 0.5597330673179457, |
| "grad_norm": 0.3534908890724182, |
| "learning_rate": 1e-05, |
| "loss": 0.4257, |
| "step": 2034 |
| }, |
| { |
| "epoch": 0.5602834439819752, |
| "grad_norm": 0.3690515160560608, |
| "learning_rate": 1e-05, |
| "loss": 0.4383, |
| "step": 2036 |
| }, |
| { |
| "epoch": 0.5608338206460046, |
| "grad_norm": 0.3638661503791809, |
| "learning_rate": 1e-05, |
| "loss": 0.4452, |
| "step": 2038 |
| }, |
| { |
| "epoch": 0.5613841973100341, |
| "grad_norm": 0.3521392047405243, |
| "learning_rate": 1e-05, |
| "loss": 0.4342, |
| "step": 2040 |
| }, |
| { |
| "epoch": 0.5619345739740635, |
| "grad_norm": 0.3569532632827759, |
| "learning_rate": 1e-05, |
| "loss": 0.4507, |
| "step": 2042 |
| }, |
| { |
| "epoch": 0.5624849506380929, |
| "grad_norm": 0.37072595953941345, |
| "learning_rate": 1e-05, |
| "loss": 0.4354, |
| "step": 2044 |
| }, |
| { |
| "epoch": 0.5630353273021224, |
| "grad_norm": 0.38489988446235657, |
| "learning_rate": 1e-05, |
| "loss": 0.4528, |
| "step": 2046 |
| }, |
| { |
| "epoch": 0.5635857039661518, |
| "grad_norm": 0.38305357098579407, |
| "learning_rate": 1e-05, |
| "loss": 0.4428, |
| "step": 2048 |
| }, |
| { |
| "epoch": 0.5641360806301813, |
| "grad_norm": 0.3491927981376648, |
| "learning_rate": 1e-05, |
| "loss": 0.4242, |
| "step": 2050 |
| }, |
| { |
| "epoch": 0.5646864572942107, |
| "grad_norm": 0.35508430004119873, |
| "learning_rate": 1e-05, |
| "loss": 0.4556, |
| "step": 2052 |
| }, |
| { |
| "epoch": 0.5652368339582402, |
| "grad_norm": 0.36298030614852905, |
| "learning_rate": 1e-05, |
| "loss": 0.4337, |
| "step": 2054 |
| }, |
| { |
| "epoch": 0.5657872106222697, |
| "grad_norm": 0.3598901629447937, |
| "learning_rate": 1e-05, |
| "loss": 0.4378, |
| "step": 2056 |
| }, |
| { |
| "epoch": 0.566337587286299, |
| "grad_norm": 0.3838946223258972, |
| "learning_rate": 1e-05, |
| "loss": 0.4346, |
| "step": 2058 |
| }, |
| { |
| "epoch": 0.5668879639503285, |
| "grad_norm": 0.3986867666244507, |
| "learning_rate": 1e-05, |
| "loss": 0.45, |
| "step": 2060 |
| }, |
| { |
| "epoch": 0.5674383406143579, |
| "grad_norm": 0.3509708344936371, |
| "learning_rate": 1e-05, |
| "loss": 0.4462, |
| "step": 2062 |
| }, |
| { |
| "epoch": 0.5679887172783874, |
| "grad_norm": 0.35189950466156006, |
| "learning_rate": 1e-05, |
| "loss": 0.4307, |
| "step": 2064 |
| }, |
| { |
| "epoch": 0.5685390939424169, |
| "grad_norm": 0.37416207790374756, |
| "learning_rate": 1e-05, |
| "loss": 0.4368, |
| "step": 2066 |
| }, |
| { |
| "epoch": 0.5690894706064463, |
| "grad_norm": 0.3902382254600525, |
| "learning_rate": 1e-05, |
| "loss": 0.4278, |
| "step": 2068 |
| }, |
| { |
| "epoch": 0.5696398472704758, |
| "grad_norm": 0.384260892868042, |
| "learning_rate": 1e-05, |
| "loss": 0.4449, |
| "step": 2070 |
| }, |
| { |
| "epoch": 0.5701902239345051, |
| "grad_norm": 0.367347776889801, |
| "learning_rate": 1e-05, |
| "loss": 0.4397, |
| "step": 2072 |
| }, |
| { |
| "epoch": 0.5707406005985346, |
| "grad_norm": 0.35011574625968933, |
| "learning_rate": 1e-05, |
| "loss": 0.4375, |
| "step": 2074 |
| }, |
| { |
| "epoch": 0.5712909772625641, |
| "grad_norm": 0.3609907329082489, |
| "learning_rate": 1e-05, |
| "loss": 0.446, |
| "step": 2076 |
| }, |
| { |
| "epoch": 0.5718413539265935, |
| "grad_norm": 0.3640425205230713, |
| "learning_rate": 1e-05, |
| "loss": 0.4453, |
| "step": 2078 |
| }, |
| { |
| "epoch": 0.572391730590623, |
| "grad_norm": 0.3464198112487793, |
| "learning_rate": 1e-05, |
| "loss": 0.4489, |
| "step": 2080 |
| }, |
| { |
| "epoch": 0.5729421072546524, |
| "grad_norm": 0.3741483688354492, |
| "learning_rate": 1e-05, |
| "loss": 0.4515, |
| "step": 2082 |
| }, |
| { |
| "epoch": 0.5734924839186818, |
| "grad_norm": 0.37388619780540466, |
| "learning_rate": 1e-05, |
| "loss": 0.4632, |
| "step": 2084 |
| }, |
| { |
| "epoch": 0.5740428605827113, |
| "grad_norm": 0.37237605452537537, |
| "learning_rate": 1e-05, |
| "loss": 0.4425, |
| "step": 2086 |
| }, |
| { |
| "epoch": 0.5745932372467407, |
| "grad_norm": 0.35421323776245117, |
| "learning_rate": 1e-05, |
| "loss": 0.4474, |
| "step": 2088 |
| }, |
| { |
| "epoch": 0.5751436139107702, |
| "grad_norm": 0.33015069365501404, |
| "learning_rate": 1e-05, |
| "loss": 0.43, |
| "step": 2090 |
| }, |
| { |
| "epoch": 0.5756939905747996, |
| "grad_norm": 0.3670506179332733, |
| "learning_rate": 1e-05, |
| "loss": 0.4452, |
| "step": 2092 |
| }, |
| { |
| "epoch": 0.5762443672388291, |
| "grad_norm": 0.3514888882637024, |
| "learning_rate": 1e-05, |
| "loss": 0.4287, |
| "step": 2094 |
| }, |
| { |
| "epoch": 0.5767947439028586, |
| "grad_norm": 0.3714512288570404, |
| "learning_rate": 1e-05, |
| "loss": 0.4344, |
| "step": 2096 |
| }, |
| { |
| "epoch": 0.5773451205668879, |
| "grad_norm": 0.35363397002220154, |
| "learning_rate": 1e-05, |
| "loss": 0.4408, |
| "step": 2098 |
| }, |
| { |
| "epoch": 0.5778954972309174, |
| "grad_norm": 0.3529844582080841, |
| "learning_rate": 1e-05, |
| "loss": 0.4434, |
| "step": 2100 |
| }, |
| { |
| "epoch": 0.5784458738949468, |
| "grad_norm": 0.3400002121925354, |
| "learning_rate": 1e-05, |
| "loss": 0.4443, |
| "step": 2102 |
| }, |
| { |
| "epoch": 0.5789962505589763, |
| "grad_norm": 0.3620370328426361, |
| "learning_rate": 1e-05, |
| "loss": 0.4377, |
| "step": 2104 |
| }, |
| { |
| "epoch": 0.5795466272230058, |
| "grad_norm": 0.3476988971233368, |
| "learning_rate": 1e-05, |
| "loss": 0.4321, |
| "step": 2106 |
| }, |
| { |
| "epoch": 0.5800970038870352, |
| "grad_norm": 0.35739636421203613, |
| "learning_rate": 1e-05, |
| "loss": 0.4495, |
| "step": 2108 |
| }, |
| { |
| "epoch": 0.5806473805510647, |
| "grad_norm": 0.3718028962612152, |
| "learning_rate": 1e-05, |
| "loss": 0.4391, |
| "step": 2110 |
| }, |
| { |
| "epoch": 0.581197757215094, |
| "grad_norm": 0.35041627287864685, |
| "learning_rate": 1e-05, |
| "loss": 0.454, |
| "step": 2112 |
| }, |
| { |
| "epoch": 0.5817481338791235, |
| "grad_norm": 0.36277493834495544, |
| "learning_rate": 1e-05, |
| "loss": 0.44, |
| "step": 2114 |
| }, |
| { |
| "epoch": 0.582298510543153, |
| "grad_norm": 0.36685582995414734, |
| "learning_rate": 1e-05, |
| "loss": 0.4401, |
| "step": 2116 |
| }, |
| { |
| "epoch": 0.5828488872071824, |
| "grad_norm": 0.33634135127067566, |
| "learning_rate": 1e-05, |
| "loss": 0.4338, |
| "step": 2118 |
| }, |
| { |
| "epoch": 0.5833992638712119, |
| "grad_norm": 0.36546674370765686, |
| "learning_rate": 1e-05, |
| "loss": 0.4456, |
| "step": 2120 |
| }, |
| { |
| "epoch": 0.5839496405352413, |
| "grad_norm": 0.361472487449646, |
| "learning_rate": 1e-05, |
| "loss": 0.4368, |
| "step": 2122 |
| }, |
| { |
| "epoch": 0.5845000171992708, |
| "grad_norm": 0.36743828654289246, |
| "learning_rate": 1e-05, |
| "loss": 0.4464, |
| "step": 2124 |
| }, |
| { |
| "epoch": 0.5850503938633002, |
| "grad_norm": 0.35304173827171326, |
| "learning_rate": 1e-05, |
| "loss": 0.4407, |
| "step": 2126 |
| }, |
| { |
| "epoch": 0.5856007705273296, |
| "grad_norm": 0.35151979327201843, |
| "learning_rate": 1e-05, |
| "loss": 0.4532, |
| "step": 2128 |
| }, |
| { |
| "epoch": 0.5861511471913591, |
| "grad_norm": 0.34761616587638855, |
| "learning_rate": 1e-05, |
| "loss": 0.444, |
| "step": 2130 |
| }, |
| { |
| "epoch": 0.5867015238553885, |
| "grad_norm": 0.3763500452041626, |
| "learning_rate": 1e-05, |
| "loss": 0.4524, |
| "step": 2132 |
| }, |
| { |
| "epoch": 0.587251900519418, |
| "grad_norm": 0.36489951610565186, |
| "learning_rate": 1e-05, |
| "loss": 0.4333, |
| "step": 2134 |
| }, |
| { |
| "epoch": 0.5878022771834475, |
| "grad_norm": 0.38710853457450867, |
| "learning_rate": 1e-05, |
| "loss": 0.4517, |
| "step": 2136 |
| }, |
| { |
| "epoch": 0.5883526538474768, |
| "grad_norm": 0.36153027415275574, |
| "learning_rate": 1e-05, |
| "loss": 0.438, |
| "step": 2138 |
| }, |
| { |
| "epoch": 0.5889030305115063, |
| "grad_norm": 0.3907857835292816, |
| "learning_rate": 1e-05, |
| "loss": 0.4429, |
| "step": 2140 |
| }, |
| { |
| "epoch": 0.5894534071755357, |
| "grad_norm": 0.3813617527484894, |
| "learning_rate": 1e-05, |
| "loss": 0.4392, |
| "step": 2142 |
| }, |
| { |
| "epoch": 0.5900037838395652, |
| "grad_norm": 0.3563400208950043, |
| "learning_rate": 1e-05, |
| "loss": 0.434, |
| "step": 2144 |
| }, |
| { |
| "epoch": 0.5905541605035947, |
| "grad_norm": 0.3556332290172577, |
| "learning_rate": 1e-05, |
| "loss": 0.4436, |
| "step": 2146 |
| }, |
| { |
| "epoch": 0.5911045371676241, |
| "grad_norm": 0.3623802363872528, |
| "learning_rate": 1e-05, |
| "loss": 0.4378, |
| "step": 2148 |
| }, |
| { |
| "epoch": 0.5916549138316536, |
| "grad_norm": 0.36329442262649536, |
| "learning_rate": 1e-05, |
| "loss": 0.4386, |
| "step": 2150 |
| }, |
| { |
| "epoch": 0.5922052904956829, |
| "grad_norm": 0.3771746754646301, |
| "learning_rate": 1e-05, |
| "loss": 0.4494, |
| "step": 2152 |
| }, |
| { |
| "epoch": 0.5927556671597124, |
| "grad_norm": 0.34596994519233704, |
| "learning_rate": 1e-05, |
| "loss": 0.4173, |
| "step": 2154 |
| }, |
| { |
| "epoch": 0.5933060438237419, |
| "grad_norm": 0.36507177352905273, |
| "learning_rate": 1e-05, |
| "loss": 0.4254, |
| "step": 2156 |
| }, |
| { |
| "epoch": 0.5938564204877713, |
| "grad_norm": 0.3519168794155121, |
| "learning_rate": 1e-05, |
| "loss": 0.4447, |
| "step": 2158 |
| }, |
| { |
| "epoch": 0.5944067971518008, |
| "grad_norm": 0.35316991806030273, |
| "learning_rate": 1e-05, |
| "loss": 0.4622, |
| "step": 2160 |
| }, |
| { |
| "epoch": 0.5949571738158302, |
| "grad_norm": 0.3529471158981323, |
| "learning_rate": 1e-05, |
| "loss": 0.4482, |
| "step": 2162 |
| }, |
| { |
| "epoch": 0.5955075504798597, |
| "grad_norm": 0.3722255825996399, |
| "learning_rate": 1e-05, |
| "loss": 0.4454, |
| "step": 2164 |
| }, |
| { |
| "epoch": 0.596057927143889, |
| "grad_norm": 0.3557456433773041, |
| "learning_rate": 1e-05, |
| "loss": 0.4435, |
| "step": 2166 |
| }, |
| { |
| "epoch": 0.5966083038079185, |
| "grad_norm": 0.3348141610622406, |
| "learning_rate": 1e-05, |
| "loss": 0.436, |
| "step": 2168 |
| }, |
| { |
| "epoch": 0.597158680471948, |
| "grad_norm": 0.38193532824516296, |
| "learning_rate": 1e-05, |
| "loss": 0.4543, |
| "step": 2170 |
| }, |
| { |
| "epoch": 0.5977090571359774, |
| "grad_norm": 0.3672102391719818, |
| "learning_rate": 1e-05, |
| "loss": 0.4356, |
| "step": 2172 |
| }, |
| { |
| "epoch": 0.5982594338000069, |
| "grad_norm": 0.37538838386535645, |
| "learning_rate": 1e-05, |
| "loss": 0.4442, |
| "step": 2174 |
| }, |
| { |
| "epoch": 0.5988098104640364, |
| "grad_norm": 0.3512885272502899, |
| "learning_rate": 1e-05, |
| "loss": 0.4249, |
| "step": 2176 |
| }, |
| { |
| "epoch": 0.5993601871280658, |
| "grad_norm": 0.4028591811656952, |
| "learning_rate": 1e-05, |
| "loss": 0.4495, |
| "step": 2178 |
| }, |
| { |
| "epoch": 0.5999105637920952, |
| "grad_norm": 0.3539179861545563, |
| "learning_rate": 1e-05, |
| "loss": 0.4504, |
| "step": 2180 |
| }, |
| { |
| "epoch": 0.6004609404561246, |
| "grad_norm": 0.34848934412002563, |
| "learning_rate": 1e-05, |
| "loss": 0.4348, |
| "step": 2182 |
| }, |
| { |
| "epoch": 0.6010113171201541, |
| "grad_norm": 0.37469926476478577, |
| "learning_rate": 1e-05, |
| "loss": 0.4414, |
| "step": 2184 |
| }, |
| { |
| "epoch": 0.6015616937841836, |
| "grad_norm": 0.3511207103729248, |
| "learning_rate": 1e-05, |
| "loss": 0.4489, |
| "step": 2186 |
| }, |
| { |
| "epoch": 0.602112070448213, |
| "grad_norm": 0.3594874441623688, |
| "learning_rate": 1e-05, |
| "loss": 0.4429, |
| "step": 2188 |
| }, |
| { |
| "epoch": 0.6026624471122425, |
| "grad_norm": 0.37694159150123596, |
| "learning_rate": 1e-05, |
| "loss": 0.4365, |
| "step": 2190 |
| }, |
| { |
| "epoch": 0.6032128237762718, |
| "grad_norm": 0.3630627393722534, |
| "learning_rate": 1e-05, |
| "loss": 0.4449, |
| "step": 2192 |
| }, |
| { |
| "epoch": 0.6037632004403013, |
| "grad_norm": 0.352230042219162, |
| "learning_rate": 1e-05, |
| "loss": 0.4382, |
| "step": 2194 |
| }, |
| { |
| "epoch": 0.6043135771043308, |
| "grad_norm": 0.369757741689682, |
| "learning_rate": 1e-05, |
| "loss": 0.4443, |
| "step": 2196 |
| }, |
| { |
| "epoch": 0.6048639537683602, |
| "grad_norm": 0.37120938301086426, |
| "learning_rate": 1e-05, |
| "loss": 0.454, |
| "step": 2198 |
| }, |
| { |
| "epoch": 0.6054143304323897, |
| "grad_norm": 0.3475727140903473, |
| "learning_rate": 1e-05, |
| "loss": 0.4424, |
| "step": 2200 |
| }, |
| { |
| "epoch": 0.6054143304323897, |
| "eval_merge_loss": 0.38126564025878906, |
| "eval_merge_runtime": 600.3103, |
| "eval_merge_samples_per_second": 56.224, |
| "eval_merge_steps_per_second": 2.344, |
| "step": 2200 |
| }, |
| { |
| "epoch": 0.6054143304323897, |
| "eval_new_aug_datas_filtered.json_loss": 0.5048007369041443, |
| "eval_new_aug_datas_filtered.json_runtime": 10.3514, |
| "eval_new_aug_datas_filtered.json_samples_per_second": 74.096, |
| "eval_new_aug_datas_filtered.json_steps_per_second": 3.091, |
| "step": 2200 |
| }, |
| { |
| "epoch": 0.6054143304323897, |
| "eval_sharegpt_gpt4.json_loss": 0.7578977346420288, |
| "eval_sharegpt_gpt4.json_runtime": 31.6981, |
| "eval_sharegpt_gpt4.json_samples_per_second": 58.71, |
| "eval_sharegpt_gpt4.json_steps_per_second": 2.461, |
| "step": 2200 |
| }, |
| { |
| "epoch": 0.6054143304323897, |
| "eval_Table_GPT.json_loss": 0.05305211618542671, |
| "eval_Table_GPT.json_runtime": 25.0091, |
| "eval_Table_GPT.json_samples_per_second": 83.69, |
| "eval_Table_GPT.json_steps_per_second": 3.519, |
| "step": 2200 |
| }, |
| { |
| "epoch": 0.6054143304323897, |
| "eval_gpt_4o_200k.json_loss": 0.7855507135391235, |
| "eval_gpt_4o_200k.json_runtime": 48.5546, |
| "eval_gpt_4o_200k.json_samples_per_second": 129.36, |
| "eval_gpt_4o_200k.json_steps_per_second": 5.396, |
| "step": 2200 |
| }, |
| { |
| "epoch": 0.6054143304323897, |
| "eval_multi_turn_datas.json_loss": 0.3139781355857849, |
| "eval_multi_turn_datas.json_runtime": 75.6414, |
| "eval_multi_turn_datas.json_samples_per_second": 52.908, |
| "eval_multi_turn_datas.json_steps_per_second": 2.208, |
| "step": 2200 |
| }, |
| { |
| "epoch": 0.6054143304323897, |
| "eval_table_python_code_datas.json_loss": 0.2603669762611389, |
| "eval_table_python_code_datas.json_runtime": 43.0857, |
| "eval_table_python_code_datas.json_samples_per_second": 50.109, |
| "eval_table_python_code_datas.json_steps_per_second": 2.089, |
| "step": 2200 |
| }, |
| { |
| "epoch": 0.6054143304323897, |
| "eval_tabular_llm_data.json_loss": 0.0890057235956192, |
| "eval_tabular_llm_data.json_runtime": 8.5461, |
| "eval_tabular_llm_data.json_samples_per_second": 28.785, |
| "eval_tabular_llm_data.json_steps_per_second": 1.287, |
| "step": 2200 |
| }, |
| { |
| "epoch": 0.6054143304323897, |
| "eval_python_code_critic_21k.json_loss": 0.5582770705223083, |
| "eval_python_code_critic_21k.json_runtime": 3.2316, |
| "eval_python_code_critic_21k.json_samples_per_second": 184.737, |
| "eval_python_code_critic_21k.json_steps_per_second": 7.736, |
| "step": 2200 |
| }, |
| { |
| "epoch": 0.6054143304323897, |
| "eval_all_merge_table_dataset.json_loss": 0.07120716571807861, |
| "eval_all_merge_table_dataset.json_runtime": 23.3637, |
| "eval_all_merge_table_dataset.json_samples_per_second": 30.475, |
| "eval_all_merge_table_dataset.json_steps_per_second": 1.284, |
| "step": 2200 |
| }, |
| { |
| "epoch": 0.6054143304323897, |
| "eval_code_feedback_multi_turn.json_loss": 0.5745006799697876, |
| "eval_code_feedback_multi_turn.json_runtime": 32.5197, |
| "eval_code_feedback_multi_turn.json_samples_per_second": 67.682, |
| "eval_code_feedback_multi_turn.json_steps_per_second": 2.829, |
| "step": 2200 |
| }, |
| { |
| "epoch": 0.6054143304323897, |
| "eval_ultrainteract_sft.json_loss": 0.41318273544311523, |
| "eval_ultrainteract_sft.json_runtime": 8.6602, |
| "eval_ultrainteract_sft.json_samples_per_second": 168.125, |
| "eval_ultrainteract_sft.json_steps_per_second": 7.044, |
| "step": 2200 |
| }, |
| { |
| "epoch": 0.6054143304323897, |
| "eval_synthetic_text_to_sql.json_loss": 0.09635543823242188, |
| "eval_synthetic_text_to_sql.json_runtime": 0.1265, |
| "eval_synthetic_text_to_sql.json_samples_per_second": 268.832, |
| "eval_synthetic_text_to_sql.json_steps_per_second": 15.814, |
| "step": 2200 |
| }, |
| { |
| "epoch": 0.6054143304323897, |
| "eval_sft_react_sql_datas.json_loss": 0.6216484904289246, |
| "eval_sft_react_sql_datas.json_runtime": 7.8599, |
| "eval_sft_react_sql_datas.json_samples_per_second": 39.949, |
| "eval_sft_react_sql_datas.json_steps_per_second": 1.781, |
| "step": 2200 |
| }, |
| { |
| "epoch": 0.6054143304323897, |
| "eval_all_merge_code.json_loss": 0.2849319279193878, |
| "eval_all_merge_code.json_runtime": 0.3296, |
| "eval_all_merge_code.json_samples_per_second": 191.112, |
| "eval_all_merge_code.json_steps_per_second": 9.101, |
| "step": 2200 |
| }, |
| { |
| "epoch": 0.6054143304323897, |
| "eval_magpie_datas.json_loss": 0.4269045293331146, |
| "eval_magpie_datas.json_runtime": 2.2161, |
| "eval_magpie_datas.json_samples_per_second": 77.615, |
| "eval_magpie_datas.json_steps_per_second": 3.61, |
| "step": 2200 |
| }, |
| { |
| "epoch": 0.6054143304323897, |
| "eval_train_data_for_qwen.json_loss": 0.005929525941610336, |
| "eval_train_data_for_qwen.json_runtime": 0.2454, |
| "eval_train_data_for_qwen.json_samples_per_second": 40.757, |
| "eval_train_data_for_qwen.json_steps_per_second": 4.076, |
| "step": 2200 |
| }, |
| { |
| "epoch": 0.6054143304323897, |
| "eval_alpaca_cleaned.json_loss": 0.9076781272888184, |
| "eval_alpaca_cleaned.json_runtime": 0.1144, |
| "eval_alpaca_cleaned.json_samples_per_second": 236.011, |
| "eval_alpaca_cleaned.json_steps_per_second": 17.482, |
| "step": 2200 |
| }, |
| { |
| "epoch": 0.6054143304323897, |
| "eval_agent_instruct.json_loss": 0.2231922596693039, |
| "eval_agent_instruct.json_runtime": 0.5154, |
| "eval_agent_instruct.json_samples_per_second": 93.136, |
| "eval_agent_instruct.json_steps_per_second": 3.881, |
| "step": 2200 |
| }, |
| { |
| "epoch": 0.6054143304323897, |
| "eval_MathInstruct.json_loss": 0.19876058399677277, |
| "eval_MathInstruct.json_runtime": 0.3563, |
| "eval_MathInstruct.json_samples_per_second": 159.969, |
| "eval_MathInstruct.json_steps_per_second": 8.419, |
| "step": 2200 |
| }, |
| { |
| "epoch": 0.6054143304323897, |
| "eval_tested_143k_python_alpaca.json_loss": 0.4431252181529999, |
| "eval_tested_143k_python_alpaca.json_runtime": 0.3026, |
| "eval_tested_143k_python_alpaca.json_samples_per_second": 112.374, |
| "eval_tested_143k_python_alpaca.json_steps_per_second": 6.61, |
| "step": 2200 |
| }, |
| { |
| "epoch": 0.6054143304323897, |
| "eval_xlam_function_calling_60k.json_loss": 0.00838847178965807, |
| "eval_xlam_function_calling_60k.json_runtime": 0.1, |
| "eval_xlam_function_calling_60k.json_samples_per_second": 230.081, |
| "eval_xlam_function_calling_60k.json_steps_per_second": 10.004, |
| "step": 2200 |
| }, |
| { |
| "epoch": 0.6054143304323897, |
| "eval_alpaca_data_gpt4_chinese.json_loss": 1.5384413003921509, |
| "eval_alpaca_data_gpt4_chinese.json_runtime": 0.0514, |
| "eval_alpaca_data_gpt4_chinese.json_samples_per_second": 311.198, |
| "eval_alpaca_data_gpt4_chinese.json_steps_per_second": 19.45, |
| "step": 2200 |
| }, |
| { |
| "epoch": 0.6054143304323897, |
| "eval_alpaca_gpt4_zh.json_loss": 0.969275712966919, |
| "eval_alpaca_gpt4_zh.json_runtime": 0.0504, |
| "eval_alpaca_gpt4_zh.json_samples_per_second": 218.311, |
| "eval_alpaca_gpt4_zh.json_steps_per_second": 19.846, |
| "step": 2200 |
| }, |
| { |
| "epoch": 0.6054143304323897, |
| "eval_codefeedback_filtered_instruction.json_loss": 0.5901365876197815, |
| "eval_codefeedback_filtered_instruction.json_runtime": 0.4874, |
| "eval_codefeedback_filtered_instruction.json_samples_per_second": 41.032, |
| "eval_codefeedback_filtered_instruction.json_steps_per_second": 2.052, |
| "step": 2200 |
| }, |
| { |
| "epoch": 0.6059647070964191, |
| "grad_norm": 0.37194857001304626, |
| "learning_rate": 1e-05, |
| "loss": 0.424, |
| "step": 2202 |
| }, |
| { |
| "epoch": 0.6065150837604486, |
| "grad_norm": 0.36095818877220154, |
| "learning_rate": 1e-05, |
| "loss": 0.4344, |
| "step": 2204 |
| }, |
| { |
| "epoch": 0.607065460424478, |
| "grad_norm": 0.36337706446647644, |
| "learning_rate": 1e-05, |
| "loss": 0.4446, |
| "step": 2206 |
| }, |
| { |
| "epoch": 0.6076158370885074, |
| "grad_norm": 0.3500390946865082, |
| "learning_rate": 1e-05, |
| "loss": 0.4304, |
| "step": 2208 |
| }, |
| { |
| "epoch": 0.6081662137525369, |
| "grad_norm": 0.3477112054824829, |
| "learning_rate": 1e-05, |
| "loss": 0.4346, |
| "step": 2210 |
| }, |
| { |
| "epoch": 0.6087165904165663, |
| "grad_norm": 0.36322692036628723, |
| "learning_rate": 1e-05, |
| "loss": 0.4311, |
| "step": 2212 |
| }, |
| { |
| "epoch": 0.6092669670805958, |
| "grad_norm": 0.37783941626548767, |
| "learning_rate": 1e-05, |
| "loss": 0.4389, |
| "step": 2214 |
| }, |
| { |
| "epoch": 0.6098173437446253, |
| "grad_norm": 0.36018887162208557, |
| "learning_rate": 1e-05, |
| "loss": 0.4321, |
| "step": 2216 |
| }, |
| { |
| "epoch": 0.6103677204086547, |
| "grad_norm": 0.34396857023239136, |
| "learning_rate": 1e-05, |
| "loss": 0.4349, |
| "step": 2218 |
| }, |
| { |
| "epoch": 0.6109180970726841, |
| "grad_norm": 0.3611605167388916, |
| "learning_rate": 1e-05, |
| "loss": 0.4305, |
| "step": 2220 |
| }, |
| { |
| "epoch": 0.6114684737367135, |
| "grad_norm": 0.339339941740036, |
| "learning_rate": 1e-05, |
| "loss": 0.4338, |
| "step": 2222 |
| }, |
| { |
| "epoch": 0.612018850400743, |
| "grad_norm": 0.32705169916152954, |
| "learning_rate": 1e-05, |
| "loss": 0.4275, |
| "step": 2224 |
| }, |
| { |
| "epoch": 0.6125692270647725, |
| "grad_norm": 0.3551005721092224, |
| "learning_rate": 1e-05, |
| "loss": 0.4365, |
| "step": 2226 |
| }, |
| { |
| "epoch": 0.6131196037288019, |
| "grad_norm": 0.3826168179512024, |
| "learning_rate": 1e-05, |
| "loss": 0.4325, |
| "step": 2228 |
| }, |
| { |
| "epoch": 0.6136699803928314, |
| "grad_norm": 0.376407653093338, |
| "learning_rate": 1e-05, |
| "loss": 0.4325, |
| "step": 2230 |
| }, |
| { |
| "epoch": 0.6142203570568608, |
| "grad_norm": 0.3507418930530548, |
| "learning_rate": 1e-05, |
| "loss": 0.4315, |
| "step": 2232 |
| }, |
| { |
| "epoch": 0.6147707337208902, |
| "grad_norm": 0.3515014946460724, |
| "learning_rate": 1e-05, |
| "loss": 0.4432, |
| "step": 2234 |
| }, |
| { |
| "epoch": 0.6153211103849197, |
| "grad_norm": 0.37726324796676636, |
| "learning_rate": 1e-05, |
| "loss": 0.4389, |
| "step": 2236 |
| }, |
| { |
| "epoch": 0.6158714870489491, |
| "grad_norm": 0.35043272376060486, |
| "learning_rate": 1e-05, |
| "loss": 0.4406, |
| "step": 2238 |
| }, |
| { |
| "epoch": 0.6164218637129786, |
| "grad_norm": 0.3619838356971741, |
| "learning_rate": 1e-05, |
| "loss": 0.4381, |
| "step": 2240 |
| }, |
| { |
| "epoch": 0.616972240377008, |
| "grad_norm": 0.3727911114692688, |
| "learning_rate": 1e-05, |
| "loss": 0.4261, |
| "step": 2242 |
| }, |
| { |
| "epoch": 0.6175226170410375, |
| "grad_norm": 0.35618454217910767, |
| "learning_rate": 1e-05, |
| "loss": 0.4353, |
| "step": 2244 |
| }, |
| { |
| "epoch": 0.6180729937050669, |
| "grad_norm": 0.3659394681453705, |
| "learning_rate": 1e-05, |
| "loss": 0.4281, |
| "step": 2246 |
| }, |
| { |
| "epoch": 0.6186233703690963, |
| "grad_norm": 0.35864701867103577, |
| "learning_rate": 1e-05, |
| "loss": 0.4409, |
| "step": 2248 |
| }, |
| { |
| "epoch": 0.6191737470331258, |
| "grad_norm": 0.36990123987197876, |
| "learning_rate": 1e-05, |
| "loss": 0.4424, |
| "step": 2250 |
| }, |
| { |
| "epoch": 0.6197241236971552, |
| "grad_norm": 0.36422237753868103, |
| "learning_rate": 1e-05, |
| "loss": 0.4516, |
| "step": 2252 |
| }, |
| { |
| "epoch": 0.6202745003611847, |
| "grad_norm": 0.34886521100997925, |
| "learning_rate": 1e-05, |
| "loss": 0.4299, |
| "step": 2254 |
| }, |
| { |
| "epoch": 0.6208248770252142, |
| "grad_norm": 0.3683704137802124, |
| "learning_rate": 1e-05, |
| "loss": 0.4379, |
| "step": 2256 |
| }, |
| { |
| "epoch": 0.6213752536892436, |
| "grad_norm": 0.3535701334476471, |
| "learning_rate": 1e-05, |
| "loss": 0.4292, |
| "step": 2258 |
| }, |
| { |
| "epoch": 0.621925630353273, |
| "grad_norm": 0.370959997177124, |
| "learning_rate": 1e-05, |
| "loss": 0.4425, |
| "step": 2260 |
| }, |
| { |
| "epoch": 0.6224760070173024, |
| "grad_norm": 0.3473008871078491, |
| "learning_rate": 1e-05, |
| "loss": 0.4289, |
| "step": 2262 |
| }, |
| { |
| "epoch": 0.6230263836813319, |
| "grad_norm": 0.36245644092559814, |
| "learning_rate": 1e-05, |
| "loss": 0.4525, |
| "step": 2264 |
| }, |
| { |
| "epoch": 0.6235767603453614, |
| "grad_norm": 0.37182751297950745, |
| "learning_rate": 1e-05, |
| "loss": 0.4438, |
| "step": 2266 |
| }, |
| { |
| "epoch": 0.6241271370093908, |
| "grad_norm": 0.35843655467033386, |
| "learning_rate": 1e-05, |
| "loss": 0.4403, |
| "step": 2268 |
| }, |
| { |
| "epoch": 0.6246775136734203, |
| "grad_norm": 0.3484828472137451, |
| "learning_rate": 1e-05, |
| "loss": 0.429, |
| "step": 2270 |
| }, |
| { |
| "epoch": 0.6252278903374497, |
| "grad_norm": 0.35097572207450867, |
| "learning_rate": 1e-05, |
| "loss": 0.4435, |
| "step": 2272 |
| }, |
| { |
| "epoch": 0.6257782670014791, |
| "grad_norm": 0.35911381244659424, |
| "learning_rate": 1e-05, |
| "loss": 0.435, |
| "step": 2274 |
| }, |
| { |
| "epoch": 0.6263286436655086, |
| "grad_norm": 0.3544057309627533, |
| "learning_rate": 1e-05, |
| "loss": 0.4359, |
| "step": 2276 |
| }, |
| { |
| "epoch": 0.626879020329538, |
| "grad_norm": 0.34516793489456177, |
| "learning_rate": 1e-05, |
| "loss": 0.4261, |
| "step": 2278 |
| }, |
| { |
| "epoch": 0.6274293969935675, |
| "grad_norm": 0.3534994423389435, |
| "learning_rate": 1e-05, |
| "loss": 0.4539, |
| "step": 2280 |
| }, |
| { |
| "epoch": 0.6279797736575969, |
| "grad_norm": 0.356238454580307, |
| "learning_rate": 1e-05, |
| "loss": 0.4321, |
| "step": 2282 |
| }, |
| { |
| "epoch": 0.6285301503216264, |
| "grad_norm": 0.37285274267196655, |
| "learning_rate": 1e-05, |
| "loss": 0.4515, |
| "step": 2284 |
| }, |
| { |
| "epoch": 0.6290805269856559, |
| "grad_norm": 0.3517172336578369, |
| "learning_rate": 1e-05, |
| "loss": 0.4268, |
| "step": 2286 |
| }, |
| { |
| "epoch": 0.6296309036496852, |
| "grad_norm": 0.35732871294021606, |
| "learning_rate": 1e-05, |
| "loss": 0.4363, |
| "step": 2288 |
| }, |
| { |
| "epoch": 0.6301812803137147, |
| "grad_norm": 0.3592797815799713, |
| "learning_rate": 1e-05, |
| "loss": 0.4424, |
| "step": 2290 |
| }, |
| { |
| "epoch": 0.6307316569777441, |
| "grad_norm": 0.3233913481235504, |
| "learning_rate": 1e-05, |
| "loss": 0.421, |
| "step": 2292 |
| }, |
| { |
| "epoch": 0.6312820336417736, |
| "grad_norm": 0.361591100692749, |
| "learning_rate": 1e-05, |
| "loss": 0.4299, |
| "step": 2294 |
| }, |
| { |
| "epoch": 0.6318324103058031, |
| "grad_norm": 0.3468184173107147, |
| "learning_rate": 1e-05, |
| "loss": 0.4442, |
| "step": 2296 |
| }, |
| { |
| "epoch": 0.6323827869698325, |
| "grad_norm": 0.4019412398338318, |
| "learning_rate": 1e-05, |
| "loss": 0.4453, |
| "step": 2298 |
| }, |
| { |
| "epoch": 0.6329331636338619, |
| "grad_norm": 0.3713074326515198, |
| "learning_rate": 1e-05, |
| "loss": 0.435, |
| "step": 2300 |
| }, |
| { |
| "epoch": 0.6334835402978913, |
| "grad_norm": 0.35839253664016724, |
| "learning_rate": 1e-05, |
| "loss": 0.4449, |
| "step": 2302 |
| }, |
| { |
| "epoch": 0.6340339169619208, |
| "grad_norm": 0.33958542346954346, |
| "learning_rate": 1e-05, |
| "loss": 0.4433, |
| "step": 2304 |
| }, |
| { |
| "epoch": 0.6345842936259503, |
| "grad_norm": 0.3750527501106262, |
| "learning_rate": 1e-05, |
| "loss": 0.4297, |
| "step": 2306 |
| }, |
| { |
| "epoch": 0.6351346702899797, |
| "grad_norm": 0.35579168796539307, |
| "learning_rate": 1e-05, |
| "loss": 0.4307, |
| "step": 2308 |
| }, |
| { |
| "epoch": 0.6356850469540092, |
| "grad_norm": 0.3424528241157532, |
| "learning_rate": 1e-05, |
| "loss": 0.4451, |
| "step": 2310 |
| }, |
| { |
| "epoch": 0.6362354236180386, |
| "grad_norm": 0.3364480137825012, |
| "learning_rate": 1e-05, |
| "loss": 0.4251, |
| "step": 2312 |
| }, |
| { |
| "epoch": 0.636785800282068, |
| "grad_norm": 0.35307276248931885, |
| "learning_rate": 1e-05, |
| "loss": 0.4221, |
| "step": 2314 |
| }, |
| { |
| "epoch": 0.6373361769460975, |
| "grad_norm": 0.41354474425315857, |
| "learning_rate": 1e-05, |
| "loss": 0.4462, |
| "step": 2316 |
| }, |
| { |
| "epoch": 0.6378865536101269, |
| "grad_norm": 0.37485471367836, |
| "learning_rate": 1e-05, |
| "loss": 0.4337, |
| "step": 2318 |
| }, |
| { |
| "epoch": 0.6384369302741564, |
| "grad_norm": 0.344091534614563, |
| "learning_rate": 1e-05, |
| "loss": 0.43, |
| "step": 2320 |
| }, |
| { |
| "epoch": 0.6389873069381858, |
| "grad_norm": 0.3772261440753937, |
| "learning_rate": 1e-05, |
| "loss": 0.4444, |
| "step": 2322 |
| }, |
| { |
| "epoch": 0.6395376836022153, |
| "grad_norm": 0.35307928919792175, |
| "learning_rate": 1e-05, |
| "loss": 0.4332, |
| "step": 2324 |
| }, |
| { |
| "epoch": 0.6400880602662448, |
| "grad_norm": 0.35815975069999695, |
| "learning_rate": 1e-05, |
| "loss": 0.4489, |
| "step": 2326 |
| }, |
| { |
| "epoch": 0.6406384369302741, |
| "grad_norm": 0.3731154799461365, |
| "learning_rate": 1e-05, |
| "loss": 0.4355, |
| "step": 2328 |
| }, |
| { |
| "epoch": 0.6411888135943036, |
| "grad_norm": 0.36875462532043457, |
| "learning_rate": 1e-05, |
| "loss": 0.4339, |
| "step": 2330 |
| }, |
| { |
| "epoch": 0.641739190258333, |
| "grad_norm": 0.36913126707077026, |
| "learning_rate": 1e-05, |
| "loss": 0.4336, |
| "step": 2332 |
| }, |
| { |
| "epoch": 0.6422895669223625, |
| "grad_norm": 0.35829678177833557, |
| "learning_rate": 1e-05, |
| "loss": 0.4438, |
| "step": 2334 |
| }, |
| { |
| "epoch": 0.642839943586392, |
| "grad_norm": 0.36390239000320435, |
| "learning_rate": 1e-05, |
| "loss": 0.4405, |
| "step": 2336 |
| }, |
| { |
| "epoch": 0.6433903202504214, |
| "grad_norm": 0.34786713123321533, |
| "learning_rate": 1e-05, |
| "loss": 0.451, |
| "step": 2338 |
| }, |
| { |
| "epoch": 0.6439406969144508, |
| "grad_norm": 0.3522484600543976, |
| "learning_rate": 1e-05, |
| "loss": 0.4395, |
| "step": 2340 |
| }, |
| { |
| "epoch": 0.6444910735784802, |
| "grad_norm": 0.36442965269088745, |
| "learning_rate": 1e-05, |
| "loss": 0.4204, |
| "step": 2342 |
| }, |
| { |
| "epoch": 0.6450414502425097, |
| "grad_norm": 0.3635409474372864, |
| "learning_rate": 1e-05, |
| "loss": 0.4507, |
| "step": 2344 |
| }, |
| { |
| "epoch": 0.6455918269065392, |
| "grad_norm": 0.35682952404022217, |
| "learning_rate": 1e-05, |
| "loss": 0.4333, |
| "step": 2346 |
| }, |
| { |
| "epoch": 0.6461422035705686, |
| "grad_norm": 0.38101914525032043, |
| "learning_rate": 1e-05, |
| "loss": 0.4409, |
| "step": 2348 |
| }, |
| { |
| "epoch": 0.6466925802345981, |
| "grad_norm": 0.37273916602134705, |
| "learning_rate": 1e-05, |
| "loss": 0.4386, |
| "step": 2350 |
| }, |
| { |
| "epoch": 0.6472429568986275, |
| "grad_norm": 0.37394535541534424, |
| "learning_rate": 1e-05, |
| "loss": 0.4426, |
| "step": 2352 |
| }, |
| { |
| "epoch": 0.6477933335626569, |
| "grad_norm": 0.3374865651130676, |
| "learning_rate": 1e-05, |
| "loss": 0.443, |
| "step": 2354 |
| }, |
| { |
| "epoch": 0.6483437102266864, |
| "grad_norm": 0.34875357151031494, |
| "learning_rate": 1e-05, |
| "loss": 0.4135, |
| "step": 2356 |
| }, |
| { |
| "epoch": 0.6488940868907158, |
| "grad_norm": 0.365508109331131, |
| "learning_rate": 1e-05, |
| "loss": 0.4455, |
| "step": 2358 |
| }, |
| { |
| "epoch": 0.6494444635547453, |
| "grad_norm": 0.36924096941947937, |
| "learning_rate": 1e-05, |
| "loss": 0.4327, |
| "step": 2360 |
| }, |
| { |
| "epoch": 0.6499948402187747, |
| "grad_norm": 0.3646699786186218, |
| "learning_rate": 1e-05, |
| "loss": 0.4324, |
| "step": 2362 |
| }, |
| { |
| "epoch": 0.6505452168828042, |
| "grad_norm": 0.34241992235183716, |
| "learning_rate": 1e-05, |
| "loss": 0.4414, |
| "step": 2364 |
| }, |
| { |
| "epoch": 0.6510955935468337, |
| "grad_norm": 0.3360735774040222, |
| "learning_rate": 1e-05, |
| "loss": 0.4228, |
| "step": 2366 |
| }, |
| { |
| "epoch": 0.651645970210863, |
| "grad_norm": 0.3782423138618469, |
| "learning_rate": 1e-05, |
| "loss": 0.4366, |
| "step": 2368 |
| }, |
| { |
| "epoch": 0.6521963468748925, |
| "grad_norm": 0.3839074373245239, |
| "learning_rate": 1e-05, |
| "loss": 0.4389, |
| "step": 2370 |
| }, |
| { |
| "epoch": 0.6527467235389219, |
| "grad_norm": 0.3636200726032257, |
| "learning_rate": 1e-05, |
| "loss": 0.4418, |
| "step": 2372 |
| }, |
| { |
| "epoch": 0.6532971002029514, |
| "grad_norm": 0.3629804253578186, |
| "learning_rate": 1e-05, |
| "loss": 0.4259, |
| "step": 2374 |
| }, |
| { |
| "epoch": 0.6538474768669809, |
| "grad_norm": 0.3819858133792877, |
| "learning_rate": 1e-05, |
| "loss": 0.4348, |
| "step": 2376 |
| }, |
| { |
| "epoch": 0.6543978535310103, |
| "grad_norm": 0.3597410321235657, |
| "learning_rate": 1e-05, |
| "loss": 0.428, |
| "step": 2378 |
| }, |
| { |
| "epoch": 0.6549482301950398, |
| "grad_norm": 0.4084703326225281, |
| "learning_rate": 1e-05, |
| "loss": 0.4478, |
| "step": 2380 |
| }, |
| { |
| "epoch": 0.6554986068590691, |
| "grad_norm": 0.35995879769325256, |
| "learning_rate": 1e-05, |
| "loss": 0.4356, |
| "step": 2382 |
| }, |
| { |
| "epoch": 0.6560489835230986, |
| "grad_norm": 0.36047980189323425, |
| "learning_rate": 1e-05, |
| "loss": 0.4479, |
| "step": 2384 |
| }, |
| { |
| "epoch": 0.6565993601871281, |
| "grad_norm": 0.3532986342906952, |
| "learning_rate": 1e-05, |
| "loss": 0.424, |
| "step": 2386 |
| }, |
| { |
| "epoch": 0.6571497368511575, |
| "grad_norm": 0.3374999761581421, |
| "learning_rate": 1e-05, |
| "loss": 0.4338, |
| "step": 2388 |
| }, |
| { |
| "epoch": 0.657700113515187, |
| "grad_norm": 0.34645605087280273, |
| "learning_rate": 1e-05, |
| "loss": 0.4257, |
| "step": 2390 |
| }, |
| { |
| "epoch": 0.6582504901792164, |
| "grad_norm": 0.36470580101013184, |
| "learning_rate": 1e-05, |
| "loss": 0.4414, |
| "step": 2392 |
| }, |
| { |
| "epoch": 0.6588008668432458, |
| "grad_norm": 0.3823862075805664, |
| "learning_rate": 1e-05, |
| "loss": 0.4306, |
| "step": 2394 |
| }, |
| { |
| "epoch": 0.6593512435072753, |
| "grad_norm": 0.4070727229118347, |
| "learning_rate": 1e-05, |
| "loss": 0.4322, |
| "step": 2396 |
| }, |
| { |
| "epoch": 0.6599016201713047, |
| "grad_norm": 0.37519609928131104, |
| "learning_rate": 1e-05, |
| "loss": 0.4248, |
| "step": 2398 |
| }, |
| { |
| "epoch": 0.6604519968353342, |
| "grad_norm": 0.35447025299072266, |
| "learning_rate": 1e-05, |
| "loss": 0.4283, |
| "step": 2400 |
| }, |
| { |
| "epoch": 0.6604519968353342, |
| "eval_merge_loss": 0.37715020775794983, |
| "eval_merge_runtime": 600.5757, |
| "eval_merge_samples_per_second": 56.199, |
| "eval_merge_steps_per_second": 2.343, |
| "step": 2400 |
| }, |
| { |
| "epoch": 0.6604519968353342, |
| "eval_new_aug_datas_filtered.json_loss": 0.5012194514274597, |
| "eval_new_aug_datas_filtered.json_runtime": 10.4212, |
| "eval_new_aug_datas_filtered.json_samples_per_second": 73.6, |
| "eval_new_aug_datas_filtered.json_steps_per_second": 3.071, |
| "step": 2400 |
| }, |
| { |
| "epoch": 0.6604519968353342, |
| "eval_sharegpt_gpt4.json_loss": 0.7534219026565552, |
| "eval_sharegpt_gpt4.json_runtime": 31.7308, |
| "eval_sharegpt_gpt4.json_samples_per_second": 58.65, |
| "eval_sharegpt_gpt4.json_steps_per_second": 2.458, |
| "step": 2400 |
| }, |
| { |
| "epoch": 0.6604519968353342, |
| "eval_Table_GPT.json_loss": 0.050881169736385345, |
| "eval_Table_GPT.json_runtime": 24.9922, |
| "eval_Table_GPT.json_samples_per_second": 83.746, |
| "eval_Table_GPT.json_steps_per_second": 3.521, |
| "step": 2400 |
| }, |
| { |
| "epoch": 0.6604519968353342, |
| "eval_gpt_4o_200k.json_loss": 0.7805712223052979, |
| "eval_gpt_4o_200k.json_runtime": 48.518, |
| "eval_gpt_4o_200k.json_samples_per_second": 129.457, |
| "eval_gpt_4o_200k.json_steps_per_second": 5.4, |
| "step": 2400 |
| }, |
| { |
| "epoch": 0.6604519968353342, |
| "eval_multi_turn_datas.json_loss": 0.3069368898868561, |
| "eval_multi_turn_datas.json_runtime": 75.8513, |
| "eval_multi_turn_datas.json_samples_per_second": 52.761, |
| "eval_multi_turn_datas.json_steps_per_second": 2.202, |
| "step": 2400 |
| }, |
| { |
| "epoch": 0.6604519968353342, |
| "eval_table_python_code_datas.json_loss": 0.2562294006347656, |
| "eval_table_python_code_datas.json_runtime": 43.1545, |
| "eval_table_python_code_datas.json_samples_per_second": 50.03, |
| "eval_table_python_code_datas.json_steps_per_second": 2.086, |
| "step": 2400 |
| }, |
| { |
| "epoch": 0.6604519968353342, |
| "eval_tabular_llm_data.json_loss": 0.09128429740667343, |
| "eval_tabular_llm_data.json_runtime": 8.5524, |
| "eval_tabular_llm_data.json_samples_per_second": 28.764, |
| "eval_tabular_llm_data.json_steps_per_second": 1.286, |
| "step": 2400 |
| }, |
| { |
| "epoch": 0.6604519968353342, |
| "eval_python_code_critic_21k.json_loss": 0.5555644631385803, |
| "eval_python_code_critic_21k.json_runtime": 3.2271, |
| "eval_python_code_critic_21k.json_samples_per_second": 184.994, |
| "eval_python_code_critic_21k.json_steps_per_second": 7.747, |
| "step": 2400 |
| }, |
| { |
| "epoch": 0.6604519968353342, |
| "eval_all_merge_table_dataset.json_loss": 0.07006299495697021, |
| "eval_all_merge_table_dataset.json_runtime": 23.358, |
| "eval_all_merge_table_dataset.json_samples_per_second": 30.482, |
| "eval_all_merge_table_dataset.json_steps_per_second": 1.284, |
| "step": 2400 |
| }, |
| { |
| "epoch": 0.6604519968353342, |
| "eval_code_feedback_multi_turn.json_loss": 0.5720005035400391, |
| "eval_code_feedback_multi_turn.json_runtime": 32.5016, |
| "eval_code_feedback_multi_turn.json_samples_per_second": 67.72, |
| "eval_code_feedback_multi_turn.json_steps_per_second": 2.831, |
| "step": 2400 |
| }, |
| { |
| "epoch": 0.6604519968353342, |
| "eval_ultrainteract_sft.json_loss": 0.4097177982330322, |
| "eval_ultrainteract_sft.json_runtime": 8.6753, |
| "eval_ultrainteract_sft.json_samples_per_second": 167.832, |
| "eval_ultrainteract_sft.json_steps_per_second": 7.031, |
| "step": 2400 |
| }, |
| { |
| "epoch": 0.6604519968353342, |
| "eval_synthetic_text_to_sql.json_loss": 0.09309177845716476, |
| "eval_synthetic_text_to_sql.json_runtime": 0.1257, |
| "eval_synthetic_text_to_sql.json_samples_per_second": 270.423, |
| "eval_synthetic_text_to_sql.json_steps_per_second": 15.907, |
| "step": 2400 |
| }, |
| { |
| "epoch": 0.6604519968353342, |
| "eval_sft_react_sql_datas.json_loss": 0.6212250590324402, |
| "eval_sft_react_sql_datas.json_runtime": 7.859, |
| "eval_sft_react_sql_datas.json_samples_per_second": 39.954, |
| "eval_sft_react_sql_datas.json_steps_per_second": 1.781, |
| "step": 2400 |
| }, |
| { |
| "epoch": 0.6604519968353342, |
| "eval_all_merge_code.json_loss": 0.28449881076812744, |
| "eval_all_merge_code.json_runtime": 0.3298, |
| "eval_all_merge_code.json_samples_per_second": 191.001, |
| "eval_all_merge_code.json_steps_per_second": 9.095, |
| "step": 2400 |
| }, |
| { |
| "epoch": 0.6604519968353342, |
| "eval_magpie_datas.json_loss": 0.426034539937973, |
| "eval_magpie_datas.json_runtime": 2.2154, |
| "eval_magpie_datas.json_samples_per_second": 77.638, |
| "eval_magpie_datas.json_steps_per_second": 3.611, |
| "step": 2400 |
| }, |
| { |
| "epoch": 0.6604519968353342, |
| "eval_train_data_for_qwen.json_loss": 0.005596214439719915, |
| "eval_train_data_for_qwen.json_runtime": 0.2424, |
| "eval_train_data_for_qwen.json_samples_per_second": 41.251, |
| "eval_train_data_for_qwen.json_steps_per_second": 4.125, |
| "step": 2400 |
| }, |
| { |
| "epoch": 0.6604519968353342, |
| "eval_alpaca_cleaned.json_loss": 0.9008170962333679, |
| "eval_alpaca_cleaned.json_runtime": 0.1147, |
| "eval_alpaca_cleaned.json_samples_per_second": 235.421, |
| "eval_alpaca_cleaned.json_steps_per_second": 17.439, |
| "step": 2400 |
| }, |
| { |
| "epoch": 0.6604519968353342, |
| "eval_agent_instruct.json_loss": 0.21443764865398407, |
| "eval_agent_instruct.json_runtime": 0.5141, |
| "eval_agent_instruct.json_samples_per_second": 93.36, |
| "eval_agent_instruct.json_steps_per_second": 3.89, |
| "step": 2400 |
| }, |
| { |
| "epoch": 0.6604519968353342, |
| "eval_MathInstruct.json_loss": 0.1956825852394104, |
| "eval_MathInstruct.json_runtime": 0.3499, |
| "eval_MathInstruct.json_samples_per_second": 162.885, |
| "eval_MathInstruct.json_steps_per_second": 8.573, |
| "step": 2400 |
| }, |
| { |
| "epoch": 0.6604519968353342, |
| "eval_tested_143k_python_alpaca.json_loss": 0.4434005320072174, |
| "eval_tested_143k_python_alpaca.json_runtime": 0.3023, |
| "eval_tested_143k_python_alpaca.json_samples_per_second": 112.46, |
| "eval_tested_143k_python_alpaca.json_steps_per_second": 6.615, |
| "step": 2400 |
| }, |
| { |
| "epoch": 0.6604519968353342, |
| "eval_xlam_function_calling_60k.json_loss": 0.009229443967342377, |
| "eval_xlam_function_calling_60k.json_runtime": 0.1004, |
| "eval_xlam_function_calling_60k.json_samples_per_second": 229.185, |
| "eval_xlam_function_calling_60k.json_steps_per_second": 9.965, |
| "step": 2400 |
| }, |
| { |
| "epoch": 0.6604519968353342, |
| "eval_alpaca_data_gpt4_chinese.json_loss": 1.5269618034362793, |
| "eval_alpaca_data_gpt4_chinese.json_runtime": 0.0516, |
| "eval_alpaca_data_gpt4_chinese.json_samples_per_second": 310.215, |
| "eval_alpaca_data_gpt4_chinese.json_steps_per_second": 19.388, |
| "step": 2400 |
| }, |
| { |
| "epoch": 0.6604519968353342, |
| "eval_alpaca_gpt4_zh.json_loss": 0.9699357151985168, |
| "eval_alpaca_gpt4_zh.json_runtime": 0.0505, |
| "eval_alpaca_gpt4_zh.json_samples_per_second": 217.964, |
| "eval_alpaca_gpt4_zh.json_steps_per_second": 19.815, |
| "step": 2400 |
| }, |
| { |
| "epoch": 0.6604519968353342, |
| "eval_codefeedback_filtered_instruction.json_loss": 0.5749525427818298, |
| "eval_codefeedback_filtered_instruction.json_runtime": 0.4875, |
| "eval_codefeedback_filtered_instruction.json_samples_per_second": 41.023, |
| "eval_codefeedback_filtered_instruction.json_steps_per_second": 2.051, |
| "step": 2400 |
| }, |
| { |
| "epoch": 0.6610023734993636, |
| "grad_norm": 0.38521307706832886, |
| "learning_rate": 1e-05, |
| "loss": 0.4408, |
| "step": 2402 |
| }, |
| { |
| "epoch": 0.6615527501633931, |
| "grad_norm": 0.35963118076324463, |
| "learning_rate": 1e-05, |
| "loss": 0.4252, |
| "step": 2404 |
| }, |
| { |
| "epoch": 0.6621031268274226, |
| "grad_norm": 0.34755435585975647, |
| "learning_rate": 1e-05, |
| "loss": 0.4319, |
| "step": 2406 |
| }, |
| { |
| "epoch": 0.6626535034914519, |
| "grad_norm": 0.37133127450942993, |
| "learning_rate": 1e-05, |
| "loss": 0.4237, |
| "step": 2408 |
| }, |
| { |
| "epoch": 0.6632038801554814, |
| "grad_norm": 0.35870301723480225, |
| "learning_rate": 1e-05, |
| "loss": 0.4388, |
| "step": 2410 |
| }, |
| { |
| "epoch": 0.6637542568195108, |
| "grad_norm": 0.357415109872818, |
| "learning_rate": 1e-05, |
| "loss": 0.4322, |
| "step": 2412 |
| }, |
| { |
| "epoch": 0.6643046334835403, |
| "grad_norm": 0.3610486090183258, |
| "learning_rate": 1e-05, |
| "loss": 0.434, |
| "step": 2414 |
| }, |
| { |
| "epoch": 0.6648550101475698, |
| "grad_norm": 0.35058531165122986, |
| "learning_rate": 1e-05, |
| "loss": 0.4325, |
| "step": 2416 |
| }, |
| { |
| "epoch": 0.6654053868115992, |
| "grad_norm": 0.3732353448867798, |
| "learning_rate": 1e-05, |
| "loss": 0.4266, |
| "step": 2418 |
| }, |
| { |
| "epoch": 0.6659557634756287, |
| "grad_norm": 0.3728616535663605, |
| "learning_rate": 1e-05, |
| "loss": 0.4373, |
| "step": 2420 |
| }, |
| { |
| "epoch": 0.666506140139658, |
| "grad_norm": 0.3697822093963623, |
| "learning_rate": 1e-05, |
| "loss": 0.4263, |
| "step": 2422 |
| }, |
| { |
| "epoch": 0.6670565168036875, |
| "grad_norm": 0.34242671728134155, |
| "learning_rate": 1e-05, |
| "loss": 0.4234, |
| "step": 2424 |
| }, |
| { |
| "epoch": 0.667606893467717, |
| "grad_norm": 0.34660401940345764, |
| "learning_rate": 1e-05, |
| "loss": 0.4438, |
| "step": 2426 |
| }, |
| { |
| "epoch": 0.6681572701317464, |
| "grad_norm": 0.36335524916648865, |
| "learning_rate": 1e-05, |
| "loss": 0.447, |
| "step": 2428 |
| }, |
| { |
| "epoch": 0.6687076467957759, |
| "grad_norm": 0.39879950881004333, |
| "learning_rate": 1e-05, |
| "loss": 0.4328, |
| "step": 2430 |
| }, |
| { |
| "epoch": 0.6692580234598053, |
| "grad_norm": 0.3318917453289032, |
| "learning_rate": 1e-05, |
| "loss": 0.418, |
| "step": 2432 |
| }, |
| { |
| "epoch": 0.6698084001238348, |
| "grad_norm": 0.3548910319805145, |
| "learning_rate": 1e-05, |
| "loss": 0.4297, |
| "step": 2434 |
| }, |
| { |
| "epoch": 0.6703587767878642, |
| "grad_norm": 0.35431650280952454, |
| "learning_rate": 1e-05, |
| "loss": 0.4442, |
| "step": 2436 |
| }, |
| { |
| "epoch": 0.6709091534518936, |
| "grad_norm": 0.3501831889152527, |
| "learning_rate": 1e-05, |
| "loss": 0.4231, |
| "step": 2438 |
| }, |
| { |
| "epoch": 0.6714595301159231, |
| "grad_norm": 0.3664182424545288, |
| "learning_rate": 1e-05, |
| "loss": 0.4307, |
| "step": 2440 |
| }, |
| { |
| "epoch": 0.6720099067799525, |
| "grad_norm": 0.36051392555236816, |
| "learning_rate": 1e-05, |
| "loss": 0.4348, |
| "step": 2442 |
| }, |
| { |
| "epoch": 0.672560283443982, |
| "grad_norm": 0.38968268036842346, |
| "learning_rate": 1e-05, |
| "loss": 0.44, |
| "step": 2444 |
| }, |
| { |
| "epoch": 0.6731106601080115, |
| "grad_norm": 0.34485840797424316, |
| "learning_rate": 1e-05, |
| "loss": 0.4387, |
| "step": 2446 |
| }, |
| { |
| "epoch": 0.6736610367720408, |
| "grad_norm": 0.36389604210853577, |
| "learning_rate": 1e-05, |
| "loss": 0.4279, |
| "step": 2448 |
| }, |
| { |
| "epoch": 0.6742114134360703, |
| "grad_norm": 0.3703545331954956, |
| "learning_rate": 1e-05, |
| "loss": 0.4498, |
| "step": 2450 |
| }, |
| { |
| "epoch": 0.6747617901000997, |
| "grad_norm": 0.34628036618232727, |
| "learning_rate": 1e-05, |
| "loss": 0.4145, |
| "step": 2452 |
| }, |
| { |
| "epoch": 0.6753121667641292, |
| "grad_norm": 0.3569451570510864, |
| "learning_rate": 1e-05, |
| "loss": 0.4308, |
| "step": 2454 |
| }, |
| { |
| "epoch": 0.6758625434281587, |
| "grad_norm": 0.3471825122833252, |
| "learning_rate": 1e-05, |
| "loss": 0.4299, |
| "step": 2456 |
| }, |
| { |
| "epoch": 0.6764129200921881, |
| "grad_norm": 0.37446585297584534, |
| "learning_rate": 1e-05, |
| "loss": 0.4417, |
| "step": 2458 |
| }, |
| { |
| "epoch": 0.6769632967562176, |
| "grad_norm": 0.355708509683609, |
| "learning_rate": 1e-05, |
| "loss": 0.4306, |
| "step": 2460 |
| }, |
| { |
| "epoch": 0.6775136734202469, |
| "grad_norm": 0.36398351192474365, |
| "learning_rate": 1e-05, |
| "loss": 0.4331, |
| "step": 2462 |
| }, |
| { |
| "epoch": 0.6780640500842764, |
| "grad_norm": 0.38390782475471497, |
| "learning_rate": 1e-05, |
| "loss": 0.4421, |
| "step": 2464 |
| }, |
| { |
| "epoch": 0.6786144267483059, |
| "grad_norm": 0.3586190938949585, |
| "learning_rate": 1e-05, |
| "loss": 0.4365, |
| "step": 2466 |
| }, |
| { |
| "epoch": 0.6791648034123353, |
| "grad_norm": 0.33874934911727905, |
| "learning_rate": 1e-05, |
| "loss": 0.4346, |
| "step": 2468 |
| }, |
| { |
| "epoch": 0.6797151800763648, |
| "grad_norm": 0.3699466586112976, |
| "learning_rate": 1e-05, |
| "loss": 0.4282, |
| "step": 2470 |
| }, |
| { |
| "epoch": 0.6802655567403942, |
| "grad_norm": 0.35685962438583374, |
| "learning_rate": 1e-05, |
| "loss": 0.4386, |
| "step": 2472 |
| }, |
| { |
| "epoch": 0.6808159334044237, |
| "grad_norm": 0.36509183049201965, |
| "learning_rate": 1e-05, |
| "loss": 0.4234, |
| "step": 2474 |
| }, |
| { |
| "epoch": 0.681366310068453, |
| "grad_norm": 0.3677407503128052, |
| "learning_rate": 1e-05, |
| "loss": 0.4327, |
| "step": 2476 |
| }, |
| { |
| "epoch": 0.6819166867324825, |
| "grad_norm": 0.361396849155426, |
| "learning_rate": 1e-05, |
| "loss": 0.4282, |
| "step": 2478 |
| }, |
| { |
| "epoch": 0.682467063396512, |
| "grad_norm": 0.3637540936470032, |
| "learning_rate": 1e-05, |
| "loss": 0.4304, |
| "step": 2480 |
| }, |
| { |
| "epoch": 0.6830174400605414, |
| "grad_norm": 0.38396722078323364, |
| "learning_rate": 1e-05, |
| "loss": 0.4326, |
| "step": 2482 |
| }, |
| { |
| "epoch": 0.6835678167245709, |
| "grad_norm": 0.3760308623313904, |
| "learning_rate": 1e-05, |
| "loss": 0.4288, |
| "step": 2484 |
| }, |
| { |
| "epoch": 0.6841181933886004, |
| "grad_norm": 0.36777281761169434, |
| "learning_rate": 1e-05, |
| "loss": 0.4435, |
| "step": 2486 |
| }, |
| { |
| "epoch": 0.6846685700526298, |
| "grad_norm": 0.36967626214027405, |
| "learning_rate": 1e-05, |
| "loss": 0.4247, |
| "step": 2488 |
| }, |
| { |
| "epoch": 0.6852189467166592, |
| "grad_norm": 0.37309199571609497, |
| "learning_rate": 1e-05, |
| "loss": 0.4514, |
| "step": 2490 |
| }, |
| { |
| "epoch": 0.6857693233806886, |
| "grad_norm": 0.35478582978248596, |
| "learning_rate": 1e-05, |
| "loss": 0.436, |
| "step": 2492 |
| }, |
| { |
| "epoch": 0.6863197000447181, |
| "grad_norm": 0.35142141580581665, |
| "learning_rate": 1e-05, |
| "loss": 0.4289, |
| "step": 2494 |
| }, |
| { |
| "epoch": 0.6868700767087476, |
| "grad_norm": 0.37468215823173523, |
| "learning_rate": 1e-05, |
| "loss": 0.4363, |
| "step": 2496 |
| }, |
| { |
| "epoch": 0.687420453372777, |
| "grad_norm": 0.3481496572494507, |
| "learning_rate": 1e-05, |
| "loss": 0.441, |
| "step": 2498 |
| }, |
| { |
| "epoch": 0.6879708300368065, |
| "grad_norm": 0.34628838300704956, |
| "learning_rate": 1e-05, |
| "loss": 0.4425, |
| "step": 2500 |
| }, |
| { |
| "epoch": 0.6885212067008358, |
| "grad_norm": 0.3759724497795105, |
| "learning_rate": 1e-05, |
| "loss": 0.4322, |
| "step": 2502 |
| }, |
| { |
| "epoch": 0.6890715833648653, |
| "grad_norm": 0.37153902649879456, |
| "learning_rate": 1e-05, |
| "loss": 0.4412, |
| "step": 2504 |
| }, |
| { |
| "epoch": 0.6896219600288948, |
| "grad_norm": 0.3601967990398407, |
| "learning_rate": 1e-05, |
| "loss": 0.4314, |
| "step": 2506 |
| }, |
| { |
| "epoch": 0.6901723366929242, |
| "grad_norm": 0.3510344326496124, |
| "learning_rate": 1e-05, |
| "loss": 0.4261, |
| "step": 2508 |
| }, |
| { |
| "epoch": 0.6907227133569537, |
| "grad_norm": 0.34007585048675537, |
| "learning_rate": 1e-05, |
| "loss": 0.4272, |
| "step": 2510 |
| }, |
| { |
| "epoch": 0.6912730900209831, |
| "grad_norm": 0.34424078464508057, |
| "learning_rate": 1e-05, |
| "loss": 0.4253, |
| "step": 2512 |
| }, |
| { |
| "epoch": 0.6918234666850126, |
| "grad_norm": 0.36498820781707764, |
| "learning_rate": 1e-05, |
| "loss": 0.434, |
| "step": 2514 |
| }, |
| { |
| "epoch": 0.692373843349042, |
| "grad_norm": 0.3697148859500885, |
| "learning_rate": 1e-05, |
| "loss": 0.4358, |
| "step": 2516 |
| }, |
| { |
| "epoch": 0.6929242200130714, |
| "grad_norm": 0.36114463210105896, |
| "learning_rate": 1e-05, |
| "loss": 0.4177, |
| "step": 2518 |
| }, |
| { |
| "epoch": 0.6934745966771009, |
| "grad_norm": 0.3630925714969635, |
| "learning_rate": 1e-05, |
| "loss": 0.4438, |
| "step": 2520 |
| }, |
| { |
| "epoch": 0.6940249733411303, |
| "grad_norm": 0.36949414014816284, |
| "learning_rate": 1e-05, |
| "loss": 0.4281, |
| "step": 2522 |
| }, |
| { |
| "epoch": 0.6945753500051598, |
| "grad_norm": 0.36324694752693176, |
| "learning_rate": 1e-05, |
| "loss": 0.4253, |
| "step": 2524 |
| }, |
| { |
| "epoch": 0.6951257266691893, |
| "grad_norm": 0.3471947908401489, |
| "learning_rate": 1e-05, |
| "loss": 0.4215, |
| "step": 2526 |
| }, |
| { |
| "epoch": 0.6956761033332187, |
| "grad_norm": 0.33943814039230347, |
| "learning_rate": 1e-05, |
| "loss": 0.4546, |
| "step": 2528 |
| }, |
| { |
| "epoch": 0.6962264799972481, |
| "grad_norm": 0.34675729274749756, |
| "learning_rate": 1e-05, |
| "loss": 0.4191, |
| "step": 2530 |
| }, |
| { |
| "epoch": 0.6967768566612775, |
| "grad_norm": 0.3519613742828369, |
| "learning_rate": 1e-05, |
| "loss": 0.4272, |
| "step": 2532 |
| }, |
| { |
| "epoch": 0.697327233325307, |
| "grad_norm": 0.3635639548301697, |
| "learning_rate": 1e-05, |
| "loss": 0.4489, |
| "step": 2534 |
| }, |
| { |
| "epoch": 0.6978776099893365, |
| "grad_norm": 0.3636915385723114, |
| "learning_rate": 1e-05, |
| "loss": 0.4233, |
| "step": 2536 |
| }, |
| { |
| "epoch": 0.6984279866533659, |
| "grad_norm": 0.36174023151397705, |
| "learning_rate": 1e-05, |
| "loss": 0.425, |
| "step": 2538 |
| }, |
| { |
| "epoch": 0.6989783633173954, |
| "grad_norm": 0.35721176862716675, |
| "learning_rate": 1e-05, |
| "loss": 0.4279, |
| "step": 2540 |
| }, |
| { |
| "epoch": 0.6995287399814248, |
| "grad_norm": 0.35394319891929626, |
| "learning_rate": 1e-05, |
| "loss": 0.4279, |
| "step": 2542 |
| }, |
| { |
| "epoch": 0.7000791166454542, |
| "grad_norm": 0.37505972385406494, |
| "learning_rate": 1e-05, |
| "loss": 0.423, |
| "step": 2544 |
| }, |
| { |
| "epoch": 0.7006294933094837, |
| "grad_norm": 0.3504476249217987, |
| "learning_rate": 1e-05, |
| "loss": 0.4212, |
| "step": 2546 |
| }, |
| { |
| "epoch": 0.7011798699735131, |
| "grad_norm": 0.39700883626937866, |
| "learning_rate": 1e-05, |
| "loss": 0.4257, |
| "step": 2548 |
| }, |
| { |
| "epoch": 0.7017302466375426, |
| "grad_norm": 0.36360886693000793, |
| "learning_rate": 1e-05, |
| "loss": 0.4276, |
| "step": 2550 |
| }, |
| { |
| "epoch": 0.702280623301572, |
| "grad_norm": 0.36123448610305786, |
| "learning_rate": 1e-05, |
| "loss": 0.4266, |
| "step": 2552 |
| }, |
| { |
| "epoch": 0.7028309999656015, |
| "grad_norm": 0.35183826088905334, |
| "learning_rate": 1e-05, |
| "loss": 0.421, |
| "step": 2554 |
| }, |
| { |
| "epoch": 0.7033813766296309, |
| "grad_norm": 0.3557921350002289, |
| "learning_rate": 1e-05, |
| "loss": 0.4239, |
| "step": 2556 |
| }, |
| { |
| "epoch": 0.7039317532936603, |
| "grad_norm": 0.35415929555892944, |
| "learning_rate": 1e-05, |
| "loss": 0.4216, |
| "step": 2558 |
| }, |
| { |
| "epoch": 0.7044821299576898, |
| "grad_norm": 0.3662279546260834, |
| "learning_rate": 1e-05, |
| "loss": 0.4268, |
| "step": 2560 |
| }, |
| { |
| "epoch": 0.7050325066217192, |
| "grad_norm": 0.35718172788619995, |
| "learning_rate": 1e-05, |
| "loss": 0.4213, |
| "step": 2562 |
| }, |
| { |
| "epoch": 0.7055828832857487, |
| "grad_norm": 0.3595860004425049, |
| "learning_rate": 1e-05, |
| "loss": 0.4398, |
| "step": 2564 |
| }, |
| { |
| "epoch": 0.7061332599497782, |
| "grad_norm": 0.3576621413230896, |
| "learning_rate": 1e-05, |
| "loss": 0.4263, |
| "step": 2566 |
| }, |
| { |
| "epoch": 0.7066836366138076, |
| "grad_norm": 0.3699706792831421, |
| "learning_rate": 1e-05, |
| "loss": 0.4331, |
| "step": 2568 |
| }, |
| { |
| "epoch": 0.707234013277837, |
| "grad_norm": 0.38423609733581543, |
| "learning_rate": 1e-05, |
| "loss": 0.436, |
| "step": 2570 |
| }, |
| { |
| "epoch": 0.7077843899418664, |
| "grad_norm": 0.3747715651988983, |
| "learning_rate": 1e-05, |
| "loss": 0.4335, |
| "step": 2572 |
| }, |
| { |
| "epoch": 0.7083347666058959, |
| "grad_norm": 0.3554603159427643, |
| "learning_rate": 1e-05, |
| "loss": 0.4236, |
| "step": 2574 |
| }, |
| { |
| "epoch": 0.7088851432699254, |
| "grad_norm": 0.35446056723594666, |
| "learning_rate": 1e-05, |
| "loss": 0.4235, |
| "step": 2576 |
| }, |
| { |
| "epoch": 0.7094355199339548, |
| "grad_norm": 0.3770659267902374, |
| "learning_rate": 1e-05, |
| "loss": 0.4344, |
| "step": 2578 |
| }, |
| { |
| "epoch": 0.7099858965979843, |
| "grad_norm": 0.35676074028015137, |
| "learning_rate": 1e-05, |
| "loss": 0.4241, |
| "step": 2580 |
| }, |
| { |
| "epoch": 0.7105362732620137, |
| "grad_norm": 0.3687559962272644, |
| "learning_rate": 1e-05, |
| "loss": 0.4329, |
| "step": 2582 |
| }, |
| { |
| "epoch": 0.7110866499260431, |
| "grad_norm": 0.35311195254325867, |
| "learning_rate": 1e-05, |
| "loss": 0.4355, |
| "step": 2584 |
| }, |
| { |
| "epoch": 0.7116370265900726, |
| "grad_norm": 0.3590395152568817, |
| "learning_rate": 1e-05, |
| "loss": 0.4213, |
| "step": 2586 |
| }, |
| { |
| "epoch": 0.712187403254102, |
| "grad_norm": 0.3694981336593628, |
| "learning_rate": 1e-05, |
| "loss": 0.4344, |
| "step": 2588 |
| }, |
| { |
| "epoch": 0.7127377799181315, |
| "grad_norm": 0.3516077399253845, |
| "learning_rate": 1e-05, |
| "loss": 0.4202, |
| "step": 2590 |
| }, |
| { |
| "epoch": 0.7132881565821609, |
| "grad_norm": 0.38859254121780396, |
| "learning_rate": 1e-05, |
| "loss": 0.4179, |
| "step": 2592 |
| }, |
| { |
| "epoch": 0.7138385332461904, |
| "grad_norm": 0.3825247883796692, |
| "learning_rate": 1e-05, |
| "loss": 0.4393, |
| "step": 2594 |
| }, |
| { |
| "epoch": 0.7143889099102199, |
| "grad_norm": 0.36817750334739685, |
| "learning_rate": 1e-05, |
| "loss": 0.4341, |
| "step": 2596 |
| }, |
| { |
| "epoch": 0.7149392865742492, |
| "grad_norm": 0.36351174116134644, |
| "learning_rate": 1e-05, |
| "loss": 0.4355, |
| "step": 2598 |
| }, |
| { |
| "epoch": 0.7154896632382787, |
| "grad_norm": 0.3494237959384918, |
| "learning_rate": 1e-05, |
| "loss": 0.4176, |
| "step": 2600 |
| }, |
| { |
| "epoch": 0.7154896632382787, |
| "eval_merge_loss": 0.3735547661781311, |
| "eval_merge_runtime": 599.6483, |
| "eval_merge_samples_per_second": 56.286, |
| "eval_merge_steps_per_second": 2.346, |
| "step": 2600 |
| }, |
| { |
| "epoch": 0.7154896632382787, |
| "eval_new_aug_datas_filtered.json_loss": 0.4953900873661041, |
| "eval_new_aug_datas_filtered.json_runtime": 10.4567, |
| "eval_new_aug_datas_filtered.json_samples_per_second": 73.35, |
| "eval_new_aug_datas_filtered.json_steps_per_second": 3.06, |
| "step": 2600 |
| }, |
| { |
| "epoch": 0.7154896632382787, |
| "eval_sharegpt_gpt4.json_loss": 0.748174786567688, |
| "eval_sharegpt_gpt4.json_runtime": 31.7349, |
| "eval_sharegpt_gpt4.json_samples_per_second": 58.642, |
| "eval_sharegpt_gpt4.json_steps_per_second": 2.458, |
| "step": 2600 |
| }, |
| { |
| "epoch": 0.7154896632382787, |
| "eval_Table_GPT.json_loss": 0.04870549216866493, |
| "eval_Table_GPT.json_runtime": 25.0368, |
| "eval_Table_GPT.json_samples_per_second": 83.597, |
| "eval_Table_GPT.json_steps_per_second": 3.515, |
| "step": 2600 |
| }, |
| { |
| "epoch": 0.7154896632382787, |
| "eval_gpt_4o_200k.json_loss": 0.775393009185791, |
| "eval_gpt_4o_200k.json_runtime": 48.6152, |
| "eval_gpt_4o_200k.json_samples_per_second": 129.198, |
| "eval_gpt_4o_200k.json_steps_per_second": 5.389, |
| "step": 2600 |
| }, |
| { |
| "epoch": 0.7154896632382787, |
| "eval_multi_turn_datas.json_loss": 0.29874685406684875, |
| "eval_multi_turn_datas.json_runtime": 75.9064, |
| "eval_multi_turn_datas.json_samples_per_second": 52.723, |
| "eval_multi_turn_datas.json_steps_per_second": 2.2, |
| "step": 2600 |
| }, |
| { |
| "epoch": 0.7154896632382787, |
| "eval_table_python_code_datas.json_loss": 0.2535416781902313, |
| "eval_table_python_code_datas.json_runtime": 43.2787, |
| "eval_table_python_code_datas.json_samples_per_second": 49.886, |
| "eval_table_python_code_datas.json_steps_per_second": 2.08, |
| "step": 2600 |
| }, |
| { |
| "epoch": 0.7154896632382787, |
| "eval_tabular_llm_data.json_loss": 0.08522781729698181, |
| "eval_tabular_llm_data.json_runtime": 8.609, |
| "eval_tabular_llm_data.json_samples_per_second": 28.575, |
| "eval_tabular_llm_data.json_steps_per_second": 1.278, |
| "step": 2600 |
| }, |
| { |
| "epoch": 0.7154896632382787, |
| "eval_python_code_critic_21k.json_loss": 0.5531289577484131, |
| "eval_python_code_critic_21k.json_runtime": 3.2416, |
| "eval_python_code_critic_21k.json_samples_per_second": 184.167, |
| "eval_python_code_critic_21k.json_steps_per_second": 7.712, |
| "step": 2600 |
| }, |
| { |
| "epoch": 0.7154896632382787, |
| "eval_all_merge_table_dataset.json_loss": 0.07141314446926117, |
| "eval_all_merge_table_dataset.json_runtime": 23.4197, |
| "eval_all_merge_table_dataset.json_samples_per_second": 30.402, |
| "eval_all_merge_table_dataset.json_steps_per_second": 1.281, |
| "step": 2600 |
| }, |
| { |
| "epoch": 0.7154896632382787, |
| "eval_code_feedback_multi_turn.json_loss": 0.5697857737541199, |
| "eval_code_feedback_multi_turn.json_runtime": 32.4913, |
| "eval_code_feedback_multi_turn.json_samples_per_second": 67.741, |
| "eval_code_feedback_multi_turn.json_steps_per_second": 2.832, |
| "step": 2600 |
| }, |
| { |
| "epoch": 0.7154896632382787, |
| "eval_ultrainteract_sft.json_loss": 0.406777024269104, |
| "eval_ultrainteract_sft.json_runtime": 8.6553, |
| "eval_ultrainteract_sft.json_samples_per_second": 168.22, |
| "eval_ultrainteract_sft.json_steps_per_second": 7.048, |
| "step": 2600 |
| }, |
| { |
| "epoch": 0.7154896632382787, |
| "eval_synthetic_text_to_sql.json_loss": 0.09255770593881607, |
| "eval_synthetic_text_to_sql.json_runtime": 0.1264, |
| "eval_synthetic_text_to_sql.json_samples_per_second": 268.887, |
| "eval_synthetic_text_to_sql.json_steps_per_second": 15.817, |
| "step": 2600 |
| }, |
| { |
| "epoch": 0.7154896632382787, |
| "eval_sft_react_sql_datas.json_loss": 0.6156443953514099, |
| "eval_sft_react_sql_datas.json_runtime": 7.8669, |
| "eval_sft_react_sql_datas.json_samples_per_second": 39.914, |
| "eval_sft_react_sql_datas.json_steps_per_second": 1.78, |
| "step": 2600 |
| }, |
| { |
| "epoch": 0.7154896632382787, |
| "eval_all_merge_code.json_loss": 0.2804557681083679, |
| "eval_all_merge_code.json_runtime": 0.3331, |
| "eval_all_merge_code.json_samples_per_second": 189.109, |
| "eval_all_merge_code.json_steps_per_second": 9.005, |
| "step": 2600 |
| }, |
| { |
| "epoch": 0.7154896632382787, |
| "eval_magpie_datas.json_loss": 0.42615047097206116, |
| "eval_magpie_datas.json_runtime": 2.2188, |
| "eval_magpie_datas.json_samples_per_second": 77.518, |
| "eval_magpie_datas.json_steps_per_second": 3.605, |
| "step": 2600 |
| }, |
| { |
| "epoch": 0.7154896632382787, |
| "eval_train_data_for_qwen.json_loss": 0.005531710106879473, |
| "eval_train_data_for_qwen.json_runtime": 0.2446, |
| "eval_train_data_for_qwen.json_samples_per_second": 40.888, |
| "eval_train_data_for_qwen.json_steps_per_second": 4.089, |
| "step": 2600 |
| }, |
| { |
| "epoch": 0.7154896632382787, |
| "eval_alpaca_cleaned.json_loss": 0.8993179202079773, |
| "eval_alpaca_cleaned.json_runtime": 0.1158, |
| "eval_alpaca_cleaned.json_samples_per_second": 233.205, |
| "eval_alpaca_cleaned.json_steps_per_second": 17.274, |
| "step": 2600 |
| }, |
| { |
| "epoch": 0.7154896632382787, |
| "eval_agent_instruct.json_loss": 0.20902203023433685, |
| "eval_agent_instruct.json_runtime": 0.5148, |
| "eval_agent_instruct.json_samples_per_second": 93.239, |
| "eval_agent_instruct.json_steps_per_second": 3.885, |
| "step": 2600 |
| }, |
| { |
| "epoch": 0.7154896632382787, |
| "eval_MathInstruct.json_loss": 0.20088934898376465, |
| "eval_MathInstruct.json_runtime": 0.3521, |
| "eval_MathInstruct.json_samples_per_second": 161.889, |
| "eval_MathInstruct.json_steps_per_second": 8.52, |
| "step": 2600 |
| }, |
| { |
| "epoch": 0.7154896632382787, |
| "eval_tested_143k_python_alpaca.json_loss": 0.44206199049949646, |
| "eval_tested_143k_python_alpaca.json_runtime": 0.3013, |
| "eval_tested_143k_python_alpaca.json_samples_per_second": 112.861, |
| "eval_tested_143k_python_alpaca.json_steps_per_second": 6.639, |
| "step": 2600 |
| }, |
| { |
| "epoch": 0.7154896632382787, |
| "eval_xlam_function_calling_60k.json_loss": 0.00838589109480381, |
| "eval_xlam_function_calling_60k.json_runtime": 0.1004, |
| "eval_xlam_function_calling_60k.json_samples_per_second": 229.101, |
| "eval_xlam_function_calling_60k.json_steps_per_second": 9.961, |
| "step": 2600 |
| }, |
| { |
| "epoch": 0.7154896632382787, |
| "eval_alpaca_data_gpt4_chinese.json_loss": 1.5224987268447876, |
| "eval_alpaca_data_gpt4_chinese.json_runtime": 0.0517, |
| "eval_alpaca_data_gpt4_chinese.json_samples_per_second": 309.243, |
| "eval_alpaca_data_gpt4_chinese.json_steps_per_second": 19.328, |
| "step": 2600 |
| }, |
| { |
| "epoch": 0.7154896632382787, |
| "eval_alpaca_gpt4_zh.json_loss": 0.9841532111167908, |
| "eval_alpaca_gpt4_zh.json_runtime": 0.0501, |
| "eval_alpaca_gpt4_zh.json_samples_per_second": 219.503, |
| "eval_alpaca_gpt4_zh.json_steps_per_second": 19.955, |
| "step": 2600 |
| }, |
| { |
| "epoch": 0.7154896632382787, |
| "eval_codefeedback_filtered_instruction.json_loss": 0.5787987112998962, |
| "eval_codefeedback_filtered_instruction.json_runtime": 0.4863, |
| "eval_codefeedback_filtered_instruction.json_samples_per_second": 41.126, |
| "eval_codefeedback_filtered_instruction.json_steps_per_second": 2.056, |
| "step": 2600 |
| }, |
| { |
| "epoch": 0.7160400399023081, |
| "grad_norm": 0.3617021143436432, |
| "learning_rate": 1e-05, |
| "loss": 0.4292, |
| "step": 2602 |
| }, |
| { |
| "epoch": 0.7165904165663376, |
| "grad_norm": 0.39201030135154724, |
| "learning_rate": 1e-05, |
| "loss": 0.4565, |
| "step": 2604 |
| }, |
| { |
| "epoch": 0.7171407932303671, |
| "grad_norm": 0.3617227077484131, |
| "learning_rate": 1e-05, |
| "loss": 0.4279, |
| "step": 2606 |
| }, |
| { |
| "epoch": 0.7176911698943965, |
| "grad_norm": 0.3502630591392517, |
| "learning_rate": 1e-05, |
| "loss": 0.4191, |
| "step": 2608 |
| }, |
| { |
| "epoch": 0.7182415465584259, |
| "grad_norm": 0.41853633522987366, |
| "learning_rate": 1e-05, |
| "loss": 0.4122, |
| "step": 2610 |
| }, |
| { |
| "epoch": 0.7187919232224553, |
| "grad_norm": 0.35474300384521484, |
| "learning_rate": 1e-05, |
| "loss": 0.4225, |
| "step": 2612 |
| }, |
| { |
| "epoch": 0.7193422998864848, |
| "grad_norm": 0.3673190772533417, |
| "learning_rate": 1e-05, |
| "loss": 0.4307, |
| "step": 2614 |
| }, |
| { |
| "epoch": 0.7198926765505143, |
| "grad_norm": 0.383365273475647, |
| "learning_rate": 1e-05, |
| "loss": 0.4335, |
| "step": 2616 |
| }, |
| { |
| "epoch": 0.7204430532145437, |
| "grad_norm": 0.35813844203948975, |
| "learning_rate": 1e-05, |
| "loss": 0.4462, |
| "step": 2618 |
| }, |
| { |
| "epoch": 0.7209934298785732, |
| "grad_norm": 0.7552120685577393, |
| "learning_rate": 1e-05, |
| "loss": 0.4209, |
| "step": 2620 |
| }, |
| { |
| "epoch": 0.7215438065426026, |
| "grad_norm": 0.365175724029541, |
| "learning_rate": 1e-05, |
| "loss": 0.441, |
| "step": 2622 |
| }, |
| { |
| "epoch": 0.722094183206632, |
| "grad_norm": 0.3450736701488495, |
| "learning_rate": 1e-05, |
| "loss": 0.4302, |
| "step": 2624 |
| }, |
| { |
| "epoch": 0.7226445598706615, |
| "grad_norm": 0.34044018387794495, |
| "learning_rate": 1e-05, |
| "loss": 0.4265, |
| "step": 2626 |
| }, |
| { |
| "epoch": 0.7231949365346909, |
| "grad_norm": 0.36393091082572937, |
| "learning_rate": 1e-05, |
| "loss": 0.4226, |
| "step": 2628 |
| }, |
| { |
| "epoch": 0.7237453131987204, |
| "grad_norm": 0.3462166488170624, |
| "learning_rate": 1e-05, |
| "loss": 0.4236, |
| "step": 2630 |
| }, |
| { |
| "epoch": 0.7242956898627498, |
| "grad_norm": 0.4024192988872528, |
| "learning_rate": 1e-05, |
| "loss": 0.4377, |
| "step": 2632 |
| }, |
| { |
| "epoch": 0.7248460665267793, |
| "grad_norm": 0.354809045791626, |
| "learning_rate": 1e-05, |
| "loss": 0.4245, |
| "step": 2634 |
| }, |
| { |
| "epoch": 0.7253964431908088, |
| "grad_norm": 0.3701523244380951, |
| "learning_rate": 1e-05, |
| "loss": 0.438, |
| "step": 2636 |
| }, |
| { |
| "epoch": 0.7259468198548381, |
| "grad_norm": 0.37080636620521545, |
| "learning_rate": 1e-05, |
| "loss": 0.4299, |
| "step": 2638 |
| }, |
| { |
| "epoch": 0.7264971965188676, |
| "grad_norm": 0.3205287754535675, |
| "learning_rate": 1e-05, |
| "loss": 0.4193, |
| "step": 2640 |
| }, |
| { |
| "epoch": 0.727047573182897, |
| "grad_norm": 0.3642041087150574, |
| "learning_rate": 1e-05, |
| "loss": 0.4259, |
| "step": 2642 |
| }, |
| { |
| "epoch": 0.7275979498469265, |
| "grad_norm": 0.34573763608932495, |
| "learning_rate": 1e-05, |
| "loss": 0.438, |
| "step": 2644 |
| }, |
| { |
| "epoch": 0.728148326510956, |
| "grad_norm": 0.3501754701137543, |
| "learning_rate": 1e-05, |
| "loss": 0.4184, |
| "step": 2646 |
| }, |
| { |
| "epoch": 0.7286987031749854, |
| "grad_norm": 0.35315144062042236, |
| "learning_rate": 1e-05, |
| "loss": 0.4236, |
| "step": 2648 |
| }, |
| { |
| "epoch": 0.7292490798390149, |
| "grad_norm": 0.36585912108421326, |
| "learning_rate": 1e-05, |
| "loss": 0.4205, |
| "step": 2650 |
| }, |
| { |
| "epoch": 0.7297994565030442, |
| "grad_norm": 0.3684290051460266, |
| "learning_rate": 1e-05, |
| "loss": 0.4366, |
| "step": 2652 |
| }, |
| { |
| "epoch": 0.7303498331670737, |
| "grad_norm": 0.3628571927547455, |
| "learning_rate": 1e-05, |
| "loss": 0.4205, |
| "step": 2654 |
| }, |
| { |
| "epoch": 0.7309002098311032, |
| "grad_norm": 0.36779502034187317, |
| "learning_rate": 1e-05, |
| "loss": 0.4338, |
| "step": 2656 |
| }, |
| { |
| "epoch": 0.7314505864951326, |
| "grad_norm": 0.3522249162197113, |
| "learning_rate": 1e-05, |
| "loss": 0.4268, |
| "step": 2658 |
| }, |
| { |
| "epoch": 0.7320009631591621, |
| "grad_norm": 0.3840633034706116, |
| "learning_rate": 1e-05, |
| "loss": 0.425, |
| "step": 2660 |
| }, |
| { |
| "epoch": 0.7325513398231915, |
| "grad_norm": 0.3498011529445648, |
| "learning_rate": 1e-05, |
| "loss": 0.4269, |
| "step": 2662 |
| }, |
| { |
| "epoch": 0.7331017164872209, |
| "grad_norm": 0.36151036620140076, |
| "learning_rate": 1e-05, |
| "loss": 0.4215, |
| "step": 2664 |
| }, |
| { |
| "epoch": 0.7336520931512504, |
| "grad_norm": 0.37008973956108093, |
| "learning_rate": 1e-05, |
| "loss": 0.4468, |
| "step": 2666 |
| }, |
| { |
| "epoch": 0.7342024698152798, |
| "grad_norm": 0.3440816104412079, |
| "learning_rate": 1e-05, |
| "loss": 0.4349, |
| "step": 2668 |
| }, |
| { |
| "epoch": 0.7347528464793093, |
| "grad_norm": 0.3912747800350189, |
| "learning_rate": 1e-05, |
| "loss": 0.4188, |
| "step": 2670 |
| }, |
| { |
| "epoch": 0.7353032231433387, |
| "grad_norm": 0.3472096025943756, |
| "learning_rate": 1e-05, |
| "loss": 0.4344, |
| "step": 2672 |
| }, |
| { |
| "epoch": 0.7358535998073682, |
| "grad_norm": 0.3477676510810852, |
| "learning_rate": 1e-05, |
| "loss": 0.4226, |
| "step": 2674 |
| }, |
| { |
| "epoch": 0.7364039764713977, |
| "grad_norm": 0.3726285696029663, |
| "learning_rate": 1e-05, |
| "loss": 0.4263, |
| "step": 2676 |
| }, |
| { |
| "epoch": 0.736954353135427, |
| "grad_norm": 0.3610732853412628, |
| "learning_rate": 1e-05, |
| "loss": 0.4272, |
| "step": 2678 |
| }, |
| { |
| "epoch": 0.7375047297994565, |
| "grad_norm": 0.35711386799812317, |
| "learning_rate": 1e-05, |
| "loss": 0.4356, |
| "step": 2680 |
| }, |
| { |
| "epoch": 0.7380551064634859, |
| "grad_norm": 0.36050212383270264, |
| "learning_rate": 1e-05, |
| "loss": 0.437, |
| "step": 2682 |
| }, |
| { |
| "epoch": 0.7386054831275154, |
| "grad_norm": 0.33842894434928894, |
| "learning_rate": 1e-05, |
| "loss": 0.4136, |
| "step": 2684 |
| }, |
| { |
| "epoch": 0.7391558597915449, |
| "grad_norm": 0.35878267884254456, |
| "learning_rate": 1e-05, |
| "loss": 0.4118, |
| "step": 2686 |
| }, |
| { |
| "epoch": 0.7397062364555743, |
| "grad_norm": 0.3504185676574707, |
| "learning_rate": 1e-05, |
| "loss": 0.4157, |
| "step": 2688 |
| }, |
| { |
| "epoch": 0.7402566131196038, |
| "grad_norm": 0.35226139426231384, |
| "learning_rate": 1e-05, |
| "loss": 0.4194, |
| "step": 2690 |
| }, |
| { |
| "epoch": 0.7408069897836331, |
| "grad_norm": 0.3720513880252838, |
| "learning_rate": 1e-05, |
| "loss": 0.4225, |
| "step": 2692 |
| }, |
| { |
| "epoch": 0.7413573664476626, |
| "grad_norm": 0.3444679081439972, |
| "learning_rate": 1e-05, |
| "loss": 0.433, |
| "step": 2694 |
| }, |
| { |
| "epoch": 0.741907743111692, |
| "grad_norm": 0.3685862421989441, |
| "learning_rate": 1e-05, |
| "loss": 0.4139, |
| "step": 2696 |
| }, |
| { |
| "epoch": 0.7424581197757215, |
| "grad_norm": 0.36269327998161316, |
| "learning_rate": 1e-05, |
| "loss": 0.4277, |
| "step": 2698 |
| }, |
| { |
| "epoch": 0.743008496439751, |
| "grad_norm": 0.36458590626716614, |
| "learning_rate": 1e-05, |
| "loss": 0.4217, |
| "step": 2700 |
| }, |
| { |
| "epoch": 0.7435588731037804, |
| "grad_norm": 0.3453613221645355, |
| "learning_rate": 1e-05, |
| "loss": 0.4174, |
| "step": 2702 |
| }, |
| { |
| "epoch": 0.7441092497678099, |
| "grad_norm": 0.3562467098236084, |
| "learning_rate": 1e-05, |
| "loss": 0.4313, |
| "step": 2704 |
| }, |
| { |
| "epoch": 0.7446596264318392, |
| "grad_norm": 0.3774909973144531, |
| "learning_rate": 1e-05, |
| "loss": 0.432, |
| "step": 2706 |
| }, |
| { |
| "epoch": 0.7452100030958687, |
| "grad_norm": 0.3668104112148285, |
| "learning_rate": 1e-05, |
| "loss": 0.4236, |
| "step": 2708 |
| }, |
| { |
| "epoch": 0.7457603797598982, |
| "grad_norm": 0.38669878244400024, |
| "learning_rate": 1e-05, |
| "loss": 0.4432, |
| "step": 2710 |
| }, |
| { |
| "epoch": 0.7463107564239276, |
| "grad_norm": 0.3985156714916229, |
| "learning_rate": 1e-05, |
| "loss": 0.4422, |
| "step": 2712 |
| }, |
| { |
| "epoch": 0.7468611330879571, |
| "grad_norm": 0.3647630512714386, |
| "learning_rate": 1e-05, |
| "loss": 0.4273, |
| "step": 2714 |
| }, |
| { |
| "epoch": 0.7474115097519866, |
| "grad_norm": 0.37027841806411743, |
| "learning_rate": 1e-05, |
| "loss": 0.4166, |
| "step": 2716 |
| }, |
| { |
| "epoch": 0.7479618864160159, |
| "grad_norm": 0.3770820200443268, |
| "learning_rate": 1e-05, |
| "loss": 0.4461, |
| "step": 2718 |
| }, |
| { |
| "epoch": 0.7485122630800454, |
| "grad_norm": 0.35209086537361145, |
| "learning_rate": 1e-05, |
| "loss": 0.4473, |
| "step": 2720 |
| }, |
| { |
| "epoch": 0.7490626397440748, |
| "grad_norm": 0.38394030928611755, |
| "learning_rate": 1e-05, |
| "loss": 0.4353, |
| "step": 2722 |
| }, |
| { |
| "epoch": 0.7496130164081043, |
| "grad_norm": 0.3524518311023712, |
| "learning_rate": 1e-05, |
| "loss": 0.4277, |
| "step": 2724 |
| }, |
| { |
| "epoch": 0.7501633930721338, |
| "grad_norm": 0.35822972655296326, |
| "learning_rate": 1e-05, |
| "loss": 0.4277, |
| "step": 2726 |
| }, |
| { |
| "epoch": 0.7507137697361632, |
| "grad_norm": 0.3409929573535919, |
| "learning_rate": 1e-05, |
| "loss": 0.4172, |
| "step": 2728 |
| }, |
| { |
| "epoch": 0.7512641464001927, |
| "grad_norm": 0.3534572422504425, |
| "learning_rate": 1e-05, |
| "loss": 0.431, |
| "step": 2730 |
| }, |
| { |
| "epoch": 0.751814523064222, |
| "grad_norm": 0.3565024733543396, |
| "learning_rate": 1e-05, |
| "loss": 0.4297, |
| "step": 2732 |
| }, |
| { |
| "epoch": 0.7523648997282515, |
| "grad_norm": 0.3499157130718231, |
| "learning_rate": 1e-05, |
| "loss": 0.4131, |
| "step": 2734 |
| }, |
| { |
| "epoch": 0.752915276392281, |
| "grad_norm": 0.37271568179130554, |
| "learning_rate": 1e-05, |
| "loss": 0.4224, |
| "step": 2736 |
| }, |
| { |
| "epoch": 0.7534656530563104, |
| "grad_norm": 0.38281935453414917, |
| "learning_rate": 1e-05, |
| "loss": 0.4366, |
| "step": 2738 |
| }, |
| { |
| "epoch": 0.7540160297203399, |
| "grad_norm": 0.35982009768486023, |
| "learning_rate": 1e-05, |
| "loss": 0.4384, |
| "step": 2740 |
| }, |
| { |
| "epoch": 0.7545664063843693, |
| "grad_norm": 0.3618968427181244, |
| "learning_rate": 1e-05, |
| "loss": 0.4484, |
| "step": 2742 |
| }, |
| { |
| "epoch": 0.7551167830483988, |
| "grad_norm": 0.35112181305885315, |
| "learning_rate": 1e-05, |
| "loss": 0.4132, |
| "step": 2744 |
| }, |
| { |
| "epoch": 0.7556671597124281, |
| "grad_norm": 0.35898518562316895, |
| "learning_rate": 1e-05, |
| "loss": 0.4234, |
| "step": 2746 |
| }, |
| { |
| "epoch": 0.7562175363764576, |
| "grad_norm": 0.36049455404281616, |
| "learning_rate": 1e-05, |
| "loss": 0.4254, |
| "step": 2748 |
| }, |
| { |
| "epoch": 0.7567679130404871, |
| "grad_norm": 0.3698630630970001, |
| "learning_rate": 1e-05, |
| "loss": 0.4387, |
| "step": 2750 |
| }, |
| { |
| "epoch": 0.7573182897045165, |
| "grad_norm": 0.36196333169937134, |
| "learning_rate": 1e-05, |
| "loss": 0.4242, |
| "step": 2752 |
| }, |
| { |
| "epoch": 0.757868666368546, |
| "grad_norm": 0.3553547263145447, |
| "learning_rate": 1e-05, |
| "loss": 0.4332, |
| "step": 2754 |
| }, |
| { |
| "epoch": 0.7584190430325755, |
| "grad_norm": 0.36536121368408203, |
| "learning_rate": 1e-05, |
| "loss": 0.4123, |
| "step": 2756 |
| }, |
| { |
| "epoch": 0.7589694196966049, |
| "grad_norm": 0.3394269049167633, |
| "learning_rate": 1e-05, |
| "loss": 0.4115, |
| "step": 2758 |
| }, |
| { |
| "epoch": 0.7595197963606343, |
| "grad_norm": 0.35857659578323364, |
| "learning_rate": 1e-05, |
| "loss": 0.4174, |
| "step": 2760 |
| }, |
| { |
| "epoch": 0.7600701730246637, |
| "grad_norm": 0.3676673173904419, |
| "learning_rate": 1e-05, |
| "loss": 0.4334, |
| "step": 2762 |
| }, |
| { |
| "epoch": 0.7606205496886932, |
| "grad_norm": 0.35949233174324036, |
| "learning_rate": 1e-05, |
| "loss": 0.4345, |
| "step": 2764 |
| }, |
| { |
| "epoch": 0.7611709263527227, |
| "grad_norm": 0.368569940328598, |
| "learning_rate": 1e-05, |
| "loss": 0.4241, |
| "step": 2766 |
| }, |
| { |
| "epoch": 0.7617213030167521, |
| "grad_norm": 0.37473535537719727, |
| "learning_rate": 1e-05, |
| "loss": 0.4454, |
| "step": 2768 |
| }, |
| { |
| "epoch": 0.7622716796807816, |
| "grad_norm": 0.34766483306884766, |
| "learning_rate": 1e-05, |
| "loss": 0.4193, |
| "step": 2770 |
| }, |
| { |
| "epoch": 0.7628220563448109, |
| "grad_norm": 0.3594741225242615, |
| "learning_rate": 1e-05, |
| "loss": 0.4265, |
| "step": 2772 |
| }, |
| { |
| "epoch": 0.7633724330088404, |
| "grad_norm": 0.35876014828681946, |
| "learning_rate": 1e-05, |
| "loss": 0.4401, |
| "step": 2774 |
| }, |
| { |
| "epoch": 0.7639228096728699, |
| "grad_norm": 0.3698675036430359, |
| "learning_rate": 1e-05, |
| "loss": 0.4301, |
| "step": 2776 |
| }, |
| { |
| "epoch": 0.7644731863368993, |
| "grad_norm": 0.3890196979045868, |
| "learning_rate": 1e-05, |
| "loss": 0.4312, |
| "step": 2778 |
| }, |
| { |
| "epoch": 0.7650235630009288, |
| "grad_norm": 0.3495800793170929, |
| "learning_rate": 1e-05, |
| "loss": 0.4235, |
| "step": 2780 |
| }, |
| { |
| "epoch": 0.7655739396649582, |
| "grad_norm": 0.3536211848258972, |
| "learning_rate": 1e-05, |
| "loss": 0.4319, |
| "step": 2782 |
| }, |
| { |
| "epoch": 0.7661243163289877, |
| "grad_norm": 0.35744360089302063, |
| "learning_rate": 1e-05, |
| "loss": 0.419, |
| "step": 2784 |
| }, |
| { |
| "epoch": 0.766674692993017, |
| "grad_norm": 0.35292670130729675, |
| "learning_rate": 1e-05, |
| "loss": 0.4428, |
| "step": 2786 |
| }, |
| { |
| "epoch": 0.7672250696570465, |
| "grad_norm": 0.32827427983283997, |
| "learning_rate": 1e-05, |
| "loss": 0.4175, |
| "step": 2788 |
| }, |
| { |
| "epoch": 0.767775446321076, |
| "grad_norm": 0.3385542929172516, |
| "learning_rate": 1e-05, |
| "loss": 0.4288, |
| "step": 2790 |
| }, |
| { |
| "epoch": 0.7683258229851054, |
| "grad_norm": 0.3474958539009094, |
| "learning_rate": 1e-05, |
| "loss": 0.4424, |
| "step": 2792 |
| }, |
| { |
| "epoch": 0.7688761996491349, |
| "grad_norm": 0.3551865816116333, |
| "learning_rate": 1e-05, |
| "loss": 0.4351, |
| "step": 2794 |
| }, |
| { |
| "epoch": 0.7694265763131644, |
| "grad_norm": 0.3616306781768799, |
| "learning_rate": 1e-05, |
| "loss": 0.4481, |
| "step": 2796 |
| }, |
| { |
| "epoch": 0.7699769529771938, |
| "grad_norm": 0.36132022738456726, |
| "learning_rate": 1e-05, |
| "loss": 0.4128, |
| "step": 2798 |
| }, |
| { |
| "epoch": 0.7705273296412232, |
| "grad_norm": 0.3580198585987091, |
| "learning_rate": 1e-05, |
| "loss": 0.4242, |
| "step": 2800 |
| }, |
| { |
| "epoch": 0.7705273296412232, |
| "eval_merge_loss": 0.3696165680885315, |
| "eval_merge_runtime": 600.0202, |
| "eval_merge_samples_per_second": 56.251, |
| "eval_merge_steps_per_second": 2.345, |
| "step": 2800 |
| }, |
| { |
| "epoch": 0.7705273296412232, |
| "eval_new_aug_datas_filtered.json_loss": 0.49126043915748596, |
| "eval_new_aug_datas_filtered.json_runtime": 10.3252, |
| "eval_new_aug_datas_filtered.json_samples_per_second": 74.285, |
| "eval_new_aug_datas_filtered.json_steps_per_second": 3.099, |
| "step": 2800 |
| }, |
| { |
| "epoch": 0.7705273296412232, |
| "eval_sharegpt_gpt4.json_loss": 0.7416729927062988, |
| "eval_sharegpt_gpt4.json_runtime": 31.6069, |
| "eval_sharegpt_gpt4.json_samples_per_second": 58.88, |
| "eval_sharegpt_gpt4.json_steps_per_second": 2.468, |
| "step": 2800 |
| }, |
| { |
| "epoch": 0.7705273296412232, |
| "eval_Table_GPT.json_loss": 0.04911120608448982, |
| "eval_Table_GPT.json_runtime": 24.9282, |
| "eval_Table_GPT.json_samples_per_second": 83.961, |
| "eval_Table_GPT.json_steps_per_second": 3.53, |
| "step": 2800 |
| }, |
| { |
| "epoch": 0.7705273296412232, |
| "eval_gpt_4o_200k.json_loss": 0.7679291367530823, |
| "eval_gpt_4o_200k.json_runtime": 48.4021, |
| "eval_gpt_4o_200k.json_samples_per_second": 129.767, |
| "eval_gpt_4o_200k.json_steps_per_second": 5.413, |
| "step": 2800 |
| }, |
| { |
| "epoch": 0.7705273296412232, |
| "eval_multi_turn_datas.json_loss": 0.2913420498371124, |
| "eval_multi_turn_datas.json_runtime": 75.4573, |
| "eval_multi_turn_datas.json_samples_per_second": 53.037, |
| "eval_multi_turn_datas.json_steps_per_second": 2.213, |
| "step": 2800 |
| }, |
| { |
| "epoch": 0.7705273296412232, |
| "eval_table_python_code_datas.json_loss": 0.25055599212646484, |
| "eval_table_python_code_datas.json_runtime": 43.009, |
| "eval_table_python_code_datas.json_samples_per_second": 50.199, |
| "eval_table_python_code_datas.json_steps_per_second": 2.093, |
| "step": 2800 |
| }, |
| { |
| "epoch": 0.7705273296412232, |
| "eval_tabular_llm_data.json_loss": 0.07946833223104477, |
| "eval_tabular_llm_data.json_runtime": 8.5236, |
| "eval_tabular_llm_data.json_samples_per_second": 28.861, |
| "eval_tabular_llm_data.json_steps_per_second": 1.291, |
| "step": 2800 |
| }, |
| { |
| "epoch": 0.7705273296412232, |
| "eval_python_code_critic_21k.json_loss": 0.5505719184875488, |
| "eval_python_code_critic_21k.json_runtime": 3.2237, |
| "eval_python_code_critic_21k.json_samples_per_second": 185.192, |
| "eval_python_code_critic_21k.json_steps_per_second": 7.755, |
| "step": 2800 |
| }, |
| { |
| "epoch": 0.7705273296412232, |
| "eval_all_merge_table_dataset.json_loss": 0.07032839208841324, |
| "eval_all_merge_table_dataset.json_runtime": 23.2519, |
| "eval_all_merge_table_dataset.json_samples_per_second": 30.621, |
| "eval_all_merge_table_dataset.json_steps_per_second": 1.29, |
| "step": 2800 |
| }, |
| { |
| "epoch": 0.7705273296412232, |
| "eval_code_feedback_multi_turn.json_loss": 0.5668665766716003, |
| "eval_code_feedback_multi_turn.json_runtime": 32.3765, |
| "eval_code_feedback_multi_turn.json_samples_per_second": 67.981, |
| "eval_code_feedback_multi_turn.json_steps_per_second": 2.842, |
| "step": 2800 |
| }, |
| { |
| "epoch": 0.7705273296412232, |
| "eval_ultrainteract_sft.json_loss": 0.405385285615921, |
| "eval_ultrainteract_sft.json_runtime": 8.6576, |
| "eval_ultrainteract_sft.json_samples_per_second": 168.176, |
| "eval_ultrainteract_sft.json_steps_per_second": 7.046, |
| "step": 2800 |
| }, |
| { |
| "epoch": 0.7705273296412232, |
| "eval_synthetic_text_to_sql.json_loss": 0.0894596055150032, |
| "eval_synthetic_text_to_sql.json_runtime": 0.1263, |
| "eval_synthetic_text_to_sql.json_samples_per_second": 269.263, |
| "eval_synthetic_text_to_sql.json_steps_per_second": 15.839, |
| "step": 2800 |
| }, |
| { |
| "epoch": 0.7705273296412232, |
| "eval_sft_react_sql_datas.json_loss": 0.6155156493186951, |
| "eval_sft_react_sql_datas.json_runtime": 7.8457, |
| "eval_sft_react_sql_datas.json_samples_per_second": 40.022, |
| "eval_sft_react_sql_datas.json_steps_per_second": 1.784, |
| "step": 2800 |
| }, |
| { |
| "epoch": 0.7705273296412232, |
| "eval_all_merge_code.json_loss": 0.2757679224014282, |
| "eval_all_merge_code.json_runtime": 0.3332, |
| "eval_all_merge_code.json_samples_per_second": 189.076, |
| "eval_all_merge_code.json_steps_per_second": 9.004, |
| "step": 2800 |
| }, |
| { |
| "epoch": 0.7705273296412232, |
| "eval_magpie_datas.json_loss": 0.42383918166160583, |
| "eval_magpie_datas.json_runtime": 2.2093, |
| "eval_magpie_datas.json_samples_per_second": 77.853, |
| "eval_magpie_datas.json_steps_per_second": 3.621, |
| "step": 2800 |
| }, |
| { |
| "epoch": 0.7705273296412232, |
| "eval_train_data_for_qwen.json_loss": 0.0028582699596881866, |
| "eval_train_data_for_qwen.json_runtime": 0.244, |
| "eval_train_data_for_qwen.json_samples_per_second": 40.988, |
| "eval_train_data_for_qwen.json_steps_per_second": 4.099, |
| "step": 2800 |
| }, |
| { |
| "epoch": 0.7705273296412232, |
| "eval_alpaca_cleaned.json_loss": 0.9000511169433594, |
| "eval_alpaca_cleaned.json_runtime": 0.1144, |
| "eval_alpaca_cleaned.json_samples_per_second": 235.991, |
| "eval_alpaca_cleaned.json_steps_per_second": 17.481, |
| "step": 2800 |
| }, |
| { |
| "epoch": 0.7705273296412232, |
| "eval_agent_instruct.json_loss": 0.21006985008716583, |
| "eval_agent_instruct.json_runtime": 0.5133, |
| "eval_agent_instruct.json_samples_per_second": 93.518, |
| "eval_agent_instruct.json_steps_per_second": 3.897, |
| "step": 2800 |
| }, |
| { |
| "epoch": 0.7705273296412232, |
| "eval_MathInstruct.json_loss": 0.19836944341659546, |
| "eval_MathInstruct.json_runtime": 0.3623, |
| "eval_MathInstruct.json_samples_per_second": 157.336, |
| "eval_MathInstruct.json_steps_per_second": 8.281, |
| "step": 2800 |
| }, |
| { |
| "epoch": 0.7705273296412232, |
| "eval_tested_143k_python_alpaca.json_loss": 0.44593295454978943, |
| "eval_tested_143k_python_alpaca.json_runtime": 0.303, |
| "eval_tested_143k_python_alpaca.json_samples_per_second": 112.196, |
| "eval_tested_143k_python_alpaca.json_steps_per_second": 6.6, |
| "step": 2800 |
| }, |
| { |
| "epoch": 0.7705273296412232, |
| "eval_xlam_function_calling_60k.json_loss": 0.0066245682537555695, |
| "eval_xlam_function_calling_60k.json_runtime": 0.1016, |
| "eval_xlam_function_calling_60k.json_samples_per_second": 226.385, |
| "eval_xlam_function_calling_60k.json_steps_per_second": 9.843, |
| "step": 2800 |
| }, |
| { |
| "epoch": 0.7705273296412232, |
| "eval_alpaca_data_gpt4_chinese.json_loss": 1.5253314971923828, |
| "eval_alpaca_data_gpt4_chinese.json_runtime": 0.052, |
| "eval_alpaca_data_gpt4_chinese.json_samples_per_second": 307.853, |
| "eval_alpaca_data_gpt4_chinese.json_steps_per_second": 19.241, |
| "step": 2800 |
| }, |
| { |
| "epoch": 0.7705273296412232, |
| "eval_alpaca_gpt4_zh.json_loss": 0.9524829983711243, |
| "eval_alpaca_gpt4_zh.json_runtime": 0.0499, |
| "eval_alpaca_gpt4_zh.json_samples_per_second": 220.602, |
| "eval_alpaca_gpt4_zh.json_steps_per_second": 20.055, |
| "step": 2800 |
| }, |
| { |
| "epoch": 0.7705273296412232, |
| "eval_codefeedback_filtered_instruction.json_loss": 0.5769651532173157, |
| "eval_codefeedback_filtered_instruction.json_runtime": 0.4873, |
| "eval_codefeedback_filtered_instruction.json_samples_per_second": 41.047, |
| "eval_codefeedback_filtered_instruction.json_steps_per_second": 2.052, |
| "step": 2800 |
| }, |
| { |
| "epoch": 0.7710777063052526, |
| "grad_norm": 0.3490790128707886, |
| "learning_rate": 1e-05, |
| "loss": 0.4279, |
| "step": 2802 |
| }, |
| { |
| "epoch": 0.7716280829692821, |
| "grad_norm": 0.39200064539909363, |
| "learning_rate": 1e-05, |
| "loss": 0.4419, |
| "step": 2804 |
| }, |
| { |
| "epoch": 0.7721784596333116, |
| "grad_norm": 0.36754128336906433, |
| "learning_rate": 1e-05, |
| "loss": 0.4298, |
| "step": 2806 |
| }, |
| { |
| "epoch": 0.772728836297341, |
| "grad_norm": 0.3482655882835388, |
| "learning_rate": 1e-05, |
| "loss": 0.4249, |
| "step": 2808 |
| }, |
| { |
| "epoch": 0.7732792129613705, |
| "grad_norm": 0.35949841141700745, |
| "learning_rate": 1e-05, |
| "loss": 0.4245, |
| "step": 2810 |
| }, |
| { |
| "epoch": 0.7738295896253999, |
| "grad_norm": 0.3631410598754883, |
| "learning_rate": 1e-05, |
| "loss": 0.4221, |
| "step": 2812 |
| }, |
| { |
| "epoch": 0.7743799662894293, |
| "grad_norm": 0.3531825542449951, |
| "learning_rate": 1e-05, |
| "loss": 0.415, |
| "step": 2814 |
| }, |
| { |
| "epoch": 0.7749303429534588, |
| "grad_norm": 0.3741169571876526, |
| "learning_rate": 1e-05, |
| "loss": 0.421, |
| "step": 2816 |
| }, |
| { |
| "epoch": 0.7754807196174882, |
| "grad_norm": 0.3431030511856079, |
| "learning_rate": 1e-05, |
| "loss": 0.4082, |
| "step": 2818 |
| }, |
| { |
| "epoch": 0.7760310962815177, |
| "grad_norm": 0.35572293400764465, |
| "learning_rate": 1e-05, |
| "loss": 0.4279, |
| "step": 2820 |
| }, |
| { |
| "epoch": 0.7765814729455471, |
| "grad_norm": 0.33715927600860596, |
| "learning_rate": 1e-05, |
| "loss": 0.4217, |
| "step": 2822 |
| }, |
| { |
| "epoch": 0.7771318496095766, |
| "grad_norm": 0.3827720582485199, |
| "learning_rate": 1e-05, |
| "loss": 0.4195, |
| "step": 2824 |
| }, |
| { |
| "epoch": 0.777682226273606, |
| "grad_norm": 0.34325775504112244, |
| "learning_rate": 1e-05, |
| "loss": 0.4359, |
| "step": 2826 |
| }, |
| { |
| "epoch": 0.7782326029376354, |
| "grad_norm": 0.34917858242988586, |
| "learning_rate": 1e-05, |
| "loss": 0.4165, |
| "step": 2828 |
| }, |
| { |
| "epoch": 0.7787829796016649, |
| "grad_norm": 0.3705228865146637, |
| "learning_rate": 1e-05, |
| "loss": 0.4234, |
| "step": 2830 |
| }, |
| { |
| "epoch": 0.7793333562656943, |
| "grad_norm": 0.36879298090934753, |
| "learning_rate": 1e-05, |
| "loss": 0.4173, |
| "step": 2832 |
| }, |
| { |
| "epoch": 0.7798837329297238, |
| "grad_norm": 0.35160768032073975, |
| "learning_rate": 1e-05, |
| "loss": 0.427, |
| "step": 2834 |
| }, |
| { |
| "epoch": 0.7804341095937533, |
| "grad_norm": 0.35639581084251404, |
| "learning_rate": 1e-05, |
| "loss": 0.4342, |
| "step": 2836 |
| }, |
| { |
| "epoch": 0.7809844862577827, |
| "grad_norm": 0.3821897804737091, |
| "learning_rate": 1e-05, |
| "loss": 0.4143, |
| "step": 2838 |
| }, |
| { |
| "epoch": 0.7815348629218121, |
| "grad_norm": 0.35575130581855774, |
| "learning_rate": 1e-05, |
| "loss": 0.4052, |
| "step": 2840 |
| }, |
| { |
| "epoch": 0.7820852395858415, |
| "grad_norm": 0.367026150226593, |
| "learning_rate": 1e-05, |
| "loss": 0.4507, |
| "step": 2842 |
| }, |
| { |
| "epoch": 0.782635616249871, |
| "grad_norm": 0.35660848021507263, |
| "learning_rate": 1e-05, |
| "loss": 0.4112, |
| "step": 2844 |
| }, |
| { |
| "epoch": 0.7831859929139005, |
| "grad_norm": 0.3623476028442383, |
| "learning_rate": 1e-05, |
| "loss": 0.4298, |
| "step": 2846 |
| }, |
| { |
| "epoch": 0.7837363695779299, |
| "grad_norm": 0.36522987484931946, |
| "learning_rate": 1e-05, |
| "loss": 0.4197, |
| "step": 2848 |
| }, |
| { |
| "epoch": 0.7842867462419594, |
| "grad_norm": 0.349153608083725, |
| "learning_rate": 1e-05, |
| "loss": 0.4179, |
| "step": 2850 |
| }, |
| { |
| "epoch": 0.7848371229059888, |
| "grad_norm": 0.3868444263935089, |
| "learning_rate": 1e-05, |
| "loss": 0.4309, |
| "step": 2852 |
| }, |
| { |
| "epoch": 0.7853874995700182, |
| "grad_norm": 0.3388199210166931, |
| "learning_rate": 1e-05, |
| "loss": 0.4255, |
| "step": 2854 |
| }, |
| { |
| "epoch": 0.7859378762340476, |
| "grad_norm": 0.3848430812358856, |
| "learning_rate": 1e-05, |
| "loss": 0.4248, |
| "step": 2856 |
| }, |
| { |
| "epoch": 0.7864882528980771, |
| "grad_norm": 0.34994250535964966, |
| "learning_rate": 1e-05, |
| "loss": 0.43, |
| "step": 2858 |
| }, |
| { |
| "epoch": 0.7870386295621066, |
| "grad_norm": 0.3475828170776367, |
| "learning_rate": 1e-05, |
| "loss": 0.4245, |
| "step": 2860 |
| }, |
| { |
| "epoch": 0.787589006226136, |
| "grad_norm": 0.3643713593482971, |
| "learning_rate": 1e-05, |
| "loss": 0.4285, |
| "step": 2862 |
| }, |
| { |
| "epoch": 0.7881393828901655, |
| "grad_norm": 0.3819843828678131, |
| "learning_rate": 1e-05, |
| "loss": 0.4264, |
| "step": 2864 |
| }, |
| { |
| "epoch": 0.7886897595541948, |
| "grad_norm": 0.3636263608932495, |
| "learning_rate": 1e-05, |
| "loss": 0.4354, |
| "step": 2866 |
| }, |
| { |
| "epoch": 0.7892401362182243, |
| "grad_norm": 0.35367467999458313, |
| "learning_rate": 1e-05, |
| "loss": 0.4219, |
| "step": 2868 |
| }, |
| { |
| "epoch": 0.7897905128822538, |
| "grad_norm": 0.33511704206466675, |
| "learning_rate": 1e-05, |
| "loss": 0.427, |
| "step": 2870 |
| }, |
| { |
| "epoch": 0.7903408895462832, |
| "grad_norm": 0.3727225363254547, |
| "learning_rate": 1e-05, |
| "loss": 0.4325, |
| "step": 2872 |
| }, |
| { |
| "epoch": 0.7908912662103127, |
| "grad_norm": 0.35963478684425354, |
| "learning_rate": 1e-05, |
| "loss": 0.4331, |
| "step": 2874 |
| }, |
| { |
| "epoch": 0.7914416428743422, |
| "grad_norm": 0.3680688440799713, |
| "learning_rate": 1e-05, |
| "loss": 0.426, |
| "step": 2876 |
| }, |
| { |
| "epoch": 0.7919920195383716, |
| "grad_norm": 0.3594858646392822, |
| "learning_rate": 1e-05, |
| "loss": 0.4251, |
| "step": 2878 |
| }, |
| { |
| "epoch": 0.792542396202401, |
| "grad_norm": 0.3666832745075226, |
| "learning_rate": 1e-05, |
| "loss": 0.4148, |
| "step": 2880 |
| }, |
| { |
| "epoch": 0.7930927728664304, |
| "grad_norm": 0.3594750761985779, |
| "learning_rate": 1e-05, |
| "loss": 0.424, |
| "step": 2882 |
| }, |
| { |
| "epoch": 0.7936431495304599, |
| "grad_norm": 0.34796181321144104, |
| "learning_rate": 1e-05, |
| "loss": 0.4188, |
| "step": 2884 |
| }, |
| { |
| "epoch": 0.7941935261944894, |
| "grad_norm": 0.3670448958873749, |
| "learning_rate": 1e-05, |
| "loss": 0.4184, |
| "step": 2886 |
| }, |
| { |
| "epoch": 0.7947439028585188, |
| "grad_norm": 0.38206908106803894, |
| "learning_rate": 1e-05, |
| "loss": 0.4333, |
| "step": 2888 |
| }, |
| { |
| "epoch": 0.7952942795225483, |
| "grad_norm": 0.3671881854534149, |
| "learning_rate": 1e-05, |
| "loss": 0.4117, |
| "step": 2890 |
| }, |
| { |
| "epoch": 0.7958446561865777, |
| "grad_norm": 0.33647626638412476, |
| "learning_rate": 1e-05, |
| "loss": 0.4098, |
| "step": 2892 |
| }, |
| { |
| "epoch": 0.7963950328506071, |
| "grad_norm": 0.3504905700683594, |
| "learning_rate": 1e-05, |
| "loss": 0.4227, |
| "step": 2894 |
| }, |
| { |
| "epoch": 0.7969454095146365, |
| "grad_norm": 0.3571165204048157, |
| "learning_rate": 1e-05, |
| "loss": 0.4126, |
| "step": 2896 |
| }, |
| { |
| "epoch": 0.797495786178666, |
| "grad_norm": 0.3529278337955475, |
| "learning_rate": 1e-05, |
| "loss": 0.4198, |
| "step": 2898 |
| }, |
| { |
| "epoch": 0.7980461628426955, |
| "grad_norm": 0.3688133656978607, |
| "learning_rate": 1e-05, |
| "loss": 0.443, |
| "step": 2900 |
| }, |
| { |
| "epoch": 0.7985965395067249, |
| "grad_norm": 0.37664586305618286, |
| "learning_rate": 1e-05, |
| "loss": 0.4345, |
| "step": 2902 |
| }, |
| { |
| "epoch": 0.7991469161707544, |
| "grad_norm": 0.37368759512901306, |
| "learning_rate": 1e-05, |
| "loss": 0.4202, |
| "step": 2904 |
| }, |
| { |
| "epoch": 0.7996972928347839, |
| "grad_norm": 0.3880954384803772, |
| "learning_rate": 1e-05, |
| "loss": 0.4234, |
| "step": 2906 |
| }, |
| { |
| "epoch": 0.8002476694988132, |
| "grad_norm": 0.34263235330581665, |
| "learning_rate": 1e-05, |
| "loss": 0.4177, |
| "step": 2908 |
| }, |
| { |
| "epoch": 0.8007980461628427, |
| "grad_norm": 0.37408214807510376, |
| "learning_rate": 1e-05, |
| "loss": 0.4366, |
| "step": 2910 |
| }, |
| { |
| "epoch": 0.8013484228268721, |
| "grad_norm": 0.35213685035705566, |
| "learning_rate": 1e-05, |
| "loss": 0.411, |
| "step": 2912 |
| }, |
| { |
| "epoch": 0.8018987994909016, |
| "grad_norm": 0.3545092046260834, |
| "learning_rate": 1e-05, |
| "loss": 0.4378, |
| "step": 2914 |
| }, |
| { |
| "epoch": 0.802449176154931, |
| "grad_norm": 0.3618670701980591, |
| "learning_rate": 1e-05, |
| "loss": 0.4187, |
| "step": 2916 |
| }, |
| { |
| "epoch": 0.8029995528189605, |
| "grad_norm": 0.3392831087112427, |
| "learning_rate": 1e-05, |
| "loss": 0.4305, |
| "step": 2918 |
| }, |
| { |
| "epoch": 0.8035499294829899, |
| "grad_norm": 0.3700800836086273, |
| "learning_rate": 1e-05, |
| "loss": 0.4212, |
| "step": 2920 |
| }, |
| { |
| "epoch": 0.8041003061470193, |
| "grad_norm": 0.35381945967674255, |
| "learning_rate": 1e-05, |
| "loss": 0.416, |
| "step": 2922 |
| }, |
| { |
| "epoch": 0.8046506828110488, |
| "grad_norm": 0.3526875972747803, |
| "learning_rate": 1e-05, |
| "loss": 0.4287, |
| "step": 2924 |
| }, |
| { |
| "epoch": 0.8052010594750783, |
| "grad_norm": 0.3656879663467407, |
| "learning_rate": 1e-05, |
| "loss": 0.4196, |
| "step": 2926 |
| }, |
| { |
| "epoch": 0.8057514361391077, |
| "grad_norm": 0.3675120174884796, |
| "learning_rate": 1e-05, |
| "loss": 0.419, |
| "step": 2928 |
| }, |
| { |
| "epoch": 0.8063018128031372, |
| "grad_norm": 0.34032610058784485, |
| "learning_rate": 1e-05, |
| "loss": 0.4301, |
| "step": 2930 |
| }, |
| { |
| "epoch": 0.8068521894671666, |
| "grad_norm": 0.39022547006607056, |
| "learning_rate": 1e-05, |
| "loss": 0.4347, |
| "step": 2932 |
| }, |
| { |
| "epoch": 0.807402566131196, |
| "grad_norm": 0.38301143050193787, |
| "learning_rate": 1e-05, |
| "loss": 0.4289, |
| "step": 2934 |
| }, |
| { |
| "epoch": 0.8079529427952254, |
| "grad_norm": 0.34974217414855957, |
| "learning_rate": 1e-05, |
| "loss": 0.4233, |
| "step": 2936 |
| }, |
| { |
| "epoch": 0.8085033194592549, |
| "grad_norm": 0.3554193377494812, |
| "learning_rate": 1e-05, |
| "loss": 0.4078, |
| "step": 2938 |
| }, |
| { |
| "epoch": 0.8090536961232844, |
| "grad_norm": 0.3496205806732178, |
| "learning_rate": 1e-05, |
| "loss": 0.4241, |
| "step": 2940 |
| }, |
| { |
| "epoch": 0.8096040727873138, |
| "grad_norm": 0.3549167513847351, |
| "learning_rate": 1e-05, |
| "loss": 0.4281, |
| "step": 2942 |
| }, |
| { |
| "epoch": 0.8101544494513433, |
| "grad_norm": 0.3635149896144867, |
| "learning_rate": 1e-05, |
| "loss": 0.4307, |
| "step": 2944 |
| }, |
| { |
| "epoch": 0.8107048261153728, |
| "grad_norm": 0.36100322008132935, |
| "learning_rate": 1e-05, |
| "loss": 0.4352, |
| "step": 2946 |
| }, |
| { |
| "epoch": 0.8112552027794021, |
| "grad_norm": 0.36892169713974, |
| "learning_rate": 1e-05, |
| "loss": 0.4245, |
| "step": 2948 |
| }, |
| { |
| "epoch": 0.8118055794434316, |
| "grad_norm": 0.34998342394828796, |
| "learning_rate": 1e-05, |
| "loss": 0.4214, |
| "step": 2950 |
| }, |
| { |
| "epoch": 0.812355956107461, |
| "grad_norm": 0.36382123827934265, |
| "learning_rate": 1e-05, |
| "loss": 0.4342, |
| "step": 2952 |
| }, |
| { |
| "epoch": 0.8129063327714905, |
| "grad_norm": 0.361068457365036, |
| "learning_rate": 1e-05, |
| "loss": 0.4198, |
| "step": 2954 |
| }, |
| { |
| "epoch": 0.81345670943552, |
| "grad_norm": 0.36285367608070374, |
| "learning_rate": 1e-05, |
| "loss": 0.4297, |
| "step": 2956 |
| }, |
| { |
| "epoch": 0.8140070860995494, |
| "grad_norm": 0.3376438319683075, |
| "learning_rate": 1e-05, |
| "loss": 0.3999, |
| "step": 2958 |
| }, |
| { |
| "epoch": 0.8145574627635789, |
| "grad_norm": 0.35821884870529175, |
| "learning_rate": 1e-05, |
| "loss": 0.4283, |
| "step": 2960 |
| }, |
| { |
| "epoch": 0.8151078394276082, |
| "grad_norm": 0.37185990810394287, |
| "learning_rate": 1e-05, |
| "loss": 0.4221, |
| "step": 2962 |
| }, |
| { |
| "epoch": 0.8156582160916377, |
| "grad_norm": 0.3599165380001068, |
| "learning_rate": 1e-05, |
| "loss": 0.4222, |
| "step": 2964 |
| }, |
| { |
| "epoch": 0.8162085927556672, |
| "grad_norm": 0.3599473237991333, |
| "learning_rate": 1e-05, |
| "loss": 0.4211, |
| "step": 2966 |
| }, |
| { |
| "epoch": 0.8167589694196966, |
| "grad_norm": 0.3631754219532013, |
| "learning_rate": 1e-05, |
| "loss": 0.4273, |
| "step": 2968 |
| }, |
| { |
| "epoch": 0.8173093460837261, |
| "grad_norm": 0.34736868739128113, |
| "learning_rate": 1e-05, |
| "loss": 0.4175, |
| "step": 2970 |
| }, |
| { |
| "epoch": 0.8178597227477555, |
| "grad_norm": 0.34098127484321594, |
| "learning_rate": 1e-05, |
| "loss": 0.4297, |
| "step": 2972 |
| }, |
| { |
| "epoch": 0.8184100994117849, |
| "grad_norm": 0.3562553822994232, |
| "learning_rate": 1e-05, |
| "loss": 0.4342, |
| "step": 2974 |
| }, |
| { |
| "epoch": 0.8189604760758143, |
| "grad_norm": 0.3628046214580536, |
| "learning_rate": 1e-05, |
| "loss": 0.4146, |
| "step": 2976 |
| }, |
| { |
| "epoch": 0.8195108527398438, |
| "grad_norm": 0.33993610739707947, |
| "learning_rate": 1e-05, |
| "loss": 0.4228, |
| "step": 2978 |
| }, |
| { |
| "epoch": 0.8200612294038733, |
| "grad_norm": 0.35291528701782227, |
| "learning_rate": 1e-05, |
| "loss": 0.4179, |
| "step": 2980 |
| }, |
| { |
| "epoch": 0.8206116060679027, |
| "grad_norm": 0.3480774164199829, |
| "learning_rate": 1e-05, |
| "loss": 0.4099, |
| "step": 2982 |
| }, |
| { |
| "epoch": 0.8211619827319322, |
| "grad_norm": 0.36476173996925354, |
| "learning_rate": 1e-05, |
| "loss": 0.4153, |
| "step": 2984 |
| }, |
| { |
| "epoch": 0.8217123593959617, |
| "grad_norm": 0.3587859869003296, |
| "learning_rate": 1e-05, |
| "loss": 0.4334, |
| "step": 2986 |
| }, |
| { |
| "epoch": 0.822262736059991, |
| "grad_norm": 0.38419267535209656, |
| "learning_rate": 1e-05, |
| "loss": 0.4357, |
| "step": 2988 |
| }, |
| { |
| "epoch": 0.8228131127240205, |
| "grad_norm": 0.3496173024177551, |
| "learning_rate": 1e-05, |
| "loss": 0.4156, |
| "step": 2990 |
| }, |
| { |
| "epoch": 0.8233634893880499, |
| "grad_norm": 0.36481598019599915, |
| "learning_rate": 1e-05, |
| "loss": 0.4108, |
| "step": 2992 |
| }, |
| { |
| "epoch": 0.8239138660520794, |
| "grad_norm": 0.36568546295166016, |
| "learning_rate": 1e-05, |
| "loss": 0.4329, |
| "step": 2994 |
| }, |
| { |
| "epoch": 0.8244642427161089, |
| "grad_norm": 0.3675042390823364, |
| "learning_rate": 1e-05, |
| "loss": 0.4301, |
| "step": 2996 |
| }, |
| { |
| "epoch": 0.8250146193801383, |
| "grad_norm": 0.3355284035205841, |
| "learning_rate": 1e-05, |
| "loss": 0.4162, |
| "step": 2998 |
| }, |
| { |
| "epoch": 0.8255649960441678, |
| "grad_norm": 0.34280914068222046, |
| "learning_rate": 1e-05, |
| "loss": 0.4168, |
| "step": 3000 |
| }, |
| { |
| "epoch": 0.8255649960441678, |
| "eval_merge_loss": 0.3659045696258545, |
| "eval_merge_runtime": 599.8368, |
| "eval_merge_samples_per_second": 56.269, |
| "eval_merge_steps_per_second": 2.346, |
| "step": 3000 |
| }, |
| { |
| "epoch": 0.8255649960441678, |
| "eval_new_aug_datas_filtered.json_loss": 0.48660770058631897, |
| "eval_new_aug_datas_filtered.json_runtime": 10.3383, |
| "eval_new_aug_datas_filtered.json_samples_per_second": 74.19, |
| "eval_new_aug_datas_filtered.json_steps_per_second": 3.095, |
| "step": 3000 |
| }, |
| { |
| "epoch": 0.8255649960441678, |
| "eval_sharegpt_gpt4.json_loss": 0.7358890175819397, |
| "eval_sharegpt_gpt4.json_runtime": 31.7081, |
| "eval_sharegpt_gpt4.json_samples_per_second": 58.692, |
| "eval_sharegpt_gpt4.json_steps_per_second": 2.46, |
| "step": 3000 |
| }, |
| { |
| "epoch": 0.8255649960441678, |
| "eval_Table_GPT.json_loss": 0.045936468988657, |
| "eval_Table_GPT.json_runtime": 24.9946, |
| "eval_Table_GPT.json_samples_per_second": 83.738, |
| "eval_Table_GPT.json_steps_per_second": 3.521, |
| "step": 3000 |
| }, |
| { |
| "epoch": 0.8255649960441678, |
| "eval_gpt_4o_200k.json_loss": 0.7624426484107971, |
| "eval_gpt_4o_200k.json_runtime": 48.6264, |
| "eval_gpt_4o_200k.json_samples_per_second": 129.169, |
| "eval_gpt_4o_200k.json_steps_per_second": 5.388, |
| "step": 3000 |
| }, |
| { |
| "epoch": 0.8255649960441678, |
| "eval_multi_turn_datas.json_loss": 0.2812780439853668, |
| "eval_multi_turn_datas.json_runtime": 75.8593, |
| "eval_multi_turn_datas.json_samples_per_second": 52.756, |
| "eval_multi_turn_datas.json_steps_per_second": 2.201, |
| "step": 3000 |
| }, |
| { |
| "epoch": 0.8255649960441678, |
| "eval_table_python_code_datas.json_loss": 0.24670127034187317, |
| "eval_table_python_code_datas.json_runtime": 43.2305, |
| "eval_table_python_code_datas.json_samples_per_second": 49.942, |
| "eval_table_python_code_datas.json_steps_per_second": 2.082, |
| "step": 3000 |
| }, |
| { |
| "epoch": 0.8255649960441678, |
| "eval_tabular_llm_data.json_loss": 0.08318436145782471, |
| "eval_tabular_llm_data.json_runtime": 8.561, |
| "eval_tabular_llm_data.json_samples_per_second": 28.735, |
| "eval_tabular_llm_data.json_steps_per_second": 1.285, |
| "step": 3000 |
| }, |
| { |
| "epoch": 0.8255649960441678, |
| "eval_python_code_critic_21k.json_loss": 0.5459744930267334, |
| "eval_python_code_critic_21k.json_runtime": 3.2232, |
| "eval_python_code_critic_21k.json_samples_per_second": 185.217, |
| "eval_python_code_critic_21k.json_steps_per_second": 7.756, |
| "step": 3000 |
| }, |
| { |
| "epoch": 0.8255649960441678, |
| "eval_all_merge_table_dataset.json_loss": 0.07661881297826767, |
| "eval_all_merge_table_dataset.json_runtime": 23.3773, |
| "eval_all_merge_table_dataset.json_samples_per_second": 30.457, |
| "eval_all_merge_table_dataset.json_steps_per_second": 1.283, |
| "step": 3000 |
| }, |
| { |
| "epoch": 0.8255649960441678, |
| "eval_code_feedback_multi_turn.json_loss": 0.5640604496002197, |
| "eval_code_feedback_multi_turn.json_runtime": 32.4865, |
| "eval_code_feedback_multi_turn.json_samples_per_second": 67.751, |
| "eval_code_feedback_multi_turn.json_steps_per_second": 2.832, |
| "step": 3000 |
| }, |
| { |
| "epoch": 0.8255649960441678, |
| "eval_ultrainteract_sft.json_loss": 0.40351128578186035, |
| "eval_ultrainteract_sft.json_runtime": 8.6435, |
| "eval_ultrainteract_sft.json_samples_per_second": 168.449, |
| "eval_ultrainteract_sft.json_steps_per_second": 7.057, |
| "step": 3000 |
| }, |
| { |
| "epoch": 0.8255649960441678, |
| "eval_synthetic_text_to_sql.json_loss": 0.09340357035398483, |
| "eval_synthetic_text_to_sql.json_runtime": 0.1267, |
| "eval_synthetic_text_to_sql.json_samples_per_second": 268.437, |
| "eval_synthetic_text_to_sql.json_steps_per_second": 15.79, |
| "step": 3000 |
| }, |
| { |
| "epoch": 0.8255649960441678, |
| "eval_sft_react_sql_datas.json_loss": 0.614182710647583, |
| "eval_sft_react_sql_datas.json_runtime": 7.8427, |
| "eval_sft_react_sql_datas.json_samples_per_second": 40.037, |
| "eval_sft_react_sql_datas.json_steps_per_second": 1.785, |
| "step": 3000 |
| }, |
| { |
| "epoch": 0.8255649960441678, |
| "eval_all_merge_code.json_loss": 0.2747681736946106, |
| "eval_all_merge_code.json_runtime": 0.3335, |
| "eval_all_merge_code.json_samples_per_second": 188.917, |
| "eval_all_merge_code.json_steps_per_second": 8.996, |
| "step": 3000 |
| }, |
| { |
| "epoch": 0.8255649960441678, |
| "eval_magpie_datas.json_loss": 0.42281365394592285, |
| "eval_magpie_datas.json_runtime": 2.2171, |
| "eval_magpie_datas.json_samples_per_second": 77.579, |
| "eval_magpie_datas.json_steps_per_second": 3.608, |
| "step": 3000 |
| }, |
| { |
| "epoch": 0.8255649960441678, |
| "eval_train_data_for_qwen.json_loss": 0.0027365919668227434, |
| "eval_train_data_for_qwen.json_runtime": 0.2454, |
| "eval_train_data_for_qwen.json_samples_per_second": 40.756, |
| "eval_train_data_for_qwen.json_steps_per_second": 4.076, |
| "step": 3000 |
| }, |
| { |
| "epoch": 0.8255649960441678, |
| "eval_alpaca_cleaned.json_loss": 0.9086716175079346, |
| "eval_alpaca_cleaned.json_runtime": 0.1143, |
| "eval_alpaca_cleaned.json_samples_per_second": 236.118, |
| "eval_alpaca_cleaned.json_steps_per_second": 17.49, |
| "step": 3000 |
| }, |
| { |
| "epoch": 0.8255649960441678, |
| "eval_agent_instruct.json_loss": 0.20960307121276855, |
| "eval_agent_instruct.json_runtime": 0.5163, |
| "eval_agent_instruct.json_samples_per_second": 92.971, |
| "eval_agent_instruct.json_steps_per_second": 3.874, |
| "step": 3000 |
| }, |
| { |
| "epoch": 0.8255649960441678, |
| "eval_MathInstruct.json_loss": 0.20019014179706573, |
| "eval_MathInstruct.json_runtime": 0.3582, |
| "eval_MathInstruct.json_samples_per_second": 159.116, |
| "eval_MathInstruct.json_steps_per_second": 8.375, |
| "step": 3000 |
| }, |
| { |
| "epoch": 0.8255649960441678, |
| "eval_tested_143k_python_alpaca.json_loss": 0.44821104407310486, |
| "eval_tested_143k_python_alpaca.json_runtime": 0.3022, |
| "eval_tested_143k_python_alpaca.json_samples_per_second": 112.526, |
| "eval_tested_143k_python_alpaca.json_steps_per_second": 6.619, |
| "step": 3000 |
| }, |
| { |
| "epoch": 0.8255649960441678, |
| "eval_xlam_function_calling_60k.json_loss": 0.008376230485737324, |
| "eval_xlam_function_calling_60k.json_runtime": 0.1003, |
| "eval_xlam_function_calling_60k.json_samples_per_second": 229.41, |
| "eval_xlam_function_calling_60k.json_steps_per_second": 9.974, |
| "step": 3000 |
| }, |
| { |
| "epoch": 0.8255649960441678, |
| "eval_alpaca_data_gpt4_chinese.json_loss": 1.513078212738037, |
| "eval_alpaca_data_gpt4_chinese.json_runtime": 0.0516, |
| "eval_alpaca_data_gpt4_chinese.json_samples_per_second": 310.009, |
| "eval_alpaca_data_gpt4_chinese.json_steps_per_second": 19.376, |
| "step": 3000 |
| }, |
| { |
| "epoch": 0.8255649960441678, |
| "eval_alpaca_gpt4_zh.json_loss": 0.9633126258850098, |
| "eval_alpaca_gpt4_zh.json_runtime": 0.0499, |
| "eval_alpaca_gpt4_zh.json_samples_per_second": 220.561, |
| "eval_alpaca_gpt4_zh.json_steps_per_second": 20.051, |
| "step": 3000 |
| }, |
| { |
| "epoch": 0.8255649960441678, |
| "eval_codefeedback_filtered_instruction.json_loss": 0.5788259506225586, |
| "eval_codefeedback_filtered_instruction.json_runtime": 0.4854, |
| "eval_codefeedback_filtered_instruction.json_samples_per_second": 41.202, |
| "eval_codefeedback_filtered_instruction.json_steps_per_second": 2.06, |
| "step": 3000 |
| }, |
| { |
| "epoch": 0.8261153727081971, |
| "grad_norm": 0.35386523604393005, |
| "learning_rate": 1e-05, |
| "loss": 0.4235, |
| "step": 3002 |
| }, |
| { |
| "epoch": 0.8266657493722266, |
| "grad_norm": 0.35325145721435547, |
| "learning_rate": 1e-05, |
| "loss": 0.4296, |
| "step": 3004 |
| }, |
| { |
| "epoch": 0.827216126036256, |
| "grad_norm": 0.35455331206321716, |
| "learning_rate": 1e-05, |
| "loss": 0.405, |
| "step": 3006 |
| }, |
| { |
| "epoch": 0.8277665027002855, |
| "grad_norm": 0.37510380148887634, |
| "learning_rate": 1e-05, |
| "loss": 0.4288, |
| "step": 3008 |
| }, |
| { |
| "epoch": 0.828316879364315, |
| "grad_norm": 0.356189489364624, |
| "learning_rate": 1e-05, |
| "loss": 0.4145, |
| "step": 3010 |
| }, |
| { |
| "epoch": 0.8288672560283444, |
| "grad_norm": 0.36097854375839233, |
| "learning_rate": 1e-05, |
| "loss": 0.4247, |
| "step": 3012 |
| }, |
| { |
| "epoch": 0.8294176326923739, |
| "grad_norm": 0.3489934802055359, |
| "learning_rate": 1e-05, |
| "loss": 0.422, |
| "step": 3014 |
| }, |
| { |
| "epoch": 0.8299680093564032, |
| "grad_norm": 0.36287152767181396, |
| "learning_rate": 1e-05, |
| "loss": 0.4025, |
| "step": 3016 |
| }, |
| { |
| "epoch": 0.8305183860204327, |
| "grad_norm": 0.3664880096912384, |
| "learning_rate": 1e-05, |
| "loss": 0.4278, |
| "step": 3018 |
| }, |
| { |
| "epoch": 0.8310687626844622, |
| "grad_norm": 0.35230088233947754, |
| "learning_rate": 1e-05, |
| "loss": 0.4233, |
| "step": 3020 |
| }, |
| { |
| "epoch": 0.8316191393484916, |
| "grad_norm": 0.3595122694969177, |
| "learning_rate": 1e-05, |
| "loss": 0.4173, |
| "step": 3022 |
| }, |
| { |
| "epoch": 0.8321695160125211, |
| "grad_norm": 0.3618360757827759, |
| "learning_rate": 1e-05, |
| "loss": 0.4213, |
| "step": 3024 |
| }, |
| { |
| "epoch": 0.8327198926765506, |
| "grad_norm": 0.3699500858783722, |
| "learning_rate": 1e-05, |
| "loss": 0.4267, |
| "step": 3026 |
| }, |
| { |
| "epoch": 0.8332702693405799, |
| "grad_norm": 0.37343189120292664, |
| "learning_rate": 1e-05, |
| "loss": 0.4288, |
| "step": 3028 |
| }, |
| { |
| "epoch": 0.8338206460046094, |
| "grad_norm": 0.34580445289611816, |
| "learning_rate": 1e-05, |
| "loss": 0.4232, |
| "step": 3030 |
| }, |
| { |
| "epoch": 0.8343710226686388, |
| "grad_norm": 0.3410281836986542, |
| "learning_rate": 1e-05, |
| "loss": 0.4272, |
| "step": 3032 |
| }, |
| { |
| "epoch": 0.8349213993326683, |
| "grad_norm": 0.37444379925727844, |
| "learning_rate": 1e-05, |
| "loss": 0.4264, |
| "step": 3034 |
| }, |
| { |
| "epoch": 0.8354717759966978, |
| "grad_norm": 0.359546959400177, |
| "learning_rate": 1e-05, |
| "loss": 0.4243, |
| "step": 3036 |
| }, |
| { |
| "epoch": 0.8360221526607272, |
| "grad_norm": 0.3611339032649994, |
| "learning_rate": 1e-05, |
| "loss": 0.4243, |
| "step": 3038 |
| }, |
| { |
| "epoch": 0.8365725293247567, |
| "grad_norm": 0.3678295612335205, |
| "learning_rate": 1e-05, |
| "loss": 0.4089, |
| "step": 3040 |
| }, |
| { |
| "epoch": 0.837122905988786, |
| "grad_norm": 0.37094810605049133, |
| "learning_rate": 1e-05, |
| "loss": 0.4191, |
| "step": 3042 |
| }, |
| { |
| "epoch": 0.8376732826528155, |
| "grad_norm": 0.354481041431427, |
| "learning_rate": 1e-05, |
| "loss": 0.4235, |
| "step": 3044 |
| }, |
| { |
| "epoch": 0.838223659316845, |
| "grad_norm": 0.3498587906360626, |
| "learning_rate": 1e-05, |
| "loss": 0.431, |
| "step": 3046 |
| }, |
| { |
| "epoch": 0.8387740359808744, |
| "grad_norm": 0.35214436054229736, |
| "learning_rate": 1e-05, |
| "loss": 0.4132, |
| "step": 3048 |
| }, |
| { |
| "epoch": 0.8393244126449039, |
| "grad_norm": 0.35119178891181946, |
| "learning_rate": 1e-05, |
| "loss": 0.4161, |
| "step": 3050 |
| }, |
| { |
| "epoch": 0.8398747893089333, |
| "grad_norm": 0.3671429753303528, |
| "learning_rate": 1e-05, |
| "loss": 0.4276, |
| "step": 3052 |
| }, |
| { |
| "epoch": 0.8404251659729628, |
| "grad_norm": 0.3626399636268616, |
| "learning_rate": 1e-05, |
| "loss": 0.417, |
| "step": 3054 |
| }, |
| { |
| "epoch": 0.8409755426369921, |
| "grad_norm": 0.3819148540496826, |
| "learning_rate": 1e-05, |
| "loss": 0.4261, |
| "step": 3056 |
| }, |
| { |
| "epoch": 0.8415259193010216, |
| "grad_norm": 0.3481554687023163, |
| "learning_rate": 1e-05, |
| "loss": 0.4339, |
| "step": 3058 |
| }, |
| { |
| "epoch": 0.8420762959650511, |
| "grad_norm": 0.3603340983390808, |
| "learning_rate": 1e-05, |
| "loss": 0.406, |
| "step": 3060 |
| }, |
| { |
| "epoch": 0.8426266726290805, |
| "grad_norm": 0.3565911650657654, |
| "learning_rate": 1e-05, |
| "loss": 0.4245, |
| "step": 3062 |
| }, |
| { |
| "epoch": 0.84317704929311, |
| "grad_norm": 0.36305105686187744, |
| "learning_rate": 1e-05, |
| "loss": 0.4255, |
| "step": 3064 |
| }, |
| { |
| "epoch": 0.8437274259571395, |
| "grad_norm": 0.33078432083129883, |
| "learning_rate": 1e-05, |
| "loss": 0.4045, |
| "step": 3066 |
| }, |
| { |
| "epoch": 0.8442778026211689, |
| "grad_norm": 0.346562922000885, |
| "learning_rate": 1e-05, |
| "loss": 0.4279, |
| "step": 3068 |
| }, |
| { |
| "epoch": 0.8448281792851983, |
| "grad_norm": 0.36170172691345215, |
| "learning_rate": 1e-05, |
| "loss": 0.4139, |
| "step": 3070 |
| }, |
| { |
| "epoch": 0.8453785559492277, |
| "grad_norm": 0.360568106174469, |
| "learning_rate": 1e-05, |
| "loss": 0.4276, |
| "step": 3072 |
| }, |
| { |
| "epoch": 0.8459289326132572, |
| "grad_norm": 0.38023245334625244, |
| "learning_rate": 1e-05, |
| "loss": 0.4317, |
| "step": 3074 |
| }, |
| { |
| "epoch": 0.8464793092772867, |
| "grad_norm": 0.344732403755188, |
| "learning_rate": 1e-05, |
| "loss": 0.4109, |
| "step": 3076 |
| }, |
| { |
| "epoch": 0.8470296859413161, |
| "grad_norm": 0.35157695412635803, |
| "learning_rate": 1e-05, |
| "loss": 0.4192, |
| "step": 3078 |
| }, |
| { |
| "epoch": 0.8475800626053456, |
| "grad_norm": 0.36455512046813965, |
| "learning_rate": 1e-05, |
| "loss": 0.4247, |
| "step": 3080 |
| }, |
| { |
| "epoch": 0.8481304392693749, |
| "grad_norm": 0.39768150448799133, |
| "learning_rate": 1e-05, |
| "loss": 0.4383, |
| "step": 3082 |
| }, |
| { |
| "epoch": 0.8486808159334044, |
| "grad_norm": 0.38052836060523987, |
| "learning_rate": 1e-05, |
| "loss": 0.4199, |
| "step": 3084 |
| }, |
| { |
| "epoch": 0.8492311925974338, |
| "grad_norm": 0.3625752925872803, |
| "learning_rate": 1e-05, |
| "loss": 0.4161, |
| "step": 3086 |
| }, |
| { |
| "epoch": 0.8497815692614633, |
| "grad_norm": 0.3708571493625641, |
| "learning_rate": 1e-05, |
| "loss": 0.4402, |
| "step": 3088 |
| }, |
| { |
| "epoch": 0.8503319459254928, |
| "grad_norm": 0.3581870496273041, |
| "learning_rate": 1e-05, |
| "loss": 0.4376, |
| "step": 3090 |
| }, |
| { |
| "epoch": 0.8508823225895222, |
| "grad_norm": 0.33589842915534973, |
| "learning_rate": 1e-05, |
| "loss": 0.4144, |
| "step": 3092 |
| }, |
| { |
| "epoch": 0.8514326992535517, |
| "grad_norm": 0.35838133096694946, |
| "learning_rate": 1e-05, |
| "loss": 0.4128, |
| "step": 3094 |
| }, |
| { |
| "epoch": 0.851983075917581, |
| "grad_norm": 0.3660927712917328, |
| "learning_rate": 1e-05, |
| "loss": 0.42, |
| "step": 3096 |
| }, |
| { |
| "epoch": 0.8525334525816105, |
| "grad_norm": 0.3606925904750824, |
| "learning_rate": 1e-05, |
| "loss": 0.4288, |
| "step": 3098 |
| }, |
| { |
| "epoch": 0.85308382924564, |
| "grad_norm": 0.3437570333480835, |
| "learning_rate": 1e-05, |
| "loss": 0.4213, |
| "step": 3100 |
| }, |
| { |
| "epoch": 0.8536342059096694, |
| "grad_norm": 0.35351496934890747, |
| "learning_rate": 1e-05, |
| "loss": 0.4238, |
| "step": 3102 |
| }, |
| { |
| "epoch": 0.8541845825736989, |
| "grad_norm": 0.3595280051231384, |
| "learning_rate": 1e-05, |
| "loss": 0.4107, |
| "step": 3104 |
| }, |
| { |
| "epoch": 0.8547349592377284, |
| "grad_norm": 0.3546600937843323, |
| "learning_rate": 1e-05, |
| "loss": 0.4105, |
| "step": 3106 |
| }, |
| { |
| "epoch": 0.8552853359017578, |
| "grad_norm": 0.3654036819934845, |
| "learning_rate": 1e-05, |
| "loss": 0.4158, |
| "step": 3108 |
| }, |
| { |
| "epoch": 0.8558357125657872, |
| "grad_norm": 0.3742349445819855, |
| "learning_rate": 1e-05, |
| "loss": 0.4217, |
| "step": 3110 |
| }, |
| { |
| "epoch": 0.8563860892298166, |
| "grad_norm": 0.35527029633522034, |
| "learning_rate": 1e-05, |
| "loss": 0.414, |
| "step": 3112 |
| }, |
| { |
| "epoch": 0.8569364658938461, |
| "grad_norm": 0.3408162295818329, |
| "learning_rate": 1e-05, |
| "loss": 0.4245, |
| "step": 3114 |
| }, |
| { |
| "epoch": 0.8574868425578756, |
| "grad_norm": 0.3608722686767578, |
| "learning_rate": 1e-05, |
| "loss": 0.4346, |
| "step": 3116 |
| }, |
| { |
| "epoch": 0.858037219221905, |
| "grad_norm": 0.36163628101348877, |
| "learning_rate": 1e-05, |
| "loss": 0.422, |
| "step": 3118 |
| }, |
| { |
| "epoch": 0.8585875958859345, |
| "grad_norm": 0.35417988896369934, |
| "learning_rate": 1e-05, |
| "loss": 0.4101, |
| "step": 3120 |
| }, |
| { |
| "epoch": 0.8591379725499639, |
| "grad_norm": 0.3626682162284851, |
| "learning_rate": 1e-05, |
| "loss": 0.4147, |
| "step": 3122 |
| }, |
| { |
| "epoch": 0.8596883492139933, |
| "grad_norm": 0.34313321113586426, |
| "learning_rate": 1e-05, |
| "loss": 0.4215, |
| "step": 3124 |
| }, |
| { |
| "epoch": 0.8602387258780227, |
| "grad_norm": 0.3839293122291565, |
| "learning_rate": 1e-05, |
| "loss": 0.4173, |
| "step": 3126 |
| }, |
| { |
| "epoch": 0.8607891025420522, |
| "grad_norm": 0.3548083007335663, |
| "learning_rate": 1e-05, |
| "loss": 0.4153, |
| "step": 3128 |
| }, |
| { |
| "epoch": 0.8613394792060817, |
| "grad_norm": 0.35141652822494507, |
| "learning_rate": 1e-05, |
| "loss": 0.4066, |
| "step": 3130 |
| }, |
| { |
| "epoch": 0.8618898558701111, |
| "grad_norm": 0.3777351975440979, |
| "learning_rate": 1e-05, |
| "loss": 0.4128, |
| "step": 3132 |
| }, |
| { |
| "epoch": 0.8624402325341406, |
| "grad_norm": 0.3580491840839386, |
| "learning_rate": 1e-05, |
| "loss": 0.4048, |
| "step": 3134 |
| }, |
| { |
| "epoch": 0.86299060919817, |
| "grad_norm": 0.373532772064209, |
| "learning_rate": 1e-05, |
| "loss": 0.4173, |
| "step": 3136 |
| }, |
| { |
| "epoch": 0.8635409858621994, |
| "grad_norm": 0.35365086793899536, |
| "learning_rate": 1e-05, |
| "loss": 0.4076, |
| "step": 3138 |
| }, |
| { |
| "epoch": 0.8640913625262289, |
| "grad_norm": 0.3887852728366852, |
| "learning_rate": 1e-05, |
| "loss": 0.418, |
| "step": 3140 |
| }, |
| { |
| "epoch": 0.8646417391902583, |
| "grad_norm": 0.35862478613853455, |
| "learning_rate": 1e-05, |
| "loss": 0.4234, |
| "step": 3142 |
| }, |
| { |
| "epoch": 0.8651921158542878, |
| "grad_norm": 0.3472420275211334, |
| "learning_rate": 1e-05, |
| "loss": 0.4132, |
| "step": 3144 |
| }, |
| { |
| "epoch": 0.8657424925183173, |
| "grad_norm": 0.344862163066864, |
| "learning_rate": 1e-05, |
| "loss": 0.41, |
| "step": 3146 |
| }, |
| { |
| "epoch": 0.8662928691823467, |
| "grad_norm": 0.35329338908195496, |
| "learning_rate": 1e-05, |
| "loss": 0.4152, |
| "step": 3148 |
| }, |
| { |
| "epoch": 0.8668432458463761, |
| "grad_norm": 0.3792724907398224, |
| "learning_rate": 1e-05, |
| "loss": 0.4307, |
| "step": 3150 |
| }, |
| { |
| "epoch": 0.8673936225104055, |
| "grad_norm": 0.3611691892147064, |
| "learning_rate": 1e-05, |
| "loss": 0.4166, |
| "step": 3152 |
| }, |
| { |
| "epoch": 0.867943999174435, |
| "grad_norm": 0.35675716400146484, |
| "learning_rate": 1e-05, |
| "loss": 0.4309, |
| "step": 3154 |
| }, |
| { |
| "epoch": 0.8684943758384644, |
| "grad_norm": 0.37591055035591125, |
| "learning_rate": 1e-05, |
| "loss": 0.4174, |
| "step": 3156 |
| }, |
| { |
| "epoch": 0.8690447525024939, |
| "grad_norm": 0.34695202112197876, |
| "learning_rate": 1e-05, |
| "loss": 0.4067, |
| "step": 3158 |
| }, |
| { |
| "epoch": 0.8695951291665234, |
| "grad_norm": 0.36810246109962463, |
| "learning_rate": 1e-05, |
| "loss": 0.4236, |
| "step": 3160 |
| }, |
| { |
| "epoch": 0.8701455058305528, |
| "grad_norm": 0.3910383880138397, |
| "learning_rate": 1e-05, |
| "loss": 0.4344, |
| "step": 3162 |
| }, |
| { |
| "epoch": 0.8706958824945822, |
| "grad_norm": 0.3465210497379303, |
| "learning_rate": 1e-05, |
| "loss": 0.4156, |
| "step": 3164 |
| }, |
| { |
| "epoch": 0.8712462591586116, |
| "grad_norm": 0.39839833974838257, |
| "learning_rate": 1e-05, |
| "loss": 0.417, |
| "step": 3166 |
| }, |
| { |
| "epoch": 0.8717966358226411, |
| "grad_norm": 0.33419859409332275, |
| "learning_rate": 1e-05, |
| "loss": 0.4131, |
| "step": 3168 |
| }, |
| { |
| "epoch": 0.8723470124866706, |
| "grad_norm": 0.3657875955104828, |
| "learning_rate": 1e-05, |
| "loss": 0.4243, |
| "step": 3170 |
| }, |
| { |
| "epoch": 0.8728973891507, |
| "grad_norm": 0.35600635409355164, |
| "learning_rate": 1e-05, |
| "loss": 0.4251, |
| "step": 3172 |
| }, |
| { |
| "epoch": 0.8734477658147295, |
| "grad_norm": 0.3642902374267578, |
| "learning_rate": 1e-05, |
| "loss": 0.4334, |
| "step": 3174 |
| }, |
| { |
| "epoch": 0.873998142478759, |
| "grad_norm": 0.35452064871788025, |
| "learning_rate": 1e-05, |
| "loss": 0.4244, |
| "step": 3176 |
| }, |
| { |
| "epoch": 0.8745485191427883, |
| "grad_norm": 0.372953861951828, |
| "learning_rate": 1e-05, |
| "loss": 0.421, |
| "step": 3178 |
| }, |
| { |
| "epoch": 0.8750988958068178, |
| "grad_norm": 0.3428981900215149, |
| "learning_rate": 1e-05, |
| "loss": 0.4172, |
| "step": 3180 |
| }, |
| { |
| "epoch": 0.8756492724708472, |
| "grad_norm": 0.36314892768859863, |
| "learning_rate": 1e-05, |
| "loss": 0.4008, |
| "step": 3182 |
| }, |
| { |
| "epoch": 0.8761996491348767, |
| "grad_norm": 0.35167455673217773, |
| "learning_rate": 1e-05, |
| "loss": 0.4114, |
| "step": 3184 |
| }, |
| { |
| "epoch": 0.8767500257989062, |
| "grad_norm": 0.3496149778366089, |
| "learning_rate": 1e-05, |
| "loss": 0.4053, |
| "step": 3186 |
| }, |
| { |
| "epoch": 0.8773004024629356, |
| "grad_norm": 0.351510226726532, |
| "learning_rate": 1e-05, |
| "loss": 0.4173, |
| "step": 3188 |
| }, |
| { |
| "epoch": 0.877850779126965, |
| "grad_norm": 0.35172203183174133, |
| "learning_rate": 1e-05, |
| "loss": 0.4202, |
| "step": 3190 |
| }, |
| { |
| "epoch": 0.8784011557909944, |
| "grad_norm": 0.36200663447380066, |
| "learning_rate": 1e-05, |
| "loss": 0.3987, |
| "step": 3192 |
| }, |
| { |
| "epoch": 0.8789515324550239, |
| "grad_norm": 0.36070528626441956, |
| "learning_rate": 1e-05, |
| "loss": 0.4202, |
| "step": 3194 |
| }, |
| { |
| "epoch": 0.8795019091190533, |
| "grad_norm": 0.37506040930747986, |
| "learning_rate": 1e-05, |
| "loss": 0.4125, |
| "step": 3196 |
| }, |
| { |
| "epoch": 0.8800522857830828, |
| "grad_norm": 0.3433153033256531, |
| "learning_rate": 1e-05, |
| "loss": 0.4185, |
| "step": 3198 |
| }, |
| { |
| "epoch": 0.8806026624471123, |
| "grad_norm": 0.3672421872615814, |
| "learning_rate": 1e-05, |
| "loss": 0.4227, |
| "step": 3200 |
| }, |
| { |
| "epoch": 0.8806026624471123, |
| "eval_merge_loss": 0.36182981729507446, |
| "eval_merge_runtime": 600.3542, |
| "eval_merge_samples_per_second": 56.22, |
| "eval_merge_steps_per_second": 2.344, |
| "step": 3200 |
| }, |
| { |
| "epoch": 0.8806026624471123, |
| "eval_new_aug_datas_filtered.json_loss": 0.4833287298679352, |
| "eval_new_aug_datas_filtered.json_runtime": 10.3758, |
| "eval_new_aug_datas_filtered.json_samples_per_second": 73.922, |
| "eval_new_aug_datas_filtered.json_steps_per_second": 3.084, |
| "step": 3200 |
| }, |
| { |
| "epoch": 0.8806026624471123, |
| "eval_sharegpt_gpt4.json_loss": 0.7305224537849426, |
| "eval_sharegpt_gpt4.json_runtime": 31.7036, |
| "eval_sharegpt_gpt4.json_samples_per_second": 58.7, |
| "eval_sharegpt_gpt4.json_steps_per_second": 2.46, |
| "step": 3200 |
| }, |
| { |
| "epoch": 0.8806026624471123, |
| "eval_Table_GPT.json_loss": 0.04232589527964592, |
| "eval_Table_GPT.json_runtime": 24.994, |
| "eval_Table_GPT.json_samples_per_second": 83.74, |
| "eval_Table_GPT.json_steps_per_second": 3.521, |
| "step": 3200 |
| }, |
| { |
| "epoch": 0.8806026624471123, |
| "eval_gpt_4o_200k.json_loss": 0.7571491003036499, |
| "eval_gpt_4o_200k.json_runtime": 48.5629, |
| "eval_gpt_4o_200k.json_samples_per_second": 129.337, |
| "eval_gpt_4o_200k.json_steps_per_second": 5.395, |
| "step": 3200 |
| }, |
| { |
| "epoch": 0.8806026624471123, |
| "eval_multi_turn_datas.json_loss": 0.2720319330692291, |
| "eval_multi_turn_datas.json_runtime": 75.6646, |
| "eval_multi_turn_datas.json_samples_per_second": 52.891, |
| "eval_multi_turn_datas.json_steps_per_second": 2.207, |
| "step": 3200 |
| }, |
| { |
| "epoch": 0.8806026624471123, |
| "eval_table_python_code_datas.json_loss": 0.24331320822238922, |
| "eval_table_python_code_datas.json_runtime": 43.0385, |
| "eval_table_python_code_datas.json_samples_per_second": 50.164, |
| "eval_table_python_code_datas.json_steps_per_second": 2.091, |
| "step": 3200 |
| }, |
| { |
| "epoch": 0.8806026624471123, |
| "eval_tabular_llm_data.json_loss": 0.0862693339586258, |
| "eval_tabular_llm_data.json_runtime": 8.5454, |
| "eval_tabular_llm_data.json_samples_per_second": 28.788, |
| "eval_tabular_llm_data.json_steps_per_second": 1.287, |
| "step": 3200 |
| }, |
| { |
| "epoch": 0.8806026624471123, |
| "eval_python_code_critic_21k.json_loss": 0.5425785183906555, |
| "eval_python_code_critic_21k.json_runtime": 3.2194, |
| "eval_python_code_critic_21k.json_samples_per_second": 185.437, |
| "eval_python_code_critic_21k.json_steps_per_second": 7.765, |
| "step": 3200 |
| }, |
| { |
| "epoch": 0.8806026624471123, |
| "eval_all_merge_table_dataset.json_loss": 0.06970688700675964, |
| "eval_all_merge_table_dataset.json_runtime": 23.3201, |
| "eval_all_merge_table_dataset.json_samples_per_second": 30.532, |
| "eval_all_merge_table_dataset.json_steps_per_second": 1.286, |
| "step": 3200 |
| }, |
| { |
| "epoch": 0.8806026624471123, |
| "eval_code_feedback_multi_turn.json_loss": 0.5619133114814758, |
| "eval_code_feedback_multi_turn.json_runtime": 32.4257, |
| "eval_code_feedback_multi_turn.json_samples_per_second": 67.878, |
| "eval_code_feedback_multi_turn.json_steps_per_second": 2.837, |
| "step": 3200 |
| }, |
| { |
| "epoch": 0.8806026624471123, |
| "eval_ultrainteract_sft.json_loss": 0.4016903042793274, |
| "eval_ultrainteract_sft.json_runtime": 8.6472, |
| "eval_ultrainteract_sft.json_samples_per_second": 168.378, |
| "eval_ultrainteract_sft.json_steps_per_second": 7.054, |
| "step": 3200 |
| }, |
| { |
| "epoch": 0.8806026624471123, |
| "eval_synthetic_text_to_sql.json_loss": 0.09171026945114136, |
| "eval_synthetic_text_to_sql.json_runtime": 0.1264, |
| "eval_synthetic_text_to_sql.json_samples_per_second": 268.914, |
| "eval_synthetic_text_to_sql.json_steps_per_second": 15.818, |
| "step": 3200 |
| }, |
| { |
| "epoch": 0.8806026624471123, |
| "eval_sft_react_sql_datas.json_loss": 0.6105172038078308, |
| "eval_sft_react_sql_datas.json_runtime": 7.8425, |
| "eval_sft_react_sql_datas.json_samples_per_second": 40.038, |
| "eval_sft_react_sql_datas.json_steps_per_second": 1.785, |
| "step": 3200 |
| }, |
| { |
| "epoch": 0.8806026624471123, |
| "eval_all_merge_code.json_loss": 0.264506459236145, |
| "eval_all_merge_code.json_runtime": 0.3347, |
| "eval_all_merge_code.json_samples_per_second": 188.208, |
| "eval_all_merge_code.json_steps_per_second": 8.962, |
| "step": 3200 |
| }, |
| { |
| "epoch": 0.8806026624471123, |
| "eval_magpie_datas.json_loss": 0.4236694872379303, |
| "eval_magpie_datas.json_runtime": 2.213, |
| "eval_magpie_datas.json_samples_per_second": 77.723, |
| "eval_magpie_datas.json_steps_per_second": 3.615, |
| "step": 3200 |
| }, |
| { |
| "epoch": 0.8806026624471123, |
| "eval_train_data_for_qwen.json_loss": 0.0027615067083388567, |
| "eval_train_data_for_qwen.json_runtime": 0.2435, |
| "eval_train_data_for_qwen.json_samples_per_second": 41.06, |
| "eval_train_data_for_qwen.json_steps_per_second": 4.106, |
| "step": 3200 |
| }, |
| { |
| "epoch": 0.8806026624471123, |
| "eval_alpaca_cleaned.json_loss": 0.9028782844543457, |
| "eval_alpaca_cleaned.json_runtime": 0.1145, |
| "eval_alpaca_cleaned.json_samples_per_second": 235.866, |
| "eval_alpaca_cleaned.json_steps_per_second": 17.472, |
| "step": 3200 |
| }, |
| { |
| "epoch": 0.8806026624471123, |
| "eval_agent_instruct.json_loss": 0.20418775081634521, |
| "eval_agent_instruct.json_runtime": 0.5123, |
| "eval_agent_instruct.json_samples_per_second": 93.693, |
| "eval_agent_instruct.json_steps_per_second": 3.904, |
| "step": 3200 |
| }, |
| { |
| "epoch": 0.8806026624471123, |
| "eval_MathInstruct.json_loss": 0.2024046629667282, |
| "eval_MathInstruct.json_runtime": 0.35, |
| "eval_MathInstruct.json_samples_per_second": 162.858, |
| "eval_MathInstruct.json_steps_per_second": 8.571, |
| "step": 3200 |
| }, |
| { |
| "epoch": 0.8806026624471123, |
| "eval_tested_143k_python_alpaca.json_loss": 0.4462108612060547, |
| "eval_tested_143k_python_alpaca.json_runtime": 0.3037, |
| "eval_tested_143k_python_alpaca.json_samples_per_second": 111.97, |
| "eval_tested_143k_python_alpaca.json_steps_per_second": 6.586, |
| "step": 3200 |
| }, |
| { |
| "epoch": 0.8806026624471123, |
| "eval_xlam_function_calling_60k.json_loss": 0.008976898156106472, |
| "eval_xlam_function_calling_60k.json_runtime": 0.1004, |
| "eval_xlam_function_calling_60k.json_samples_per_second": 229.083, |
| "eval_xlam_function_calling_60k.json_steps_per_second": 9.96, |
| "step": 3200 |
| }, |
| { |
| "epoch": 0.8806026624471123, |
| "eval_alpaca_data_gpt4_chinese.json_loss": 1.512216567993164, |
| "eval_alpaca_data_gpt4_chinese.json_runtime": 0.0511, |
| "eval_alpaca_data_gpt4_chinese.json_samples_per_second": 313.242, |
| "eval_alpaca_data_gpt4_chinese.json_steps_per_second": 19.578, |
| "step": 3200 |
| }, |
| { |
| "epoch": 0.8806026624471123, |
| "eval_alpaca_gpt4_zh.json_loss": 0.9716835021972656, |
| "eval_alpaca_gpt4_zh.json_runtime": 0.0498, |
| "eval_alpaca_gpt4_zh.json_samples_per_second": 220.769, |
| "eval_alpaca_gpt4_zh.json_steps_per_second": 20.07, |
| "step": 3200 |
| }, |
| { |
| "epoch": 0.8806026624471123, |
| "eval_codefeedback_filtered_instruction.json_loss": 0.5663765072822571, |
| "eval_codefeedback_filtered_instruction.json_runtime": 0.4857, |
| "eval_codefeedback_filtered_instruction.json_samples_per_second": 41.176, |
| "eval_codefeedback_filtered_instruction.json_steps_per_second": 2.059, |
| "step": 3200 |
| } |
| ], |
| "logging_steps": 2, |
| "max_steps": 3633, |
| "num_input_tokens_seen": 0, |
| "num_train_epochs": 1, |
| "save_steps": 200, |
| "stateful_callbacks": { |
| "TrainerControl": { |
| "args": { |
| "should_epoch_stop": false, |
| "should_evaluate": false, |
| "should_log": false, |
| "should_save": true, |
| "should_training_stop": false |
| }, |
| "attributes": {} |
| } |
| }, |
| "total_flos": 2.4654685450828094e+20, |
| "train_batch_size": 3, |
| "trial_name": null, |
| "trial_params": null |
| } |