| { |
| "best_metric": null, |
| "best_model_checkpoint": null, |
| "epoch": 6.612048192771084, |
| "eval_steps": 500, |
| "global_step": 343, |
| "is_hyper_param_search": false, |
| "is_local_process_zero": true, |
| "is_world_process_zero": true, |
| "log_history": [ |
| { |
| "epoch": 0.0963855421686747, |
| "grad_norm": 1.7634485960006714, |
| "learning_rate": 4.998814299283415e-05, |
| "loss": 0.8996, |
| "num_input_tokens_seen": 78528, |
| "step": 5 |
| }, |
| { |
| "epoch": 0.1927710843373494, |
| "grad_norm": 1.3068124055862427, |
| "learning_rate": 4.995258321842611e-05, |
| "loss": 0.6806, |
| "num_input_tokens_seen": 159120, |
| "step": 10 |
| }, |
| { |
| "epoch": 0.2891566265060241, |
| "grad_norm": 1.2104840278625488, |
| "learning_rate": 4.989335440737586e-05, |
| "loss": 0.618, |
| "num_input_tokens_seen": 223552, |
| "step": 15 |
| }, |
| { |
| "epoch": 0.3855421686746988, |
| "grad_norm": 1.4112542867660522, |
| "learning_rate": 4.98105127417984e-05, |
| "loss": 0.5594, |
| "num_input_tokens_seen": 290944, |
| "step": 20 |
| }, |
| { |
| "epoch": 0.4819277108433735, |
| "grad_norm": 0.9026587605476379, |
| "learning_rate": 4.9704136802031485e-05, |
| "loss": 0.5253, |
| "num_input_tokens_seen": 364064, |
| "step": 25 |
| }, |
| { |
| "epoch": 0.5783132530120482, |
| "grad_norm": 0.9427546858787537, |
| "learning_rate": 4.957432749209755e-05, |
| "loss": 0.4794, |
| "num_input_tokens_seen": 440176, |
| "step": 30 |
| }, |
| { |
| "epoch": 0.6746987951807228, |
| "grad_norm": 1.0594468116760254, |
| "learning_rate": 4.942120794399002e-05, |
| "loss": 0.4546, |
| "num_input_tokens_seen": 517184, |
| "step": 35 |
| }, |
| { |
| "epoch": 0.7710843373493976, |
| "grad_norm": 0.9458279013633728, |
| "learning_rate": 4.9244923400875245e-05, |
| "loss": 0.4703, |
| "num_input_tokens_seen": 591424, |
| "step": 40 |
| }, |
| { |
| "epoch": 0.8674698795180723, |
| "grad_norm": 1.1610336303710938, |
| "learning_rate": 4.9045641079320484e-05, |
| "loss": 0.4407, |
| "num_input_tokens_seen": 662784, |
| "step": 45 |
| }, |
| { |
| "epoch": 0.963855421686747, |
| "grad_norm": 1.0153354406356812, |
| "learning_rate": 4.882355001067892e-05, |
| "loss": 0.4425, |
| "num_input_tokens_seen": 734784, |
| "step": 50 |
| }, |
| { |
| "epoch": 1.0602409638554218, |
| "grad_norm": 1.0889695882797241, |
| "learning_rate": 4.857886086178194e-05, |
| "loss": 0.4081, |
| "num_input_tokens_seen": 808336, |
| "step": 55 |
| }, |
| { |
| "epoch": 1.1566265060240963, |
| "grad_norm": 0.9168598055839539, |
| "learning_rate": 4.8311805735108894e-05, |
| "loss": 0.4002, |
| "num_input_tokens_seen": 882672, |
| "step": 60 |
| }, |
| { |
| "epoch": 1.2530120481927711, |
| "grad_norm": 0.8168660998344421, |
| "learning_rate": 4.802263794862385e-05, |
| "loss": 0.3587, |
| "num_input_tokens_seen": 947680, |
| "step": 65 |
| }, |
| { |
| "epoch": 1.3493975903614457, |
| "grad_norm": 1.0652003288269043, |
| "learning_rate": 4.7711631795488096e-05, |
| "loss": 0.356, |
| "num_input_tokens_seen": 1022112, |
| "step": 70 |
| }, |
| { |
| "epoch": 1.4457831325301205, |
| "grad_norm": 1.1781517267227173, |
| "learning_rate": 4.7379082283876566e-05, |
| "loss": 0.3639, |
| "num_input_tokens_seen": 1091744, |
| "step": 75 |
| }, |
| { |
| "epoch": 1.5421686746987953, |
| "grad_norm": 1.0550976991653442, |
| "learning_rate": 4.702530485714461e-05, |
| "loss": 0.3288, |
| "num_input_tokens_seen": 1163728, |
| "step": 80 |
| }, |
| { |
| "epoch": 1.6385542168674698, |
| "grad_norm": 1.3946661949157715, |
| "learning_rate": 4.665063509461097e-05, |
| "loss": 0.3563, |
| "num_input_tokens_seen": 1245728, |
| "step": 85 |
| }, |
| { |
| "epoch": 1.7349397590361446, |
| "grad_norm": 1.1458536386489868, |
| "learning_rate": 4.625542839324036e-05, |
| "loss": 0.3642, |
| "num_input_tokens_seen": 1315056, |
| "step": 90 |
| }, |
| { |
| "epoch": 1.8313253012048194, |
| "grad_norm": 1.0227209329605103, |
| "learning_rate": 4.584005963052799e-05, |
| "loss": 0.3407, |
| "num_input_tokens_seen": 1392224, |
| "step": 95 |
| }, |
| { |
| "epoch": 1.927710843373494, |
| "grad_norm": 1.0699985027313232, |
| "learning_rate": 4.540492280890555e-05, |
| "loss": 0.3216, |
| "num_input_tokens_seen": 1471008, |
| "step": 100 |
| }, |
| { |
| "epoch": 2.0240963855421685, |
| "grad_norm": 0.8573477268218994, |
| "learning_rate": 4.4950430682006e-05, |
| "loss": 0.3197, |
| "num_input_tokens_seen": 1546912, |
| "step": 105 |
| }, |
| { |
| "epoch": 2.1204819277108435, |
| "grad_norm": 1.1516242027282715, |
| "learning_rate": 4.447701436314176e-05, |
| "loss": 0.2904, |
| "num_input_tokens_seen": 1611328, |
| "step": 110 |
| }, |
| { |
| "epoch": 2.216867469879518, |
| "grad_norm": 1.0890793800354004, |
| "learning_rate": 4.398512291636768e-05, |
| "loss": 0.2498, |
| "num_input_tokens_seen": 1682528, |
| "step": 115 |
| }, |
| { |
| "epoch": 2.3132530120481927, |
| "grad_norm": 1.3621636629104614, |
| "learning_rate": 4.347522293051648e-05, |
| "loss": 0.269, |
| "num_input_tokens_seen": 1751856, |
| "step": 120 |
| }, |
| { |
| "epoch": 2.4096385542168672, |
| "grad_norm": 1.338083028793335, |
| "learning_rate": 4.294779807661105e-05, |
| "loss": 0.2838, |
| "num_input_tokens_seen": 1830288, |
| "step": 125 |
| }, |
| { |
| "epoch": 2.5060240963855422, |
| "grad_norm": 1.2083592414855957, |
| "learning_rate": 4.2403348649073174e-05, |
| "loss": 0.2466, |
| "num_input_tokens_seen": 1905296, |
| "step": 130 |
| }, |
| { |
| "epoch": 2.602409638554217, |
| "grad_norm": 1.35024094581604, |
| "learning_rate": 4.184239109116393e-05, |
| "loss": 0.2272, |
| "num_input_tokens_seen": 1974464, |
| "step": 135 |
| }, |
| { |
| "epoch": 2.6987951807228914, |
| "grad_norm": 1.3738912343978882, |
| "learning_rate": 4.126545750510605e-05, |
| "loss": 0.2484, |
| "num_input_tokens_seen": 2058176, |
| "step": 140 |
| }, |
| { |
| "epoch": 2.7951807228915664, |
| "grad_norm": 1.5877448320388794, |
| "learning_rate": 4.067309514735267e-05, |
| "loss": 0.2339, |
| "num_input_tokens_seen": 2124912, |
| "step": 145 |
| }, |
| { |
| "epoch": 2.891566265060241, |
| "grad_norm": 1.3735121488571167, |
| "learning_rate": 4.0065865909481417e-05, |
| "loss": 0.2597, |
| "num_input_tokens_seen": 2213456, |
| "step": 150 |
| }, |
| { |
| "epoch": 2.9879518072289155, |
| "grad_norm": 1.6480368375778198, |
| "learning_rate": 3.9444345785206285e-05, |
| "loss": 0.2525, |
| "num_input_tokens_seen": 2281680, |
| "step": 155 |
| }, |
| { |
| "epoch": 3.0843373493975905, |
| "grad_norm": 1.2931358814239502, |
| "learning_rate": 3.880912432401265e-05, |
| "loss": 0.1832, |
| "num_input_tokens_seen": 2349408, |
| "step": 160 |
| }, |
| { |
| "epoch": 3.180722891566265, |
| "grad_norm": 1.4131468534469604, |
| "learning_rate": 3.81608040719339e-05, |
| "loss": 0.1519, |
| "num_input_tokens_seen": 2425456, |
| "step": 165 |
| }, |
| { |
| "epoch": 3.2771084337349397, |
| "grad_norm": 1.6228159666061401, |
| "learning_rate": 3.7500000000000003e-05, |
| "loss": 0.1707, |
| "num_input_tokens_seen": 2494064, |
| "step": 170 |
| }, |
| { |
| "epoch": 3.3734939759036147, |
| "grad_norm": 1.1356842517852783, |
| "learning_rate": 3.6827338920900254e-05, |
| "loss": 0.1603, |
| "num_input_tokens_seen": 2573616, |
| "step": 175 |
| }, |
| { |
| "epoch": 3.4698795180722892, |
| "grad_norm": 1.3535553216934204, |
| "learning_rate": 3.6143458894413465e-05, |
| "loss": 0.1683, |
| "num_input_tokens_seen": 2657744, |
| "step": 180 |
| }, |
| { |
| "epoch": 3.566265060240964, |
| "grad_norm": 1.3832409381866455, |
| "learning_rate": 3.544900862216959e-05, |
| "loss": 0.1734, |
| "num_input_tokens_seen": 2721200, |
| "step": 185 |
| }, |
| { |
| "epoch": 3.662650602409639, |
| "grad_norm": 1.6430705785751343, |
| "learning_rate": 3.474464683231698e-05, |
| "loss": 0.1543, |
| "num_input_tokens_seen": 2798320, |
| "step": 190 |
| }, |
| { |
| "epoch": 3.7590361445783134, |
| "grad_norm": 1.7706836462020874, |
| "learning_rate": 3.403104165467883e-05, |
| "loss": 0.1601, |
| "num_input_tokens_seen": 2879200, |
| "step": 195 |
| }, |
| { |
| "epoch": 3.855421686746988, |
| "grad_norm": 1.7721610069274902, |
| "learning_rate": 3.330886998699149e-05, |
| "loss": 0.1911, |
| "num_input_tokens_seen": 2947024, |
| "step": 200 |
| }, |
| { |
| "epoch": 3.9518072289156625, |
| "grad_norm": 1.666278600692749, |
| "learning_rate": 3.257881685282609e-05, |
| "loss": 0.1741, |
| "num_input_tokens_seen": 3016656, |
| "step": 205 |
| }, |
| { |
| "epoch": 4.048192771084337, |
| "grad_norm": 1.099639892578125, |
| "learning_rate": 3.1841574751802076e-05, |
| "loss": 0.1334, |
| "num_input_tokens_seen": 3084416, |
| "step": 210 |
| }, |
| { |
| "epoch": 4.144578313253012, |
| "grad_norm": 1.5020925998687744, |
| "learning_rate": 3.109784300270943e-05, |
| "loss": 0.1027, |
| "num_input_tokens_seen": 3166784, |
| "step": 215 |
| }, |
| { |
| "epoch": 4.240963855421687, |
| "grad_norm": 2.203794240951538, |
| "learning_rate": 3.0348327080162435e-05, |
| "loss": 0.0955, |
| "num_input_tokens_seen": 3239584, |
| "step": 220 |
| }, |
| { |
| "epoch": 4.337349397590361, |
| "grad_norm": 1.7183223962783813, |
| "learning_rate": 2.9593737945414264e-05, |
| "loss": 0.1006, |
| "num_input_tokens_seen": 3313360, |
| "step": 225 |
| }, |
| { |
| "epoch": 4.433734939759036, |
| "grad_norm": 1.4102908372879028, |
| "learning_rate": 2.8834791371967142e-05, |
| "loss": 0.1007, |
| "num_input_tokens_seen": 3377840, |
| "step": 230 |
| }, |
| { |
| "epoch": 4.530120481927711, |
| "grad_norm": 1.214020013809204, |
| "learning_rate": 2.8072207266617855e-05, |
| "loss": 0.1033, |
| "num_input_tokens_seen": 3455904, |
| "step": 235 |
| }, |
| { |
| "epoch": 4.626506024096385, |
| "grad_norm": 1.5255635976791382, |
| "learning_rate": 2.7306708986582553e-05, |
| "loss": 0.1023, |
| "num_input_tokens_seen": 3529360, |
| "step": 240 |
| }, |
| { |
| "epoch": 4.72289156626506, |
| "grad_norm": 1.6624009609222412, |
| "learning_rate": 2.653902265334858e-05, |
| "loss": 0.1121, |
| "num_input_tokens_seen": 3605344, |
| "step": 245 |
| }, |
| { |
| "epoch": 4.8192771084337345, |
| "grad_norm": 1.7999521493911743, |
| "learning_rate": 2.5769876463904265e-05, |
| "loss": 0.1028, |
| "num_input_tokens_seen": 3678352, |
| "step": 250 |
| }, |
| { |
| "epoch": 4.9156626506024095, |
| "grad_norm": 2.1297786235809326, |
| "learning_rate": 2.5e-05, |
| "loss": 0.1055, |
| "num_input_tokens_seen": 3752608, |
| "step": 255 |
| }, |
| { |
| "epoch": 5.0120481927710845, |
| "grad_norm": 1.215146780014038, |
| "learning_rate": 2.4230123536095748e-05, |
| "loss": 0.1037, |
| "num_input_tokens_seen": 3819744, |
| "step": 260 |
| }, |
| { |
| "epoch": 5.108433734939759, |
| "grad_norm": 1.448801040649414, |
| "learning_rate": 2.346097734665143e-05, |
| "loss": 0.0633, |
| "num_input_tokens_seen": 3896592, |
| "step": 265 |
| }, |
| { |
| "epoch": 5.204819277108434, |
| "grad_norm": 1.220989465713501, |
| "learning_rate": 2.2693291013417453e-05, |
| "loss": 0.0521, |
| "num_input_tokens_seen": 3970976, |
| "step": 270 |
| }, |
| { |
| "epoch": 5.301204819277109, |
| "grad_norm": 1.3077821731567383, |
| "learning_rate": 2.192779273338215e-05, |
| "loss": 0.0625, |
| "num_input_tokens_seen": 4051760, |
| "step": 275 |
| }, |
| { |
| "epoch": 5.397590361445783, |
| "grad_norm": 2.02695369720459, |
| "learning_rate": 2.116520862803286e-05, |
| "loss": 0.059, |
| "num_input_tokens_seen": 4124096, |
| "step": 280 |
| }, |
| { |
| "epoch": 5.493975903614458, |
| "grad_norm": 1.6377320289611816, |
| "learning_rate": 2.0406262054585738e-05, |
| "loss": 0.0648, |
| "num_input_tokens_seen": 4188448, |
| "step": 285 |
| }, |
| { |
| "epoch": 5.590361445783133, |
| "grad_norm": 1.6187361478805542, |
| "learning_rate": 1.965167291983757e-05, |
| "loss": 0.0709, |
| "num_input_tokens_seen": 4261056, |
| "step": 290 |
| }, |
| { |
| "epoch": 5.686746987951807, |
| "grad_norm": 1.4855268001556396, |
| "learning_rate": 1.890215699729057e-05, |
| "loss": 0.0641, |
| "num_input_tokens_seen": 4329024, |
| "step": 295 |
| }, |
| { |
| "epoch": 5.783132530120482, |
| "grad_norm": 1.4216831922531128, |
| "learning_rate": 1.815842524819793e-05, |
| "loss": 0.0689, |
| "num_input_tokens_seen": 4406624, |
| "step": 300 |
| }, |
| { |
| "epoch": 5.879518072289157, |
| "grad_norm": 1.7383759021759033, |
| "learning_rate": 1.7421183147173915e-05, |
| "loss": 0.055, |
| "num_input_tokens_seen": 4480352, |
| "step": 305 |
| }, |
| { |
| "epoch": 5.975903614457831, |
| "grad_norm": 1.5599803924560547, |
| "learning_rate": 1.6691130013008514e-05, |
| "loss": 0.0626, |
| "num_input_tokens_seen": 4554080, |
| "step": 310 |
| }, |
| { |
| "epoch": 6.072289156626506, |
| "grad_norm": 1.028124213218689, |
| "learning_rate": 1.5968958345321178e-05, |
| "loss": 0.0465, |
| "num_input_tokens_seen": 4628576, |
| "step": 315 |
| }, |
| { |
| "epoch": 6.168674698795181, |
| "grad_norm": 1.4686311483383179, |
| "learning_rate": 1.5255353167683017e-05, |
| "loss": 0.0421, |
| "num_input_tokens_seen": 4704512, |
| "step": 320 |
| }, |
| { |
| "epoch": 6.265060240963855, |
| "grad_norm": 1.1644634008407593, |
| "learning_rate": 1.4550991377830426e-05, |
| "loss": 0.0303, |
| "num_input_tokens_seen": 4776912, |
| "step": 325 |
| }, |
| { |
| "epoch": 6.36144578313253, |
| "grad_norm": 1.090997338294983, |
| "learning_rate": 1.3856541105586545e-05, |
| "loss": 0.0337, |
| "num_input_tokens_seen": 4855600, |
| "step": 330 |
| }, |
| { |
| "epoch": 6.457831325301205, |
| "grad_norm": 1.4336110353469849, |
| "learning_rate": 1.3172661079099752e-05, |
| "loss": 0.0333, |
| "num_input_tokens_seen": 4927600, |
| "step": 335 |
| }, |
| { |
| "epoch": 6.554216867469879, |
| "grad_norm": 1.3488271236419678, |
| "learning_rate": 1.2500000000000006e-05, |
| "loss": 0.039, |
| "num_input_tokens_seen": 5003184, |
| "step": 340 |
| }, |
| { |
| "epoch": 6.612048192771084, |
| "num_input_tokens_seen": 5052448, |
| "step": 343, |
| "total_flos": 2.28822660837802e+17, |
| "train_loss": 0.2300173058541106, |
| "train_runtime": 12675.9679, |
| "train_samples_per_second": 0.655, |
| "train_steps_per_second": 0.04 |
| } |
| ], |
| "logging_steps": 5, |
| "max_steps": 510, |
| "num_input_tokens_seen": 5052448, |
| "num_train_epochs": 10, |
| "save_steps": 100, |
| "stateful_callbacks": { |
| "TrainerControl": { |
| "args": { |
| "should_epoch_stop": false, |
| "should_evaluate": false, |
| "should_log": false, |
| "should_save": true, |
| "should_training_stop": false |
| }, |
| "attributes": {} |
| } |
| }, |
| "total_flos": 2.28822660837802e+17, |
| "train_batch_size": 2, |
| "trial_name": null, |
| "trial_params": null |
| } |
|
|