| { |
| "best_global_step": 810, |
| "best_metric": 0.5652363896369934, |
| "best_model_checkpoint": "./nepal-legal-model/checkpoint-810", |
| "epoch": 1.0, |
| "eval_steps": 500, |
| "global_step": 810, |
| "is_hyper_param_search": false, |
| "is_local_process_zero": true, |
| "is_world_process_zero": true, |
| "log_history": [ |
| { |
| "epoch": 0.012345679012345678, |
| "grad_norm": 2.151370048522949, |
| "learning_rate": 3.673469387755102e-05, |
| "loss": 1.7959, |
| "step": 10 |
| }, |
| { |
| "epoch": 0.024691358024691357, |
| "grad_norm": 1.6559878587722778, |
| "learning_rate": 7.755102040816327e-05, |
| "loss": 1.1927, |
| "step": 20 |
| }, |
| { |
| "epoch": 0.037037037037037035, |
| "grad_norm": 1.125174641609192, |
| "learning_rate": 0.00011836734693877552, |
| "loss": 0.9138, |
| "step": 30 |
| }, |
| { |
| "epoch": 0.04938271604938271, |
| "grad_norm": 1.193498969078064, |
| "learning_rate": 0.00015918367346938776, |
| "loss": 0.8576, |
| "step": 40 |
| }, |
| { |
| "epoch": 0.06172839506172839, |
| "grad_norm": 1.0680336952209473, |
| "learning_rate": 0.0002, |
| "loss": 0.7993, |
| "step": 50 |
| }, |
| { |
| "epoch": 0.07407407407407407, |
| "grad_norm": 0.9959688782691956, |
| "learning_rate": 0.00019998000585179915, |
| "loss": 0.7726, |
| "step": 60 |
| }, |
| { |
| "epoch": 0.08641975308641975, |
| "grad_norm": 0.9554275274276733, |
| "learning_rate": 0.00019992003140251584, |
| "loss": 0.7653, |
| "step": 70 |
| }, |
| { |
| "epoch": 0.09876543209876543, |
| "grad_norm": 0.8977523446083069, |
| "learning_rate": 0.00019982010063491056, |
| "loss": 0.7866, |
| "step": 80 |
| }, |
| { |
| "epoch": 0.1111111111111111, |
| "grad_norm": 0.8715363144874573, |
| "learning_rate": 0.00019968025350959495, |
| "loss": 0.743, |
| "step": 90 |
| }, |
| { |
| "epoch": 0.12345679012345678, |
| "grad_norm": 0.9484530091285706, |
| "learning_rate": 0.00019950054594905194, |
| "loss": 0.7463, |
| "step": 100 |
| }, |
| { |
| "epoch": 0.13580246913580246, |
| "grad_norm": 0.899090588092804, |
| "learning_rate": 0.00019928104981527348, |
| "loss": 0.7249, |
| "step": 110 |
| }, |
| { |
| "epoch": 0.14814814814814814, |
| "grad_norm": 0.8685264587402344, |
| "learning_rate": 0.0001990218528810242, |
| "loss": 0.7151, |
| "step": 120 |
| }, |
| { |
| "epoch": 0.16049382716049382, |
| "grad_norm": 0.8665175437927246, |
| "learning_rate": 0.00019872305879474234, |
| "loss": 0.6964, |
| "step": 130 |
| }, |
| { |
| "epoch": 0.1728395061728395, |
| "grad_norm": 0.8796736598014832, |
| "learning_rate": 0.00019838478703909282, |
| "loss": 0.7099, |
| "step": 140 |
| }, |
| { |
| "epoch": 0.18518518518518517, |
| "grad_norm": 0.8723923563957214, |
| "learning_rate": 0.0001980071728831879, |
| "loss": 0.7037, |
| "step": 150 |
| }, |
| { |
| "epoch": 0.19753086419753085, |
| "grad_norm": 0.8163822889328003, |
| "learning_rate": 0.0001975903673284955, |
| "loss": 0.6695, |
| "step": 160 |
| }, |
| { |
| "epoch": 0.20987654320987653, |
| "grad_norm": 0.8660425543785095, |
| "learning_rate": 0.0001971345370484563, |
| "loss": 0.6806, |
| "step": 170 |
| }, |
| { |
| "epoch": 0.2222222222222222, |
| "grad_norm": 0.854213535785675, |
| "learning_rate": 0.00019663986432183372, |
| "loss": 0.7105, |
| "step": 180 |
| }, |
| { |
| "epoch": 0.2345679012345679, |
| "grad_norm": 0.8273674249649048, |
| "learning_rate": 0.0001961065469598239, |
| "loss": 0.6901, |
| "step": 190 |
| }, |
| { |
| "epoch": 0.24691358024691357, |
| "grad_norm": 0.8302693963050842, |
| "learning_rate": 0.00019553479822695434, |
| "loss": 0.6962, |
| "step": 200 |
| }, |
| { |
| "epoch": 0.25925925925925924, |
| "grad_norm": 0.9145434498786926, |
| "learning_rate": 0.000194924846755803, |
| "loss": 0.6889, |
| "step": 210 |
| }, |
| { |
| "epoch": 0.2716049382716049, |
| "grad_norm": 0.842943012714386, |
| "learning_rate": 0.0001942769364555721, |
| "loss": 0.6856, |
| "step": 220 |
| }, |
| { |
| "epoch": 0.2839506172839506, |
| "grad_norm": 0.8815113306045532, |
| "learning_rate": 0.0001935913264145529, |
| "loss": 0.6889, |
| "step": 230 |
| }, |
| { |
| "epoch": 0.2962962962962963, |
| "grad_norm": 0.8023223280906677, |
| "learning_rate": 0.0001928682907965207, |
| "loss": 0.6955, |
| "step": 240 |
| }, |
| { |
| "epoch": 0.30864197530864196, |
| "grad_norm": 0.7992237210273743, |
| "learning_rate": 0.0001921081187311016, |
| "loss": 0.6562, |
| "step": 250 |
| }, |
| { |
| "epoch": 0.32098765432098764, |
| "grad_norm": 0.8328211903572083, |
| "learning_rate": 0.0001913111141981543, |
| "loss": 0.6513, |
| "step": 260 |
| }, |
| { |
| "epoch": 0.3333333333333333, |
| "grad_norm": 0.8767305612564087, |
| "learning_rate": 0.00019047759590621374, |
| "loss": 0.6897, |
| "step": 270 |
| }, |
| { |
| "epoch": 0.345679012345679, |
| "grad_norm": 0.8765648007392883, |
| "learning_rate": 0.000189607897165045, |
| "loss": 0.6516, |
| "step": 280 |
| }, |
| { |
| "epoch": 0.35802469135802467, |
| "grad_norm": 1.12288498878479, |
| "learning_rate": 0.0001887023657523586, |
| "loss": 0.6391, |
| "step": 290 |
| }, |
| { |
| "epoch": 0.37037037037037035, |
| "grad_norm": 0.8884546756744385, |
| "learning_rate": 0.00018776136377473982, |
| "loss": 0.6613, |
| "step": 300 |
| }, |
| { |
| "epoch": 0.38271604938271603, |
| "grad_norm": 0.8964736461639404, |
| "learning_rate": 0.00018678526752284857, |
| "loss": 0.6629, |
| "step": 310 |
| }, |
| { |
| "epoch": 0.3950617283950617, |
| "grad_norm": 0.8375449180603027, |
| "learning_rate": 0.0001857744673209473, |
| "loss": 0.6215, |
| "step": 320 |
| }, |
| { |
| "epoch": 0.4074074074074074, |
| "grad_norm": 0.9215915203094482, |
| "learning_rate": 0.00018472936737081672, |
| "loss": 0.6443, |
| "step": 330 |
| }, |
| { |
| "epoch": 0.41975308641975306, |
| "grad_norm": 0.795101523399353, |
| "learning_rate": 0.00018365038559012265, |
| "loss": 0.6548, |
| "step": 340 |
| }, |
| { |
| "epoch": 0.43209876543209874, |
| "grad_norm": 0.8599629402160645, |
| "learning_rate": 0.00018253795344529757, |
| "loss": 0.6382, |
| "step": 350 |
| }, |
| { |
| "epoch": 0.4444444444444444, |
| "grad_norm": 0.9184603691101074, |
| "learning_rate": 0.0001813925157790049, |
| "loss": 0.6254, |
| "step": 360 |
| }, |
| { |
| "epoch": 0.4567901234567901, |
| "grad_norm": 0.8265050649642944, |
| "learning_rate": 0.0001802145306322537, |
| "loss": 0.6172, |
| "step": 370 |
| }, |
| { |
| "epoch": 0.4691358024691358, |
| "grad_norm": 0.9218090772628784, |
| "learning_rate": 0.00017900446906123603, |
| "loss": 0.6319, |
| "step": 380 |
| }, |
| { |
| "epoch": 0.48148148148148145, |
| "grad_norm": 0.8071914315223694, |
| "learning_rate": 0.00017776281494895956, |
| "loss": 0.6195, |
| "step": 390 |
| }, |
| { |
| "epoch": 0.49382716049382713, |
| "grad_norm": 0.8605950474739075, |
| "learning_rate": 0.00017649006481175098, |
| "loss": 0.6706, |
| "step": 400 |
| }, |
| { |
| "epoch": 0.5061728395061729, |
| "grad_norm": 0.9902074337005615, |
| "learning_rate": 0.00017518672760070763, |
| "loss": 0.643, |
| "step": 410 |
| }, |
| { |
| "epoch": 0.5185185185185185, |
| "grad_norm": 0.9705262184143066, |
| "learning_rate": 0.00017385332449817656, |
| "loss": 0.6433, |
| "step": 420 |
| }, |
| { |
| "epoch": 0.5308641975308642, |
| "grad_norm": 0.8431525230407715, |
| "learning_rate": 0.00017249038870934262, |
| "loss": 0.6375, |
| "step": 430 |
| }, |
| { |
| "epoch": 0.5432098765432098, |
| "grad_norm": 0.8588557243347168, |
| "learning_rate": 0.00017109846524900887, |
| "loss": 0.6143, |
| "step": 440 |
| }, |
| { |
| "epoch": 0.5555555555555556, |
| "grad_norm": 0.8512520790100098, |
| "learning_rate": 0.00016967811072365421, |
| "loss": 0.6175, |
| "step": 450 |
| }, |
| { |
| "epoch": 0.5679012345679012, |
| "grad_norm": 0.8612751960754395, |
| "learning_rate": 0.0001682298931088563, |
| "loss": 0.6428, |
| "step": 460 |
| }, |
| { |
| "epoch": 0.5802469135802469, |
| "grad_norm": 0.8494360446929932, |
| "learning_rate": 0.00016675439152216747, |
| "loss": 0.6103, |
| "step": 470 |
| }, |
| { |
| "epoch": 0.5925925925925926, |
| "grad_norm": 0.8205723166465759, |
| "learning_rate": 0.0001652521959915356, |
| "loss": 0.5897, |
| "step": 480 |
| }, |
| { |
| "epoch": 0.6049382716049383, |
| "grad_norm": 0.900737464427948, |
| "learning_rate": 0.00016372390721936198, |
| "loss": 0.6038, |
| "step": 490 |
| }, |
| { |
| "epoch": 0.6172839506172839, |
| "grad_norm": 0.7930091619491577, |
| "learning_rate": 0.00016217013634229073, |
| "loss": 0.6256, |
| "step": 500 |
| }, |
| { |
| "epoch": 0.6296296296296297, |
| "grad_norm": 0.8809173703193665, |
| "learning_rate": 0.00016059150468682558, |
| "loss": 0.6111, |
| "step": 510 |
| }, |
| { |
| "epoch": 0.6419753086419753, |
| "grad_norm": 0.8604996800422668, |
| "learning_rate": 0.000158988643520872, |
| "loss": 0.6153, |
| "step": 520 |
| }, |
| { |
| "epoch": 0.654320987654321, |
| "grad_norm": 0.8070431351661682, |
| "learning_rate": 0.00015736219380130395, |
| "loss": 0.6154, |
| "step": 530 |
| }, |
| { |
| "epoch": 0.6666666666666666, |
| "grad_norm": 1.0792070627212524, |
| "learning_rate": 0.0001557128059176561, |
| "loss": 0.6152, |
| "step": 540 |
| }, |
| { |
| "epoch": 0.6790123456790124, |
| "grad_norm": 0.8521204590797424, |
| "learning_rate": 0.00015404113943204422, |
| "loss": 0.5986, |
| "step": 550 |
| }, |
| { |
| "epoch": 0.691358024691358, |
| "grad_norm": 0.7541177272796631, |
| "learning_rate": 0.00015234786281541736, |
| "loss": 0.6048, |
| "step": 560 |
| }, |
| { |
| "epoch": 0.7037037037037037, |
| "grad_norm": 0.850120484828949, |
| "learning_rate": 0.0001506336531802479, |
| "loss": 0.5929, |
| "step": 570 |
| }, |
| { |
| "epoch": 0.7160493827160493, |
| "grad_norm": 0.8251618146896362, |
| "learning_rate": 0.0001488991960097657, |
| "loss": 0.6171, |
| "step": 580 |
| }, |
| { |
| "epoch": 0.7283950617283951, |
| "grad_norm": 1.0346400737762451, |
| "learning_rate": 0.00014714518488384513, |
| "loss": 0.6045, |
| "step": 590 |
| }, |
| { |
| "epoch": 0.7407407407407407, |
| "grad_norm": 0.829596221446991, |
| "learning_rate": 0.000145372321201654, |
| "loss": 0.5872, |
| "step": 600 |
| }, |
| { |
| "epoch": 0.7530864197530864, |
| "grad_norm": 0.9026337265968323, |
| "learning_rate": 0.00014358131390117645, |
| "loss": 0.5964, |
| "step": 610 |
| }, |
| { |
| "epoch": 0.7654320987654321, |
| "grad_norm": 0.8854096531867981, |
| "learning_rate": 0.00014177287917572031, |
| "loss": 0.579, |
| "step": 620 |
| }, |
| { |
| "epoch": 0.7777777777777778, |
| "grad_norm": 0.7996705770492554, |
| "learning_rate": 0.00013994774018752387, |
| "loss": 0.5915, |
| "step": 630 |
| }, |
| { |
| "epoch": 0.7901234567901234, |
| "grad_norm": 0.8649770021438599, |
| "learning_rate": 0.00013810662677857547, |
| "loss": 0.6079, |
| "step": 640 |
| }, |
| { |
| "epoch": 0.8024691358024691, |
| "grad_norm": 0.8632396459579468, |
| "learning_rate": 0.00013625027517876216, |
| "loss": 0.5921, |
| "step": 650 |
| }, |
| { |
| "epoch": 0.8148148148148148, |
| "grad_norm": 0.8673484325408936, |
| "learning_rate": 0.00013437942771146388, |
| "loss": 0.5807, |
| "step": 660 |
| }, |
| { |
| "epoch": 0.8271604938271605, |
| "grad_norm": 0.8567239046096802, |
| "learning_rate": 0.00013249483249671117, |
| "loss": 0.6008, |
| "step": 670 |
| }, |
| { |
| "epoch": 0.8395061728395061, |
| "grad_norm": 0.8294961452484131, |
| "learning_rate": 0.00013059724315202443, |
| "loss": 0.5972, |
| "step": 680 |
| }, |
| { |
| "epoch": 0.8518518518518519, |
| "grad_norm": 0.9110690951347351, |
| "learning_rate": 0.0001286874184910553, |
| "loss": 0.5823, |
| "step": 690 |
| }, |
| { |
| "epoch": 0.8641975308641975, |
| "grad_norm": 0.8697801828384399, |
| "learning_rate": 0.00012676612222015, |
| "loss": 0.5792, |
| "step": 700 |
| }, |
| { |
| "epoch": 0.8765432098765432, |
| "grad_norm": 0.9493820071220398, |
| "learning_rate": 0.00012483412263295603, |
| "loss": 0.545, |
| "step": 710 |
| }, |
| { |
| "epoch": 0.8888888888888888, |
| "grad_norm": 0.8390475511550903, |
| "learning_rate": 0.0001228921923031948, |
| "loss": 0.5844, |
| "step": 720 |
| }, |
| { |
| "epoch": 0.9012345679012346, |
| "grad_norm": 0.8430746793746948, |
| "learning_rate": 0.00012094110777572256, |
| "loss": 0.6021, |
| "step": 730 |
| }, |
| { |
| "epoch": 0.9135802469135802, |
| "grad_norm": 0.7902705073356628, |
| "learning_rate": 0.00011898164925600315, |
| "loss": 0.585, |
| "step": 740 |
| }, |
| { |
| "epoch": 0.9259259259259259, |
| "grad_norm": 0.87884521484375, |
| "learning_rate": 0.00011701460029811733, |
| "loss": 0.5998, |
| "step": 750 |
| }, |
| { |
| "epoch": 0.9382716049382716, |
| "grad_norm": 0.924493134021759, |
| "learning_rate": 0.00011504074749143269, |
| "loss": 0.5945, |
| "step": 760 |
| }, |
| { |
| "epoch": 0.9506172839506173, |
| "grad_norm": 0.8170025944709778, |
| "learning_rate": 0.00011306088014606018, |
| "loss": 0.5814, |
| "step": 770 |
| }, |
| { |
| "epoch": 0.9629629629629629, |
| "grad_norm": 0.9573339819908142, |
| "learning_rate": 0.00011107578997722219, |
| "loss": 0.577, |
| "step": 780 |
| }, |
| { |
| "epoch": 0.9753086419753086, |
| "grad_norm": 0.8860905766487122, |
| "learning_rate": 0.00010908627078865927, |
| "loss": 0.5972, |
| "step": 790 |
| }, |
| { |
| "epoch": 0.9876543209876543, |
| "grad_norm": 0.8376012444496155, |
| "learning_rate": 0.00010709311815520151, |
| "loss": 0.5554, |
| "step": 800 |
| }, |
| { |
| "epoch": 1.0, |
| "grad_norm": 0.8038386106491089, |
| "learning_rate": 0.00010509712910463174, |
| "loss": 0.5622, |
| "step": 810 |
| }, |
| { |
| "epoch": 1.0, |
| "eval_loss": 0.5652363896369934, |
| "eval_runtime": 496.7418, |
| "eval_samples_per_second": 2.899, |
| "eval_steps_per_second": 0.725, |
| "step": 810 |
| } |
| ], |
| "logging_steps": 10, |
| "max_steps": 1620, |
| "num_input_tokens_seen": 0, |
| "num_train_epochs": 2, |
| "save_steps": 500, |
| "stateful_callbacks": { |
| "TrainerControl": { |
| "args": { |
| "should_epoch_stop": false, |
| "should_evaluate": false, |
| "should_log": false, |
| "should_save": true, |
| "should_training_stop": false |
| }, |
| "attributes": {} |
| } |
| }, |
| "total_flos": 1.4237273708455526e+17, |
| "train_batch_size": 4, |
| "trial_name": null, |
| "trial_params": null |
| } |
|
|