| { |
| "best_global_step": 1800, |
| "best_metric": 0.07772836834192276, |
| "best_model_checkpoint": "./outputs/powershell-production/checkpoint-1800", |
| "epoch": 1.5481573242489937, |
| "eval_steps": 100, |
| "global_step": 2500, |
| "is_hyper_param_search": false, |
| "is_local_process_zero": true, |
| "is_world_process_zero": true, |
| "log_history": [ |
| { |
| "epoch": 0.012387736141220192, |
| "grad_norm": 296.0, |
| "learning_rate": 7.054455445544555e-07, |
| "loss": 4.6719, |
| "step": 20 |
| }, |
| { |
| "epoch": 0.024775472282440383, |
| "grad_norm": 149.0, |
| "learning_rate": 1.448019801980198e-06, |
| "loss": 4.032, |
| "step": 40 |
| }, |
| { |
| "epoch": 0.037163208423660575, |
| "grad_norm": 76.0, |
| "learning_rate": 2.1905940594059405e-06, |
| "loss": 3.7462, |
| "step": 60 |
| }, |
| { |
| "epoch": 0.04955094456488077, |
| "grad_norm": 82.0, |
| "learning_rate": 2.9331683168316834e-06, |
| "loss": 2.6629, |
| "step": 80 |
| }, |
| { |
| "epoch": 0.06193868070610096, |
| "grad_norm": 67.5, |
| "learning_rate": 3.675742574257426e-06, |
| "loss": 1.4688, |
| "step": 100 |
| }, |
| { |
| "epoch": 0.06193868070610096, |
| "eval_accuracy": 0.8779996238543429, |
| "eval_f1_controversial": 0.0, |
| "eval_f1_safe": 0.9087364573635998, |
| "eval_f1_unsafe": 0.8224576644206463, |
| "eval_loss": 0.8046298027038574, |
| "eval_macro_f1": 0.5770647072614153, |
| "eval_runtime": 1995.9982, |
| "eval_samples_per_second": 5.875, |
| "eval_steps_per_second": 1.469, |
| "step": 100 |
| }, |
| { |
| "epoch": 0.07432641684732115, |
| "grad_norm": 14.6875, |
| "learning_rate": 4.418316831683168e-06, |
| "loss": 0.6992, |
| "step": 120 |
| }, |
| { |
| "epoch": 0.08671415298854135, |
| "grad_norm": 19.75, |
| "learning_rate": 5.160891089108911e-06, |
| "loss": 0.5021, |
| "step": 140 |
| }, |
| { |
| "epoch": 0.09910188912976153, |
| "grad_norm": 85.0, |
| "learning_rate": 5.903465346534654e-06, |
| "loss": 0.4075, |
| "step": 160 |
| }, |
| { |
| "epoch": 0.11148962527098173, |
| "grad_norm": 78.0, |
| "learning_rate": 6.646039603960397e-06, |
| "loss": 0.2943, |
| "step": 180 |
| }, |
| { |
| "epoch": 0.12387736141220192, |
| "grad_norm": 8.0625, |
| "learning_rate": 7.388613861386139e-06, |
| "loss": 0.3197, |
| "step": 200 |
| }, |
| { |
| "epoch": 0.12387736141220192, |
| "eval_accuracy": 0.950703116857914, |
| "eval_f1_controversial": 0.0, |
| "eval_f1_safe": 0.9602635885015949, |
| "eval_f1_unsafe": 0.9357709157041263, |
| "eval_loss": 0.21172067523002625, |
| "eval_macro_f1": 0.6320115014019071, |
| "eval_runtime": 1997.1894, |
| "eval_samples_per_second": 5.872, |
| "eval_steps_per_second": 1.468, |
| "step": 200 |
| }, |
| { |
| "epoch": 0.13626509755342212, |
| "grad_norm": 32.0, |
| "learning_rate": 8.131188118811882e-06, |
| "loss": 0.3174, |
| "step": 220 |
| }, |
| { |
| "epoch": 0.1486528336946423, |
| "grad_norm": 102.0, |
| "learning_rate": 8.873762376237623e-06, |
| "loss": 0.2517, |
| "step": 240 |
| }, |
| { |
| "epoch": 0.16104056983586248, |
| "grad_norm": 61.5, |
| "learning_rate": 9.616336633663367e-06, |
| "loss": 0.2532, |
| "step": 260 |
| }, |
| { |
| "epoch": 0.1734283059770827, |
| "grad_norm": 52.75, |
| "learning_rate": 1.0358910891089109e-05, |
| "loss": 0.1548, |
| "step": 280 |
| }, |
| { |
| "epoch": 0.18581604211830288, |
| "grad_norm": 58.25, |
| "learning_rate": 1.1101485148514851e-05, |
| "loss": 0.1953, |
| "step": 300 |
| }, |
| { |
| "epoch": 0.18581604211830288, |
| "eval_accuracy": 0.9621405323299588, |
| "eval_f1_controversial": 0.0, |
| "eval_f1_safe": 0.9684133256371634, |
| "eval_f1_unsafe": 0.9531137279206326, |
| "eval_loss": 0.1852019727230072, |
| "eval_macro_f1": 0.6405090178525987, |
| "eval_runtime": 1997.4497, |
| "eval_samples_per_second": 5.871, |
| "eval_steps_per_second": 1.468, |
| "step": 300 |
| }, |
| { |
| "epoch": 0.19820377825952307, |
| "grad_norm": 13.25, |
| "learning_rate": 1.1844059405940594e-05, |
| "loss": 0.183, |
| "step": 320 |
| }, |
| { |
| "epoch": 0.21059151440074325, |
| "grad_norm": 8.8125, |
| "learning_rate": 1.2586633663366337e-05, |
| "loss": 0.2541, |
| "step": 340 |
| }, |
| { |
| "epoch": 0.22297925054196346, |
| "grad_norm": 17.0, |
| "learning_rate": 1.332920792079208e-05, |
| "loss": 0.1851, |
| "step": 360 |
| }, |
| { |
| "epoch": 0.23536698668318365, |
| "grad_norm": 62.25, |
| "learning_rate": 1.4071782178217821e-05, |
| "loss": 0.2287, |
| "step": 380 |
| }, |
| { |
| "epoch": 0.24775472282440383, |
| "grad_norm": 8.125, |
| "learning_rate": 1.4814356435643564e-05, |
| "loss": 0.1806, |
| "step": 400 |
| }, |
| { |
| "epoch": 0.24775472282440383, |
| "eval_accuracy": 0.9719879297973246, |
| "eval_f1_controversial": 0.0, |
| "eval_f1_safe": 0.9767870924112939, |
| "eval_f1_unsafe": 0.9646897646101082, |
| "eval_loss": 0.13077302277088165, |
| "eval_macro_f1": 0.6471589523404674, |
| "eval_runtime": 1996.7162, |
| "eval_samples_per_second": 5.873, |
| "eval_steps_per_second": 1.468, |
| "step": 400 |
| }, |
| { |
| "epoch": 0.26014245896562405, |
| "grad_norm": 0.75390625, |
| "learning_rate": 1.499985848313474e-05, |
| "loss": 0.2029, |
| "step": 420 |
| }, |
| { |
| "epoch": 0.27253019510684423, |
| "grad_norm": 21.625, |
| "learning_rate": 1.4999229530058107e-05, |
| "loss": 0.228, |
| "step": 440 |
| }, |
| { |
| "epoch": 0.2849179312480644, |
| "grad_norm": 88.5, |
| "learning_rate": 1.4998097458826036e-05, |
| "loss": 0.1709, |
| "step": 460 |
| }, |
| { |
| "epoch": 0.2973056673892846, |
| "grad_norm": 1.4375, |
| "learning_rate": 1.4996462345388408e-05, |
| "loss": 0.1309, |
| "step": 480 |
| }, |
| { |
| "epoch": 0.3096934035305048, |
| "grad_norm": 73.5, |
| "learning_rate": 1.499432429944386e-05, |
| "loss": 0.1862, |
| "step": 500 |
| }, |
| { |
| "epoch": 0.3096934035305048, |
| "eval_accuracy": 0.9698197016470148, |
| "eval_f1_controversial": 0.0, |
| "eval_f1_safe": 0.9749329791096684, |
| "eval_f1_unsafe": 0.9620869563961173, |
| "eval_loss": 0.16210031509399414, |
| "eval_macro_f1": 0.6456733118352619, |
| "eval_runtime": 1996.9786, |
| "eval_samples_per_second": 5.872, |
| "eval_steps_per_second": 1.468, |
| "step": 500 |
| }, |
| { |
| "epoch": 0.32208113967172497, |
| "grad_norm": 1.21875, |
| "learning_rate": 1.4991683464432428e-05, |
| "loss": 0.0715, |
| "step": 520 |
| }, |
| { |
| "epoch": 0.3344688758129452, |
| "grad_norm": 2.625, |
| "learning_rate": 1.4988540017525911e-05, |
| "loss": 0.2116, |
| "step": 540 |
| }, |
| { |
| "epoch": 0.3468566119541654, |
| "grad_norm": 124.0, |
| "learning_rate": 1.4984894169616006e-05, |
| "loss": 0.1838, |
| "step": 560 |
| }, |
| { |
| "epoch": 0.3592443480953856, |
| "grad_norm": 3.703125, |
| "learning_rate": 1.4980746165300146e-05, |
| "loss": 0.1586, |
| "step": 580 |
| }, |
| { |
| "epoch": 0.37163208423660576, |
| "grad_norm": 14.5, |
| "learning_rate": 1.4976096282865085e-05, |
| "loss": 0.1157, |
| "step": 600 |
| }, |
| { |
| "epoch": 0.37163208423660576, |
| "eval_accuracy": 0.9606404538664539, |
| "eval_f1_controversial": 0.0, |
| "eval_f1_safe": 0.9680893048977398, |
| "eval_f1_unsafe": 0.948655118870566, |
| "eval_loss": 0.24218252301216125, |
| "eval_macro_f1": 0.6389148079227686, |
| "eval_runtime": 1997.0786, |
| "eval_samples_per_second": 5.872, |
| "eval_steps_per_second": 1.468, |
| "step": 600 |
| }, |
| { |
| "epoch": 0.38401982037782595, |
| "grad_norm": 3.640625, |
| "learning_rate": 1.4970944834268245e-05, |
| "loss": 0.1796, |
| "step": 620 |
| }, |
| { |
| "epoch": 0.39640755651904613, |
| "grad_norm": 1.0546875, |
| "learning_rate": 1.4965292165116766e-05, |
| "loss": 0.1243, |
| "step": 640 |
| }, |
| { |
| "epoch": 0.4087952926602663, |
| "grad_norm": 0.44921875, |
| "learning_rate": 1.495913865464434e-05, |
| "loss": 0.1263, |
| "step": 660 |
| }, |
| { |
| "epoch": 0.4211830288014865, |
| "grad_norm": 1.3671875, |
| "learning_rate": 1.4952484715685758e-05, |
| "loss": 0.1291, |
| "step": 680 |
| }, |
| { |
| "epoch": 0.43357076494270674, |
| "grad_norm": 6.8125, |
| "learning_rate": 1.4945330794649209e-05, |
| "loss": 0.1866, |
| "step": 700 |
| }, |
| { |
| "epoch": 0.43357076494270674, |
| "eval_accuracy": 0.9692282665075834, |
| "eval_f1_controversial": 0.0, |
| "eval_f1_safe": 0.9746824060081962, |
| "eval_f1_unsafe": 0.9607789140941978, |
| "eval_loss": 0.15051080286502838, |
| "eval_macro_f1": 0.6451537733674647, |
| "eval_runtime": 1996.287, |
| "eval_samples_per_second": 5.874, |
| "eval_steps_per_second": 1.469, |
| "step": 700 |
| }, |
| { |
| "epoch": 0.44595850108392693, |
| "grad_norm": 19.375, |
| "learning_rate": 1.493767737148634e-05, |
| "loss": 0.1021, |
| "step": 720 |
| }, |
| { |
| "epoch": 0.4583462372251471, |
| "grad_norm": 1.2265625, |
| "learning_rate": 1.492952495966005e-05, |
| "loss": 0.1652, |
| "step": 740 |
| }, |
| { |
| "epoch": 0.4707339733663673, |
| "grad_norm": 67.5, |
| "learning_rate": 1.4920874106110049e-05, |
| "loss": 0.0815, |
| "step": 760 |
| }, |
| { |
| "epoch": 0.4831217095075875, |
| "grad_norm": 7.65625, |
| "learning_rate": 1.4911725391216151e-05, |
| "loss": 0.1006, |
| "step": 780 |
| }, |
| { |
| "epoch": 0.49550944564880767, |
| "grad_norm": 2.6875, |
| "learning_rate": 1.4902079428759355e-05, |
| "loss": 0.1625, |
| "step": 800 |
| }, |
| { |
| "epoch": 0.49550944564880767, |
| "eval_accuracy": 0.9683618856636994, |
| "eval_f1_controversial": 0.0, |
| "eval_f1_safe": 0.9743468963029092, |
| "eval_f1_unsafe": 0.958734391511112, |
| "eval_loss": 0.17920953035354614, |
| "eval_macro_f1": 0.6443604292713404, |
| "eval_runtime": 1996.9022, |
| "eval_samples_per_second": 5.873, |
| "eval_steps_per_second": 1.468, |
| "step": 800 |
| }, |
| { |
| "epoch": 0.5078971817900279, |
| "grad_norm": 1.4453125, |
| "learning_rate": 1.4891936865880652e-05, |
| "loss": 0.2079, |
| "step": 820 |
| }, |
| { |
| "epoch": 0.5202849179312481, |
| "grad_norm": 205.0, |
| "learning_rate": 1.4881298383037618e-05, |
| "loss": 0.0709, |
| "step": 840 |
| }, |
| { |
| "epoch": 0.5326726540724682, |
| "grad_norm": 0.71875, |
| "learning_rate": 1.4870164693958752e-05, |
| "loss": 0.1639, |
| "step": 860 |
| }, |
| { |
| "epoch": 0.5450603902136885, |
| "grad_norm": 4.84375, |
| "learning_rate": 1.4858536545595602e-05, |
| "loss": 0.0897, |
| "step": 880 |
| }, |
| { |
| "epoch": 0.5574481263549086, |
| "grad_norm": 64.5, |
| "learning_rate": 1.4846414718072656e-05, |
| "loss": 0.2061, |
| "step": 900 |
| }, |
| { |
| "epoch": 0.5574481263549086, |
| "eval_accuracy": 0.9738056476952693, |
| "eval_f1_controversial": 0.0, |
| "eval_f1_safe": 0.9784293070754638, |
| "eval_f1_unsafe": 0.9666591344542607, |
| "eval_loss": 0.12458275258541107, |
| "eval_macro_f1": 0.6483628138432415, |
| "eval_runtime": 1995.9769, |
| "eval_samples_per_second": 5.875, |
| "eval_steps_per_second": 1.469, |
| "step": 900 |
| }, |
| { |
| "epoch": 0.5698358624961288, |
| "grad_norm": 0.314453125, |
| "learning_rate": 1.4833800024634986e-05, |
| "loss": 0.1909, |
| "step": 920 |
| }, |
| { |
| "epoch": 0.5822235986373491, |
| "grad_norm": 0.25390625, |
| "learning_rate": 1.4820693311593708e-05, |
| "loss": 0.1375, |
| "step": 940 |
| }, |
| { |
| "epoch": 0.5946113347785692, |
| "grad_norm": 2.828125, |
| "learning_rate": 1.4807095458269194e-05, |
| "loss": 0.0788, |
| "step": 960 |
| }, |
| { |
| "epoch": 0.6069990709197894, |
| "grad_norm": 10.4375, |
| "learning_rate": 1.4793007376932077e-05, |
| "loss": 0.0933, |
| "step": 980 |
| }, |
| { |
| "epoch": 0.6193868070610096, |
| "grad_norm": 42.25, |
| "learning_rate": 1.4778430012742053e-05, |
| "loss": 0.2083, |
| "step": 1000 |
| }, |
| { |
| "epoch": 0.6193868070610096, |
| "eval_accuracy": 0.9669330113334843, |
| "eval_f1_controversial": 0.0, |
| "eval_f1_safe": 0.9732207557904943, |
| "eval_f1_unsafe": 0.9567865224376395, |
| "eval_loss": 0.1653885543346405, |
| "eval_macro_f1": 0.6433357594093779, |
| "eval_runtime": 1996.6331, |
| "eval_samples_per_second": 5.873, |
| "eval_steps_per_second": 1.468, |
| "step": 1000 |
| }, |
| { |
| "epoch": 0.6317745432022298, |
| "grad_norm": 36.75, |
| "learning_rate": 1.4763364343684464e-05, |
| "loss": 0.0689, |
| "step": 1020 |
| }, |
| { |
| "epoch": 0.6441622793434499, |
| "grad_norm": 4.09375, |
| "learning_rate": 1.4747811380504698e-05, |
| "loss": 0.0984, |
| "step": 1040 |
| }, |
| { |
| "epoch": 0.6565500154846702, |
| "grad_norm": 1.7890625, |
| "learning_rate": 1.4731772166640363e-05, |
| "loss": 0.0894, |
| "step": 1060 |
| }, |
| { |
| "epoch": 0.6689377516258904, |
| "grad_norm": 0.50390625, |
| "learning_rate": 1.4715247778151297e-05, |
| "loss": 0.0969, |
| "step": 1080 |
| }, |
| { |
| "epoch": 0.6813254877671105, |
| "grad_norm": 6.1875, |
| "learning_rate": 1.4698239323647365e-05, |
| "loss": 0.2052, |
| "step": 1100 |
| }, |
| { |
| "epoch": 0.6813254877671105, |
| "eval_accuracy": 0.9829721975651158, |
| "eval_f1_controversial": 0.0, |
| "eval_f1_safe": 0.9859135534089479, |
| "eval_f1_unsafe": 0.9784783062783209, |
| "eval_loss": 0.08367573469877243, |
| "eval_macro_f1": 0.654797286562423, |
| "eval_runtime": 1996.2626, |
| "eval_samples_per_second": 5.874, |
| "eval_steps_per_second": 1.469, |
| "step": 1100 |
| }, |
| { |
| "epoch": 0.6937132239083308, |
| "grad_norm": 0.3125, |
| "learning_rate": 1.4680747944214093e-05, |
| "loss": 0.0508, |
| "step": 1120 |
| }, |
| { |
| "epoch": 0.7061009600495509, |
| "grad_norm": 0.34375, |
| "learning_rate": 1.4662774813336105e-05, |
| "loss": 0.0915, |
| "step": 1140 |
| }, |
| { |
| "epoch": 0.7184886961907712, |
| "grad_norm": 6.34375, |
| "learning_rate": 1.4644321136818402e-05, |
| "loss": 0.2418, |
| "step": 1160 |
| }, |
| { |
| "epoch": 0.7308764323319913, |
| "grad_norm": 134.0, |
| "learning_rate": 1.4625388152705457e-05, |
| "loss": 0.094, |
| "step": 1180 |
| }, |
| { |
| "epoch": 0.7432641684732115, |
| "grad_norm": 1.34375, |
| "learning_rate": 1.4605977131198166e-05, |
| "loss": 0.1194, |
| "step": 1200 |
| }, |
| { |
| "epoch": 0.7432641684732115, |
| "eval_accuracy": 0.979379790915723, |
| "eval_f1_controversial": 0.0, |
| "eval_f1_safe": 0.9830450283559968, |
| "eval_f1_unsafe": 0.9736928478058482, |
| "eval_loss": 0.11758451163768768, |
| "eval_macro_f1": 0.652245958720615, |
| "eval_runtime": 1996.2276, |
| "eval_samples_per_second": 5.875, |
| "eval_steps_per_second": 1.469, |
| "step": 1200 |
| }, |
| { |
| "epoch": 0.7556519046144317, |
| "grad_norm": 2.84375, |
| "learning_rate": 1.4586089374568616e-05, |
| "loss": 0.2312, |
| "step": 1220 |
| }, |
| { |
| "epoch": 0.7680396407556519, |
| "grad_norm": 0.2021484375, |
| "learning_rate": 1.4565726217072738e-05, |
| "loss": 0.1048, |
| "step": 1240 |
| }, |
| { |
| "epoch": 0.7804273768968721, |
| "grad_norm": 1.6484375, |
| "learning_rate": 1.454488902486077e-05, |
| "loss": 0.0491, |
| "step": 1260 |
| }, |
| { |
| "epoch": 0.7928151130380923, |
| "grad_norm": 3.109375, |
| "learning_rate": 1.452357919588562e-05, |
| "loss": 0.2089, |
| "step": 1280 |
| }, |
| { |
| "epoch": 0.8052028491793125, |
| "grad_norm": 15.0, |
| "learning_rate": 1.4501798159809068e-05, |
| "loss": 0.1261, |
| "step": 1300 |
| }, |
| { |
| "epoch": 0.8052028491793125, |
| "eval_accuracy": 0.9768035525400014, |
| "eval_f1_controversial": 0.0, |
| "eval_f1_safe": 0.981035749707623, |
| "eval_f1_unsafe": 0.9701397197118314, |
| "eval_loss": 0.13235369324684143, |
| "eval_macro_f1": 0.6503918231398181, |
| "eval_runtime": 1996.4333, |
| "eval_samples_per_second": 5.874, |
| "eval_steps_per_second": 1.469, |
| "step": 1300 |
| }, |
| { |
| "epoch": 0.8175905853205326, |
| "grad_norm": 0.373046875, |
| "learning_rate": 1.4479547377905856e-05, |
| "loss": 0.1104, |
| "step": 1320 |
| }, |
| { |
| "epoch": 0.8299783214617529, |
| "grad_norm": 1.890625, |
| "learning_rate": 1.445682834296565e-05, |
| "loss": 0.1023, |
| "step": 1340 |
| }, |
| { |
| "epoch": 0.842366057602973, |
| "grad_norm": 0.10888671875, |
| "learning_rate": 1.4433642579192891e-05, |
| "loss": 0.1085, |
| "step": 1360 |
| }, |
| { |
| "epoch": 0.8547537937441932, |
| "grad_norm": 2.984375, |
| "learning_rate": 1.4409991642104537e-05, |
| "loss": 0.1881, |
| "step": 1380 |
| }, |
| { |
| "epoch": 0.8671415298854135, |
| "grad_norm": 37.5, |
| "learning_rate": 1.4385877118425702e-05, |
| "loss": 0.1471, |
| "step": 1400 |
| }, |
| { |
| "epoch": 0.8671415298854135, |
| "eval_accuracy": 0.9815110753333501, |
| "eval_f1_controversial": 0.0, |
| "eval_f1_safe": 0.9848392442662509, |
| "eval_f1_unsafe": 0.9763106685487681, |
| "eval_loss": 0.1091982051730156, |
| "eval_macro_f1": 0.6537166376050063, |
| "eval_runtime": 1995.848, |
| "eval_samples_per_second": 5.876, |
| "eval_steps_per_second": 1.469, |
| "step": 1400 |
| }, |
| { |
| "epoch": 0.8795292660266336, |
| "grad_norm": 1.078125, |
| "learning_rate": 1.436130062598321e-05, |
| "loss": 0.1376, |
| "step": 1420 |
| }, |
| { |
| "epoch": 0.8919170021678539, |
| "grad_norm": 30.875, |
| "learning_rate": 1.4336263813597044e-05, |
| "loss": 0.2345, |
| "step": 1440 |
| }, |
| { |
| "epoch": 0.904304738309074, |
| "grad_norm": 0.40234375, |
| "learning_rate": 1.4310768360969748e-05, |
| "loss": 0.0666, |
| "step": 1460 |
| }, |
| { |
| "epoch": 0.9166924744502942, |
| "grad_norm": 58.5, |
| "learning_rate": 1.4284815978573712e-05, |
| "loss": 0.1151, |
| "step": 1480 |
| }, |
| { |
| "epoch": 0.9290802105915144, |
| "grad_norm": 20.75, |
| "learning_rate": 1.4258408407536437e-05, |
| "loss": 0.1628, |
| "step": 1500 |
| }, |
| { |
| "epoch": 0.9290802105915144, |
| "eval_accuracy": 0.9807349694471498, |
| "eval_f1_controversial": 0.0, |
| "eval_f1_safe": 0.984214613072056, |
| "eval_f1_unsafe": 0.9752874772399701, |
| "eval_loss": 0.11199858784675598, |
| "eval_macro_f1": 0.653167363437342, |
| "eval_runtime": 1994.6293, |
| "eval_samples_per_second": 5.879, |
| "eval_steps_per_second": 1.47, |
| "step": 1500 |
| }, |
| { |
| "epoch": 0.9414679467327346, |
| "grad_norm": 76.5, |
| "learning_rate": 1.4231547419523716e-05, |
| "loss": 0.073, |
| "step": 1520 |
| }, |
| { |
| "epoch": 0.9538556828739548, |
| "grad_norm": 57.0, |
| "learning_rate": 1.4204234816620775e-05, |
| "loss": 0.1174, |
| "step": 1540 |
| }, |
| { |
| "epoch": 0.966243419015175, |
| "grad_norm": 1.0234375, |
| "learning_rate": 1.4176472431211372e-05, |
| "loss": 0.0915, |
| "step": 1560 |
| }, |
| { |
| "epoch": 0.9786311551563952, |
| "grad_norm": 9.125, |
| "learning_rate": 1.4148262125854865e-05, |
| "loss": 0.1316, |
| "step": 1580 |
| }, |
| { |
| "epoch": 0.9910188912976153, |
| "grad_norm": 24.0, |
| "learning_rate": 1.4119605793161252e-05, |
| "loss": 0.1827, |
| "step": 1600 |
| }, |
| { |
| "epoch": 0.9910188912976153, |
| "eval_accuracy": 0.9785007600777688, |
| "eval_f1_controversial": 0.0, |
| "eval_f1_safe": 0.9823662269149198, |
| "eval_f1_unsafe": 0.972464825268733, |
| "eval_loss": 0.11720670759677887, |
| "eval_macro_f1": 0.6516103507278843, |
| "eval_runtime": 1994.2961, |
| "eval_samples_per_second": 5.88, |
| "eval_steps_per_second": 1.47, |
| "step": 1600 |
| }, |
| { |
| "epoch": 1.003096934035305, |
| "grad_norm": 9.4375, |
| "learning_rate": 1.4090505355664204e-05, |
| "loss": 0.0551, |
| "step": 1620 |
| }, |
| { |
| "epoch": 1.0154846701765252, |
| "grad_norm": 70.0, |
| "learning_rate": 1.4060962765692071e-05, |
| "loss": 0.1282, |
| "step": 1640 |
| }, |
| { |
| "epoch": 1.0278724063177453, |
| "grad_norm": 5.28125, |
| "learning_rate": 1.4030980005236909e-05, |
| "loss": 0.1651, |
| "step": 1660 |
| }, |
| { |
| "epoch": 1.0402601424589657, |
| "grad_norm": 0.8515625, |
| "learning_rate": 1.4000559085821516e-05, |
| "loss": 0.0613, |
| "step": 1680 |
| }, |
| { |
| "epoch": 1.0526478786001858, |
| "grad_norm": 41.0, |
| "learning_rate": 1.3969702048364466e-05, |
| "loss": 0.1932, |
| "step": 1700 |
| }, |
| { |
| "epoch": 1.0526478786001858, |
| "eval_accuracy": 0.9818333147656337, |
| "eval_f1_controversial": 0.0, |
| "eval_f1_safe": 0.9850234980707332, |
| "eval_f1_unsafe": 0.9769161682170211, |
| "eval_loss": 0.09109389036893845, |
| "eval_macro_f1": 0.6539798887625848, |
| "eval_runtime": 1994.2161, |
| "eval_samples_per_second": 5.881, |
| "eval_steps_per_second": 1.47, |
| "step": 1700 |
| }, |
| { |
| "epoch": 1.065035614741406, |
| "grad_norm": 16.625, |
| "learning_rate": 1.39384109630432e-05, |
| "loss": 0.0363, |
| "step": 1720 |
| }, |
| { |
| "epoch": 1.077423350882626, |
| "grad_norm": 6.25, |
| "learning_rate": 1.3906687929155126e-05, |
| "loss": 0.1233, |
| "step": 1740 |
| }, |
| { |
| "epoch": 1.0898110870238464, |
| "grad_norm": 1.65625, |
| "learning_rate": 1.3874535074976783e-05, |
| "loss": 0.0671, |
| "step": 1760 |
| }, |
| { |
| "epoch": 1.1021988231650666, |
| "grad_norm": 1.1171875, |
| "learning_rate": 1.3841954557621064e-05, |
| "loss": 0.1144, |
| "step": 1780 |
| }, |
| { |
| "epoch": 1.1145865593062867, |
| "grad_norm": 0.70703125, |
| "learning_rate": 1.380894856289249e-05, |
| "loss": 0.1096, |
| "step": 1800 |
| }, |
| { |
| "epoch": 1.1145865593062867, |
| "eval_accuracy": 0.9877523140744361, |
| "eval_f1_controversial": 0.0, |
| "eval_f1_safe": 0.9898415868750905, |
| "eval_f1_unsafe": 0.9845811265566877, |
| "eval_loss": 0.07772836834192276, |
| "eval_macro_f1": 0.6581409044772594, |
| "eval_runtime": 1994.4681, |
| "eval_samples_per_second": 5.88, |
| "eval_steps_per_second": 1.47, |
| "step": 1800 |
| }, |
| { |
| "epoch": 1.126974295447507, |
| "grad_norm": 0.10107421875, |
| "learning_rate": 1.3775519305140562e-05, |
| "loss": 0.0635, |
| "step": 1820 |
| }, |
| { |
| "epoch": 1.1393620315887272, |
| "grad_norm": 3.4375, |
| "learning_rate": 1.3741669027111208e-05, |
| "loss": 0.1231, |
| "step": 1840 |
| }, |
| { |
| "epoch": 1.1517497677299473, |
| "grad_norm": 63.0, |
| "learning_rate": 1.370739999979632e-05, |
| "loss": 0.03, |
| "step": 1860 |
| }, |
| { |
| "epoch": 1.1641375038711677, |
| "grad_norm": 21.25, |
| "learning_rate": 1.3672714522281388e-05, |
| "loss": 0.0977, |
| "step": 1880 |
| }, |
| { |
| "epoch": 1.1765252400123878, |
| "grad_norm": 5.96875, |
| "learning_rate": 1.3637614921591264e-05, |
| "loss": 0.1558, |
| "step": 1900 |
| }, |
| { |
| "epoch": 1.1765252400123878, |
| "eval_accuracy": 0.9833260140659309, |
| "eval_f1_controversial": 0.0, |
| "eval_f1_safe": 0.9862329815239024, |
| "eval_f1_unsafe": 0.9788627998117704, |
| "eval_loss": 0.07807961851358414, |
| "eval_macro_f1": 0.655031927111891, |
| "eval_runtime": 1993.7901, |
| "eval_samples_per_second": 5.882, |
| "eval_steps_per_second": 1.471, |
| "step": 1900 |
| }, |
| { |
| "epoch": 1.188912976153608, |
| "grad_norm": 0.134765625, |
| "learning_rate": 1.3602103552534031e-05, |
| "loss": 0.0994, |
| "step": 1920 |
| }, |
| { |
| "epoch": 1.201300712294828, |
| "grad_norm": 1.46875, |
| "learning_rate": 1.3566182797543043e-05, |
| "loss": 0.0687, |
| "step": 1940 |
| }, |
| { |
| "epoch": 1.2136884484360484, |
| "grad_norm": 1.59375, |
| "learning_rate": 1.352985506651706e-05, |
| "loss": 0.0575, |
| "step": 1960 |
| }, |
| { |
| "epoch": 1.2260761845772685, |
| "grad_norm": 0.515625, |
| "learning_rate": 1.3493122796658592e-05, |
| "loss": 0.0911, |
| "step": 1980 |
| }, |
| { |
| "epoch": 1.2384639207184887, |
| "grad_norm": 1.484375, |
| "learning_rate": 1.345598845231038e-05, |
| "loss": 0.0229, |
| "step": 2000 |
| }, |
| { |
| "epoch": 1.2384639207184887, |
| "eval_accuracy": 0.9824398915646342, |
| "eval_f1_controversial": 0.0, |
| "eval_f1_safe": 0.9854820774139722, |
| "eval_f1_unsafe": 0.9777847557227568, |
| "eval_loss": 0.08629076927900314, |
| "eval_macro_f1": 0.6544222777122429, |
| "eval_runtime": 1994.2838, |
| "eval_samples_per_second": 5.88, |
| "eval_steps_per_second": 1.47, |
| "step": 2000 |
| }, |
| { |
| "epoch": 1.2508516568597088, |
| "grad_norm": 12.1875, |
| "learning_rate": 1.3418454524790067e-05, |
| "loss": 0.0455, |
| "step": 2020 |
| }, |
| { |
| "epoch": 1.2632393930009291, |
| "grad_norm": 3.546875, |
| "learning_rate": 1.3380523532223054e-05, |
| "loss": 0.0651, |
| "step": 2040 |
| }, |
| { |
| "epoch": 1.2756271291421493, |
| "grad_norm": 5.8125, |
| "learning_rate": 1.3342198019373568e-05, |
| "loss": 0.0416, |
| "step": 2060 |
| }, |
| { |
| "epoch": 1.2880148652833694, |
| "grad_norm": 19.375, |
| "learning_rate": 1.3303480557473925e-05, |
| "loss": 0.0529, |
| "step": 2080 |
| }, |
| { |
| "epoch": 1.3004026014245897, |
| "grad_norm": 0.625, |
| "learning_rate": 1.326437374405204e-05, |
| "loss": 0.1341, |
| "step": 2100 |
| }, |
| { |
| "epoch": 1.3004026014245897, |
| "eval_accuracy": 0.9761251966079808, |
| "eval_f1_controversial": 0.0, |
| "eval_f1_safe": 0.9805451901064516, |
| "eval_f1_unsafe": 0.9691063895273341, |
| "eval_loss": 0.14960430562496185, |
| "eval_macro_f1": 0.6498838598779285, |
| "eval_runtime": 1993.9644, |
| "eval_samples_per_second": 5.881, |
| "eval_steps_per_second": 1.47, |
| "step": 2100 |
| }, |
| { |
| "epoch": 1.3127903375658099, |
| "grad_norm": 3.75, |
| "learning_rate": 1.3224880202757141e-05, |
| "loss": 0.0826, |
| "step": 2120 |
| }, |
| { |
| "epoch": 1.32517807370703, |
| "grad_norm": 0.279296875, |
| "learning_rate": 1.318500258318378e-05, |
| "loss": 0.1449, |
| "step": 2140 |
| }, |
| { |
| "epoch": 1.3375658098482504, |
| "grad_norm": 2.140625, |
| "learning_rate": 1.3144743560694046e-05, |
| "loss": 0.1547, |
| "step": 2160 |
| }, |
| { |
| "epoch": 1.3499535459894705, |
| "grad_norm": 0.93359375, |
| "learning_rate": 1.3104105836238093e-05, |
| "loss": 0.1091, |
| "step": 2180 |
| }, |
| { |
| "epoch": 1.3623412821306906, |
| "grad_norm": 19.25, |
| "learning_rate": 1.3063092136172923e-05, |
| "loss": 0.0411, |
| "step": 2200 |
| }, |
| { |
| "epoch": 1.3623412821306906, |
| "eval_accuracy": 0.9798046198832511, |
| "eval_f1_controversial": 0.0, |
| "eval_f1_safe": 0.983508466217983, |
| "eval_f1_unsafe": 0.9739551975438099, |
| "eval_loss": 0.12951034307479858, |
| "eval_macro_f1": 0.6524878879205976, |
| "eval_runtime": 1994.3539, |
| "eval_samples_per_second": 5.88, |
| "eval_steps_per_second": 1.47, |
| "step": 2200 |
| }, |
| { |
| "epoch": 1.3747290182719107, |
| "grad_norm": 1.2734375, |
| "learning_rate": 1.3021705212079489e-05, |
| "loss": 0.0346, |
| "step": 2220 |
| }, |
| { |
| "epoch": 1.3871167544131309, |
| "grad_norm": 4.84375, |
| "learning_rate": 1.2979947840578088e-05, |
| "loss": 0.05, |
| "step": 2240 |
| }, |
| { |
| "epoch": 1.3995044905543512, |
| "grad_norm": 2.53125, |
| "learning_rate": 1.2937822823142075e-05, |
| "loss": 0.0295, |
| "step": 2260 |
| }, |
| { |
| "epoch": 1.4118922266955714, |
| "grad_norm": 4.125, |
| "learning_rate": 1.2895332985909917e-05, |
| "loss": 0.0247, |
| "step": 2280 |
| }, |
| { |
| "epoch": 1.4242799628367915, |
| "grad_norm": 0.859375, |
| "learning_rate": 1.2852481179495598e-05, |
| "loss": 0.0995, |
| "step": 2300 |
| }, |
| { |
| "epoch": 1.4242799628367915, |
| "eval_accuracy": 0.9826931310292615, |
| "eval_f1_controversial": 0.0, |
| "eval_f1_safe": 0.9857166974218224, |
| "eval_f1_unsafe": 0.9780457337441255, |
| "eval_loss": 0.08260737359523773, |
| "eval_macro_f1": 0.654587477055316, |
| "eval_runtime": 1994.3821, |
| "eval_samples_per_second": 5.88, |
| "eval_steps_per_second": 1.47, |
| "step": 2300 |
| }, |
| { |
| "epoch": 1.4366676989780118, |
| "grad_norm": 0.98046875, |
| "learning_rate": 1.2809270278797362e-05, |
| "loss": 0.0237, |
| "step": 2320 |
| }, |
| { |
| "epoch": 1.449055435119232, |
| "grad_norm": 0.1689453125, |
| "learning_rate": 1.2765703182804838e-05, |
| "loss": 0.1853, |
| "step": 2340 |
| }, |
| { |
| "epoch": 1.461443171260452, |
| "grad_norm": 0.37890625, |
| "learning_rate": 1.2721782814404554e-05, |
| "loss": 0.0685, |
| "step": 2360 |
| }, |
| { |
| "epoch": 1.4738309074016724, |
| "grad_norm": 1.59375, |
| "learning_rate": 1.2677512120183843e-05, |
| "loss": 0.098, |
| "step": 2380 |
| }, |
| { |
| "epoch": 1.4862186435428926, |
| "grad_norm": 0.66796875, |
| "learning_rate": 1.2632894070233157e-05, |
| "loss": 0.0969, |
| "step": 2400 |
| }, |
| { |
| "epoch": 1.4862186435428926, |
| "eval_accuracy": 0.9844363385454663, |
| "eval_f1_controversial": 0.0, |
| "eval_f1_safe": 0.98716291618377, |
| "eval_f1_unsafe": 0.9802392592072087, |
| "eval_loss": 0.07929345965385437, |
| "eval_macro_f1": 0.6558007251303262, |
| "eval_runtime": 1993.9159, |
| "eval_samples_per_second": 5.881, |
| "eval_steps_per_second": 1.47, |
| "step": 2400 |
| }, |
| { |
| "epoch": 1.4986063796841127, |
| "grad_norm": 0.376953125, |
| "learning_rate": 1.2587931657946806e-05, |
| "loss": 0.0766, |
| "step": 2420 |
| }, |
| { |
| "epoch": 1.510994115825333, |
| "grad_norm": 4.9375, |
| "learning_rate": 1.2542627899822127e-05, |
| "loss": 0.0969, |
| "step": 2440 |
| }, |
| { |
| "epoch": 1.523381851966553, |
| "grad_norm": 4.59375, |
| "learning_rate": 1.249698583525712e-05, |
| "loss": 0.0476, |
| "step": 2460 |
| }, |
| { |
| "epoch": 1.5357695881077733, |
| "grad_norm": 0.3359375, |
| "learning_rate": 1.245100852634653e-05, |
| "loss": 0.1034, |
| "step": 2480 |
| }, |
| { |
| "epoch": 1.5481573242489937, |
| "grad_norm": 0.484375, |
| "learning_rate": 1.2404699057676415e-05, |
| "loss": 0.0748, |
| "step": 2500 |
| }, |
| { |
| "epoch": 1.5481573242489937, |
| "eval_accuracy": 0.9836527097461256, |
| "eval_f1_controversial": 0.0, |
| "eval_f1_safe": 0.9865615120011499, |
| "eval_f1_unsafe": 0.9791368071911146, |
| "eval_loss": 0.0919911116361618, |
| "eval_macro_f1": 0.6552327730640882, |
| "eval_runtime": 1995.1921, |
| "eval_samples_per_second": 5.878, |
| "eval_steps_per_second": 1.47, |
| "step": 2500 |
| } |
| ], |
| "logging_steps": 20, |
| "max_steps": 8075, |
| "num_input_tokens_seen": 0, |
| "num_train_epochs": 5, |
| "save_steps": 100, |
| "stateful_callbacks": { |
| "TrainerControl": { |
| "args": { |
| "should_epoch_stop": false, |
| "should_evaluate": false, |
| "should_log": false, |
| "should_save": true, |
| "should_training_stop": false |
| }, |
| "attributes": {} |
| } |
| }, |
| "total_flos": 1.8442537274070835e+17, |
| "train_batch_size": 2, |
| "trial_name": null, |
| "trial_params": null |
| } |
|
|