| { | |
| "best_global_step": null, | |
| "best_metric": null, | |
| "best_model_checkpoint": null, | |
| "epoch": 0.9996332966629996, | |
| "eval_steps": 50, | |
| "global_step": 1363, | |
| "is_hyper_param_search": false, | |
| "is_local_process_zero": true, | |
| "is_world_process_zero": true, | |
| "log_history": [ | |
| { | |
| "epoch": 0.018335166850018333, | |
| "grad_norm": 107.5, | |
| "learning_rate": 4.911958914159941e-07, | |
| "logits/chosen": -1.3776370286941528, | |
| "logits/rejected": -1.2306550741195679, | |
| "logps/chosen": -411.12249755859375, | |
| "logps/rejected": -369.989990234375, | |
| "loss": 0.6907, | |
| "rewards/accuracies": 0.23749999701976776, | |
| "rewards/chosen": 0.0027563476469367743, | |
| "rewards/margins": 0.00781173724681139, | |
| "rewards/rejected": -0.0050407410599291325, | |
| "step": 25 | |
| }, | |
| { | |
| "epoch": 0.03667033370003667, | |
| "grad_norm": 100.5, | |
| "learning_rate": 4.820249449743213e-07, | |
| "logits/chosen": -1.514649510383606, | |
| "logits/rejected": null, | |
| "logps/chosen": -430.3500061035156, | |
| "logps/rejected": -395.49749755859375, | |
| "loss": 0.6956, | |
| "rewards/accuracies": 0.24250000715255737, | |
| "rewards/chosen": 0.004258117638528347, | |
| "rewards/margins": 0.0015393065987154841, | |
| "rewards/rejected": 0.002720947377383709, | |
| "step": 50 | |
| }, | |
| { | |
| "epoch": 0.03667033370003667, | |
| "eval_logits/chosen": -1.534035086631775, | |
| "eval_logits/rejected": -1.435206413269043, | |
| "eval_logps/chosen": -377.2894592285156, | |
| "eval_logps/rejected": -350.399658203125, | |
| "eval_loss": 0.6936107873916626, | |
| "eval_rewards/accuracies": 0.23519736528396606, | |
| "eval_rewards/chosen": 0.0011091734049841762, | |
| "eval_rewards/margins": 0.005430823657661676, | |
| "eval_rewards/rejected": -0.00432586669921875, | |
| "eval_runtime": 183.6723, | |
| "eval_samples_per_second": 6.599, | |
| "eval_steps_per_second": 0.828, | |
| "step": 50 | |
| }, | |
| { | |
| "epoch": 0.05500550055005501, | |
| "grad_norm": 125.5, | |
| "learning_rate": 4.7285399853264857e-07, | |
| "logits/chosen": -1.5126913785934448, | |
| "logits/rejected": -1.361869215965271, | |
| "logps/chosen": -440.635009765625, | |
| "logps/rejected": -423.7550048828125, | |
| "loss": 0.7034, | |
| "rewards/accuracies": 0.1899999976158142, | |
| "rewards/chosen": 0.001637954730540514, | |
| "rewards/margins": -0.015832213684916496, | |
| "rewards/rejected": 0.01747741736471653, | |
| "step": 75 | |
| }, | |
| { | |
| "epoch": 0.07334066740007333, | |
| "grad_norm": 102.0, | |
| "learning_rate": 4.636830520909757e-07, | |
| "logits/chosen": -1.4500524997711182, | |
| "logits/rejected": -1.3146843910217285, | |
| "logps/chosen": -444.37249755859375, | |
| "logps/rejected": -417.4649963378906, | |
| "loss": 0.6997, | |
| "rewards/accuracies": 0.2549999952316284, | |
| "rewards/chosen": -0.0015260315267369151, | |
| "rewards/margins": -0.005664672702550888, | |
| "rewards/rejected": 0.004122619517147541, | |
| "step": 100 | |
| }, | |
| { | |
| "epoch": 0.07334066740007333, | |
| "eval_logits/chosen": -1.533652901649475, | |
| "eval_logits/rejected": -1.4345542192459106, | |
| "eval_logps/chosen": -377.2927551269531, | |
| "eval_logps/rejected": -350.2820739746094, | |
| "eval_loss": 0.6919476985931396, | |
| "eval_rewards/accuracies": 0.23766447603702545, | |
| "eval_rewards/chosen": 0.008811799809336662, | |
| "eval_rewards/margins": 0.007162897381931543, | |
| "eval_rewards/rejected": 0.0016679262043908238, | |
| "eval_runtime": 183.6267, | |
| "eval_samples_per_second": 6.6, | |
| "eval_steps_per_second": 0.828, | |
| "step": 100 | |
| }, | |
| { | |
| "epoch": 0.09167583425009168, | |
| "grad_norm": 101.0, | |
| "learning_rate": 4.54512105649303e-07, | |
| "logits/chosen": -1.429406762123108, | |
| "logits/rejected": null, | |
| "logps/chosen": -436.73748779296875, | |
| "logps/rejected": -403.6050109863281, | |
| "loss": 0.6898, | |
| "rewards/accuracies": 0.2549999952316284, | |
| "rewards/chosen": 0.01620025560259819, | |
| "rewards/margins": 0.012928924523293972, | |
| "rewards/rejected": 0.0032881165388971567, | |
| "step": 125 | |
| }, | |
| { | |
| "epoch": 0.11001100110011001, | |
| "grad_norm": 108.5, | |
| "learning_rate": 4.4534115920763023e-07, | |
| "logits/chosen": -1.4991015195846558, | |
| "logits/rejected": -1.323161005973816, | |
| "logps/chosen": -458.50750732421875, | |
| "logps/rejected": -410.1050109863281, | |
| "loss": 0.6872, | |
| "rewards/accuracies": 0.2775000035762787, | |
| "rewards/chosen": 0.017397155985236168, | |
| "rewards/margins": 0.019593505188822746, | |
| "rewards/rejected": -0.0021939086727797985, | |
| "step": 150 | |
| }, | |
| { | |
| "epoch": 0.11001100110011001, | |
| "eval_logits/chosen": -1.5319759845733643, | |
| "eval_logits/rejected": -1.432373046875, | |
| "eval_logps/chosen": -377.319091796875, | |
| "eval_logps/rejected": -350.2779541015625, | |
| "eval_loss": 0.6953898668289185, | |
| "eval_rewards/accuracies": 0.23026315867900848, | |
| "eval_rewards/chosen": 0.006703075487166643, | |
| "eval_rewards/margins": 0.002716064453125, | |
| "eval_rewards/rejected": 0.003982142545282841, | |
| "eval_runtime": 183.6328, | |
| "eval_samples_per_second": 6.6, | |
| "eval_steps_per_second": 0.828, | |
| "step": 150 | |
| }, | |
| { | |
| "epoch": 0.12834616795012835, | |
| "grad_norm": 107.0, | |
| "learning_rate": 4.3617021276595744e-07, | |
| "logits/chosen": null, | |
| "logits/rejected": -1.2460485696792603, | |
| "logps/chosen": -432.5924987792969, | |
| "logps/rejected": -419.864990234375, | |
| "loss": 0.6915, | |
| "rewards/accuracies": 0.2775000035762787, | |
| "rewards/chosen": 0.02058563195168972, | |
| "rewards/margins": 0.010975646786391735, | |
| "rewards/rejected": 0.009614868089556694, | |
| "step": 175 | |
| }, | |
| { | |
| "epoch": 0.14668133480014667, | |
| "grad_norm": 108.5, | |
| "learning_rate": 4.2699926632428464e-07, | |
| "logits/chosen": -1.5161709785461426, | |
| "logits/rejected": -1.2818182706832886, | |
| "logps/chosen": -443.9725036621094, | |
| "logps/rejected": -402.0425109863281, | |
| "loss": 0.6887, | |
| "rewards/accuracies": 0.2574999928474426, | |
| "rewards/chosen": 0.023589782416820526, | |
| "rewards/margins": 0.015720978379249573, | |
| "rewards/rejected": 0.007876587100327015, | |
| "step": 200 | |
| }, | |
| { | |
| "epoch": 0.14668133480014667, | |
| "eval_logits/chosen": -1.529746651649475, | |
| "eval_logits/rejected": -1.430788516998291, | |
| "eval_logps/chosen": -377.256591796875, | |
| "eval_logps/rejected": -350.24835205078125, | |
| "eval_loss": 0.6939330697059631, | |
| "eval_rewards/accuracies": 0.23273026943206787, | |
| "eval_rewards/chosen": 0.010709461756050587, | |
| "eval_rewards/margins": 0.003307191887870431, | |
| "eval_rewards/rejected": 0.007396697998046875, | |
| "eval_runtime": 183.6754, | |
| "eval_samples_per_second": 6.599, | |
| "eval_steps_per_second": 0.828, | |
| "step": 200 | |
| }, | |
| { | |
| "epoch": 0.16501650165016502, | |
| "grad_norm": 88.5, | |
| "learning_rate": 4.1782831988261185e-07, | |
| "logits/chosen": -1.4257241487503052, | |
| "logits/rejected": -1.1836668252944946, | |
| "logps/chosen": -410.3500061035156, | |
| "logps/rejected": -390.2325134277344, | |
| "loss": 0.7044, | |
| "rewards/accuracies": 0.23000000417232513, | |
| "rewards/chosen": -0.004284057766199112, | |
| "rewards/margins": -0.016582336276769638, | |
| "rewards/rejected": 0.012289123609662056, | |
| "step": 225 | |
| }, | |
| { | |
| "epoch": 0.18335166850018336, | |
| "grad_norm": 96.5, | |
| "learning_rate": 4.086573734409391e-07, | |
| "logits/chosen": -1.4944552183151245, | |
| "logits/rejected": -1.2437607049942017, | |
| "logps/chosen": -431.1650085449219, | |
| "logps/rejected": -407.6449890136719, | |
| "loss": 0.692, | |
| "rewards/accuracies": 0.2574999928474426, | |
| "rewards/chosen": 0.02587219327688217, | |
| "rewards/margins": 0.011192931793630123, | |
| "rewards/rejected": 0.01466323807835579, | |
| "step": 250 | |
| }, | |
| { | |
| "epoch": 0.18335166850018336, | |
| "eval_logits/chosen": -1.5293899774551392, | |
| "eval_logits/rejected": -1.4297887086868286, | |
| "eval_logps/chosen": -377.2006530761719, | |
| "eval_logps/rejected": -350.2894592285156, | |
| "eval_loss": 0.6944616436958313, | |
| "eval_rewards/accuracies": 0.23026315867900848, | |
| "eval_rewards/chosen": 0.012256572023034096, | |
| "eval_rewards/margins": 0.004177294205874205, | |
| "eval_rewards/rejected": 0.00807877629995346, | |
| "eval_runtime": 183.6704, | |
| "eval_samples_per_second": 6.599, | |
| "eval_steps_per_second": 0.828, | |
| "step": 250 | |
| }, | |
| { | |
| "epoch": 0.20168683535020168, | |
| "grad_norm": 113.0, | |
| "learning_rate": 3.994864269992663e-07, | |
| "logits/chosen": -1.4493054151535034, | |
| "logits/rejected": -1.3403005599975586, | |
| "logps/chosen": -430.05999755859375, | |
| "logps/rejected": -410.04998779296875, | |
| "loss": 0.699, | |
| "rewards/accuracies": 0.2775000035762787, | |
| "rewards/chosen": 0.016153564676642418, | |
| "rewards/margins": -0.0009202575893141329, | |
| "rewards/rejected": 0.01705078035593033, | |
| "step": 275 | |
| }, | |
| { | |
| "epoch": 0.22002200220022003, | |
| "grad_norm": 130.0, | |
| "learning_rate": 3.903154805575935e-07, | |
| "logits/chosen": -1.3159887790679932, | |
| "logits/rejected": null, | |
| "logps/chosen": -428.38751220703125, | |
| "logps/rejected": -422.7925109863281, | |
| "loss": 0.6976, | |
| "rewards/accuracies": 0.24250000715255737, | |
| "rewards/chosen": 0.01615051180124283, | |
| "rewards/margins": -0.00038818360189907253, | |
| "rewards/rejected": 0.016550598666071892, | |
| "step": 300 | |
| }, | |
| { | |
| "epoch": 0.22002200220022003, | |
| "eval_logits/chosen": -1.528660774230957, | |
| "eval_logits/rejected": -1.4295405149459839, | |
| "eval_logps/chosen": -377.2154541015625, | |
| "eval_logps/rejected": -350.28125, | |
| "eval_loss": 0.6940748691558838, | |
| "eval_rewards/accuracies": 0.2409539520740509, | |
| "eval_rewards/chosen": 0.010566108860075474, | |
| "eval_rewards/margins": 0.005987267941236496, | |
| "eval_rewards/rejected": 0.00458752503618598, | |
| "eval_runtime": 183.6126, | |
| "eval_samples_per_second": 6.601, | |
| "eval_steps_per_second": 0.828, | |
| "step": 300 | |
| }, | |
| { | |
| "epoch": 0.23835716905023835, | |
| "grad_norm": 84.5, | |
| "learning_rate": 3.811445341159207e-07, | |
| "logits/chosen": -1.5210723876953125, | |
| "logits/rejected": -1.3567346334457397, | |
| "logps/chosen": -407.4012451171875, | |
| "logps/rejected": -392.4549865722656, | |
| "loss": 0.6985, | |
| "rewards/accuracies": 0.25999999046325684, | |
| "rewards/chosen": 0.013754882849752903, | |
| "rewards/margins": -0.0055097960866987705, | |
| "rewards/rejected": 0.01927383430302143, | |
| "step": 325 | |
| }, | |
| { | |
| "epoch": 0.2566923359002567, | |
| "grad_norm": 119.5, | |
| "learning_rate": 3.7197358767424797e-07, | |
| "logits/chosen": null, | |
| "logits/rejected": -1.246942162513733, | |
| "logps/chosen": -436.572509765625, | |
| "logps/rejected": -423.2974853515625, | |
| "loss": 0.6926, | |
| "rewards/accuracies": 0.25999999046325684, | |
| "rewards/chosen": 0.013691024854779243, | |
| "rewards/margins": 0.0036587524227797985, | |
| "rewards/rejected": 0.01002708449959755, | |
| "step": 350 | |
| }, | |
| { | |
| "epoch": 0.2566923359002567, | |
| "eval_logits/chosen": -1.5272730588912964, | |
| "eval_logits/rejected": -1.4281728267669678, | |
| "eval_logps/chosen": -377.25, | |
| "eval_logps/rejected": -350.162841796875, | |
| "eval_loss": 0.6958668231964111, | |
| "eval_rewards/accuracies": 0.22615131735801697, | |
| "eval_rewards/chosen": 0.013866023160517216, | |
| "eval_rewards/margins": 0.00107443961314857, | |
| "eval_rewards/rejected": 0.012793390080332756, | |
| "eval_runtime": 183.7079, | |
| "eval_samples_per_second": 6.597, | |
| "eval_steps_per_second": 0.827, | |
| "step": 350 | |
| }, | |
| { | |
| "epoch": 0.27502750275027504, | |
| "grad_norm": 107.5, | |
| "learning_rate": 3.6280264123257523e-07, | |
| "logits/chosen": -1.4459222555160522, | |
| "logits/rejected": -1.3630120754241943, | |
| "logps/chosen": -455.4324951171875, | |
| "logps/rejected": -415.572509765625, | |
| "loss": 0.6992, | |
| "rewards/accuracies": 0.2150000035762787, | |
| "rewards/chosen": 0.016205139458179474, | |
| "rewards/margins": -0.0074514769949018955, | |
| "rewards/rejected": 0.02364654466509819, | |
| "step": 375 | |
| }, | |
| { | |
| "epoch": 0.29336266960029334, | |
| "grad_norm": 111.5, | |
| "learning_rate": 3.536316947909024e-07, | |
| "logits/chosen": -1.4444499015808105, | |
| "logits/rejected": -1.2767553329467773, | |
| "logps/chosen": -425.5450134277344, | |
| "logps/rejected": -379.00250244140625, | |
| "loss": 0.6962, | |
| "rewards/accuracies": 0.24500000476837158, | |
| "rewards/chosen": 0.01742446981370449, | |
| "rewards/margins": -0.0001217651370097883, | |
| "rewards/rejected": 0.01754501275718212, | |
| "step": 400 | |
| }, | |
| { | |
| "epoch": 0.29336266960029334, | |
| "eval_logits/chosen": -1.5267494916915894, | |
| "eval_logits/rejected": -1.4275885820388794, | |
| "eval_logps/chosen": -377.2302551269531, | |
| "eval_logps/rejected": -350.2368469238281, | |
| "eval_loss": 0.6963567733764648, | |
| "eval_rewards/accuracies": 0.23848684132099152, | |
| "eval_rewards/chosen": 0.012201008386909962, | |
| "eval_rewards/margins": 0.0003203341912012547, | |
| "eval_rewards/rejected": 0.011873997747898102, | |
| "eval_runtime": 183.6474, | |
| "eval_samples_per_second": 6.6, | |
| "eval_steps_per_second": 0.828, | |
| "step": 400 | |
| }, | |
| { | |
| "epoch": 0.3116978364503117, | |
| "grad_norm": 90.5, | |
| "learning_rate": 3.4446074834922964e-07, | |
| "logits/chosen": null, | |
| "logits/rejected": -1.3805227279663086, | |
| "logps/chosen": -404.4849853515625, | |
| "logps/rejected": -390.4962463378906, | |
| "loss": 0.6861, | |
| "rewards/accuracies": 0.27250000834465027, | |
| "rewards/chosen": 0.022739257663488388, | |
| "rewards/margins": 0.018987426534295082, | |
| "rewards/rejected": 0.003755493089556694, | |
| "step": 425 | |
| }, | |
| { | |
| "epoch": 0.33003300330033003, | |
| "grad_norm": 108.0, | |
| "learning_rate": 3.3528980190755684e-07, | |
| "logits/chosen": -1.461000919342041, | |
| "logits/rejected": -1.2780396938323975, | |
| "logps/chosen": -414.50750732421875, | |
| "logps/rejected": -408.3500061035156, | |
| "loss": 0.6926, | |
| "rewards/accuracies": 0.2750000059604645, | |
| "rewards/chosen": 0.018767546862363815, | |
| "rewards/margins": 0.005626831203699112, | |
| "rewards/rejected": 0.01314392127096653, | |
| "step": 450 | |
| }, | |
| { | |
| "epoch": 0.33003300330033003, | |
| "eval_logits/chosen": -1.5271477699279785, | |
| "eval_logits/rejected": -1.4275585412979126, | |
| "eval_logps/chosen": -377.1217041015625, | |
| "eval_logps/rejected": -350.1554260253906, | |
| "eval_loss": 0.6922184228897095, | |
| "eval_rewards/accuracies": 0.24671052396297455, | |
| "eval_rewards/chosen": 0.018702909350395203, | |
| "eval_rewards/margins": 0.007914392277598381, | |
| "eval_rewards/rejected": 0.010786859318614006, | |
| "eval_runtime": 183.7247, | |
| "eval_samples_per_second": 6.597, | |
| "eval_steps_per_second": 0.827, | |
| "step": 450 | |
| }, | |
| { | |
| "epoch": 0.3483681701503484, | |
| "grad_norm": 125.5, | |
| "learning_rate": 3.261188554658841e-07, | |
| "logits/chosen": -1.432356595993042, | |
| "logits/rejected": -1.2240395545959473, | |
| "logps/chosen": -438.5174865722656, | |
| "logps/rejected": -409.4549865722656, | |
| "loss": 0.7048, | |
| "rewards/accuracies": 0.2199999988079071, | |
| "rewards/chosen": 0.011979827657341957, | |
| "rewards/margins": -0.015614014118909836, | |
| "rewards/rejected": 0.02758941613137722, | |
| "step": 475 | |
| }, | |
| { | |
| "epoch": 0.3667033370003667, | |
| "grad_norm": 100.0, | |
| "learning_rate": 3.1694790902421125e-07, | |
| "logits/chosen": -1.4630835056304932, | |
| "logits/rejected": -1.3660200834274292, | |
| "logps/chosen": -431.2174987792969, | |
| "logps/rejected": -405.39373779296875, | |
| "loss": 0.6841, | |
| "rewards/accuracies": 0.2874999940395355, | |
| "rewards/chosen": 0.02687728963792324, | |
| "rewards/margins": 0.02634170465171337, | |
| "rewards/rejected": 0.0005162048619240522, | |
| "step": 500 | |
| }, | |
| { | |
| "epoch": 0.3667033370003667, | |
| "eval_logits/chosen": -1.5262771844863892, | |
| "eval_logits/rejected": -1.4270143508911133, | |
| "eval_logps/chosen": -377.21875, | |
| "eval_logps/rejected": -350.1620178222656, | |
| "eval_loss": 0.6944873929023743, | |
| "eval_rewards/accuracies": 0.23848684132099152, | |
| "eval_rewards/chosen": 0.014546644873917103, | |
| "eval_rewards/margins": 0.0022586018312722445, | |
| "eval_rewards/rejected": 0.012288796715438366, | |
| "eval_runtime": 183.6962, | |
| "eval_samples_per_second": 6.598, | |
| "eval_steps_per_second": 0.827, | |
| "step": 500 | |
| }, | |
| { | |
| "epoch": 0.385038503850385, | |
| "grad_norm": 114.5, | |
| "learning_rate": 3.077769625825385e-07, | |
| "logits/chosen": -1.4934594631195068, | |
| "logits/rejected": -1.316256046295166, | |
| "logps/chosen": -446.3924865722656, | |
| "logps/rejected": -413.6081237792969, | |
| "loss": 0.6988, | |
| "rewards/accuracies": 0.2524999976158142, | |
| "rewards/chosen": 0.017985381186008453, | |
| "rewards/margins": -0.003291168250143528, | |
| "rewards/rejected": 0.021276244893670082, | |
| "step": 525 | |
| }, | |
| { | |
| "epoch": 0.40337367070040336, | |
| "grad_norm": 88.5, | |
| "learning_rate": 2.986060161408657e-07, | |
| "logits/chosen": -1.3428008556365967, | |
| "logits/rejected": -1.264622688293457, | |
| "logps/chosen": -425.7149963378906, | |
| "logps/rejected": -417.7699890136719, | |
| "loss": 0.6939, | |
| "rewards/accuracies": 0.26499998569488525, | |
| "rewards/chosen": 0.021398009732365608, | |
| "rewards/margins": 0.007883605547249317, | |
| "rewards/rejected": 0.013519592583179474, | |
| "step": 550 | |
| }, | |
| { | |
| "epoch": 0.40337367070040336, | |
| "eval_logits/chosen": -1.5262964963912964, | |
| "eval_logits/rejected": -1.4273858070373535, | |
| "eval_logps/chosen": -377.16119384765625, | |
| "eval_logps/rejected": -350.23272705078125, | |
| "eval_loss": 0.6917800903320312, | |
| "eval_rewards/accuracies": 0.24588815867900848, | |
| "eval_rewards/chosen": 0.015943175181746483, | |
| "eval_rewards/margins": 0.008131027221679688, | |
| "eval_rewards/rejected": 0.007824345491826534, | |
| "eval_runtime": 183.6867, | |
| "eval_samples_per_second": 6.598, | |
| "eval_steps_per_second": 0.827, | |
| "step": 550 | |
| }, | |
| { | |
| "epoch": 0.4217088375504217, | |
| "grad_norm": 95.0, | |
| "learning_rate": 2.8943506969919296e-07, | |
| "logits/chosen": -1.4449292421340942, | |
| "logits/rejected": -1.2489904165267944, | |
| "logps/chosen": -434.9649963378906, | |
| "logps/rejected": -409.8125, | |
| "loss": 0.6943, | |
| "rewards/accuracies": 0.24250000715255737, | |
| "rewards/chosen": 0.02709350548684597, | |
| "rewards/margins": 0.005654601845890284, | |
| "rewards/rejected": 0.021434325724840164, | |
| "step": 575 | |
| }, | |
| { | |
| "epoch": 0.44004400440044006, | |
| "grad_norm": 137.0, | |
| "learning_rate": 2.8026412325752017e-07, | |
| "logits/chosen": -1.4576478004455566, | |
| "logits/rejected": -1.312269926071167, | |
| "logps/chosen": -417.552490234375, | |
| "logps/rejected": -382.11248779296875, | |
| "loss": 0.6943, | |
| "rewards/accuracies": 0.2750000059604645, | |
| "rewards/chosen": 0.02476959303021431, | |
| "rewards/margins": 0.0036778259091079235, | |
| "rewards/rejected": 0.021088866516947746, | |
| "step": 600 | |
| }, | |
| { | |
| "epoch": 0.44004400440044006, | |
| "eval_logits/chosen": -1.5249665975570679, | |
| "eval_logits/rejected": -1.4268261194229126, | |
| "eval_logps/chosen": -377.1414489746094, | |
| "eval_logps/rejected": -350.2212219238281, | |
| "eval_loss": 0.6920637488365173, | |
| "eval_rewards/accuracies": 0.2319078892469406, | |
| "eval_rewards/chosen": 0.018822118639945984, | |
| "eval_rewards/margins": 0.007972114719450474, | |
| "eval_rewards/rejected": 0.010848095640540123, | |
| "eval_runtime": 183.6918, | |
| "eval_samples_per_second": 6.598, | |
| "eval_steps_per_second": 0.827, | |
| "step": 600 | |
| }, | |
| { | |
| "epoch": 0.4583791712504584, | |
| "grad_norm": 113.0, | |
| "learning_rate": 2.7109317681584737e-07, | |
| "logits/chosen": -1.3854376077651978, | |
| "logits/rejected": null, | |
| "logps/chosen": -430.0950012207031, | |
| "logps/rejected": -378.32000732421875, | |
| "loss": 0.699, | |
| "rewards/accuracies": 0.22750000655651093, | |
| "rewards/chosen": 0.0017354583833366632, | |
| "rewards/margins": -0.00892486609518528, | |
| "rewards/rejected": 0.010651321150362492, | |
| "step": 625 | |
| }, | |
| { | |
| "epoch": 0.4767143381004767, | |
| "grad_norm": 90.5, | |
| "learning_rate": 2.619222303741746e-07, | |
| "logits/chosen": -1.5457714796066284, | |
| "logits/rejected": -1.3327239751815796, | |
| "logps/chosen": -443.5487365722656, | |
| "logps/rejected": -417.052490234375, | |
| "loss": 0.6899, | |
| "rewards/accuracies": 0.2574999928474426, | |
| "rewards/chosen": 0.02349899336695671, | |
| "rewards/margins": 0.01311645470559597, | |
| "rewards/rejected": 0.010387726128101349, | |
| "step": 650 | |
| }, | |
| { | |
| "epoch": 0.4767143381004767, | |
| "eval_logits/chosen": -1.5264860391616821, | |
| "eval_logits/rejected": -1.4273176193237305, | |
| "eval_logps/chosen": -377.1990051269531, | |
| "eval_logps/rejected": -350.26397705078125, | |
| "eval_loss": 0.6901170611381531, | |
| "eval_rewards/accuracies": 0.25986841320991516, | |
| "eval_rewards/chosen": 0.019354568794369698, | |
| "eval_rewards/margins": 0.013522299006581306, | |
| "eval_rewards/rejected": 0.005830463487654924, | |
| "eval_runtime": 183.7079, | |
| "eval_samples_per_second": 6.597, | |
| "eval_steps_per_second": 0.827, | |
| "step": 650 | |
| }, | |
| { | |
| "epoch": 0.49504950495049505, | |
| "grad_norm": 95.0, | |
| "learning_rate": 2.5275128393250183e-07, | |
| "logits/chosen": -1.4289679527282715, | |
| "logits/rejected": -1.2180871963500977, | |
| "logps/chosen": -425.9100036621094, | |
| "logps/rejected": -406.896240234375, | |
| "loss": 0.6923, | |
| "rewards/accuracies": 0.2750000059604645, | |
| "rewards/chosen": 0.030851593241095543, | |
| "rewards/margins": 0.008693389594554901, | |
| "rewards/rejected": 0.022180786356329918, | |
| "step": 675 | |
| }, | |
| { | |
| "epoch": 0.5133846718005134, | |
| "grad_norm": 112.0, | |
| "learning_rate": 2.4358033749082904e-07, | |
| "logits/chosen": -1.4609838724136353, | |
| "logits/rejected": -1.2298834323883057, | |
| "logps/chosen": -420.2749938964844, | |
| "logps/rejected": -415.197509765625, | |
| "loss": 0.6966, | |
| "rewards/accuracies": 0.26499998569488525, | |
| "rewards/chosen": 0.026063384488224983, | |
| "rewards/margins": 0.0011651611421257257, | |
| "rewards/rejected": 0.024905700236558914, | |
| "step": 700 | |
| }, | |
| { | |
| "epoch": 0.5133846718005134, | |
| "eval_logits/chosen": -1.5253231525421143, | |
| "eval_logits/rejected": -1.4264132976531982, | |
| "eval_logps/chosen": -377.131591796875, | |
| "eval_logps/rejected": -350.2458801269531, | |
| "eval_loss": 0.6932626962661743, | |
| "eval_rewards/accuracies": 0.2368421107530594, | |
| "eval_rewards/chosen": 0.019523821771144867, | |
| "eval_rewards/margins": 0.005818919278681278, | |
| "eval_rewards/rejected": 0.01370515301823616, | |
| "eval_runtime": 183.7031, | |
| "eval_samples_per_second": 6.598, | |
| "eval_steps_per_second": 0.827, | |
| "step": 700 | |
| }, | |
| { | |
| "epoch": 0.5317198386505317, | |
| "grad_norm": 88.0, | |
| "learning_rate": 2.3440939104915627e-07, | |
| "logits/chosen": -1.414352536201477, | |
| "logits/rejected": -1.2296130657196045, | |
| "logps/chosen": -406.614990234375, | |
| "logps/rejected": -388.9599914550781, | |
| "loss": 0.706, | |
| "rewards/accuracies": 0.23999999463558197, | |
| "rewards/chosen": 0.014584961347281933, | |
| "rewards/margins": -0.01819046027958393, | |
| "rewards/rejected": 0.032747648656368256, | |
| "step": 725 | |
| }, | |
| { | |
| "epoch": 0.5500550055005501, | |
| "grad_norm": 117.5, | |
| "learning_rate": 2.2523844460748347e-07, | |
| "logits/chosen": -1.6487542390823364, | |
| "logits/rejected": -1.505617380142212, | |
| "logps/chosen": -415.32000732421875, | |
| "logps/rejected": -388.822509765625, | |
| "loss": 0.6909, | |
| "rewards/accuracies": 0.27000001072883606, | |
| "rewards/chosen": 0.027718810364603996, | |
| "rewards/margins": 0.00900314375758171, | |
| "rewards/rejected": 0.01870529167354107, | |
| "step": 750 | |
| }, | |
| { | |
| "epoch": 0.5500550055005501, | |
| "eval_logits/chosen": -1.5252236127853394, | |
| "eval_logits/rejected": -1.4261868000030518, | |
| "eval_logps/chosen": -377.18585205078125, | |
| "eval_logps/rejected": -350.2220458984375, | |
| "eval_loss": 0.6894466876983643, | |
| "eval_rewards/accuracies": 0.24259868264198303, | |
| "eval_rewards/chosen": 0.019582247361540794, | |
| "eval_rewards/margins": 0.012999284081161022, | |
| "eval_rewards/rejected": 0.006589788943529129, | |
| "eval_runtime": 183.6443, | |
| "eval_samples_per_second": 6.6, | |
| "eval_steps_per_second": 0.828, | |
| "step": 750 | |
| }, | |
| { | |
| "epoch": 0.5683901723505684, | |
| "grad_norm": 100.5, | |
| "learning_rate": 2.160674981658107e-07, | |
| "logits/chosen": null, | |
| "logits/rejected": -1.316674828529358, | |
| "logps/chosen": -418.80499267578125, | |
| "logps/rejected": -402.65875244140625, | |
| "loss": 0.6904, | |
| "rewards/accuracies": 0.2574999928474426, | |
| "rewards/chosen": 0.03401367366313934, | |
| "rewards/margins": 0.013011474162340164, | |
| "rewards/rejected": 0.020986633375287056, | |
| "step": 775 | |
| }, | |
| { | |
| "epoch": 0.5867253392005867, | |
| "grad_norm": 88.5, | |
| "learning_rate": 2.0689655172413793e-07, | |
| "logits/chosen": -1.4807385206222534, | |
| "logits/rejected": -1.2883676290512085, | |
| "logps/chosen": -425.5325012207031, | |
| "logps/rejected": -403.2650146484375, | |
| "loss": 0.6893, | |
| "rewards/accuracies": 0.2750000059604645, | |
| "rewards/chosen": 0.02227478101849556, | |
| "rewards/margins": 0.012271422892808914, | |
| "rewards/rejected": 0.010012512095272541, | |
| "step": 800 | |
| }, | |
| { | |
| "epoch": 0.5867253392005867, | |
| "eval_logits/chosen": -1.5252236127853394, | |
| "eval_logits/rejected": -1.4258739948272705, | |
| "eval_logps/chosen": -377.2467041015625, | |
| "eval_logps/rejected": -350.1842041015625, | |
| "eval_loss": 0.6948097348213196, | |
| "eval_rewards/accuracies": 0.23355263471603394, | |
| "eval_rewards/chosen": 0.014182441867887974, | |
| "eval_rewards/margins": 0.002087994711473584, | |
| "eval_rewards/rejected": 0.012089378200471401, | |
| "eval_runtime": 183.6876, | |
| "eval_samples_per_second": 6.598, | |
| "eval_steps_per_second": 0.827, | |
| "step": 800 | |
| }, | |
| { | |
| "epoch": 0.605060506050605, | |
| "grad_norm": 80.5, | |
| "learning_rate": 1.9772560528246516e-07, | |
| "logits/chosen": -1.494598388671875, | |
| "logits/rejected": null, | |
| "logps/chosen": -427.4649963378906, | |
| "logps/rejected": -406.42498779296875, | |
| "loss": 0.6926, | |
| "rewards/accuracies": 0.23499999940395355, | |
| "rewards/chosen": 0.018895873799920082, | |
| "rewards/margins": 0.010480347089469433, | |
| "rewards/rejected": 0.008425445295870304, | |
| "step": 825 | |
| }, | |
| { | |
| "epoch": 0.6233956729006234, | |
| "grad_norm": 114.0, | |
| "learning_rate": 1.8855465884079237e-07, | |
| "logits/chosen": -1.546240210533142, | |
| "logits/rejected": -1.399204134941101, | |
| "logps/chosen": -416.7099914550781, | |
| "logps/rejected": -391.4775085449219, | |
| "loss": 0.6964, | |
| "rewards/accuracies": 0.2750000059604645, | |
| "rewards/chosen": 0.011256103403866291, | |
| "rewards/margins": 0.0009084320045076311, | |
| "rewards/rejected": 0.010347671806812286, | |
| "step": 850 | |
| }, | |
| { | |
| "epoch": 0.6233956729006234, | |
| "eval_logits/chosen": -1.524696707725525, | |
| "eval_logits/rejected": -1.4260004758834839, | |
| "eval_logps/chosen": -377.21875, | |
| "eval_logps/rejected": -350.2689208984375, | |
| "eval_loss": 0.6896400451660156, | |
| "eval_rewards/accuracies": 0.25740131735801697, | |
| "eval_rewards/chosen": 0.0202172938734293, | |
| "eval_rewards/margins": 0.012124011293053627, | |
| "eval_rewards/rejected": 0.008097749203443527, | |
| "eval_runtime": 183.7166, | |
| "eval_samples_per_second": 6.597, | |
| "eval_steps_per_second": 0.827, | |
| "step": 850 | |
| }, | |
| { | |
| "epoch": 0.6417308397506417, | |
| "grad_norm": 109.0, | |
| "learning_rate": 1.793837123991196e-07, | |
| "logits/chosen": -1.505387544631958, | |
| "logits/rejected": -1.3573604822158813, | |
| "logps/chosen": -424.7099914550781, | |
| "logps/rejected": -410.5162353515625, | |
| "loss": 0.7019, | |
| "rewards/accuracies": 0.23999999463558197, | |
| "rewards/chosen": 0.009785156697034836, | |
| "rewards/margins": -0.007974395528435707, | |
| "rewards/rejected": 0.017765656113624573, | |
| "step": 875 | |
| }, | |
| { | |
| "epoch": 0.6600660066006601, | |
| "grad_norm": 102.5, | |
| "learning_rate": 1.702127659574468e-07, | |
| "logits/chosen": -1.519402265548706, | |
| "logits/rejected": -1.3906365633010864, | |
| "logps/chosen": -425.7699890136719, | |
| "logps/rejected": -410.8399963378906, | |
| "loss": 0.6951, | |
| "rewards/accuracies": 0.2874999940395355, | |
| "rewards/chosen": 0.021494140848517418, | |
| "rewards/margins": 0.003243102924898267, | |
| "rewards/rejected": 0.018245697021484375, | |
| "step": 900 | |
| }, | |
| { | |
| "epoch": 0.6600660066006601, | |
| "eval_logits/chosen": -1.5251914262771606, | |
| "eval_logits/rejected": -1.42595636844635, | |
| "eval_logps/chosen": -377.2302551269531, | |
| "eval_logps/rejected": -350.2039489746094, | |
| "eval_loss": 0.6908003091812134, | |
| "eval_rewards/accuracies": 0.24424342811107635, | |
| "eval_rewards/chosen": 0.01913321577012539, | |
| "eval_rewards/margins": 0.010163256898522377, | |
| "eval_rewards/rejected": 0.008959268219769001, | |
| "eval_runtime": 183.6568, | |
| "eval_samples_per_second": 6.599, | |
| "eval_steps_per_second": 0.828, | |
| "step": 900 | |
| }, | |
| { | |
| "epoch": 0.6784011734506784, | |
| "grad_norm": 100.0, | |
| "learning_rate": 1.6104181951577403e-07, | |
| "logits/chosen": -1.548006534576416, | |
| "logits/rejected": -1.3452630043029785, | |
| "logps/chosen": -442.739990234375, | |
| "logps/rejected": -419.04217529296875, | |
| "loss": 0.696, | |
| "rewards/accuracies": 0.24500000476837158, | |
| "rewards/chosen": 0.027564391493797302, | |
| "rewards/margins": 0.0037604523822665215, | |
| "rewards/rejected": 0.0237899012863636, | |
| "step": 925 | |
| }, | |
| { | |
| "epoch": 0.6967363403006968, | |
| "grad_norm": 103.0, | |
| "learning_rate": 1.5187087307410123e-07, | |
| "logits/chosen": -1.4493930339813232, | |
| "logits/rejected": null, | |
| "logps/chosen": -446.68499755859375, | |
| "logps/rejected": -405.67999267578125, | |
| "loss": 0.6963, | |
| "rewards/accuracies": 0.24250000715255737, | |
| "rewards/chosen": 0.028537597507238388, | |
| "rewards/margins": 0.0007598876836709678, | |
| "rewards/rejected": 0.027764510363340378, | |
| "step": 950 | |
| }, | |
| { | |
| "epoch": 0.6967363403006968, | |
| "eval_logits/chosen": -1.5243659019470215, | |
| "eval_logits/rejected": -1.4253950119018555, | |
| "eval_logps/chosen": -377.1759948730469, | |
| "eval_logps/rejected": -350.2154541015625, | |
| "eval_loss": 0.693004846572876, | |
| "eval_rewards/accuracies": 0.24013157188892365, | |
| "eval_rewards/chosen": 0.018045425415039062, | |
| "eval_rewards/margins": 0.00674940412864089, | |
| "eval_rewards/rejected": 0.011289797723293304, | |
| "eval_runtime": 183.7309, | |
| "eval_samples_per_second": 6.597, | |
| "eval_steps_per_second": 0.827, | |
| "step": 950 | |
| }, | |
| { | |
| "epoch": 0.7150715071507151, | |
| "grad_norm": 98.5, | |
| "learning_rate": 1.4269992663242846e-07, | |
| "logits/chosen": -1.6109237670898438, | |
| "logits/rejected": -1.3813133239746094, | |
| "logps/chosen": -421.8275146484375, | |
| "logps/rejected": -401.74249267578125, | |
| "loss": 0.6906, | |
| "rewards/accuracies": 0.22750000655651093, | |
| "rewards/chosen": 0.019305266439914703, | |
| "rewards/margins": 0.009662169963121414, | |
| "rewards/rejected": 0.009637146256864071, | |
| "step": 975 | |
| }, | |
| { | |
| "epoch": 0.7334066740007334, | |
| "grad_norm": 87.5, | |
| "learning_rate": 1.3352898019075567e-07, | |
| "logits/chosen": -1.4307568073272705, | |
| "logits/rejected": -1.2897155284881592, | |
| "logps/chosen": -428.614990234375, | |
| "logps/rejected": -416.0874938964844, | |
| "loss": 0.6913, | |
| "rewards/accuracies": 0.26750001311302185, | |
| "rewards/chosen": 0.027062682434916496, | |
| "rewards/margins": 0.010381927713751793, | |
| "rewards/rejected": 0.016668854281306267, | |
| "step": 1000 | |
| }, | |
| { | |
| "epoch": 0.7334066740007334, | |
| "eval_logits/chosen": -1.5244911909103394, | |
| "eval_logits/rejected": -1.4257001876831055, | |
| "eval_logps/chosen": -377.1414489746094, | |
| "eval_logps/rejected": -350.1036071777344, | |
| "eval_loss": 0.6934689283370972, | |
| "eval_rewards/accuracies": 0.22944079339504242, | |
| "eval_rewards/chosen": 0.023268749937415123, | |
| "eval_rewards/margins": 0.004975419491529465, | |
| "eval_rewards/rejected": 0.018304072320461273, | |
| "eval_runtime": 183.7804, | |
| "eval_samples_per_second": 6.595, | |
| "eval_steps_per_second": 0.827, | |
| "step": 1000 | |
| }, | |
| { | |
| "epoch": 0.7517418408507518, | |
| "grad_norm": 108.5, | |
| "learning_rate": 1.243580337490829e-07, | |
| "logits/chosen": -1.545839786529541, | |
| "logits/rejected": -1.3644452095031738, | |
| "logps/chosen": -423.6575012207031, | |
| "logps/rejected": -404.2225036621094, | |
| "loss": 0.6925, | |
| "rewards/accuracies": 0.27250000834465027, | |
| "rewards/chosen": 0.030840760096907616, | |
| "rewards/margins": 0.007886047475039959, | |
| "rewards/rejected": 0.02297058142721653, | |
| "step": 1025 | |
| }, | |
| { | |
| "epoch": 0.77007700770077, | |
| "grad_norm": 84.5, | |
| "learning_rate": 1.1518708730741012e-07, | |
| "logits/chosen": null, | |
| "logits/rejected": -1.2828707695007324, | |
| "logps/chosen": -411.5050048828125, | |
| "logps/rejected": -384.7875061035156, | |
| "loss": 0.6972, | |
| "rewards/accuracies": 0.26499998569488525, | |
| "rewards/chosen": 0.010768127627670765, | |
| "rewards/margins": -0.001839599572122097, | |
| "rewards/rejected": 0.012620086781680584, | |
| "step": 1050 | |
| }, | |
| { | |
| "epoch": 0.77007700770077, | |
| "eval_logits/chosen": -1.5248092412948608, | |
| "eval_logits/rejected": -1.4259984493255615, | |
| "eval_logps/chosen": -377.1019592285156, | |
| "eval_logps/rejected": -350.2105407714844, | |
| "eval_loss": 0.6938557028770447, | |
| "eval_rewards/accuracies": 0.25082236528396606, | |
| "eval_rewards/chosen": 0.020134273916482925, | |
| "eval_rewards/margins": 0.004903642926365137, | |
| "eval_rewards/rejected": 0.015223653987050056, | |
| "eval_runtime": 183.6531, | |
| "eval_samples_per_second": 6.599, | |
| "eval_steps_per_second": 0.828, | |
| "step": 1050 | |
| }, | |
| { | |
| "epoch": 0.7884121745507884, | |
| "grad_norm": 105.0, | |
| "learning_rate": 1.0601614086573733e-07, | |
| "logits/chosen": -1.4120160341262817, | |
| "logits/rejected": -1.2870084047317505, | |
| "logps/chosen": -426.989990234375, | |
| "logps/rejected": -409.2774963378906, | |
| "loss": 0.6944, | |
| "rewards/accuracies": 0.27000001072883606, | |
| "rewards/chosen": 0.02048644982278347, | |
| "rewards/margins": 0.004820861853659153, | |
| "rewards/rejected": 0.01568496786057949, | |
| "step": 1075 | |
| }, | |
| { | |
| "epoch": 0.8067473414008067, | |
| "grad_norm": 97.5, | |
| "learning_rate": 9.684519442406455e-08, | |
| "logits/chosen": -1.5207568407058716, | |
| "logits/rejected": -1.271683931350708, | |
| "logps/chosen": -412.1675109863281, | |
| "logps/rejected": -380.9200134277344, | |
| "loss": 0.7016, | |
| "rewards/accuracies": 0.25999999046325684, | |
| "rewards/chosen": 0.016521912068128586, | |
| "rewards/margins": -0.011280059814453125, | |
| "rewards/rejected": 0.02781723067164421, | |
| "step": 1100 | |
| }, | |
| { | |
| "epoch": 0.8067473414008067, | |
| "eval_logits/chosen": -1.524436593055725, | |
| "eval_logits/rejected": -1.4248079061508179, | |
| "eval_logps/chosen": -377.15130615234375, | |
| "eval_logps/rejected": -350.2006530761719, | |
| "eval_loss": 0.6940104365348816, | |
| "eval_rewards/accuracies": 0.22861842811107635, | |
| "eval_rewards/chosen": 0.018003061413764954, | |
| "eval_rewards/margins": 0.002626720117405057, | |
| "eval_rewards/rejected": 0.01539022009819746, | |
| "eval_runtime": 183.6449, | |
| "eval_samples_per_second": 6.6, | |
| "eval_steps_per_second": 0.828, | |
| "step": 1100 | |
| }, | |
| { | |
| "epoch": 0.8250825082508251, | |
| "grad_norm": 105.0, | |
| "learning_rate": 8.767424798239178e-08, | |
| "logits/chosen": -1.534997582435608, | |
| "logits/rejected": null, | |
| "logps/chosen": -407.0662536621094, | |
| "logps/rejected": -381.228759765625, | |
| "loss": 0.6907, | |
| "rewards/accuracies": 0.2574999928474426, | |
| "rewards/chosen": 0.02077072113752365, | |
| "rewards/margins": 0.011860962025821209, | |
| "rewards/rejected": 0.008914489299058914, | |
| "step": 1125 | |
| }, | |
| { | |
| "epoch": 0.8434176751008434, | |
| "grad_norm": 118.5, | |
| "learning_rate": 7.850330154071901e-08, | |
| "logits/chosen": -1.448728084564209, | |
| "logits/rejected": -1.2549041509628296, | |
| "logps/chosen": -424.74749755859375, | |
| "logps/rejected": -409.44500732421875, | |
| "loss": 0.6998, | |
| "rewards/accuracies": 0.26249998807907104, | |
| "rewards/chosen": 0.016925200819969177, | |
| "rewards/margins": -0.007094268687069416, | |
| "rewards/rejected": 0.02402496337890625, | |
| "step": 1150 | |
| }, | |
| { | |
| "epoch": 0.8434176751008434, | |
| "eval_logits/chosen": -1.5245393514633179, | |
| "eval_logits/rejected": -1.4257924556732178, | |
| "eval_logps/chosen": -377.2730407714844, | |
| "eval_logps/rejected": -350.22039794921875, | |
| "eval_loss": 0.6929017305374146, | |
| "eval_rewards/accuracies": 0.23026315867900848, | |
| "eval_rewards/chosen": 0.01627480424940586, | |
| "eval_rewards/margins": 0.005090211518108845, | |
| "eval_rewards/rejected": 0.011195835657417774, | |
| "eval_runtime": 183.7208, | |
| "eval_samples_per_second": 6.597, | |
| "eval_steps_per_second": 0.827, | |
| "step": 1150 | |
| }, | |
| { | |
| "epoch": 0.8617528419508618, | |
| "grad_norm": 94.0, | |
| "learning_rate": 6.933235509904623e-08, | |
| "logits/chosen": -1.643198847770691, | |
| "logits/rejected": -1.4097143411636353, | |
| "logps/chosen": -416.32061767578125, | |
| "logps/rejected": -420.0243835449219, | |
| "loss": 0.701, | |
| "rewards/accuracies": 0.2750000059604645, | |
| "rewards/chosen": 0.019934996962547302, | |
| "rewards/margins": -0.004024200607091188, | |
| "rewards/rejected": 0.0239674374461174, | |
| "step": 1175 | |
| }, | |
| { | |
| "epoch": 0.8800880088008801, | |
| "grad_norm": 113.0, | |
| "learning_rate": 6.016140865737343e-08, | |
| "logits/chosen": -1.4947460889816284, | |
| "logits/rejected": -1.2584409713745117, | |
| "logps/chosen": -406.197509765625, | |
| "logps/rejected": -374.1000061035156, | |
| "loss": 0.6912, | |
| "rewards/accuracies": 0.29249998927116394, | |
| "rewards/chosen": 0.023181457072496414, | |
| "rewards/margins": 0.013177642598748207, | |
| "rewards/rejected": 0.010010071098804474, | |
| "step": 1200 | |
| }, | |
| { | |
| "epoch": 0.8800880088008801, | |
| "eval_logits/chosen": -1.5244590044021606, | |
| "eval_logits/rejected": -1.4259113073349, | |
| "eval_logps/chosen": -377.118408203125, | |
| "eval_logps/rejected": -350.1998291015625, | |
| "eval_loss": 0.6906456351280212, | |
| "eval_rewards/accuracies": 0.2409539520740509, | |
| "eval_rewards/chosen": 0.019954681396484375, | |
| "eval_rewards/margins": 0.011732828803360462, | |
| "eval_rewards/rejected": 0.008238466456532478, | |
| "eval_runtime": 183.8199, | |
| "eval_samples_per_second": 6.593, | |
| "eval_steps_per_second": 0.827, | |
| "step": 1200 | |
| }, | |
| { | |
| "epoch": 0.8984231756508985, | |
| "grad_norm": 108.0, | |
| "learning_rate": 5.0990462215700656e-08, | |
| "logits/chosen": -1.4646357297897339, | |
| "logits/rejected": -1.2686426639556885, | |
| "logps/chosen": -419.42498779296875, | |
| "logps/rejected": -413.2925109863281, | |
| "loss": 0.6974, | |
| "rewards/accuracies": 0.2175000011920929, | |
| "rewards/chosen": 0.01217727642506361, | |
| "rewards/margins": -0.0036750794388353825, | |
| "rewards/rejected": 0.015859374776482582, | |
| "step": 1225 | |
| }, | |
| { | |
| "epoch": 0.9167583425009168, | |
| "grad_norm": 125.0, | |
| "learning_rate": 4.181951577402787e-08, | |
| "logits/chosen": -1.3645446300506592, | |
| "logits/rejected": -1.2459040880203247, | |
| "logps/chosen": -433.9324951171875, | |
| "logps/rejected": -397.8374938964844, | |
| "loss": 0.6948, | |
| "rewards/accuracies": 0.30250000953674316, | |
| "rewards/chosen": 0.023784179240465164, | |
| "rewards/margins": 0.004344787448644638, | |
| "rewards/rejected": 0.019432831555604935, | |
| "step": 1250 | |
| }, | |
| { | |
| "epoch": 0.9167583425009168, | |
| "eval_logits/chosen": -1.5248445272445679, | |
| "eval_logits/rejected": -1.4260269403457642, | |
| "eval_logps/chosen": -377.15460205078125, | |
| "eval_logps/rejected": -350.25494384765625, | |
| "eval_loss": 0.6929919719696045, | |
| "eval_rewards/accuracies": 0.24259868264198303, | |
| "eval_rewards/chosen": 0.0216668788343668, | |
| "eval_rewards/margins": 0.0056954436004161835, | |
| "eval_rewards/rejected": 0.01596139557659626, | |
| "eval_runtime": 183.7346, | |
| "eval_samples_per_second": 6.596, | |
| "eval_steps_per_second": 0.827, | |
| "step": 1250 | |
| }, | |
| { | |
| "epoch": 0.935093509350935, | |
| "grad_norm": 103.0, | |
| "learning_rate": 3.26485693323551e-08, | |
| "logits/chosen": -1.543642520904541, | |
| "logits/rejected": -1.4146960973739624, | |
| "logps/chosen": -432.8800048828125, | |
| "logps/rejected": -407.4825134277344, | |
| "loss": 0.6917, | |
| "rewards/accuracies": 0.27000001072883606, | |
| "rewards/chosen": 0.02718307450413704, | |
| "rewards/margins": 0.008900909684598446, | |
| "rewards/rejected": 0.01827133260667324, | |
| "step": 1275 | |
| }, | |
| { | |
| "epoch": 0.9534286762009534, | |
| "grad_norm": 107.0, | |
| "learning_rate": 2.3477622890682317e-08, | |
| "logits/chosen": -1.4744700193405151, | |
| "logits/rejected": null, | |
| "logps/chosen": -432.228759765625, | |
| "logps/rejected": -398.2674865722656, | |
| "loss": 0.6905, | |
| "rewards/accuracies": 0.2775000035762787, | |
| "rewards/chosen": 0.029524916782975197, | |
| "rewards/margins": 0.01431709248572588, | |
| "rewards/rejected": 0.015177459456026554, | |
| "step": 1300 | |
| }, | |
| { | |
| "epoch": 0.9534286762009534, | |
| "eval_logits/chosen": -1.52471923828125, | |
| "eval_logits/rejected": -1.4258816242218018, | |
| "eval_logps/chosen": -377.1759948730469, | |
| "eval_logps/rejected": -350.13568115234375, | |
| "eval_loss": 0.6930950880050659, | |
| "eval_rewards/accuracies": 0.2253289520740509, | |
| "eval_rewards/chosen": 0.019946148619055748, | |
| "eval_rewards/margins": 0.005028975661844015, | |
| "eval_rewards/rejected": 0.014918653294444084, | |
| "eval_runtime": 183.6858, | |
| "eval_samples_per_second": 6.598, | |
| "eval_steps_per_second": 0.827, | |
| "step": 1300 | |
| }, | |
| { | |
| "epoch": 0.9717638430509717, | |
| "grad_norm": 100.0, | |
| "learning_rate": 1.4306676449009536e-08, | |
| "logits/chosen": -1.454483985900879, | |
| "logits/rejected": -1.3722683191299438, | |
| "logps/chosen": -422.8074951171875, | |
| "logps/rejected": -396.38250732421875, | |
| "loss": 0.6941, | |
| "rewards/accuracies": 0.2549999952316284, | |
| "rewards/chosen": 0.028248444199562073, | |
| "rewards/margins": 0.004119873046875, | |
| "rewards/rejected": 0.024119414389133453, | |
| "step": 1325 | |
| }, | |
| { | |
| "epoch": 0.9900990099009901, | |
| "grad_norm": 113.0, | |
| "learning_rate": 5.135730007336757e-09, | |
| "logits/chosen": null, | |
| "logits/rejected": -1.327661395072937, | |
| "logps/chosen": -423.99749755859375, | |
| "logps/rejected": -390.8299865722656, | |
| "loss": 0.6953, | |
| "rewards/accuracies": 0.2775000035762787, | |
| "rewards/chosen": 0.024477539584040642, | |
| "rewards/margins": 0.002653961069881916, | |
| "rewards/rejected": 0.021812591701745987, | |
| "step": 1350 | |
| }, | |
| { | |
| "epoch": 0.9900990099009901, | |
| "eval_logits/chosen": -1.524070382118225, | |
| "eval_logits/rejected": -1.4259716272354126, | |
| "eval_logps/chosen": -377.162841796875, | |
| "eval_logps/rejected": -350.21380615234375, | |
| "eval_loss": 0.6933400630950928, | |
| "eval_rewards/accuracies": 0.24259868264198303, | |
| "eval_rewards/chosen": 0.02030799351632595, | |
| "eval_rewards/margins": 0.005902240052819252, | |
| "eval_rewards/rejected": 0.01440710760653019, | |
| "eval_runtime": 183.6314, | |
| "eval_samples_per_second": 6.6, | |
| "eval_steps_per_second": 0.828, | |
| "step": 1350 | |
| } | |
| ], | |
| "logging_steps": 25, | |
| "max_steps": 1363, | |
| "num_input_tokens_seen": 0, | |
| "num_train_epochs": 1, | |
| "save_steps": 500, | |
| "stateful_callbacks": { | |
| "TrainerControl": { | |
| "args": { | |
| "should_epoch_stop": false, | |
| "should_evaluate": false, | |
| "should_log": false, | |
| "should_save": true, | |
| "should_training_stop": true | |
| }, | |
| "attributes": {} | |
| } | |
| }, | |
| "total_flos": 0.0, | |
| "train_batch_size": 2, | |
| "trial_name": null, | |
| "trial_params": null | |
| } | |