| { |
| "best_global_step": null, |
| "best_metric": null, |
| "best_model_checkpoint": null, |
| "epoch": 5.6, |
| "eval_steps": 500, |
| "global_step": 7000, |
| "is_hyper_param_search": false, |
| "is_local_process_zero": true, |
| "is_world_process_zero": true, |
| "log_history": [ |
| { |
| "epoch": 0.008, |
| "grad_norm": 2.3092777729034424, |
| "learning_rate": 3.6000000000000005e-08, |
| "logits/chosen": 0.7764996886253357, |
| "logits/rejected": 0.8174192309379578, |
| "logps/chosen": -195.11270141601562, |
| "logps/rejected": -207.083251953125, |
| "loss": 0.6965, |
| "rewards/accuracies": 0.30000001192092896, |
| "rewards/chosen": -0.0012020017020404339, |
| "rewards/margins": -0.006086898501962423, |
| "rewards/rejected": 0.004884896334260702, |
| "step": 10 |
| }, |
| { |
| "epoch": 0.016, |
| "grad_norm": 3.044440507888794, |
| "learning_rate": 7.6e-08, |
| "logits/chosen": 0.8046936988830566, |
| "logits/rejected": 0.7519802451133728, |
| "logps/chosen": -196.82322692871094, |
| "logps/rejected": -199.39141845703125, |
| "loss": 0.6877, |
| "rewards/accuracies": 0.5250000357627869, |
| "rewards/chosen": 0.0069522010162472725, |
| "rewards/margins": 0.01239713840186596, |
| "rewards/rejected": -0.005444936919957399, |
| "step": 20 |
| }, |
| { |
| "epoch": 0.024, |
| "grad_norm": 2.9533703327178955, |
| "learning_rate": 1.16e-07, |
| "logits/chosen": 0.7309688925743103, |
| "logits/rejected": 0.6841751933097839, |
| "logps/chosen": -201.09934997558594, |
| "logps/rejected": -231.3838653564453, |
| "loss": 0.6962, |
| "rewards/accuracies": 0.48750001192092896, |
| "rewards/chosen": -0.0034579948987811804, |
| "rewards/margins": -0.00475282734259963, |
| "rewards/rejected": 0.0012948326766490936, |
| "step": 30 |
| }, |
| { |
| "epoch": 0.032, |
| "grad_norm": 3.043637990951538, |
| "learning_rate": 1.56e-07, |
| "logits/chosen": 0.707830548286438, |
| "logits/rejected": 0.6798078417778015, |
| "logps/chosen": -217.506103515625, |
| "logps/rejected": -214.7266845703125, |
| "loss": 0.695, |
| "rewards/accuracies": 0.512499988079071, |
| "rewards/chosen": -0.006588793825358152, |
| "rewards/margins": -0.0025365306064486504, |
| "rewards/rejected": -0.004052260424941778, |
| "step": 40 |
| }, |
| { |
| "epoch": 0.04, |
| "grad_norm": 2.9874885082244873, |
| "learning_rate": 1.96e-07, |
| "logits/chosen": 0.6864386796951294, |
| "logits/rejected": 0.6313192248344421, |
| "logps/chosen": -215.640869140625, |
| "logps/rejected": -208.9573211669922, |
| "loss": 0.688, |
| "rewards/accuracies": 0.5875000357627869, |
| "rewards/chosen": -0.0006361968698911369, |
| "rewards/margins": 0.011526194401085377, |
| "rewards/rejected": -0.012162390165030956, |
| "step": 50 |
| }, |
| { |
| "epoch": 0.048, |
| "grad_norm": 2.3293204307556152, |
| "learning_rate": 2.3600000000000002e-07, |
| "logits/chosen": 0.7226251363754272, |
| "logits/rejected": 0.826255738735199, |
| "logps/chosen": -202.46560668945312, |
| "logps/rejected": -201.2319793701172, |
| "loss": 0.6887, |
| "rewards/accuracies": 0.5875000357627869, |
| "rewards/chosen": 0.007742859423160553, |
| "rewards/margins": 0.010210123844444752, |
| "rewards/rejected": -0.0024672651197761297, |
| "step": 60 |
| }, |
| { |
| "epoch": 0.056, |
| "grad_norm": 2.408390522003174, |
| "learning_rate": 2.7600000000000004e-07, |
| "logits/chosen": 0.6049055457115173, |
| "logits/rejected": 0.6685499548912048, |
| "logps/chosen": -193.7244873046875, |
| "logps/rejected": -202.5239715576172, |
| "loss": 0.6949, |
| "rewards/accuracies": 0.512499988079071, |
| "rewards/chosen": -0.004926090594381094, |
| "rewards/margins": -0.0017412189627066255, |
| "rewards/rejected": -0.0031848729122430086, |
| "step": 70 |
| }, |
| { |
| "epoch": 0.064, |
| "grad_norm": 3.6175403594970703, |
| "learning_rate": 3.160000000000001e-07, |
| "logits/chosen": 0.5864802598953247, |
| "logits/rejected": 0.6271843314170837, |
| "logps/chosen": -202.54612731933594, |
| "logps/rejected": -233.43150329589844, |
| "loss": 0.6904, |
| "rewards/accuracies": 0.4625000059604645, |
| "rewards/chosen": 0.00966743566095829, |
| "rewards/margins": 0.006970310118049383, |
| "rewards/rejected": 0.002697124844416976, |
| "step": 80 |
| }, |
| { |
| "epoch": 0.072, |
| "grad_norm": 2.3893001079559326, |
| "learning_rate": 3.56e-07, |
| "logits/chosen": 0.730164647102356, |
| "logits/rejected": 0.713482677936554, |
| "logps/chosen": -195.13555908203125, |
| "logps/rejected": -219.647216796875, |
| "loss": 0.6963, |
| "rewards/accuracies": 0.5875000357627869, |
| "rewards/chosen": 0.011823897249996662, |
| "rewards/margins": -0.005143971648067236, |
| "rewards/rejected": 0.01696786843240261, |
| "step": 90 |
| }, |
| { |
| "epoch": 0.08, |
| "grad_norm": 2.7904205322265625, |
| "learning_rate": 3.9600000000000005e-07, |
| "logits/chosen": 0.5790210962295532, |
| "logits/rejected": 0.6835765838623047, |
| "logps/chosen": -202.36647033691406, |
| "logps/rejected": -213.2338104248047, |
| "loss": 0.696, |
| "rewards/accuracies": 0.512499988079071, |
| "rewards/chosen": -0.004648447968065739, |
| "rewards/margins": -0.004634796176105738, |
| "rewards/rejected": -1.3651699191541411e-05, |
| "step": 100 |
| }, |
| { |
| "epoch": 0.088, |
| "grad_norm": 2.350283145904541, |
| "learning_rate": 4.3600000000000004e-07, |
| "logits/chosen": 0.8016487956047058, |
| "logits/rejected": 0.8312139511108398, |
| "logps/chosen": -202.82815551757812, |
| "logps/rejected": -218.19920349121094, |
| "loss": 0.6913, |
| "rewards/accuracies": 0.4375, |
| "rewards/chosen": 0.01035931147634983, |
| "rewards/margins": 0.005032673478126526, |
| "rewards/rejected": 0.005326639395207167, |
| "step": 110 |
| }, |
| { |
| "epoch": 0.096, |
| "grad_norm": 2.2766993045806885, |
| "learning_rate": 4.760000000000001e-07, |
| "logits/chosen": 0.8250360488891602, |
| "logits/rejected": 0.835590660572052, |
| "logps/chosen": -196.98216247558594, |
| "logps/rejected": -210.0322723388672, |
| "loss": 0.6919, |
| "rewards/accuracies": 0.550000011920929, |
| "rewards/chosen": 0.00589400390163064, |
| "rewards/margins": 0.003922100644558668, |
| "rewards/rejected": 0.0019719023257493973, |
| "step": 120 |
| }, |
| { |
| "epoch": 0.104, |
| "grad_norm": 2.0690665245056152, |
| "learning_rate": 5.16e-07, |
| "logits/chosen": 0.7067065238952637, |
| "logits/rejected": 0.7281522154808044, |
| "logps/chosen": -195.405029296875, |
| "logps/rejected": -206.66079711914062, |
| "loss": 0.6954, |
| "rewards/accuracies": 0.4625000059604645, |
| "rewards/chosen": 0.006292525213211775, |
| "rewards/margins": -0.0033251051791012287, |
| "rewards/rejected": 0.009617629460990429, |
| "step": 130 |
| }, |
| { |
| "epoch": 0.112, |
| "grad_norm": 2.388901948928833, |
| "learning_rate": 5.560000000000001e-07, |
| "logits/chosen": 0.6768548488616943, |
| "logits/rejected": 0.6542965173721313, |
| "logps/chosen": -214.23342895507812, |
| "logps/rejected": -216.52366638183594, |
| "loss": 0.6939, |
| "rewards/accuracies": 0.48750001192092896, |
| "rewards/chosen": -0.012295923195779324, |
| "rewards/margins": 8.453801456198562e-06, |
| "rewards/rejected": -0.012304377742111683, |
| "step": 140 |
| }, |
| { |
| "epoch": 0.12, |
| "grad_norm": 2.4146411418914795, |
| "learning_rate": 5.960000000000001e-07, |
| "logits/chosen": 0.8362228274345398, |
| "logits/rejected": 0.780723512172699, |
| "logps/chosen": -192.99110412597656, |
| "logps/rejected": -182.7938232421875, |
| "loss": 0.6965, |
| "rewards/accuracies": 0.5, |
| "rewards/chosen": 0.008289004676043987, |
| "rewards/margins": -0.005621676333248615, |
| "rewards/rejected": 0.013910681009292603, |
| "step": 150 |
| }, |
| { |
| "epoch": 0.128, |
| "grad_norm": 2.408268690109253, |
| "learning_rate": 6.360000000000001e-07, |
| "logits/chosen": 0.6485953330993652, |
| "logits/rejected": 0.7263766527175903, |
| "logps/chosen": -191.4886016845703, |
| "logps/rejected": -218.2458953857422, |
| "loss": 0.7014, |
| "rewards/accuracies": 0.4375, |
| "rewards/chosen": 0.007325764745473862, |
| "rewards/margins": -0.014910398982465267, |
| "rewards/rejected": 0.022236162796616554, |
| "step": 160 |
| }, |
| { |
| "epoch": 0.136, |
| "grad_norm": 2.3368568420410156, |
| "learning_rate": 6.76e-07, |
| "logits/chosen": 0.6823139190673828, |
| "logits/rejected": 0.7047578692436218, |
| "logps/chosen": -188.77145385742188, |
| "logps/rejected": -230.02903747558594, |
| "loss": 0.6928, |
| "rewards/accuracies": 0.512499988079071, |
| "rewards/chosen": 0.009600992314517498, |
| "rewards/margins": 0.0018820532131940126, |
| "rewards/rejected": 0.007718939334154129, |
| "step": 170 |
| }, |
| { |
| "epoch": 0.144, |
| "grad_norm": 2.562350273132324, |
| "learning_rate": 7.16e-07, |
| "logits/chosen": 0.7193002700805664, |
| "logits/rejected": 0.6238381266593933, |
| "logps/chosen": -199.7821807861328, |
| "logps/rejected": -189.610595703125, |
| "loss": 0.6955, |
| "rewards/accuracies": 0.5625, |
| "rewards/chosen": -0.00945024099200964, |
| "rewards/margins": -0.003362303366884589, |
| "rewards/rejected": -0.00608793692663312, |
| "step": 180 |
| }, |
| { |
| "epoch": 0.152, |
| "grad_norm": 2.5308778285980225, |
| "learning_rate": 7.56e-07, |
| "logits/chosen": 0.7583810091018677, |
| "logits/rejected": 0.7683423161506653, |
| "logps/chosen": -179.4371795654297, |
| "logps/rejected": -193.5501708984375, |
| "loss": 0.6947, |
| "rewards/accuracies": 0.45000001788139343, |
| "rewards/chosen": 0.024644900113344193, |
| "rewards/margins": -0.0013052498688921332, |
| "rewards/rejected": 0.025950148701667786, |
| "step": 190 |
| }, |
| { |
| "epoch": 0.16, |
| "grad_norm": 2.271794080734253, |
| "learning_rate": 7.960000000000001e-07, |
| "logits/chosen": 0.8137730956077576, |
| "logits/rejected": 0.6786491274833679, |
| "logps/chosen": -202.08787536621094, |
| "logps/rejected": -198.05364990234375, |
| "loss": 0.6975, |
| "rewards/accuracies": 0.4749999940395355, |
| "rewards/chosen": -0.01316139753907919, |
| "rewards/margins": -0.007475724909454584, |
| "rewards/rejected": -0.005685672629624605, |
| "step": 200 |
| }, |
| { |
| "epoch": 0.168, |
| "grad_norm": 2.6240570545196533, |
| "learning_rate": 8.36e-07, |
| "logits/chosen": 0.6418821215629578, |
| "logits/rejected": 0.7391870617866516, |
| "logps/chosen": -202.360107421875, |
| "logps/rejected": -229.84375, |
| "loss": 0.6896, |
| "rewards/accuracies": 0.5375000238418579, |
| "rewards/chosen": 0.006038342602550983, |
| "rewards/margins": 0.00873213168233633, |
| "rewards/rejected": -0.002693791640922427, |
| "step": 210 |
| }, |
| { |
| "epoch": 0.176, |
| "grad_norm": 2.454711675643921, |
| "learning_rate": 8.760000000000001e-07, |
| "logits/chosen": 0.6627649664878845, |
| "logits/rejected": 0.5812577605247498, |
| "logps/chosen": -231.45840454101562, |
| "logps/rejected": -224.667724609375, |
| "loss": 0.6921, |
| "rewards/accuracies": 0.5250000357627869, |
| "rewards/chosen": -0.004710569512099028, |
| "rewards/margins": 0.0032497793436050415, |
| "rewards/rejected": -0.007960348390042782, |
| "step": 220 |
| }, |
| { |
| "epoch": 0.184, |
| "grad_norm": 2.9100327491760254, |
| "learning_rate": 9.160000000000001e-07, |
| "logits/chosen": 0.7058368921279907, |
| "logits/rejected": 0.6532104015350342, |
| "logps/chosen": -209.2056884765625, |
| "logps/rejected": -206.2039031982422, |
| "loss": 0.6916, |
| "rewards/accuracies": 0.6000000238418579, |
| "rewards/chosen": 0.00045182276517152786, |
| "rewards/margins": 0.004544996656477451, |
| "rewards/rejected": -0.004093174822628498, |
| "step": 230 |
| }, |
| { |
| "epoch": 0.192, |
| "grad_norm": 2.845057487487793, |
| "learning_rate": 9.56e-07, |
| "logits/chosen": 0.5265730023384094, |
| "logits/rejected": 0.6017157435417175, |
| "logps/chosen": -202.66600036621094, |
| "logps/rejected": -229.9880828857422, |
| "loss": 0.697, |
| "rewards/accuracies": 0.5250000357627869, |
| "rewards/chosen": 0.0027109149377793074, |
| "rewards/margins": -0.00603170320391655, |
| "rewards/rejected": 0.008742619305849075, |
| "step": 240 |
| }, |
| { |
| "epoch": 0.2, |
| "grad_norm": 2.409635543823242, |
| "learning_rate": 9.96e-07, |
| "logits/chosen": 0.7155398726463318, |
| "logits/rejected": 0.7485236525535583, |
| "logps/chosen": -208.7382354736328, |
| "logps/rejected": -224.94078063964844, |
| "loss": 0.689, |
| "rewards/accuracies": 0.550000011920929, |
| "rewards/chosen": 0.021835366263985634, |
| "rewards/margins": 0.009591616690158844, |
| "rewards/rejected": 0.01224374771118164, |
| "step": 250 |
| }, |
| { |
| "epoch": 0.208, |
| "grad_norm": 2.2228920459747314, |
| "learning_rate": 1.0360000000000001e-06, |
| "logits/chosen": 0.7711376547813416, |
| "logits/rejected": 0.6718112230300903, |
| "logps/chosen": -204.5934600830078, |
| "logps/rejected": -199.9540252685547, |
| "loss": 0.6978, |
| "rewards/accuracies": 0.48750001192092896, |
| "rewards/chosen": 0.006919704377651215, |
| "rewards/margins": -0.008324065245687962, |
| "rewards/rejected": 0.015243768692016602, |
| "step": 260 |
| }, |
| { |
| "epoch": 0.216, |
| "grad_norm": 2.7563939094543457, |
| "learning_rate": 1.0760000000000002e-06, |
| "logits/chosen": 0.7490439414978027, |
| "logits/rejected": 0.8420026898384094, |
| "logps/chosen": -194.5001678466797, |
| "logps/rejected": -194.9221954345703, |
| "loss": 0.7006, |
| "rewards/accuracies": 0.4124999940395355, |
| "rewards/chosen": 0.0004544306721072644, |
| "rewards/margins": -0.01385478675365448, |
| "rewards/rejected": 0.014309215359389782, |
| "step": 270 |
| }, |
| { |
| "epoch": 0.224, |
| "grad_norm": 2.703477621078491, |
| "learning_rate": 1.1160000000000002e-06, |
| "logits/chosen": 0.654878556728363, |
| "logits/rejected": 0.6851706504821777, |
| "logps/chosen": -191.12637329101562, |
| "logps/rejected": -179.5616455078125, |
| "loss": 0.6963, |
| "rewards/accuracies": 0.5, |
| "rewards/chosen": -0.002727155340835452, |
| "rewards/margins": -0.005241455975919962, |
| "rewards/rejected": 0.002514300402253866, |
| "step": 280 |
| }, |
| { |
| "epoch": 0.232, |
| "grad_norm": 2.8370096683502197, |
| "learning_rate": 1.156e-06, |
| "logits/chosen": 0.7035982012748718, |
| "logits/rejected": 0.6883268356323242, |
| "logps/chosen": -198.1017608642578, |
| "logps/rejected": -205.367431640625, |
| "loss": 0.6936, |
| "rewards/accuracies": 0.512499988079071, |
| "rewards/chosen": 0.0034764811862260103, |
| "rewards/margins": 0.0005613662651740015, |
| "rewards/rejected": 0.0029151157941669226, |
| "step": 290 |
| }, |
| { |
| "epoch": 0.24, |
| "grad_norm": 2.892529249191284, |
| "learning_rate": 1.196e-06, |
| "logits/chosen": 0.661008358001709, |
| "logits/rejected": 0.6034583449363708, |
| "logps/chosen": -197.7530975341797, |
| "logps/rejected": -212.4963836669922, |
| "loss": 0.6944, |
| "rewards/accuracies": 0.512499988079071, |
| "rewards/chosen": 0.005977009888738394, |
| "rewards/margins": -0.0009391207131557167, |
| "rewards/rejected": 0.006916132755577564, |
| "step": 300 |
| }, |
| { |
| "epoch": 0.248, |
| "grad_norm": 2.4562253952026367, |
| "learning_rate": 1.2360000000000001e-06, |
| "logits/chosen": 0.7135326266288757, |
| "logits/rejected": 0.7250908017158508, |
| "logps/chosen": -203.17738342285156, |
| "logps/rejected": -208.8437042236328, |
| "loss": 0.6925, |
| "rewards/accuracies": 0.48750001192092896, |
| "rewards/chosen": 0.0018625364173203707, |
| "rewards/margins": 0.002679466502740979, |
| "rewards/rejected": -0.0008169323555193841, |
| "step": 310 |
| }, |
| { |
| "epoch": 0.256, |
| "grad_norm": 2.6124107837677, |
| "learning_rate": 1.276e-06, |
| "logits/chosen": 0.8584432601928711, |
| "logits/rejected": 0.8116118311882019, |
| "logps/chosen": -195.6661376953125, |
| "logps/rejected": -196.38145446777344, |
| "loss": 0.6932, |
| "rewards/accuracies": 0.550000011920929, |
| "rewards/chosen": 0.005818118806928396, |
| "rewards/margins": 0.0013451100094243884, |
| "rewards/rejected": 0.004473009612411261, |
| "step": 320 |
| }, |
| { |
| "epoch": 0.264, |
| "grad_norm": 2.860166311264038, |
| "learning_rate": 1.316e-06, |
| "logits/chosen": 0.6980428099632263, |
| "logits/rejected": 0.8046857118606567, |
| "logps/chosen": -189.9589080810547, |
| "logps/rejected": -203.92202758789062, |
| "loss": 0.694, |
| "rewards/accuracies": 0.48750001192092896, |
| "rewards/chosen": 0.018240585923194885, |
| "rewards/margins": -0.0006985944928601384, |
| "rewards/rejected": 0.018939180299639702, |
| "step": 330 |
| }, |
| { |
| "epoch": 0.272, |
| "grad_norm": 2.8200535774230957, |
| "learning_rate": 1.356e-06, |
| "logits/chosen": 0.8003985285758972, |
| "logits/rejected": 0.77861088514328, |
| "logps/chosen": -201.60928344726562, |
| "logps/rejected": -203.56936645507812, |
| "loss": 0.693, |
| "rewards/accuracies": 0.5, |
| "rewards/chosen": 0.002375365002080798, |
| "rewards/margins": 0.001602139906026423, |
| "rewards/rejected": 0.0007732249796390533, |
| "step": 340 |
| }, |
| { |
| "epoch": 0.28, |
| "grad_norm": 3.1359260082244873, |
| "learning_rate": 1.396e-06, |
| "logits/chosen": 0.7161771655082703, |
| "logits/rejected": 0.6535243988037109, |
| "logps/chosen": -196.94798278808594, |
| "logps/rejected": -207.852783203125, |
| "loss": 0.6921, |
| "rewards/accuracies": 0.512499988079071, |
| "rewards/chosen": 0.0128513528034091, |
| "rewards/margins": 0.003678938141092658, |
| "rewards/rejected": 0.009172416292130947, |
| "step": 350 |
| }, |
| { |
| "epoch": 0.288, |
| "grad_norm": 3.2514476776123047, |
| "learning_rate": 1.436e-06, |
| "logits/chosen": 0.6619038581848145, |
| "logits/rejected": 0.656987726688385, |
| "logps/chosen": -200.4764862060547, |
| "logps/rejected": -231.81895446777344, |
| "loss": 0.6878, |
| "rewards/accuracies": 0.574999988079071, |
| "rewards/chosen": 0.022911271080374718, |
| "rewards/margins": 0.012182674370706081, |
| "rewards/rejected": 0.010728596709668636, |
| "step": 360 |
| }, |
| { |
| "epoch": 0.296, |
| "grad_norm": 2.784947156906128, |
| "learning_rate": 1.4760000000000001e-06, |
| "logits/chosen": 0.7618936896324158, |
| "logits/rejected": 0.7150487899780273, |
| "logps/chosen": -196.36228942871094, |
| "logps/rejected": -211.08935546875, |
| "loss": 0.6905, |
| "rewards/accuracies": 0.48750001192092896, |
| "rewards/chosen": 0.017282454296946526, |
| "rewards/margins": 0.006800856441259384, |
| "rewards/rejected": 0.010481595061719418, |
| "step": 370 |
| }, |
| { |
| "epoch": 0.304, |
| "grad_norm": 2.753596067428589, |
| "learning_rate": 1.5160000000000002e-06, |
| "logits/chosen": 0.8928642272949219, |
| "logits/rejected": 0.8450748324394226, |
| "logps/chosen": -204.9441680908203, |
| "logps/rejected": -198.3724822998047, |
| "loss": 0.6932, |
| "rewards/accuracies": 0.5375000238418579, |
| "rewards/chosen": 0.029540909454226494, |
| "rewards/margins": 0.0015256500337272882, |
| "rewards/rejected": 0.028015261515975, |
| "step": 380 |
| }, |
| { |
| "epoch": 0.312, |
| "grad_norm": 3.1084704399108887, |
| "learning_rate": 1.556e-06, |
| "logits/chosen": 0.8117288947105408, |
| "logits/rejected": 0.766746461391449, |
| "logps/chosen": -195.1800079345703, |
| "logps/rejected": -199.2845001220703, |
| "loss": 0.6961, |
| "rewards/accuracies": 0.4749999940395355, |
| "rewards/chosen": 0.01765977405011654, |
| "rewards/margins": -0.004569740500301123, |
| "rewards/rejected": 0.0222295131534338, |
| "step": 390 |
| }, |
| { |
| "epoch": 0.32, |
| "grad_norm": 3.7862966060638428, |
| "learning_rate": 1.596e-06, |
| "logits/chosen": 0.8835588693618774, |
| "logits/rejected": 0.8266305327415466, |
| "logps/chosen": -208.58726501464844, |
| "logps/rejected": -217.5359649658203, |
| "loss": 0.6976, |
| "rewards/accuracies": 0.38750001788139343, |
| "rewards/chosen": 0.020054074004292488, |
| "rewards/margins": -0.0072321416810154915, |
| "rewards/rejected": 0.027286216616630554, |
| "step": 400 |
| }, |
| { |
| "epoch": 0.328, |
| "grad_norm": 2.675278663635254, |
| "learning_rate": 1.636e-06, |
| "logits/chosen": 0.7499464750289917, |
| "logits/rejected": 0.7868902087211609, |
| "logps/chosen": -176.598876953125, |
| "logps/rejected": -185.85235595703125, |
| "loss": 0.697, |
| "rewards/accuracies": 0.4625000059604645, |
| "rewards/chosen": 0.015415973030030727, |
| "rewards/margins": -0.006510419305413961, |
| "rewards/rejected": 0.02192639373242855, |
| "step": 410 |
| }, |
| { |
| "epoch": 0.336, |
| "grad_norm": 2.8932230472564697, |
| "learning_rate": 1.6760000000000001e-06, |
| "logits/chosen": 0.8424084782600403, |
| "logits/rejected": 0.8695581555366516, |
| "logps/chosen": -196.9973602294922, |
| "logps/rejected": -203.99620056152344, |
| "loss": 0.6901, |
| "rewards/accuracies": 0.5250000357627869, |
| "rewards/chosen": 0.015818921849131584, |
| "rewards/margins": 0.007251453585922718, |
| "rewards/rejected": 0.008567466400563717, |
| "step": 420 |
| }, |
| { |
| "epoch": 0.344, |
| "grad_norm": 2.983330488204956, |
| "learning_rate": 1.7160000000000002e-06, |
| "logits/chosen": 0.5948252081871033, |
| "logits/rejected": 0.6821457743644714, |
| "logps/chosen": -192.26760864257812, |
| "logps/rejected": -200.97569274902344, |
| "loss": 0.6913, |
| "rewards/accuracies": 0.5250000357627869, |
| "rewards/chosen": 0.010373975150287151, |
| "rewards/margins": 0.004705019760876894, |
| "rewards/rejected": 0.005668954458087683, |
| "step": 430 |
| }, |
| { |
| "epoch": 0.352, |
| "grad_norm": 3.401388168334961, |
| "learning_rate": 1.7560000000000002e-06, |
| "logits/chosen": 0.7682614922523499, |
| "logits/rejected": 0.8079833984375, |
| "logps/chosen": -201.40171813964844, |
| "logps/rejected": -215.61692810058594, |
| "loss": 0.6936, |
| "rewards/accuracies": 0.574999988079071, |
| "rewards/chosen": 0.018279600888490677, |
| "rewards/margins": 0.0005021900869905949, |
| "rewards/rejected": 0.01777741126716137, |
| "step": 440 |
| }, |
| { |
| "epoch": 0.36, |
| "grad_norm": 3.9692232608795166, |
| "learning_rate": 1.7960000000000003e-06, |
| "logits/chosen": 0.6712386012077332, |
| "logits/rejected": 0.7559861540794373, |
| "logps/chosen": -198.07620239257812, |
| "logps/rejected": -226.9971466064453, |
| "loss": 0.6865, |
| "rewards/accuracies": 0.625, |
| "rewards/chosen": 0.02878693677484989, |
| "rewards/margins": 0.014408250339329243, |
| "rewards/rejected": 0.014378686435520649, |
| "step": 450 |
| }, |
| { |
| "epoch": 0.368, |
| "grad_norm": 2.5181920528411865, |
| "learning_rate": 1.8360000000000003e-06, |
| "logits/chosen": 0.6611719131469727, |
| "logits/rejected": 0.7472667098045349, |
| "logps/chosen": -186.1030731201172, |
| "logps/rejected": -198.84902954101562, |
| "loss": 0.6893, |
| "rewards/accuracies": 0.5625, |
| "rewards/chosen": 0.022867394611239433, |
| "rewards/margins": 0.009127254597842693, |
| "rewards/rejected": 0.01374014001339674, |
| "step": 460 |
| }, |
| { |
| "epoch": 0.376, |
| "grad_norm": 3.491785764694214, |
| "learning_rate": 1.8760000000000001e-06, |
| "logits/chosen": 0.6375329494476318, |
| "logits/rejected": 0.7117382287979126, |
| "logps/chosen": -191.62840270996094, |
| "logps/rejected": -192.08445739746094, |
| "loss": 0.6936, |
| "rewards/accuracies": 0.48750001192092896, |
| "rewards/chosen": 0.013536167331039906, |
| "rewards/margins": 0.0005845252308063209, |
| "rewards/rejected": 0.012951642274856567, |
| "step": 470 |
| }, |
| { |
| "epoch": 0.384, |
| "grad_norm": 3.381208896636963, |
| "learning_rate": 1.916e-06, |
| "logits/chosen": 0.5306805968284607, |
| "logits/rejected": 0.6659603714942932, |
| "logps/chosen": -200.27557373046875, |
| "logps/rejected": -223.94517517089844, |
| "loss": 0.6912, |
| "rewards/accuracies": 0.512499988079071, |
| "rewards/chosen": 0.020672379061579704, |
| "rewards/margins": 0.0055915359407663345, |
| "rewards/rejected": 0.015080844052135944, |
| "step": 480 |
| }, |
| { |
| "epoch": 0.392, |
| "grad_norm": 2.9275639057159424, |
| "learning_rate": 1.956e-06, |
| "logits/chosen": 0.7187590599060059, |
| "logits/rejected": 0.7936305403709412, |
| "logps/chosen": -198.9750213623047, |
| "logps/rejected": -198.08738708496094, |
| "loss": 0.6898, |
| "rewards/accuracies": 0.5625, |
| "rewards/chosen": 0.023504601791501045, |
| "rewards/margins": 0.007863587699830532, |
| "rewards/rejected": 0.01564101316034794, |
| "step": 490 |
| }, |
| { |
| "epoch": 0.4, |
| "grad_norm": 2.6241567134857178, |
| "learning_rate": 1.996e-06, |
| "logits/chosen": 0.7280531525611877, |
| "logits/rejected": 0.8506690859794617, |
| "logps/chosen": -188.4355010986328, |
| "logps/rejected": -195.9119110107422, |
| "loss": 0.69, |
| "rewards/accuracies": 0.550000011920929, |
| "rewards/chosen": 0.03479978069663048, |
| "rewards/margins": 0.007221006788313389, |
| "rewards/rejected": 0.027578774839639664, |
| "step": 500 |
| }, |
| { |
| "epoch": 0.408, |
| "grad_norm": 2.9502575397491455, |
| "learning_rate": 2.036e-06, |
| "logits/chosen": 0.7209169268608093, |
| "logits/rejected": 0.6922793984413147, |
| "logps/chosen": -197.63975524902344, |
| "logps/rejected": -215.0830535888672, |
| "loss": 0.689, |
| "rewards/accuracies": 0.550000011920929, |
| "rewards/chosen": 0.02393524721264839, |
| "rewards/margins": 0.009743581525981426, |
| "rewards/rejected": 0.01419166661798954, |
| "step": 510 |
| }, |
| { |
| "epoch": 0.416, |
| "grad_norm": 2.9870102405548096, |
| "learning_rate": 2.076e-06, |
| "logits/chosen": 0.8047041296958923, |
| "logits/rejected": 0.764294445514679, |
| "logps/chosen": -202.94752502441406, |
| "logps/rejected": -195.111572265625, |
| "loss": 0.7018, |
| "rewards/accuracies": 0.42500001192092896, |
| "rewards/chosen": 0.029683226719498634, |
| "rewards/margins": -0.015081966295838356, |
| "rewards/rejected": 0.04476520046591759, |
| "step": 520 |
| }, |
| { |
| "epoch": 0.424, |
| "grad_norm": 2.907334804534912, |
| "learning_rate": 2.116e-06, |
| "logits/chosen": 0.7759226560592651, |
| "logits/rejected": 0.7076265215873718, |
| "logps/chosen": -193.26431274414062, |
| "logps/rejected": -193.8086700439453, |
| "loss": 0.6929, |
| "rewards/accuracies": 0.5625, |
| "rewards/chosen": 0.04167484864592552, |
| "rewards/margins": 0.0013663482386618853, |
| "rewards/rejected": 0.040308497846126556, |
| "step": 530 |
| }, |
| { |
| "epoch": 0.432, |
| "grad_norm": 2.9025113582611084, |
| "learning_rate": 2.156e-06, |
| "logits/chosen": 0.674287736415863, |
| "logits/rejected": 0.6436707377433777, |
| "logps/chosen": -195.1566619873047, |
| "logps/rejected": -226.0045166015625, |
| "loss": 0.6892, |
| "rewards/accuracies": 0.550000011920929, |
| "rewards/chosen": 0.03686271235346794, |
| "rewards/margins": 0.009263001382350922, |
| "rewards/rejected": 0.02759971097111702, |
| "step": 540 |
| }, |
| { |
| "epoch": 0.44, |
| "grad_norm": 3.0069284439086914, |
| "learning_rate": 2.1960000000000002e-06, |
| "logits/chosen": 0.5614029169082642, |
| "logits/rejected": 0.618638277053833, |
| "logps/chosen": -204.8896026611328, |
| "logps/rejected": -218.7178497314453, |
| "loss": 0.683, |
| "rewards/accuracies": 0.612500011920929, |
| "rewards/chosen": 0.037945158779621124, |
| "rewards/margins": 0.021600285544991493, |
| "rewards/rejected": 0.01634487323462963, |
| "step": 550 |
| }, |
| { |
| "epoch": 0.448, |
| "grad_norm": 3.801163673400879, |
| "learning_rate": 2.2360000000000003e-06, |
| "logits/chosen": 0.7861131429672241, |
| "logits/rejected": 0.6601011753082275, |
| "logps/chosen": -201.13636779785156, |
| "logps/rejected": -198.4203643798828, |
| "loss": 0.6883, |
| "rewards/accuracies": 0.574999988079071, |
| "rewards/chosen": 0.056006718426942825, |
| "rewards/margins": 0.011804056353867054, |
| "rewards/rejected": 0.044202663004398346, |
| "step": 560 |
| }, |
| { |
| "epoch": 0.456, |
| "grad_norm": 3.0970733165740967, |
| "learning_rate": 2.2760000000000003e-06, |
| "logits/chosen": 0.8429245352745056, |
| "logits/rejected": 0.8626702427864075, |
| "logps/chosen": -195.6461639404297, |
| "logps/rejected": -192.6861572265625, |
| "loss": 0.684, |
| "rewards/accuracies": 0.574999988079071, |
| "rewards/chosen": 0.06717582792043686, |
| "rewards/margins": 0.020122915506362915, |
| "rewards/rejected": 0.047052908688783646, |
| "step": 570 |
| }, |
| { |
| "epoch": 0.464, |
| "grad_norm": 3.105346918106079, |
| "learning_rate": 2.3160000000000004e-06, |
| "logits/chosen": 0.6571624875068665, |
| "logits/rejected": 0.7619710564613342, |
| "logps/chosen": -191.7879180908203, |
| "logps/rejected": -222.39219665527344, |
| "loss": 0.6883, |
| "rewards/accuracies": 0.550000011920929, |
| "rewards/chosen": 0.056688953191041946, |
| "rewards/margins": 0.010812098160386086, |
| "rewards/rejected": 0.04587685689330101, |
| "step": 580 |
| }, |
| { |
| "epoch": 0.472, |
| "grad_norm": 3.110258102416992, |
| "learning_rate": 2.3560000000000004e-06, |
| "logits/chosen": 0.6822569966316223, |
| "logits/rejected": 0.6781023144721985, |
| "logps/chosen": -215.3318328857422, |
| "logps/rejected": -217.4102020263672, |
| "loss": 0.689, |
| "rewards/accuracies": 0.550000011920929, |
| "rewards/chosen": 0.06999214738607407, |
| "rewards/margins": 0.01019731443375349, |
| "rewards/rejected": 0.0597948394715786, |
| "step": 590 |
| }, |
| { |
| "epoch": 0.48, |
| "grad_norm": 3.3028745651245117, |
| "learning_rate": 2.3960000000000004e-06, |
| "logits/chosen": 0.6857186555862427, |
| "logits/rejected": 0.8173799514770508, |
| "logps/chosen": -199.44915771484375, |
| "logps/rejected": -205.1499481201172, |
| "loss": 0.6833, |
| "rewards/accuracies": 0.5625, |
| "rewards/chosen": 0.07196470350027084, |
| "rewards/margins": 0.021447917446494102, |
| "rewards/rejected": 0.05051679164171219, |
| "step": 600 |
| }, |
| { |
| "epoch": 0.488, |
| "grad_norm": 3.5192556381225586, |
| "learning_rate": 2.4360000000000005e-06, |
| "logits/chosen": 0.664252758026123, |
| "logits/rejected": 0.7970002889633179, |
| "logps/chosen": -183.03111267089844, |
| "logps/rejected": -200.3971405029297, |
| "loss": 0.6889, |
| "rewards/accuracies": 0.574999988079071, |
| "rewards/chosen": 0.09819995611906052, |
| "rewards/margins": 0.01058993861079216, |
| "rewards/rejected": 0.08761002123355865, |
| "step": 610 |
| }, |
| { |
| "epoch": 0.496, |
| "grad_norm": 3.326347827911377, |
| "learning_rate": 2.476e-06, |
| "logits/chosen": 0.7640261054039001, |
| "logits/rejected": 0.7449796795845032, |
| "logps/chosen": -186.8088836669922, |
| "logps/rejected": -198.9549560546875, |
| "loss": 0.685, |
| "rewards/accuracies": 0.5375000238418579, |
| "rewards/chosen": 0.10616890341043472, |
| "rewards/margins": 0.01862834393978119, |
| "rewards/rejected": 0.08754055947065353, |
| "step": 620 |
| }, |
| { |
| "epoch": 0.504, |
| "grad_norm": 3.3991470336914062, |
| "learning_rate": 2.516e-06, |
| "logits/chosen": 0.6187211275100708, |
| "logits/rejected": 0.5761955380439758, |
| "logps/chosen": -191.6369171142578, |
| "logps/rejected": -208.64756774902344, |
| "loss": 0.6814, |
| "rewards/accuracies": 0.5625, |
| "rewards/chosen": 0.10338740795850754, |
| "rewards/margins": 0.028011484071612358, |
| "rewards/rejected": 0.07537592202425003, |
| "step": 630 |
| }, |
| { |
| "epoch": 0.512, |
| "grad_norm": 3.6961252689361572, |
| "learning_rate": 2.556e-06, |
| "logits/chosen": 0.6848690509796143, |
| "logits/rejected": 0.679492175579071, |
| "logps/chosen": -211.4331512451172, |
| "logps/rejected": -206.33084106445312, |
| "loss": 0.6836, |
| "rewards/accuracies": 0.574999988079071, |
| "rewards/chosen": 0.09747160971164703, |
| "rewards/margins": 0.0221050176769495, |
| "rewards/rejected": 0.07536659389734268, |
| "step": 640 |
| }, |
| { |
| "epoch": 0.52, |
| "grad_norm": 3.4172682762145996, |
| "learning_rate": 2.5960000000000002e-06, |
| "logits/chosen": 0.6366490721702576, |
| "logits/rejected": 0.5817402005195618, |
| "logps/chosen": -209.3129425048828, |
| "logps/rejected": -210.4381866455078, |
| "loss": 0.6758, |
| "rewards/accuracies": 0.6000000238418579, |
| "rewards/chosen": 0.12278693169355392, |
| "rewards/margins": 0.03931554779410362, |
| "rewards/rejected": 0.08347138017416, |
| "step": 650 |
| }, |
| { |
| "epoch": 0.528, |
| "grad_norm": 3.1855289936065674, |
| "learning_rate": 2.6360000000000003e-06, |
| "logits/chosen": 0.6927405595779419, |
| "logits/rejected": 0.6666523218154907, |
| "logps/chosen": -204.4365234375, |
| "logps/rejected": -215.52566528320312, |
| "loss": 0.6857, |
| "rewards/accuracies": 0.5625, |
| "rewards/chosen": 0.11687055975198746, |
| "rewards/margins": 0.01899743638932705, |
| "rewards/rejected": 0.09787313640117645, |
| "step": 660 |
| }, |
| { |
| "epoch": 0.536, |
| "grad_norm": 3.221082925796509, |
| "learning_rate": 2.6760000000000003e-06, |
| "logits/chosen": 0.675836980342865, |
| "logits/rejected": 0.6653029322624207, |
| "logps/chosen": -190.93809509277344, |
| "logps/rejected": -192.1734161376953, |
| "loss": 0.6701, |
| "rewards/accuracies": 0.612500011920929, |
| "rewards/chosen": 0.16091938316822052, |
| "rewards/margins": 0.05060316249728203, |
| "rewards/rejected": 0.1103162169456482, |
| "step": 670 |
| }, |
| { |
| "epoch": 0.544, |
| "grad_norm": 3.2758514881134033, |
| "learning_rate": 2.7160000000000003e-06, |
| "logits/chosen": 0.6717751622200012, |
| "logits/rejected": 0.5811682343482971, |
| "logps/chosen": -208.1886749267578, |
| "logps/rejected": -225.54039001464844, |
| "loss": 0.6732, |
| "rewards/accuracies": 0.6500000357627869, |
| "rewards/chosen": 0.17058324813842773, |
| "rewards/margins": 0.046290840953588486, |
| "rewards/rejected": 0.12429242581129074, |
| "step": 680 |
| }, |
| { |
| "epoch": 0.552, |
| "grad_norm": 3.140164375305176, |
| "learning_rate": 2.7560000000000004e-06, |
| "logits/chosen": 0.7419080138206482, |
| "logits/rejected": 0.6190251111984253, |
| "logps/chosen": -193.7239227294922, |
| "logps/rejected": -195.77371215820312, |
| "loss": 0.6682, |
| "rewards/accuracies": 0.625, |
| "rewards/chosen": 0.2087523490190506, |
| "rewards/margins": 0.05501072481274605, |
| "rewards/rejected": 0.15374161303043365, |
| "step": 690 |
| }, |
| { |
| "epoch": 0.56, |
| "grad_norm": 3.4456276893615723, |
| "learning_rate": 2.7960000000000004e-06, |
| "logits/chosen": 0.7370930910110474, |
| "logits/rejected": 0.8321747779846191, |
| "logps/chosen": -186.71826171875, |
| "logps/rejected": -214.2365264892578, |
| "loss": 0.6777, |
| "rewards/accuracies": 0.612500011920929, |
| "rewards/chosen": 0.22456181049346924, |
| "rewards/margins": 0.03610905632376671, |
| "rewards/rejected": 0.18845276534557343, |
| "step": 700 |
| }, |
| { |
| "epoch": 0.568, |
| "grad_norm": 3.7341623306274414, |
| "learning_rate": 2.8360000000000005e-06, |
| "logits/chosen": 0.7567041516304016, |
| "logits/rejected": 0.7720674872398376, |
| "logps/chosen": -190.39320373535156, |
| "logps/rejected": -222.18826293945312, |
| "loss": 0.6632, |
| "rewards/accuracies": 0.6500000357627869, |
| "rewards/chosen": 0.22317533195018768, |
| "rewards/margins": 0.06986651569604874, |
| "rewards/rejected": 0.15330885350704193, |
| "step": 710 |
| }, |
| { |
| "epoch": 0.576, |
| "grad_norm": 2.9456615447998047, |
| "learning_rate": 2.8760000000000005e-06, |
| "logits/chosen": 0.6696327328681946, |
| "logits/rejected": 0.697002649307251, |
| "logps/chosen": -203.71766662597656, |
| "logps/rejected": -222.6426239013672, |
| "loss": 0.6658, |
| "rewards/accuracies": 0.625, |
| "rewards/chosen": 0.22747135162353516, |
| "rewards/margins": 0.06573095172643661, |
| "rewards/rejected": 0.16174040734767914, |
| "step": 720 |
| }, |
| { |
| "epoch": 0.584, |
| "grad_norm": 3.985300302505493, |
| "learning_rate": 2.9160000000000005e-06, |
| "logits/chosen": 0.6535480618476868, |
| "logits/rejected": 0.7352523803710938, |
| "logps/chosen": -202.01422119140625, |
| "logps/rejected": -223.960693359375, |
| "loss": 0.6824, |
| "rewards/accuracies": 0.4749999940395355, |
| "rewards/chosen": 0.1612798571586609, |
| "rewards/margins": 0.03076176717877388, |
| "rewards/rejected": 0.1305180937051773, |
| "step": 730 |
| }, |
| { |
| "epoch": 0.592, |
| "grad_norm": 3.8837013244628906, |
| "learning_rate": 2.956e-06, |
| "logits/chosen": 0.6671428084373474, |
| "logits/rejected": 0.6649322509765625, |
| "logps/chosen": -191.92437744140625, |
| "logps/rejected": -226.7194366455078, |
| "loss": 0.6725, |
| "rewards/accuracies": 0.625, |
| "rewards/chosen": 0.16540491580963135, |
| "rewards/margins": 0.051003750413656235, |
| "rewards/rejected": 0.11440115422010422, |
| "step": 740 |
| }, |
| { |
| "epoch": 0.6, |
| "grad_norm": 3.762125015258789, |
| "learning_rate": 2.996e-06, |
| "logits/chosen": 0.7455517649650574, |
| "logits/rejected": 0.6791686415672302, |
| "logps/chosen": -201.34228515625, |
| "logps/rejected": -205.9029998779297, |
| "loss": 0.6731, |
| "rewards/accuracies": 0.612500011920929, |
| "rewards/chosen": 0.15725918114185333, |
| "rewards/margins": 0.05198857933282852, |
| "rewards/rejected": 0.10527060180902481, |
| "step": 750 |
| }, |
| { |
| "epoch": 0.608, |
| "grad_norm": 4.017007827758789, |
| "learning_rate": 3.0360000000000002e-06, |
| "logits/chosen": 0.5845433473587036, |
| "logits/rejected": 0.6173285841941833, |
| "logps/chosen": -211.0860595703125, |
| "logps/rejected": -239.35105895996094, |
| "loss": 0.6691, |
| "rewards/accuracies": 0.612500011920929, |
| "rewards/chosen": 0.1531037837266922, |
| "rewards/margins": 0.06398037821054459, |
| "rewards/rejected": 0.08912339061498642, |
| "step": 760 |
| }, |
| { |
| "epoch": 0.616, |
| "grad_norm": 3.3737785816192627, |
| "learning_rate": 3.0760000000000003e-06, |
| "logits/chosen": 0.6799761056900024, |
| "logits/rejected": 0.7338641285896301, |
| "logps/chosen": -197.92454528808594, |
| "logps/rejected": -196.76071166992188, |
| "loss": 0.6861, |
| "rewards/accuracies": 0.5250000357627869, |
| "rewards/chosen": 0.14501558244228363, |
| "rewards/margins": 0.024691270664334297, |
| "rewards/rejected": 0.12032430619001389, |
| "step": 770 |
| }, |
| { |
| "epoch": 0.624, |
| "grad_norm": 3.9343101978302, |
| "learning_rate": 3.1160000000000003e-06, |
| "logits/chosen": 0.6844798922538757, |
| "logits/rejected": 0.6486098766326904, |
| "logps/chosen": -209.14988708496094, |
| "logps/rejected": -208.64990234375, |
| "loss": 0.684, |
| "rewards/accuracies": 0.5250000357627869, |
| "rewards/chosen": 0.12247097492218018, |
| "rewards/margins": 0.03311007842421532, |
| "rewards/rejected": 0.08936089277267456, |
| "step": 780 |
| }, |
| { |
| "epoch": 0.632, |
| "grad_norm": 3.3528807163238525, |
| "learning_rate": 3.1560000000000004e-06, |
| "logits/chosen": 0.5940313339233398, |
| "logits/rejected": 0.5495096445083618, |
| "logps/chosen": -200.02276611328125, |
| "logps/rejected": -203.39085388183594, |
| "loss": 0.6646, |
| "rewards/accuracies": 0.5875000357627869, |
| "rewards/chosen": 0.21792371571063995, |
| "rewards/margins": 0.07114797830581665, |
| "rewards/rejected": 0.1467757374048233, |
| "step": 790 |
| }, |
| { |
| "epoch": 0.64, |
| "grad_norm": 3.5861222743988037, |
| "learning_rate": 3.1960000000000004e-06, |
| "logits/chosen": 0.7208414077758789, |
| "logits/rejected": 0.7849133610725403, |
| "logps/chosen": -168.2899932861328, |
| "logps/rejected": -183.3815155029297, |
| "loss": 0.6719, |
| "rewards/accuracies": 0.5375000238418579, |
| "rewards/chosen": 0.22198240458965302, |
| "rewards/margins": 0.059726305305957794, |
| "rewards/rejected": 0.162256121635437, |
| "step": 800 |
| }, |
| { |
| "epoch": 0.648, |
| "grad_norm": 3.695352077484131, |
| "learning_rate": 3.2360000000000004e-06, |
| "logits/chosen": 0.7138350009918213, |
| "logits/rejected": 0.7761127352714539, |
| "logps/chosen": -191.26907348632812, |
| "logps/rejected": -199.15538024902344, |
| "loss": 0.6862, |
| "rewards/accuracies": 0.512499988079071, |
| "rewards/chosen": 0.17254018783569336, |
| "rewards/margins": 0.027448756620287895, |
| "rewards/rejected": 0.14509142935276031, |
| "step": 810 |
| }, |
| { |
| "epoch": 0.656, |
| "grad_norm": 4.936346054077148, |
| "learning_rate": 3.2760000000000005e-06, |
| "logits/chosen": 0.651190459728241, |
| "logits/rejected": 0.6843032240867615, |
| "logps/chosen": -185.53834533691406, |
| "logps/rejected": -210.05166625976562, |
| "loss": 0.6538, |
| "rewards/accuracies": 0.6500000357627869, |
| "rewards/chosen": 0.17089328169822693, |
| "rewards/margins": 0.09923329949378967, |
| "rewards/rejected": 0.07165997475385666, |
| "step": 820 |
| }, |
| { |
| "epoch": 0.664, |
| "grad_norm": 3.7326865196228027, |
| "learning_rate": 3.3160000000000005e-06, |
| "logits/chosen": 0.8238712549209595, |
| "logits/rejected": 0.849129855632782, |
| "logps/chosen": -199.64431762695312, |
| "logps/rejected": -206.5725555419922, |
| "loss": 0.6559, |
| "rewards/accuracies": 0.6500000357627869, |
| "rewards/chosen": 0.14575400948524475, |
| "rewards/margins": 0.09553883224725723, |
| "rewards/rejected": 0.05021516606211662, |
| "step": 830 |
| }, |
| { |
| "epoch": 0.672, |
| "grad_norm": 3.8525242805480957, |
| "learning_rate": 3.3560000000000006e-06, |
| "logits/chosen": 0.68717360496521, |
| "logits/rejected": 0.775698184967041, |
| "logps/chosen": -198.4635009765625, |
| "logps/rejected": -218.93565368652344, |
| "loss": 0.6672, |
| "rewards/accuracies": 0.5875000357627869, |
| "rewards/chosen": 0.03423814848065376, |
| "rewards/margins": 0.07274122536182404, |
| "rewards/rejected": -0.03850306198000908, |
| "step": 840 |
| }, |
| { |
| "epoch": 0.68, |
| "grad_norm": 3.930669069290161, |
| "learning_rate": 3.3960000000000006e-06, |
| "logits/chosen": 0.7518499493598938, |
| "logits/rejected": 0.8216703534126282, |
| "logps/chosen": -205.54248046875, |
| "logps/rejected": -207.52676391601562, |
| "loss": 0.6571, |
| "rewards/accuracies": 0.6000000238418579, |
| "rewards/chosen": 0.12743444740772247, |
| "rewards/margins": 0.0990920141339302, |
| "rewards/rejected": 0.028342435136437416, |
| "step": 850 |
| }, |
| { |
| "epoch": 0.688, |
| "grad_norm": 3.8377163410186768, |
| "learning_rate": 3.4360000000000006e-06, |
| "logits/chosen": 0.7670461535453796, |
| "logits/rejected": 0.7580811977386475, |
| "logps/chosen": -182.31930541992188, |
| "logps/rejected": -194.57054138183594, |
| "loss": 0.6654, |
| "rewards/accuracies": 0.625, |
| "rewards/chosen": 0.18013069033622742, |
| "rewards/margins": 0.07547135651111603, |
| "rewards/rejected": 0.10465934127569199, |
| "step": 860 |
| }, |
| { |
| "epoch": 0.696, |
| "grad_norm": 4.037013053894043, |
| "learning_rate": 3.4760000000000007e-06, |
| "logits/chosen": 0.6302778720855713, |
| "logits/rejected": 0.5533519983291626, |
| "logps/chosen": -201.7500457763672, |
| "logps/rejected": -217.78018188476562, |
| "loss": 0.6598, |
| "rewards/accuracies": 0.5875000357627869, |
| "rewards/chosen": 0.1704377979040146, |
| "rewards/margins": 0.10010436922311783, |
| "rewards/rejected": 0.07033341377973557, |
| "step": 870 |
| }, |
| { |
| "epoch": 0.704, |
| "grad_norm": 3.9927608966827393, |
| "learning_rate": 3.5160000000000007e-06, |
| "logits/chosen": 0.6597375273704529, |
| "logits/rejected": 0.6159626245498657, |
| "logps/chosen": -193.73165893554688, |
| "logps/rejected": -198.38595581054688, |
| "loss": 0.6935, |
| "rewards/accuracies": 0.4625000059604645, |
| "rewards/chosen": 0.19550852477550507, |
| "rewards/margins": 0.04272466525435448, |
| "rewards/rejected": 0.15278387069702148, |
| "step": 880 |
| }, |
| { |
| "epoch": 0.712, |
| "grad_norm": 3.5315475463867188, |
| "learning_rate": 3.5560000000000008e-06, |
| "logits/chosen": 0.685095489025116, |
| "logits/rejected": 0.6823846697807312, |
| "logps/chosen": -201.48406982421875, |
| "logps/rejected": -212.81094360351562, |
| "loss": 0.6487, |
| "rewards/accuracies": 0.675000011920929, |
| "rewards/chosen": 0.20819902420043945, |
| "rewards/margins": 0.10897611826658249, |
| "rewards/rejected": 0.09922291338443756, |
| "step": 890 |
| }, |
| { |
| "epoch": 0.72, |
| "grad_norm": 3.673504114151001, |
| "learning_rate": 3.596e-06, |
| "logits/chosen": 0.7472502589225769, |
| "logits/rejected": 0.623907208442688, |
| "logps/chosen": -183.38999938964844, |
| "logps/rejected": -181.52911376953125, |
| "loss": 0.6561, |
| "rewards/accuracies": 0.637499988079071, |
| "rewards/chosen": 0.28343966603279114, |
| "rewards/margins": 0.09876996278762817, |
| "rewards/rejected": 0.18466970324516296, |
| "step": 900 |
| }, |
| { |
| "epoch": 0.728, |
| "grad_norm": 4.183164596557617, |
| "learning_rate": 3.636e-06, |
| "logits/chosen": 0.6977134346961975, |
| "logits/rejected": 0.6741080284118652, |
| "logps/chosen": -192.44366455078125, |
| "logps/rejected": -186.1464080810547, |
| "loss": 0.6923, |
| "rewards/accuracies": 0.550000011920929, |
| "rewards/chosen": 0.25711265206336975, |
| "rewards/margins": 0.020020050927996635, |
| "rewards/rejected": 0.23709259927272797, |
| "step": 910 |
| }, |
| { |
| "epoch": 0.736, |
| "grad_norm": 3.74733304977417, |
| "learning_rate": 3.676e-06, |
| "logits/chosen": 0.7447720170021057, |
| "logits/rejected": 0.6725283265113831, |
| "logps/chosen": -186.6201934814453, |
| "logps/rejected": -204.659423828125, |
| "loss": 0.6199, |
| "rewards/accuracies": 0.7125000357627869, |
| "rewards/chosen": 0.3455497920513153, |
| "rewards/margins": 0.18741247057914734, |
| "rewards/rejected": 0.15813732147216797, |
| "step": 920 |
| }, |
| { |
| "epoch": 0.744, |
| "grad_norm": 4.2909255027771, |
| "learning_rate": 3.716e-06, |
| "logits/chosen": 0.656113862991333, |
| "logits/rejected": 0.6516739130020142, |
| "logps/chosen": -197.02088928222656, |
| "logps/rejected": -213.64414978027344, |
| "loss": 0.6621, |
| "rewards/accuracies": 0.6500000357627869, |
| "rewards/chosen": 0.20855574309825897, |
| "rewards/margins": 0.08712134510278702, |
| "rewards/rejected": 0.12143440544605255, |
| "step": 930 |
| }, |
| { |
| "epoch": 0.752, |
| "grad_norm": 4.346622943878174, |
| "learning_rate": 3.756e-06, |
| "logits/chosen": 0.7425007224082947, |
| "logits/rejected": 0.7672116160392761, |
| "logps/chosen": -164.4781494140625, |
| "logps/rejected": -189.1973876953125, |
| "loss": 0.6322, |
| "rewards/accuracies": 0.7250000238418579, |
| "rewards/chosen": 0.2733791768550873, |
| "rewards/margins": 0.15534010529518127, |
| "rewards/rejected": 0.1180390939116478, |
| "step": 940 |
| }, |
| { |
| "epoch": 0.76, |
| "grad_norm": 3.8516108989715576, |
| "learning_rate": 3.796e-06, |
| "logits/chosen": 0.6322047710418701, |
| "logits/rejected": 0.6729938387870789, |
| "logps/chosen": -179.63258361816406, |
| "logps/rejected": -213.58462524414062, |
| "loss": 0.6348, |
| "rewards/accuracies": 0.6000000238418579, |
| "rewards/chosen": 0.10944360494613647, |
| "rewards/margins": 0.161842480301857, |
| "rewards/rejected": -0.05239887163043022, |
| "step": 950 |
| }, |
| { |
| "epoch": 0.768, |
| "grad_norm": 3.8706507682800293, |
| "learning_rate": 3.836e-06, |
| "logits/chosen": 0.7383615374565125, |
| "logits/rejected": 0.645232617855072, |
| "logps/chosen": -209.4774932861328, |
| "logps/rejected": -218.1513214111328, |
| "loss": 0.6402, |
| "rewards/accuracies": 0.6500000357627869, |
| "rewards/chosen": 0.016749557107686996, |
| "rewards/margins": 0.16539375483989716, |
| "rewards/rejected": -0.14864420890808105, |
| "step": 960 |
| }, |
| { |
| "epoch": 0.776, |
| "grad_norm": 3.860457420349121, |
| "learning_rate": 3.876000000000001e-06, |
| "logits/chosen": 0.7996999025344849, |
| "logits/rejected": 0.7975447773933411, |
| "logps/chosen": -197.4671630859375, |
| "logps/rejected": -207.5522003173828, |
| "loss": 0.6625, |
| "rewards/accuracies": 0.550000011920929, |
| "rewards/chosen": 0.1389116793870926, |
| "rewards/margins": 0.09984000772237778, |
| "rewards/rejected": 0.03907167166471481, |
| "step": 970 |
| }, |
| { |
| "epoch": 0.784, |
| "grad_norm": 4.529206275939941, |
| "learning_rate": 3.916e-06, |
| "logits/chosen": 0.7569703459739685, |
| "logits/rejected": 0.7382944226264954, |
| "logps/chosen": -191.17686462402344, |
| "logps/rejected": -207.4125518798828, |
| "loss": 0.625, |
| "rewards/accuracies": 0.612500011920929, |
| "rewards/chosen": 0.0675772950053215, |
| "rewards/margins": 0.2260824292898178, |
| "rewards/rejected": -0.15850511193275452, |
| "step": 980 |
| }, |
| { |
| "epoch": 0.792, |
| "grad_norm": 3.819338083267212, |
| "learning_rate": 3.956000000000001e-06, |
| "logits/chosen": 0.7830851674079895, |
| "logits/rejected": 0.7947285771369934, |
| "logps/chosen": -205.89517211914062, |
| "logps/rejected": -224.50967407226562, |
| "loss": 0.6524, |
| "rewards/accuracies": 0.6000000238418579, |
| "rewards/chosen": -0.003182247979566455, |
| "rewards/margins": 0.1461525410413742, |
| "rewards/rejected": -0.14933478832244873, |
| "step": 990 |
| }, |
| { |
| "epoch": 0.8, |
| "grad_norm": 3.4207472801208496, |
| "learning_rate": 3.996e-06, |
| "logits/chosen": 0.6669479012489319, |
| "logits/rejected": 0.7112082839012146, |
| "logps/chosen": -205.70693969726562, |
| "logps/rejected": -223.3166046142578, |
| "loss": 0.6352, |
| "rewards/accuracies": 0.675000011920929, |
| "rewards/chosen": 0.08382979780435562, |
| "rewards/margins": 0.19078943133354187, |
| "rewards/rejected": -0.10695965588092804, |
| "step": 1000 |
| }, |
| { |
| "epoch": 0.808, |
| "grad_norm": 4.2891526222229, |
| "learning_rate": 4.036000000000001e-06, |
| "logits/chosen": 0.6510803699493408, |
| "logits/rejected": 0.6577145457267761, |
| "logps/chosen": -191.43975830078125, |
| "logps/rejected": -217.14639282226562, |
| "loss": 0.6366, |
| "rewards/accuracies": 0.6500000357627869, |
| "rewards/chosen": 0.09330642223358154, |
| "rewards/margins": 0.17022596299648285, |
| "rewards/rejected": -0.07691951841115952, |
| "step": 1010 |
| }, |
| { |
| "epoch": 0.816, |
| "grad_norm": 4.56123685836792, |
| "learning_rate": 4.0760000000000004e-06, |
| "logits/chosen": 0.6556877493858337, |
| "logits/rejected": 0.6388117671012878, |
| "logps/chosen": -197.0552520751953, |
| "logps/rejected": -203.9344482421875, |
| "loss": 0.6354, |
| "rewards/accuracies": 0.574999988079071, |
| "rewards/chosen": 0.07625667005777359, |
| "rewards/margins": 0.21561256051063538, |
| "rewards/rejected": -0.13935589790344238, |
| "step": 1020 |
| }, |
| { |
| "epoch": 0.824, |
| "grad_norm": 3.7361807823181152, |
| "learning_rate": 4.116000000000001e-06, |
| "logits/chosen": 0.7679457068443298, |
| "logits/rejected": 0.7817143797874451, |
| "logps/chosen": -205.3223419189453, |
| "logps/rejected": -211.694580078125, |
| "loss": 0.654, |
| "rewards/accuracies": 0.550000011920929, |
| "rewards/chosen": 0.11138266324996948, |
| "rewards/margins": 0.13815438747406006, |
| "rewards/rejected": -0.026771722361445427, |
| "step": 1030 |
| }, |
| { |
| "epoch": 0.832, |
| "grad_norm": 5.420875072479248, |
| "learning_rate": 4.1560000000000005e-06, |
| "logits/chosen": 0.6636489629745483, |
| "logits/rejected": 0.692511260509491, |
| "logps/chosen": -207.5504150390625, |
| "logps/rejected": -225.18753051757812, |
| "loss": 0.6344, |
| "rewards/accuracies": 0.675000011920929, |
| "rewards/chosen": 0.1605958491563797, |
| "rewards/margins": 0.1674598604440689, |
| "rewards/rejected": -0.00686401454731822, |
| "step": 1040 |
| }, |
| { |
| "epoch": 0.84, |
| "grad_norm": 3.272894859313965, |
| "learning_rate": 4.196e-06, |
| "logits/chosen": 0.7486108541488647, |
| "logits/rejected": 0.7712265253067017, |
| "logps/chosen": -194.90943908691406, |
| "logps/rejected": -205.329345703125, |
| "loss": 0.6322, |
| "rewards/accuracies": 0.6625000238418579, |
| "rewards/chosen": 0.22325454652309418, |
| "rewards/margins": 0.22328029572963715, |
| "rewards/rejected": -2.5737286705407314e-05, |
| "step": 1050 |
| }, |
| { |
| "epoch": 0.848, |
| "grad_norm": 5.14847993850708, |
| "learning_rate": 4.236e-06, |
| "logits/chosen": 0.7043625116348267, |
| "logits/rejected": 0.7297138571739197, |
| "logps/chosen": -214.93301391601562, |
| "logps/rejected": -221.634765625, |
| "loss": 0.6797, |
| "rewards/accuracies": 0.5625, |
| "rewards/chosen": 0.19927440583705902, |
| "rewards/margins": 0.10101515054702759, |
| "rewards/rejected": 0.09825924783945084, |
| "step": 1060 |
| }, |
| { |
| "epoch": 0.856, |
| "grad_norm": 4.329235076904297, |
| "learning_rate": 4.276e-06, |
| "logits/chosen": 0.7435252070426941, |
| "logits/rejected": 0.7416861653327942, |
| "logps/chosen": -206.2967987060547, |
| "logps/rejected": -221.4429168701172, |
| "loss": 0.6532, |
| "rewards/accuracies": 0.625, |
| "rewards/chosen": 0.21223540604114532, |
| "rewards/margins": 0.14147967100143433, |
| "rewards/rejected": 0.070755735039711, |
| "step": 1070 |
| }, |
| { |
| "epoch": 0.864, |
| "grad_norm": 3.7461328506469727, |
| "learning_rate": 4.316e-06, |
| "logits/chosen": 0.8374633193016052, |
| "logits/rejected": 0.8583774566650391, |
| "logps/chosen": -203.61318969726562, |
| "logps/rejected": -218.10350036621094, |
| "loss": 0.6082, |
| "rewards/accuracies": 0.6500000357627869, |
| "rewards/chosen": 0.2129761427640915, |
| "rewards/margins": 0.223725363612175, |
| "rewards/rejected": -0.010749227367341518, |
| "step": 1080 |
| }, |
| { |
| "epoch": 0.872, |
| "grad_norm": 3.664881467819214, |
| "learning_rate": 4.356e-06, |
| "logits/chosen": 0.7724778652191162, |
| "logits/rejected": 0.7607793807983398, |
| "logps/chosen": -194.4547576904297, |
| "logps/rejected": -221.48941040039062, |
| "loss": 0.6174, |
| "rewards/accuracies": 0.625, |
| "rewards/chosen": 0.14989307522773743, |
| "rewards/margins": 0.24988269805908203, |
| "rewards/rejected": -0.0999896302819252, |
| "step": 1090 |
| }, |
| { |
| "epoch": 0.88, |
| "grad_norm": 3.962773323059082, |
| "learning_rate": 4.396e-06, |
| "logits/chosen": 0.8602146506309509, |
| "logits/rejected": 0.7610459327697754, |
| "logps/chosen": -186.56153869628906, |
| "logps/rejected": -191.79127502441406, |
| "loss": 0.609, |
| "rewards/accuracies": 0.6500000357627869, |
| "rewards/chosen": 0.13783621788024902, |
| "rewards/margins": 0.24294762313365936, |
| "rewards/rejected": -0.10511143505573273, |
| "step": 1100 |
| }, |
| { |
| "epoch": 0.888, |
| "grad_norm": 3.6281254291534424, |
| "learning_rate": 4.436e-06, |
| "logits/chosen": 0.7969589233398438, |
| "logits/rejected": 0.7449702620506287, |
| "logps/chosen": -199.1891326904297, |
| "logps/rejected": -212.9011688232422, |
| "loss": 0.6054, |
| "rewards/accuracies": 0.6500000357627869, |
| "rewards/chosen": -0.007032311055809259, |
| "rewards/margins": 0.28387296199798584, |
| "rewards/rejected": -0.29090526700019836, |
| "step": 1110 |
| }, |
| { |
| "epoch": 0.896, |
| "grad_norm": 4.473705768585205, |
| "learning_rate": 4.476e-06, |
| "logits/chosen": 0.8331708312034607, |
| "logits/rejected": 0.8655223846435547, |
| "logps/chosen": -190.3954620361328, |
| "logps/rejected": -204.8839874267578, |
| "loss": 0.6281, |
| "rewards/accuracies": 0.637499988079071, |
| "rewards/chosen": 0.1991288810968399, |
| "rewards/margins": 0.20112191140651703, |
| "rewards/rejected": -0.0019930123817175627, |
| "step": 1120 |
| }, |
| { |
| "epoch": 0.904, |
| "grad_norm": 6.322517395019531, |
| "learning_rate": 4.5160000000000005e-06, |
| "logits/chosen": 0.8980779051780701, |
| "logits/rejected": 0.8646361231803894, |
| "logps/chosen": -207.82090759277344, |
| "logps/rejected": -202.1289520263672, |
| "loss": 0.6373, |
| "rewards/accuracies": 0.625, |
| "rewards/chosen": 0.1084756851196289, |
| "rewards/margins": 0.20956145226955414, |
| "rewards/rejected": -0.10108575969934464, |
| "step": 1130 |
| }, |
| { |
| "epoch": 0.912, |
| "grad_norm": 6.67050838470459, |
| "learning_rate": 4.556e-06, |
| "logits/chosen": 0.6762229800224304, |
| "logits/rejected": 0.843460738658905, |
| "logps/chosen": -205.12686157226562, |
| "logps/rejected": -238.93972778320312, |
| "loss": 0.6326, |
| "rewards/accuracies": 0.675000011920929, |
| "rewards/chosen": -0.20181012153625488, |
| "rewards/margins": 0.23140645027160645, |
| "rewards/rejected": -0.43321657180786133, |
| "step": 1140 |
| }, |
| { |
| "epoch": 0.92, |
| "grad_norm": 3.84826922416687, |
| "learning_rate": 4.5960000000000006e-06, |
| "logits/chosen": 0.8595611453056335, |
| "logits/rejected": 0.7575063705444336, |
| "logps/chosen": -191.56690979003906, |
| "logps/rejected": -211.3734588623047, |
| "loss": 0.7016, |
| "rewards/accuracies": 0.5375000238418579, |
| "rewards/chosen": -0.017901450395584106, |
| "rewards/margins": 0.08475067466497421, |
| "rewards/rejected": -0.10265214741230011, |
| "step": 1150 |
| }, |
| { |
| "epoch": 0.928, |
| "grad_norm": 5.697420120239258, |
| "learning_rate": 4.636e-06, |
| "logits/chosen": 0.7715900540351868, |
| "logits/rejected": 0.6638416051864624, |
| "logps/chosen": -197.2618865966797, |
| "logps/rejected": -178.20579528808594, |
| "loss": 0.6362, |
| "rewards/accuracies": 0.625, |
| "rewards/chosen": 0.10496443510055542, |
| "rewards/margins": 0.22472666203975677, |
| "rewards/rejected": -0.11976220458745956, |
| "step": 1160 |
| }, |
| { |
| "epoch": 0.936, |
| "grad_norm": 4.553537845611572, |
| "learning_rate": 4.676000000000001e-06, |
| "logits/chosen": 0.7215653657913208, |
| "logits/rejected": 0.7404984831809998, |
| "logps/chosen": -198.07424926757812, |
| "logps/rejected": -231.4817657470703, |
| "loss": 0.5759, |
| "rewards/accuracies": 0.699999988079071, |
| "rewards/chosen": 0.14309090375900269, |
| "rewards/margins": 0.4066457450389862, |
| "rewards/rejected": -0.2635548412799835, |
| "step": 1170 |
| }, |
| { |
| "epoch": 0.944, |
| "grad_norm": 6.022956848144531, |
| "learning_rate": 4.716e-06, |
| "logits/chosen": 0.8578891754150391, |
| "logits/rejected": 0.8036954998970032, |
| "logps/chosen": -202.0140380859375, |
| "logps/rejected": -212.0448760986328, |
| "loss": 0.668, |
| "rewards/accuracies": 0.5625, |
| "rewards/chosen": -0.03294707089662552, |
| "rewards/margins": 0.15671278536319733, |
| "rewards/rejected": -0.18965983390808105, |
| "step": 1180 |
| }, |
| { |
| "epoch": 0.952, |
| "grad_norm": 4.317718029022217, |
| "learning_rate": 4.756000000000001e-06, |
| "logits/chosen": 0.8726083636283875, |
| "logits/rejected": 0.9713365435600281, |
| "logps/chosen": -185.42198181152344, |
| "logps/rejected": -206.3362274169922, |
| "loss": 0.6041, |
| "rewards/accuracies": 0.637499988079071, |
| "rewards/chosen": -0.05594133213162422, |
| "rewards/margins": 0.28803297877311707, |
| "rewards/rejected": -0.3439743220806122, |
| "step": 1190 |
| }, |
| { |
| "epoch": 0.96, |
| "grad_norm": 4.923951625823975, |
| "learning_rate": 4.796e-06, |
| "logits/chosen": 0.8178095817565918, |
| "logits/rejected": 0.8852421641349792, |
| "logps/chosen": -177.03765869140625, |
| "logps/rejected": -189.92286682128906, |
| "loss": 0.6351, |
| "rewards/accuracies": 0.6000000238418579, |
| "rewards/chosen": 0.016861964017152786, |
| "rewards/margins": 0.20337799191474915, |
| "rewards/rejected": -0.18651603162288666, |
| "step": 1200 |
| }, |
| { |
| "epoch": 0.968, |
| "grad_norm": 4.542464256286621, |
| "learning_rate": 4.836e-06, |
| "logits/chosen": 0.8311338424682617, |
| "logits/rejected": 0.7399374842643738, |
| "logps/chosen": -202.60267639160156, |
| "logps/rejected": -203.94236755371094, |
| "loss": 0.6779, |
| "rewards/accuracies": 0.612500011920929, |
| "rewards/chosen": -0.19154860079288483, |
| "rewards/margins": 0.115830197930336, |
| "rewards/rejected": -0.3073787987232208, |
| "step": 1210 |
| }, |
| { |
| "epoch": 0.976, |
| "grad_norm": 6.436461925506592, |
| "learning_rate": 4.876e-06, |
| "logits/chosen": 0.794135570526123, |
| "logits/rejected": 0.7919009327888489, |
| "logps/chosen": -207.1306610107422, |
| "logps/rejected": -205.37637329101562, |
| "loss": 0.6314, |
| "rewards/accuracies": 0.612500011920929, |
| "rewards/chosen": -0.07707437127828598, |
| "rewards/margins": 0.25921908020973206, |
| "rewards/rejected": -0.33629345893859863, |
| "step": 1220 |
| }, |
| { |
| "epoch": 0.984, |
| "grad_norm": 5.068552494049072, |
| "learning_rate": 4.916e-06, |
| "logits/chosen": 0.9020944833755493, |
| "logits/rejected": 0.8565078973770142, |
| "logps/chosen": -191.53636169433594, |
| "logps/rejected": -192.14581298828125, |
| "loss": 0.6288, |
| "rewards/accuracies": 0.612500011920929, |
| "rewards/chosen": 0.21075907349586487, |
| "rewards/margins": 0.26774677634239197, |
| "rewards/rejected": -0.056987714022397995, |
| "step": 1230 |
| }, |
| { |
| "epoch": 0.992, |
| "grad_norm": 3.66788649559021, |
| "learning_rate": 4.9560000000000005e-06, |
| "logits/chosen": 0.759829044342041, |
| "logits/rejected": 0.7424188852310181, |
| "logps/chosen": -196.9650115966797, |
| "logps/rejected": -197.923583984375, |
| "loss": 0.5986, |
| "rewards/accuracies": 0.6875, |
| "rewards/chosen": 0.22332988679409027, |
| "rewards/margins": 0.3243139684200287, |
| "rewards/rejected": -0.1009841114282608, |
| "step": 1240 |
| }, |
| { |
| "epoch": 1.0, |
| "grad_norm": 5.5345940589904785, |
| "learning_rate": 4.996e-06, |
| "logits/chosen": 0.8046256899833679, |
| "logits/rejected": 0.7486367225646973, |
| "logps/chosen": -196.2307586669922, |
| "logps/rejected": -213.2323455810547, |
| "loss": 0.6443, |
| "rewards/accuracies": 0.6500000357627869, |
| "rewards/chosen": -0.04985840991139412, |
| "rewards/margins": 0.21199627220630646, |
| "rewards/rejected": -0.2618546485900879, |
| "step": 1250 |
| }, |
| { |
| "epoch": 1.008, |
| "grad_norm": 3.8329803943634033, |
| "learning_rate": 4.9999921043206356e-06, |
| "logits/chosen": 0.8027931451797485, |
| "logits/rejected": 0.7976822853088379, |
| "logps/chosen": -198.8918914794922, |
| "logps/rejected": -188.731201171875, |
| "loss": 0.5909, |
| "rewards/accuracies": 0.7125000357627869, |
| "rewards/chosen": 0.2840830385684967, |
| "rewards/margins": 0.3135008215904236, |
| "rewards/rejected": -0.029417768120765686, |
| "step": 1260 |
| }, |
| { |
| "epoch": 1.016, |
| "grad_norm": 5.155350685119629, |
| "learning_rate": 4.99996481067822e-06, |
| "logits/chosen": 0.8220119476318359, |
| "logits/rejected": 0.8561745882034302, |
| "logps/chosen": -206.8731231689453, |
| "logps/rejected": -216.76406860351562, |
| "loss": 0.5927, |
| "rewards/accuracies": 0.637499988079071, |
| "rewards/chosen": 0.008052355609834194, |
| "rewards/margins": 0.3377354145050049, |
| "rewards/rejected": -0.3296830654144287, |
| "step": 1270 |
| }, |
| { |
| "epoch": 1.024, |
| "grad_norm": 4.907769203186035, |
| "learning_rate": 4.99991802180802e-06, |
| "logits/chosen": 0.9498602151870728, |
| "logits/rejected": 0.8992852568626404, |
| "logps/chosen": -205.61570739746094, |
| "logps/rejected": -227.8877716064453, |
| "loss": 0.6093, |
| "rewards/accuracies": 0.637499988079071, |
| "rewards/chosen": 0.015175617299973965, |
| "rewards/margins": 0.2621200382709503, |
| "rewards/rejected": -0.24694442749023438, |
| "step": 1280 |
| }, |
| { |
| "epoch": 1.032, |
| "grad_norm": 5.946913242340088, |
| "learning_rate": 4.999851738074904e-06, |
| "logits/chosen": 0.9917473196983337, |
| "logits/rejected": 0.9724864363670349, |
| "logps/chosen": -179.44322204589844, |
| "logps/rejected": -202.18516540527344, |
| "loss": 0.6529, |
| "rewards/accuracies": 0.612500011920929, |
| "rewards/chosen": 0.23127515614032745, |
| "rewards/margins": 0.1929376870393753, |
| "rewards/rejected": 0.038337476551532745, |
| "step": 1290 |
| }, |
| { |
| "epoch": 1.04, |
| "grad_norm": 3.936194896697998, |
| "learning_rate": 4.999765959995769e-06, |
| "logits/chosen": 0.9820284247398376, |
| "logits/rejected": 0.8659685254096985, |
| "logps/chosen": -204.54019165039062, |
| "logps/rejected": -193.43544006347656, |
| "loss": 0.5873, |
| "rewards/accuracies": 0.7250000238418579, |
| "rewards/chosen": 0.2293062061071396, |
| "rewards/margins": 0.34357786178588867, |
| "rewards/rejected": -0.11427167803049088, |
| "step": 1300 |
| }, |
| { |
| "epoch": 1.048, |
| "grad_norm": 4.1008076667785645, |
| "learning_rate": 4.999660688239527e-06, |
| "logits/chosen": 0.835074245929718, |
| "logits/rejected": 0.7787433862686157, |
| "logps/chosen": -188.2495880126953, |
| "logps/rejected": -197.9439697265625, |
| "loss": 0.6111, |
| "rewards/accuracies": 0.6875, |
| "rewards/chosen": 0.2561519742012024, |
| "rewards/margins": 0.31733056902885437, |
| "rewards/rejected": -0.06117859110236168, |
| "step": 1310 |
| }, |
| { |
| "epoch": 1.056, |
| "grad_norm": 5.592878341674805, |
| "learning_rate": 4.9995359236271094e-06, |
| "logits/chosen": 0.883532702922821, |
| "logits/rejected": 0.8534946441650391, |
| "logps/chosen": -199.9098358154297, |
| "logps/rejected": -197.54641723632812, |
| "loss": 0.6772, |
| "rewards/accuracies": 0.574999988079071, |
| "rewards/chosen": 0.2197556495666504, |
| "rewards/margins": 0.17633479833602905, |
| "rewards/rejected": 0.04342082887887955, |
| "step": 1320 |
| }, |
| { |
| "epoch": 1.064, |
| "grad_norm": 4.941606521606445, |
| "learning_rate": 4.999391667131456e-06, |
| "logits/chosen": 0.860753059387207, |
| "logits/rejected": 0.8954046368598938, |
| "logps/chosen": -178.4503936767578, |
| "logps/rejected": -196.09449768066406, |
| "loss": 0.6786, |
| "rewards/accuracies": 0.612500011920929, |
| "rewards/chosen": 0.48869457840919495, |
| "rewards/margins": 0.12707066535949707, |
| "rewards/rejected": 0.36162394285202026, |
| "step": 1330 |
| }, |
| { |
| "epoch": 1.072, |
| "grad_norm": 5.233992576599121, |
| "learning_rate": 4.999227919877506e-06, |
| "logits/chosen": 0.8799529075622559, |
| "logits/rejected": 0.8783776164054871, |
| "logps/chosen": -195.3287811279297, |
| "logps/rejected": -195.21665954589844, |
| "loss": 0.6231, |
| "rewards/accuracies": 0.6500000357627869, |
| "rewards/chosen": 0.4896652400493622, |
| "rewards/margins": 0.27517566084861755, |
| "rewards/rejected": 0.2144896239042282, |
| "step": 1340 |
| }, |
| { |
| "epoch": 1.08, |
| "grad_norm": 8.488966941833496, |
| "learning_rate": 4.999044683142196e-06, |
| "logits/chosen": 1.042872667312622, |
| "logits/rejected": 0.8863340616226196, |
| "logps/chosen": -197.02415466308594, |
| "logps/rejected": -202.64767456054688, |
| "loss": 0.6329, |
| "rewards/accuracies": 0.6000000238418579, |
| "rewards/chosen": 0.4562924802303314, |
| "rewards/margins": 0.19963227212429047, |
| "rewards/rejected": 0.25666019320487976, |
| "step": 1350 |
| }, |
| { |
| "epoch": 1.088, |
| "grad_norm": 4.995717525482178, |
| "learning_rate": 4.99884195835444e-06, |
| "logits/chosen": 0.8799748420715332, |
| "logits/rejected": 0.8477336764335632, |
| "logps/chosen": -191.94471740722656, |
| "logps/rejected": -206.7391815185547, |
| "loss": 0.6352, |
| "rewards/accuracies": 0.6000000238418579, |
| "rewards/chosen": 0.18572449684143066, |
| "rewards/margins": 0.2937488555908203, |
| "rewards/rejected": -0.10802433639764786, |
| "step": 1360 |
| }, |
| { |
| "epoch": 1.096, |
| "grad_norm": 4.870668888092041, |
| "learning_rate": 4.998619747095129e-06, |
| "logits/chosen": 0.7523741722106934, |
| "logits/rejected": 0.7566614747047424, |
| "logps/chosen": -200.09573364257812, |
| "logps/rejected": -209.72647094726562, |
| "loss": 0.6096, |
| "rewards/accuracies": 0.6625000238418579, |
| "rewards/chosen": -0.1902216672897339, |
| "rewards/margins": 0.32739758491516113, |
| "rewards/rejected": -0.5176193118095398, |
| "step": 1370 |
| }, |
| { |
| "epoch": 1.104, |
| "grad_norm": 4.8878889083862305, |
| "learning_rate": 4.998378051097111e-06, |
| "logits/chosen": 0.6805158257484436, |
| "logits/rejected": 0.6997960805892944, |
| "logps/chosen": -211.3839874267578, |
| "logps/rejected": -243.62759399414062, |
| "loss": 0.6292, |
| "rewards/accuracies": 0.6500000357627869, |
| "rewards/chosen": -0.17687633633613586, |
| "rewards/margins": 0.31230995059013367, |
| "rewards/rejected": -0.48918625712394714, |
| "step": 1380 |
| }, |
| { |
| "epoch": 1.112, |
| "grad_norm": 4.575040817260742, |
| "learning_rate": 4.998116872245178e-06, |
| "logits/chosen": 0.9538719058036804, |
| "logits/rejected": 0.8567468523979187, |
| "logps/chosen": -197.8600311279297, |
| "logps/rejected": -194.09893798828125, |
| "loss": 0.6508, |
| "rewards/accuracies": 0.5625, |
| "rewards/chosen": -0.0491158552467823, |
| "rewards/margins": 0.23099613189697266, |
| "rewards/rejected": -0.28011199831962585, |
| "step": 1390 |
| }, |
| { |
| "epoch": 1.12, |
| "grad_norm": 4.09750509262085, |
| "learning_rate": 4.997836212576057e-06, |
| "logits/chosen": 0.7558371424674988, |
| "logits/rejected": 0.7620294690132141, |
| "logps/chosen": -210.34483337402344, |
| "logps/rejected": -220.9149627685547, |
| "loss": 0.583, |
| "rewards/accuracies": 0.699999988079071, |
| "rewards/chosen": -0.07163530588150024, |
| "rewards/margins": 0.40291815996170044, |
| "rewards/rejected": -0.4745534360408783, |
| "step": 1400 |
| }, |
| { |
| "epoch": 1.1280000000000001, |
| "grad_norm": 4.7588887214660645, |
| "learning_rate": 4.997536074278388e-06, |
| "logits/chosen": 0.7905246019363403, |
| "logits/rejected": 0.8206660151481628, |
| "logps/chosen": -211.0833740234375, |
| "logps/rejected": -218.993896484375, |
| "loss": 0.6144, |
| "rewards/accuracies": 0.637499988079071, |
| "rewards/chosen": -0.14193372428417206, |
| "rewards/margins": 0.2602233588695526, |
| "rewards/rejected": -0.4021570682525635, |
| "step": 1410 |
| }, |
| { |
| "epoch": 1.1360000000000001, |
| "grad_norm": 6.200011730194092, |
| "learning_rate": 4.9972164596927085e-06, |
| "logits/chosen": 0.9080677032470703, |
| "logits/rejected": 0.8735291361808777, |
| "logps/chosen": -194.74559020996094, |
| "logps/rejected": -200.72265625, |
| "loss": 0.6563, |
| "rewards/accuracies": 0.625, |
| "rewards/chosen": -0.202849343419075, |
| "rewards/margins": 0.19263452291488647, |
| "rewards/rejected": -0.3954838514328003, |
| "step": 1420 |
| }, |
| { |
| "epoch": 1.144, |
| "grad_norm": 7.389719486236572, |
| "learning_rate": 4.996877371311439e-06, |
| "logits/chosen": 0.7800935506820679, |
| "logits/rejected": 0.8093517422676086, |
| "logps/chosen": -211.73667907714844, |
| "logps/rejected": -222.60000610351562, |
| "loss": 0.5757, |
| "rewards/accuracies": 0.75, |
| "rewards/chosen": -0.2545677125453949, |
| "rewards/margins": 0.49100762605667114, |
| "rewards/rejected": -0.7455753087997437, |
| "step": 1430 |
| }, |
| { |
| "epoch": 1.152, |
| "grad_norm": 4.560316562652588, |
| "learning_rate": 4.996518811778858e-06, |
| "logits/chosen": 0.9419007301330566, |
| "logits/rejected": 0.8434064984321594, |
| "logps/chosen": -193.9005889892578, |
| "logps/rejected": -196.77064514160156, |
| "loss": 0.5937, |
| "rewards/accuracies": 0.675000011920929, |
| "rewards/chosen": 0.10783553123474121, |
| "rewards/margins": 0.35865020751953125, |
| "rewards/rejected": -0.25081467628479004, |
| "step": 1440 |
| }, |
| { |
| "epoch": 1.16, |
| "grad_norm": 4.446018218994141, |
| "learning_rate": 4.996140783891085e-06, |
| "logits/chosen": 0.8335108757019043, |
| "logits/rejected": 0.7712218165397644, |
| "logps/chosen": -200.7833251953125, |
| "logps/rejected": -217.6037139892578, |
| "loss": 0.5632, |
| "rewards/accuracies": 0.7750000357627869, |
| "rewards/chosen": -0.268947035074234, |
| "rewards/margins": 0.4497573971748352, |
| "rewards/rejected": -0.7187044024467468, |
| "step": 1450 |
| }, |
| { |
| "epoch": 1.168, |
| "grad_norm": 4.578011989593506, |
| "learning_rate": 4.9957432905960575e-06, |
| "logits/chosen": 0.8321472406387329, |
| "logits/rejected": 0.8203474283218384, |
| "logps/chosen": -179.63121032714844, |
| "logps/rejected": -218.509033203125, |
| "loss": 0.5283, |
| "rewards/accuracies": 0.7125000357627869, |
| "rewards/chosen": -0.2659996449947357, |
| "rewards/margins": 0.5776376724243164, |
| "rewards/rejected": -0.8436372876167297, |
| "step": 1460 |
| }, |
| { |
| "epoch": 1.176, |
| "grad_norm": 8.904568672180176, |
| "learning_rate": 4.995326334993508e-06, |
| "logits/chosen": 0.7332701086997986, |
| "logits/rejected": 0.7367414832115173, |
| "logps/chosen": -215.7530975341797, |
| "logps/rejected": -235.0543670654297, |
| "loss": 0.6247, |
| "rewards/accuracies": 0.637499988079071, |
| "rewards/chosen": -0.6151739954948425, |
| "rewards/margins": 0.4263072907924652, |
| "rewards/rejected": -1.041481375694275, |
| "step": 1470 |
| }, |
| { |
| "epoch": 1.184, |
| "grad_norm": 4.632490158081055, |
| "learning_rate": 4.994889920334939e-06, |
| "logits/chosen": 0.8015623092651367, |
| "logits/rejected": 0.6672269701957703, |
| "logps/chosen": -206.82408142089844, |
| "logps/rejected": -212.19155883789062, |
| "loss": 0.5282, |
| "rewards/accuracies": 0.762499988079071, |
| "rewards/chosen": -0.25984275341033936, |
| "rewards/margins": 0.636847734451294, |
| "rewards/rejected": -0.8966904878616333, |
| "step": 1480 |
| }, |
| { |
| "epoch": 1.192, |
| "grad_norm": 5.93597412109375, |
| "learning_rate": 4.994434050023601e-06, |
| "logits/chosen": 0.8352071642875671, |
| "logits/rejected": 0.796085000038147, |
| "logps/chosen": -200.97604370117188, |
| "logps/rejected": -198.58670043945312, |
| "loss": 0.6777, |
| "rewards/accuracies": 0.5625, |
| "rewards/chosen": -0.5017086267471313, |
| "rewards/margins": 0.23309734463691711, |
| "rewards/rejected": -0.7348060011863708, |
| "step": 1490 |
| }, |
| { |
| "epoch": 1.2, |
| "grad_norm": 6.238118648529053, |
| "learning_rate": 4.993958727614462e-06, |
| "logits/chosen": 0.7685348391532898, |
| "logits/rejected": 0.6558945775032043, |
| "logps/chosen": -204.9226531982422, |
| "logps/rejected": -204.36001586914062, |
| "loss": 0.591, |
| "rewards/accuracies": 0.637499988079071, |
| "rewards/chosen": -0.33830520510673523, |
| "rewards/margins": 0.46260887384414673, |
| "rewards/rejected": -0.8009141087532043, |
| "step": 1500 |
| }, |
| { |
| "epoch": 1.208, |
| "grad_norm": 7.67111873626709, |
| "learning_rate": 4.993463956814181e-06, |
| "logits/chosen": 0.8750897645950317, |
| "logits/rejected": 0.9166715741157532, |
| "logps/chosen": -189.0762176513672, |
| "logps/rejected": -201.63124084472656, |
| "loss": 0.6032, |
| "rewards/accuracies": 0.675000011920929, |
| "rewards/chosen": -0.2800356447696686, |
| "rewards/margins": 0.3452514708042145, |
| "rewards/rejected": -0.6252870559692383, |
| "step": 1510 |
| }, |
| { |
| "epoch": 1.216, |
| "grad_norm": 4.525489330291748, |
| "learning_rate": 4.99294974148108e-06, |
| "logits/chosen": 0.8436128497123718, |
| "logits/rejected": 0.8338537216186523, |
| "logps/chosen": -201.09213256835938, |
| "logps/rejected": -220.0704803466797, |
| "loss": 0.5747, |
| "rewards/accuracies": 0.7250000238418579, |
| "rewards/chosen": -0.21393656730651855, |
| "rewards/margins": 0.4898119568824768, |
| "rewards/rejected": -0.7037484645843506, |
| "step": 1520 |
| }, |
| { |
| "epoch": 1.224, |
| "grad_norm": 7.136298656463623, |
| "learning_rate": 4.992416085625115e-06, |
| "logits/chosen": 0.9371658563613892, |
| "logits/rejected": 0.9421829581260681, |
| "logps/chosen": -187.67381286621094, |
| "logps/rejected": -210.39036560058594, |
| "loss": 0.6005, |
| "rewards/accuracies": 0.6625000238418579, |
| "rewards/chosen": -0.47332143783569336, |
| "rewards/margins": 0.3808668553829193, |
| "rewards/rejected": -0.8541883826255798, |
| "step": 1530 |
| }, |
| { |
| "epoch": 1.232, |
| "grad_norm": 5.2598700523376465, |
| "learning_rate": 4.991862993407841e-06, |
| "logits/chosen": 0.9011389017105103, |
| "logits/rejected": 0.8639345169067383, |
| "logps/chosen": -204.3238525390625, |
| "logps/rejected": -222.8467254638672, |
| "loss": 0.546, |
| "rewards/accuracies": 0.737500011920929, |
| "rewards/chosen": -0.5745847821235657, |
| "rewards/margins": 0.592379629611969, |
| "rewards/rejected": -1.1669644117355347, |
| "step": 1540 |
| }, |
| { |
| "epoch": 1.24, |
| "grad_norm": 6.515147686004639, |
| "learning_rate": 4.99129046914238e-06, |
| "logits/chosen": 0.855043888092041, |
| "logits/rejected": 0.895084798336029, |
| "logps/chosen": -178.9577178955078, |
| "logps/rejected": -202.40538024902344, |
| "loss": 0.525, |
| "rewards/accuracies": 0.7125000357627869, |
| "rewards/chosen": -0.3232458531856537, |
| "rewards/margins": 0.7021111845970154, |
| "rewards/rejected": -1.0253571271896362, |
| "step": 1550 |
| }, |
| { |
| "epoch": 1.248, |
| "grad_norm": 4.3140177726745605, |
| "learning_rate": 4.990698517293394e-06, |
| "logits/chosen": 0.7897105813026428, |
| "logits/rejected": 0.7928025126457214, |
| "logps/chosen": -209.9716339111328, |
| "logps/rejected": -214.2898712158203, |
| "loss": 0.5895, |
| "rewards/accuracies": 0.7125000357627869, |
| "rewards/chosen": -0.16820655763149261, |
| "rewards/margins": 0.539027988910675, |
| "rewards/rejected": -0.7072345614433289, |
| "step": 1560 |
| }, |
| { |
| "epoch": 1.256, |
| "grad_norm": 7.281121730804443, |
| "learning_rate": 4.990087142477042e-06, |
| "logits/chosen": 0.797493577003479, |
| "logits/rejected": 0.841319739818573, |
| "logps/chosen": -211.1555633544922, |
| "logps/rejected": -206.3491668701172, |
| "loss": 0.5963, |
| "rewards/accuracies": 0.6875, |
| "rewards/chosen": -0.420600026845932, |
| "rewards/margins": 0.395569771528244, |
| "rewards/rejected": -0.8161698579788208, |
| "step": 1570 |
| }, |
| { |
| "epoch": 1.264, |
| "grad_norm": 6.5268683433532715, |
| "learning_rate": 4.989456349460946e-06, |
| "logits/chosen": 0.8825798034667969, |
| "logits/rejected": 0.7889868021011353, |
| "logps/chosen": -214.40603637695312, |
| "logps/rejected": -223.7728729248047, |
| "loss": 0.556, |
| "rewards/accuracies": 0.737500011920929, |
| "rewards/chosen": -0.4342477023601532, |
| "rewards/margins": 0.5031339526176453, |
| "rewards/rejected": -0.9373816847801208, |
| "step": 1580 |
| }, |
| { |
| "epoch": 1.272, |
| "grad_norm": 5.648573398590088, |
| "learning_rate": 4.988806143164159e-06, |
| "logits/chosen": 0.8476539850234985, |
| "logits/rejected": 0.7688016891479492, |
| "logps/chosen": -213.4077911376953, |
| "logps/rejected": -233.77456665039062, |
| "loss": 0.5371, |
| "rewards/accuracies": 0.75, |
| "rewards/chosen": -0.7733410000801086, |
| "rewards/margins": 0.6128751039505005, |
| "rewards/rejected": -1.3862160444259644, |
| "step": 1590 |
| }, |
| { |
| "epoch": 1.28, |
| "grad_norm": 5.341320037841797, |
| "learning_rate": 4.988136528657118e-06, |
| "logits/chosen": 0.8851507306098938, |
| "logits/rejected": 0.9027878046035767, |
| "logps/chosen": -213.1574249267578, |
| "logps/rejected": -234.6796112060547, |
| "loss": 0.5464, |
| "rewards/accuracies": 0.737500011920929, |
| "rewards/chosen": -0.7365319132804871, |
| "rewards/margins": 0.615622878074646, |
| "rewards/rejected": -1.3521548509597778, |
| "step": 1600 |
| }, |
| { |
| "epoch": 1.288, |
| "grad_norm": 8.493136405944824, |
| "learning_rate": 4.987447511161613e-06, |
| "logits/chosen": 0.8196334838867188, |
| "logits/rejected": 0.833379864692688, |
| "logps/chosen": -220.86338806152344, |
| "logps/rejected": -230.03433227539062, |
| "loss": 0.656, |
| "rewards/accuracies": 0.675000011920929, |
| "rewards/chosen": -0.6297645568847656, |
| "rewards/margins": 0.4346865713596344, |
| "rewards/rejected": -1.0644512176513672, |
| "step": 1610 |
| }, |
| { |
| "epoch": 1.296, |
| "grad_norm": 7.547203540802002, |
| "learning_rate": 4.98673909605074e-06, |
| "logits/chosen": 0.8613888025283813, |
| "logits/rejected": 0.8237080574035645, |
| "logps/chosen": -210.69900512695312, |
| "logps/rejected": -225.7193145751953, |
| "loss": 0.5646, |
| "rewards/accuracies": 0.6875, |
| "rewards/chosen": -0.6549606323242188, |
| "rewards/margins": 0.5880576968193054, |
| "rewards/rejected": -1.243018388748169, |
| "step": 1620 |
| }, |
| { |
| "epoch": 1.304, |
| "grad_norm": 4.983480453491211, |
| "learning_rate": 4.986011288848863e-06, |
| "logits/chosen": 0.9473403096199036, |
| "logits/rejected": 0.8395648002624512, |
| "logps/chosen": -205.3673858642578, |
| "logps/rejected": -201.21458435058594, |
| "loss": 0.5895, |
| "rewards/accuracies": 0.6500000357627869, |
| "rewards/chosen": -0.6703227758407593, |
| "rewards/margins": 0.437595933675766, |
| "rewards/rejected": -1.1079187393188477, |
| "step": 1630 |
| }, |
| { |
| "epoch": 1.312, |
| "grad_norm": 4.282800674438477, |
| "learning_rate": 4.985264095231568e-06, |
| "logits/chosen": 1.042983889579773, |
| "logits/rejected": 0.9440056085586548, |
| "logps/chosen": -218.52037048339844, |
| "logps/rejected": -235.33702087402344, |
| "loss": 0.5297, |
| "rewards/accuracies": 0.75, |
| "rewards/chosen": -0.7287598848342896, |
| "rewards/margins": 0.6097584366798401, |
| "rewards/rejected": -1.3385183811187744, |
| "step": 1640 |
| }, |
| { |
| "epoch": 1.32, |
| "grad_norm": 5.741344928741455, |
| "learning_rate": 4.984497521025622e-06, |
| "logits/chosen": 0.9067613482475281, |
| "logits/rejected": 0.907869279384613, |
| "logps/chosen": -214.6510772705078, |
| "logps/rejected": -233.52578735351562, |
| "loss": 0.6452, |
| "rewards/accuracies": 0.699999988079071, |
| "rewards/chosen": -0.5831167101860046, |
| "rewards/margins": 0.4216102659702301, |
| "rewards/rejected": -1.0047270059585571, |
| "step": 1650 |
| }, |
| { |
| "epoch": 1.328, |
| "grad_norm": 5.609917640686035, |
| "learning_rate": 4.9837115722089235e-06, |
| "logits/chosen": 1.0167735815048218, |
| "logits/rejected": 1.0690789222717285, |
| "logps/chosen": -198.3705291748047, |
| "logps/rejected": -214.2977752685547, |
| "loss": 0.591, |
| "rewards/accuracies": 0.6875, |
| "rewards/chosen": -0.6117779612541199, |
| "rewards/margins": 0.41639161109924316, |
| "rewards/rejected": -1.0281696319580078, |
| "step": 1660 |
| }, |
| { |
| "epoch": 1.336, |
| "grad_norm": 5.828847408294678, |
| "learning_rate": 4.982906254910459e-06, |
| "logits/chosen": 0.9671093821525574, |
| "logits/rejected": 0.934682309627533, |
| "logps/chosen": -224.7041473388672, |
| "logps/rejected": -225.40408325195312, |
| "loss": 0.6122, |
| "rewards/accuracies": 0.7125000357627869, |
| "rewards/chosen": -0.4490407407283783, |
| "rewards/margins": 0.4267473816871643, |
| "rewards/rejected": -0.875788152217865, |
| "step": 1670 |
| }, |
| { |
| "epoch": 1.3439999999999999, |
| "grad_norm": 4.787741184234619, |
| "learning_rate": 4.982081575410256e-06, |
| "logits/chosen": 0.9712103009223938, |
| "logits/rejected": 1.061010718345642, |
| "logps/chosen": -199.6064910888672, |
| "logps/rejected": -225.61083984375, |
| "loss": 0.5951, |
| "rewards/accuracies": 0.625, |
| "rewards/chosen": -0.35054826736450195, |
| "rewards/margins": 0.4877324104309082, |
| "rewards/rejected": -0.8382806777954102, |
| "step": 1680 |
| }, |
| { |
| "epoch": 1.3519999999999999, |
| "grad_norm": 5.184296607971191, |
| "learning_rate": 4.981237540139331e-06, |
| "logits/chosen": 0.9483404159545898, |
| "logits/rejected": 0.9563024640083313, |
| "logps/chosen": -181.48716735839844, |
| "logps/rejected": -208.47232055664062, |
| "loss": 0.5151, |
| "rewards/accuracies": 0.762499988079071, |
| "rewards/chosen": -0.23454923927783966, |
| "rewards/margins": 0.6790667176246643, |
| "rewards/rejected": -0.913615882396698, |
| "step": 1690 |
| }, |
| { |
| "epoch": 1.3599999999999999, |
| "grad_norm": 5.253026485443115, |
| "learning_rate": 4.980374155679639e-06, |
| "logits/chosen": 1.0046643018722534, |
| "logits/rejected": 0.9964167475700378, |
| "logps/chosen": -196.05494689941406, |
| "logps/rejected": -208.57164001464844, |
| "loss": 0.5772, |
| "rewards/accuracies": 0.737500011920929, |
| "rewards/chosen": -0.5216605067253113, |
| "rewards/margins": 0.5054008960723877, |
| "rewards/rejected": -1.0270613431930542, |
| "step": 1700 |
| }, |
| { |
| "epoch": 1.3679999999999999, |
| "grad_norm": 4.707123279571533, |
| "learning_rate": 4.9794914287640264e-06, |
| "logits/chosen": 0.9587984085083008, |
| "logits/rejected": 0.9082703590393066, |
| "logps/chosen": -191.87796020507812, |
| "logps/rejected": -229.51358032226562, |
| "loss": 0.5563, |
| "rewards/accuracies": 0.6500000357627869, |
| "rewards/chosen": -0.3796873688697815, |
| "rewards/margins": 0.5814693570137024, |
| "rewards/rejected": -0.9611567854881287, |
| "step": 1710 |
| }, |
| { |
| "epoch": 1.376, |
| "grad_norm": 6.676301002502441, |
| "learning_rate": 4.978589366276174e-06, |
| "logits/chosen": 0.8662906885147095, |
| "logits/rejected": 0.7710773348808289, |
| "logps/chosen": -192.7714080810547, |
| "logps/rejected": -228.8347625732422, |
| "loss": 0.55, |
| "rewards/accuracies": 0.7250000238418579, |
| "rewards/chosen": -0.7520737051963806, |
| "rewards/margins": 0.6919212341308594, |
| "rewards/rejected": -1.4439948797225952, |
| "step": 1720 |
| }, |
| { |
| "epoch": 1.384, |
| "grad_norm": 6.470781326293945, |
| "learning_rate": 4.977667975250548e-06, |
| "logits/chosen": 0.8674607276916504, |
| "logits/rejected": 0.8369401097297668, |
| "logps/chosen": -204.9677276611328, |
| "logps/rejected": -220.662353515625, |
| "loss": 0.6208, |
| "rewards/accuracies": 0.637499988079071, |
| "rewards/chosen": -0.6717852354049683, |
| "rewards/margins": 0.5695573687553406, |
| "rewards/rejected": -1.241342544555664, |
| "step": 1730 |
| }, |
| { |
| "epoch": 1.392, |
| "grad_norm": 7.195312023162842, |
| "learning_rate": 4.97672726287234e-06, |
| "logits/chosen": 0.9234275817871094, |
| "logits/rejected": 0.9190540313720703, |
| "logps/chosen": -210.9809112548828, |
| "logps/rejected": -206.3329620361328, |
| "loss": 0.6337, |
| "rewards/accuracies": 0.637499988079071, |
| "rewards/chosen": -0.394771546125412, |
| "rewards/margins": 0.37511372566223145, |
| "rewards/rejected": -0.7698853015899658, |
| "step": 1740 |
| }, |
| { |
| "epoch": 1.4, |
| "grad_norm": 6.862684726715088, |
| "learning_rate": 4.975767236477413e-06, |
| "logits/chosen": 1.0124229192733765, |
| "logits/rejected": 1.0320953130722046, |
| "logps/chosen": -182.6747589111328, |
| "logps/rejected": -206.9286346435547, |
| "loss": 0.5748, |
| "rewards/accuracies": 0.737500011920929, |
| "rewards/chosen": -0.14587950706481934, |
| "rewards/margins": 0.5520176887512207, |
| "rewards/rejected": -0.6978972554206848, |
| "step": 1750 |
| }, |
| { |
| "epoch": 1.408, |
| "grad_norm": 5.418376922607422, |
| "learning_rate": 4.974787903552247e-06, |
| "logits/chosen": 0.8783740997314453, |
| "logits/rejected": 0.7816500067710876, |
| "logps/chosen": -213.95645141601562, |
| "logps/rejected": -225.9214324951172, |
| "loss": 0.6632, |
| "rewards/accuracies": 0.6500000357627869, |
| "rewards/chosen": -0.7024328708648682, |
| "rewards/margins": 0.48432379961013794, |
| "rewards/rejected": -1.1867567300796509, |
| "step": 1760 |
| }, |
| { |
| "epoch": 1.416, |
| "grad_norm": 5.543606758117676, |
| "learning_rate": 4.973789271733877e-06, |
| "logits/chosen": 0.8639112710952759, |
| "logits/rejected": 0.8347362875938416, |
| "logps/chosen": -208.14321899414062, |
| "logps/rejected": -232.296875, |
| "loss": 0.5753, |
| "rewards/accuracies": 0.6625000238418579, |
| "rewards/chosen": -0.4658367335796356, |
| "rewards/margins": 0.6865634322166443, |
| "rewards/rejected": -1.152400255203247, |
| "step": 1770 |
| }, |
| { |
| "epoch": 1.424, |
| "grad_norm": 5.288059711456299, |
| "learning_rate": 4.972771348809834e-06, |
| "logits/chosen": 0.7869605422019958, |
| "logits/rejected": 0.7716497778892517, |
| "logps/chosen": -203.56529235839844, |
| "logps/rejected": -216.66299438476562, |
| "loss": 0.6529, |
| "rewards/accuracies": 0.675000011920929, |
| "rewards/chosen": -0.5823510885238647, |
| "rewards/margins": 0.38376662135124207, |
| "rewards/rejected": -0.9661176800727844, |
| "step": 1780 |
| }, |
| { |
| "epoch": 1.432, |
| "grad_norm": 7.8693037033081055, |
| "learning_rate": 4.9717341427180855e-06, |
| "logits/chosen": 0.8742721676826477, |
| "logits/rejected": 0.8222671747207642, |
| "logps/chosen": -223.1770477294922, |
| "logps/rejected": -230.52334594726562, |
| "loss": 0.5733, |
| "rewards/accuracies": 0.737500011920929, |
| "rewards/chosen": -0.4773767590522766, |
| "rewards/margins": 0.528388261795044, |
| "rewards/rejected": -1.0057649612426758, |
| "step": 1790 |
| }, |
| { |
| "epoch": 1.44, |
| "grad_norm": 5.248528480529785, |
| "learning_rate": 4.970677661546972e-06, |
| "logits/chosen": 0.7629317045211792, |
| "logits/rejected": 0.8210358023643494, |
| "logps/chosen": -188.845703125, |
| "logps/rejected": -227.7703857421875, |
| "loss": 0.5079, |
| "rewards/accuracies": 0.7125000357627869, |
| "rewards/chosen": -0.4410068690776825, |
| "rewards/margins": 0.8114088177680969, |
| "rewards/rejected": -1.252415657043457, |
| "step": 1800 |
| }, |
| { |
| "epoch": 1.448, |
| "grad_norm": 6.2042365074157715, |
| "learning_rate": 4.969601913535148e-06, |
| "logits/chosen": 0.8677163124084473, |
| "logits/rejected": 0.8122463226318359, |
| "logps/chosen": -198.2725067138672, |
| "logps/rejected": -191.28347778320312, |
| "loss": 0.616, |
| "rewards/accuracies": 0.625, |
| "rewards/chosen": -0.5268740057945251, |
| "rewards/margins": 0.3900458514690399, |
| "rewards/rejected": -0.9169198274612427, |
| "step": 1810 |
| }, |
| { |
| "epoch": 1.456, |
| "grad_norm": 3.95033597946167, |
| "learning_rate": 4.9685069070715105e-06, |
| "logits/chosen": 0.8852386474609375, |
| "logits/rejected": 0.8015106320381165, |
| "logps/chosen": -213.7109832763672, |
| "logps/rejected": -232.56704711914062, |
| "loss": 0.538, |
| "rewards/accuracies": 0.762499988079071, |
| "rewards/chosen": -0.778232753276825, |
| "rewards/margins": 0.8279777765274048, |
| "rewards/rejected": -1.6062105894088745, |
| "step": 1820 |
| }, |
| { |
| "epoch": 1.464, |
| "grad_norm": 7.496504783630371, |
| "learning_rate": 4.967392650695141e-06, |
| "logits/chosen": 0.8542248010635376, |
| "logits/rejected": 0.8511247634887695, |
| "logps/chosen": -208.1324005126953, |
| "logps/rejected": -228.3732452392578, |
| "loss": 0.6527, |
| "rewards/accuracies": 0.6875, |
| "rewards/chosen": -1.2442353963851929, |
| "rewards/margins": 0.4487527906894684, |
| "rewards/rejected": -1.6929882764816284, |
| "step": 1830 |
| }, |
| { |
| "epoch": 1.472, |
| "grad_norm": 11.455583572387695, |
| "learning_rate": 4.966259153095235e-06, |
| "logits/chosen": 0.9046268463134766, |
| "logits/rejected": 0.8542624711990356, |
| "logps/chosen": -218.9237823486328, |
| "logps/rejected": -226.2521209716797, |
| "loss": 0.5853, |
| "rewards/accuracies": 0.6500000357627869, |
| "rewards/chosen": -1.0356435775756836, |
| "rewards/margins": 0.6369892954826355, |
| "rewards/rejected": -1.6726330518722534, |
| "step": 1840 |
| }, |
| { |
| "epoch": 1.48, |
| "grad_norm": 4.398818016052246, |
| "learning_rate": 4.965106423111033e-06, |
| "logits/chosen": 0.8832312822341919, |
| "logits/rejected": 0.9116541743278503, |
| "logps/chosen": -205.5150909423828, |
| "logps/rejected": -219.7370147705078, |
| "loss": 0.64, |
| "rewards/accuracies": 0.612500011920929, |
| "rewards/chosen": -1.2404916286468506, |
| "rewards/margins": 0.47511014342308044, |
| "rewards/rejected": -1.7156018018722534, |
| "step": 1850 |
| }, |
| { |
| "epoch": 1.488, |
| "grad_norm": 7.927629470825195, |
| "learning_rate": 4.963934469731756e-06, |
| "logits/chosen": 0.7935855984687805, |
| "logits/rejected": 0.8751907348632812, |
| "logps/chosen": -198.9912567138672, |
| "logps/rejected": -239.27891540527344, |
| "loss": 0.5558, |
| "rewards/accuracies": 0.6875, |
| "rewards/chosen": -1.2322111129760742, |
| "rewards/margins": 0.5577873587608337, |
| "rewards/rejected": -1.7899982929229736, |
| "step": 1860 |
| }, |
| { |
| "epoch": 1.496, |
| "grad_norm": 8.568868637084961, |
| "learning_rate": 4.962743302096532e-06, |
| "logits/chosen": 0.8752376437187195, |
| "logits/rejected": 0.9415783286094666, |
| "logps/chosen": -202.5786895751953, |
| "logps/rejected": -227.0806121826172, |
| "loss": 0.6257, |
| "rewards/accuracies": 0.7250000238418579, |
| "rewards/chosen": -1.2775661945343018, |
| "rewards/margins": 0.5068507790565491, |
| "rewards/rejected": -1.7844170331954956, |
| "step": 1870 |
| }, |
| { |
| "epoch": 1.504, |
| "grad_norm": 7.778118133544922, |
| "learning_rate": 4.961532929494325e-06, |
| "logits/chosen": 0.8835949301719666, |
| "logits/rejected": 0.9183811545372009, |
| "logps/chosen": -220.37466430664062, |
| "logps/rejected": -255.732421875, |
| "loss": 0.5707, |
| "rewards/accuracies": 0.699999988079071, |
| "rewards/chosen": -1.3042207956314087, |
| "rewards/margins": 0.6345095634460449, |
| "rewards/rejected": -1.938730239868164, |
| "step": 1880 |
| }, |
| { |
| "epoch": 1.512, |
| "grad_norm": 8.768423080444336, |
| "learning_rate": 4.960303361363863e-06, |
| "logits/chosen": 0.9741565585136414, |
| "logits/rejected": 0.9229215979576111, |
| "logps/chosen": -193.06396484375, |
| "logps/rejected": -212.60093688964844, |
| "loss": 0.5493, |
| "rewards/accuracies": 0.6875, |
| "rewards/chosen": -0.825711190700531, |
| "rewards/margins": 0.6793455481529236, |
| "rewards/rejected": -1.5050567388534546, |
| "step": 1890 |
| }, |
| { |
| "epoch": 1.52, |
| "grad_norm": 8.654824256896973, |
| "learning_rate": 4.959054607293567e-06, |
| "logits/chosen": 0.8612321019172668, |
| "logits/rejected": 0.8589774966239929, |
| "logps/chosen": -211.7546844482422, |
| "logps/rejected": -245.64761352539062, |
| "loss": 0.5589, |
| "rewards/accuracies": 0.7250000238418579, |
| "rewards/chosen": -1.3008898496627808, |
| "rewards/margins": 0.6522501111030579, |
| "rewards/rejected": -1.9531399011611938, |
| "step": 1900 |
| }, |
| { |
| "epoch": 1.528, |
| "grad_norm": 8.161283493041992, |
| "learning_rate": 4.9577866770214715e-06, |
| "logits/chosen": 0.9014043807983398, |
| "logits/rejected": 0.972660481929779, |
| "logps/chosen": -225.20498657226562, |
| "logps/rejected": -247.1440887451172, |
| "loss": 0.5944, |
| "rewards/accuracies": 0.6875, |
| "rewards/chosen": -1.402032732963562, |
| "rewards/margins": 0.5216811299324036, |
| "rewards/rejected": -1.9237139225006104, |
| "step": 1910 |
| }, |
| { |
| "epoch": 1.536, |
| "grad_norm": 5.097872257232666, |
| "learning_rate": 4.95649958043515e-06, |
| "logits/chosen": 1.0036065578460693, |
| "logits/rejected": 0.9283719062805176, |
| "logps/chosen": -189.35348510742188, |
| "logps/rejected": -208.98095703125, |
| "loss": 0.6005, |
| "rewards/accuracies": 0.6875, |
| "rewards/chosen": -0.9046257138252258, |
| "rewards/margins": 0.6082975268363953, |
| "rewards/rejected": -1.5129234790802002, |
| "step": 1920 |
| }, |
| { |
| "epoch": 1.544, |
| "grad_norm": 6.37450647354126, |
| "learning_rate": 4.955193327571643e-06, |
| "logits/chosen": 0.9055408835411072, |
| "logits/rejected": 0.8177289366722107, |
| "logps/chosen": -201.5961151123047, |
| "logps/rejected": -227.8748016357422, |
| "loss": 0.5858, |
| "rewards/accuracies": 0.6875, |
| "rewards/chosen": -1.2569955587387085, |
| "rewards/margins": 0.5454604029655457, |
| "rewards/rejected": -1.8024559020996094, |
| "step": 1930 |
| }, |
| { |
| "epoch": 1.552, |
| "grad_norm": 7.622455596923828, |
| "learning_rate": 4.95386792861737e-06, |
| "logits/chosen": 0.917728841304779, |
| "logits/rejected": 0.9097478985786438, |
| "logps/chosen": -226.21763610839844, |
| "logps/rejected": -250.62930297851562, |
| "loss": 0.6142, |
| "rewards/accuracies": 0.612500011920929, |
| "rewards/chosen": -1.4412829875946045, |
| "rewards/margins": 0.5837629437446594, |
| "rewards/rejected": -2.025045871734619, |
| "step": 1940 |
| }, |
| { |
| "epoch": 1.56, |
| "grad_norm": 7.772380828857422, |
| "learning_rate": 4.952523393908059e-06, |
| "logits/chosen": 0.9898624420166016, |
| "logits/rejected": 0.9073271155357361, |
| "logps/chosen": -230.84971618652344, |
| "logps/rejected": -236.9692840576172, |
| "loss": 0.6086, |
| "rewards/accuracies": 0.6625000238418579, |
| "rewards/chosen": -1.1479434967041016, |
| "rewards/margins": 0.5934609770774841, |
| "rewards/rejected": -1.7414045333862305, |
| "step": 1950 |
| }, |
| { |
| "epoch": 1.568, |
| "grad_norm": 4.825398921966553, |
| "learning_rate": 4.951159733928663e-06, |
| "logits/chosen": 0.9667159914970398, |
| "logits/rejected": 0.9257230162620544, |
| "logps/chosen": -201.62188720703125, |
| "logps/rejected": -218.0967254638672, |
| "loss": 0.565, |
| "rewards/accuracies": 0.6875, |
| "rewards/chosen": -0.7356204986572266, |
| "rewards/margins": 0.5399088263511658, |
| "rewards/rejected": -1.2755292654037476, |
| "step": 1960 |
| }, |
| { |
| "epoch": 1.576, |
| "grad_norm": 5.7298760414123535, |
| "learning_rate": 4.949776959313275e-06, |
| "logits/chosen": 1.0072981119155884, |
| "logits/rejected": 1.023651123046875, |
| "logps/chosen": -182.70799255371094, |
| "logps/rejected": -193.568359375, |
| "loss": 0.5054, |
| "rewards/accuracies": 0.762499988079071, |
| "rewards/chosen": -0.4652971923351288, |
| "rewards/margins": 0.755893349647522, |
| "rewards/rejected": -1.2211906909942627, |
| "step": 1970 |
| }, |
| { |
| "epoch": 1.584, |
| "grad_norm": 5.249616622924805, |
| "learning_rate": 4.94837508084505e-06, |
| "logits/chosen": 1.0406349897384644, |
| "logits/rejected": 1.0460046529769897, |
| "logps/chosen": -207.79454040527344, |
| "logps/rejected": -222.9483642578125, |
| "loss": 0.5672, |
| "rewards/accuracies": 0.7250000238418579, |
| "rewards/chosen": -0.7943063974380493, |
| "rewards/margins": 0.5170424580574036, |
| "rewards/rejected": -1.311348795890808, |
| "step": 1980 |
| }, |
| { |
| "epoch": 1.592, |
| "grad_norm": 5.766063213348389, |
| "learning_rate": 4.9469541094561185e-06, |
| "logits/chosen": 1.1073545217514038, |
| "logits/rejected": 1.0138510465621948, |
| "logps/chosen": -207.35740661621094, |
| "logps/rejected": -210.7662353515625, |
| "loss": 0.5451, |
| "rewards/accuracies": 0.699999988079071, |
| "rewards/chosen": -0.6396881937980652, |
| "rewards/margins": 0.5360140204429626, |
| "rewards/rejected": -1.1757020950317383, |
| "step": 1990 |
| }, |
| { |
| "epoch": 1.6, |
| "grad_norm": 4.6534624099731445, |
| "learning_rate": 4.945514056227499e-06, |
| "logits/chosen": 1.0667814016342163, |
| "logits/rejected": 1.0090528726577759, |
| "logps/chosen": -190.9570770263672, |
| "logps/rejected": -224.06448364257812, |
| "loss": 0.5949, |
| "rewards/accuracies": 0.7250000238418579, |
| "rewards/chosen": -0.903643786907196, |
| "rewards/margins": 0.4600391387939453, |
| "rewards/rejected": -1.3636829853057861, |
| "step": 2000 |
| }, |
| { |
| "epoch": 1.608, |
| "grad_norm": 6.801830291748047, |
| "learning_rate": 4.944054932389018e-06, |
| "logits/chosen": 0.9834865927696228, |
| "logits/rejected": 0.9897794723510742, |
| "logps/chosen": -197.6669158935547, |
| "logps/rejected": -222.3367156982422, |
| "loss": 0.7015, |
| "rewards/accuracies": 0.5625, |
| "rewards/chosen": -0.5120326280593872, |
| "rewards/margins": 0.37569552659988403, |
| "rewards/rejected": -0.8877281546592712, |
| "step": 2010 |
| }, |
| { |
| "epoch": 1.616, |
| "grad_norm": 4.7985663414001465, |
| "learning_rate": 4.942576749319215e-06, |
| "logits/chosen": 0.9592499136924744, |
| "logits/rejected": 0.9662960171699524, |
| "logps/chosen": -208.7536163330078, |
| "logps/rejected": -221.6714324951172, |
| "loss": 0.6111, |
| "rewards/accuracies": 0.6500000357627869, |
| "rewards/chosen": -0.6202236413955688, |
| "rewards/margins": 0.34536442160606384, |
| "rewards/rejected": -0.9655880331993103, |
| "step": 2020 |
| }, |
| { |
| "epoch": 1.624, |
| "grad_norm": 6.010429382324219, |
| "learning_rate": 4.9410795185452584e-06, |
| "logits/chosen": 0.9691095352172852, |
| "logits/rejected": 0.970175564289093, |
| "logps/chosen": -202.74681091308594, |
| "logps/rejected": -219.4949951171875, |
| "loss": 0.6405, |
| "rewards/accuracies": 0.6000000238418579, |
| "rewards/chosen": -0.6648755669593811, |
| "rewards/margins": 0.40488871932029724, |
| "rewards/rejected": -1.069764256477356, |
| "step": 2030 |
| }, |
| { |
| "epoch": 1.6320000000000001, |
| "grad_norm": 3.876993179321289, |
| "learning_rate": 4.9395632517428546e-06, |
| "logits/chosen": 0.7599179148674011, |
| "logits/rejected": 0.8189982771873474, |
| "logps/chosen": -203.71568298339844, |
| "logps/rejected": -221.14488220214844, |
| "loss": 0.5479, |
| "rewards/accuracies": 0.699999988079071, |
| "rewards/chosen": -0.6682913303375244, |
| "rewards/margins": 0.7366959452629089, |
| "rewards/rejected": -1.4049873352050781, |
| "step": 2040 |
| }, |
| { |
| "epoch": 1.6400000000000001, |
| "grad_norm": 4.428971767425537, |
| "learning_rate": 4.938027960736158e-06, |
| "logits/chosen": 0.8731341361999512, |
| "logits/rejected": 0.9049884080886841, |
| "logps/chosen": -195.8953399658203, |
| "logps/rejected": -219.8108673095703, |
| "loss": 0.572, |
| "rewards/accuracies": 0.7250000238418579, |
| "rewards/chosen": -0.802505612373352, |
| "rewards/margins": 0.5797830820083618, |
| "rewards/rejected": -1.3822888135910034, |
| "step": 2050 |
| }, |
| { |
| "epoch": 1.6480000000000001, |
| "grad_norm": 4.308506011962891, |
| "learning_rate": 4.936473657497674e-06, |
| "logits/chosen": 0.8433103561401367, |
| "logits/rejected": 0.8728559613227844, |
| "logps/chosen": -189.4270477294922, |
| "logps/rejected": -208.40074157714844, |
| "loss": 0.5589, |
| "rewards/accuracies": 0.675000011920929, |
| "rewards/chosen": -0.5602220892906189, |
| "rewards/margins": 0.6132001280784607, |
| "rewards/rejected": -1.17342209815979, |
| "step": 2060 |
| }, |
| { |
| "epoch": 1.6560000000000001, |
| "grad_norm": 5.429026126861572, |
| "learning_rate": 4.934900354148173e-06, |
| "logits/chosen": 0.8006834387779236, |
| "logits/rejected": 0.8655485510826111, |
| "logps/chosen": -196.53811645507812, |
| "logps/rejected": -226.0628662109375, |
| "loss": 0.5333, |
| "rewards/accuracies": 0.7250000238418579, |
| "rewards/chosen": -0.5719932913780212, |
| "rewards/margins": 0.6289103627204895, |
| "rewards/rejected": -1.2009037733078003, |
| "step": 2070 |
| }, |
| { |
| "epoch": 1.6640000000000001, |
| "grad_norm": 7.226114749908447, |
| "learning_rate": 4.933308062956591e-06, |
| "logits/chosen": 0.9665275812149048, |
| "logits/rejected": 0.9210414886474609, |
| "logps/chosen": -216.1051483154297, |
| "logps/rejected": -238.590576171875, |
| "loss": 0.6039, |
| "rewards/accuracies": 0.6500000357627869, |
| "rewards/chosen": -1.081291913986206, |
| "rewards/margins": 0.5662848353385925, |
| "rewards/rejected": -1.6475766897201538, |
| "step": 2080 |
| }, |
| { |
| "epoch": 1.6720000000000002, |
| "grad_norm": 5.951044082641602, |
| "learning_rate": 4.931696796339933e-06, |
| "logits/chosen": 0.8365780711174011, |
| "logits/rejected": 0.9172168970108032, |
| "logps/chosen": -222.9845733642578, |
| "logps/rejected": -241.9564666748047, |
| "loss": 0.5248, |
| "rewards/accuracies": 0.75, |
| "rewards/chosen": -0.6349353790283203, |
| "rewards/margins": 0.626502513885498, |
| "rewards/rejected": -1.2614378929138184, |
| "step": 2090 |
| }, |
| { |
| "epoch": 1.6800000000000002, |
| "grad_norm": 5.7516279220581055, |
| "learning_rate": 4.930066566863182e-06, |
| "logits/chosen": 0.8466065526008606, |
| "logits/rejected": 0.8476532101631165, |
| "logps/chosen": -221.70811462402344, |
| "logps/rejected": -249.4915313720703, |
| "loss": 0.5935, |
| "rewards/accuracies": 0.6875, |
| "rewards/chosen": -0.7990935444831848, |
| "rewards/margins": 0.5039268732070923, |
| "rewards/rejected": -1.3030204772949219, |
| "step": 2100 |
| }, |
| { |
| "epoch": 1.688, |
| "grad_norm": 4.736870288848877, |
| "learning_rate": 4.9284173872391925e-06, |
| "logits/chosen": 0.9319866299629211, |
| "logits/rejected": 0.7861051559448242, |
| "logps/chosen": -211.2753143310547, |
| "logps/rejected": -213.59410095214844, |
| "loss": 0.513, |
| "rewards/accuracies": 0.762499988079071, |
| "rewards/chosen": -0.5237230658531189, |
| "rewards/margins": 0.7074744701385498, |
| "rewards/rejected": -1.231197476387024, |
| "step": 2110 |
| }, |
| { |
| "epoch": 1.696, |
| "grad_norm": 6.545098304748535, |
| "learning_rate": 4.9267492703286005e-06, |
| "logits/chosen": 0.8851673007011414, |
| "logits/rejected": 0.9201962351799011, |
| "logps/chosen": -205.5138702392578, |
| "logps/rejected": -210.7584991455078, |
| "loss": 0.6149, |
| "rewards/accuracies": 0.637499988079071, |
| "rewards/chosen": -0.6310408115386963, |
| "rewards/margins": 0.48075610399246216, |
| "rewards/rejected": -1.1117968559265137, |
| "step": 2120 |
| }, |
| { |
| "epoch": 1.704, |
| "grad_norm": 5.556862831115723, |
| "learning_rate": 4.9250622291397144e-06, |
| "logits/chosen": 0.8860400319099426, |
| "logits/rejected": 0.9521721005439758, |
| "logps/chosen": -203.97494506835938, |
| "logps/rejected": -223.2489776611328, |
| "loss": 0.624, |
| "rewards/accuracies": 0.675000011920929, |
| "rewards/chosen": -0.8290230631828308, |
| "rewards/margins": 0.4605173170566559, |
| "rewards/rejected": -1.2895405292510986, |
| "step": 2130 |
| }, |
| { |
| "epoch": 1.712, |
| "grad_norm": 4.479294776916504, |
| "learning_rate": 4.923356276828422e-06, |
| "logits/chosen": 0.9692792892456055, |
| "logits/rejected": 0.9220939874649048, |
| "logps/chosen": -206.8193817138672, |
| "logps/rejected": -212.7848663330078, |
| "loss": 0.5211, |
| "rewards/accuracies": 0.762499988079071, |
| "rewards/chosen": -0.5379689335823059, |
| "rewards/margins": 0.6115099787712097, |
| "rewards/rejected": -1.149478793144226, |
| "step": 2140 |
| }, |
| { |
| "epoch": 1.72, |
| "grad_norm": 4.822434902191162, |
| "learning_rate": 4.921631426698082e-06, |
| "logits/chosen": 0.9812706112861633, |
| "logits/rejected": 0.8581036925315857, |
| "logps/chosen": -204.3318328857422, |
| "logps/rejected": -208.5980224609375, |
| "loss": 0.6165, |
| "rewards/accuracies": 0.637499988079071, |
| "rewards/chosen": -0.41802969574928284, |
| "rewards/margins": 0.469400554895401, |
| "rewards/rejected": -0.8874303102493286, |
| "step": 2150 |
| }, |
| { |
| "epoch": 1.728, |
| "grad_norm": 7.955755710601807, |
| "learning_rate": 4.919887692199423e-06, |
| "logits/chosen": 1.019281268119812, |
| "logits/rejected": 0.985063374042511, |
| "logps/chosen": -196.29539489746094, |
| "logps/rejected": -185.47842407226562, |
| "loss": 0.5773, |
| "rewards/accuracies": 0.7125000357627869, |
| "rewards/chosen": -0.2744506895542145, |
| "rewards/margins": 0.5127472877502441, |
| "rewards/rejected": -0.787198007106781, |
| "step": 2160 |
| }, |
| { |
| "epoch": 1.736, |
| "grad_norm": 3.87949275970459, |
| "learning_rate": 4.918125086930435e-06, |
| "logits/chosen": 0.8921056985855103, |
| "logits/rejected": 0.8641147613525391, |
| "logps/chosen": -208.4770965576172, |
| "logps/rejected": -232.86192321777344, |
| "loss": 0.5582, |
| "rewards/accuracies": 0.699999988079071, |
| "rewards/chosen": -0.398175448179245, |
| "rewards/margins": 0.586793839931488, |
| "rewards/rejected": -0.9849693179130554, |
| "step": 2170 |
| }, |
| { |
| "epoch": 1.744, |
| "grad_norm": 5.009764671325684, |
| "learning_rate": 4.91634362463627e-06, |
| "logits/chosen": 0.8331074118614197, |
| "logits/rejected": 0.8069505095481873, |
| "logps/chosen": -194.30528259277344, |
| "logps/rejected": -228.6878204345703, |
| "loss": 0.5577, |
| "rewards/accuracies": 0.75, |
| "rewards/chosen": -0.46142521500587463, |
| "rewards/margins": 0.6860167384147644, |
| "rewards/rejected": -1.1474418640136719, |
| "step": 2180 |
| }, |
| { |
| "epoch": 1.752, |
| "grad_norm": 6.22864294052124, |
| "learning_rate": 4.914543319209126e-06, |
| "logits/chosen": 0.9490287899971008, |
| "logits/rejected": 0.7996692061424255, |
| "logps/chosen": -213.89102172851562, |
| "logps/rejected": -235.63926696777344, |
| "loss": 0.6154, |
| "rewards/accuracies": 0.625, |
| "rewards/chosen": -0.6507563591003418, |
| "rewards/margins": 0.5299718976020813, |
| "rewards/rejected": -1.1807281970977783, |
| "step": 2190 |
| }, |
| { |
| "epoch": 1.76, |
| "grad_norm": 6.673972129821777, |
| "learning_rate": 4.912724184688149e-06, |
| "logits/chosen": 0.7972329258918762, |
| "logits/rejected": 0.7832920551300049, |
| "logps/chosen": -216.7205810546875, |
| "logps/rejected": -232.1858367919922, |
| "loss": 0.6573, |
| "rewards/accuracies": 0.625, |
| "rewards/chosen": -0.7054974436759949, |
| "rewards/margins": 0.4817284643650055, |
| "rewards/rejected": -1.1872259378433228, |
| "step": 2200 |
| }, |
| { |
| "epoch": 1.768, |
| "grad_norm": 4.410974979400635, |
| "learning_rate": 4.910886235259315e-06, |
| "logits/chosen": 0.8468648791313171, |
| "logits/rejected": 0.8238309025764465, |
| "logps/chosen": -206.13621520996094, |
| "logps/rejected": -206.95458984375, |
| "loss": 0.5607, |
| "rewards/accuracies": 0.7125000357627869, |
| "rewards/chosen": -0.4652062952518463, |
| "rewards/margins": 0.5647412538528442, |
| "rewards/rejected": -1.0299476385116577, |
| "step": 2210 |
| }, |
| { |
| "epoch": 1.776, |
| "grad_norm": 5.151928424835205, |
| "learning_rate": 4.909029485255321e-06, |
| "logits/chosen": 0.883913516998291, |
| "logits/rejected": 0.8439237475395203, |
| "logps/chosen": -216.8835906982422, |
| "logps/rejected": -225.7892303466797, |
| "loss": 0.6172, |
| "rewards/accuracies": 0.6500000357627869, |
| "rewards/chosen": -0.736081063747406, |
| "rewards/margins": 0.49011898040771484, |
| "rewards/rejected": -1.2262001037597656, |
| "step": 2220 |
| }, |
| { |
| "epoch": 1.784, |
| "grad_norm": 4.777335166931152, |
| "learning_rate": 4.907153949155479e-06, |
| "logits/chosen": 0.8213723301887512, |
| "logits/rejected": 0.77827388048172, |
| "logps/chosen": -216.30235290527344, |
| "logps/rejected": -234.95713806152344, |
| "loss": 0.5687, |
| "rewards/accuracies": 0.675000011920929, |
| "rewards/chosen": -0.8275079131126404, |
| "rewards/margins": 0.5740241408348083, |
| "rewards/rejected": -1.4015320539474487, |
| "step": 2230 |
| }, |
| { |
| "epoch": 1.792, |
| "grad_norm": 5.356624126434326, |
| "learning_rate": 4.905259641585594e-06, |
| "logits/chosen": 1.0409396886825562, |
| "logits/rejected": 0.9675580263137817, |
| "logps/chosen": -189.8865966796875, |
| "logps/rejected": -210.8790283203125, |
| "loss": 0.5454, |
| "rewards/accuracies": 0.6875, |
| "rewards/chosen": -0.5686191916465759, |
| "rewards/margins": 0.596284806728363, |
| "rewards/rejected": -1.164903998374939, |
| "step": 2240 |
| }, |
| { |
| "epoch": 1.8, |
| "grad_norm": 7.358102798461914, |
| "learning_rate": 4.903346577317859e-06, |
| "logits/chosen": 0.9212884306907654, |
| "logits/rejected": 0.9235758185386658, |
| "logps/chosen": -205.32864379882812, |
| "logps/rejected": -209.7302703857422, |
| "loss": 0.6964, |
| "rewards/accuracies": 0.6000000238418579, |
| "rewards/chosen": -0.7906255125999451, |
| "rewards/margins": 0.18452060222625732, |
| "rewards/rejected": -0.9751461148262024, |
| "step": 2250 |
| }, |
| { |
| "epoch": 1.808, |
| "grad_norm": 6.358880043029785, |
| "learning_rate": 4.901414771270732e-06, |
| "logits/chosen": 1.0110472440719604, |
| "logits/rejected": 0.9830945134162903, |
| "logps/chosen": -196.3888397216797, |
| "logps/rejected": -196.31834411621094, |
| "loss": 0.5517, |
| "rewards/accuracies": 0.737500011920929, |
| "rewards/chosen": -0.44832611083984375, |
| "rewards/margins": 0.5208678841590881, |
| "rewards/rejected": -0.9691939353942871, |
| "step": 2260 |
| }, |
| { |
| "epoch": 1.8159999999999998, |
| "grad_norm": 4.602325439453125, |
| "learning_rate": 4.899464238508826e-06, |
| "logits/chosen": 0.9746546149253845, |
| "logits/rejected": 0.9642091989517212, |
| "logps/chosen": -196.0602569580078, |
| "logps/rejected": -214.0269012451172, |
| "loss": 0.5631, |
| "rewards/accuracies": 0.75, |
| "rewards/chosen": -0.30714574456214905, |
| "rewards/margins": 0.5362817049026489, |
| "rewards/rejected": -0.8434274792671204, |
| "step": 2270 |
| }, |
| { |
| "epoch": 1.8239999999999998, |
| "grad_norm": 4.84414005279541, |
| "learning_rate": 4.8974949942427854e-06, |
| "logits/chosen": 0.9626126289367676, |
| "logits/rejected": 0.9172550439834595, |
| "logps/chosen": -193.07933044433594, |
| "logps/rejected": -198.24754333496094, |
| "loss": 0.625, |
| "rewards/accuracies": 0.6625000238418579, |
| "rewards/chosen": -0.4726344645023346, |
| "rewards/margins": 0.4540925920009613, |
| "rewards/rejected": -0.9267271161079407, |
| "step": 2280 |
| }, |
| { |
| "epoch": 1.8319999999999999, |
| "grad_norm": 8.682156562805176, |
| "learning_rate": 4.895507053829174e-06, |
| "logits/chosen": 0.8741292953491211, |
| "logits/rejected": 0.9165781140327454, |
| "logps/chosen": -206.250732421875, |
| "logps/rejected": -249.11558532714844, |
| "loss": 0.5665, |
| "rewards/accuracies": 0.6500000357627869, |
| "rewards/chosen": -0.6650144457817078, |
| "rewards/margins": 0.7068294286727905, |
| "rewards/rejected": -1.3718438148498535, |
| "step": 2290 |
| }, |
| { |
| "epoch": 1.8399999999999999, |
| "grad_norm": 5.111639499664307, |
| "learning_rate": 4.893500432770349e-06, |
| "logits/chosen": 0.9301668405532837, |
| "logits/rejected": 0.9247567057609558, |
| "logps/chosen": -206.8374786376953, |
| "logps/rejected": -210.31265258789062, |
| "loss": 0.5961, |
| "rewards/accuracies": 0.699999988079071, |
| "rewards/chosen": -0.7106897234916687, |
| "rewards/margins": 0.40209847688674927, |
| "rewards/rejected": -1.112788200378418, |
| "step": 2300 |
| }, |
| { |
| "epoch": 1.8479999999999999, |
| "grad_norm": 5.505365371704102, |
| "learning_rate": 4.891475146714348e-06, |
| "logits/chosen": 0.9037653207778931, |
| "logits/rejected": 0.8538966178894043, |
| "logps/chosen": -199.22640991210938, |
| "logps/rejected": -212.10537719726562, |
| "loss": 0.5586, |
| "rewards/accuracies": 0.675000011920929, |
| "rewards/chosen": -0.9016987085342407, |
| "rewards/margins": 0.6126433610916138, |
| "rewards/rejected": -1.514341950416565, |
| "step": 2310 |
| }, |
| { |
| "epoch": 1.8559999999999999, |
| "grad_norm": 4.674638748168945, |
| "learning_rate": 4.889431211454753e-06, |
| "logits/chosen": 0.8724443316459656, |
| "logits/rejected": 0.9939247965812683, |
| "logps/chosen": -213.60525512695312, |
| "logps/rejected": -227.02359008789062, |
| "loss": 0.5838, |
| "rewards/accuracies": 0.7125000357627869, |
| "rewards/chosen": -1.1054712533950806, |
| "rewards/margins": 0.44871193170547485, |
| "rewards/rejected": -1.5541832447052002, |
| "step": 2320 |
| }, |
| { |
| "epoch": 1.8639999999999999, |
| "grad_norm": 7.4808220863342285, |
| "learning_rate": 4.887368642930588e-06, |
| "logits/chosen": 0.9019695520401001, |
| "logits/rejected": 0.8304522633552551, |
| "logps/chosen": -211.55125427246094, |
| "logps/rejected": -225.2754364013672, |
| "loss": 0.6556, |
| "rewards/accuracies": 0.637499988079071, |
| "rewards/chosen": -1.171556830406189, |
| "rewards/margins": 0.2245822697877884, |
| "rewards/rejected": -1.396139144897461, |
| "step": 2330 |
| }, |
| { |
| "epoch": 1.8719999999999999, |
| "grad_norm": 6.583737373352051, |
| "learning_rate": 4.8852874572261715e-06, |
| "logits/chosen": 0.9607311487197876, |
| "logits/rejected": 0.8902657628059387, |
| "logps/chosen": -194.19366455078125, |
| "logps/rejected": -233.3009796142578, |
| "loss": 0.5089, |
| "rewards/accuracies": 0.7750000357627869, |
| "rewards/chosen": -0.9743821024894714, |
| "rewards/margins": 0.8457622528076172, |
| "rewards/rejected": -1.8201442956924438, |
| "step": 2340 |
| }, |
| { |
| "epoch": 1.88, |
| "grad_norm": 8.004322052001953, |
| "learning_rate": 4.88318767057101e-06, |
| "logits/chosen": 0.6777879595756531, |
| "logits/rejected": 0.7692098021507263, |
| "logps/chosen": -226.11428833007812, |
| "logps/rejected": -252.94114685058594, |
| "loss": 0.6428, |
| "rewards/accuracies": 0.637499988079071, |
| "rewards/chosen": -1.4815189838409424, |
| "rewards/margins": 0.4003751873970032, |
| "rewards/rejected": -1.8818941116333008, |
| "step": 2350 |
| }, |
| { |
| "epoch": 1.888, |
| "grad_norm": 5.834934234619141, |
| "learning_rate": 4.881069299339662e-06, |
| "logits/chosen": 0.9836352467536926, |
| "logits/rejected": 0.925758957862854, |
| "logps/chosen": -196.276611328125, |
| "logps/rejected": -216.8465118408203, |
| "loss": 0.5801, |
| "rewards/accuracies": 0.7125000357627869, |
| "rewards/chosen": -0.6051527261734009, |
| "rewards/margins": 0.5349343419075012, |
| "rewards/rejected": -1.1400870084762573, |
| "step": 2360 |
| }, |
| { |
| "epoch": 1.896, |
| "grad_norm": 3.793811321258545, |
| "learning_rate": 4.878932360051611e-06, |
| "logits/chosen": 0.9329185485839844, |
| "logits/rejected": 0.8791740536689758, |
| "logps/chosen": -198.0451202392578, |
| "logps/rejected": -204.83177185058594, |
| "loss": 0.5089, |
| "rewards/accuracies": 0.7750000357627869, |
| "rewards/chosen": -0.7063581943511963, |
| "rewards/margins": 0.798491895198822, |
| "rewards/rejected": -1.5048500299453735, |
| "step": 2370 |
| }, |
| { |
| "epoch": 1.904, |
| "grad_norm": 6.470005512237549, |
| "learning_rate": 4.876776869371139e-06, |
| "logits/chosen": 0.9098548889160156, |
| "logits/rejected": 0.9413707852363586, |
| "logps/chosen": -205.4551239013672, |
| "logps/rejected": -206.733642578125, |
| "loss": 0.543, |
| "rewards/accuracies": 0.7250000238418579, |
| "rewards/chosen": -0.8138889670372009, |
| "rewards/margins": 0.5941287279129028, |
| "rewards/rejected": -1.4080175161361694, |
| "step": 2380 |
| }, |
| { |
| "epoch": 1.912, |
| "grad_norm": 7.808496475219727, |
| "learning_rate": 4.874602844107195e-06, |
| "logits/chosen": 1.01150643825531, |
| "logits/rejected": 0.873560905456543, |
| "logps/chosen": -209.4512939453125, |
| "logps/rejected": -219.6645965576172, |
| "loss": 0.649, |
| "rewards/accuracies": 0.675000011920929, |
| "rewards/chosen": -1.0077110528945923, |
| "rewards/margins": 0.44567227363586426, |
| "rewards/rejected": -1.453383445739746, |
| "step": 2390 |
| }, |
| { |
| "epoch": 1.92, |
| "grad_norm": 10.799213409423828, |
| "learning_rate": 4.872410301213265e-06, |
| "logits/chosen": 0.9560382962226868, |
| "logits/rejected": 0.9300372004508972, |
| "logps/chosen": -196.2581329345703, |
| "logps/rejected": -212.28335571289062, |
| "loss": 0.582, |
| "rewards/accuracies": 0.6625000238418579, |
| "rewards/chosen": -0.8118780255317688, |
| "rewards/margins": 0.4887621998786926, |
| "rewards/rejected": -1.3006402254104614, |
| "step": 2400 |
| }, |
| { |
| "epoch": 1.928, |
| "grad_norm": 4.009180068969727, |
| "learning_rate": 4.87019925778724e-06, |
| "logits/chosen": 0.7739871144294739, |
| "logits/rejected": 0.7527830004692078, |
| "logps/chosen": -202.1708221435547, |
| "logps/rejected": -212.5369415283203, |
| "loss": 0.5072, |
| "rewards/accuracies": 0.7875000238418579, |
| "rewards/chosen": -0.6656922101974487, |
| "rewards/margins": 0.811048150062561, |
| "rewards/rejected": -1.4767402410507202, |
| "step": 2410 |
| }, |
| { |
| "epoch": 1.936, |
| "grad_norm": 4.5736799240112305, |
| "learning_rate": 4.867969731071279e-06, |
| "logits/chosen": 0.7770849466323853, |
| "logits/rejected": 0.8255655169487, |
| "logps/chosen": -192.43284606933594, |
| "logps/rejected": -222.0802001953125, |
| "loss": 0.5829, |
| "rewards/accuracies": 0.675000011920929, |
| "rewards/chosen": -0.6381308436393738, |
| "rewards/margins": 0.6074991226196289, |
| "rewards/rejected": -1.245630145072937, |
| "step": 2420 |
| }, |
| { |
| "epoch": 1.944, |
| "grad_norm": 7.796564102172852, |
| "learning_rate": 4.86572173845168e-06, |
| "logits/chosen": 0.9963685870170593, |
| "logits/rejected": 0.986523449420929, |
| "logps/chosen": -213.4120635986328, |
| "logps/rejected": -214.7560577392578, |
| "loss": 0.664, |
| "rewards/accuracies": 0.5875000357627869, |
| "rewards/chosen": -0.7294261455535889, |
| "rewards/margins": 0.35328245162963867, |
| "rewards/rejected": -1.0827085971832275, |
| "step": 2430 |
| }, |
| { |
| "epoch": 1.952, |
| "grad_norm": 5.976226329803467, |
| "learning_rate": 4.863455297458741e-06, |
| "logits/chosen": 0.8093425631523132, |
| "logits/rejected": 0.8130960464477539, |
| "logps/chosen": -223.12828063964844, |
| "logps/rejected": -246.886474609375, |
| "loss": 0.6872, |
| "rewards/accuracies": 0.6000000238418579, |
| "rewards/chosen": -1.014013648033142, |
| "rewards/margins": 0.4549499452114105, |
| "rewards/rejected": -1.468963861465454, |
| "step": 2440 |
| }, |
| { |
| "epoch": 1.96, |
| "grad_norm": 5.015947341918945, |
| "learning_rate": 4.861170425766625e-06, |
| "logits/chosen": 0.7283975481987, |
| "logits/rejected": 0.7141422629356384, |
| "logps/chosen": -223.5131378173828, |
| "logps/rejected": -231.6356964111328, |
| "loss": 0.5357, |
| "rewards/accuracies": 0.800000011920929, |
| "rewards/chosen": -0.8296670913696289, |
| "rewards/margins": 0.6724082827568054, |
| "rewards/rejected": -1.5020753145217896, |
| "step": 2450 |
| }, |
| { |
| "epoch": 1.968, |
| "grad_norm": 5.242801666259766, |
| "learning_rate": 4.8588671411932195e-06, |
| "logits/chosen": 0.8053815960884094, |
| "logits/rejected": 0.8061729669570923, |
| "logps/chosen": -214.3092041015625, |
| "logps/rejected": -251.5323486328125, |
| "loss": 0.5139, |
| "rewards/accuracies": 0.737500011920929, |
| "rewards/chosen": -0.6689096689224243, |
| "rewards/margins": 0.6673839688301086, |
| "rewards/rejected": -1.3362935781478882, |
| "step": 2460 |
| }, |
| { |
| "epoch": 1.976, |
| "grad_norm": 5.3611578941345215, |
| "learning_rate": 4.8565454616999995e-06, |
| "logits/chosen": 0.7702573537826538, |
| "logits/rejected": 0.8161222338676453, |
| "logps/chosen": -194.3104248046875, |
| "logps/rejected": -229.4737091064453, |
| "loss": 0.5365, |
| "rewards/accuracies": 0.7250000238418579, |
| "rewards/chosen": -0.5571290850639343, |
| "rewards/margins": 0.699532151222229, |
| "rewards/rejected": -1.2566611766815186, |
| "step": 2470 |
| }, |
| { |
| "epoch": 1.984, |
| "grad_norm": 6.883541107177734, |
| "learning_rate": 4.85420540539189e-06, |
| "logits/chosen": 0.8040187954902649, |
| "logits/rejected": 0.8295624852180481, |
| "logps/chosen": -201.51536560058594, |
| "logps/rejected": -207.3661651611328, |
| "loss": 0.7064, |
| "rewards/accuracies": 0.6500000357627869, |
| "rewards/chosen": -0.622946560382843, |
| "rewards/margins": 0.24029460549354553, |
| "rewards/rejected": -0.8632411956787109, |
| "step": 2480 |
| }, |
| { |
| "epoch": 1.992, |
| "grad_norm": 6.731320381164551, |
| "learning_rate": 4.851846990517118e-06, |
| "logits/chosen": 0.8790884017944336, |
| "logits/rejected": 0.8123799562454224, |
| "logps/chosen": -206.55789184570312, |
| "logps/rejected": -234.25, |
| "loss": 0.5478, |
| "rewards/accuracies": 0.762499988079071, |
| "rewards/chosen": -0.8480615019798279, |
| "rewards/margins": 0.6503373980522156, |
| "rewards/rejected": -1.498399019241333, |
| "step": 2490 |
| }, |
| { |
| "epoch": 2.0, |
| "grad_norm": 5.269995212554932, |
| "learning_rate": 4.849470235467079e-06, |
| "logits/chosen": 0.9131700396537781, |
| "logits/rejected": 0.8961302638053894, |
| "logps/chosen": -205.96377563476562, |
| "logps/rejected": -208.3470458984375, |
| "loss": 0.643, |
| "rewards/accuracies": 0.5875000357627869, |
| "rewards/chosen": -0.4859640598297119, |
| "rewards/margins": 0.346769243478775, |
| "rewards/rejected": -0.8327333331108093, |
| "step": 2500 |
| }, |
| { |
| "epoch": 2.008, |
| "grad_norm": 4.879722595214844, |
| "learning_rate": 4.847075158776183e-06, |
| "logits/chosen": 0.8211914300918579, |
| "logits/rejected": 0.8813266754150391, |
| "logps/chosen": -202.9598846435547, |
| "logps/rejected": -227.003173828125, |
| "loss": 0.5717, |
| "rewards/accuracies": 0.675000011920929, |
| "rewards/chosen": -0.9907218813896179, |
| "rewards/margins": 0.5864070057868958, |
| "rewards/rejected": -1.5771290063858032, |
| "step": 2510 |
| }, |
| { |
| "epoch": 2.016, |
| "grad_norm": 4.4429168701171875, |
| "learning_rate": 4.844661779121723e-06, |
| "logits/chosen": 0.8528118133544922, |
| "logits/rejected": 0.8727533221244812, |
| "logps/chosen": -206.079833984375, |
| "logps/rejected": -226.8438262939453, |
| "loss": 0.4943, |
| "rewards/accuracies": 0.800000011920929, |
| "rewards/chosen": -0.6906086206436157, |
| "rewards/margins": 0.6708667874336243, |
| "rewards/rejected": -1.3614753484725952, |
| "step": 2520 |
| }, |
| { |
| "epoch": 2.024, |
| "grad_norm": 5.297170162200928, |
| "learning_rate": 4.842230115323715e-06, |
| "logits/chosen": 0.8990702629089355, |
| "logits/rejected": 0.8813673257827759, |
| "logps/chosen": -210.3358917236328, |
| "logps/rejected": -215.9623565673828, |
| "loss": 0.4864, |
| "rewards/accuracies": 0.7875000238418579, |
| "rewards/chosen": -0.9391742944717407, |
| "rewards/margins": 0.8787464499473572, |
| "rewards/rejected": -1.8179206848144531, |
| "step": 2530 |
| }, |
| { |
| "epoch": 2.032, |
| "grad_norm": 5.085737228393555, |
| "learning_rate": 4.839780186344763e-06, |
| "logits/chosen": 0.7243828177452087, |
| "logits/rejected": 0.5710186958312988, |
| "logps/chosen": -218.251708984375, |
| "logps/rejected": -233.02012634277344, |
| "loss": 0.5227, |
| "rewards/accuracies": 0.699999988079071, |
| "rewards/chosen": -0.8471910357475281, |
| "rewards/margins": 0.7661673426628113, |
| "rewards/rejected": -1.6133583784103394, |
| "step": 2540 |
| }, |
| { |
| "epoch": 2.04, |
| "grad_norm": 6.776291847229004, |
| "learning_rate": 4.837312011289907e-06, |
| "logits/chosen": 0.8092204332351685, |
| "logits/rejected": 0.7919169664382935, |
| "logps/chosen": -233.19627380371094, |
| "logps/rejected": -239.8424835205078, |
| "loss": 0.5437, |
| "rewards/accuracies": 0.75, |
| "rewards/chosen": -1.3999868631362915, |
| "rewards/margins": 0.5697715878486633, |
| "rewards/rejected": -1.96975839138031, |
| "step": 2550 |
| }, |
| { |
| "epoch": 2.048, |
| "grad_norm": 4.587131500244141, |
| "learning_rate": 4.834825609406469e-06, |
| "logits/chosen": 0.8447713851928711, |
| "logits/rejected": 0.8319129347801208, |
| "logps/chosen": -197.89254760742188, |
| "logps/rejected": -224.28733825683594, |
| "loss": 0.4837, |
| "rewards/accuracies": 0.8375000357627869, |
| "rewards/chosen": -0.7428638339042664, |
| "rewards/margins": 0.8988615870475769, |
| "rewards/rejected": -1.6417255401611328, |
| "step": 2560 |
| }, |
| { |
| "epoch": 2.056, |
| "grad_norm": 5.485004425048828, |
| "learning_rate": 4.832321000083912e-06, |
| "logits/chosen": 0.6010708212852478, |
| "logits/rejected": 0.6102247834205627, |
| "logps/chosen": -233.9095001220703, |
| "logps/rejected": -258.9781188964844, |
| "loss": 0.5354, |
| "rewards/accuracies": 0.737500011920929, |
| "rewards/chosen": -0.8256379961967468, |
| "rewards/margins": 0.7594703435897827, |
| "rewards/rejected": -1.5851082801818848, |
| "step": 2570 |
| }, |
| { |
| "epoch": 2.064, |
| "grad_norm": 4.678139686584473, |
| "learning_rate": 4.829798202853683e-06, |
| "logits/chosen": 0.7654294371604919, |
| "logits/rejected": 0.827416718006134, |
| "logps/chosen": -204.60549926757812, |
| "logps/rejected": -226.6314239501953, |
| "loss": 0.5546, |
| "rewards/accuracies": 0.699999988079071, |
| "rewards/chosen": -0.819423496723175, |
| "rewards/margins": 0.5961082577705383, |
| "rewards/rejected": -1.4155317544937134, |
| "step": 2580 |
| }, |
| { |
| "epoch": 2.072, |
| "grad_norm": 7.408326625823975, |
| "learning_rate": 4.82725723738906e-06, |
| "logits/chosen": 0.9313848614692688, |
| "logits/rejected": 0.9375463724136353, |
| "logps/chosen": -222.64830017089844, |
| "logps/rejected": -225.4774169921875, |
| "loss": 0.6477, |
| "rewards/accuracies": 0.637499988079071, |
| "rewards/chosen": -0.9466853141784668, |
| "rewards/margins": 0.3815837800502777, |
| "rewards/rejected": -1.328269124031067, |
| "step": 2590 |
| }, |
| { |
| "epoch": 2.08, |
| "grad_norm": 7.954169273376465, |
| "learning_rate": 4.824698123505004e-06, |
| "logits/chosen": 0.8060113191604614, |
| "logits/rejected": 0.7566053867340088, |
| "logps/chosen": -192.39781188964844, |
| "logps/rejected": -221.01512145996094, |
| "loss": 0.5003, |
| "rewards/accuracies": 0.8125, |
| "rewards/chosen": -0.4853571057319641, |
| "rewards/margins": 0.7401863932609558, |
| "rewards/rejected": -1.2255436182022095, |
| "step": 2600 |
| }, |
| { |
| "epoch": 2.088, |
| "grad_norm": 6.623762607574463, |
| "learning_rate": 4.822120881157998e-06, |
| "logits/chosen": 0.8647942543029785, |
| "logits/rejected": 0.8719661831855774, |
| "logps/chosen": -221.8902587890625, |
| "logps/rejected": -240.48338317871094, |
| "loss": 0.5385, |
| "rewards/accuracies": 0.699999988079071, |
| "rewards/chosen": -0.855373203754425, |
| "rewards/margins": 0.6286361813545227, |
| "rewards/rejected": -1.4840092658996582, |
| "step": 2610 |
| }, |
| { |
| "epoch": 2.096, |
| "grad_norm": 5.830476760864258, |
| "learning_rate": 4.8195255304458945e-06, |
| "logits/chosen": 0.8587938547134399, |
| "logits/rejected": 0.7880618572235107, |
| "logps/chosen": -208.57481384277344, |
| "logps/rejected": -225.7516632080078, |
| "loss": 0.5388, |
| "rewards/accuracies": 0.762499988079071, |
| "rewards/chosen": -0.8550947308540344, |
| "rewards/margins": 0.61111980676651, |
| "rewards/rejected": -1.4662145376205444, |
| "step": 2620 |
| }, |
| { |
| "epoch": 2.104, |
| "grad_norm": 5.968774795532227, |
| "learning_rate": 4.8169120916077626e-06, |
| "logits/chosen": 0.8810015916824341, |
| "logits/rejected": 0.8329893946647644, |
| "logps/chosen": -206.7748260498047, |
| "logps/rejected": -212.00332641601562, |
| "loss": 0.4447, |
| "rewards/accuracies": 0.800000011920929, |
| "rewards/chosen": -0.9596864581108093, |
| "rewards/margins": 0.9688228964805603, |
| "rewards/rejected": -1.9285091161727905, |
| "step": 2630 |
| }, |
| { |
| "epoch": 2.112, |
| "grad_norm": 4.1383161544799805, |
| "learning_rate": 4.81428058502372e-06, |
| "logits/chosen": 0.7850375175476074, |
| "logits/rejected": 0.7830752730369568, |
| "logps/chosen": -215.91201782226562, |
| "logps/rejected": -243.4402313232422, |
| "loss": 0.5962, |
| "rewards/accuracies": 0.6875, |
| "rewards/chosen": -1.5484000444412231, |
| "rewards/margins": 0.7390782237052917, |
| "rewards/rejected": -2.28747820854187, |
| "step": 2640 |
| }, |
| { |
| "epoch": 2.12, |
| "grad_norm": 5.95521879196167, |
| "learning_rate": 4.811631031214787e-06, |
| "logits/chosen": 0.7586268782615662, |
| "logits/rejected": 0.7738537192344666, |
| "logps/chosen": -226.69517517089844, |
| "logps/rejected": -252.53208923339844, |
| "loss": 0.4531, |
| "rewards/accuracies": 0.7750000357627869, |
| "rewards/chosen": -1.1299831867218018, |
| "rewards/margins": 1.046120047569275, |
| "rewards/rejected": -2.176103353500366, |
| "step": 2650 |
| }, |
| { |
| "epoch": 2.128, |
| "grad_norm": 5.219895839691162, |
| "learning_rate": 4.808963450842713e-06, |
| "logits/chosen": 0.9560911059379578, |
| "logits/rejected": 0.9116310477256775, |
| "logps/chosen": -205.336181640625, |
| "logps/rejected": -206.9613800048828, |
| "loss": 0.5378, |
| "rewards/accuracies": 0.7875000238418579, |
| "rewards/chosen": -0.8686642050743103, |
| "rewards/margins": 0.6341574788093567, |
| "rewards/rejected": -1.5028215646743774, |
| "step": 2660 |
| }, |
| { |
| "epoch": 2.136, |
| "grad_norm": 4.962176322937012, |
| "learning_rate": 4.806277864709828e-06, |
| "logits/chosen": 0.9082285165786743, |
| "logits/rejected": 0.8512415885925293, |
| "logps/chosen": -217.08535766601562, |
| "logps/rejected": -218.56155395507812, |
| "loss": 0.4624, |
| "rewards/accuracies": 0.824999988079071, |
| "rewards/chosen": -0.5144981741905212, |
| "rewards/margins": 0.8028311133384705, |
| "rewards/rejected": -1.3173291683197021, |
| "step": 2670 |
| }, |
| { |
| "epoch": 2.144, |
| "grad_norm": 6.617359161376953, |
| "learning_rate": 4.803574293758873e-06, |
| "logits/chosen": 0.8195849657058716, |
| "logits/rejected": 0.8258503079414368, |
| "logps/chosen": -227.69168090820312, |
| "logps/rejected": -232.21450805664062, |
| "loss": 0.5883, |
| "rewards/accuracies": 0.75, |
| "rewards/chosen": -1.4341284036636353, |
| "rewards/margins": 0.5511090755462646, |
| "rewards/rejected": -1.9852374792099, |
| "step": 2680 |
| }, |
| { |
| "epoch": 2.152, |
| "grad_norm": 4.731860637664795, |
| "learning_rate": 4.800852759072834e-06, |
| "logits/chosen": 1.0406241416931152, |
| "logits/rejected": 0.9129249453544617, |
| "logps/chosen": -212.03970336914062, |
| "logps/rejected": -217.3359832763672, |
| "loss": 0.5685, |
| "rewards/accuracies": 0.7125000357627869, |
| "rewards/chosen": -1.189060926437378, |
| "rewards/margins": 0.6306447386741638, |
| "rewards/rejected": -1.819705605506897, |
| "step": 2690 |
| }, |
| { |
| "epoch": 2.16, |
| "grad_norm": 6.71656608581543, |
| "learning_rate": 4.798113281874788e-06, |
| "logits/chosen": 0.8752555847167969, |
| "logits/rejected": 0.8828164935112, |
| "logps/chosen": -216.815673828125, |
| "logps/rejected": -230.06640625, |
| "loss": 0.5298, |
| "rewards/accuracies": 0.75, |
| "rewards/chosen": -0.9842235445976257, |
| "rewards/margins": 0.7561108469963074, |
| "rewards/rejected": -1.740334391593933, |
| "step": 2700 |
| }, |
| { |
| "epoch": 2.168, |
| "grad_norm": 5.923402309417725, |
| "learning_rate": 4.795355883527727e-06, |
| "logits/chosen": 1.119627833366394, |
| "logits/rejected": 1.0217770338058472, |
| "logps/chosen": -214.1453094482422, |
| "logps/rejected": -225.8303680419922, |
| "loss": 0.4973, |
| "rewards/accuracies": 0.75, |
| "rewards/chosen": -1.4931985139846802, |
| "rewards/margins": 0.8571271300315857, |
| "rewards/rejected": -2.350325584411621, |
| "step": 2710 |
| }, |
| { |
| "epoch": 2.176, |
| "grad_norm": 5.549647331237793, |
| "learning_rate": 4.792580585534398e-06, |
| "logits/chosen": 0.976405918598175, |
| "logits/rejected": 1.0035079717636108, |
| "logps/chosen": -200.0377960205078, |
| "logps/rejected": -236.29800415039062, |
| "loss": 0.5329, |
| "rewards/accuracies": 0.737500011920929, |
| "rewards/chosen": -1.0612291097640991, |
| "rewards/margins": 0.7105744481086731, |
| "rewards/rejected": -1.7718034982681274, |
| "step": 2720 |
| }, |
| { |
| "epoch": 2.184, |
| "grad_norm": 8.303404808044434, |
| "learning_rate": 4.789787409537131e-06, |
| "logits/chosen": 0.9948248267173767, |
| "logits/rejected": 0.940967857837677, |
| "logps/chosen": -207.9303436279297, |
| "logps/rejected": -244.0626220703125, |
| "loss": 0.5104, |
| "rewards/accuracies": 0.737500011920929, |
| "rewards/chosen": -1.080901026725769, |
| "rewards/margins": 0.7775439620018005, |
| "rewards/rejected": -1.8584450483322144, |
| "step": 2730 |
| }, |
| { |
| "epoch": 2.192, |
| "grad_norm": 6.788060188293457, |
| "learning_rate": 4.786976377317676e-06, |
| "logits/chosen": 0.9980520606040955, |
| "logits/rejected": 0.9337381720542908, |
| "logps/chosen": -205.30735778808594, |
| "logps/rejected": -212.91213989257812, |
| "loss": 0.5431, |
| "rewards/accuracies": 0.75, |
| "rewards/chosen": -1.287164568901062, |
| "rewards/margins": 0.6435604691505432, |
| "rewards/rejected": -1.93072509765625, |
| "step": 2740 |
| }, |
| { |
| "epoch": 2.2, |
| "grad_norm": 5.9607133865356445, |
| "learning_rate": 4.784147510797024e-06, |
| "logits/chosen": 0.956340491771698, |
| "logits/rejected": 0.8990178108215332, |
| "logps/chosen": -207.7548828125, |
| "logps/rejected": -227.23081970214844, |
| "loss": 0.5219, |
| "rewards/accuracies": 0.7125000357627869, |
| "rewards/chosen": -1.1541296243667603, |
| "rewards/margins": 0.8181205987930298, |
| "rewards/rejected": -1.97225022315979, |
| "step": 2750 |
| }, |
| { |
| "epoch": 2.208, |
| "grad_norm": 4.65596342086792, |
| "learning_rate": 4.7813008320352475e-06, |
| "logits/chosen": 0.886645495891571, |
| "logits/rejected": 0.8505622148513794, |
| "logps/chosen": -201.58055114746094, |
| "logps/rejected": -216.75845336914062, |
| "loss": 0.5183, |
| "rewards/accuracies": 0.7125000357627869, |
| "rewards/chosen": -0.5563634037971497, |
| "rewards/margins": 0.788402259349823, |
| "rewards/rejected": -1.344765543937683, |
| "step": 2760 |
| }, |
| { |
| "epoch": 2.216, |
| "grad_norm": 3.351832628250122, |
| "learning_rate": 4.778436363231317e-06, |
| "logits/chosen": 0.9784888625144958, |
| "logits/rejected": 0.9678120613098145, |
| "logps/chosen": -209.824462890625, |
| "logps/rejected": -236.84239196777344, |
| "loss": 0.5421, |
| "rewards/accuracies": 0.75, |
| "rewards/chosen": -1.1734176874160767, |
| "rewards/margins": 0.7453652024269104, |
| "rewards/rejected": -1.9187828302383423, |
| "step": 2770 |
| }, |
| { |
| "epoch": 2.224, |
| "grad_norm": 5.160139560699463, |
| "learning_rate": 4.775554126722935e-06, |
| "logits/chosen": 1.0381357669830322, |
| "logits/rejected": 0.8997783064842224, |
| "logps/chosen": -215.1231689453125, |
| "logps/rejected": -221.33921813964844, |
| "loss": 0.4999, |
| "rewards/accuracies": 0.75, |
| "rewards/chosen": -0.8012914657592773, |
| "rewards/margins": 0.8245387077331543, |
| "rewards/rejected": -1.625830054283142, |
| "step": 2780 |
| }, |
| { |
| "epoch": 2.232, |
| "grad_norm": 6.553884983062744, |
| "learning_rate": 4.772654144986364e-06, |
| "logits/chosen": 1.0697473287582397, |
| "logits/rejected": 0.9900814890861511, |
| "logps/chosen": -190.27476501464844, |
| "logps/rejected": -185.47320556640625, |
| "loss": 0.5253, |
| "rewards/accuracies": 0.8125, |
| "rewards/chosen": -0.6551761031150818, |
| "rewards/margins": 0.7400442361831665, |
| "rewards/rejected": -1.395220398902893, |
| "step": 2790 |
| }, |
| { |
| "epoch": 2.24, |
| "grad_norm": 7.37640905380249, |
| "learning_rate": 4.7697364406362415e-06, |
| "logits/chosen": 0.9499724507331848, |
| "logits/rejected": 0.9506826400756836, |
| "logps/chosen": -211.59814453125, |
| "logps/rejected": -214.0547637939453, |
| "loss": 0.5319, |
| "rewards/accuracies": 0.762499988079071, |
| "rewards/chosen": -0.9677931070327759, |
| "rewards/margins": 0.7696712613105774, |
| "rewards/rejected": -1.7374645471572876, |
| "step": 2800 |
| }, |
| { |
| "epoch": 2.248, |
| "grad_norm": 7.318824291229248, |
| "learning_rate": 4.766801036425413e-06, |
| "logits/chosen": 0.9329463243484497, |
| "logits/rejected": 0.9652411341667175, |
| "logps/chosen": -198.66159057617188, |
| "logps/rejected": -218.34695434570312, |
| "loss": 0.4791, |
| "rewards/accuracies": 0.7875000238418579, |
| "rewards/chosen": -0.636730432510376, |
| "rewards/margins": 0.9497951865196228, |
| "rewards/rejected": -1.5865256786346436, |
| "step": 2810 |
| }, |
| { |
| "epoch": 2.2560000000000002, |
| "grad_norm": 8.074874877929688, |
| "learning_rate": 4.763847955244749e-06, |
| "logits/chosen": 0.9467241168022156, |
| "logits/rejected": 0.9240646362304688, |
| "logps/chosen": -195.85824584960938, |
| "logps/rejected": -213.454345703125, |
| "loss": 0.4331, |
| "rewards/accuracies": 0.8375000357627869, |
| "rewards/chosen": -0.5387411117553711, |
| "rewards/margins": 0.931182861328125, |
| "rewards/rejected": -1.4699242115020752, |
| "step": 2820 |
| }, |
| { |
| "epoch": 2.2640000000000002, |
| "grad_norm": 5.222074508666992, |
| "learning_rate": 4.760877220122972e-06, |
| "logits/chosen": 1.0361279249191284, |
| "logits/rejected": 0.9784205555915833, |
| "logps/chosen": -217.472412109375, |
| "logps/rejected": -220.15403747558594, |
| "loss": 0.4908, |
| "rewards/accuracies": 0.7250000238418579, |
| "rewards/chosen": -0.6365905404090881, |
| "rewards/margins": 0.8387727737426758, |
| "rewards/rejected": -1.4753633737564087, |
| "step": 2830 |
| }, |
| { |
| "epoch": 2.2720000000000002, |
| "grad_norm": 12.525833129882812, |
| "learning_rate": 4.757888854226469e-06, |
| "logits/chosen": 0.980974018573761, |
| "logits/rejected": 0.8848344683647156, |
| "logps/chosen": -217.03807067871094, |
| "logps/rejected": -220.2837371826172, |
| "loss": 0.6158, |
| "rewards/accuracies": 0.699999988079071, |
| "rewards/chosen": -1.099002480506897, |
| "rewards/margins": 0.5908426642417908, |
| "rewards/rejected": -1.689845085144043, |
| "step": 2840 |
| }, |
| { |
| "epoch": 2.2800000000000002, |
| "grad_norm": 7.606281280517578, |
| "learning_rate": 4.75488288085912e-06, |
| "logits/chosen": 0.886518120765686, |
| "logits/rejected": 0.8294790387153625, |
| "logps/chosen": -223.3667449951172, |
| "logps/rejected": -241.37620544433594, |
| "loss": 0.5392, |
| "rewards/accuracies": 0.762499988079071, |
| "rewards/chosen": -1.5162465572357178, |
| "rewards/margins": 0.6705383658409119, |
| "rewards/rejected": -2.1867847442626953, |
| "step": 2850 |
| }, |
| { |
| "epoch": 2.288, |
| "grad_norm": 6.877048015594482, |
| "learning_rate": 4.751859323462106e-06, |
| "logits/chosen": 0.8719585537910461, |
| "logits/rejected": 0.8641806840896606, |
| "logps/chosen": -196.090576171875, |
| "logps/rejected": -222.262451171875, |
| "loss": 0.4964, |
| "rewards/accuracies": 0.737500011920929, |
| "rewards/chosen": -1.1291723251342773, |
| "rewards/margins": 0.9274942278862, |
| "rewards/rejected": -2.056666612625122, |
| "step": 2860 |
| }, |
| { |
| "epoch": 2.296, |
| "grad_norm": 8.89814281463623, |
| "learning_rate": 4.748818205613738e-06, |
| "logits/chosen": 0.9758926630020142, |
| "logits/rejected": 0.8365556597709656, |
| "logps/chosen": -208.0929412841797, |
| "logps/rejected": -219.41197204589844, |
| "loss": 0.5806, |
| "rewards/accuracies": 0.6500000357627869, |
| "rewards/chosen": -0.8481144905090332, |
| "rewards/margins": 0.6624639630317688, |
| "rewards/rejected": -1.5105783939361572, |
| "step": 2870 |
| }, |
| { |
| "epoch": 2.304, |
| "grad_norm": 10.317713737487793, |
| "learning_rate": 4.7457595510292615e-06, |
| "logits/chosen": 0.9229365587234497, |
| "logits/rejected": 0.7857539057731628, |
| "logps/chosen": -219.9251251220703, |
| "logps/rejected": -253.33775329589844, |
| "loss": 0.6269, |
| "rewards/accuracies": 0.6625000238418579, |
| "rewards/chosen": -1.2590759992599487, |
| "rewards/margins": 0.696280300617218, |
| "rewards/rejected": -1.9553560018539429, |
| "step": 2880 |
| }, |
| { |
| "epoch": 2.312, |
| "grad_norm": 4.831161022186279, |
| "learning_rate": 4.7426833835606815e-06, |
| "logits/chosen": 0.977482259273529, |
| "logits/rejected": 0.9541126489639282, |
| "logps/chosen": -201.36666870117188, |
| "logps/rejected": -229.928466796875, |
| "loss": 0.5535, |
| "rewards/accuracies": 0.737500011920929, |
| "rewards/chosen": -0.8802324533462524, |
| "rewards/margins": 0.7807623744010925, |
| "rewards/rejected": -1.6609947681427002, |
| "step": 2890 |
| }, |
| { |
| "epoch": 2.32, |
| "grad_norm": 10.042828559875488, |
| "learning_rate": 4.7395897271965676e-06, |
| "logits/chosen": 0.9359084963798523, |
| "logits/rejected": 0.8896707892417908, |
| "logps/chosen": -226.84707641601562, |
| "logps/rejected": -231.924072265625, |
| "loss": 0.5353, |
| "rewards/accuracies": 0.7875000238418579, |
| "rewards/chosen": -1.173598051071167, |
| "rewards/margins": 0.6731023192405701, |
| "rewards/rejected": -1.8467003107070923, |
| "step": 2900 |
| }, |
| { |
| "epoch": 2.328, |
| "grad_norm": 7.612265110015869, |
| "learning_rate": 4.736478606061876e-06, |
| "logits/chosen": 0.9218583106994629, |
| "logits/rejected": 0.8763092160224915, |
| "logps/chosen": -189.9997100830078, |
| "logps/rejected": -222.7259521484375, |
| "loss": 0.4865, |
| "rewards/accuracies": 0.7250000238418579, |
| "rewards/chosen": -0.624975323677063, |
| "rewards/margins": 0.9700756072998047, |
| "rewards/rejected": -1.5950509309768677, |
| "step": 2910 |
| }, |
| { |
| "epoch": 2.336, |
| "grad_norm": 7.228297710418701, |
| "learning_rate": 4.733350044417752e-06, |
| "logits/chosen": 0.8519703149795532, |
| "logits/rejected": 0.9009062051773071, |
| "logps/chosen": -211.00601196289062, |
| "logps/rejected": -227.3970184326172, |
| "loss": 0.5175, |
| "rewards/accuracies": 0.762499988079071, |
| "rewards/chosen": -1.1423438787460327, |
| "rewards/margins": 0.9418218731880188, |
| "rewards/rejected": -2.084165573120117, |
| "step": 2920 |
| }, |
| { |
| "epoch": 2.344, |
| "grad_norm": 7.3916192054748535, |
| "learning_rate": 4.730204066661349e-06, |
| "logits/chosen": 0.8180250525474548, |
| "logits/rejected": 0.8404077887535095, |
| "logps/chosen": -219.1990509033203, |
| "logps/rejected": -247.86865234375, |
| "loss": 0.4079, |
| "rewards/accuracies": 0.7750000357627869, |
| "rewards/chosen": -1.1498545408248901, |
| "rewards/margins": 1.2002915143966675, |
| "rewards/rejected": -2.3501460552215576, |
| "step": 2930 |
| }, |
| { |
| "epoch": 2.352, |
| "grad_norm": 7.41680383682251, |
| "learning_rate": 4.727040697325634e-06, |
| "logits/chosen": 0.8754854202270508, |
| "logits/rejected": 0.8143682479858398, |
| "logps/chosen": -216.9535675048828, |
| "logps/rejected": -224.46226501464844, |
| "loss": 0.4467, |
| "rewards/accuracies": 0.762499988079071, |
| "rewards/chosen": -0.9588058590888977, |
| "rewards/margins": 0.9411051869392395, |
| "rewards/rejected": -1.8999111652374268, |
| "step": 2940 |
| }, |
| { |
| "epoch": 2.36, |
| "grad_norm": 9.07734203338623, |
| "learning_rate": 4.723859961079196e-06, |
| "logits/chosen": 0.9282005429267883, |
| "logits/rejected": 1.0409669876098633, |
| "logps/chosen": -200.03123474121094, |
| "logps/rejected": -205.33560180664062, |
| "loss": 0.5902, |
| "rewards/accuracies": 0.675000011920929, |
| "rewards/chosen": -0.9528915286064148, |
| "rewards/margins": 0.5995592474937439, |
| "rewards/rejected": -1.5524507761001587, |
| "step": 2950 |
| }, |
| { |
| "epoch": 2.368, |
| "grad_norm": 7.95607328414917, |
| "learning_rate": 4.720661882726054e-06, |
| "logits/chosen": 0.8046501278877258, |
| "logits/rejected": 0.8820120096206665, |
| "logps/chosen": -198.43272399902344, |
| "logps/rejected": -228.85073852539062, |
| "loss": 0.5915, |
| "rewards/accuracies": 0.6875, |
| "rewards/chosen": -1.2360581159591675, |
| "rewards/margins": 0.611167848110199, |
| "rewards/rejected": -1.8472260236740112, |
| "step": 2960 |
| }, |
| { |
| "epoch": 2.376, |
| "grad_norm": 9.31917667388916, |
| "learning_rate": 4.717446487205466e-06, |
| "logits/chosen": 1.0128583908081055, |
| "logits/rejected": 0.8825269937515259, |
| "logps/chosen": -221.35952758789062, |
| "logps/rejected": -233.3060760498047, |
| "loss": 0.5001, |
| "rewards/accuracies": 0.762499988079071, |
| "rewards/chosen": -0.7609128952026367, |
| "rewards/margins": 0.8846578598022461, |
| "rewards/rejected": -1.6455707550048828, |
| "step": 2970 |
| }, |
| { |
| "epoch": 2.384, |
| "grad_norm": 6.160464763641357, |
| "learning_rate": 4.714213799591733e-06, |
| "logits/chosen": 0.9421942830085754, |
| "logits/rejected": 0.9018292427062988, |
| "logps/chosen": -205.28750610351562, |
| "logps/rejected": -214.02294921875, |
| "loss": 0.5016, |
| "rewards/accuracies": 0.75, |
| "rewards/chosen": -0.7612765431404114, |
| "rewards/margins": 0.7555828094482422, |
| "rewards/rejected": -1.5168594121932983, |
| "step": 2980 |
| }, |
| { |
| "epoch": 2.392, |
| "grad_norm": 5.200317859649658, |
| "learning_rate": 4.710963845094003e-06, |
| "logits/chosen": 0.8887328505516052, |
| "logits/rejected": 0.8375994563102722, |
| "logps/chosen": -196.9802703857422, |
| "logps/rejected": -206.02444458007812, |
| "loss": 0.5104, |
| "rewards/accuracies": 0.762499988079071, |
| "rewards/chosen": -0.6040834784507751, |
| "rewards/margins": 0.7478589415550232, |
| "rewards/rejected": -1.3519423007965088, |
| "step": 2990 |
| }, |
| { |
| "epoch": 2.4, |
| "grad_norm": 5.419322490692139, |
| "learning_rate": 4.707696649056073e-06, |
| "logits/chosen": 0.8664781451225281, |
| "logits/rejected": 0.8220338821411133, |
| "logps/chosen": -191.3097686767578, |
| "logps/rejected": -210.8022918701172, |
| "loss": 0.4085, |
| "rewards/accuracies": 0.875, |
| "rewards/chosen": -0.4698951244354248, |
| "rewards/margins": 1.0510791540145874, |
| "rewards/rejected": -1.5209741592407227, |
| "step": 3000 |
| }, |
| { |
| "epoch": 2.408, |
| "grad_norm": 7.53326416015625, |
| "learning_rate": 4.704412236956194e-06, |
| "logits/chosen": 0.8981558680534363, |
| "logits/rejected": 0.9666721224784851, |
| "logps/chosen": -210.7949981689453, |
| "logps/rejected": -252.54966735839844, |
| "loss": 0.4686, |
| "rewards/accuracies": 0.762499988079071, |
| "rewards/chosen": -1.2617671489715576, |
| "rewards/margins": 1.07135808467865, |
| "rewards/rejected": -2.333125352859497, |
| "step": 3010 |
| }, |
| { |
| "epoch": 2.416, |
| "grad_norm": 4.284425258636475, |
| "learning_rate": 4.701110634406871e-06, |
| "logits/chosen": 0.7835731506347656, |
| "logits/rejected": 0.7178624272346497, |
| "logps/chosen": -197.4336395263672, |
| "logps/rejected": -236.43325805664062, |
| "loss": 0.456, |
| "rewards/accuracies": 0.8125, |
| "rewards/chosen": -1.1816260814666748, |
| "rewards/margins": 1.00633704662323, |
| "rewards/rejected": -2.1879632472991943, |
| "step": 3020 |
| }, |
| { |
| "epoch": 2.424, |
| "grad_norm": 8.3016357421875, |
| "learning_rate": 4.6977918671546635e-06, |
| "logits/chosen": 0.9753482937812805, |
| "logits/rejected": 0.8922098278999329, |
| "logps/chosen": -211.79405212402344, |
| "logps/rejected": -218.29739379882812, |
| "loss": 0.5311, |
| "rewards/accuracies": 0.7875000238418579, |
| "rewards/chosen": -1.0505523681640625, |
| "rewards/margins": 0.8706418871879578, |
| "rewards/rejected": -1.9211944341659546, |
| "step": 3030 |
| }, |
| { |
| "epoch": 2.432, |
| "grad_norm": 9.126633644104004, |
| "learning_rate": 4.6944559610799865e-06, |
| "logits/chosen": 1.0277721881866455, |
| "logits/rejected": 1.0039381980895996, |
| "logps/chosen": -183.63748168945312, |
| "logps/rejected": -211.2511749267578, |
| "loss": 0.5654, |
| "rewards/accuracies": 0.6875, |
| "rewards/chosen": -0.6382626891136169, |
| "rewards/margins": 0.8599988222122192, |
| "rewards/rejected": -1.4982614517211914, |
| "step": 3040 |
| }, |
| { |
| "epoch": 2.44, |
| "grad_norm": 7.879682540893555, |
| "learning_rate": 4.691102942196905e-06, |
| "logits/chosen": 0.9624242186546326, |
| "logits/rejected": 1.0566548109054565, |
| "logps/chosen": -197.49008178710938, |
| "logps/rejected": -237.42349243164062, |
| "loss": 0.5254, |
| "rewards/accuracies": 0.737500011920929, |
| "rewards/chosen": -1.352699875831604, |
| "rewards/margins": 0.8264394998550415, |
| "rewards/rejected": -2.1791393756866455, |
| "step": 3050 |
| }, |
| { |
| "epoch": 2.448, |
| "grad_norm": 8.244893074035645, |
| "learning_rate": 4.687732836652935e-06, |
| "logits/chosen": 1.102777361869812, |
| "logits/rejected": 0.9323251843452454, |
| "logps/chosen": -211.6886749267578, |
| "logps/rejected": -223.81761169433594, |
| "loss": 0.5689, |
| "rewards/accuracies": 0.6500000357627869, |
| "rewards/chosen": -1.4250367879867554, |
| "rewards/margins": 0.8165454864501953, |
| "rewards/rejected": -2.2415823936462402, |
| "step": 3060 |
| }, |
| { |
| "epoch": 2.456, |
| "grad_norm": 10.064545631408691, |
| "learning_rate": 4.684345670728835e-06, |
| "logits/chosen": 0.9085485339164734, |
| "logits/rejected": 0.9594566226005554, |
| "logps/chosen": -216.97305297851562, |
| "logps/rejected": -245.9871368408203, |
| "loss": 0.5879, |
| "rewards/accuracies": 0.7250000238418579, |
| "rewards/chosen": -1.5272258520126343, |
| "rewards/margins": 0.6840270161628723, |
| "rewards/rejected": -2.2112529277801514, |
| "step": 3070 |
| }, |
| { |
| "epoch": 2.464, |
| "grad_norm": 5.9822773933410645, |
| "learning_rate": 4.680941470838405e-06, |
| "logits/chosen": 0.9992641806602478, |
| "logits/rejected": 0.9386343955993652, |
| "logps/chosen": -211.3744659423828, |
| "logps/rejected": -215.39097595214844, |
| "loss": 0.5522, |
| "rewards/accuracies": 0.699999988079071, |
| "rewards/chosen": -0.8485382199287415, |
| "rewards/margins": 0.7580586075782776, |
| "rewards/rejected": -1.606596827507019, |
| "step": 3080 |
| }, |
| { |
| "epoch": 2.472, |
| "grad_norm": 9.379515647888184, |
| "learning_rate": 4.67752026352828e-06, |
| "logits/chosen": 0.9449491500854492, |
| "logits/rejected": 0.8973454833030701, |
| "logps/chosen": -229.10166931152344, |
| "logps/rejected": -235.14395141601562, |
| "loss": 0.5425, |
| "rewards/accuracies": 0.699999988079071, |
| "rewards/chosen": -1.0061733722686768, |
| "rewards/margins": 0.7214813828468323, |
| "rewards/rejected": -1.7276546955108643, |
| "step": 3090 |
| }, |
| { |
| "epoch": 2.48, |
| "grad_norm": 7.775414943695068, |
| "learning_rate": 4.674082075477724e-06, |
| "logits/chosen": 0.9458308219909668, |
| "logits/rejected": 0.9328064322471619, |
| "logps/chosen": -194.5631561279297, |
| "logps/rejected": -212.3477325439453, |
| "loss": 0.5018, |
| "rewards/accuracies": 0.75, |
| "rewards/chosen": -0.9718823432922363, |
| "rewards/margins": 0.8911674618721008, |
| "rewards/rejected": -1.8630497455596924, |
| "step": 3100 |
| }, |
| { |
| "epoch": 2.488, |
| "grad_norm": 8.869393348693848, |
| "learning_rate": 4.670626933498415e-06, |
| "logits/chosen": 1.0055527687072754, |
| "logits/rejected": 0.8751175999641418, |
| "logps/chosen": -205.0562744140625, |
| "logps/rejected": -214.41934204101562, |
| "loss": 0.5698, |
| "rewards/accuracies": 0.7250000238418579, |
| "rewards/chosen": -0.790256679058075, |
| "rewards/margins": 0.7495672106742859, |
| "rewards/rejected": -1.5398238897323608, |
| "step": 3110 |
| }, |
| { |
| "epoch": 2.496, |
| "grad_norm": 7.518993854522705, |
| "learning_rate": 4.667154864534245e-06, |
| "logits/chosen": 0.9388038516044617, |
| "logits/rejected": 0.9028251767158508, |
| "logps/chosen": -189.31138610839844, |
| "logps/rejected": -246.3251495361328, |
| "loss": 0.4711, |
| "rewards/accuracies": 0.7875000238418579, |
| "rewards/chosen": -0.7512499690055847, |
| "rewards/margins": 0.9976223111152649, |
| "rewards/rejected": -1.7488723993301392, |
| "step": 3120 |
| }, |
| { |
| "epoch": 2.504, |
| "grad_norm": 5.921288967132568, |
| "learning_rate": 4.663665895661107e-06, |
| "logits/chosen": 0.9502741694450378, |
| "logits/rejected": 0.9423877596855164, |
| "logps/chosen": -201.31776428222656, |
| "logps/rejected": -228.7478790283203, |
| "loss": 0.6146, |
| "rewards/accuracies": 0.7250000238418579, |
| "rewards/chosen": -1.0320862531661987, |
| "rewards/margins": 0.730940580368042, |
| "rewards/rejected": -1.7630270719528198, |
| "step": 3130 |
| }, |
| { |
| "epoch": 2.512, |
| "grad_norm": 7.64976692199707, |
| "learning_rate": 4.6601600540866794e-06, |
| "logits/chosen": 0.9315633773803711, |
| "logits/rejected": 0.9705595374107361, |
| "logps/chosen": -207.4699249267578, |
| "logps/rejected": -223.7519989013672, |
| "loss": 0.5229, |
| "rewards/accuracies": 0.737500011920929, |
| "rewards/chosen": -0.7369431257247925, |
| "rewards/margins": 0.7962962985038757, |
| "rewards/rejected": -1.5332393646240234, |
| "step": 3140 |
| }, |
| { |
| "epoch": 2.52, |
| "grad_norm": 10.3628511428833, |
| "learning_rate": 4.65663736715022e-06, |
| "logits/chosen": 1.0306199789047241, |
| "logits/rejected": 0.9668065309524536, |
| "logps/chosen": -216.719970703125, |
| "logps/rejected": -218.1455535888672, |
| "loss": 0.5989, |
| "rewards/accuracies": 0.637499988079071, |
| "rewards/chosen": -0.6501615047454834, |
| "rewards/margins": 0.6095995306968689, |
| "rewards/rejected": -1.259761095046997, |
| "step": 3150 |
| }, |
| { |
| "epoch": 2.528, |
| "grad_norm": 8.640954971313477, |
| "learning_rate": 4.653097862322347e-06, |
| "logits/chosen": 0.9273595809936523, |
| "logits/rejected": 0.9403362274169922, |
| "logps/chosen": -213.16714477539062, |
| "logps/rejected": -236.9595489501953, |
| "loss": 0.5293, |
| "rewards/accuracies": 0.737500011920929, |
| "rewards/chosen": -0.7404825687408447, |
| "rewards/margins": 0.8145327568054199, |
| "rewards/rejected": -1.5550154447555542, |
| "step": 3160 |
| }, |
| { |
| "epoch": 2.536, |
| "grad_norm": 7.425879001617432, |
| "learning_rate": 4.6495415672048336e-06, |
| "logits/chosen": 0.8542930483818054, |
| "logits/rejected": 0.880113422870636, |
| "logps/chosen": -213.308837890625, |
| "logps/rejected": -242.27099609375, |
| "loss": 0.4891, |
| "rewards/accuracies": 0.762499988079071, |
| "rewards/chosen": -0.4495139718055725, |
| "rewards/margins": 0.8645074963569641, |
| "rewards/rejected": -1.3140214681625366, |
| "step": 3170 |
| }, |
| { |
| "epoch": 2.544, |
| "grad_norm": 6.936334609985352, |
| "learning_rate": 4.645968509530381e-06, |
| "logits/chosen": 0.985787034034729, |
| "logits/rejected": 0.970470130443573, |
| "logps/chosen": -205.81802368164062, |
| "logps/rejected": -199.0405731201172, |
| "loss": 0.5015, |
| "rewards/accuracies": 0.75, |
| "rewards/chosen": -0.6486034393310547, |
| "rewards/margins": 0.8106253743171692, |
| "rewards/rejected": -1.4592288732528687, |
| "step": 3180 |
| }, |
| { |
| "epoch": 2.552, |
| "grad_norm": 9.339189529418945, |
| "learning_rate": 4.642378717162411e-06, |
| "logits/chosen": 0.9108757972717285, |
| "logits/rejected": 0.8992031216621399, |
| "logps/chosen": -212.0107879638672, |
| "logps/rejected": -223.214599609375, |
| "loss": 0.5662, |
| "rewards/accuracies": 0.737500011920929, |
| "rewards/chosen": -0.43249455094337463, |
| "rewards/margins": 0.7586955428123474, |
| "rewards/rejected": -1.1911901235580444, |
| "step": 3190 |
| }, |
| { |
| "epoch": 2.56, |
| "grad_norm": 10.31650161743164, |
| "learning_rate": 4.638772218094847e-06, |
| "logits/chosen": 0.8744648098945618, |
| "logits/rejected": 0.9359402060508728, |
| "logps/chosen": -211.1424560546875, |
| "logps/rejected": -240.16343688964844, |
| "loss": 0.5185, |
| "rewards/accuracies": 0.7250000238418579, |
| "rewards/chosen": -0.9992405772209167, |
| "rewards/margins": 0.8215592503547668, |
| "rewards/rejected": -1.820799708366394, |
| "step": 3200 |
| }, |
| { |
| "epoch": 2.568, |
| "grad_norm": 9.806978225708008, |
| "learning_rate": 4.635149040451891e-06, |
| "logits/chosen": 1.0247632265090942, |
| "logits/rejected": 0.971980094909668, |
| "logps/chosen": -204.77330017089844, |
| "logps/rejected": -208.4816131591797, |
| "loss": 0.5257, |
| "rewards/accuracies": 0.75, |
| "rewards/chosen": -0.39921775460243225, |
| "rewards/margins": 0.7709429860115051, |
| "rewards/rejected": -1.1701607704162598, |
| "step": 3210 |
| }, |
| { |
| "epoch": 2.576, |
| "grad_norm": 7.172482490539551, |
| "learning_rate": 4.631509212487812e-06, |
| "logits/chosen": 0.9954729080200195, |
| "logits/rejected": 0.9880996942520142, |
| "logps/chosen": -210.8682098388672, |
| "logps/rejected": -213.47573852539062, |
| "loss": 0.5339, |
| "rewards/accuracies": 0.7750000357627869, |
| "rewards/chosen": -0.9985690116882324, |
| "rewards/margins": 0.739185094833374, |
| "rewards/rejected": -1.7377541065216064, |
| "step": 3220 |
| }, |
| { |
| "epoch": 2.584, |
| "grad_norm": 11.274581909179688, |
| "learning_rate": 4.627852762586718e-06, |
| "logits/chosen": 0.9896817207336426, |
| "logits/rejected": 1.021188735961914, |
| "logps/chosen": -189.18344116210938, |
| "logps/rejected": -215.19497680664062, |
| "loss": 0.5474, |
| "rewards/accuracies": 0.7250000238418579, |
| "rewards/chosen": -0.4847024381160736, |
| "rewards/margins": 0.7209543585777283, |
| "rewards/rejected": -1.205656886100769, |
| "step": 3230 |
| }, |
| { |
| "epoch": 2.592, |
| "grad_norm": 5.447055816650391, |
| "learning_rate": 4.624179719262342e-06, |
| "logits/chosen": 1.0467568635940552, |
| "logits/rejected": 1.0570539236068726, |
| "logps/chosen": -201.77415466308594, |
| "logps/rejected": -206.077392578125, |
| "loss": 0.4613, |
| "rewards/accuracies": 0.8125, |
| "rewards/chosen": -0.5657867789268494, |
| "rewards/margins": 0.8899927139282227, |
| "rewards/rejected": -1.4557795524597168, |
| "step": 3240 |
| }, |
| { |
| "epoch": 2.6, |
| "grad_norm": 7.587062358856201, |
| "learning_rate": 4.62049011115781e-06, |
| "logits/chosen": 0.9699912071228027, |
| "logits/rejected": 1.017883539199829, |
| "logps/chosen": -205.2608642578125, |
| "logps/rejected": -220.08792114257812, |
| "loss": 0.4848, |
| "rewards/accuracies": 0.7875000238418579, |
| "rewards/chosen": -0.7234296798706055, |
| "rewards/margins": 0.8834452033042908, |
| "rewards/rejected": -1.6068748235702515, |
| "step": 3250 |
| }, |
| { |
| "epoch": 2.608, |
| "grad_norm": 6.428370475769043, |
| "learning_rate": 4.616783967045432e-06, |
| "logits/chosen": 1.0170371532440186, |
| "logits/rejected": 0.9278216361999512, |
| "logps/chosen": -205.9370574951172, |
| "logps/rejected": -226.574462890625, |
| "loss": 0.443, |
| "rewards/accuracies": 0.8125, |
| "rewards/chosen": -0.777590274810791, |
| "rewards/margins": 0.9760915637016296, |
| "rewards/rejected": -1.753682017326355, |
| "step": 3260 |
| }, |
| { |
| "epoch": 2.616, |
| "grad_norm": 9.805668830871582, |
| "learning_rate": 4.6130613158264605e-06, |
| "logits/chosen": 0.8976919054985046, |
| "logits/rejected": 0.9280143976211548, |
| "logps/chosen": -206.73876953125, |
| "logps/rejected": -206.44371032714844, |
| "loss": 0.5154, |
| "rewards/accuracies": 0.75, |
| "rewards/chosen": -0.8372632265090942, |
| "rewards/margins": 0.7562478184700012, |
| "rewards/rejected": -1.5935109853744507, |
| "step": 3270 |
| }, |
| { |
| "epoch": 2.624, |
| "grad_norm": 8.653728485107422, |
| "learning_rate": 4.6093221865308795e-06, |
| "logits/chosen": 0.9815517663955688, |
| "logits/rejected": 0.926008403301239, |
| "logps/chosen": -196.36875915527344, |
| "logps/rejected": -215.28271484375, |
| "loss": 0.5294, |
| "rewards/accuracies": 0.7750000357627869, |
| "rewards/chosen": -0.9871358871459961, |
| "rewards/margins": 0.8871564865112305, |
| "rewards/rejected": -1.874292254447937, |
| "step": 3280 |
| }, |
| { |
| "epoch": 2.632, |
| "grad_norm": 4.758111953735352, |
| "learning_rate": 4.605566608317169e-06, |
| "logits/chosen": 0.8787722587585449, |
| "logits/rejected": 0.8375174403190613, |
| "logps/chosen": -190.63404846191406, |
| "logps/rejected": -210.22802734375, |
| "loss": 0.5017, |
| "rewards/accuracies": 0.75, |
| "rewards/chosen": -0.5020423531532288, |
| "rewards/margins": 0.8334178328514099, |
| "rewards/rejected": -1.3354600667953491, |
| "step": 3290 |
| }, |
| { |
| "epoch": 2.64, |
| "grad_norm": 7.596653938293457, |
| "learning_rate": 4.601794610472083e-06, |
| "logits/chosen": 0.9046363830566406, |
| "logits/rejected": 0.8183378577232361, |
| "logps/chosen": -215.7941436767578, |
| "logps/rejected": -241.56838989257812, |
| "loss": 0.4736, |
| "rewards/accuracies": 0.762499988079071, |
| "rewards/chosen": -0.9933833479881287, |
| "rewards/margins": 0.9869853854179382, |
| "rewards/rejected": -1.9803688526153564, |
| "step": 3300 |
| }, |
| { |
| "epoch": 2.648, |
| "grad_norm": 12.559988975524902, |
| "learning_rate": 4.598006222410419e-06, |
| "logits/chosen": 0.9887520670890808, |
| "logits/rejected": 1.0198968648910522, |
| "logps/chosen": -188.9550018310547, |
| "logps/rejected": -219.6782684326172, |
| "loss": 0.4814, |
| "rewards/accuracies": 0.7750000357627869, |
| "rewards/chosen": -0.820119321346283, |
| "rewards/margins": 0.9246525168418884, |
| "rewards/rejected": -1.7447718381881714, |
| "step": 3310 |
| }, |
| { |
| "epoch": 2.656, |
| "grad_norm": 5.864845275878906, |
| "learning_rate": 4.594201473674788e-06, |
| "logits/chosen": 0.9294392466545105, |
| "logits/rejected": 0.796425461769104, |
| "logps/chosen": -197.6046905517578, |
| "logps/rejected": -234.70787048339844, |
| "loss": 0.4557, |
| "rewards/accuracies": 0.8375000357627869, |
| "rewards/chosen": -1.0228973627090454, |
| "rewards/margins": 0.9283467531204224, |
| "rewards/rejected": -1.9512439966201782, |
| "step": 3320 |
| }, |
| { |
| "epoch": 2.664, |
| "grad_norm": 8.346921920776367, |
| "learning_rate": 4.590380393935383e-06, |
| "logits/chosen": 0.8741597533226013, |
| "logits/rejected": 0.836887001991272, |
| "logps/chosen": -214.54185485839844, |
| "logps/rejected": -223.0530242919922, |
| "loss": 0.5895, |
| "rewards/accuracies": 0.7250000238418579, |
| "rewards/chosen": -1.2219938039779663, |
| "rewards/margins": 0.6594648361206055, |
| "rewards/rejected": -1.8814586400985718, |
| "step": 3330 |
| }, |
| { |
| "epoch": 2.672, |
| "grad_norm": 7.899364948272705, |
| "learning_rate": 4.5865430129897536e-06, |
| "logits/chosen": 0.857835590839386, |
| "logits/rejected": 0.8607529997825623, |
| "logps/chosen": -210.548095703125, |
| "logps/rejected": -236.50767517089844, |
| "loss": 0.5514, |
| "rewards/accuracies": 0.75, |
| "rewards/chosen": -1.091759443283081, |
| "rewards/margins": 0.8923540115356445, |
| "rewards/rejected": -1.984113335609436, |
| "step": 3340 |
| }, |
| { |
| "epoch": 2.68, |
| "grad_norm": 6.950599193572998, |
| "learning_rate": 4.5826893607625665e-06, |
| "logits/chosen": 0.8608657717704773, |
| "logits/rejected": 0.9021877646446228, |
| "logps/chosen": -219.5095672607422, |
| "logps/rejected": -248.8926239013672, |
| "loss": 0.4884, |
| "rewards/accuracies": 0.824999988079071, |
| "rewards/chosen": -1.17706298828125, |
| "rewards/margins": 1.054115653038025, |
| "rewards/rejected": -2.2311787605285645, |
| "step": 3350 |
| }, |
| { |
| "epoch": 2.6879999999999997, |
| "grad_norm": 6.3925065994262695, |
| "learning_rate": 4.578819467305375e-06, |
| "logits/chosen": 0.8635089993476868, |
| "logits/rejected": 0.9291839599609375, |
| "logps/chosen": -208.9047393798828, |
| "logps/rejected": -242.42764282226562, |
| "loss": 0.5177, |
| "rewards/accuracies": 0.737500011920929, |
| "rewards/chosen": -0.8883923888206482, |
| "rewards/margins": 0.8316335678100586, |
| "rewards/rejected": -1.7200260162353516, |
| "step": 3360 |
| }, |
| { |
| "epoch": 2.6959999999999997, |
| "grad_norm": 7.390092372894287, |
| "learning_rate": 4.5749333627963886e-06, |
| "logits/chosen": 1.025161623954773, |
| "logits/rejected": 0.9390385746955872, |
| "logps/chosen": -192.0789031982422, |
| "logps/rejected": -208.17300415039062, |
| "loss": 0.6558, |
| "rewards/accuracies": 0.675000011920929, |
| "rewards/chosen": -0.7664794325828552, |
| "rewards/margins": 0.47816377878189087, |
| "rewards/rejected": -1.244643211364746, |
| "step": 3370 |
| }, |
| { |
| "epoch": 2.7039999999999997, |
| "grad_norm": 6.525977611541748, |
| "learning_rate": 4.571031077540227e-06, |
| "logits/chosen": 0.8534032702445984, |
| "logits/rejected": 0.847669780254364, |
| "logps/chosen": -206.7920684814453, |
| "logps/rejected": -226.60240173339844, |
| "loss": 0.5259, |
| "rewards/accuracies": 0.7250000238418579, |
| "rewards/chosen": -0.8559421896934509, |
| "rewards/margins": 0.7720093727111816, |
| "rewards/rejected": -1.6279516220092773, |
| "step": 3380 |
| }, |
| { |
| "epoch": 2.7119999999999997, |
| "grad_norm": 11.586974143981934, |
| "learning_rate": 4.567112641967697e-06, |
| "logits/chosen": 0.849672794342041, |
| "logits/rejected": 0.7306337356567383, |
| "logps/chosen": -230.2295379638672, |
| "logps/rejected": -227.7930908203125, |
| "loss": 0.5625, |
| "rewards/accuracies": 0.762499988079071, |
| "rewards/chosen": -0.9319648146629333, |
| "rewards/margins": 0.6750860214233398, |
| "rewards/rejected": -1.607050895690918, |
| "step": 3390 |
| }, |
| { |
| "epoch": 2.7199999999999998, |
| "grad_norm": 13.69837474822998, |
| "learning_rate": 4.563178086635546e-06, |
| "logits/chosen": 0.9492778182029724, |
| "logits/rejected": 0.9037938117980957, |
| "logps/chosen": -210.29478454589844, |
| "logps/rejected": -216.89903259277344, |
| "loss": 0.5038, |
| "rewards/accuracies": 0.7750000357627869, |
| "rewards/chosen": -0.8451706171035767, |
| "rewards/margins": 0.8125400543212891, |
| "rewards/rejected": -1.6577106714248657, |
| "step": 3400 |
| }, |
| { |
| "epoch": 2.7279999999999998, |
| "grad_norm": 4.132405757904053, |
| "learning_rate": 4.559227442226226e-06, |
| "logits/chosen": 0.9789943695068359, |
| "logits/rejected": 0.9167888760566711, |
| "logps/chosen": -191.22715759277344, |
| "logps/rejected": -232.78164672851562, |
| "loss": 0.4735, |
| "rewards/accuracies": 0.7750000357627869, |
| "rewards/chosen": -0.7555204629898071, |
| "rewards/margins": 0.9841014742851257, |
| "rewards/rejected": -1.7396221160888672, |
| "step": 3410 |
| }, |
| { |
| "epoch": 2.7359999999999998, |
| "grad_norm": 6.608091831207275, |
| "learning_rate": 4.555260739547657e-06, |
| "logits/chosen": 1.0244665145874023, |
| "logits/rejected": 0.8857207298278809, |
| "logps/chosen": -203.6723175048828, |
| "logps/rejected": -212.9291534423828, |
| "loss": 0.5356, |
| "rewards/accuracies": 0.6625000238418579, |
| "rewards/chosen": -0.6175267100334167, |
| "rewards/margins": 0.7223233580589294, |
| "rewards/rejected": -1.3398501873016357, |
| "step": 3420 |
| }, |
| { |
| "epoch": 2.7439999999999998, |
| "grad_norm": 9.938222885131836, |
| "learning_rate": 4.551278009532981e-06, |
| "logits/chosen": 1.0124415159225464, |
| "logits/rejected": 0.9431573152542114, |
| "logps/chosen": -207.00718688964844, |
| "logps/rejected": -225.10806274414062, |
| "loss": 0.5292, |
| "rewards/accuracies": 0.7250000238418579, |
| "rewards/chosen": -0.5922741889953613, |
| "rewards/margins": 0.7229223251342773, |
| "rewards/rejected": -1.3151965141296387, |
| "step": 3430 |
| }, |
| { |
| "epoch": 2.752, |
| "grad_norm": 6.640532970428467, |
| "learning_rate": 4.5472792832403295e-06, |
| "logits/chosen": 0.9901018142700195, |
| "logits/rejected": 0.8438584208488464, |
| "logps/chosen": -192.44761657714844, |
| "logps/rejected": -208.34683227539062, |
| "loss": 0.4769, |
| "rewards/accuracies": 0.8125, |
| "rewards/chosen": -0.6061093211174011, |
| "rewards/margins": 0.9215426445007324, |
| "rewards/rejected": -1.5276520252227783, |
| "step": 3440 |
| }, |
| { |
| "epoch": 2.76, |
| "grad_norm": 7.896131992340088, |
| "learning_rate": 4.543264591852572e-06, |
| "logits/chosen": 0.9401613473892212, |
| "logits/rejected": 1.0364055633544922, |
| "logps/chosen": -212.4904022216797, |
| "logps/rejected": -249.1276092529297, |
| "loss": 0.6105, |
| "rewards/accuracies": 0.6500000357627869, |
| "rewards/chosen": -0.9996709823608398, |
| "rewards/margins": 0.6035251021385193, |
| "rewards/rejected": -1.6031960248947144, |
| "step": 3450 |
| }, |
| { |
| "epoch": 2.768, |
| "grad_norm": 14.408479690551758, |
| "learning_rate": 4.539233966677078e-06, |
| "logits/chosen": 0.9975506067276001, |
| "logits/rejected": 1.0087366104125977, |
| "logps/chosen": -220.4411163330078, |
| "logps/rejected": -230.56089782714844, |
| "loss": 0.5005, |
| "rewards/accuracies": 0.7875000238418579, |
| "rewards/chosen": -0.6962399482727051, |
| "rewards/margins": 0.7583475112915039, |
| "rewards/rejected": -1.4545873403549194, |
| "step": 3460 |
| }, |
| { |
| "epoch": 2.776, |
| "grad_norm": 9.091312408447266, |
| "learning_rate": 4.535187439145473e-06, |
| "logits/chosen": 1.004744529724121, |
| "logits/rejected": 0.956881046295166, |
| "logps/chosen": -208.3164520263672, |
| "logps/rejected": -249.3081512451172, |
| "loss": 0.4817, |
| "rewards/accuracies": 0.75, |
| "rewards/chosen": -0.8332870602607727, |
| "rewards/margins": 0.9917869567871094, |
| "rewards/rejected": -1.8250740766525269, |
| "step": 3470 |
| }, |
| { |
| "epoch": 2.784, |
| "grad_norm": 9.282960891723633, |
| "learning_rate": 4.531125040813392e-06, |
| "logits/chosen": 0.9015275835990906, |
| "logits/rejected": 0.8595914244651794, |
| "logps/chosen": -206.90184020996094, |
| "logps/rejected": -241.4942626953125, |
| "loss": 0.4265, |
| "rewards/accuracies": 0.800000011920929, |
| "rewards/chosen": -0.8733939528465271, |
| "rewards/margins": 1.2572206258773804, |
| "rewards/rejected": -2.1306145191192627, |
| "step": 3480 |
| }, |
| { |
| "epoch": 2.792, |
| "grad_norm": 7.599782943725586, |
| "learning_rate": 4.527046803360232e-06, |
| "logits/chosen": 1.0097182989120483, |
| "logits/rejected": 0.9868324398994446, |
| "logps/chosen": -197.34820556640625, |
| "logps/rejected": -220.2039337158203, |
| "loss": 0.4665, |
| "rewards/accuracies": 0.737500011920929, |
| "rewards/chosen": -0.28118035197257996, |
| "rewards/margins": 0.9197006225585938, |
| "rewards/rejected": -1.200881004333496, |
| "step": 3490 |
| }, |
| { |
| "epoch": 2.8, |
| "grad_norm": 10.817337036132812, |
| "learning_rate": 4.522952758588909e-06, |
| "logits/chosen": 1.014154314994812, |
| "logits/rejected": 1.0231674909591675, |
| "logps/chosen": -199.9979705810547, |
| "logps/rejected": -226.4349822998047, |
| "loss": 0.4632, |
| "rewards/accuracies": 0.8125, |
| "rewards/chosen": -0.3697163462638855, |
| "rewards/margins": 0.8839020133018494, |
| "rewards/rejected": -1.2536184787750244, |
| "step": 3500 |
| }, |
| { |
| "epoch": 2.808, |
| "grad_norm": 10.987738609313965, |
| "learning_rate": 4.518842938425606e-06, |
| "logits/chosen": 1.0434938669204712, |
| "logits/rejected": 0.9931101202964783, |
| "logps/chosen": -193.0313262939453, |
| "logps/rejected": -202.6310272216797, |
| "loss": 0.493, |
| "rewards/accuracies": 0.737500011920929, |
| "rewards/chosen": -0.3265318274497986, |
| "rewards/margins": 0.9020771980285645, |
| "rewards/rejected": -1.2286089658737183, |
| "step": 3510 |
| }, |
| { |
| "epoch": 2.816, |
| "grad_norm": 6.466224193572998, |
| "learning_rate": 4.514717374919525e-06, |
| "logits/chosen": 1.0711123943328857, |
| "logits/rejected": 0.9438812136650085, |
| "logps/chosen": -204.62571716308594, |
| "logps/rejected": -227.7462921142578, |
| "loss": 0.4658, |
| "rewards/accuracies": 0.8125, |
| "rewards/chosen": -0.47518759965896606, |
| "rewards/margins": 1.0582878589630127, |
| "rewards/rejected": -1.5334755182266235, |
| "step": 3520 |
| }, |
| { |
| "epoch": 2.824, |
| "grad_norm": 9.006815910339355, |
| "learning_rate": 4.510576100242642e-06, |
| "logits/chosen": 1.0240957736968994, |
| "logits/rejected": 0.9844247698783875, |
| "logps/chosen": -198.17991638183594, |
| "logps/rejected": -212.7672576904297, |
| "loss": 0.5084, |
| "rewards/accuracies": 0.75, |
| "rewards/chosen": -0.45504727959632874, |
| "rewards/margins": 0.9179355502128601, |
| "rewards/rejected": -1.3729829788208008, |
| "step": 3530 |
| }, |
| { |
| "epoch": 2.832, |
| "grad_norm": 7.647313594818115, |
| "learning_rate": 4.506419146689445e-06, |
| "logits/chosen": 1.0858877897262573, |
| "logits/rejected": 0.9952969551086426, |
| "logps/chosen": -204.3494110107422, |
| "logps/rejected": -224.9398651123047, |
| "loss": 0.4831, |
| "rewards/accuracies": 0.7750000357627869, |
| "rewards/chosen": -0.022721266373991966, |
| "rewards/margins": 1.0130819082260132, |
| "rewards/rejected": -1.035803198814392, |
| "step": 3540 |
| }, |
| { |
| "epoch": 2.84, |
| "grad_norm": 10.288926124572754, |
| "learning_rate": 4.502246546676697e-06, |
| "logits/chosen": 1.0694109201431274, |
| "logits/rejected": 1.054747462272644, |
| "logps/chosen": -194.4237518310547, |
| "logps/rejected": -223.91714477539062, |
| "loss": 0.5075, |
| "rewards/accuracies": 0.737500011920929, |
| "rewards/chosen": -0.7783587574958801, |
| "rewards/margins": 0.9766228795051575, |
| "rewards/rejected": -1.7549816370010376, |
| "step": 3550 |
| }, |
| { |
| "epoch": 2.848, |
| "grad_norm": 7.374194145202637, |
| "learning_rate": 4.498058332743168e-06, |
| "logits/chosen": 0.7898425459861755, |
| "logits/rejected": 0.7930797934532166, |
| "logps/chosen": -203.2722625732422, |
| "logps/rejected": -237.77330017089844, |
| "loss": 0.5055, |
| "rewards/accuracies": 0.7750000357627869, |
| "rewards/chosen": -0.446158230304718, |
| "rewards/margins": 0.8784043192863464, |
| "rewards/rejected": -1.3245625495910645, |
| "step": 3560 |
| }, |
| { |
| "epoch": 2.856, |
| "grad_norm": 9.789468765258789, |
| "learning_rate": 4.493854537549393e-06, |
| "logits/chosen": 1.0634657144546509, |
| "logits/rejected": 1.1062793731689453, |
| "logps/chosen": -202.4827117919922, |
| "logps/rejected": -214.6761474609375, |
| "loss": 0.49, |
| "rewards/accuracies": 0.7750000357627869, |
| "rewards/chosen": -0.2086171656847, |
| "rewards/margins": 0.8339020609855652, |
| "rewards/rejected": -1.042519211769104, |
| "step": 3570 |
| }, |
| { |
| "epoch": 2.864, |
| "grad_norm": 6.90875244140625, |
| "learning_rate": 4.48963519387741e-06, |
| "logits/chosen": 1.0072237253189087, |
| "logits/rejected": 0.9182701110839844, |
| "logps/chosen": -202.2296905517578, |
| "logps/rejected": -228.3877410888672, |
| "loss": 0.5129, |
| "rewards/accuracies": 0.75, |
| "rewards/chosen": -0.5820229649543762, |
| "rewards/margins": 0.7529212832450867, |
| "rewards/rejected": -1.334944248199463, |
| "step": 3580 |
| }, |
| { |
| "epoch": 2.872, |
| "grad_norm": 7.405145645141602, |
| "learning_rate": 4.485400334630511e-06, |
| "logits/chosen": 1.0315786600112915, |
| "logits/rejected": 1.0412896871566772, |
| "logps/chosen": -208.01670837402344, |
| "logps/rejected": -233.53164672851562, |
| "loss": 0.4206, |
| "rewards/accuracies": 0.8375000357627869, |
| "rewards/chosen": -0.788216233253479, |
| "rewards/margins": 1.1600385904312134, |
| "rewards/rejected": -1.9482548236846924, |
| "step": 3590 |
| }, |
| { |
| "epoch": 2.88, |
| "grad_norm": 8.326019287109375, |
| "learning_rate": 4.4811499928329775e-06, |
| "logits/chosen": 0.8572309613227844, |
| "logits/rejected": 0.8392788171768188, |
| "logps/chosen": -199.2486572265625, |
| "logps/rejected": -243.4100799560547, |
| "loss": 0.5167, |
| "rewards/accuracies": 0.737500011920929, |
| "rewards/chosen": -1.2861894369125366, |
| "rewards/margins": 0.9100348353385925, |
| "rewards/rejected": -2.1962242126464844, |
| "step": 3600 |
| }, |
| { |
| "epoch": 2.888, |
| "grad_norm": 5.889350414276123, |
| "learning_rate": 4.4768842016298275e-06, |
| "logits/chosen": 1.1370327472686768, |
| "logits/rejected": 0.966783344745636, |
| "logps/chosen": -203.1497039794922, |
| "logps/rejected": -216.1260223388672, |
| "loss": 0.5539, |
| "rewards/accuracies": 0.7250000238418579, |
| "rewards/chosen": -0.37005722522735596, |
| "rewards/margins": 0.8371629118919373, |
| "rewards/rejected": -1.2072200775146484, |
| "step": 3610 |
| }, |
| { |
| "epoch": 2.896, |
| "grad_norm": 5.19056510925293, |
| "learning_rate": 4.472602994286559e-06, |
| "logits/chosen": 1.003334641456604, |
| "logits/rejected": 0.9512959718704224, |
| "logps/chosen": -205.01858520507812, |
| "logps/rejected": -227.35317993164062, |
| "loss": 0.629, |
| "rewards/accuracies": 0.699999988079071, |
| "rewards/chosen": -0.9021452069282532, |
| "rewards/margins": 0.6625142097473145, |
| "rewards/rejected": -1.5646594762802124, |
| "step": 3620 |
| }, |
| { |
| "epoch": 2.904, |
| "grad_norm": 8.15114974975586, |
| "learning_rate": 4.468306404188887e-06, |
| "logits/chosen": 0.9551017880439758, |
| "logits/rejected": 0.9154602289199829, |
| "logps/chosen": -198.5496368408203, |
| "logps/rejected": -231.066650390625, |
| "loss": 0.5031, |
| "rewards/accuracies": 0.7250000238418579, |
| "rewards/chosen": -0.8837252855300903, |
| "rewards/margins": 0.8877946734428406, |
| "rewards/rejected": -1.7715200185775757, |
| "step": 3630 |
| }, |
| { |
| "epoch": 2.912, |
| "grad_norm": 6.599959373474121, |
| "learning_rate": 4.463994464842485e-06, |
| "logits/chosen": 1.1515620946884155, |
| "logits/rejected": 1.0820724964141846, |
| "logps/chosen": -196.33172607421875, |
| "logps/rejected": -196.94976806640625, |
| "loss": 0.5572, |
| "rewards/accuracies": 0.699999988079071, |
| "rewards/chosen": -0.42906999588012695, |
| "rewards/margins": 0.6470549702644348, |
| "rewards/rejected": -1.0761250257492065, |
| "step": 3640 |
| }, |
| { |
| "epoch": 2.92, |
| "grad_norm": 11.319299697875977, |
| "learning_rate": 4.45966720987272e-06, |
| "logits/chosen": 0.982757031917572, |
| "logits/rejected": 0.9923986792564392, |
| "logps/chosen": -221.4713134765625, |
| "logps/rejected": -228.6186065673828, |
| "loss": 0.5141, |
| "rewards/accuracies": 0.7250000238418579, |
| "rewards/chosen": -0.6713235974311829, |
| "rewards/margins": 0.912000834941864, |
| "rewards/rejected": -1.5833243131637573, |
| "step": 3650 |
| }, |
| { |
| "epoch": 2.928, |
| "grad_norm": 6.565591335296631, |
| "learning_rate": 4.455324673024396e-06, |
| "logits/chosen": 1.0351612567901611, |
| "logits/rejected": 0.9620069861412048, |
| "logps/chosen": -205.81106567382812, |
| "logps/rejected": -224.9451446533203, |
| "loss": 0.466, |
| "rewards/accuracies": 0.7875000238418579, |
| "rewards/chosen": -0.5482096076011658, |
| "rewards/margins": 0.8861121535301208, |
| "rewards/rejected": -1.434321641921997, |
| "step": 3660 |
| }, |
| { |
| "epoch": 2.936, |
| "grad_norm": 11.906221389770508, |
| "learning_rate": 4.45096688816149e-06, |
| "logits/chosen": 0.9011246562004089, |
| "logits/rejected": 0.9497923254966736, |
| "logps/chosen": -201.8857421875, |
| "logps/rejected": -207.8782501220703, |
| "loss": 0.6158, |
| "rewards/accuracies": 0.675000011920929, |
| "rewards/chosen": -0.971345067024231, |
| "rewards/margins": 0.47949621081352234, |
| "rewards/rejected": -1.4508411884307861, |
| "step": 3670 |
| }, |
| { |
| "epoch": 2.944, |
| "grad_norm": 4.588380336761475, |
| "learning_rate": 4.4465938892668815e-06, |
| "logits/chosen": 1.0076476335525513, |
| "logits/rejected": 1.000885009765625, |
| "logps/chosen": -192.5887451171875, |
| "logps/rejected": -214.83604431152344, |
| "loss": 0.4861, |
| "rewards/accuracies": 0.7750000357627869, |
| "rewards/chosen": -0.916580319404602, |
| "rewards/margins": 0.8697047233581543, |
| "rewards/rejected": -1.7862850427627563, |
| "step": 3680 |
| }, |
| { |
| "epoch": 2.952, |
| "grad_norm": 7.142757415771484, |
| "learning_rate": 4.442205710442095e-06, |
| "logits/chosen": 1.0882282257080078, |
| "logits/rejected": 1.0283339023590088, |
| "logps/chosen": -202.76055908203125, |
| "logps/rejected": -213.7969512939453, |
| "loss": 0.5747, |
| "rewards/accuracies": 0.7125000357627869, |
| "rewards/chosen": -1.2140840291976929, |
| "rewards/margins": 0.6454777121543884, |
| "rewards/rejected": -1.8595619201660156, |
| "step": 3690 |
| }, |
| { |
| "epoch": 2.96, |
| "grad_norm": 7.612401962280273, |
| "learning_rate": 4.43780238590703e-06, |
| "logits/chosen": 1.0234251022338867, |
| "logits/rejected": 1.0618809461593628, |
| "logps/chosen": -208.89512634277344, |
| "logps/rejected": -240.69761657714844, |
| "loss": 0.5233, |
| "rewards/accuracies": 0.75, |
| "rewards/chosen": -1.4141120910644531, |
| "rewards/margins": 1.018404245376587, |
| "rewards/rejected": -2.432516574859619, |
| "step": 3700 |
| }, |
| { |
| "epoch": 2.968, |
| "grad_norm": 7.064319133758545, |
| "learning_rate": 4.433383949999695e-06, |
| "logits/chosen": 0.9859750866889954, |
| "logits/rejected": 0.86090087890625, |
| "logps/chosen": -195.9196319580078, |
| "logps/rejected": -218.8886260986328, |
| "loss": 0.4801, |
| "rewards/accuracies": 0.8375000357627869, |
| "rewards/chosen": -1.1885597705841064, |
| "rewards/margins": 0.8579304814338684, |
| "rewards/rejected": -2.04649019241333, |
| "step": 3710 |
| }, |
| { |
| "epoch": 2.976, |
| "grad_norm": 6.925068378448486, |
| "learning_rate": 4.428950437175944e-06, |
| "logits/chosen": 0.9019951224327087, |
| "logits/rejected": 0.8458378911018372, |
| "logps/chosen": -232.447265625, |
| "logps/rejected": -239.73648071289062, |
| "loss": 0.5006, |
| "rewards/accuracies": 0.699999988079071, |
| "rewards/chosen": -1.1259241104125977, |
| "rewards/margins": 0.8236715197563171, |
| "rewards/rejected": -1.9495956897735596, |
| "step": 3720 |
| }, |
| { |
| "epoch": 2.984, |
| "grad_norm": 9.741296768188477, |
| "learning_rate": 4.4245018820091975e-06, |
| "logits/chosen": 0.8749257326126099, |
| "logits/rejected": 0.8770486116409302, |
| "logps/chosen": -232.95010375976562, |
| "logps/rejected": -250.5192413330078, |
| "loss": 0.5261, |
| "rewards/accuracies": 0.6625000238418579, |
| "rewards/chosen": -1.2567474842071533, |
| "rewards/margins": 1.0741055011749268, |
| "rewards/rejected": -2.33085298538208, |
| "step": 3730 |
| }, |
| { |
| "epoch": 2.992, |
| "grad_norm": 8.8729887008667, |
| "learning_rate": 4.420038319190184e-06, |
| "logits/chosen": 0.8761041760444641, |
| "logits/rejected": 0.9457497000694275, |
| "logps/chosen": -199.91307067871094, |
| "logps/rejected": -205.45762634277344, |
| "loss": 0.4907, |
| "rewards/accuracies": 0.737500011920929, |
| "rewards/chosen": -0.8159521222114563, |
| "rewards/margins": 0.788399338722229, |
| "rewards/rejected": -1.6043514013290405, |
| "step": 3740 |
| }, |
| { |
| "epoch": 3.0, |
| "grad_norm": 6.5494585037231445, |
| "learning_rate": 4.415559783526661e-06, |
| "logits/chosen": 0.9953758120536804, |
| "logits/rejected": 0.9608160257339478, |
| "logps/chosen": -203.28005981445312, |
| "logps/rejected": -234.4387664794922, |
| "loss": 0.5245, |
| "rewards/accuracies": 0.699999988079071, |
| "rewards/chosen": -1.4376920461654663, |
| "rewards/margins": 0.7289747595787048, |
| "rewards/rejected": -2.1666667461395264, |
| "step": 3750 |
| }, |
| { |
| "epoch": 3.008, |
| "grad_norm": 7.342889785766602, |
| "learning_rate": 4.411066309943151e-06, |
| "logits/chosen": 0.911847710609436, |
| "logits/rejected": 0.8547344207763672, |
| "logps/chosen": -229.86656188964844, |
| "logps/rejected": -246.57359313964844, |
| "loss": 0.39, |
| "rewards/accuracies": 0.8375000357627869, |
| "rewards/chosen": -0.783231258392334, |
| "rewards/margins": 1.1452419757843018, |
| "rewards/rejected": -1.9284733533859253, |
| "step": 3760 |
| }, |
| { |
| "epoch": 3.016, |
| "grad_norm": 6.122635364532471, |
| "learning_rate": 4.406557933480665e-06, |
| "logits/chosen": 0.8730871081352234, |
| "logits/rejected": 0.7891945242881775, |
| "logps/chosen": -230.88916015625, |
| "logps/rejected": -229.0500946044922, |
| "loss": 0.4251, |
| "rewards/accuracies": 0.8500000238418579, |
| "rewards/chosen": -0.818146824836731, |
| "rewards/margins": 1.2087650299072266, |
| "rewards/rejected": -2.026911973953247, |
| "step": 3770 |
| }, |
| { |
| "epoch": 3.024, |
| "grad_norm": 7.4118452072143555, |
| "learning_rate": 4.402034689296425e-06, |
| "logits/chosen": 0.9826356768608093, |
| "logits/rejected": 0.8032142519950867, |
| "logps/chosen": -197.12989807128906, |
| "logps/rejected": -208.55838012695312, |
| "loss": 0.3441, |
| "rewards/accuracies": 0.9000000357627869, |
| "rewards/chosen": -0.5323260426521301, |
| "rewards/margins": 1.3038690090179443, |
| "rewards/rejected": -1.8361949920654297, |
| "step": 3780 |
| }, |
| { |
| "epoch": 3.032, |
| "grad_norm": 9.49242877960205, |
| "learning_rate": 4.397496612663599e-06, |
| "logits/chosen": 1.0212730169296265, |
| "logits/rejected": 0.8553698658943176, |
| "logps/chosen": -213.5210418701172, |
| "logps/rejected": -232.46238708496094, |
| "loss": 0.427, |
| "rewards/accuracies": 0.800000011920929, |
| "rewards/chosen": -1.1634958982467651, |
| "rewards/margins": 1.099713921546936, |
| "rewards/rejected": -2.263209819793701, |
| "step": 3790 |
| }, |
| { |
| "epoch": 3.04, |
| "grad_norm": 7.7141571044921875, |
| "learning_rate": 4.392943738971021e-06, |
| "logits/chosen": 0.9747546315193176, |
| "logits/rejected": 0.974277675151825, |
| "logps/chosen": -211.902099609375, |
| "logps/rejected": -224.82958984375, |
| "loss": 0.3788, |
| "rewards/accuracies": 0.862500011920929, |
| "rewards/chosen": -0.9616455435752869, |
| "rewards/margins": 1.1425886154174805, |
| "rewards/rejected": -2.104234457015991, |
| "step": 3800 |
| }, |
| { |
| "epoch": 3.048, |
| "grad_norm": 6.494782447814941, |
| "learning_rate": 4.388376103722914e-06, |
| "logits/chosen": 0.9183564186096191, |
| "logits/rejected": 0.8766688704490662, |
| "logps/chosen": -192.46058654785156, |
| "logps/rejected": -252.86399841308594, |
| "loss": 0.4398, |
| "rewards/accuracies": 0.7875000238418579, |
| "rewards/chosen": -1.0373767614364624, |
| "rewards/margins": 1.24557363986969, |
| "rewards/rejected": -2.2829504013061523, |
| "step": 3810 |
| }, |
| { |
| "epoch": 3.056, |
| "grad_norm": 12.855714797973633, |
| "learning_rate": 4.383793742538615e-06, |
| "logits/chosen": 0.9224297404289246, |
| "logits/rejected": 0.8424360156059265, |
| "logps/chosen": -188.82408142089844, |
| "logps/rejected": -220.83970642089844, |
| "loss": 0.5444, |
| "rewards/accuracies": 0.75, |
| "rewards/chosen": -0.5318169593811035, |
| "rewards/margins": 0.8576032519340515, |
| "rewards/rejected": -1.3894203901290894, |
| "step": 3820 |
| }, |
| { |
| "epoch": 3.064, |
| "grad_norm": 6.39134407043457, |
| "learning_rate": 4.3791966911522985e-06, |
| "logits/chosen": 0.9185449481010437, |
| "logits/rejected": 0.9570236206054688, |
| "logps/chosen": -198.7986602783203, |
| "logps/rejected": -196.16566467285156, |
| "loss": 0.5775, |
| "rewards/accuracies": 0.737500011920929, |
| "rewards/chosen": -0.8139546513557434, |
| "rewards/margins": 0.7184473872184753, |
| "rewards/rejected": -1.5324020385742188, |
| "step": 3830 |
| }, |
| { |
| "epoch": 3.072, |
| "grad_norm": 7.5484185218811035, |
| "learning_rate": 4.374584985412692e-06, |
| "logits/chosen": 1.0025938749313354, |
| "logits/rejected": 0.965260922908783, |
| "logps/chosen": -184.16038513183594, |
| "logps/rejected": -217.509765625, |
| "loss": 0.4271, |
| "rewards/accuracies": 0.7750000357627869, |
| "rewards/chosen": -0.8147037625312805, |
| "rewards/margins": 1.0747199058532715, |
| "rewards/rejected": -1.8894237279891968, |
| "step": 3840 |
| }, |
| { |
| "epoch": 3.08, |
| "grad_norm": 10.4950532913208, |
| "learning_rate": 4.369958661282805e-06, |
| "logits/chosen": 0.9348158240318298, |
| "logits/rejected": 0.8544149398803711, |
| "logps/chosen": -199.47950744628906, |
| "logps/rejected": -233.7744598388672, |
| "loss": 0.4727, |
| "rewards/accuracies": 0.7750000357627869, |
| "rewards/chosen": -0.8798511624336243, |
| "rewards/margins": 1.0782015323638916, |
| "rewards/rejected": -1.958052635192871, |
| "step": 3850 |
| }, |
| { |
| "epoch": 3.088, |
| "grad_norm": 10.140469551086426, |
| "learning_rate": 4.365317754839643e-06, |
| "logits/chosen": 0.8552842140197754, |
| "logits/rejected": 0.8293384909629822, |
| "logps/chosen": -229.6714324951172, |
| "logps/rejected": -237.9065399169922, |
| "loss": 0.4637, |
| "rewards/accuracies": 0.7875000238418579, |
| "rewards/chosen": -1.158323884010315, |
| "rewards/margins": 1.1132620573043823, |
| "rewards/rejected": -2.2715859413146973, |
| "step": 3860 |
| }, |
| { |
| "epoch": 3.096, |
| "grad_norm": 10.660223960876465, |
| "learning_rate": 4.360662302273926e-06, |
| "logits/chosen": 0.9855637550354004, |
| "logits/rejected": 0.8762430548667908, |
| "logps/chosen": -197.29660034179688, |
| "logps/rejected": -210.25186157226562, |
| "loss": 0.4684, |
| "rewards/accuracies": 0.762499988079071, |
| "rewards/chosen": -0.9172877669334412, |
| "rewards/margins": 0.9678840637207031, |
| "rewards/rejected": -1.885171890258789, |
| "step": 3870 |
| }, |
| { |
| "epoch": 3.104, |
| "grad_norm": 7.666329383850098, |
| "learning_rate": 4.355992339889806e-06, |
| "logits/chosen": 0.946201741695404, |
| "logits/rejected": 0.8815839886665344, |
| "logps/chosen": -197.9135284423828, |
| "logps/rejected": -215.1569366455078, |
| "loss": 0.4383, |
| "rewards/accuracies": 0.8125, |
| "rewards/chosen": -1.3328570127487183, |
| "rewards/margins": 1.0127038955688477, |
| "rewards/rejected": -2.3455610275268555, |
| "step": 3880 |
| }, |
| { |
| "epoch": 3.112, |
| "grad_norm": 14.594318389892578, |
| "learning_rate": 4.3513079041045925e-06, |
| "logits/chosen": 1.000898003578186, |
| "logits/rejected": 1.0313762426376343, |
| "logps/chosen": -207.04624938964844, |
| "logps/rejected": -221.084228515625, |
| "loss": 0.4328, |
| "rewards/accuracies": 0.824999988079071, |
| "rewards/chosen": -1.088545560836792, |
| "rewards/margins": 0.945988118648529, |
| "rewards/rejected": -2.0345335006713867, |
| "step": 3890 |
| }, |
| { |
| "epoch": 3.12, |
| "grad_norm": 7.375593185424805, |
| "learning_rate": 4.3466090314484526e-06, |
| "logits/chosen": 0.8702503442764282, |
| "logits/rejected": 0.9601964950561523, |
| "logps/chosen": -216.6647491455078, |
| "logps/rejected": -237.7848663330078, |
| "loss": 0.429, |
| "rewards/accuracies": 0.7750000357627869, |
| "rewards/chosen": -1.0842136144638062, |
| "rewards/margins": 1.1683653593063354, |
| "rewards/rejected": -2.2525792121887207, |
| "step": 3900 |
| }, |
| { |
| "epoch": 3.128, |
| "grad_norm": 5.803628921508789, |
| "learning_rate": 4.341895758564141e-06, |
| "logits/chosen": 0.9758888483047485, |
| "logits/rejected": 0.9623239636421204, |
| "logps/chosen": -195.2889404296875, |
| "logps/rejected": -224.08889770507812, |
| "loss": 0.4119, |
| "rewards/accuracies": 0.800000011920929, |
| "rewards/chosen": -0.9340219497680664, |
| "rewards/margins": 1.1046533584594727, |
| "rewards/rejected": -2.038675308227539, |
| "step": 3910 |
| }, |
| { |
| "epoch": 3.136, |
| "grad_norm": 13.526612281799316, |
| "learning_rate": 4.3371681222067065e-06, |
| "logits/chosen": 0.9235677719116211, |
| "logits/rejected": 0.8304759860038757, |
| "logps/chosen": -211.0590362548828, |
| "logps/rejected": -219.9079132080078, |
| "loss": 0.4242, |
| "rewards/accuracies": 0.7750000357627869, |
| "rewards/chosen": -0.7414934039115906, |
| "rewards/margins": 1.2431910037994385, |
| "rewards/rejected": -1.9846843481063843, |
| "step": 3920 |
| }, |
| { |
| "epoch": 3.144, |
| "grad_norm": 7.47207498550415, |
| "learning_rate": 4.332426159243206e-06, |
| "logits/chosen": 0.9469828009605408, |
| "logits/rejected": 0.8730935454368591, |
| "logps/chosen": -202.57708740234375, |
| "logps/rejected": -234.28994750976562, |
| "loss": 0.3326, |
| "rewards/accuracies": 0.9000000357627869, |
| "rewards/chosen": -0.8033515214920044, |
| "rewards/margins": 1.5122345685958862, |
| "rewards/rejected": -2.3155860900878906, |
| "step": 3930 |
| }, |
| { |
| "epoch": 3.152, |
| "grad_norm": 10.290471076965332, |
| "learning_rate": 4.327669906652421e-06, |
| "logits/chosen": 0.9347248077392578, |
| "logits/rejected": 0.9168369174003601, |
| "logps/chosen": -205.8367156982422, |
| "logps/rejected": -241.17527770996094, |
| "loss": 0.4048, |
| "rewards/accuracies": 0.824999988079071, |
| "rewards/chosen": -1.078516960144043, |
| "rewards/margins": 1.3800959587097168, |
| "rewards/rejected": -2.4586129188537598, |
| "step": 3940 |
| }, |
| { |
| "epoch": 3.16, |
| "grad_norm": 10.458824157714844, |
| "learning_rate": 4.322899401524563e-06, |
| "logits/chosen": 0.8786640167236328, |
| "logits/rejected": 0.726817786693573, |
| "logps/chosen": -206.63316345214844, |
| "logps/rejected": -241.03616333007812, |
| "loss": 0.417, |
| "rewards/accuracies": 0.7875000238418579, |
| "rewards/chosen": -1.183452844619751, |
| "rewards/margins": 1.3412961959838867, |
| "rewards/rejected": -2.5247490406036377, |
| "step": 3950 |
| }, |
| { |
| "epoch": 3.168, |
| "grad_norm": 11.669900894165039, |
| "learning_rate": 4.318114681060989e-06, |
| "logits/chosen": 0.9407995343208313, |
| "logits/rejected": 0.9570455551147461, |
| "logps/chosen": -201.372802734375, |
| "logps/rejected": -219.628662109375, |
| "loss": 0.4584, |
| "rewards/accuracies": 0.8125, |
| "rewards/chosen": -0.7632571458816528, |
| "rewards/margins": 1.0025835037231445, |
| "rewards/rejected": -1.765840768814087, |
| "step": 3960 |
| }, |
| { |
| "epoch": 3.176, |
| "grad_norm": 12.478621482849121, |
| "learning_rate": 4.313315782573914e-06, |
| "logits/chosen": 0.8294739127159119, |
| "logits/rejected": 0.8217317461967468, |
| "logps/chosen": -222.06484985351562, |
| "logps/rejected": -248.57861328125, |
| "loss": 0.474, |
| "rewards/accuracies": 0.824999988079071, |
| "rewards/chosen": -1.5710614919662476, |
| "rewards/margins": 1.08151113986969, |
| "rewards/rejected": -2.6525726318359375, |
| "step": 3970 |
| }, |
| { |
| "epoch": 3.184, |
| "grad_norm": 17.692319869995117, |
| "learning_rate": 4.308502743486107e-06, |
| "logits/chosen": 0.9584707617759705, |
| "logits/rejected": 0.8594500422477722, |
| "logps/chosen": -223.4303436279297, |
| "logps/rejected": -239.1450653076172, |
| "loss": 0.4356, |
| "rewards/accuracies": 0.8125, |
| "rewards/chosen": -0.8885783553123474, |
| "rewards/margins": 1.3923364877700806, |
| "rewards/rejected": -2.280914783477783, |
| "step": 3980 |
| }, |
| { |
| "epoch": 3.192, |
| "grad_norm": 7.768544673919678, |
| "learning_rate": 4.303675601330618e-06, |
| "logits/chosen": 0.8310710191726685, |
| "logits/rejected": 0.8502975702285767, |
| "logps/chosen": -201.81480407714844, |
| "logps/rejected": -245.815673828125, |
| "loss": 0.3742, |
| "rewards/accuracies": 0.875, |
| "rewards/chosen": -0.9760616421699524, |
| "rewards/margins": 1.6183117628097534, |
| "rewards/rejected": -2.5943734645843506, |
| "step": 3990 |
| }, |
| { |
| "epoch": 3.2, |
| "grad_norm": 11.38988208770752, |
| "learning_rate": 4.298834393750469e-06, |
| "logits/chosen": 0.9716154336929321, |
| "logits/rejected": 0.9836306571960449, |
| "logps/chosen": -210.5645294189453, |
| "logps/rejected": -230.19566345214844, |
| "loss": 0.4073, |
| "rewards/accuracies": 0.7750000357627869, |
| "rewards/chosen": -1.0508493185043335, |
| "rewards/margins": 1.2152557373046875, |
| "rewards/rejected": -2.2661049365997314, |
| "step": 4000 |
| }, |
| { |
| "epoch": 3.208, |
| "grad_norm": 8.679203987121582, |
| "learning_rate": 4.2939791584983695e-06, |
| "logits/chosen": 0.9018089175224304, |
| "logits/rejected": 0.8700457811355591, |
| "logps/chosen": -199.3931427001953, |
| "logps/rejected": -236.6988983154297, |
| "loss": 0.3966, |
| "rewards/accuracies": 0.824999988079071, |
| "rewards/chosen": -1.3499828577041626, |
| "rewards/margins": 1.516105055809021, |
| "rewards/rejected": -2.8660879135131836, |
| "step": 4010 |
| }, |
| { |
| "epoch": 3.216, |
| "grad_norm": 6.8652777671813965, |
| "learning_rate": 4.28910993343642e-06, |
| "logits/chosen": 0.9662749171257019, |
| "logits/rejected": 0.9868101477622986, |
| "logps/chosen": -201.87808227539062, |
| "logps/rejected": -243.437744140625, |
| "loss": 0.3566, |
| "rewards/accuracies": 0.8375000357627869, |
| "rewards/chosen": -1.0853939056396484, |
| "rewards/margins": 1.5348894596099854, |
| "rewards/rejected": -2.620283365249634, |
| "step": 4020 |
| }, |
| { |
| "epoch": 3.224, |
| "grad_norm": 10.237499237060547, |
| "learning_rate": 4.284226756535814e-06, |
| "logits/chosen": 0.899558961391449, |
| "logits/rejected": 0.8322281241416931, |
| "logps/chosen": -207.8240203857422, |
| "logps/rejected": -255.903564453125, |
| "loss": 0.42, |
| "rewards/accuracies": 0.7875000238418579, |
| "rewards/chosen": -1.0343433618545532, |
| "rewards/margins": 1.4542030096054077, |
| "rewards/rejected": -2.488546371459961, |
| "step": 4030 |
| }, |
| { |
| "epoch": 3.232, |
| "grad_norm": 8.353890419006348, |
| "learning_rate": 4.279329665876548e-06, |
| "logits/chosen": 0.8721134066581726, |
| "logits/rejected": 0.8629803657531738, |
| "logps/chosen": -199.42417907714844, |
| "logps/rejected": -235.1962432861328, |
| "loss": 0.404, |
| "rewards/accuracies": 0.875, |
| "rewards/chosen": -1.0176727771759033, |
| "rewards/margins": 1.266229510307312, |
| "rewards/rejected": -2.283902406692505, |
| "step": 4040 |
| }, |
| { |
| "epoch": 3.24, |
| "grad_norm": 8.403268814086914, |
| "learning_rate": 4.274418699647117e-06, |
| "logits/chosen": 0.9170069694519043, |
| "logits/rejected": 0.836732029914856, |
| "logps/chosen": -224.29885864257812, |
| "logps/rejected": -224.2794189453125, |
| "loss": 0.475, |
| "rewards/accuracies": 0.7750000357627869, |
| "rewards/chosen": -1.4468005895614624, |
| "rewards/margins": 1.152286410331726, |
| "rewards/rejected": -2.5990869998931885, |
| "step": 4050 |
| }, |
| { |
| "epoch": 3.248, |
| "grad_norm": 13.75932502746582, |
| "learning_rate": 4.269493896144224e-06, |
| "logits/chosen": 0.809971809387207, |
| "logits/rejected": 0.8430444598197937, |
| "logps/chosen": -188.1841278076172, |
| "logps/rejected": -231.8311767578125, |
| "loss": 0.4046, |
| "rewards/accuracies": 0.7875000238418579, |
| "rewards/chosen": -0.8959644436836243, |
| "rewards/margins": 1.3504829406738281, |
| "rewards/rejected": -2.2464473247528076, |
| "step": 4060 |
| }, |
| { |
| "epoch": 3.2560000000000002, |
| "grad_norm": 15.792646408081055, |
| "learning_rate": 4.264555293772475e-06, |
| "logits/chosen": 0.8807941675186157, |
| "logits/rejected": 0.8839223980903625, |
| "logps/chosen": -203.93402099609375, |
| "logps/rejected": -227.60142517089844, |
| "loss": 0.4881, |
| "rewards/accuracies": 0.75, |
| "rewards/chosen": -0.9412260055541992, |
| "rewards/margins": 1.0077519416809082, |
| "rewards/rejected": -1.9489777088165283, |
| "step": 4070 |
| }, |
| { |
| "epoch": 3.2640000000000002, |
| "grad_norm": 13.880685806274414, |
| "learning_rate": 4.2596029310440826e-06, |
| "logits/chosen": 0.8665573000907898, |
| "logits/rejected": 0.9192334413528442, |
| "logps/chosen": -225.70999145507812, |
| "logps/rejected": -229.2024383544922, |
| "loss": 0.4918, |
| "rewards/accuracies": 0.762499988079071, |
| "rewards/chosen": -1.1684324741363525, |
| "rewards/margins": 1.0852488279342651, |
| "rewards/rejected": -2.253680944442749, |
| "step": 4080 |
| }, |
| { |
| "epoch": 3.2720000000000002, |
| "grad_norm": 12.61258602142334, |
| "learning_rate": 4.254636846578567e-06, |
| "logits/chosen": 0.8059272766113281, |
| "logits/rejected": 0.7835027575492859, |
| "logps/chosen": -214.0414581298828, |
| "logps/rejected": -231.42832946777344, |
| "loss": 0.4407, |
| "rewards/accuracies": 0.7875000238418579, |
| "rewards/chosen": -0.8260923624038696, |
| "rewards/margins": 1.0982822179794312, |
| "rewards/rejected": -1.9243745803833008, |
| "step": 4090 |
| }, |
| { |
| "epoch": 3.2800000000000002, |
| "grad_norm": 14.153593063354492, |
| "learning_rate": 4.249657079102452e-06, |
| "logits/chosen": 0.9150283932685852, |
| "logits/rejected": 0.7976348996162415, |
| "logps/chosen": -203.6976776123047, |
| "logps/rejected": -214.48025512695312, |
| "loss": 0.4579, |
| "rewards/accuracies": 0.824999988079071, |
| "rewards/chosen": -0.8042591214179993, |
| "rewards/margins": 1.0714467763900757, |
| "rewards/rejected": -1.8757059574127197, |
| "step": 4100 |
| }, |
| { |
| "epoch": 3.288, |
| "grad_norm": 12.573663711547852, |
| "learning_rate": 4.244663667448965e-06, |
| "logits/chosen": 0.9341398477554321, |
| "logits/rejected": 0.8945412039756775, |
| "logps/chosen": -218.30126953125, |
| "logps/rejected": -234.64599609375, |
| "loss": 0.462, |
| "rewards/accuracies": 0.7875000238418579, |
| "rewards/chosen": -1.2913658618927002, |
| "rewards/margins": 1.1530601978302002, |
| "rewards/rejected": -2.4444260597229004, |
| "step": 4110 |
| }, |
| { |
| "epoch": 3.296, |
| "grad_norm": 9.752543449401855, |
| "learning_rate": 4.239656650557733e-06, |
| "logits/chosen": 0.9419944882392883, |
| "logits/rejected": 0.9461938738822937, |
| "logps/chosen": -205.2966766357422, |
| "logps/rejected": -216.1801300048828, |
| "loss": 0.4384, |
| "rewards/accuracies": 0.7750000357627869, |
| "rewards/chosen": -1.2133740186691284, |
| "rewards/margins": 1.447343111038208, |
| "rewards/rejected": -2.660717010498047, |
| "step": 4120 |
| }, |
| { |
| "epoch": 3.304, |
| "grad_norm": 19.97236442565918, |
| "learning_rate": 4.234636067474481e-06, |
| "logits/chosen": 0.8762012720108032, |
| "logits/rejected": 0.8730892539024353, |
| "logps/chosen": -209.2789306640625, |
| "logps/rejected": -225.71426391601562, |
| "loss": 0.5437, |
| "rewards/accuracies": 0.7250000238418579, |
| "rewards/chosen": -0.9765220880508423, |
| "rewards/margins": 0.9421109557151794, |
| "rewards/rejected": -1.9186328649520874, |
| "step": 4130 |
| }, |
| { |
| "epoch": 3.312, |
| "grad_norm": 7.913999080657959, |
| "learning_rate": 4.229601957350722e-06, |
| "logits/chosen": 0.8649017214775085, |
| "logits/rejected": 0.7728471159934998, |
| "logps/chosen": -236.4957733154297, |
| "logps/rejected": -252.7061767578125, |
| "loss": 0.4567, |
| "rewards/accuracies": 0.7875000238418579, |
| "rewards/chosen": -1.455244779586792, |
| "rewards/margins": 1.1092432737350464, |
| "rewards/rejected": -2.564488172531128, |
| "step": 4140 |
| }, |
| { |
| "epoch": 3.32, |
| "grad_norm": 12.79985523223877, |
| "learning_rate": 4.224554359443459e-06, |
| "logits/chosen": 0.9708512425422668, |
| "logits/rejected": 0.8335834741592407, |
| "logps/chosen": -202.499755859375, |
| "logps/rejected": -231.90538024902344, |
| "loss": 0.4786, |
| "rewards/accuracies": 0.7875000238418579, |
| "rewards/chosen": -1.0350658893585205, |
| "rewards/margins": 1.2157402038574219, |
| "rewards/rejected": -2.2508060932159424, |
| "step": 4150 |
| }, |
| { |
| "epoch": 3.328, |
| "grad_norm": 9.839788436889648, |
| "learning_rate": 4.219493313114875e-06, |
| "logits/chosen": 0.9519194960594177, |
| "logits/rejected": 0.9104518294334412, |
| "logps/chosen": -205.2983856201172, |
| "logps/rejected": -224.93663024902344, |
| "loss": 0.3966, |
| "rewards/accuracies": 0.8125, |
| "rewards/chosen": -0.8869916796684265, |
| "rewards/margins": 1.4354019165039062, |
| "rewards/rejected": -2.3223936557769775, |
| "step": 4160 |
| }, |
| { |
| "epoch": 3.336, |
| "grad_norm": 12.0596284866333, |
| "learning_rate": 4.214418857832025e-06, |
| "logits/chosen": 0.8763806223869324, |
| "logits/rejected": 0.8765867352485657, |
| "logps/chosen": -222.2722625732422, |
| "logps/rejected": -257.39453125, |
| "loss": 0.4153, |
| "rewards/accuracies": 0.7750000357627869, |
| "rewards/chosen": -1.6189930438995361, |
| "rewards/margins": 1.1976702213287354, |
| "rewards/rejected": -2.8166635036468506, |
| "step": 4170 |
| }, |
| { |
| "epoch": 3.344, |
| "grad_norm": 8.58606243133545, |
| "learning_rate": 4.209331033166532e-06, |
| "logits/chosen": 0.9239550828933716, |
| "logits/rejected": 0.8988777995109558, |
| "logps/chosen": -198.4449920654297, |
| "logps/rejected": -243.26878356933594, |
| "loss": 0.4353, |
| "rewards/accuracies": 0.8125, |
| "rewards/chosen": -1.5305871963500977, |
| "rewards/margins": 1.0142645835876465, |
| "rewards/rejected": -2.544851779937744, |
| "step": 4180 |
| }, |
| { |
| "epoch": 3.352, |
| "grad_norm": 15.175363540649414, |
| "learning_rate": 4.2042298787942735e-06, |
| "logits/chosen": 0.843410313129425, |
| "logits/rejected": 0.8987080454826355, |
| "logps/chosen": -211.9822235107422, |
| "logps/rejected": -244.9770965576172, |
| "loss": 0.4034, |
| "rewards/accuracies": 0.887499988079071, |
| "rewards/chosen": -1.0648707151412964, |
| "rewards/margins": 1.016554355621338, |
| "rewards/rejected": -2.081425189971924, |
| "step": 4190 |
| }, |
| { |
| "epoch": 3.36, |
| "grad_norm": 8.164642333984375, |
| "learning_rate": 4.1991154344950755e-06, |
| "logits/chosen": 0.9261566996574402, |
| "logits/rejected": 0.897598385810852, |
| "logps/chosen": -202.40428161621094, |
| "logps/rejected": -241.23191833496094, |
| "loss": 0.3954, |
| "rewards/accuracies": 0.8375000357627869, |
| "rewards/chosen": -0.40160098671913147, |
| "rewards/margins": 1.1215957403182983, |
| "rewards/rejected": -1.5231966972351074, |
| "step": 4200 |
| }, |
| { |
| "epoch": 3.368, |
| "grad_norm": 10.852888107299805, |
| "learning_rate": 4.193987740152404e-06, |
| "logits/chosen": 0.955554187297821, |
| "logits/rejected": 0.9004291892051697, |
| "logps/chosen": -184.4699249267578, |
| "logps/rejected": -220.79690551757812, |
| "loss": 0.3409, |
| "rewards/accuracies": 0.9000000357627869, |
| "rewards/chosen": -0.6415241956710815, |
| "rewards/margins": 1.315872073173523, |
| "rewards/rejected": -1.957396388053894, |
| "step": 4210 |
| }, |
| { |
| "epoch": 3.376, |
| "grad_norm": 18.212907791137695, |
| "learning_rate": 4.188846835753047e-06, |
| "logits/chosen": 0.8005386590957642, |
| "logits/rejected": 0.8098655939102173, |
| "logps/chosen": -215.5693359375, |
| "logps/rejected": -231.4944610595703, |
| "loss": 0.3963, |
| "rewards/accuracies": 0.8500000238418579, |
| "rewards/chosen": -0.9665080308914185, |
| "rewards/margins": 1.293958306312561, |
| "rewards/rejected": -2.2604663372039795, |
| "step": 4220 |
| }, |
| { |
| "epoch": 3.384, |
| "grad_norm": 10.81216812133789, |
| "learning_rate": 4.183692761386813e-06, |
| "logits/chosen": 0.9972082376480103, |
| "logits/rejected": 0.9668437242507935, |
| "logps/chosen": -207.8662872314453, |
| "logps/rejected": -236.48428344726562, |
| "loss": 0.4591, |
| "rewards/accuracies": 0.75, |
| "rewards/chosen": -0.5786136984825134, |
| "rewards/margins": 1.239148497581482, |
| "rewards/rejected": -1.8177622556686401, |
| "step": 4230 |
| }, |
| { |
| "epoch": 3.392, |
| "grad_norm": 12.289898872375488, |
| "learning_rate": 4.178525557246207e-06, |
| "logits/chosen": 0.8920055627822876, |
| "logits/rejected": 0.8146273493766785, |
| "logps/chosen": -197.36410522460938, |
| "logps/rejected": -226.280029296875, |
| "loss": 0.4908, |
| "rewards/accuracies": 0.75, |
| "rewards/chosen": -1.0728448629379272, |
| "rewards/margins": 1.341575264930725, |
| "rewards/rejected": -2.4144201278686523, |
| "step": 4240 |
| }, |
| { |
| "epoch": 3.4, |
| "grad_norm": 10.308980941772461, |
| "learning_rate": 4.173345263626125e-06, |
| "logits/chosen": 0.9153737425804138, |
| "logits/rejected": 0.9260154962539673, |
| "logps/chosen": -215.0824737548828, |
| "logps/rejected": -243.21629333496094, |
| "loss": 0.479, |
| "rewards/accuracies": 0.762499988079071, |
| "rewards/chosen": -1.110467791557312, |
| "rewards/margins": 1.1035948991775513, |
| "rewards/rejected": -2.214062452316284, |
| "step": 4250 |
| }, |
| { |
| "epoch": 3.408, |
| "grad_norm": 9.071174621582031, |
| "learning_rate": 4.168151920923536e-06, |
| "logits/chosen": 0.8960357904434204, |
| "logits/rejected": 0.8135896921157837, |
| "logps/chosen": -216.384765625, |
| "logps/rejected": -231.6706085205078, |
| "loss": 0.4235, |
| "rewards/accuracies": 0.8125, |
| "rewards/chosen": -0.950639545917511, |
| "rewards/margins": 1.0974748134613037, |
| "rewards/rejected": -2.048114538192749, |
| "step": 4260 |
| }, |
| { |
| "epoch": 3.416, |
| "grad_norm": 7.223389148712158, |
| "learning_rate": 4.162945569637174e-06, |
| "logits/chosen": 0.7265322804450989, |
| "logits/rejected": 0.7518787384033203, |
| "logps/chosen": -207.7421112060547, |
| "logps/rejected": -234.2729034423828, |
| "loss": 0.4978, |
| "rewards/accuracies": 0.762499988079071, |
| "rewards/chosen": -1.4523930549621582, |
| "rewards/margins": 1.2414056062698364, |
| "rewards/rejected": -2.693798542022705, |
| "step": 4270 |
| }, |
| { |
| "epoch": 3.424, |
| "grad_norm": 12.135030746459961, |
| "learning_rate": 4.157726250367208e-06, |
| "logits/chosen": 0.8626713752746582, |
| "logits/rejected": 0.8069788217544556, |
| "logps/chosen": -228.7198944091797, |
| "logps/rejected": -217.19143676757812, |
| "loss": 0.412, |
| "rewards/accuracies": 0.7750000357627869, |
| "rewards/chosen": -1.6234177350997925, |
| "rewards/margins": 1.184737205505371, |
| "rewards/rejected": -2.808155059814453, |
| "step": 4280 |
| }, |
| { |
| "epoch": 3.432, |
| "grad_norm": 10.609066009521484, |
| "learning_rate": 4.152494003814939e-06, |
| "logits/chosen": 0.93280029296875, |
| "logits/rejected": 0.8937816619873047, |
| "logps/chosen": -212.73155212402344, |
| "logps/rejected": -226.97402954101562, |
| "loss": 0.5399, |
| "rewards/accuracies": 0.75, |
| "rewards/chosen": -1.471534013748169, |
| "rewards/margins": 0.9502825736999512, |
| "rewards/rejected": -2.42181658744812, |
| "step": 4290 |
| }, |
| { |
| "epoch": 3.44, |
| "grad_norm": 12.865897178649902, |
| "learning_rate": 4.147248870782477e-06, |
| "logits/chosen": 0.9401634335517883, |
| "logits/rejected": 0.8570839166641235, |
| "logps/chosen": -236.3987274169922, |
| "logps/rejected": -248.67503356933594, |
| "loss": 0.4249, |
| "rewards/accuracies": 0.7875000238418579, |
| "rewards/chosen": -1.5956907272338867, |
| "rewards/margins": 1.3058620691299438, |
| "rewards/rejected": -2.901552677154541, |
| "step": 4300 |
| }, |
| { |
| "epoch": 3.448, |
| "grad_norm": 10.754268646240234, |
| "learning_rate": 4.141990892172424e-06, |
| "logits/chosen": 0.9375637173652649, |
| "logits/rejected": 0.8381093144416809, |
| "logps/chosen": -232.57603454589844, |
| "logps/rejected": -257.6446838378906, |
| "loss": 0.4866, |
| "rewards/accuracies": 0.7875000238418579, |
| "rewards/chosen": -1.8424854278564453, |
| "rewards/margins": 1.1537357568740845, |
| "rewards/rejected": -2.9962213039398193, |
| "step": 4310 |
| }, |
| { |
| "epoch": 3.456, |
| "grad_norm": 5.4626054763793945, |
| "learning_rate": 4.136720108987552e-06, |
| "logits/chosen": 1.1030884981155396, |
| "logits/rejected": 0.9064838290214539, |
| "logps/chosen": -203.1874542236328, |
| "logps/rejected": -221.27049255371094, |
| "loss": 0.3856, |
| "rewards/accuracies": 0.8375000357627869, |
| "rewards/chosen": -1.246989130973816, |
| "rewards/margins": 1.3378962278366089, |
| "rewards/rejected": -2.584885358810425, |
| "step": 4320 |
| }, |
| { |
| "epoch": 3.464, |
| "grad_norm": 12.412184715270996, |
| "learning_rate": 4.131436562330488e-06, |
| "logits/chosen": 0.9628559350967407, |
| "logits/rejected": 0.9677176475524902, |
| "logps/chosen": -211.6782684326172, |
| "logps/rejected": -234.12564086914062, |
| "loss": 0.4181, |
| "rewards/accuracies": 0.8125, |
| "rewards/chosen": -1.5418589115142822, |
| "rewards/margins": 1.1167868375778198, |
| "rewards/rejected": -2.6586456298828125, |
| "step": 4330 |
| }, |
| { |
| "epoch": 3.472, |
| "grad_norm": 15.786921501159668, |
| "learning_rate": 4.126140293403389e-06, |
| "logits/chosen": 0.98765629529953, |
| "logits/rejected": 0.9222535490989685, |
| "logps/chosen": -210.3227081298828, |
| "logps/rejected": -228.1774444580078, |
| "loss": 0.4448, |
| "rewards/accuracies": 0.7750000357627869, |
| "rewards/chosen": -1.6453129053115845, |
| "rewards/margins": 1.0853195190429688, |
| "rewards/rejected": -2.7306325435638428, |
| "step": 4340 |
| }, |
| { |
| "epoch": 3.48, |
| "grad_norm": 10.291638374328613, |
| "learning_rate": 4.1208313435076255e-06, |
| "logits/chosen": 0.9463958740234375, |
| "logits/rejected": 0.9481539130210876, |
| "logps/chosen": -212.87197875976562, |
| "logps/rejected": -221.46165466308594, |
| "loss": 0.5025, |
| "rewards/accuracies": 0.7250000238418579, |
| "rewards/chosen": -1.4104286432266235, |
| "rewards/margins": 1.028719425201416, |
| "rewards/rejected": -2.43914794921875, |
| "step": 4350 |
| }, |
| { |
| "epoch": 3.488, |
| "grad_norm": 18.95135498046875, |
| "learning_rate": 4.115509754043454e-06, |
| "logits/chosen": 0.9782568216323853, |
| "logits/rejected": 0.9336418509483337, |
| "logps/chosen": -213.4448699951172, |
| "logps/rejected": -235.00161743164062, |
| "loss": 0.4065, |
| "rewards/accuracies": 0.8125, |
| "rewards/chosen": -0.11874504387378693, |
| "rewards/margins": 1.4726828336715698, |
| "rewards/rejected": -1.5914280414581299, |
| "step": 4360 |
| }, |
| { |
| "epoch": 3.496, |
| "grad_norm": 15.36201000213623, |
| "learning_rate": 4.1101755665097e-06, |
| "logits/chosen": 1.0215367078781128, |
| "logits/rejected": 0.9443756937980652, |
| "logps/chosen": -210.01756286621094, |
| "logps/rejected": -214.49449157714844, |
| "loss": 0.4452, |
| "rewards/accuracies": 0.8500000238418579, |
| "rewards/chosen": -0.8719436526298523, |
| "rewards/margins": 1.059888482093811, |
| "rewards/rejected": -1.931832194328308, |
| "step": 4370 |
| }, |
| { |
| "epoch": 3.504, |
| "grad_norm": 14.627403259277344, |
| "learning_rate": 4.104828822503427e-06, |
| "logits/chosen": 1.0366023778915405, |
| "logits/rejected": 0.9075769782066345, |
| "logps/chosen": -215.0442657470703, |
| "logps/rejected": -228.04214477539062, |
| "loss": 0.4347, |
| "rewards/accuracies": 0.8125, |
| "rewards/chosen": -0.925746738910675, |
| "rewards/margins": 1.1247776746749878, |
| "rewards/rejected": -2.0505244731903076, |
| "step": 4380 |
| }, |
| { |
| "epoch": 3.512, |
| "grad_norm": 12.368705749511719, |
| "learning_rate": 4.09946956371962e-06, |
| "logits/chosen": 0.8762325644493103, |
| "logits/rejected": 0.9048509001731873, |
| "logps/chosen": -215.4369659423828, |
| "logps/rejected": -259.8838806152344, |
| "loss": 0.4074, |
| "rewards/accuracies": 0.8375000357627869, |
| "rewards/chosen": -1.2534805536270142, |
| "rewards/margins": 1.3236781358718872, |
| "rewards/rejected": -2.5771586894989014, |
| "step": 4390 |
| }, |
| { |
| "epoch": 3.52, |
| "grad_norm": 11.062219619750977, |
| "learning_rate": 4.094097831950855e-06, |
| "logits/chosen": 0.9681398272514343, |
| "logits/rejected": 0.8761087656021118, |
| "logps/chosen": -194.627685546875, |
| "logps/rejected": -231.01333618164062, |
| "loss": 0.4311, |
| "rewards/accuracies": 0.8125, |
| "rewards/chosen": -0.7227877974510193, |
| "rewards/margins": 1.3572759628295898, |
| "rewards/rejected": -2.080063819885254, |
| "step": 4400 |
| }, |
| { |
| "epoch": 3.528, |
| "grad_norm": 13.982263565063477, |
| "learning_rate": 4.0887136690869774e-06, |
| "logits/chosen": 0.9537173509597778, |
| "logits/rejected": 0.9426767230033875, |
| "logps/chosen": -201.9144287109375, |
| "logps/rejected": -229.7262420654297, |
| "loss": 0.3984, |
| "rewards/accuracies": 0.862500011920929, |
| "rewards/chosen": -0.8084962964057922, |
| "rewards/margins": 1.3087875843048096, |
| "rewards/rejected": -2.117284059524536, |
| "step": 4410 |
| }, |
| { |
| "epoch": 3.536, |
| "grad_norm": 12.993025779724121, |
| "learning_rate": 4.0833171171147675e-06, |
| "logits/chosen": 0.8449978828430176, |
| "logits/rejected": 0.8342208862304688, |
| "logps/chosen": -216.42979431152344, |
| "logps/rejected": -257.1995849609375, |
| "loss": 0.3506, |
| "rewards/accuracies": 0.8125, |
| "rewards/chosen": -1.0627737045288086, |
| "rewards/margins": 1.590333342552185, |
| "rewards/rejected": -2.653106927871704, |
| "step": 4420 |
| }, |
| { |
| "epoch": 3.544, |
| "grad_norm": 16.3622989654541, |
| "learning_rate": 4.077908218117625e-06, |
| "logits/chosen": 1.0056699514389038, |
| "logits/rejected": 0.9356996417045593, |
| "logps/chosen": -223.9595947265625, |
| "logps/rejected": -241.6196746826172, |
| "loss": 0.3655, |
| "rewards/accuracies": 0.8500000238418579, |
| "rewards/chosen": -1.1103066205978394, |
| "rewards/margins": 1.436450481414795, |
| "rewards/rejected": -2.546757459640503, |
| "step": 4430 |
| }, |
| { |
| "epoch": 3.552, |
| "grad_norm": 4.724842548370361, |
| "learning_rate": 4.072487014275228e-06, |
| "logits/chosen": 0.9575614929199219, |
| "logits/rejected": 0.7959676384925842, |
| "logps/chosen": -206.49546813964844, |
| "logps/rejected": -259.52972412109375, |
| "loss": 0.4052, |
| "rewards/accuracies": 0.737500011920929, |
| "rewards/chosen": -1.4658904075622559, |
| "rewards/margins": 1.5798839330673218, |
| "rewards/rejected": -3.045774221420288, |
| "step": 4440 |
| }, |
| { |
| "epoch": 3.56, |
| "grad_norm": 11.298309326171875, |
| "learning_rate": 4.067053547863215e-06, |
| "logits/chosen": 0.9113892912864685, |
| "logits/rejected": 0.837977409362793, |
| "logps/chosen": -218.4698486328125, |
| "logps/rejected": -254.52427673339844, |
| "loss": 0.4225, |
| "rewards/accuracies": 0.7875000238418579, |
| "rewards/chosen": -1.5362995862960815, |
| "rewards/margins": 1.178178071975708, |
| "rewards/rejected": -2.7144775390625, |
| "step": 4450 |
| }, |
| { |
| "epoch": 3.568, |
| "grad_norm": 6.332326889038086, |
| "learning_rate": 4.061607861252848e-06, |
| "logits/chosen": 0.870576024055481, |
| "logits/rejected": 0.8328754305839539, |
| "logps/chosen": -217.90184020996094, |
| "logps/rejected": -243.92466735839844, |
| "loss": 0.4866, |
| "rewards/accuracies": 0.762499988079071, |
| "rewards/chosen": -1.8576223850250244, |
| "rewards/margins": 1.264044165611267, |
| "rewards/rejected": -3.121666669845581, |
| "step": 4460 |
| }, |
| { |
| "epoch": 3.576, |
| "grad_norm": 19.035297393798828, |
| "learning_rate": 4.056149996910683e-06, |
| "logits/chosen": 0.8594030737876892, |
| "logits/rejected": 0.7456435561180115, |
| "logps/chosen": -213.825439453125, |
| "logps/rejected": -235.2193145751953, |
| "loss": 0.4798, |
| "rewards/accuracies": 0.737500011920929, |
| "rewards/chosen": -1.9178558588027954, |
| "rewards/margins": 1.1325775384902954, |
| "rewards/rejected": -3.050433397293091, |
| "step": 4470 |
| }, |
| { |
| "epoch": 3.584, |
| "grad_norm": 7.088034152984619, |
| "learning_rate": 4.050679997398247e-06, |
| "logits/chosen": 0.7786403894424438, |
| "logits/rejected": 0.806538999080658, |
| "logps/chosen": -202.5958709716797, |
| "logps/rejected": -247.24156188964844, |
| "loss": 0.4065, |
| "rewards/accuracies": 0.8125, |
| "rewards/chosen": -1.560505747795105, |
| "rewards/margins": 1.615809440612793, |
| "rewards/rejected": -3.1763153076171875, |
| "step": 4480 |
| }, |
| { |
| "epoch": 3.592, |
| "grad_norm": 7.6185078620910645, |
| "learning_rate": 4.045197905371691e-06, |
| "logits/chosen": 0.8927198648452759, |
| "logits/rejected": 0.8568658232688904, |
| "logps/chosen": -200.9806671142578, |
| "logps/rejected": -230.39976501464844, |
| "loss": 0.4441, |
| "rewards/accuracies": 0.762499988079071, |
| "rewards/chosen": -1.6182069778442383, |
| "rewards/margins": 1.4453877210617065, |
| "rewards/rejected": -3.0635945796966553, |
| "step": 4490 |
| }, |
| { |
| "epoch": 3.6, |
| "grad_norm": 16.420970916748047, |
| "learning_rate": 4.039703763581472e-06, |
| "logits/chosen": 0.8568277359008789, |
| "logits/rejected": 0.7848814725875854, |
| "logps/chosen": -235.65513610839844, |
| "logps/rejected": -240.30740356445312, |
| "loss": 0.463, |
| "rewards/accuracies": 0.7750000357627869, |
| "rewards/chosen": -2.157593250274658, |
| "rewards/margins": 1.1094763278961182, |
| "rewards/rejected": -3.2670693397521973, |
| "step": 4500 |
| }, |
| { |
| "epoch": 3.608, |
| "grad_norm": 9.267051696777344, |
| "learning_rate": 4.03419761487201e-06, |
| "logits/chosen": 1.004172921180725, |
| "logits/rejected": 0.8293590545654297, |
| "logps/chosen": -220.4943389892578, |
| "logps/rejected": -238.46202087402344, |
| "loss": 0.4328, |
| "rewards/accuracies": 0.824999988079071, |
| "rewards/chosen": -1.6261404752731323, |
| "rewards/margins": 1.2516275644302368, |
| "rewards/rejected": -2.877768039703369, |
| "step": 4510 |
| }, |
| { |
| "epoch": 3.616, |
| "grad_norm": 8.903371810913086, |
| "learning_rate": 4.0286795021813595e-06, |
| "logits/chosen": 0.7373332977294922, |
| "logits/rejected": 0.7830902338027954, |
| "logps/chosen": -206.95864868164062, |
| "logps/rejected": -238.60684204101562, |
| "loss": 0.4642, |
| "rewards/accuracies": 0.824999988079071, |
| "rewards/chosen": -1.362304449081421, |
| "rewards/margins": 1.3245913982391357, |
| "rewards/rejected": -2.6868958473205566, |
| "step": 4520 |
| }, |
| { |
| "epoch": 3.624, |
| "grad_norm": 9.160982131958008, |
| "learning_rate": 4.023149468540871e-06, |
| "logits/chosen": 0.8418847918510437, |
| "logits/rejected": 0.8112041354179382, |
| "logps/chosen": -207.1737060546875, |
| "logps/rejected": -238.65518188476562, |
| "loss": 0.5013, |
| "rewards/accuracies": 0.699999988079071, |
| "rewards/chosen": -1.8721437454223633, |
| "rewards/margins": 1.152505874633789, |
| "rewards/rejected": -3.0246498584747314, |
| "step": 4530 |
| }, |
| { |
| "epoch": 3.632, |
| "grad_norm": 8.608299255371094, |
| "learning_rate": 4.0176075570748596e-06, |
| "logits/chosen": 0.8552350997924805, |
| "logits/rejected": 0.8494642376899719, |
| "logps/chosen": -219.12474060058594, |
| "logps/rejected": -222.3189239501953, |
| "loss": 0.4387, |
| "rewards/accuracies": 0.8375000357627869, |
| "rewards/chosen": -1.2666515111923218, |
| "rewards/margins": 1.1172685623168945, |
| "rewards/rejected": -2.3839199542999268, |
| "step": 4540 |
| }, |
| { |
| "epoch": 3.64, |
| "grad_norm": 11.497233390808105, |
| "learning_rate": 4.012053811000262e-06, |
| "logits/chosen": 0.919759213924408, |
| "logits/rejected": 0.9661205410957336, |
| "logps/chosen": -220.15342712402344, |
| "logps/rejected": -245.29685974121094, |
| "loss": 0.3889, |
| "rewards/accuracies": 0.8375000357627869, |
| "rewards/chosen": -1.2769368886947632, |
| "rewards/margins": 1.3770891427993774, |
| "rewards/rejected": -2.6540262699127197, |
| "step": 4550 |
| }, |
| { |
| "epoch": 3.648, |
| "grad_norm": 12.795794486999512, |
| "learning_rate": 4.006488273626307e-06, |
| "logits/chosen": 0.8910170793533325, |
| "logits/rejected": 0.8077157139778137, |
| "logps/chosen": -208.62158203125, |
| "logps/rejected": -234.9666290283203, |
| "loss": 0.424, |
| "rewards/accuracies": 0.824999988079071, |
| "rewards/chosen": -1.1060861349105835, |
| "rewards/margins": 1.2906891107559204, |
| "rewards/rejected": -2.396775007247925, |
| "step": 4560 |
| }, |
| { |
| "epoch": 3.656, |
| "grad_norm": 19.536590576171875, |
| "learning_rate": 4.000910988354172e-06, |
| "logits/chosen": 0.9590219855308533, |
| "logits/rejected": 0.9733330011367798, |
| "logps/chosen": -199.6140899658203, |
| "logps/rejected": -206.4640655517578, |
| "loss": 0.4468, |
| "rewards/accuracies": 0.8375000357627869, |
| "rewards/chosen": -0.5966355204582214, |
| "rewards/margins": 1.0923033952713013, |
| "rewards/rejected": -1.6889389753341675, |
| "step": 4570 |
| }, |
| { |
| "epoch": 3.664, |
| "grad_norm": 7.018489837646484, |
| "learning_rate": 3.995321998676648e-06, |
| "logits/chosen": 0.9202627539634705, |
| "logits/rejected": 0.8594157099723816, |
| "logps/chosen": -203.9952850341797, |
| "logps/rejected": -229.83840942382812, |
| "loss": 0.4649, |
| "rewards/accuracies": 0.800000011920929, |
| "rewards/chosen": -0.984645664691925, |
| "rewards/margins": 1.0351135730743408, |
| "rewards/rejected": -2.0197594165802, |
| "step": 4580 |
| }, |
| { |
| "epoch": 3.672, |
| "grad_norm": 10.823237419128418, |
| "learning_rate": 3.989721348177801e-06, |
| "logits/chosen": 0.9117226004600525, |
| "logits/rejected": 0.8806440234184265, |
| "logps/chosen": -222.4832763671875, |
| "logps/rejected": -247.8332061767578, |
| "loss": 0.3441, |
| "rewards/accuracies": 0.9000000357627869, |
| "rewards/chosen": -1.3591053485870361, |
| "rewards/margins": 1.4962059259414673, |
| "rewards/rejected": -2.855311155319214, |
| "step": 4590 |
| }, |
| { |
| "epoch": 3.68, |
| "grad_norm": 14.88288402557373, |
| "learning_rate": 3.984109080532627e-06, |
| "logits/chosen": 0.9188446998596191, |
| "logits/rejected": 0.8930045366287231, |
| "logps/chosen": -190.447509765625, |
| "logps/rejected": -212.60386657714844, |
| "loss": 0.4098, |
| "rewards/accuracies": 0.7750000357627869, |
| "rewards/chosen": -1.0938667058944702, |
| "rewards/margins": 1.1657882928848267, |
| "rewards/rejected": -2.259654998779297, |
| "step": 4600 |
| }, |
| { |
| "epoch": 3.6879999999999997, |
| "grad_norm": 15.50723934173584, |
| "learning_rate": 3.978485239506717e-06, |
| "logits/chosen": 0.8951994180679321, |
| "logits/rejected": 0.9232072830200195, |
| "logps/chosen": -222.2184295654297, |
| "logps/rejected": -248.290771484375, |
| "loss": 0.4084, |
| "rewards/accuracies": 0.8125, |
| "rewards/chosen": -1.0719726085662842, |
| "rewards/margins": 1.3165521621704102, |
| "rewards/rejected": -2.3885247707366943, |
| "step": 4610 |
| }, |
| { |
| "epoch": 3.6959999999999997, |
| "grad_norm": 11.021881103515625, |
| "learning_rate": 3.972849868955913e-06, |
| "logits/chosen": 0.9283590316772461, |
| "logits/rejected": 0.8612028360366821, |
| "logps/chosen": -208.7410888671875, |
| "logps/rejected": -236.5598907470703, |
| "loss": 0.2934, |
| "rewards/accuracies": 0.9125000238418579, |
| "rewards/chosen": -0.6821409463882446, |
| "rewards/margins": 1.696975588798523, |
| "rewards/rejected": -2.3791165351867676, |
| "step": 4620 |
| }, |
| { |
| "epoch": 3.7039999999999997, |
| "grad_norm": 7.1401777267456055, |
| "learning_rate": 3.967203012825965e-06, |
| "logits/chosen": 0.8836082816123962, |
| "logits/rejected": 0.8645914196968079, |
| "logps/chosen": -213.68142700195312, |
| "logps/rejected": -251.0132293701172, |
| "loss": 0.4014, |
| "rewards/accuracies": 0.8125, |
| "rewards/chosen": -1.4805501699447632, |
| "rewards/margins": 1.6087449789047241, |
| "rewards/rejected": -3.0892951488494873, |
| "step": 4630 |
| }, |
| { |
| "epoch": 3.7119999999999997, |
| "grad_norm": 15.971772193908691, |
| "learning_rate": 3.961544715152195e-06, |
| "logits/chosen": 0.9219527244567871, |
| "logits/rejected": 0.8754696249961853, |
| "logps/chosen": -201.8592987060547, |
| "logps/rejected": -226.51504516601562, |
| "loss": 0.4237, |
| "rewards/accuracies": 0.800000011920929, |
| "rewards/chosen": -1.1449387073516846, |
| "rewards/margins": 1.4969040155410767, |
| "rewards/rejected": -2.6418426036834717, |
| "step": 4640 |
| }, |
| { |
| "epoch": 3.7199999999999998, |
| "grad_norm": 3.4029507637023926, |
| "learning_rate": 3.955875020059141e-06, |
| "logits/chosen": 0.9539863467216492, |
| "logits/rejected": 0.9481542706489563, |
| "logps/chosen": -190.33999633789062, |
| "logps/rejected": -230.81553649902344, |
| "loss": 0.4776, |
| "rewards/accuracies": 0.7875000238418579, |
| "rewards/chosen": -1.0588791370391846, |
| "rewards/margins": 1.255807876586914, |
| "rewards/rejected": -2.3146870136260986, |
| "step": 4650 |
| }, |
| { |
| "epoch": 3.7279999999999998, |
| "grad_norm": 9.8549165725708, |
| "learning_rate": 3.950193971760227e-06, |
| "logits/chosen": 0.9642109274864197, |
| "logits/rejected": 0.9821377992630005, |
| "logps/chosen": -210.73709106445312, |
| "logps/rejected": -215.7576904296875, |
| "loss": 0.4352, |
| "rewards/accuracies": 0.762499988079071, |
| "rewards/chosen": -1.1254199743270874, |
| "rewards/margins": 1.1262983083724976, |
| "rewards/rejected": -2.251718282699585, |
| "step": 4660 |
| }, |
| { |
| "epoch": 3.7359999999999998, |
| "grad_norm": 13.556317329406738, |
| "learning_rate": 3.944501614557408e-06, |
| "logits/chosen": 1.0108891725540161, |
| "logits/rejected": 0.903409481048584, |
| "logps/chosen": -189.86575317382812, |
| "logps/rejected": -216.7268524169922, |
| "loss": 0.3932, |
| "rewards/accuracies": 0.800000011920929, |
| "rewards/chosen": -1.1368598937988281, |
| "rewards/margins": 1.453155517578125, |
| "rewards/rejected": -2.590015172958374, |
| "step": 4670 |
| }, |
| { |
| "epoch": 3.7439999999999998, |
| "grad_norm": 12.027454376220703, |
| "learning_rate": 3.938797992840828e-06, |
| "logits/chosen": 0.9133999943733215, |
| "logits/rejected": 0.8714101910591125, |
| "logps/chosen": -202.46926879882812, |
| "logps/rejected": -219.1940460205078, |
| "loss": 0.504, |
| "rewards/accuracies": 0.7875000238418579, |
| "rewards/chosen": -1.0332410335540771, |
| "rewards/margins": 1.1814830303192139, |
| "rewards/rejected": -2.214724063873291, |
| "step": 4680 |
| }, |
| { |
| "epoch": 3.752, |
| "grad_norm": 7.450799465179443, |
| "learning_rate": 3.933083151088476e-06, |
| "logits/chosen": 0.9655442237854004, |
| "logits/rejected": 0.9951929450035095, |
| "logps/chosen": -216.5625762939453, |
| "logps/rejected": -226.92529296875, |
| "loss": 0.4698, |
| "rewards/accuracies": 0.75, |
| "rewards/chosen": -1.4813133478164673, |
| "rewards/margins": 1.0279206037521362, |
| "rewards/rejected": -2.5092339515686035, |
| "step": 4690 |
| }, |
| { |
| "epoch": 3.76, |
| "grad_norm": 10.197044372558594, |
| "learning_rate": 3.927357133865836e-06, |
| "logits/chosen": 0.8014942407608032, |
| "logits/rejected": 0.7166596055030823, |
| "logps/chosen": -206.07383728027344, |
| "logps/rejected": -233.2744598388672, |
| "loss": 0.4263, |
| "rewards/accuracies": 0.8125, |
| "rewards/chosen": -1.1761497259140015, |
| "rewards/margins": 1.4023005962371826, |
| "rewards/rejected": -2.5784504413604736, |
| "step": 4700 |
| }, |
| { |
| "epoch": 3.768, |
| "grad_norm": 12.484579086303711, |
| "learning_rate": 3.92161998582554e-06, |
| "logits/chosen": 0.9144058227539062, |
| "logits/rejected": 0.8918857574462891, |
| "logps/chosen": -210.94981384277344, |
| "logps/rejected": -232.71836853027344, |
| "loss": 0.4134, |
| "rewards/accuracies": 0.824999988079071, |
| "rewards/chosen": -1.373250961303711, |
| "rewards/margins": 1.1428226232528687, |
| "rewards/rejected": -2.516073703765869, |
| "step": 4710 |
| }, |
| { |
| "epoch": 3.776, |
| "grad_norm": 10.925464630126953, |
| "learning_rate": 3.9158717517070216e-06, |
| "logits/chosen": 0.9928507804870605, |
| "logits/rejected": 0.9784805178642273, |
| "logps/chosen": -184.71435546875, |
| "logps/rejected": -216.792724609375, |
| "loss": 0.3318, |
| "rewards/accuracies": 0.8375000357627869, |
| "rewards/chosen": -1.1705821752548218, |
| "rewards/margins": 1.615303874015808, |
| "rewards/rejected": -2.78588604927063, |
| "step": 4720 |
| }, |
| { |
| "epoch": 3.784, |
| "grad_norm": 12.400118827819824, |
| "learning_rate": 3.9101124763361645e-06, |
| "logits/chosen": 0.9748711585998535, |
| "logits/rejected": 0.9293093681335449, |
| "logps/chosen": -217.0461883544922, |
| "logps/rejected": -227.57286071777344, |
| "loss": 0.4629, |
| "rewards/accuracies": 0.8125, |
| "rewards/chosen": -1.5834509134292603, |
| "rewards/margins": 1.2847626209259033, |
| "rewards/rejected": -2.868213415145874, |
| "step": 4730 |
| }, |
| { |
| "epoch": 3.792, |
| "grad_norm": 11.517487525939941, |
| "learning_rate": 3.904342204624955e-06, |
| "logits/chosen": 0.9611164331436157, |
| "logits/rejected": 0.9418201446533203, |
| "logps/chosen": -217.9922637939453, |
| "logps/rejected": -231.400634765625, |
| "loss": 0.4182, |
| "rewards/accuracies": 0.8500000238418579, |
| "rewards/chosen": -1.1144764423370361, |
| "rewards/margins": 1.261598825454712, |
| "rewards/rejected": -2.376075029373169, |
| "step": 4740 |
| }, |
| { |
| "epoch": 3.8, |
| "grad_norm": 18.023473739624023, |
| "learning_rate": 3.8985609815711315e-06, |
| "logits/chosen": 1.0194095373153687, |
| "logits/rejected": 0.9130865931510925, |
| "logps/chosen": -217.2231903076172, |
| "logps/rejected": -231.2641143798828, |
| "loss": 0.4554, |
| "rewards/accuracies": 0.7875000238418579, |
| "rewards/chosen": -1.4849942922592163, |
| "rewards/margins": 1.1392329931259155, |
| "rewards/rejected": -2.624227285385132, |
| "step": 4750 |
| }, |
| { |
| "epoch": 3.808, |
| "grad_norm": 17.991451263427734, |
| "learning_rate": 3.892768852257831e-06, |
| "logits/chosen": 0.9631202816963196, |
| "logits/rejected": 0.8651983141899109, |
| "logps/chosen": -215.14443969726562, |
| "logps/rejected": -251.19363403320312, |
| "loss": 0.492, |
| "rewards/accuracies": 0.7875000238418579, |
| "rewards/chosen": -1.984946608543396, |
| "rewards/margins": 1.1132451295852661, |
| "rewards/rejected": -3.098191499710083, |
| "step": 4760 |
| }, |
| { |
| "epoch": 3.816, |
| "grad_norm": 10.07773208618164, |
| "learning_rate": 3.886965861853243e-06, |
| "logits/chosen": 0.9928179979324341, |
| "logits/rejected": 0.9808389544487, |
| "logps/chosen": -209.1436309814453, |
| "logps/rejected": -228.26318359375, |
| "loss": 0.3544, |
| "rewards/accuracies": 0.862500011920929, |
| "rewards/chosen": -1.264801263809204, |
| "rewards/margins": 1.3731207847595215, |
| "rewards/rejected": -2.6379220485687256, |
| "step": 4770 |
| }, |
| { |
| "epoch": 3.824, |
| "grad_norm": 21.519670486450195, |
| "learning_rate": 3.881152055610253e-06, |
| "logits/chosen": 0.9541469812393188, |
| "logits/rejected": 0.8962286114692688, |
| "logps/chosen": -216.61123657226562, |
| "logps/rejected": -220.80076599121094, |
| "loss": 0.5935, |
| "rewards/accuracies": 0.7125000357627869, |
| "rewards/chosen": -1.6538161039352417, |
| "rewards/margins": 0.6947936415672302, |
| "rewards/rejected": -2.348609685897827, |
| "step": 4780 |
| }, |
| { |
| "epoch": 3.832, |
| "grad_norm": 12.259000778198242, |
| "learning_rate": 3.875327478866089e-06, |
| "logits/chosen": 0.8635460734367371, |
| "logits/rejected": 0.8118668794631958, |
| "logps/chosen": -233.24827575683594, |
| "logps/rejected": -245.0417022705078, |
| "loss": 0.4181, |
| "rewards/accuracies": 0.8125, |
| "rewards/chosen": -1.2450588941574097, |
| "rewards/margins": 1.115803599357605, |
| "rewards/rejected": -2.3608624935150146, |
| "step": 4790 |
| }, |
| { |
| "epoch": 3.84, |
| "grad_norm": 14.562280654907227, |
| "learning_rate": 3.869492177041971e-06, |
| "logits/chosen": 0.9073505401611328, |
| "logits/rejected": 0.872600257396698, |
| "logps/chosen": -231.16311645507812, |
| "logps/rejected": -224.2497100830078, |
| "loss": 0.5217, |
| "rewards/accuracies": 0.762499988079071, |
| "rewards/chosen": -1.0939778089523315, |
| "rewards/margins": 0.7748345732688904, |
| "rewards/rejected": -1.8688122034072876, |
| "step": 4800 |
| }, |
| { |
| "epoch": 3.848, |
| "grad_norm": 14.074769973754883, |
| "learning_rate": 3.863646195642754e-06, |
| "logits/chosen": 0.9319748282432556, |
| "logits/rejected": 0.8828820586204529, |
| "logps/chosen": -201.0325164794922, |
| "logps/rejected": -209.9548797607422, |
| "loss": 0.4524, |
| "rewards/accuracies": 0.7750000357627869, |
| "rewards/chosen": -0.7232056856155396, |
| "rewards/margins": 1.1827844381332397, |
| "rewards/rejected": -1.9059902429580688, |
| "step": 4810 |
| }, |
| { |
| "epoch": 3.856, |
| "grad_norm": 13.172402381896973, |
| "learning_rate": 3.857789580256576e-06, |
| "logits/chosen": 0.8821210861206055, |
| "logits/rejected": 0.8552846312522888, |
| "logps/chosen": -204.34593200683594, |
| "logps/rejected": -239.4946746826172, |
| "loss": 0.5189, |
| "rewards/accuracies": 0.7250000238418579, |
| "rewards/chosen": -1.5751923322677612, |
| "rewards/margins": 0.9332385063171387, |
| "rewards/rejected": -2.5084309577941895, |
| "step": 4820 |
| }, |
| { |
| "epoch": 3.864, |
| "grad_norm": 12.942428588867188, |
| "learning_rate": 3.8519223765544985e-06, |
| "logits/chosen": 0.9515409469604492, |
| "logits/rejected": 0.8108540773391724, |
| "logps/chosen": -196.3207550048828, |
| "logps/rejected": -233.15110778808594, |
| "loss": 0.3708, |
| "rewards/accuracies": 0.824999988079071, |
| "rewards/chosen": -1.2026864290237427, |
| "rewards/margins": 1.3849681615829468, |
| "rewards/rejected": -2.5876548290252686, |
| "step": 4830 |
| }, |
| { |
| "epoch": 3.872, |
| "grad_norm": 16.509870529174805, |
| "learning_rate": 3.8460446302901575e-06, |
| "logits/chosen": 0.9374640583992004, |
| "logits/rejected": 0.8481482863426208, |
| "logps/chosen": -200.82627868652344, |
| "logps/rejected": -220.869873046875, |
| "loss": 0.4426, |
| "rewards/accuracies": 0.800000011920929, |
| "rewards/chosen": -1.4088255167007446, |
| "rewards/margins": 1.1208895444869995, |
| "rewards/rejected": -2.529715061187744, |
| "step": 4840 |
| }, |
| { |
| "epoch": 3.88, |
| "grad_norm": 6.809768199920654, |
| "learning_rate": 3.840156387299397e-06, |
| "logits/chosen": 0.8348701596260071, |
| "logits/rejected": 0.8732596635818481, |
| "logps/chosen": -216.68138122558594, |
| "logps/rejected": -247.988037109375, |
| "loss": 0.4629, |
| "rewards/accuracies": 0.737500011920929, |
| "rewards/chosen": -1.482513189315796, |
| "rewards/margins": 1.2046746015548706, |
| "rewards/rejected": -2.687187910079956, |
| "step": 4850 |
| }, |
| { |
| "epoch": 3.888, |
| "grad_norm": 10.955533981323242, |
| "learning_rate": 3.8342576934999184e-06, |
| "logits/chosen": 0.8893832564353943, |
| "logits/rejected": 0.8247093558311462, |
| "logps/chosen": -204.13548278808594, |
| "logps/rejected": -241.85671997070312, |
| "loss": 0.4514, |
| "rewards/accuracies": 0.8375000357627869, |
| "rewards/chosen": -1.217315435409546, |
| "rewards/margins": 1.1432523727416992, |
| "rewards/rejected": -2.360567808151245, |
| "step": 4860 |
| }, |
| { |
| "epoch": 3.896, |
| "grad_norm": 11.238479614257812, |
| "learning_rate": 3.828348594890923e-06, |
| "logits/chosen": 0.8213216662406921, |
| "logits/rejected": 0.8109332919120789, |
| "logps/chosen": -209.7275848388672, |
| "logps/rejected": -236.1708526611328, |
| "loss": 0.4234, |
| "rewards/accuracies": 0.824999988079071, |
| "rewards/chosen": -1.2727683782577515, |
| "rewards/margins": 1.410487174987793, |
| "rewards/rejected": -2.683255910873413, |
| "step": 4870 |
| }, |
| { |
| "epoch": 3.904, |
| "grad_norm": 12.693486213684082, |
| "learning_rate": 3.822429137552747e-06, |
| "logits/chosen": 0.9051896929740906, |
| "logits/rejected": 0.8351686596870422, |
| "logps/chosen": -212.6349639892578, |
| "logps/rejected": -205.12376403808594, |
| "loss": 0.5046, |
| "rewards/accuracies": 0.762499988079071, |
| "rewards/chosen": -0.834870457649231, |
| "rewards/margins": 0.9832606315612793, |
| "rewards/rejected": -1.8181308507919312, |
| "step": 4880 |
| }, |
| { |
| "epoch": 3.912, |
| "grad_norm": 6.716438293457031, |
| "learning_rate": 3.816499367646508e-06, |
| "logits/chosen": 1.0042909383773804, |
| "logits/rejected": 0.9129732251167297, |
| "logps/chosen": -212.87109375, |
| "logps/rejected": -230.78567504882812, |
| "loss": 0.535, |
| "rewards/accuracies": 0.7250000238418579, |
| "rewards/chosen": -1.2843660116195679, |
| "rewards/margins": 1.0389277935028076, |
| "rewards/rejected": -2.323293924331665, |
| "step": 4890 |
| }, |
| { |
| "epoch": 3.92, |
| "grad_norm": 10.39694595336914, |
| "learning_rate": 3.8105593314137434e-06, |
| "logits/chosen": 0.9733989834785461, |
| "logits/rejected": 0.9892801642417908, |
| "logps/chosen": -209.4358367919922, |
| "logps/rejected": -221.290771484375, |
| "loss": 0.3994, |
| "rewards/accuracies": 0.8375000357627869, |
| "rewards/chosen": -1.1634297370910645, |
| "rewards/margins": 1.246660590171814, |
| "rewards/rejected": -2.410090208053589, |
| "step": 4900 |
| }, |
| { |
| "epoch": 3.928, |
| "grad_norm": 16.674728393554688, |
| "learning_rate": 3.804609075176049e-06, |
| "logits/chosen": 0.7904636859893799, |
| "logits/rejected": 0.7977269291877747, |
| "logps/chosen": -202.6865692138672, |
| "logps/rejected": -237.93357849121094, |
| "loss": 0.4082, |
| "rewards/accuracies": 0.8125, |
| "rewards/chosen": -1.5116066932678223, |
| "rewards/margins": 1.3030771017074585, |
| "rewards/rejected": -2.8146839141845703, |
| "step": 4910 |
| }, |
| { |
| "epoch": 3.936, |
| "grad_norm": 11.239212036132812, |
| "learning_rate": 3.7986486453347183e-06, |
| "logits/chosen": 0.9851727485656738, |
| "logits/rejected": 0.8940200805664062, |
| "logps/chosen": -206.59951782226562, |
| "logps/rejected": -215.1295623779297, |
| "loss": 0.4914, |
| "rewards/accuracies": 0.737500011920929, |
| "rewards/chosen": -1.3703104257583618, |
| "rewards/margins": 0.929050624370575, |
| "rewards/rejected": -2.299360990524292, |
| "step": 4920 |
| }, |
| { |
| "epoch": 3.944, |
| "grad_norm": 14.794110298156738, |
| "learning_rate": 3.7926780883703794e-06, |
| "logits/chosen": 0.9150724411010742, |
| "logits/rejected": 0.8797799944877625, |
| "logps/chosen": -209.21240234375, |
| "logps/rejected": -228.32688903808594, |
| "loss": 0.4379, |
| "rewards/accuracies": 0.8500000238418579, |
| "rewards/chosen": -0.8193817138671875, |
| "rewards/margins": 1.1322726011276245, |
| "rewards/rejected": -1.9516544342041016, |
| "step": 4930 |
| }, |
| { |
| "epoch": 3.952, |
| "grad_norm": 9.821282386779785, |
| "learning_rate": 3.7866974508426355e-06, |
| "logits/chosen": 0.8630434274673462, |
| "logits/rejected": 0.9651856422424316, |
| "logps/chosen": -226.81321716308594, |
| "logps/rejected": -220.50515747070312, |
| "loss": 0.402, |
| "rewards/accuracies": 0.8375000357627869, |
| "rewards/chosen": -0.8245296478271484, |
| "rewards/margins": 1.2236753702163696, |
| "rewards/rejected": -2.0482048988342285, |
| "step": 4940 |
| }, |
| { |
| "epoch": 3.96, |
| "grad_norm": 11.387616157531738, |
| "learning_rate": 3.7807067793897006e-06, |
| "logits/chosen": 0.8870008587837219, |
| "logits/rejected": 0.8385466933250427, |
| "logps/chosen": -230.2690887451172, |
| "logps/rejected": -249.96641540527344, |
| "loss": 0.5202, |
| "rewards/accuracies": 0.7875000238418579, |
| "rewards/chosen": -1.0625405311584473, |
| "rewards/margins": 0.9815382361412048, |
| "rewards/rejected": -2.044078826904297, |
| "step": 4950 |
| }, |
| { |
| "epoch": 3.968, |
| "grad_norm": 10.633583068847656, |
| "learning_rate": 3.7747061207280322e-06, |
| "logits/chosen": 0.867588222026825, |
| "logits/rejected": 0.78270423412323, |
| "logps/chosen": -230.1920928955078, |
| "logps/rejected": -261.4850158691406, |
| "loss": 0.4374, |
| "rewards/accuracies": 0.800000011920929, |
| "rewards/chosen": -1.1695181131362915, |
| "rewards/margins": 1.3066359758377075, |
| "rewards/rejected": -2.476154088973999, |
| "step": 4960 |
| }, |
| { |
| "epoch": 3.976, |
| "grad_norm": 11.73788070678711, |
| "learning_rate": 3.7686955216519733e-06, |
| "logits/chosen": 0.8684186339378357, |
| "logits/rejected": 0.9026303291320801, |
| "logps/chosen": -202.72027587890625, |
| "logps/rejected": -224.8450164794922, |
| "loss": 0.3963, |
| "rewards/accuracies": 0.8375000357627869, |
| "rewards/chosen": -1.1008380651474, |
| "rewards/margins": 1.233585238456726, |
| "rewards/rejected": -2.334423065185547, |
| "step": 4970 |
| }, |
| { |
| "epoch": 3.984, |
| "grad_norm": 12.70168685913086, |
| "learning_rate": 3.7626750290333824e-06, |
| "logits/chosen": 0.9102399945259094, |
| "logits/rejected": 0.789924681186676, |
| "logps/chosen": -217.3763885498047, |
| "logps/rejected": -246.22720336914062, |
| "loss": 0.431, |
| "rewards/accuracies": 0.8375000357627869, |
| "rewards/chosen": -1.2897388935089111, |
| "rewards/margins": 1.2651302814483643, |
| "rewards/rejected": -2.5548691749572754, |
| "step": 4980 |
| }, |
| { |
| "epoch": 3.992, |
| "grad_norm": 10.833782196044922, |
| "learning_rate": 3.7566446898212704e-06, |
| "logits/chosen": 0.8599758148193359, |
| "logits/rejected": 0.8142706751823425, |
| "logps/chosen": -215.34092712402344, |
| "logps/rejected": -204.10824584960938, |
| "loss": 0.4499, |
| "rewards/accuracies": 0.800000011920929, |
| "rewards/chosen": -1.1187313795089722, |
| "rewards/margins": 1.1984655857086182, |
| "rewards/rejected": -2.317196846008301, |
| "step": 4990 |
| }, |
| { |
| "epoch": 4.0, |
| "grad_norm": 13.4774169921875, |
| "learning_rate": 3.7506045510414337e-06, |
| "logits/chosen": 1.0011329650878906, |
| "logits/rejected": 0.9479683041572571, |
| "logps/chosen": -199.9625701904297, |
| "logps/rejected": -239.7847137451172, |
| "loss": 0.3563, |
| "rewards/accuracies": 0.875, |
| "rewards/chosen": -1.0051459074020386, |
| "rewards/margins": 1.3487907648086548, |
| "rewards/rejected": -2.3539369106292725, |
| "step": 5000 |
| }, |
| { |
| "epoch": 4.008, |
| "grad_norm": 8.692243576049805, |
| "learning_rate": 3.7445546597960882e-06, |
| "logits/chosen": 1.0498260259628296, |
| "logits/rejected": 1.0109456777572632, |
| "logps/chosen": -207.2587432861328, |
| "logps/rejected": -230.1753692626953, |
| "loss": 0.3541, |
| "rewards/accuracies": 0.875, |
| "rewards/chosen": -0.9930923581123352, |
| "rewards/margins": 1.3766816854476929, |
| "rewards/rejected": -2.3697738647460938, |
| "step": 5010 |
| }, |
| { |
| "epoch": 4.016, |
| "grad_norm": 14.994426727294922, |
| "learning_rate": 3.7384950632635e-06, |
| "logits/chosen": 1.0372326374053955, |
| "logits/rejected": 0.9997326135635376, |
| "logps/chosen": -206.49755859375, |
| "logps/rejected": -229.08706665039062, |
| "loss": 0.4148, |
| "rewards/accuracies": 0.824999988079071, |
| "rewards/chosen": -1.0184519290924072, |
| "rewards/margins": 1.1713188886642456, |
| "rewards/rejected": -2.1897706985473633, |
| "step": 5020 |
| }, |
| { |
| "epoch": 4.024, |
| "grad_norm": 12.80656909942627, |
| "learning_rate": 3.732425808697622e-06, |
| "logits/chosen": 1.04145085811615, |
| "logits/rejected": 0.8659443855285645, |
| "logps/chosen": -207.9678192138672, |
| "logps/rejected": -230.5495147705078, |
| "loss": 0.3428, |
| "rewards/accuracies": 0.875, |
| "rewards/chosen": -0.694399356842041, |
| "rewards/margins": 1.4684067964553833, |
| "rewards/rejected": -2.162806272506714, |
| "step": 5030 |
| }, |
| { |
| "epoch": 4.032, |
| "grad_norm": 7.541386604309082, |
| "learning_rate": 3.726346943427719e-06, |
| "logits/chosen": 0.9582153558731079, |
| "logits/rejected": 0.867928683757782, |
| "logps/chosen": -208.20193481445312, |
| "logps/rejected": -234.29641723632812, |
| "loss": 0.3957, |
| "rewards/accuracies": 0.8125, |
| "rewards/chosen": -1.2386597394943237, |
| "rewards/margins": 1.2774535417556763, |
| "rewards/rejected": -2.516113042831421, |
| "step": 5040 |
| }, |
| { |
| "epoch": 4.04, |
| "grad_norm": 12.571610450744629, |
| "learning_rate": 3.720258514858004e-06, |
| "logits/chosen": 1.0318713188171387, |
| "logits/rejected": 0.8940740823745728, |
| "logps/chosen": -195.3436279296875, |
| "logps/rejected": -235.5468292236328, |
| "loss": 0.3239, |
| "rewards/accuracies": 0.9125000238418579, |
| "rewards/chosen": -0.7823795676231384, |
| "rewards/margins": 1.5767382383346558, |
| "rewards/rejected": -2.3591177463531494, |
| "step": 5050 |
| }, |
| { |
| "epoch": 4.048, |
| "grad_norm": 14.773040771484375, |
| "learning_rate": 3.714160570467266e-06, |
| "logits/chosen": 0.9699028134346008, |
| "logits/rejected": 0.9142343401908875, |
| "logps/chosen": -218.6599578857422, |
| "logps/rejected": -241.7670440673828, |
| "loss": 0.3705, |
| "rewards/accuracies": 0.8125, |
| "rewards/chosen": -1.231390357017517, |
| "rewards/margins": 1.3999671936035156, |
| "rewards/rejected": -2.6313576698303223, |
| "step": 5060 |
| }, |
| { |
| "epoch": 4.056, |
| "grad_norm": 8.311098098754883, |
| "learning_rate": 3.7080531578085e-06, |
| "logits/chosen": 0.880449116230011, |
| "logits/rejected": 0.8501182794570923, |
| "logps/chosen": -207.9825897216797, |
| "logps/rejected": -245.84878540039062, |
| "loss": 0.3206, |
| "rewards/accuracies": 0.862500011920929, |
| "rewards/chosen": -1.1131296157836914, |
| "rewards/margins": 1.682613730430603, |
| "rewards/rejected": -2.795743465423584, |
| "step": 5070 |
| }, |
| { |
| "epoch": 4.064, |
| "grad_norm": 13.896095275878906, |
| "learning_rate": 3.701936324508537e-06, |
| "logits/chosen": 1.1272010803222656, |
| "logits/rejected": 0.9481062293052673, |
| "logps/chosen": -211.05064392089844, |
| "logps/rejected": -214.7148895263672, |
| "loss": 0.3481, |
| "rewards/accuracies": 0.862500011920929, |
| "rewards/chosen": -0.8524333834648132, |
| "rewards/margins": 1.6547110080718994, |
| "rewards/rejected": -2.5071444511413574, |
| "step": 5080 |
| }, |
| { |
| "epoch": 4.072, |
| "grad_norm": 11.393054008483887, |
| "learning_rate": 3.6958101182676725e-06, |
| "logits/chosen": 0.9813786745071411, |
| "logits/rejected": 0.962406575679779, |
| "logps/chosen": -205.95632934570312, |
| "logps/rejected": -231.1307373046875, |
| "loss": 0.3552, |
| "rewards/accuracies": 0.862500011920929, |
| "rewards/chosen": -1.020094633102417, |
| "rewards/margins": 1.620416283607483, |
| "rewards/rejected": -2.6405110359191895, |
| "step": 5090 |
| }, |
| { |
| "epoch": 4.08, |
| "grad_norm": 16.073274612426758, |
| "learning_rate": 3.6896745868592924e-06, |
| "logits/chosen": 0.980717658996582, |
| "logits/rejected": 0.9125275611877441, |
| "logps/chosen": -199.70022583007812, |
| "logps/rejected": -237.94412231445312, |
| "loss": 0.3716, |
| "rewards/accuracies": 0.875, |
| "rewards/chosen": -1.2604044675827026, |
| "rewards/margins": 1.5706150531768799, |
| "rewards/rejected": -2.831019639968872, |
| "step": 5100 |
| }, |
| { |
| "epoch": 4.088, |
| "grad_norm": 15.651266098022461, |
| "learning_rate": 3.683529778129504e-06, |
| "logits/chosen": 1.0326038599014282, |
| "logits/rejected": 0.9012963175773621, |
| "logps/chosen": -218.704833984375, |
| "logps/rejected": -238.8866424560547, |
| "loss": 0.3246, |
| "rewards/accuracies": 0.9000000357627869, |
| "rewards/chosen": -1.407049536705017, |
| "rewards/margins": 1.6295727491378784, |
| "rewards/rejected": -3.0366222858428955, |
| "step": 5110 |
| }, |
| { |
| "epoch": 4.096, |
| "grad_norm": 9.559797286987305, |
| "learning_rate": 3.677375739996759e-06, |
| "logits/chosen": 0.9783565402030945, |
| "logits/rejected": 0.9558612704277039, |
| "logps/chosen": -231.9866485595703, |
| "logps/rejected": -237.6753387451172, |
| "loss": 0.3427, |
| "rewards/accuracies": 0.8500000238418579, |
| "rewards/chosen": -1.2938991785049438, |
| "rewards/margins": 1.4221700429916382, |
| "rewards/rejected": -2.716069221496582, |
| "step": 5120 |
| }, |
| { |
| "epoch": 4.104, |
| "grad_norm": 20.652666091918945, |
| "learning_rate": 3.6712125204514836e-06, |
| "logits/chosen": 1.0956971645355225, |
| "logits/rejected": 1.1476175785064697, |
| "logps/chosen": -198.1278533935547, |
| "logps/rejected": -215.04112243652344, |
| "loss": 0.3917, |
| "rewards/accuracies": 0.8500000238418579, |
| "rewards/chosen": -0.28689682483673096, |
| "rewards/margins": 1.569340467453003, |
| "rewards/rejected": -1.8562372922897339, |
| "step": 5130 |
| }, |
| { |
| "epoch": 4.112, |
| "grad_norm": 20.57211685180664, |
| "learning_rate": 3.6650401675557025e-06, |
| "logits/chosen": 1.0987768173217773, |
| "logits/rejected": 0.8933493494987488, |
| "logps/chosen": -203.9111785888672, |
| "logps/rejected": -229.03076171875, |
| "loss": 0.3811, |
| "rewards/accuracies": 0.824999988079071, |
| "rewards/chosen": -1.243438720703125, |
| "rewards/margins": 1.4107178449630737, |
| "rewards/rejected": -2.654156446456909, |
| "step": 5140 |
| }, |
| { |
| "epoch": 4.12, |
| "grad_norm": 5.7946271896362305, |
| "learning_rate": 3.658858729442662e-06, |
| "logits/chosen": 0.9748809933662415, |
| "logits/rejected": 0.8560425043106079, |
| "logps/chosen": -199.7461395263672, |
| "logps/rejected": -234.7223663330078, |
| "loss": 0.3288, |
| "rewards/accuracies": 0.887499988079071, |
| "rewards/chosen": -1.376451015472412, |
| "rewards/margins": 1.6137140989303589, |
| "rewards/rejected": -2.9901649951934814, |
| "step": 5150 |
| }, |
| { |
| "epoch": 4.128, |
| "grad_norm": 6.650130271911621, |
| "learning_rate": 3.65266825431646e-06, |
| "logits/chosen": 0.9371269345283508, |
| "logits/rejected": 1.002604603767395, |
| "logps/chosen": -216.43150329589844, |
| "logps/rejected": -230.34336853027344, |
| "loss": 0.3251, |
| "rewards/accuracies": 0.875, |
| "rewards/chosen": -1.1472088098526, |
| "rewards/margins": 1.6432514190673828, |
| "rewards/rejected": -2.7904601097106934, |
| "step": 5160 |
| }, |
| { |
| "epoch": 4.136, |
| "grad_norm": 24.08129119873047, |
| "learning_rate": 3.646468790451663e-06, |
| "logits/chosen": 0.8831321597099304, |
| "logits/rejected": 0.776918888092041, |
| "logps/chosen": -213.083740234375, |
| "logps/rejected": -230.79202270507812, |
| "loss": 0.4005, |
| "rewards/accuracies": 0.862500011920929, |
| "rewards/chosen": -1.3220943212509155, |
| "rewards/margins": 1.3914340734481812, |
| "rewards/rejected": -2.7135283946990967, |
| "step": 5170 |
| }, |
| { |
| "epoch": 4.144, |
| "grad_norm": 7.815832138061523, |
| "learning_rate": 3.6402603861929374e-06, |
| "logits/chosen": 1.0589165687561035, |
| "logits/rejected": 0.9391776919364929, |
| "logps/chosen": -221.99449157714844, |
| "logps/rejected": -242.86216735839844, |
| "loss": 0.2609, |
| "rewards/accuracies": 0.949999988079071, |
| "rewards/chosen": -1.1631277799606323, |
| "rewards/margins": 1.766058325767517, |
| "rewards/rejected": -2.9291858673095703, |
| "step": 5180 |
| }, |
| { |
| "epoch": 4.152, |
| "grad_norm": 13.720664978027344, |
| "learning_rate": 3.6340430899546656e-06, |
| "logits/chosen": 0.9697431921958923, |
| "logits/rejected": 0.9235960841178894, |
| "logps/chosen": -223.24876403808594, |
| "logps/rejected": -240.31167602539062, |
| "loss": 0.4369, |
| "rewards/accuracies": 0.8125, |
| "rewards/chosen": -1.2503145933151245, |
| "rewards/margins": 1.2561858892440796, |
| "rewards/rejected": -2.506500482559204, |
| "step": 5190 |
| }, |
| { |
| "epoch": 4.16, |
| "grad_norm": 16.428617477416992, |
| "learning_rate": 3.6278169502205734e-06, |
| "logits/chosen": 1.0490871667861938, |
| "logits/rejected": 0.9422445297241211, |
| "logps/chosen": -211.8967742919922, |
| "logps/rejected": -263.45782470703125, |
| "loss": 0.3235, |
| "rewards/accuracies": 0.887499988079071, |
| "rewards/chosen": -1.041167140007019, |
| "rewards/margins": 1.970610499382019, |
| "rewards/rejected": -3.011777639389038, |
| "step": 5200 |
| }, |
| { |
| "epoch": 4.168, |
| "grad_norm": 28.398910522460938, |
| "learning_rate": 3.6215820155433486e-06, |
| "logits/chosen": 0.9427713751792908, |
| "logits/rejected": 0.9313327670097351, |
| "logps/chosen": -243.6337890625, |
| "logps/rejected": -252.7070770263672, |
| "loss": 0.3734, |
| "rewards/accuracies": 0.875, |
| "rewards/chosen": -0.9518672227859497, |
| "rewards/margins": 1.5367608070373535, |
| "rewards/rejected": -2.4886281490325928, |
| "step": 5210 |
| }, |
| { |
| "epoch": 4.176, |
| "grad_norm": 14.895367622375488, |
| "learning_rate": 3.615338334544265e-06, |
| "logits/chosen": 0.9828926920890808, |
| "logits/rejected": 0.9496780633926392, |
| "logps/chosen": -203.6242218017578, |
| "logps/rejected": -224.4546661376953, |
| "loss": 0.3542, |
| "rewards/accuracies": 0.862500011920929, |
| "rewards/chosen": -0.7610888481140137, |
| "rewards/margins": 1.4806041717529297, |
| "rewards/rejected": -2.2416930198669434, |
| "step": 5220 |
| }, |
| { |
| "epoch": 4.184, |
| "grad_norm": 19.1573543548584, |
| "learning_rate": 3.6090859559128e-06, |
| "logits/chosen": 0.9076266288757324, |
| "logits/rejected": 0.9045122265815735, |
| "logps/chosen": -212.6093292236328, |
| "logps/rejected": -223.61474609375, |
| "loss": 0.3464, |
| "rewards/accuracies": 0.8375000357627869, |
| "rewards/chosen": -0.48652344942092896, |
| "rewards/margins": 1.6024211645126343, |
| "rewards/rejected": -2.088944435119629, |
| "step": 5230 |
| }, |
| { |
| "epoch": 4.192, |
| "grad_norm": 7.1050310134887695, |
| "learning_rate": 3.6028249284062593e-06, |
| "logits/chosen": 0.9402229189872742, |
| "logits/rejected": 0.9130407571792603, |
| "logps/chosen": -215.83328247070312, |
| "logps/rejected": -237.27294921875, |
| "loss": 0.3177, |
| "rewards/accuracies": 0.887499988079071, |
| "rewards/chosen": -1.442810297012329, |
| "rewards/margins": 1.6023463010787964, |
| "rewards/rejected": -3.045156478881836, |
| "step": 5240 |
| }, |
| { |
| "epoch": 4.2, |
| "grad_norm": 13.212199211120605, |
| "learning_rate": 3.5965553008493924e-06, |
| "logits/chosen": 0.9931579828262329, |
| "logits/rejected": 0.8333091139793396, |
| "logps/chosen": -216.7156219482422, |
| "logps/rejected": -227.3023681640625, |
| "loss": 0.4671, |
| "rewards/accuracies": 0.800000011920929, |
| "rewards/chosen": -1.2548701763153076, |
| "rewards/margins": 1.3508440256118774, |
| "rewards/rejected": -2.6057140827178955, |
| "step": 5250 |
| }, |
| { |
| "epoch": 4.208, |
| "grad_norm": 15.673922538757324, |
| "learning_rate": 3.590277122134015e-06, |
| "logits/chosen": 0.925972580909729, |
| "logits/rejected": 0.9485855102539062, |
| "logps/chosen": -203.7695770263672, |
| "logps/rejected": -233.1253204345703, |
| "loss": 0.2825, |
| "rewards/accuracies": 0.949999988079071, |
| "rewards/chosen": -1.1053215265274048, |
| "rewards/margins": 1.8811248540878296, |
| "rewards/rejected": -2.9864463806152344, |
| "step": 5260 |
| }, |
| { |
| "epoch": 4.216, |
| "grad_norm": 18.827274322509766, |
| "learning_rate": 3.5839904412186254e-06, |
| "logits/chosen": 0.959101676940918, |
| "logits/rejected": 1.012556552886963, |
| "logps/chosen": -203.8087615966797, |
| "logps/rejected": -251.2672119140625, |
| "loss": 0.3625, |
| "rewards/accuracies": 0.875, |
| "rewards/chosen": -0.9612110257148743, |
| "rewards/margins": 1.7676079273223877, |
| "rewards/rejected": -2.7288191318511963, |
| "step": 5270 |
| }, |
| { |
| "epoch": 4.224, |
| "grad_norm": 10.582806587219238, |
| "learning_rate": 3.577695307128024e-06, |
| "logits/chosen": 0.8980388641357422, |
| "logits/rejected": 0.7367168664932251, |
| "logps/chosen": -219.5625, |
| "logps/rejected": -244.0664520263672, |
| "loss": 0.2701, |
| "rewards/accuracies": 0.887499988079071, |
| "rewards/chosen": -0.9930900931358337, |
| "rewards/margins": 2.069688558578491, |
| "rewards/rejected": -3.0627784729003906, |
| "step": 5280 |
| }, |
| { |
| "epoch": 4.232, |
| "grad_norm": 17.180286407470703, |
| "learning_rate": 3.571391768952932e-06, |
| "logits/chosen": 0.8705675005912781, |
| "logits/rejected": 0.8819023966789246, |
| "logps/chosen": -189.57054138183594, |
| "logps/rejected": -228.0927734375, |
| "loss": 0.3455, |
| "rewards/accuracies": 0.8375000357627869, |
| "rewards/chosen": -0.9885832667350769, |
| "rewards/margins": 1.6450799703598022, |
| "rewards/rejected": -2.6336631774902344, |
| "step": 5290 |
| }, |
| { |
| "epoch": 4.24, |
| "grad_norm": 13.662627220153809, |
| "learning_rate": 3.5650798758496053e-06, |
| "logits/chosen": 0.880375325679779, |
| "logits/rejected": 0.8753491640090942, |
| "logps/chosen": -213.9600067138672, |
| "logps/rejected": -244.53857421875, |
| "loss": 0.3318, |
| "rewards/accuracies": 0.862500011920929, |
| "rewards/chosen": -1.6102064847946167, |
| "rewards/margins": 1.6889526844024658, |
| "rewards/rejected": -3.299158811569214, |
| "step": 5300 |
| }, |
| { |
| "epoch": 4.248, |
| "grad_norm": 19.561792373657227, |
| "learning_rate": 3.558759677039455e-06, |
| "logits/chosen": 0.8705887794494629, |
| "logits/rejected": 0.8816524744033813, |
| "logps/chosen": -213.3289031982422, |
| "logps/rejected": -243.65737915039062, |
| "loss": 0.3461, |
| "rewards/accuracies": 0.8500000238418579, |
| "rewards/chosen": -1.5360292196273804, |
| "rewards/margins": 1.6422207355499268, |
| "rewards/rejected": -3.1782500743865967, |
| "step": 5310 |
| }, |
| { |
| "epoch": 4.256, |
| "grad_norm": 16.225622177124023, |
| "learning_rate": 3.552431221808661e-06, |
| "logits/chosen": 0.988519012928009, |
| "logits/rejected": 0.824196457862854, |
| "logps/chosen": -195.06642150878906, |
| "logps/rejected": -229.2406768798828, |
| "loss": 0.3154, |
| "rewards/accuracies": 0.887499988079071, |
| "rewards/chosen": -1.1573065519332886, |
| "rewards/margins": 1.9272657632827759, |
| "rewards/rejected": -3.0845723152160645, |
| "step": 5320 |
| }, |
| { |
| "epoch": 4.264, |
| "grad_norm": 11.897854804992676, |
| "learning_rate": 3.5460945595077874e-06, |
| "logits/chosen": 0.9042286276817322, |
| "logits/rejected": 0.8848081827163696, |
| "logps/chosen": -203.17344665527344, |
| "logps/rejected": -256.4823303222656, |
| "loss": 0.3054, |
| "rewards/accuracies": 0.9125000238418579, |
| "rewards/chosen": -1.392894983291626, |
| "rewards/margins": 1.7944774627685547, |
| "rewards/rejected": -3.1873722076416016, |
| "step": 5330 |
| }, |
| { |
| "epoch": 4.272, |
| "grad_norm": 13.320012092590332, |
| "learning_rate": 3.539749739551401e-06, |
| "logits/chosen": 0.9267520904541016, |
| "logits/rejected": 0.9451652765274048, |
| "logps/chosen": -191.97021484375, |
| "logps/rejected": -214.5209503173828, |
| "loss": 0.2868, |
| "rewards/accuracies": 0.887499988079071, |
| "rewards/chosen": -0.6399614810943604, |
| "rewards/margins": 1.7933428287506104, |
| "rewards/rejected": -2.4333043098449707, |
| "step": 5340 |
| }, |
| { |
| "epoch": 4.28, |
| "grad_norm": 10.956954002380371, |
| "learning_rate": 3.533396811417682e-06, |
| "logits/chosen": 0.9412531852722168, |
| "logits/rejected": 0.9464859366416931, |
| "logps/chosen": -192.187255859375, |
| "logps/rejected": -215.4243621826172, |
| "loss": 0.3242, |
| "rewards/accuracies": 0.875, |
| "rewards/chosen": -0.8624544143676758, |
| "rewards/margins": 1.6475841999053955, |
| "rewards/rejected": -2.5100386142730713, |
| "step": 5350 |
| }, |
| { |
| "epoch": 4.288, |
| "grad_norm": 18.71451187133789, |
| "learning_rate": 3.527035824648039e-06, |
| "logits/chosen": 0.8915054202079773, |
| "logits/rejected": 0.7794849276542664, |
| "logps/chosen": -192.20079040527344, |
| "logps/rejected": -232.98291015625, |
| "loss": 0.3807, |
| "rewards/accuracies": 0.824999988079071, |
| "rewards/chosen": -1.0030672550201416, |
| "rewards/margins": 1.811819076538086, |
| "rewards/rejected": -2.8148863315582275, |
| "step": 5360 |
| }, |
| { |
| "epoch": 4.296, |
| "grad_norm": 12.68542194366455, |
| "learning_rate": 3.520666828846726e-06, |
| "logits/chosen": 0.7684556841850281, |
| "logits/rejected": 0.7673687934875488, |
| "logps/chosen": -211.497314453125, |
| "logps/rejected": -240.1795654296875, |
| "loss": 0.3209, |
| "rewards/accuracies": 0.8500000238418579, |
| "rewards/chosen": -1.4810975790023804, |
| "rewards/margins": 1.700387954711914, |
| "rewards/rejected": -3.181485652923584, |
| "step": 5370 |
| }, |
| { |
| "epoch": 4.304, |
| "grad_norm": 6.8565144538879395, |
| "learning_rate": 3.5142898736804516e-06, |
| "logits/chosen": 0.9116408228874207, |
| "logits/rejected": 0.796613335609436, |
| "logps/chosen": -211.6884765625, |
| "logps/rejected": -243.06777954101562, |
| "loss": 0.3276, |
| "rewards/accuracies": 0.9000000357627869, |
| "rewards/chosen": -1.3549237251281738, |
| "rewards/margins": 1.6243526935577393, |
| "rewards/rejected": -2.979276418685913, |
| "step": 5380 |
| }, |
| { |
| "epoch": 4.312, |
| "grad_norm": 9.291360855102539, |
| "learning_rate": 3.5079050088779927e-06, |
| "logits/chosen": 0.8643763661384583, |
| "logits/rejected": 0.8873327374458313, |
| "logps/chosen": -180.9557342529297, |
| "logps/rejected": -216.88780212402344, |
| "loss": 0.3089, |
| "rewards/accuracies": 0.887499988079071, |
| "rewards/chosen": -0.6550284624099731, |
| "rewards/margins": 1.8085215091705322, |
| "rewards/rejected": -2.463550090789795, |
| "step": 5390 |
| }, |
| { |
| "epoch": 4.32, |
| "grad_norm": 10.52054500579834, |
| "learning_rate": 3.501512284229807e-06, |
| "logits/chosen": 0.8750426173210144, |
| "logits/rejected": 0.7605211734771729, |
| "logps/chosen": -212.87950134277344, |
| "logps/rejected": -263.2989196777344, |
| "loss": 0.2426, |
| "rewards/accuracies": 0.9750000238418579, |
| "rewards/chosen": -1.4775450229644775, |
| "rewards/margins": 2.0750772953033447, |
| "rewards/rejected": -3.552621841430664, |
| "step": 5400 |
| }, |
| { |
| "epoch": 4.328, |
| "grad_norm": 16.574983596801758, |
| "learning_rate": 3.4951117495876473e-06, |
| "logits/chosen": 0.9197494387626648, |
| "logits/rejected": 0.8915640115737915, |
| "logps/chosen": -215.3748016357422, |
| "logps/rejected": -229.8962860107422, |
| "loss": 0.3404, |
| "rewards/accuracies": 0.8500000238418579, |
| "rewards/chosen": -1.7154279947280884, |
| "rewards/margins": 1.5610477924346924, |
| "rewards/rejected": -3.276475667953491, |
| "step": 5410 |
| }, |
| { |
| "epoch": 4.336, |
| "grad_norm": 15.055716514587402, |
| "learning_rate": 3.4887034548641673e-06, |
| "logits/chosen": 0.9449960589408875, |
| "logits/rejected": 0.9040482640266418, |
| "logps/chosen": -217.7838897705078, |
| "logps/rejected": -268.0353088378906, |
| "loss": 0.3165, |
| "rewards/accuracies": 0.862500011920929, |
| "rewards/chosen": -1.7793148756027222, |
| "rewards/margins": 1.969291090965271, |
| "rewards/rejected": -3.748605728149414, |
| "step": 5420 |
| }, |
| { |
| "epoch": 4.344, |
| "grad_norm": 7.452909469604492, |
| "learning_rate": 3.482287450032536e-06, |
| "logits/chosen": 0.8897709250450134, |
| "logits/rejected": 0.8994399905204773, |
| "logps/chosen": -212.1781768798828, |
| "logps/rejected": -253.2353973388672, |
| "loss": 0.3063, |
| "rewards/accuracies": 0.9000000357627869, |
| "rewards/chosen": -1.805542230606079, |
| "rewards/margins": 1.7644466161727905, |
| "rewards/rejected": -3.569988965988159, |
| "step": 5430 |
| }, |
| { |
| "epoch": 4.352, |
| "grad_norm": 19.223543167114258, |
| "learning_rate": 3.47586378512605e-06, |
| "logits/chosen": 1.0354408025741577, |
| "logits/rejected": 0.9919587969779968, |
| "logps/chosen": -216.69546508789062, |
| "logps/rejected": -250.68069458007812, |
| "loss": 0.3631, |
| "rewards/accuracies": 0.8375000357627869, |
| "rewards/chosen": -1.8229974508285522, |
| "rewards/margins": 1.5450613498687744, |
| "rewards/rejected": -3.368058919906616, |
| "step": 5440 |
| }, |
| { |
| "epoch": 4.36, |
| "grad_norm": 13.662184715270996, |
| "learning_rate": 3.4694325102377356e-06, |
| "logits/chosen": 0.972303569316864, |
| "logits/rejected": 0.8980138897895813, |
| "logps/chosen": -206.581787109375, |
| "logps/rejected": -222.6265106201172, |
| "loss": 0.4966, |
| "rewards/accuracies": 0.75, |
| "rewards/chosen": -1.6232937574386597, |
| "rewards/margins": 1.1546478271484375, |
| "rewards/rejected": -2.7779414653778076, |
| "step": 5450 |
| }, |
| { |
| "epoch": 4.368, |
| "grad_norm": 6.83968448638916, |
| "learning_rate": 3.462993675519968e-06, |
| "logits/chosen": 0.8973142504692078, |
| "logits/rejected": 1.0178273916244507, |
| "logps/chosen": -226.5254364013672, |
| "logps/rejected": -241.5300750732422, |
| "loss": 0.3852, |
| "rewards/accuracies": 0.8500000238418579, |
| "rewards/chosen": -2.1246910095214844, |
| "rewards/margins": 1.604926347732544, |
| "rewards/rejected": -3.7296173572540283, |
| "step": 5460 |
| }, |
| { |
| "epoch": 4.376, |
| "grad_norm": 13.287477493286133, |
| "learning_rate": 3.4565473311840735e-06, |
| "logits/chosen": 1.0390936136245728, |
| "logits/rejected": 0.9516915678977966, |
| "logps/chosen": -214.5906982421875, |
| "logps/rejected": -220.36778259277344, |
| "loss": 0.2884, |
| "rewards/accuracies": 0.9125000238418579, |
| "rewards/chosen": -1.473663330078125, |
| "rewards/margins": 1.6746243238449097, |
| "rewards/rejected": -3.148287534713745, |
| "step": 5470 |
| }, |
| { |
| "epoch": 4.384, |
| "grad_norm": 8.79075813293457, |
| "learning_rate": 3.4500935274999414e-06, |
| "logits/chosen": 0.8991445899009705, |
| "logits/rejected": 0.7987154126167297, |
| "logps/chosen": -221.56704711914062, |
| "logps/rejected": -226.0802764892578, |
| "loss": 0.3229, |
| "rewards/accuracies": 0.9375, |
| "rewards/chosen": -1.3047336339950562, |
| "rewards/margins": 1.384783148765564, |
| "rewards/rejected": -2.68951678276062, |
| "step": 5480 |
| }, |
| { |
| "epoch": 4.392, |
| "grad_norm": 18.390661239624023, |
| "learning_rate": 3.443632314795627e-06, |
| "logits/chosen": 1.0096405744552612, |
| "logits/rejected": 0.9392326474189758, |
| "logps/chosen": -209.8367156982422, |
| "logps/rejected": -223.6701202392578, |
| "loss": 0.3204, |
| "rewards/accuracies": 0.875, |
| "rewards/chosen": -1.4388588666915894, |
| "rewards/margins": 1.779291033744812, |
| "rewards/rejected": -3.2181496620178223, |
| "step": 5490 |
| }, |
| { |
| "epoch": 4.4, |
| "grad_norm": 14.75433349609375, |
| "learning_rate": 3.4371637434569664e-06, |
| "logits/chosen": 0.9798237085342407, |
| "logits/rejected": 0.8885357975959778, |
| "logps/chosen": -215.8732147216797, |
| "logps/rejected": -250.0683135986328, |
| "loss": 0.2836, |
| "rewards/accuracies": 0.925000011920929, |
| "rewards/chosen": -1.3270418643951416, |
| "rewards/margins": 1.8547767400741577, |
| "rewards/rejected": -3.181818723678589, |
| "step": 5500 |
| }, |
| { |
| "epoch": 4.408, |
| "grad_norm": 7.223197937011719, |
| "learning_rate": 3.430687863927178e-06, |
| "logits/chosen": 0.9878398776054382, |
| "logits/rejected": 0.907991886138916, |
| "logps/chosen": -220.220947265625, |
| "logps/rejected": -232.602294921875, |
| "loss": 0.3854, |
| "rewards/accuracies": 0.824999988079071, |
| "rewards/chosen": -1.4777723550796509, |
| "rewards/margins": 1.486155390739441, |
| "rewards/rejected": -2.963927745819092, |
| "step": 5510 |
| }, |
| { |
| "epoch": 4.416, |
| "grad_norm": 18.754655838012695, |
| "learning_rate": 3.4242047267064714e-06, |
| "logits/chosen": 1.0766704082489014, |
| "logits/rejected": 1.0072535276412964, |
| "logps/chosen": -218.5634307861328, |
| "logps/rejected": -240.5037841796875, |
| "loss": 0.3645, |
| "rewards/accuracies": 0.8125, |
| "rewards/chosen": -1.6403402090072632, |
| "rewards/margins": 1.6216999292373657, |
| "rewards/rejected": -3.26203989982605, |
| "step": 5520 |
| }, |
| { |
| "epoch": 4.424, |
| "grad_norm": 16.398393630981445, |
| "learning_rate": 3.4177143823516523e-06, |
| "logits/chosen": 0.9290235638618469, |
| "logits/rejected": 0.8646947145462036, |
| "logps/chosen": -221.277099609375, |
| "logps/rejected": -243.0144805908203, |
| "loss": 0.3467, |
| "rewards/accuracies": 0.8375000357627869, |
| "rewards/chosen": -1.3182934522628784, |
| "rewards/margins": 1.7082767486572266, |
| "rewards/rejected": -3.0265700817108154, |
| "step": 5530 |
| }, |
| { |
| "epoch": 4.432, |
| "grad_norm": 10.032676696777344, |
| "learning_rate": 3.4112168814757307e-06, |
| "logits/chosen": 0.9694162607192993, |
| "logits/rejected": 0.9398545622825623, |
| "logps/chosen": -205.362060546875, |
| "logps/rejected": -224.15017700195312, |
| "loss": 0.2903, |
| "rewards/accuracies": 0.925000011920929, |
| "rewards/chosen": -1.2131686210632324, |
| "rewards/margins": 1.873663306236267, |
| "rewards/rejected": -3.086832046508789, |
| "step": 5540 |
| }, |
| { |
| "epoch": 4.44, |
| "grad_norm": 21.09775733947754, |
| "learning_rate": 3.4047122747475227e-06, |
| "logits/chosen": 0.9186191558837891, |
| "logits/rejected": 0.763767421245575, |
| "logps/chosen": -226.12075805664062, |
| "logps/rejected": -264.490966796875, |
| "loss": 0.3921, |
| "rewards/accuracies": 0.862500011920929, |
| "rewards/chosen": -1.8982375860214233, |
| "rewards/margins": 1.5824882984161377, |
| "rewards/rejected": -3.4807260036468506, |
| "step": 5550 |
| }, |
| { |
| "epoch": 4.448, |
| "grad_norm": 25.094377517700195, |
| "learning_rate": 3.3982006128912587e-06, |
| "logits/chosen": 0.8753210306167603, |
| "logits/rejected": 0.8916776776313782, |
| "logps/chosen": -205.23233032226562, |
| "logps/rejected": -227.34988403320312, |
| "loss": 0.433, |
| "rewards/accuracies": 0.800000011920929, |
| "rewards/chosen": -1.5061285495758057, |
| "rewards/margins": 1.4716495275497437, |
| "rewards/rejected": -2.9777779579162598, |
| "step": 5560 |
| }, |
| { |
| "epoch": 4.456, |
| "grad_norm": 19.3261661529541, |
| "learning_rate": 3.391681946686186e-06, |
| "logits/chosen": 0.8373166918754578, |
| "logits/rejected": 0.819493293762207, |
| "logps/chosen": -207.85977172851562, |
| "logps/rejected": -245.46836853027344, |
| "loss": 0.3845, |
| "rewards/accuracies": 0.9000000357627869, |
| "rewards/chosen": -1.96077561378479, |
| "rewards/margins": 1.5056307315826416, |
| "rewards/rejected": -3.4664063453674316, |
| "step": 5570 |
| }, |
| { |
| "epoch": 4.464, |
| "grad_norm": 17.59246253967285, |
| "learning_rate": 3.385156326966173e-06, |
| "logits/chosen": 0.9824124574661255, |
| "logits/rejected": 0.9056622385978699, |
| "logps/chosen": -212.71084594726562, |
| "logps/rejected": -223.310791015625, |
| "loss": 0.3712, |
| "rewards/accuracies": 0.8125, |
| "rewards/chosen": -1.4070976972579956, |
| "rewards/margins": 1.7284908294677734, |
| "rewards/rejected": -3.1355881690979004, |
| "step": 5580 |
| }, |
| { |
| "epoch": 4.4719999999999995, |
| "grad_norm": 10.986981391906738, |
| "learning_rate": 3.3786238046193125e-06, |
| "logits/chosen": 0.9522453546524048, |
| "logits/rejected": 0.8866806030273438, |
| "logps/chosen": -206.303955078125, |
| "logps/rejected": -239.8437042236328, |
| "loss": 0.4034, |
| "rewards/accuracies": 0.8125, |
| "rewards/chosen": -1.8517950773239136, |
| "rewards/margins": 1.48299241065979, |
| "rewards/rejected": -3.3347878456115723, |
| "step": 5590 |
| }, |
| { |
| "epoch": 4.48, |
| "grad_norm": 10.576725006103516, |
| "learning_rate": 3.372084430587528e-06, |
| "logits/chosen": 0.8120132684707642, |
| "logits/rejected": 0.8881155252456665, |
| "logps/chosen": -224.1233367919922, |
| "logps/rejected": -252.2123260498047, |
| "loss": 0.3466, |
| "rewards/accuracies": 0.875, |
| "rewards/chosen": -1.8974634408950806, |
| "rewards/margins": 1.5087515115737915, |
| "rewards/rejected": -3.406214952468872, |
| "step": 5600 |
| }, |
| { |
| "epoch": 4.4879999999999995, |
| "grad_norm": 7.61484432220459, |
| "learning_rate": 3.365538255866169e-06, |
| "logits/chosen": 0.9328517913818359, |
| "logits/rejected": 0.832955539226532, |
| "logps/chosen": -224.76219177246094, |
| "logps/rejected": -263.3474426269531, |
| "loss": 0.3206, |
| "rewards/accuracies": 0.9000000357627869, |
| "rewards/chosen": -1.6335548162460327, |
| "rewards/margins": 2.0621376037597656, |
| "rewards/rejected": -3.695692539215088, |
| "step": 5610 |
| }, |
| { |
| "epoch": 4.496, |
| "grad_norm": 7.165886878967285, |
| "learning_rate": 3.3589853315036227e-06, |
| "logits/chosen": 0.9069485068321228, |
| "logits/rejected": 0.8654775619506836, |
| "logps/chosen": -231.8223876953125, |
| "logps/rejected": -247.6749267578125, |
| "loss": 0.3769, |
| "rewards/accuracies": 0.8500000238418579, |
| "rewards/chosen": -1.6844438314437866, |
| "rewards/margins": 1.5118721723556519, |
| "rewards/rejected": -3.1963162422180176, |
| "step": 5620 |
| }, |
| { |
| "epoch": 4.504, |
| "grad_norm": 12.06303596496582, |
| "learning_rate": 3.3524257086009105e-06, |
| "logits/chosen": 0.8854274153709412, |
| "logits/rejected": 0.9237664341926575, |
| "logps/chosen": -219.48692321777344, |
| "logps/rejected": -252.7774200439453, |
| "loss": 0.2831, |
| "rewards/accuracies": 0.9000000357627869, |
| "rewards/chosen": -1.6034126281738281, |
| "rewards/margins": 1.9384158849716187, |
| "rewards/rejected": -3.541828155517578, |
| "step": 5630 |
| }, |
| { |
| "epoch": 4.5120000000000005, |
| "grad_norm": 10.347089767456055, |
| "learning_rate": 3.3458594383112868e-06, |
| "logits/chosen": 0.9743103981018066, |
| "logits/rejected": 0.820307731628418, |
| "logps/chosen": -192.3817138671875, |
| "logps/rejected": -223.5673065185547, |
| "loss": 0.3259, |
| "rewards/accuracies": 0.8500000238418579, |
| "rewards/chosen": -1.055615782737732, |
| "rewards/margins": 1.9324188232421875, |
| "rewards/rejected": -2.98803448677063, |
| "step": 5640 |
| }, |
| { |
| "epoch": 4.52, |
| "grad_norm": 21.758241653442383, |
| "learning_rate": 3.339286571839848e-06, |
| "logits/chosen": 0.8464789390563965, |
| "logits/rejected": 0.8443576693534851, |
| "logps/chosen": -203.8209991455078, |
| "logps/rejected": -246.0260772705078, |
| "loss": 0.3542, |
| "rewards/accuracies": 0.887499988079071, |
| "rewards/chosen": -1.057892084121704, |
| "rewards/margins": 1.6661615371704102, |
| "rewards/rejected": -2.7240536212921143, |
| "step": 5650 |
| }, |
| { |
| "epoch": 4.5280000000000005, |
| "grad_norm": 15.415779113769531, |
| "learning_rate": 3.332707160443128e-06, |
| "logits/chosen": 0.9635658264160156, |
| "logits/rejected": 0.8357729911804199, |
| "logps/chosen": -222.7996826171875, |
| "logps/rejected": -249.92446899414062, |
| "loss": 0.2987, |
| "rewards/accuracies": 0.875, |
| "rewards/chosen": -1.3085647821426392, |
| "rewards/margins": 1.8709415197372437, |
| "rewards/rejected": -3.179506301879883, |
| "step": 5660 |
| }, |
| { |
| "epoch": 4.536, |
| "grad_norm": 22.9267578125, |
| "learning_rate": 3.3261212554286977e-06, |
| "logits/chosen": 0.8950627446174622, |
| "logits/rejected": 0.8704226613044739, |
| "logps/chosen": -221.8727569580078, |
| "logps/rejected": -254.3755645751953, |
| "loss": 0.4041, |
| "rewards/accuracies": 0.800000011920929, |
| "rewards/chosen": -2.043503761291504, |
| "rewards/margins": 1.4210357666015625, |
| "rewards/rejected": -3.4645392894744873, |
| "step": 5670 |
| }, |
| { |
| "epoch": 4.5440000000000005, |
| "grad_norm": 31.6077880859375, |
| "learning_rate": 3.319528908154766e-06, |
| "logits/chosen": 0.8877006769180298, |
| "logits/rejected": 0.8138026595115662, |
| "logps/chosen": -252.089111328125, |
| "logps/rejected": -247.1710968017578, |
| "loss": 0.427, |
| "rewards/accuracies": 0.862500011920929, |
| "rewards/chosen": -1.8856754302978516, |
| "rewards/margins": 1.4032905101776123, |
| "rewards/rejected": -3.288965940475464, |
| "step": 5680 |
| }, |
| { |
| "epoch": 4.552, |
| "grad_norm": 19.946836471557617, |
| "learning_rate": 3.3129301700297834e-06, |
| "logits/chosen": 1.060017704963684, |
| "logits/rejected": 1.0112991333007812, |
| "logps/chosen": -202.8851776123047, |
| "logps/rejected": -234.48912048339844, |
| "loss": 0.3442, |
| "rewards/accuracies": 0.862500011920929, |
| "rewards/chosen": -1.6744213104248047, |
| "rewards/margins": 1.476469874382019, |
| "rewards/rejected": -3.1508913040161133, |
| "step": 5690 |
| }, |
| { |
| "epoch": 4.5600000000000005, |
| "grad_norm": 9.337148666381836, |
| "learning_rate": 3.306325092512034e-06, |
| "logits/chosen": 0.9761505126953125, |
| "logits/rejected": 0.9596638083457947, |
| "logps/chosen": -208.33743286132812, |
| "logps/rejected": -234.8901824951172, |
| "loss": 0.3815, |
| "rewards/accuracies": 0.875, |
| "rewards/chosen": -1.9948211908340454, |
| "rewards/margins": 1.56894850730896, |
| "rewards/rejected": -3.563769578933716, |
| "step": 5700 |
| }, |
| { |
| "epoch": 4.568, |
| "grad_norm": 13.035239219665527, |
| "learning_rate": 3.2997137271092396e-06, |
| "logits/chosen": 0.963702380657196, |
| "logits/rejected": 0.872109055519104, |
| "logps/chosen": -234.18614196777344, |
| "logps/rejected": -254.7106475830078, |
| "loss": 0.3564, |
| "rewards/accuracies": 0.824999988079071, |
| "rewards/chosen": -2.163374900817871, |
| "rewards/margins": 1.6218290328979492, |
| "rewards/rejected": -3.7852039337158203, |
| "step": 5710 |
| }, |
| { |
| "epoch": 4.576, |
| "grad_norm": 27.116188049316406, |
| "learning_rate": 3.293096125378156e-06, |
| "logits/chosen": 0.92717045545578, |
| "logits/rejected": 0.908390462398529, |
| "logps/chosen": -210.4567413330078, |
| "logps/rejected": -253.227294921875, |
| "loss": 0.3333, |
| "rewards/accuracies": 0.875, |
| "rewards/chosen": -1.5652707815170288, |
| "rewards/margins": 1.8512943983078003, |
| "rewards/rejected": -3.41656494140625, |
| "step": 5720 |
| }, |
| { |
| "epoch": 4.584, |
| "grad_norm": 19.045427322387695, |
| "learning_rate": 3.2864723389241697e-06, |
| "logits/chosen": 0.9135765433311462, |
| "logits/rejected": 0.859271228313446, |
| "logps/chosen": -222.8280487060547, |
| "logps/rejected": -242.25540161132812, |
| "loss": 0.3374, |
| "rewards/accuracies": 0.8500000238418579, |
| "rewards/chosen": -1.3668630123138428, |
| "rewards/margins": 1.7538166046142578, |
| "rewards/rejected": -3.1206796169281006, |
| "step": 5730 |
| }, |
| { |
| "epoch": 4.592, |
| "grad_norm": 20.58856773376465, |
| "learning_rate": 3.279842419400899e-06, |
| "logits/chosen": 0.8949125409126282, |
| "logits/rejected": 0.9091690182685852, |
| "logps/chosen": -224.7333984375, |
| "logps/rejected": -265.9559631347656, |
| "loss": 0.4016, |
| "rewards/accuracies": 0.8125, |
| "rewards/chosen": -1.5353691577911377, |
| "rewards/margins": 1.6605640649795532, |
| "rewards/rejected": -3.1959331035614014, |
| "step": 5740 |
| }, |
| { |
| "epoch": 4.6, |
| "grad_norm": 15.529658317565918, |
| "learning_rate": 3.2732064185097885e-06, |
| "logits/chosen": 1.0308302640914917, |
| "logits/rejected": 0.9736013412475586, |
| "logps/chosen": -207.3667449951172, |
| "logps/rejected": -235.87290954589844, |
| "loss": 0.3166, |
| "rewards/accuracies": 0.9000000357627869, |
| "rewards/chosen": -1.106737732887268, |
| "rewards/margins": 1.6635363101959229, |
| "rewards/rejected": -2.7702741622924805, |
| "step": 5750 |
| }, |
| { |
| "epoch": 4.608, |
| "grad_norm": 13.790717124938965, |
| "learning_rate": 3.2665643879997054e-06, |
| "logits/chosen": 1.0450878143310547, |
| "logits/rejected": 1.0168274641036987, |
| "logps/chosen": -201.68594360351562, |
| "logps/rejected": -224.7779541015625, |
| "loss": 0.4748, |
| "rewards/accuracies": 0.800000011920929, |
| "rewards/chosen": -1.329674243927002, |
| "rewards/margins": 1.2952015399932861, |
| "rewards/rejected": -2.624875783920288, |
| "step": 5760 |
| }, |
| { |
| "epoch": 4.616, |
| "grad_norm": 19.334339141845703, |
| "learning_rate": 3.259916379666538e-06, |
| "logits/chosen": 0.9460241198539734, |
| "logits/rejected": 0.902341365814209, |
| "logps/chosen": -206.3498077392578, |
| "logps/rejected": -234.02978515625, |
| "loss": 0.3343, |
| "rewards/accuracies": 0.9000000357627869, |
| "rewards/chosen": -0.8321787714958191, |
| "rewards/margins": 1.6867774724960327, |
| "rewards/rejected": -2.518956422805786, |
| "step": 5770 |
| }, |
| { |
| "epoch": 4.624, |
| "grad_norm": 11.740504264831543, |
| "learning_rate": 3.2532624453527904e-06, |
| "logits/chosen": 0.765145480632782, |
| "logits/rejected": 0.8052225112915039, |
| "logps/chosen": -215.4130401611328, |
| "logps/rejected": -230.92808532714844, |
| "loss": 0.3438, |
| "rewards/accuracies": 0.8375000357627869, |
| "rewards/chosen": -0.9054020047187805, |
| "rewards/margins": 1.7861785888671875, |
| "rewards/rejected": -2.6915805339813232, |
| "step": 5780 |
| }, |
| { |
| "epoch": 4.632, |
| "grad_norm": 23.415552139282227, |
| "learning_rate": 3.2466026369471804e-06, |
| "logits/chosen": 0.8790031671524048, |
| "logits/rejected": 0.8607792854309082, |
| "logps/chosen": -205.34423828125, |
| "logps/rejected": -228.1710968017578, |
| "loss": 0.4049, |
| "rewards/accuracies": 0.824999988079071, |
| "rewards/chosen": -1.2571852207183838, |
| "rewards/margins": 1.416163682937622, |
| "rewards/rejected": -2.6733486652374268, |
| "step": 5790 |
| }, |
| { |
| "epoch": 4.64, |
| "grad_norm": 17.48906707763672, |
| "learning_rate": 3.2399370063842297e-06, |
| "logits/chosen": 0.865296483039856, |
| "logits/rejected": 0.8317980170249939, |
| "logps/chosen": -233.6914520263672, |
| "logps/rejected": -257.34161376953125, |
| "loss": 0.4092, |
| "rewards/accuracies": 0.8500000238418579, |
| "rewards/chosen": -1.9758745431900024, |
| "rewards/margins": 1.5804134607315063, |
| "rewards/rejected": -3.556288242340088, |
| "step": 5800 |
| }, |
| { |
| "epoch": 4.648, |
| "grad_norm": 15.079739570617676, |
| "learning_rate": 3.2332656056438663e-06, |
| "logits/chosen": 0.9580942392349243, |
| "logits/rejected": 0.8425354957580566, |
| "logps/chosen": -206.0723419189453, |
| "logps/rejected": -235.4949493408203, |
| "loss": 0.4092, |
| "rewards/accuracies": 0.800000011920929, |
| "rewards/chosen": -1.753932237625122, |
| "rewards/margins": 1.4880059957504272, |
| "rewards/rejected": -3.241938352584839, |
| "step": 5810 |
| }, |
| { |
| "epoch": 4.656, |
| "grad_norm": 27.08865737915039, |
| "learning_rate": 3.226588486751012e-06, |
| "logits/chosen": 1.0157322883605957, |
| "logits/rejected": 0.9247297644615173, |
| "logps/chosen": -198.93539428710938, |
| "logps/rejected": -235.2766876220703, |
| "loss": 0.37, |
| "rewards/accuracies": 0.8375000357627869, |
| "rewards/chosen": -1.8956657648086548, |
| "rewards/margins": 1.5729421377182007, |
| "rewards/rejected": -3.4686081409454346, |
| "step": 5820 |
| }, |
| { |
| "epoch": 4.664, |
| "grad_norm": 13.346570014953613, |
| "learning_rate": 3.2199057017751822e-06, |
| "logits/chosen": 1.0085042715072632, |
| "logits/rejected": 0.9250786900520325, |
| "logps/chosen": -216.81689453125, |
| "logps/rejected": -223.42991638183594, |
| "loss": 0.3252, |
| "rewards/accuracies": 0.875, |
| "rewards/chosen": -1.240125060081482, |
| "rewards/margins": 1.7455374002456665, |
| "rewards/rejected": -2.9856624603271484, |
| "step": 5830 |
| }, |
| { |
| "epoch": 4.672, |
| "grad_norm": 13.748871803283691, |
| "learning_rate": 3.2132173028300756e-06, |
| "logits/chosen": 1.0077823400497437, |
| "logits/rejected": 0.9494333267211914, |
| "logps/chosen": -220.22952270507812, |
| "logps/rejected": -239.5417938232422, |
| "loss": 0.4106, |
| "rewards/accuracies": 0.8375000357627869, |
| "rewards/chosen": -1.271531343460083, |
| "rewards/margins": 1.3013349771499634, |
| "rewards/rejected": -2.572866201400757, |
| "step": 5840 |
| }, |
| { |
| "epoch": 4.68, |
| "grad_norm": 11.705482482910156, |
| "learning_rate": 3.2065233420731717e-06, |
| "logits/chosen": 0.9431636929512024, |
| "logits/rejected": 0.945948600769043, |
| "logps/chosen": -211.1238555908203, |
| "logps/rejected": -250.669189453125, |
| "loss": 0.3788, |
| "rewards/accuracies": 0.824999988079071, |
| "rewards/chosen": -1.2726339101791382, |
| "rewards/margins": 1.5259406566619873, |
| "rewards/rejected": -2.798574447631836, |
| "step": 5850 |
| }, |
| { |
| "epoch": 4.688, |
| "grad_norm": 19.19478416442871, |
| "learning_rate": 3.1998238717053202e-06, |
| "logits/chosen": 1.0462414026260376, |
| "logits/rejected": 0.9334519505500793, |
| "logps/chosen": -204.07395935058594, |
| "logps/rejected": -229.44419860839844, |
| "loss": 0.3411, |
| "rewards/accuracies": 0.8500000238418579, |
| "rewards/chosen": -1.3902432918548584, |
| "rewards/margins": 1.6846592426300049, |
| "rewards/rejected": -3.0749025344848633, |
| "step": 5860 |
| }, |
| { |
| "epoch": 4.696, |
| "grad_norm": 16.826845169067383, |
| "learning_rate": 3.1931189439703383e-06, |
| "logits/chosen": 0.9771106839179993, |
| "logits/rejected": 0.8640087246894836, |
| "logps/chosen": -211.6616668701172, |
| "logps/rejected": -238.89736938476562, |
| "loss": 0.3763, |
| "rewards/accuracies": 0.8375000357627869, |
| "rewards/chosen": -1.612330436706543, |
| "rewards/margins": 1.4654382467269897, |
| "rewards/rejected": -3.0777688026428223, |
| "step": 5870 |
| }, |
| { |
| "epoch": 4.704, |
| "grad_norm": 18.69582176208496, |
| "learning_rate": 3.186408611154597e-06, |
| "logits/chosen": 0.8807933926582336, |
| "logits/rejected": 0.8436470031738281, |
| "logps/chosen": -199.52122497558594, |
| "logps/rejected": -235.47007751464844, |
| "loss": 0.3434, |
| "rewards/accuracies": 0.8500000238418579, |
| "rewards/chosen": -1.1381405591964722, |
| "rewards/margins": 1.7940791845321655, |
| "rewards/rejected": -2.9322197437286377, |
| "step": 5880 |
| }, |
| { |
| "epoch": 4.712, |
| "grad_norm": 13.82207202911377, |
| "learning_rate": 3.1796929255866223e-06, |
| "logits/chosen": 0.908224880695343, |
| "logits/rejected": 0.8031226992607117, |
| "logps/chosen": -226.13247680664062, |
| "logps/rejected": -268.6899108886719, |
| "loss": 0.2751, |
| "rewards/accuracies": 0.925000011920929, |
| "rewards/chosen": -1.7007683515548706, |
| "rewards/margins": 2.1727986335754395, |
| "rewards/rejected": -3.8735668659210205, |
| "step": 5890 |
| }, |
| { |
| "epoch": 4.72, |
| "grad_norm": 17.216737747192383, |
| "learning_rate": 3.1729719396366765e-06, |
| "logits/chosen": 0.8856326341629028, |
| "logits/rejected": 0.8279878497123718, |
| "logps/chosen": -211.522216796875, |
| "logps/rejected": -239.03599548339844, |
| "loss": 0.3723, |
| "rewards/accuracies": 0.8500000238418579, |
| "rewards/chosen": -1.6209534406661987, |
| "rewards/margins": 1.6722420454025269, |
| "rewards/rejected": -3.2931954860687256, |
| "step": 5900 |
| }, |
| { |
| "epoch": 4.728, |
| "grad_norm": 11.951440811157227, |
| "learning_rate": 3.1662457057163603e-06, |
| "logits/chosen": 0.9104948043823242, |
| "logits/rejected": 0.80853271484375, |
| "logps/chosen": -216.1103057861328, |
| "logps/rejected": -228.3678741455078, |
| "loss": 0.3353, |
| "rewards/accuracies": 0.9000000357627869, |
| "rewards/chosen": -1.3812806606292725, |
| "rewards/margins": 1.7015780210494995, |
| "rewards/rejected": -3.0828585624694824, |
| "step": 5910 |
| }, |
| { |
| "epoch": 4.736, |
| "grad_norm": 18.6155948638916, |
| "learning_rate": 3.1595142762781966e-06, |
| "logits/chosen": 1.020864486694336, |
| "logits/rejected": 0.9599675536155701, |
| "logps/chosen": -213.35317993164062, |
| "logps/rejected": -227.1990203857422, |
| "loss": 0.3485, |
| "rewards/accuracies": 0.8125, |
| "rewards/chosen": -1.3319700956344604, |
| "rewards/margins": 1.7788978815078735, |
| "rewards/rejected": -3.110867738723755, |
| "step": 5920 |
| }, |
| { |
| "epoch": 4.744, |
| "grad_norm": 15.233264923095703, |
| "learning_rate": 3.1527777038152237e-06, |
| "logits/chosen": 0.99336177110672, |
| "logits/rejected": 0.9045869708061218, |
| "logps/chosen": -225.9204559326172, |
| "logps/rejected": -246.9542999267578, |
| "loss": 0.2477, |
| "rewards/accuracies": 0.9000000357627869, |
| "rewards/chosen": -1.912535548210144, |
| "rewards/margins": 2.0346450805664062, |
| "rewards/rejected": -3.947180986404419, |
| "step": 5930 |
| }, |
| { |
| "epoch": 4.752, |
| "grad_norm": 8.648079872131348, |
| "learning_rate": 3.1460360408605866e-06, |
| "logits/chosen": 0.778138279914856, |
| "logits/rejected": 0.8619017004966736, |
| "logps/chosen": -206.7869873046875, |
| "logps/rejected": -250.70361328125, |
| "loss": 0.2783, |
| "rewards/accuracies": 0.9000000357627869, |
| "rewards/chosen": -1.4929323196411133, |
| "rewards/margins": 1.780416488647461, |
| "rewards/rejected": -3.2733490467071533, |
| "step": 5940 |
| }, |
| { |
| "epoch": 4.76, |
| "grad_norm": 13.389790534973145, |
| "learning_rate": 3.1392893399871294e-06, |
| "logits/chosen": 0.8986393213272095, |
| "logits/rejected": 0.8389616012573242, |
| "logps/chosen": -233.67100524902344, |
| "logps/rejected": -222.7420196533203, |
| "loss": 0.427, |
| "rewards/accuracies": 0.7750000357627869, |
| "rewards/chosen": -1.2816507816314697, |
| "rewards/margins": 1.3874958753585815, |
| "rewards/rejected": -2.6691465377807617, |
| "step": 5950 |
| }, |
| { |
| "epoch": 4.768, |
| "grad_norm": 13.679242134094238, |
| "learning_rate": 3.132537653806978e-06, |
| "logits/chosen": 0.9529990553855896, |
| "logits/rejected": 0.9898123741149902, |
| "logps/chosen": -206.0071258544922, |
| "logps/rejected": -235.917236328125, |
| "loss": 0.489, |
| "rewards/accuracies": 0.7750000357627869, |
| "rewards/chosen": -1.56574547290802, |
| "rewards/margins": 1.2233296632766724, |
| "rewards/rejected": -2.7890751361846924, |
| "step": 5960 |
| }, |
| { |
| "epoch": 4.776, |
| "grad_norm": 25.058631896972656, |
| "learning_rate": 3.1257810349711388e-06, |
| "logits/chosen": 1.1274060010910034, |
| "logits/rejected": 0.9430057406425476, |
| "logps/chosen": -229.041259765625, |
| "logps/rejected": -240.43399047851562, |
| "loss": 0.3475, |
| "rewards/accuracies": 0.8375000357627869, |
| "rewards/chosen": -1.2477760314941406, |
| "rewards/margins": 1.6116218566894531, |
| "rewards/rejected": -2.8593976497650146, |
| "step": 5970 |
| }, |
| { |
| "epoch": 4.784, |
| "grad_norm": 26.500873565673828, |
| "learning_rate": 3.1190195361690833e-06, |
| "logits/chosen": 1.0664939880371094, |
| "logits/rejected": 0.9565810561180115, |
| "logps/chosen": -206.330078125, |
| "logps/rejected": -240.9081573486328, |
| "loss": 0.3886, |
| "rewards/accuracies": 0.8500000238418579, |
| "rewards/chosen": -1.549399971961975, |
| "rewards/margins": 1.5753132104873657, |
| "rewards/rejected": -3.1247129440307617, |
| "step": 5980 |
| }, |
| { |
| "epoch": 4.792, |
| "grad_norm": 8.312376976013184, |
| "learning_rate": 3.1122532101283366e-06, |
| "logits/chosen": 0.9260608553886414, |
| "logits/rejected": 0.8606094717979431, |
| "logps/chosen": -221.0975341796875, |
| "logps/rejected": -267.6355285644531, |
| "loss": 0.3165, |
| "rewards/accuracies": 0.862500011920929, |
| "rewards/chosen": -1.3144680261611938, |
| "rewards/margins": 1.7448300123214722, |
| "rewards/rejected": -3.059298038482666, |
| "step": 5990 |
| }, |
| { |
| "epoch": 4.8, |
| "grad_norm": 9.584151268005371, |
| "learning_rate": 3.1054821096140675e-06, |
| "logits/chosen": 1.0778309106826782, |
| "logits/rejected": 0.8670400977134705, |
| "logps/chosen": -201.90811157226562, |
| "logps/rejected": -216.34109497070312, |
| "loss": 0.2893, |
| "rewards/accuracies": 0.9125000238418579, |
| "rewards/chosen": -0.7194448709487915, |
| "rewards/margins": 1.7727628946304321, |
| "rewards/rejected": -2.4922077655792236, |
| "step": 6000 |
| }, |
| { |
| "epoch": 4.808, |
| "grad_norm": 10.907683372497559, |
| "learning_rate": 3.0987062874286805e-06, |
| "logits/chosen": 0.895865261554718, |
| "logits/rejected": 0.9385225176811218, |
| "logps/chosen": -206.30361938476562, |
| "logps/rejected": -236.09593200683594, |
| "loss": 0.2717, |
| "rewards/accuracies": 0.9375, |
| "rewards/chosen": -0.7000702023506165, |
| "rewards/margins": 2.0624783039093018, |
| "rewards/rejected": -2.7625484466552734, |
| "step": 6010 |
| }, |
| { |
| "epoch": 4.816, |
| "grad_norm": 20.199901580810547, |
| "learning_rate": 3.0919257964113962e-06, |
| "logits/chosen": 0.9721388220787048, |
| "logits/rejected": 0.9084480404853821, |
| "logps/chosen": -205.85423278808594, |
| "logps/rejected": -226.93325805664062, |
| "loss": 0.4159, |
| "rewards/accuracies": 0.824999988079071, |
| "rewards/chosen": -1.0990513563156128, |
| "rewards/margins": 1.5490108728408813, |
| "rewards/rejected": -2.648062229156494, |
| "step": 6020 |
| }, |
| { |
| "epoch": 4.824, |
| "grad_norm": 13.252838134765625, |
| "learning_rate": 3.085140689437846e-06, |
| "logits/chosen": 1.1252657175064087, |
| "logits/rejected": 1.0671288967132568, |
| "logps/chosen": -192.517333984375, |
| "logps/rejected": -223.1140594482422, |
| "loss": 0.285, |
| "rewards/accuracies": 0.925000011920929, |
| "rewards/chosen": -0.5657386779785156, |
| "rewards/margins": 2.0705223083496094, |
| "rewards/rejected": -2.636261224746704, |
| "step": 6030 |
| }, |
| { |
| "epoch": 4.832, |
| "grad_norm": 16.6810359954834, |
| "learning_rate": 3.0783510194196577e-06, |
| "logits/chosen": 1.0133423805236816, |
| "logits/rejected": 0.9432527422904968, |
| "logps/chosen": -212.550537109375, |
| "logps/rejected": -248.9680633544922, |
| "loss": 0.3551, |
| "rewards/accuracies": 0.8375000357627869, |
| "rewards/chosen": -1.3131626844406128, |
| "rewards/margins": 1.684877634048462, |
| "rewards/rejected": -2.998040199279785, |
| "step": 6040 |
| }, |
| { |
| "epoch": 4.84, |
| "grad_norm": 12.429515838623047, |
| "learning_rate": 3.0715568393040405e-06, |
| "logits/chosen": 0.997822105884552, |
| "logits/rejected": 0.9970841407775879, |
| "logps/chosen": -186.12039184570312, |
| "logps/rejected": -226.0194854736328, |
| "loss": 0.2044, |
| "rewards/accuracies": 0.9375, |
| "rewards/chosen": -0.5419926047325134, |
| "rewards/margins": 2.209963321685791, |
| "rewards/rejected": -2.75195574760437, |
| "step": 6050 |
| }, |
| { |
| "epoch": 4.848, |
| "grad_norm": 10.648442268371582, |
| "learning_rate": 3.0647582020733773e-06, |
| "logits/chosen": 0.8079277873039246, |
| "logits/rejected": 0.8897015452384949, |
| "logps/chosen": -228.3728485107422, |
| "logps/rejected": -263.2770690917969, |
| "loss": 0.2747, |
| "rewards/accuracies": 0.9000000357627869, |
| "rewards/chosen": -1.4080426692962646, |
| "rewards/margins": 2.117032051086426, |
| "rewards/rejected": -3.5250747203826904, |
| "step": 6060 |
| }, |
| { |
| "epoch": 4.856, |
| "grad_norm": 34.41978073120117, |
| "learning_rate": 3.0579551607448064e-06, |
| "logits/chosen": 0.9691902995109558, |
| "logits/rejected": 0.8717222213745117, |
| "logps/chosen": -229.96450805664062, |
| "logps/rejected": -267.5755920410156, |
| "loss": 0.3608, |
| "rewards/accuracies": 0.862500011920929, |
| "rewards/chosen": -1.6460784673690796, |
| "rewards/margins": 1.8111892938613892, |
| "rewards/rejected": -3.4572677612304688, |
| "step": 6070 |
| }, |
| { |
| "epoch": 4.864, |
| "grad_norm": 10.431986808776855, |
| "learning_rate": 3.051147768369811e-06, |
| "logits/chosen": 0.8802177309989929, |
| "logits/rejected": 0.8142924308776855, |
| "logps/chosen": -208.6748046875, |
| "logps/rejected": -240.06729125976562, |
| "loss": 0.2808, |
| "rewards/accuracies": 0.925000011920929, |
| "rewards/chosen": -1.5522571802139282, |
| "rewards/margins": 2.0125110149383545, |
| "rewards/rejected": -3.5647683143615723, |
| "step": 6080 |
| }, |
| { |
| "epoch": 4.872, |
| "grad_norm": 15.201266288757324, |
| "learning_rate": 3.0443360780338034e-06, |
| "logits/chosen": 0.8416620492935181, |
| "logits/rejected": 0.8180571794509888, |
| "logps/chosen": -210.61656188964844, |
| "logps/rejected": -238.749267578125, |
| "loss": 0.4195, |
| "rewards/accuracies": 0.7750000357627869, |
| "rewards/chosen": -1.1236116886138916, |
| "rewards/margins": 1.5696989297866821, |
| "rewards/rejected": -2.693310499191284, |
| "step": 6090 |
| }, |
| { |
| "epoch": 4.88, |
| "grad_norm": 13.592103004455566, |
| "learning_rate": 3.0375201428557135e-06, |
| "logits/chosen": 0.9775605201721191, |
| "logits/rejected": 0.9477388262748718, |
| "logps/chosen": -211.87046813964844, |
| "logps/rejected": -223.9567413330078, |
| "loss": 0.3774, |
| "rewards/accuracies": 0.8500000238418579, |
| "rewards/chosen": -1.3752092123031616, |
| "rewards/margins": 1.6053650379180908, |
| "rewards/rejected": -2.980574369430542, |
| "step": 6100 |
| }, |
| { |
| "epoch": 4.888, |
| "grad_norm": 18.705928802490234, |
| "learning_rate": 3.0307000159875733e-06, |
| "logits/chosen": 1.0056161880493164, |
| "logits/rejected": 1.0067530870437622, |
| "logps/chosen": -217.2322235107422, |
| "logps/rejected": -247.19590759277344, |
| "loss": 0.3999, |
| "rewards/accuracies": 0.824999988079071, |
| "rewards/chosen": -1.5061392784118652, |
| "rewards/margins": 1.4345601797103882, |
| "rewards/rejected": -2.940699338912964, |
| "step": 6110 |
| }, |
| { |
| "epoch": 4.896, |
| "grad_norm": 25.736547470092773, |
| "learning_rate": 3.0238757506141013e-06, |
| "logits/chosen": 0.9335775375366211, |
| "logits/rejected": 0.8118970990180969, |
| "logps/chosen": -203.6862030029297, |
| "logps/rejected": -248.3121337890625, |
| "loss": 0.3255, |
| "rewards/accuracies": 0.862500011920929, |
| "rewards/chosen": -0.9543191194534302, |
| "rewards/margins": 2.1874382495880127, |
| "rewards/rejected": -3.1417574882507324, |
| "step": 6120 |
| }, |
| { |
| "epoch": 4.904, |
| "grad_norm": 36.35403823852539, |
| "learning_rate": 3.0170473999522914e-06, |
| "logits/chosen": 0.7437816262245178, |
| "logits/rejected": 0.7528760433197021, |
| "logps/chosen": -203.16519165039062, |
| "logps/rejected": -233.81167602539062, |
| "loss": 0.3915, |
| "rewards/accuracies": 0.875, |
| "rewards/chosen": -1.3550660610198975, |
| "rewards/margins": 1.738660454750061, |
| "rewards/rejected": -3.093726396560669, |
| "step": 6130 |
| }, |
| { |
| "epoch": 4.912, |
| "grad_norm": 21.394716262817383, |
| "learning_rate": 3.010215017250993e-06, |
| "logits/chosen": 0.8748787045478821, |
| "logits/rejected": 0.8038078546524048, |
| "logps/chosen": -190.148681640625, |
| "logps/rejected": -229.1069793701172, |
| "loss": 0.3333, |
| "rewards/accuracies": 0.8375000357627869, |
| "rewards/chosen": -1.0847818851470947, |
| "rewards/margins": 1.952123999595642, |
| "rewards/rejected": -3.0369060039520264, |
| "step": 6140 |
| }, |
| { |
| "epoch": 4.92, |
| "grad_norm": 10.547325134277344, |
| "learning_rate": 3.0033786557904982e-06, |
| "logits/chosen": 0.9673800468444824, |
| "logits/rejected": 0.8164333701133728, |
| "logps/chosen": -215.92520141601562, |
| "logps/rejected": -247.7021942138672, |
| "loss": 0.385, |
| "rewards/accuracies": 0.862500011920929, |
| "rewards/chosen": -1.3397104740142822, |
| "rewards/margins": 1.9476795196533203, |
| "rewards/rejected": -3.2873897552490234, |
| "step": 6150 |
| }, |
| { |
| "epoch": 4.928, |
| "grad_norm": 8.210875511169434, |
| "learning_rate": 2.996538368882127e-06, |
| "logits/chosen": 1.024520754814148, |
| "logits/rejected": 0.8471878170967102, |
| "logps/chosen": -226.50840759277344, |
| "logps/rejected": -246.0098114013672, |
| "loss": 0.2765, |
| "rewards/accuracies": 0.875, |
| "rewards/chosen": -0.8774771690368652, |
| "rewards/margins": 1.9889500141143799, |
| "rewards/rejected": -2.866427183151245, |
| "step": 6160 |
| }, |
| { |
| "epoch": 4.936, |
| "grad_norm": 11.18367862701416, |
| "learning_rate": 2.9896942098678124e-06, |
| "logits/chosen": 0.9735992550849915, |
| "logits/rejected": 0.981691300868988, |
| "logps/chosen": -192.818359375, |
| "logps/rejected": -243.27273559570312, |
| "loss": 0.3407, |
| "rewards/accuracies": 0.925000011920929, |
| "rewards/chosen": -0.7151866555213928, |
| "rewards/margins": 1.898476481437683, |
| "rewards/rejected": -2.6136631965637207, |
| "step": 6170 |
| }, |
| { |
| "epoch": 4.944, |
| "grad_norm": 22.406173706054688, |
| "learning_rate": 2.982846232119679e-06, |
| "logits/chosen": 1.0046285390853882, |
| "logits/rejected": 0.9979419708251953, |
| "logps/chosen": -216.0959014892578, |
| "logps/rejected": -244.8262176513672, |
| "loss": 0.324, |
| "rewards/accuracies": 0.9125000238418579, |
| "rewards/chosen": -1.145372986793518, |
| "rewards/margins": 1.6784679889678955, |
| "rewards/rejected": -2.823841094970703, |
| "step": 6180 |
| }, |
| { |
| "epoch": 4.952, |
| "grad_norm": 28.244096755981445, |
| "learning_rate": 2.975994489039634e-06, |
| "logits/chosen": 0.9947078824043274, |
| "logits/rejected": 0.91530841588974, |
| "logps/chosen": -208.3612823486328, |
| "logps/rejected": -234.4157257080078, |
| "loss": 0.3708, |
| "rewards/accuracies": 0.8500000238418579, |
| "rewards/chosen": -1.6478198766708374, |
| "rewards/margins": 1.7686901092529297, |
| "rewards/rejected": -3.4165101051330566, |
| "step": 6190 |
| }, |
| { |
| "epoch": 4.96, |
| "grad_norm": 18.052692413330078, |
| "learning_rate": 2.9691390340589467e-06, |
| "logits/chosen": 1.0676629543304443, |
| "logits/rejected": 0.9271091818809509, |
| "logps/chosen": -199.14598083496094, |
| "logps/rejected": -222.794921875, |
| "loss": 0.2456, |
| "rewards/accuracies": 0.9375, |
| "rewards/chosen": -0.8989327549934387, |
| "rewards/margins": 2.075803518295288, |
| "rewards/rejected": -2.974736213684082, |
| "step": 6200 |
| }, |
| { |
| "epoch": 4.968, |
| "grad_norm": 10.866455078125, |
| "learning_rate": 2.9622799206378306e-06, |
| "logits/chosen": 1.0321582555770874, |
| "logits/rejected": 0.9904826283454895, |
| "logps/chosen": -201.66383361816406, |
| "logps/rejected": -241.1051788330078, |
| "loss": 0.3023, |
| "rewards/accuracies": 0.887499988079071, |
| "rewards/chosen": -1.0962049961090088, |
| "rewards/margins": 1.7505791187286377, |
| "rewards/rejected": -2.8467838764190674, |
| "step": 6210 |
| }, |
| { |
| "epoch": 4.976, |
| "grad_norm": 14.606306076049805, |
| "learning_rate": 2.955417202265032e-06, |
| "logits/chosen": 1.0466164350509644, |
| "logits/rejected": 0.9499098062515259, |
| "logps/chosen": -206.0836639404297, |
| "logps/rejected": -229.9392852783203, |
| "loss": 0.3209, |
| "rewards/accuracies": 0.9125000238418579, |
| "rewards/chosen": -1.0380445718765259, |
| "rewards/margins": 1.6832345724105835, |
| "rewards/rejected": -2.7212791442871094, |
| "step": 6220 |
| }, |
| { |
| "epoch": 4.984, |
| "grad_norm": 27.189558029174805, |
| "learning_rate": 2.948550932457407e-06, |
| "logits/chosen": 0.9810224771499634, |
| "logits/rejected": 0.8776025176048279, |
| "logps/chosen": -220.196044921875, |
| "logps/rejected": -241.2982940673828, |
| "loss": 0.344, |
| "rewards/accuracies": 0.862500011920929, |
| "rewards/chosen": -1.4501808881759644, |
| "rewards/margins": 1.7209481000900269, |
| "rewards/rejected": -3.171128988265991, |
| "step": 6230 |
| }, |
| { |
| "epoch": 4.992, |
| "grad_norm": 13.859283447265625, |
| "learning_rate": 2.9416811647595052e-06, |
| "logits/chosen": 1.027130365371704, |
| "logits/rejected": 0.9805015921592712, |
| "logps/chosen": -207.3785858154297, |
| "logps/rejected": -245.68896484375, |
| "loss": 0.2176, |
| "rewards/accuracies": 0.9625000357627869, |
| "rewards/chosen": -1.786203384399414, |
| "rewards/margins": 2.202639579772949, |
| "rewards/rejected": -3.9888432025909424, |
| "step": 6240 |
| }, |
| { |
| "epoch": 5.0, |
| "grad_norm": 27.184709548950195, |
| "learning_rate": 2.9348079527431565e-06, |
| "logits/chosen": 0.9774474501609802, |
| "logits/rejected": 0.830977737903595, |
| "logps/chosen": -207.9765625, |
| "logps/rejected": -246.947021484375, |
| "loss": 0.2871, |
| "rewards/accuracies": 0.887499988079071, |
| "rewards/chosen": -1.9358631372451782, |
| "rewards/margins": 2.2705037593841553, |
| "rewards/rejected": -4.206367492675781, |
| "step": 6250 |
| }, |
| { |
| "epoch": 5.008, |
| "grad_norm": 9.864638328552246, |
| "learning_rate": 2.927931350007048e-06, |
| "logits/chosen": 0.9456281661987305, |
| "logits/rejected": 0.8694238662719727, |
| "logps/chosen": -221.7121124267578, |
| "logps/rejected": -256.9190979003906, |
| "loss": 0.2611, |
| "rewards/accuracies": 0.925000011920929, |
| "rewards/chosen": -2.0195796489715576, |
| "rewards/margins": 2.1393039226531982, |
| "rewards/rejected": -4.158883571624756, |
| "step": 6260 |
| }, |
| { |
| "epoch": 5.016, |
| "grad_norm": 8.061872482299805, |
| "learning_rate": 2.9210514101763116e-06, |
| "logits/chosen": 0.9081168174743652, |
| "logits/rejected": 0.8007159233093262, |
| "logps/chosen": -208.5377197265625, |
| "logps/rejected": -260.0176696777344, |
| "loss": 0.2097, |
| "rewards/accuracies": 0.9750000238418579, |
| "rewards/chosen": -2.110689878463745, |
| "rewards/margins": 2.2806320190429688, |
| "rewards/rejected": -4.391321659088135, |
| "step": 6270 |
| }, |
| { |
| "epoch": 5.024, |
| "grad_norm": 13.147801399230957, |
| "learning_rate": 2.9141681869020973e-06, |
| "logits/chosen": 0.8848690390586853, |
| "logits/rejected": 0.9038190245628357, |
| "logps/chosen": -234.75083923339844, |
| "logps/rejected": -270.18408203125, |
| "loss": 0.2374, |
| "rewards/accuracies": 0.9125000238418579, |
| "rewards/chosen": -2.0579333305358887, |
| "rewards/margins": 2.2107951641082764, |
| "rewards/rejected": -4.268728733062744, |
| "step": 6280 |
| }, |
| { |
| "epoch": 5.032, |
| "grad_norm": 22.0900936126709, |
| "learning_rate": 2.907281733861164e-06, |
| "logits/chosen": 0.8925933241844177, |
| "logits/rejected": 0.8973062634468079, |
| "logps/chosen": -206.32763671875, |
| "logps/rejected": -228.326904296875, |
| "loss": 0.3404, |
| "rewards/accuracies": 0.8500000238418579, |
| "rewards/chosen": -1.4018090963363647, |
| "rewards/margins": 1.735507607460022, |
| "rewards/rejected": -3.1373164653778076, |
| "step": 6290 |
| }, |
| { |
| "epoch": 5.04, |
| "grad_norm": 8.617568016052246, |
| "learning_rate": 2.900392104755455e-06, |
| "logits/chosen": 0.8251405954360962, |
| "logits/rejected": 0.7466452121734619, |
| "logps/chosen": -231.0389862060547, |
| "logps/rejected": -279.979248046875, |
| "loss": 0.261, |
| "rewards/accuracies": 0.925000011920929, |
| "rewards/chosen": -2.3339290618896484, |
| "rewards/margins": 2.2454123497009277, |
| "rewards/rejected": -4.579341411590576, |
| "step": 6300 |
| }, |
| { |
| "epoch": 5.048, |
| "grad_norm": 10.349010467529297, |
| "learning_rate": 2.8934993533116827e-06, |
| "logits/chosen": 0.8101547360420227, |
| "logits/rejected": 0.8393245935440063, |
| "logps/chosen": -218.5235137939453, |
| "logps/rejected": -276.7334899902344, |
| "loss": 0.2239, |
| "rewards/accuracies": 0.9375, |
| "rewards/chosen": -1.8703062534332275, |
| "rewards/margins": 2.4195773601531982, |
| "rewards/rejected": -4.289883613586426, |
| "step": 6310 |
| }, |
| { |
| "epoch": 5.056, |
| "grad_norm": 6.812648296356201, |
| "learning_rate": 2.8866035332809083e-06, |
| "logits/chosen": 0.9793133735656738, |
| "logits/rejected": 0.9104774594306946, |
| "logps/chosen": -209.8477783203125, |
| "logps/rejected": -235.04031372070312, |
| "loss": 0.2581, |
| "rewards/accuracies": 0.9000000357627869, |
| "rewards/chosen": -1.3933300971984863, |
| "rewards/margins": 2.483262777328491, |
| "rewards/rejected": -3.8765931129455566, |
| "step": 6320 |
| }, |
| { |
| "epoch": 5.064, |
| "grad_norm": 13.762728691101074, |
| "learning_rate": 2.879704698438121e-06, |
| "logits/chosen": 0.9087037444114685, |
| "logits/rejected": 0.890673816204071, |
| "logps/chosen": -208.9591064453125, |
| "logps/rejected": -263.70452880859375, |
| "loss": 0.2147, |
| "rewards/accuracies": 0.925000011920929, |
| "rewards/chosen": -1.8164809942245483, |
| "rewards/margins": 2.4334583282470703, |
| "rewards/rejected": -4.24993896484375, |
| "step": 6330 |
| }, |
| { |
| "epoch": 5.072, |
| "grad_norm": 27.724454879760742, |
| "learning_rate": 2.8728029025818206e-06, |
| "logits/chosen": 0.9317230582237244, |
| "logits/rejected": 0.7435727119445801, |
| "logps/chosen": -212.3452911376953, |
| "logps/rejected": -262.7144470214844, |
| "loss": 0.2844, |
| "rewards/accuracies": 0.862500011920929, |
| "rewards/chosen": -1.7303894758224487, |
| "rewards/margins": 2.263822317123413, |
| "rewards/rejected": -3.9942119121551514, |
| "step": 6340 |
| }, |
| { |
| "epoch": 5.08, |
| "grad_norm": 23.111251831054688, |
| "learning_rate": 2.865898199533597e-06, |
| "logits/chosen": 0.9201246500015259, |
| "logits/rejected": 0.8896446228027344, |
| "logps/chosen": -209.3739013671875, |
| "logps/rejected": -240.9625244140625, |
| "loss": 0.2962, |
| "rewards/accuracies": 0.875, |
| "rewards/chosen": -1.9742488861083984, |
| "rewards/margins": 1.8627338409423828, |
| "rewards/rejected": -3.8369832038879395, |
| "step": 6350 |
| }, |
| { |
| "epoch": 5.088, |
| "grad_norm": 24.608367919921875, |
| "learning_rate": 2.8589906431377133e-06, |
| "logits/chosen": 1.0965173244476318, |
| "logits/rejected": 0.9428312182426453, |
| "logps/chosen": -223.93748474121094, |
| "logps/rejected": -240.9108428955078, |
| "loss": 0.2826, |
| "rewards/accuracies": 0.875, |
| "rewards/chosen": -1.867902398109436, |
| "rewards/margins": 2.019596815109253, |
| "rewards/rejected": -3.8874988555908203, |
| "step": 6360 |
| }, |
| { |
| "epoch": 5.096, |
| "grad_norm": 14.740994453430176, |
| "learning_rate": 2.8520802872606803e-06, |
| "logits/chosen": 0.9774947166442871, |
| "logits/rejected": 0.9799107909202576, |
| "logps/chosen": -204.7219696044922, |
| "logps/rejected": -231.45262145996094, |
| "loss": 0.3067, |
| "rewards/accuracies": 0.9000000357627869, |
| "rewards/chosen": -1.5889536142349243, |
| "rewards/margins": 2.0691580772399902, |
| "rewards/rejected": -3.658111572265625, |
| "step": 6370 |
| }, |
| { |
| "epoch": 5.104, |
| "grad_norm": 9.468008995056152, |
| "learning_rate": 2.8451671857908414e-06, |
| "logits/chosen": 0.9921843409538269, |
| "logits/rejected": 0.9002591371536255, |
| "logps/chosen": -198.7250213623047, |
| "logps/rejected": -225.64834594726562, |
| "loss": 0.2472, |
| "rewards/accuracies": 0.949999988079071, |
| "rewards/chosen": -1.3202179670333862, |
| "rewards/margins": 2.2721245288848877, |
| "rewards/rejected": -3.5923423767089844, |
| "step": 6380 |
| }, |
| { |
| "epoch": 5.112, |
| "grad_norm": 14.54666805267334, |
| "learning_rate": 2.8382513926379508e-06, |
| "logits/chosen": 0.9203723073005676, |
| "logits/rejected": 0.8950970768928528, |
| "logps/chosen": -196.84657287597656, |
| "logps/rejected": -230.43809509277344, |
| "loss": 0.2501, |
| "rewards/accuracies": 0.9625000357627869, |
| "rewards/chosen": -1.5130068063735962, |
| "rewards/margins": 2.144577741622925, |
| "rewards/rejected": -3.6575844287872314, |
| "step": 6390 |
| }, |
| { |
| "epoch": 5.12, |
| "grad_norm": 27.838401794433594, |
| "learning_rate": 2.831332961732754e-06, |
| "logits/chosen": 1.0219676494598389, |
| "logits/rejected": 0.9032443165779114, |
| "logps/chosen": -212.4551544189453, |
| "logps/rejected": -241.2015838623047, |
| "loss": 0.2542, |
| "rewards/accuracies": 0.887499988079071, |
| "rewards/chosen": -1.7502228021621704, |
| "rewards/margins": 2.144639015197754, |
| "rewards/rejected": -3.894861936569214, |
| "step": 6400 |
| }, |
| { |
| "epoch": 5.128, |
| "grad_norm": 23.901247024536133, |
| "learning_rate": 2.8244119470265628e-06, |
| "logits/chosen": 0.9221467971801758, |
| "logits/rejected": 0.844575822353363, |
| "logps/chosen": -216.648681640625, |
| "logps/rejected": -226.3651123046875, |
| "loss": 0.3867, |
| "rewards/accuracies": 0.875, |
| "rewards/chosen": -1.8910839557647705, |
| "rewards/margins": 2.042074203491211, |
| "rewards/rejected": -3.9331586360931396, |
| "step": 6410 |
| }, |
| { |
| "epoch": 5.136, |
| "grad_norm": 28.365636825561523, |
| "learning_rate": 2.817488402490841e-06, |
| "logits/chosen": 0.9146007895469666, |
| "logits/rejected": 0.9036192297935486, |
| "logps/chosen": -217.9712371826172, |
| "logps/rejected": -246.8628387451172, |
| "loss": 0.2528, |
| "rewards/accuracies": 0.9125000238418579, |
| "rewards/chosen": -2.148552417755127, |
| "rewards/margins": 2.226809024810791, |
| "rewards/rejected": -4.375361919403076, |
| "step": 6420 |
| }, |
| { |
| "epoch": 5.144, |
| "grad_norm": 11.069653511047363, |
| "learning_rate": 2.8105623821167804e-06, |
| "logits/chosen": 0.9730092883110046, |
| "logits/rejected": 0.8130871057510376, |
| "logps/chosen": -223.188720703125, |
| "logps/rejected": -259.5501403808594, |
| "loss": 0.2751, |
| "rewards/accuracies": 0.925000011920929, |
| "rewards/chosen": -2.180257558822632, |
| "rewards/margins": 2.317899465560913, |
| "rewards/rejected": -4.498157024383545, |
| "step": 6430 |
| }, |
| { |
| "epoch": 5.152, |
| "grad_norm": 4.842264652252197, |
| "learning_rate": 2.8036339399148783e-06, |
| "logits/chosen": 0.8939194083213806, |
| "logits/rejected": 0.8858678936958313, |
| "logps/chosen": -210.72207641601562, |
| "logps/rejected": -263.359375, |
| "loss": 0.2706, |
| "rewards/accuracies": 0.9000000357627869, |
| "rewards/chosen": -1.6614614725112915, |
| "rewards/margins": 2.150581121444702, |
| "rewards/rejected": -3.812042713165283, |
| "step": 6440 |
| }, |
| { |
| "epoch": 5.16, |
| "grad_norm": 15.502528190612793, |
| "learning_rate": 2.796703129914519e-06, |
| "logits/chosen": 0.9737855792045593, |
| "logits/rejected": 0.8153099417686462, |
| "logps/chosen": -200.01934814453125, |
| "logps/rejected": -228.4278106689453, |
| "loss": 0.2503, |
| "rewards/accuracies": 0.949999988079071, |
| "rewards/chosen": -1.3171756267547607, |
| "rewards/margins": 2.079169988632202, |
| "rewards/rejected": -3.396345615386963, |
| "step": 6450 |
| }, |
| { |
| "epoch": 5.168, |
| "grad_norm": 13.45697021484375, |
| "learning_rate": 2.7897700061635517e-06, |
| "logits/chosen": 0.9658388495445251, |
| "logits/rejected": 0.8410293459892273, |
| "logps/chosen": -197.5312957763672, |
| "logps/rejected": -234.99929809570312, |
| "loss": 0.2299, |
| "rewards/accuracies": 0.9375, |
| "rewards/chosen": -1.5396157503128052, |
| "rewards/margins": 2.2552897930145264, |
| "rewards/rejected": -3.794905424118042, |
| "step": 6460 |
| }, |
| { |
| "epoch": 5.176, |
| "grad_norm": 21.632190704345703, |
| "learning_rate": 2.7828346227278676e-06, |
| "logits/chosen": 0.8654441833496094, |
| "logits/rejected": 0.8274869322776794, |
| "logps/chosen": -220.19541931152344, |
| "logps/rejected": -281.8578796386719, |
| "loss": 0.3081, |
| "rewards/accuracies": 0.9125000238418579, |
| "rewards/chosen": -2.2533280849456787, |
| "rewards/margins": 2.436194658279419, |
| "rewards/rejected": -4.689522743225098, |
| "step": 6470 |
| }, |
| { |
| "epoch": 5.184, |
| "grad_norm": 23.60213851928711, |
| "learning_rate": 2.7758970336909795e-06, |
| "logits/chosen": 0.9322023391723633, |
| "logits/rejected": 0.8540251851081848, |
| "logps/chosen": -226.8634796142578, |
| "logps/rejected": -249.7890625, |
| "loss": 0.3004, |
| "rewards/accuracies": 0.887499988079071, |
| "rewards/chosen": -1.6639741659164429, |
| "rewards/margins": 2.1177260875701904, |
| "rewards/rejected": -3.781700611114502, |
| "step": 6480 |
| }, |
| { |
| "epoch": 5.192, |
| "grad_norm": 8.637389183044434, |
| "learning_rate": 2.768957293153602e-06, |
| "logits/chosen": 1.001732587814331, |
| "logits/rejected": 1.0790340900421143, |
| "logps/chosen": -212.6641845703125, |
| "logps/rejected": -248.3900604248047, |
| "loss": 0.2027, |
| "rewards/accuracies": 0.949999988079071, |
| "rewards/chosen": -1.9972444772720337, |
| "rewards/margins": 2.5673165321350098, |
| "rewards/rejected": -4.564560890197754, |
| "step": 6490 |
| }, |
| { |
| "epoch": 5.2, |
| "grad_norm": 17.306949615478516, |
| "learning_rate": 2.7620154552332236e-06, |
| "logits/chosen": 1.0369716882705688, |
| "logits/rejected": 0.8845782279968262, |
| "logps/chosen": -210.2711944580078, |
| "logps/rejected": -248.135009765625, |
| "loss": 0.2221, |
| "rewards/accuracies": 0.925000011920929, |
| "rewards/chosen": -1.953507423400879, |
| "rewards/margins": 2.378849506378174, |
| "rewards/rejected": -4.332356929779053, |
| "step": 6500 |
| }, |
| { |
| "epoch": 5.208, |
| "grad_norm": 20.987321853637695, |
| "learning_rate": 2.755071574063692e-06, |
| "logits/chosen": 0.9889179468154907, |
| "logits/rejected": 0.9481157660484314, |
| "logps/chosen": -209.94509887695312, |
| "logps/rejected": -241.16763305664062, |
| "loss": 0.192, |
| "rewards/accuracies": 0.949999988079071, |
| "rewards/chosen": -1.9956191778182983, |
| "rewards/margins": 2.235707998275757, |
| "rewards/rejected": -4.231327056884766, |
| "step": 6510 |
| }, |
| { |
| "epoch": 5.216, |
| "grad_norm": 22.150367736816406, |
| "learning_rate": 2.7481257037947873e-06, |
| "logits/chosen": 1.0156019926071167, |
| "logits/rejected": 0.8769356608390808, |
| "logps/chosen": -219.45596313476562, |
| "logps/rejected": -244.5421905517578, |
| "loss": 0.2559, |
| "rewards/accuracies": 0.875, |
| "rewards/chosen": -1.335376262664795, |
| "rewards/margins": 2.1717984676361084, |
| "rewards/rejected": -3.5071747303009033, |
| "step": 6520 |
| }, |
| { |
| "epoch": 5.224, |
| "grad_norm": 24.29526138305664, |
| "learning_rate": 2.741177898591801e-06, |
| "logits/chosen": 1.0322328805923462, |
| "logits/rejected": 0.9097422957420349, |
| "logps/chosen": -215.4120635986328, |
| "logps/rejected": -245.8870849609375, |
| "loss": 0.2705, |
| "rewards/accuracies": 0.875, |
| "rewards/chosen": -2.2170920372009277, |
| "rewards/margins": 2.0834217071533203, |
| "rewards/rejected": -4.300513744354248, |
| "step": 6530 |
| }, |
| { |
| "epoch": 5.232, |
| "grad_norm": 11.549744606018066, |
| "learning_rate": 2.7342282126351145e-06, |
| "logits/chosen": 1.0487924814224243, |
| "logits/rejected": 0.9894416928291321, |
| "logps/chosen": -229.5109405517578, |
| "logps/rejected": -263.1656494140625, |
| "loss": 0.2757, |
| "rewards/accuracies": 0.862500011920929, |
| "rewards/chosen": -2.495918035507202, |
| "rewards/margins": 2.1933634281158447, |
| "rewards/rejected": -4.689281463623047, |
| "step": 6540 |
| }, |
| { |
| "epoch": 5.24, |
| "grad_norm": 11.867500305175781, |
| "learning_rate": 2.727276700119774e-06, |
| "logits/chosen": 1.0762747526168823, |
| "logits/rejected": 0.9830428957939148, |
| "logps/chosen": -216.1927490234375, |
| "logps/rejected": -268.2547302246094, |
| "loss": 0.3535, |
| "rewards/accuracies": 0.862500011920929, |
| "rewards/chosen": -2.7991743087768555, |
| "rewards/margins": 2.384692430496216, |
| "rewards/rejected": -5.183866500854492, |
| "step": 6550 |
| }, |
| { |
| "epoch": 5.248, |
| "grad_norm": 12.038691520690918, |
| "learning_rate": 2.720323415255071e-06, |
| "logits/chosen": 1.0917648077011108, |
| "logits/rejected": 1.0066547393798828, |
| "logps/chosen": -216.46788024902344, |
| "logps/rejected": -242.0109405517578, |
| "loss": 0.2506, |
| "rewards/accuracies": 0.9000000357627869, |
| "rewards/chosen": -2.0974338054656982, |
| "rewards/margins": 2.069669723510742, |
| "rewards/rejected": -4.1671037673950195, |
| "step": 6560 |
| }, |
| { |
| "epoch": 5.256, |
| "grad_norm": 16.80321502685547, |
| "learning_rate": 2.713368412264118e-06, |
| "logits/chosen": 0.9983375668525696, |
| "logits/rejected": 0.8874215483665466, |
| "logps/chosen": -205.94091796875, |
| "logps/rejected": -261.5921936035156, |
| "loss": 0.2188, |
| "rewards/accuracies": 0.9125000238418579, |
| "rewards/chosen": -1.691676139831543, |
| "rewards/margins": 2.6781671047210693, |
| "rewards/rejected": -4.369843482971191, |
| "step": 6570 |
| }, |
| { |
| "epoch": 5.264, |
| "grad_norm": 45.896324157714844, |
| "learning_rate": 2.7064117453834245e-06, |
| "logits/chosen": 1.0483051538467407, |
| "logits/rejected": 0.9006147384643555, |
| "logps/chosen": -196.8784637451172, |
| "logps/rejected": -240.83731079101562, |
| "loss": 0.2838, |
| "rewards/accuracies": 0.887499988079071, |
| "rewards/chosen": -1.3554176092147827, |
| "rewards/margins": 2.4722535610198975, |
| "rewards/rejected": -3.8276710510253906, |
| "step": 6580 |
| }, |
| { |
| "epoch": 5.272, |
| "grad_norm": 52.870967864990234, |
| "learning_rate": 2.699453468862477e-06, |
| "logits/chosen": 0.8586319088935852, |
| "logits/rejected": 0.8957145810127258, |
| "logps/chosen": -202.1999053955078, |
| "logps/rejected": -247.98497009277344, |
| "loss": 0.3508, |
| "rewards/accuracies": 0.862500011920929, |
| "rewards/chosen": -1.5604242086410522, |
| "rewards/margins": 2.2796757221221924, |
| "rewards/rejected": -3.840100049972534, |
| "step": 6590 |
| }, |
| { |
| "epoch": 5.28, |
| "grad_norm": 15.097871780395508, |
| "learning_rate": 2.6924936369633126e-06, |
| "logits/chosen": 0.9903114438056946, |
| "logits/rejected": 0.8972232937812805, |
| "logps/chosen": -220.2637176513672, |
| "logps/rejected": -246.101318359375, |
| "loss": 0.2577, |
| "rewards/accuracies": 0.9000000357627869, |
| "rewards/chosen": -1.400626540184021, |
| "rewards/margins": 2.2727909088134766, |
| "rewards/rejected": -3.673417806625366, |
| "step": 6600 |
| }, |
| { |
| "epoch": 5.288, |
| "grad_norm": 30.0377140045166, |
| "learning_rate": 2.6855323039601e-06, |
| "logits/chosen": 0.9595162272453308, |
| "logits/rejected": 0.9324502944946289, |
| "logps/chosen": -217.57749938964844, |
| "logps/rejected": -249.253662109375, |
| "loss": 0.2962, |
| "rewards/accuracies": 0.887499988079071, |
| "rewards/chosen": -2.08235502243042, |
| "rewards/margins": 2.116223096847534, |
| "rewards/rejected": -4.198577880859375, |
| "step": 6610 |
| }, |
| { |
| "epoch": 5.296, |
| "grad_norm": 35.225372314453125, |
| "learning_rate": 2.678569524138711e-06, |
| "logits/chosen": 1.0795904397964478, |
| "logits/rejected": 0.9872816205024719, |
| "logps/chosen": -217.6615753173828, |
| "logps/rejected": -247.40611267089844, |
| "loss": 0.2785, |
| "rewards/accuracies": 0.862500011920929, |
| "rewards/chosen": -1.6414073705673218, |
| "rewards/margins": 2.288874387741089, |
| "rewards/rejected": -3.9302818775177, |
| "step": 6620 |
| }, |
| { |
| "epoch": 5.304, |
| "grad_norm": 19.5015926361084, |
| "learning_rate": 2.671605351796302e-06, |
| "logits/chosen": 1.0191433429718018, |
| "logits/rejected": 1.0426959991455078, |
| "logps/chosen": -215.02542114257812, |
| "logps/rejected": -250.69419860839844, |
| "loss": 0.2692, |
| "rewards/accuracies": 0.9125000238418579, |
| "rewards/chosen": -1.4523265361785889, |
| "rewards/margins": 2.202432632446289, |
| "rewards/rejected": -3.654759168624878, |
| "step": 6630 |
| }, |
| { |
| "epoch": 5.312, |
| "grad_norm": 13.902155876159668, |
| "learning_rate": 2.664639841240888e-06, |
| "logits/chosen": 1.0611361265182495, |
| "logits/rejected": 1.0298746824264526, |
| "logps/chosen": -210.43655395507812, |
| "logps/rejected": -242.15591430664062, |
| "loss": 0.2278, |
| "rewards/accuracies": 0.9625000357627869, |
| "rewards/chosen": -1.0366039276123047, |
| "rewards/margins": 2.3139262199401855, |
| "rewards/rejected": -3.3505303859710693, |
| "step": 6640 |
| }, |
| { |
| "epoch": 5.32, |
| "grad_norm": 19.404386520385742, |
| "learning_rate": 2.6576730467909202e-06, |
| "logits/chosen": 0.9389854669570923, |
| "logits/rejected": 0.8830668330192566, |
| "logps/chosen": -211.813720703125, |
| "logps/rejected": -255.224365234375, |
| "loss": 0.2227, |
| "rewards/accuracies": 0.925000011920929, |
| "rewards/chosen": -1.772042155265808, |
| "rewards/margins": 2.3123362064361572, |
| "rewards/rejected": -4.084378719329834, |
| "step": 6650 |
| }, |
| { |
| "epoch": 5.328, |
| "grad_norm": 19.177539825439453, |
| "learning_rate": 2.6507050227748595e-06, |
| "logits/chosen": 0.9689971804618835, |
| "logits/rejected": 0.8664621710777283, |
| "logps/chosen": -218.7672882080078, |
| "logps/rejected": -242.08872985839844, |
| "loss": 0.2906, |
| "rewards/accuracies": 0.862500011920929, |
| "rewards/chosen": -1.9733574390411377, |
| "rewards/margins": 2.0987534523010254, |
| "rewards/rejected": -4.072110652923584, |
| "step": 6660 |
| }, |
| { |
| "epoch": 5.336, |
| "grad_norm": 19.155794143676758, |
| "learning_rate": 2.6437358235307574e-06, |
| "logits/chosen": 0.9650457501411438, |
| "logits/rejected": 0.9216805696487427, |
| "logps/chosen": -223.25161743164062, |
| "logps/rejected": -267.2557373046875, |
| "loss": 0.2284, |
| "rewards/accuracies": 0.925000011920929, |
| "rewards/chosen": -2.293266773223877, |
| "rewards/margins": 2.462886095046997, |
| "rewards/rejected": -4.756152629852295, |
| "step": 6670 |
| }, |
| { |
| "epoch": 5.344, |
| "grad_norm": 39.01654815673828, |
| "learning_rate": 2.6367655034058302e-06, |
| "logits/chosen": 0.9830673336982727, |
| "logits/rejected": 0.9171612858772278, |
| "logps/chosen": -213.6517791748047, |
| "logps/rejected": -247.17857360839844, |
| "loss": 0.1726, |
| "rewards/accuracies": 0.9625000357627869, |
| "rewards/chosen": -1.5426089763641357, |
| "rewards/margins": 2.6942059993743896, |
| "rewards/rejected": -4.236814975738525, |
| "step": 6680 |
| }, |
| { |
| "epoch": 5.352, |
| "grad_norm": 18.91624641418457, |
| "learning_rate": 2.629794116756035e-06, |
| "logits/chosen": 1.1095460653305054, |
| "logits/rejected": 1.073819875717163, |
| "logps/chosen": -196.78050231933594, |
| "logps/rejected": -221.8975067138672, |
| "loss": 0.2172, |
| "rewards/accuracies": 0.9375, |
| "rewards/chosen": -1.370259165763855, |
| "rewards/margins": 2.258117198944092, |
| "rewards/rejected": -3.628376007080078, |
| "step": 6690 |
| }, |
| { |
| "epoch": 5.36, |
| "grad_norm": 10.549464225769043, |
| "learning_rate": 2.6228217179456433e-06, |
| "logits/chosen": 0.9206059575080872, |
| "logits/rejected": 0.8385736346244812, |
| "logps/chosen": -229.06982421875, |
| "logps/rejected": -266.5553894042969, |
| "loss": 0.2318, |
| "rewards/accuracies": 0.9375, |
| "rewards/chosen": -2.381563663482666, |
| "rewards/margins": 2.3572521209716797, |
| "rewards/rejected": -4.7388153076171875, |
| "step": 6700 |
| }, |
| { |
| "epoch": 5.368, |
| "grad_norm": 18.678146362304688, |
| "learning_rate": 2.6158483613468227e-06, |
| "logits/chosen": 0.8933264017105103, |
| "logits/rejected": 0.9182813763618469, |
| "logps/chosen": -246.1497039794922, |
| "logps/rejected": -265.3963317871094, |
| "loss": 0.2916, |
| "rewards/accuracies": 0.875, |
| "rewards/chosen": -2.833988666534424, |
| "rewards/margins": 2.2055633068084717, |
| "rewards/rejected": -5.039552211761475, |
| "step": 6710 |
| }, |
| { |
| "epoch": 5.376, |
| "grad_norm": 15.417664527893066, |
| "learning_rate": 2.60887410133921e-06, |
| "logits/chosen": 1.0603265762329102, |
| "logits/rejected": 0.9646215438842773, |
| "logps/chosen": -213.02725219726562, |
| "logps/rejected": -248.04296875, |
| "loss": 0.1764, |
| "rewards/accuracies": 0.949999988079071, |
| "rewards/chosen": -2.2449893951416016, |
| "rewards/margins": 2.7219924926757812, |
| "rewards/rejected": -4.966982364654541, |
| "step": 6720 |
| }, |
| { |
| "epoch": 5.384, |
| "grad_norm": 16.097373962402344, |
| "learning_rate": 2.6018989923094827e-06, |
| "logits/chosen": 1.0397377014160156, |
| "logits/rejected": 1.0830873250961304, |
| "logps/chosen": -216.1459197998047, |
| "logps/rejected": -231.951904296875, |
| "loss": 0.2511, |
| "rewards/accuracies": 0.9125000238418579, |
| "rewards/chosen": -2.4918582439422607, |
| "rewards/margins": 2.454404354095459, |
| "rewards/rejected": -4.946262359619141, |
| "step": 6730 |
| }, |
| { |
| "epoch": 5.392, |
| "grad_norm": 15.579238891601562, |
| "learning_rate": 2.594923088650946e-06, |
| "logits/chosen": 1.000435709953308, |
| "logits/rejected": 1.0281956195831299, |
| "logps/chosen": -225.9973907470703, |
| "logps/rejected": -261.2825012207031, |
| "loss": 0.2612, |
| "rewards/accuracies": 0.887499988079071, |
| "rewards/chosen": -1.4454987049102783, |
| "rewards/margins": 2.452462911605835, |
| "rewards/rejected": -3.897961378097534, |
| "step": 6740 |
| }, |
| { |
| "epoch": 5.4, |
| "grad_norm": 23.80678367614746, |
| "learning_rate": 2.5879464447630947e-06, |
| "logits/chosen": 0.9257308840751648, |
| "logits/rejected": 0.9335107207298279, |
| "logps/chosen": -205.0893096923828, |
| "logps/rejected": -268.3067932128906, |
| "loss": 0.2616, |
| "rewards/accuracies": 0.9000000357627869, |
| "rewards/chosen": -1.4560121297836304, |
| "rewards/margins": 2.8784759044647217, |
| "rewards/rejected": -4.3344879150390625, |
| "step": 6750 |
| }, |
| { |
| "epoch": 5.408, |
| "grad_norm": 14.877538681030273, |
| "learning_rate": 2.5809691150512013e-06, |
| "logits/chosen": 1.0292404890060425, |
| "logits/rejected": 0.8708595633506775, |
| "logps/chosen": -230.5414581298828, |
| "logps/rejected": -267.4679870605469, |
| "loss": 0.2617, |
| "rewards/accuracies": 0.9125000238418579, |
| "rewards/chosen": -2.2800981998443604, |
| "rewards/margins": 2.2236454486846924, |
| "rewards/rejected": -4.503743648529053, |
| "step": 6760 |
| }, |
| { |
| "epoch": 5.416, |
| "grad_norm": 30.09552764892578, |
| "learning_rate": 2.573991153925883e-06, |
| "logits/chosen": 0.9744608998298645, |
| "logits/rejected": 0.9204347729682922, |
| "logps/chosen": -230.64749145507812, |
| "logps/rejected": -238.489990234375, |
| "loss": 0.2917, |
| "rewards/accuracies": 0.887499988079071, |
| "rewards/chosen": -1.851356863975525, |
| "rewards/margins": 2.2804553508758545, |
| "rewards/rejected": -4.131812572479248, |
| "step": 6770 |
| }, |
| { |
| "epoch": 5.424, |
| "grad_norm": 9.124749183654785, |
| "learning_rate": 2.5670126158026843e-06, |
| "logits/chosen": 0.9641228914260864, |
| "logits/rejected": 0.9122328758239746, |
| "logps/chosen": -209.06993103027344, |
| "logps/rejected": -239.84507751464844, |
| "loss": 0.1824, |
| "rewards/accuracies": 0.9625000357627869, |
| "rewards/chosen": -1.1467351913452148, |
| "rewards/margins": 2.435342788696289, |
| "rewards/rejected": -3.582077741622925, |
| "step": 6780 |
| }, |
| { |
| "epoch": 5.432, |
| "grad_norm": 13.851131439208984, |
| "learning_rate": 2.5600335551016447e-06, |
| "logits/chosen": 1.2253344058990479, |
| "logits/rejected": 1.1556156873703003, |
| "logps/chosen": -200.3877410888672, |
| "logps/rejected": -242.9773712158203, |
| "loss": 0.2018, |
| "rewards/accuracies": 0.9125000238418579, |
| "rewards/chosen": -1.4336289167404175, |
| "rewards/margins": 2.616694450378418, |
| "rewards/rejected": -4.050323486328125, |
| "step": 6790 |
| }, |
| { |
| "epoch": 5.44, |
| "grad_norm": 15.476871490478516, |
| "learning_rate": 2.553054026246884e-06, |
| "logits/chosen": 1.0492868423461914, |
| "logits/rejected": 0.955453097820282, |
| "logps/chosen": -229.8250732421875, |
| "logps/rejected": -254.77793884277344, |
| "loss": 0.2157, |
| "rewards/accuracies": 0.925000011920929, |
| "rewards/chosen": -1.6499685049057007, |
| "rewards/margins": 2.502528429031372, |
| "rewards/rejected": -4.152497291564941, |
| "step": 6800 |
| }, |
| { |
| "epoch": 5.448, |
| "grad_norm": 17.754188537597656, |
| "learning_rate": 2.546074083666169e-06, |
| "logits/chosen": 1.1146057844161987, |
| "logits/rejected": 1.0327948331832886, |
| "logps/chosen": -196.68882751464844, |
| "logps/rejected": -219.73692321777344, |
| "loss": 0.2566, |
| "rewards/accuracies": 0.925000011920929, |
| "rewards/chosen": -1.2834064960479736, |
| "rewards/margins": 2.399442434310913, |
| "rewards/rejected": -3.6828484535217285, |
| "step": 6810 |
| }, |
| { |
| "epoch": 5.456, |
| "grad_norm": 14.777560234069824, |
| "learning_rate": 2.539093781790494e-06, |
| "logits/chosen": 1.1234928369522095, |
| "logits/rejected": 0.8710097670555115, |
| "logps/chosen": -229.3218231201172, |
| "logps/rejected": -263.53204345703125, |
| "loss": 0.3007, |
| "rewards/accuracies": 0.9000000357627869, |
| "rewards/chosen": -2.53562593460083, |
| "rewards/margins": 2.2426531314849854, |
| "rewards/rejected": -4.778278827667236, |
| "step": 6820 |
| }, |
| { |
| "epoch": 5.464, |
| "grad_norm": 11.484394073486328, |
| "learning_rate": 2.5321131750536548e-06, |
| "logits/chosen": 1.0820449590682983, |
| "logits/rejected": 1.0439467430114746, |
| "logps/chosen": -210.1967315673828, |
| "logps/rejected": -252.2407684326172, |
| "loss": 0.2437, |
| "rewards/accuracies": 0.9125000238418579, |
| "rewards/chosen": -1.7417097091674805, |
| "rewards/margins": 2.493793487548828, |
| "rewards/rejected": -4.235503673553467, |
| "step": 6830 |
| }, |
| { |
| "epoch": 5.4719999999999995, |
| "grad_norm": 7.133698463439941, |
| "learning_rate": 2.525132317891827e-06, |
| "logits/chosen": 0.9405183792114258, |
| "logits/rejected": 0.7907823920249939, |
| "logps/chosen": -223.71621704101562, |
| "logps/rejected": -250.6181182861328, |
| "loss": 0.309, |
| "rewards/accuracies": 0.9000000357627869, |
| "rewards/chosen": -2.273303747177124, |
| "rewards/margins": 2.0772006511688232, |
| "rewards/rejected": -4.350503444671631, |
| "step": 6840 |
| }, |
| { |
| "epoch": 5.48, |
| "grad_norm": 36.052452087402344, |
| "learning_rate": 2.518151264743135e-06, |
| "logits/chosen": 1.0576660633087158, |
| "logits/rejected": 0.9024376273155212, |
| "logps/chosen": -219.67166137695312, |
| "logps/rejected": -256.7233581542969, |
| "loss": 0.2635, |
| "rewards/accuracies": 0.875, |
| "rewards/chosen": -2.318809986114502, |
| "rewards/margins": 2.248960494995117, |
| "rewards/rejected": -4.567770481109619, |
| "step": 6850 |
| }, |
| { |
| "epoch": 5.4879999999999995, |
| "grad_norm": 22.618186950683594, |
| "learning_rate": 2.5111700700472346e-06, |
| "logits/chosen": 1.0195873975753784, |
| "logits/rejected": 0.735422670841217, |
| "logps/chosen": -220.07212829589844, |
| "logps/rejected": -274.3692932128906, |
| "loss": 0.3001, |
| "rewards/accuracies": 0.9000000357627869, |
| "rewards/chosen": -2.25004243850708, |
| "rewards/margins": 2.4187867641448975, |
| "rewards/rejected": -4.668828964233398, |
| "step": 6860 |
| }, |
| { |
| "epoch": 5.496, |
| "grad_norm": 14.920022964477539, |
| "learning_rate": 2.5041887882448845e-06, |
| "logits/chosen": 0.9542160034179688, |
| "logits/rejected": 0.9047889709472656, |
| "logps/chosen": -217.98045349121094, |
| "logps/rejected": -259.5525817871094, |
| "loss": 0.2509, |
| "rewards/accuracies": 0.9125000238418579, |
| "rewards/chosen": -2.3205878734588623, |
| "rewards/margins": 2.485825777053833, |
| "rewards/rejected": -4.806413173675537, |
| "step": 6870 |
| }, |
| { |
| "epoch": 5.504, |
| "grad_norm": 27.746845245361328, |
| "learning_rate": 2.4972074737775215e-06, |
| "logits/chosen": 0.8094510436058044, |
| "logits/rejected": 0.820503830909729, |
| "logps/chosen": -235.88784790039062, |
| "logps/rejected": -269.9020690917969, |
| "loss": 0.2144, |
| "rewards/accuracies": 0.925000011920929, |
| "rewards/chosen": -3.0258710384368896, |
| "rewards/margins": 2.428048610687256, |
| "rewards/rejected": -5.453919410705566, |
| "step": 6880 |
| }, |
| { |
| "epoch": 5.5120000000000005, |
| "grad_norm": 38.07632064819336, |
| "learning_rate": 2.490226181086838e-06, |
| "logits/chosen": 1.0112783908843994, |
| "logits/rejected": 0.9033260345458984, |
| "logps/chosen": -205.05882263183594, |
| "logps/rejected": -243.261962890625, |
| "loss": 0.2399, |
| "rewards/accuracies": 0.925000011920929, |
| "rewards/chosen": -2.0925614833831787, |
| "rewards/margins": 2.7473719120025635, |
| "rewards/rejected": -4.839933395385742, |
| "step": 6890 |
| }, |
| { |
| "epoch": 5.52, |
| "grad_norm": 35.062713623046875, |
| "learning_rate": 2.4832449646143605e-06, |
| "logits/chosen": 1.0022151470184326, |
| "logits/rejected": 0.9039661288261414, |
| "logps/chosen": -211.7960968017578, |
| "logps/rejected": -265.2066345214844, |
| "loss": 0.3377, |
| "rewards/accuracies": 0.8500000238418579, |
| "rewards/chosen": -2.1607472896575928, |
| "rewards/margins": 2.2616782188415527, |
| "rewards/rejected": -4.422425746917725, |
| "step": 6900 |
| }, |
| { |
| "epoch": 5.5280000000000005, |
| "grad_norm": 21.59208869934082, |
| "learning_rate": 2.4762638788010123e-06, |
| "logits/chosen": 0.8795046210289001, |
| "logits/rejected": 0.8476438522338867, |
| "logps/chosen": -229.28501892089844, |
| "logps/rejected": -292.383544921875, |
| "loss": 0.283, |
| "rewards/accuracies": 0.9000000357627869, |
| "rewards/chosen": -2.4365899562835693, |
| "rewards/margins": 2.430114984512329, |
| "rewards/rejected": -4.86670446395874, |
| "step": 6910 |
| }, |
| { |
| "epoch": 5.536, |
| "grad_norm": 33.235862731933594, |
| "learning_rate": 2.4692829780867066e-06, |
| "logits/chosen": 0.9138635993003845, |
| "logits/rejected": 0.8161695599555969, |
| "logps/chosen": -233.41311645507812, |
| "logps/rejected": -261.68975830078125, |
| "loss": 0.2933, |
| "rewards/accuracies": 0.887499988079071, |
| "rewards/chosen": -1.5699150562286377, |
| "rewards/margins": 2.5199315547943115, |
| "rewards/rejected": -4.089846611022949, |
| "step": 6920 |
| }, |
| { |
| "epoch": 5.5440000000000005, |
| "grad_norm": 28.37091064453125, |
| "learning_rate": 2.4623023169099074e-06, |
| "logits/chosen": 1.0121078491210938, |
| "logits/rejected": 0.8701547980308533, |
| "logps/chosen": -226.9220733642578, |
| "logps/rejected": -260.5491638183594, |
| "loss": 0.2636, |
| "rewards/accuracies": 0.9125000238418579, |
| "rewards/chosen": -2.4063045978546143, |
| "rewards/margins": 2.338066577911377, |
| "rewards/rejected": -4.744370937347412, |
| "step": 6930 |
| }, |
| { |
| "epoch": 5.552, |
| "grad_norm": 17.56963348388672, |
| "learning_rate": 2.4553219497072144e-06, |
| "logits/chosen": 0.9864139556884766, |
| "logits/rejected": 0.9286383986473083, |
| "logps/chosen": -224.4748992919922, |
| "logps/rejected": -247.63027954101562, |
| "loss": 0.2675, |
| "rewards/accuracies": 0.9125000238418579, |
| "rewards/chosen": -2.1812076568603516, |
| "rewards/margins": 2.474334716796875, |
| "rewards/rejected": -4.655541896820068, |
| "step": 6940 |
| }, |
| { |
| "epoch": 5.5600000000000005, |
| "grad_norm": 37.597293853759766, |
| "learning_rate": 2.4483419309129315e-06, |
| "logits/chosen": 0.8857519030570984, |
| "logits/rejected": 0.9429466128349304, |
| "logps/chosen": -202.9930877685547, |
| "logps/rejected": -244.86172485351562, |
| "loss": 0.2741, |
| "rewards/accuracies": 0.925000011920929, |
| "rewards/chosen": -1.7118009328842163, |
| "rewards/margins": 2.3302266597747803, |
| "rewards/rejected": -4.042027473449707, |
| "step": 6950 |
| }, |
| { |
| "epoch": 5.568, |
| "grad_norm": 22.82415008544922, |
| "learning_rate": 2.441362314958649e-06, |
| "logits/chosen": 1.015781283378601, |
| "logits/rejected": 0.8939239382743835, |
| "logps/chosen": -197.0980987548828, |
| "logps/rejected": -247.9208526611328, |
| "loss": 0.2189, |
| "rewards/accuracies": 0.925000011920929, |
| "rewards/chosen": -1.4986218214035034, |
| "rewards/margins": 2.6701772212982178, |
| "rewards/rejected": -4.168798923492432, |
| "step": 6960 |
| }, |
| { |
| "epoch": 5.576, |
| "grad_norm": 30.6041259765625, |
| "learning_rate": 2.4343831562728135e-06, |
| "logits/chosen": 0.8862255215644836, |
| "logits/rejected": 0.8734444975852966, |
| "logps/chosen": -238.130859375, |
| "logps/rejected": -281.5000915527344, |
| "loss": 0.3029, |
| "rewards/accuracies": 0.8500000238418579, |
| "rewards/chosen": -2.5411267280578613, |
| "rewards/margins": 2.136220693588257, |
| "rewards/rejected": -4.677347660064697, |
| "step": 6970 |
| }, |
| { |
| "epoch": 5.584, |
| "grad_norm": 14.699413299560547, |
| "learning_rate": 2.4274045092803056e-06, |
| "logits/chosen": 0.9806970953941345, |
| "logits/rejected": 0.8635124564170837, |
| "logps/chosen": -230.31900024414062, |
| "logps/rejected": -276.25421142578125, |
| "loss": 0.241, |
| "rewards/accuracies": 0.9125000238418579, |
| "rewards/chosen": -2.257481098175049, |
| "rewards/margins": 2.4393088817596436, |
| "rewards/rejected": -4.696789741516113, |
| "step": 6980 |
| }, |
| { |
| "epoch": 5.592, |
| "grad_norm": 26.546295166015625, |
| "learning_rate": 2.4204264284020182e-06, |
| "logits/chosen": 1.0598602294921875, |
| "logits/rejected": 1.0161948204040527, |
| "logps/chosen": -208.1858367919922, |
| "logps/rejected": -228.87876892089844, |
| "loss": 0.3018, |
| "rewards/accuracies": 0.862500011920929, |
| "rewards/chosen": -1.463453769683838, |
| "rewards/margins": 2.1223933696746826, |
| "rewards/rejected": -3.5858471393585205, |
| "step": 6990 |
| }, |
| { |
| "epoch": 5.6, |
| "grad_norm": 13.010027885437012, |
| "learning_rate": 2.4134489680544263e-06, |
| "logits/chosen": 1.0219472646713257, |
| "logits/rejected": 0.916050910949707, |
| "logps/chosen": -237.35302734375, |
| "logps/rejected": -259.0421447753906, |
| "loss": 0.1984, |
| "rewards/accuracies": 0.9375, |
| "rewards/chosen": -1.8679497241973877, |
| "rewards/margins": 2.485231399536133, |
| "rewards/rejected": -4.3531813621521, |
| "step": 7000 |
| } |
| ], |
| "logging_steps": 10, |
| "max_steps": 12500, |
| "num_input_tokens_seen": 0, |
| "num_train_epochs": 10, |
| "save_steps": 1000, |
| "stateful_callbacks": { |
| "TrainerControl": { |
| "args": { |
| "should_epoch_stop": false, |
| "should_evaluate": false, |
| "should_log": false, |
| "should_save": true, |
| "should_training_stop": false |
| }, |
| "attributes": {} |
| } |
| }, |
| "total_flos": 1.0023474241339392e+18, |
| "train_batch_size": 8, |
| "trial_name": null, |
| "trial_params": null |
| } |
|
|