| { |
| "best_metric": null, |
| "best_model_checkpoint": null, |
| "epoch": 1.0, |
| "eval_steps": 500, |
| "global_step": 593, |
| "is_hyper_param_search": false, |
| "is_local_process_zero": true, |
| "is_world_process_zero": true, |
| "log_history": [ |
| { |
| "epoch": 0.0, |
| "learning_rate": 4.201680672268907e-09, |
| "logits/chosen": -0.6788080930709839, |
| "logits/rejected": -1.1750900745391846, |
| "logps/chosen": -702.8984985351562, |
| "logps/rejected": -239.67630004882812, |
| "loss": 0.6931, |
| "rewards/accuracies": 0.0, |
| "rewards/chosen": 0.0, |
| "rewards/margins": 0.0, |
| "rewards/rejected": 0.0, |
| "step": 1 |
| }, |
| { |
| "epoch": 0.0, |
| "learning_rate": 8.403361344537815e-09, |
| "logits/chosen": -1.6158480644226074, |
| "logits/rejected": -1.2959809303283691, |
| "logps/chosen": -112.90769958496094, |
| "logps/rejected": -81.65785217285156, |
| "loss": 0.6931, |
| "rewards/accuracies": 0.0, |
| "rewards/chosen": 0.0, |
| "rewards/margins": 0.0, |
| "rewards/rejected": 0.0, |
| "step": 2 |
| }, |
| { |
| "epoch": 0.01, |
| "learning_rate": 1.2605042016806723e-08, |
| "logits/chosen": -2.375753879547119, |
| "logits/rejected": -2.5303637981414795, |
| "logps/chosen": -105.81280517578125, |
| "logps/rejected": -131.5235595703125, |
| "loss": 0.6943, |
| "rewards/accuracies": 0.5, |
| "rewards/chosen": 0.023235511034727097, |
| "rewards/margins": -0.002191734267398715, |
| "rewards/rejected": 0.02542724646627903, |
| "step": 3 |
| }, |
| { |
| "epoch": 0.01, |
| "learning_rate": 1.680672268907563e-08, |
| "logits/chosen": -1.907819151878357, |
| "logits/rejected": -1.9828282594680786, |
| "logps/chosen": -243.6266326904297, |
| "logps/rejected": -293.4872741699219, |
| "loss": 0.6958, |
| "rewards/accuracies": 0.5, |
| "rewards/chosen": 0.013934326358139515, |
| "rewards/margins": -0.03516464680433273, |
| "rewards/rejected": 0.049098968505859375, |
| "step": 4 |
| }, |
| { |
| "epoch": 0.01, |
| "learning_rate": 2.1008403361344538e-08, |
| "logits/chosen": -1.5391994714736938, |
| "logits/rejected": -1.6013704538345337, |
| "logps/chosen": -514.83447265625, |
| "logps/rejected": -273.52606201171875, |
| "loss": 0.6919, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 0.04896850883960724, |
| "rewards/margins": 0.09175796806812286, |
| "rewards/rejected": -0.042789459228515625, |
| "step": 5 |
| }, |
| { |
| "epoch": 0.01, |
| "learning_rate": 2.5210084033613446e-08, |
| "logits/chosen": -2.251502513885498, |
| "logits/rejected": -1.4788130521774292, |
| "logps/chosen": -194.65187072753906, |
| "logps/rejected": -230.2232666015625, |
| "loss": 0.6948, |
| "rewards/accuracies": 0.5, |
| "rewards/chosen": 0.06929359585046768, |
| "rewards/margins": 0.0685802549123764, |
| "rewards/rejected": 0.000713348388671875, |
| "step": 6 |
| }, |
| { |
| "epoch": 0.01, |
| "learning_rate": 2.941176470588235e-08, |
| "logits/chosen": -1.6795597076416016, |
| "logits/rejected": -1.6621124744415283, |
| "logps/chosen": -188.00582885742188, |
| "logps/rejected": -178.40765380859375, |
| "loss": 0.6955, |
| "rewards/accuracies": 0.5, |
| "rewards/chosen": 0.004383087158203125, |
| "rewards/margins": -0.015031430870294571, |
| "rewards/rejected": 0.019414519891142845, |
| "step": 7 |
| }, |
| { |
| "epoch": 0.01, |
| "learning_rate": 3.361344537815126e-08, |
| "logits/chosen": -1.2061922550201416, |
| "logits/rejected": -1.4656660556793213, |
| "logps/chosen": -493.43206787109375, |
| "logps/rejected": -74.92171478271484, |
| "loss": 0.6919, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 0.07655182480812073, |
| "rewards/margins": 0.10538730025291443, |
| "rewards/rejected": -0.028835486620664597, |
| "step": 8 |
| }, |
| { |
| "epoch": 0.02, |
| "learning_rate": 3.7815126050420164e-08, |
| "logits/chosen": -1.5676227807998657, |
| "logits/rejected": -1.5455267429351807, |
| "logps/chosen": -228.5581817626953, |
| "logps/rejected": -194.3417510986328, |
| "loss": 0.6953, |
| "rewards/accuracies": 0.5, |
| "rewards/chosen": 0.03210144117474556, |
| "rewards/margins": -0.005328751169145107, |
| "rewards/rejected": 0.03743019327521324, |
| "step": 9 |
| }, |
| { |
| "epoch": 0.02, |
| "learning_rate": 4.2016806722689076e-08, |
| "logits/chosen": -1.2673882246017456, |
| "logits/rejected": -1.175107717514038, |
| "logps/chosen": -226.69273376464844, |
| "logps/rejected": -170.93002319335938, |
| "loss": 0.6932, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 0.05511780083179474, |
| "rewards/margins": 0.0763774886727333, |
| "rewards/rejected": -0.021259689703583717, |
| "step": 10 |
| }, |
| { |
| "epoch": 0.02, |
| "learning_rate": 4.621848739495798e-08, |
| "logits/chosen": -1.7369565963745117, |
| "logits/rejected": -2.0291335582733154, |
| "logps/chosen": -134.85565185546875, |
| "logps/rejected": -61.743980407714844, |
| "loss": 0.6925, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": -0.0022448543459177017, |
| "rewards/margins": 0.011675357818603516, |
| "rewards/rejected": -0.013920212164521217, |
| "step": 11 |
| }, |
| { |
| "epoch": 0.02, |
| "learning_rate": 5.042016806722689e-08, |
| "logits/chosen": -1.3196473121643066, |
| "logits/rejected": -1.325734257698059, |
| "logps/chosen": -80.28683471679688, |
| "logps/rejected": -79.56066131591797, |
| "loss": 0.6918, |
| "rewards/accuracies": 0.5, |
| "rewards/chosen": 0.006443023681640625, |
| "rewards/margins": 0.021741105243563652, |
| "rewards/rejected": -0.015298080630600452, |
| "step": 12 |
| }, |
| { |
| "epoch": 0.02, |
| "learning_rate": 5.46218487394958e-08, |
| "logits/chosen": -1.5383967161178589, |
| "logits/rejected": -1.4319273233413696, |
| "logps/chosen": -71.45745086669922, |
| "logps/rejected": -93.32796478271484, |
| "loss": 0.6971, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": -0.006036281585693359, |
| "rewards/margins": 0.015392017550766468, |
| "rewards/rejected": -0.021428298205137253, |
| "step": 13 |
| }, |
| { |
| "epoch": 0.02, |
| "learning_rate": 5.88235294117647e-08, |
| "logits/chosen": -1.968656301498413, |
| "logits/rejected": -1.845158338546753, |
| "logps/chosen": -168.4257049560547, |
| "logps/rejected": -300.03240966796875, |
| "loss": 0.6904, |
| "rewards/accuracies": 0.5, |
| "rewards/chosen": -0.027264786884188652, |
| "rewards/margins": -0.05615234375, |
| "rewards/rejected": 0.028887558728456497, |
| "step": 14 |
| }, |
| { |
| "epoch": 0.03, |
| "learning_rate": 6.302521008403361e-08, |
| "logits/chosen": -1.1591368913650513, |
| "logits/rejected": -1.4170737266540527, |
| "logps/chosen": -538.101806640625, |
| "logps/rejected": -236.76358032226562, |
| "loss": 0.6928, |
| "rewards/accuracies": 0.5, |
| "rewards/chosen": 0.03155364841222763, |
| "rewards/margins": 0.02361450158059597, |
| "rewards/rejected": 0.00793914869427681, |
| "step": 15 |
| }, |
| { |
| "epoch": 0.03, |
| "learning_rate": 6.722689075630252e-08, |
| "logits/chosen": -1.7213101387023926, |
| "logits/rejected": -1.8231241703033447, |
| "logps/chosen": -196.15289306640625, |
| "logps/rejected": -119.35342407226562, |
| "loss": 0.6931, |
| "rewards/accuracies": 0.5, |
| "rewards/chosen": 0.029254913330078125, |
| "rewards/margins": 0.023168563842773438, |
| "rewards/rejected": 0.0060863494873046875, |
| "step": 16 |
| }, |
| { |
| "epoch": 0.03, |
| "learning_rate": 7.142857142857142e-08, |
| "logits/chosen": -1.7846002578735352, |
| "logits/rejected": -2.3181114196777344, |
| "logps/chosen": -273.34564208984375, |
| "logps/rejected": -146.905029296875, |
| "loss": 0.6954, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 0.03240509331226349, |
| "rewards/margins": 0.0660804733633995, |
| "rewards/rejected": -0.033675383776426315, |
| "step": 17 |
| }, |
| { |
| "epoch": 0.03, |
| "learning_rate": 7.563025210084033e-08, |
| "logits/chosen": -2.3839895725250244, |
| "logits/rejected": -1.8420289754867554, |
| "logps/chosen": -47.616455078125, |
| "logps/rejected": -177.2080841064453, |
| "loss": 0.6935, |
| "rewards/accuracies": 0.5, |
| "rewards/chosen": 0.0052467347122728825, |
| "rewards/margins": 0.012156296521425247, |
| "rewards/rejected": -0.006909562274813652, |
| "step": 18 |
| }, |
| { |
| "epoch": 0.03, |
| "learning_rate": 7.983193277310923e-08, |
| "logits/chosen": -2.3161439895629883, |
| "logits/rejected": -1.8462892770767212, |
| "logps/chosen": -96.58424377441406, |
| "logps/rejected": -209.37664794921875, |
| "loss": 0.6883, |
| "rewards/accuracies": 0.5, |
| "rewards/chosen": -0.0225248821079731, |
| "rewards/margins": 0.016490697860717773, |
| "rewards/rejected": -0.03901557996869087, |
| "step": 19 |
| }, |
| { |
| "epoch": 0.03, |
| "learning_rate": 8.403361344537815e-08, |
| "logits/chosen": -2.2219033241271973, |
| "logits/rejected": -2.0519139766693115, |
| "logps/chosen": -346.8481750488281, |
| "logps/rejected": -1364.489990234375, |
| "loss": 0.6862, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 0.08504104614257812, |
| "rewards/margins": 0.16014480590820312, |
| "rewards/rejected": -0.075103759765625, |
| "step": 20 |
| }, |
| { |
| "epoch": 0.04, |
| "learning_rate": 8.823529411764706e-08, |
| "logits/chosen": -1.4293802976608276, |
| "logits/rejected": -1.661201000213623, |
| "logps/chosen": -307.6474609375, |
| "logps/rejected": -215.94967651367188, |
| "loss": 0.6908, |
| "rewards/accuracies": 0.5, |
| "rewards/chosen": 0.004551697056740522, |
| "rewards/margins": 0.012935257516801357, |
| "rewards/rejected": -0.008383559994399548, |
| "step": 21 |
| }, |
| { |
| "epoch": 0.04, |
| "learning_rate": 9.243697478991596e-08, |
| "logits/chosen": -1.8076047897338867, |
| "logits/rejected": -1.5782675743103027, |
| "logps/chosen": -179.2224884033203, |
| "logps/rejected": -232.96527099609375, |
| "loss": 0.6916, |
| "rewards/accuracies": 0.5, |
| "rewards/chosen": 0.04394521564245224, |
| "rewards/margins": 0.08951330184936523, |
| "rewards/rejected": -0.045568086206912994, |
| "step": 22 |
| }, |
| { |
| "epoch": 0.04, |
| "learning_rate": 9.663865546218488e-08, |
| "logits/chosen": -1.7942882776260376, |
| "logits/rejected": -1.0943225622177124, |
| "logps/chosen": -55.99930191040039, |
| "logps/rejected": -140.6543426513672, |
| "loss": 0.6901, |
| "rewards/accuracies": 0.5, |
| "rewards/chosen": 0.023340702056884766, |
| "rewards/margins": 0.0005490314215421677, |
| "rewards/rejected": 0.022791672497987747, |
| "step": 23 |
| }, |
| { |
| "epoch": 0.04, |
| "learning_rate": 1.0084033613445378e-07, |
| "logits/chosen": -0.837689995765686, |
| "logits/rejected": -1.8798249959945679, |
| "logps/chosen": -213.58486938476562, |
| "logps/rejected": -28.56793785095215, |
| "loss": 0.6852, |
| "rewards/accuracies": 0.5, |
| "rewards/chosen": 0.026500703766942024, |
| "rewards/margins": 0.03877449035644531, |
| "rewards/rejected": -0.012273788452148438, |
| "step": 24 |
| }, |
| { |
| "epoch": 0.04, |
| "learning_rate": 1.0504201680672269e-07, |
| "logits/chosen": -2.5890495777130127, |
| "logits/rejected": -1.7141728401184082, |
| "logps/chosen": -12.43747329711914, |
| "logps/rejected": -147.9033203125, |
| "loss": 0.6846, |
| "rewards/accuracies": 0.0, |
| "rewards/chosen": -0.0024075033143162727, |
| "rewards/margins": -0.05119595676660538, |
| "rewards/rejected": 0.04878845438361168, |
| "step": 25 |
| }, |
| { |
| "epoch": 0.04, |
| "learning_rate": 1.092436974789916e-07, |
| "logits/chosen": -2.9379727840423584, |
| "logits/rejected": -1.3671715259552002, |
| "logps/chosen": -203.9562530517578, |
| "logps/rejected": -130.590576171875, |
| "loss": 0.6829, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 0.021196747198700905, |
| "rewards/margins": 0.04621582105755806, |
| "rewards/rejected": -0.025019073858857155, |
| "step": 26 |
| }, |
| { |
| "epoch": 0.05, |
| "learning_rate": 1.134453781512605e-07, |
| "logits/chosen": -1.2850056886672974, |
| "logits/rejected": -1.527043104171753, |
| "logps/chosen": -293.0238037109375, |
| "logps/rejected": -110.18681335449219, |
| "loss": 0.6859, |
| "rewards/accuracies": 0.0, |
| "rewards/chosen": -0.069427490234375, |
| "rewards/margins": -0.04693755879998207, |
| "rewards/rejected": -0.02248992957174778, |
| "step": 27 |
| }, |
| { |
| "epoch": 0.05, |
| "learning_rate": 1.176470588235294e-07, |
| "logits/chosen": -2.0202457904815674, |
| "logits/rejected": -2.382385730743408, |
| "logps/chosen": -323.6606750488281, |
| "logps/rejected": -179.65538024902344, |
| "loss": 0.6833, |
| "rewards/accuracies": 0.0, |
| "rewards/chosen": -0.028132058680057526, |
| "rewards/margins": -0.018391229212284088, |
| "rewards/rejected": -0.009740829467773438, |
| "step": 28 |
| }, |
| { |
| "epoch": 0.05, |
| "learning_rate": 1.2184873949579832e-07, |
| "logits/chosen": -1.9959008693695068, |
| "logits/rejected": -1.408521294593811, |
| "logps/chosen": -221.8274383544922, |
| "logps/rejected": -225.3356475830078, |
| "loss": 0.6799, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 0.023763515055179596, |
| "rewards/margins": 0.14066720008850098, |
| "rewards/rejected": -0.11690368503332138, |
| "step": 29 |
| }, |
| { |
| "epoch": 0.05, |
| "learning_rate": 1.2605042016806723e-07, |
| "logits/chosen": -1.8002355098724365, |
| "logits/rejected": -1.521448016166687, |
| "logps/chosen": -94.72344970703125, |
| "logps/rejected": -110.32486724853516, |
| "loss": 0.6821, |
| "rewards/accuracies": 0.0, |
| "rewards/chosen": -0.054589081555604935, |
| "rewards/margins": -0.024533655494451523, |
| "rewards/rejected": -0.03005542792379856, |
| "step": 30 |
| }, |
| { |
| "epoch": 0.05, |
| "learning_rate": 1.3025210084033613e-07, |
| "logits/chosen": -1.8377197980880737, |
| "logits/rejected": -2.063385248184204, |
| "logps/chosen": -62.164398193359375, |
| "logps/rejected": -104.72893524169922, |
| "loss": 0.679, |
| "rewards/accuracies": 0.5, |
| "rewards/chosen": 0.019611358642578125, |
| "rewards/margins": -0.0029705059714615345, |
| "rewards/rejected": 0.022581864148378372, |
| "step": 31 |
| }, |
| { |
| "epoch": 0.05, |
| "learning_rate": 1.3445378151260504e-07, |
| "logits/chosen": -2.392535924911499, |
| "logits/rejected": -2.1506621837615967, |
| "logps/chosen": -11.346028327941895, |
| "logps/rejected": -71.765625, |
| "loss": 0.6785, |
| "rewards/accuracies": 0.5, |
| "rewards/chosen": -0.015737399458885193, |
| "rewards/margins": -0.004548127297312021, |
| "rewards/rejected": -0.01118927076458931, |
| "step": 32 |
| }, |
| { |
| "epoch": 0.06, |
| "learning_rate": 1.3865546218487394e-07, |
| "logits/chosen": -2.0362868309020996, |
| "logits/rejected": -2.1367034912109375, |
| "logps/chosen": -266.005859375, |
| "logps/rejected": -214.60191345214844, |
| "loss": 0.6788, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 0.052748873829841614, |
| "rewards/margins": 0.09725818783044815, |
| "rewards/rejected": -0.04450931400060654, |
| "step": 33 |
| }, |
| { |
| "epoch": 0.06, |
| "learning_rate": 1.4285714285714285e-07, |
| "logits/chosen": -1.3824855089187622, |
| "logits/rejected": -1.5640826225280762, |
| "logps/chosen": -111.83187866210938, |
| "logps/rejected": -36.405189514160156, |
| "loss": 0.6693, |
| "rewards/accuracies": 0.5, |
| "rewards/chosen": -0.008962631225585938, |
| "rewards/margins": 0.011541889980435371, |
| "rewards/rejected": -0.02050452120602131, |
| "step": 34 |
| }, |
| { |
| "epoch": 0.06, |
| "learning_rate": 1.4705882352941175e-07, |
| "logits/chosen": -2.272282361984253, |
| "logits/rejected": -2.159532308578491, |
| "logps/chosen": -43.7902717590332, |
| "logps/rejected": -74.43631744384766, |
| "loss": 0.6652, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 0.01943950727581978, |
| "rewards/margins": 0.03280620649456978, |
| "rewards/rejected": -0.01336669921875, |
| "step": 35 |
| }, |
| { |
| "epoch": 0.06, |
| "learning_rate": 1.5126050420168066e-07, |
| "logits/chosen": -2.0161397457122803, |
| "logits/rejected": -1.3697400093078613, |
| "logps/chosen": -67.14362335205078, |
| "logps/rejected": -123.745361328125, |
| "loss": 0.6641, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 0.0367613323032856, |
| "rewards/margins": 0.04867387190461159, |
| "rewards/rejected": -0.01191253773868084, |
| "step": 36 |
| }, |
| { |
| "epoch": 0.06, |
| "learning_rate": 1.554621848739496e-07, |
| "logits/chosen": -2.1913275718688965, |
| "logits/rejected": -1.7024658918380737, |
| "logps/chosen": -10.184264183044434, |
| "logps/rejected": -107.8653793334961, |
| "loss": 0.6593, |
| "rewards/accuracies": 0.5, |
| "rewards/chosen": -0.004525709431618452, |
| "rewards/margins": -0.01936373859643936, |
| "rewards/rejected": 0.014838028699159622, |
| "step": 37 |
| }, |
| { |
| "epoch": 0.06, |
| "learning_rate": 1.5966386554621847e-07, |
| "logits/chosen": -1.081247091293335, |
| "logits/rejected": -2.124126434326172, |
| "logps/chosen": -789.7781982421875, |
| "logps/rejected": -147.115966796875, |
| "loss": 0.659, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 0.05167846754193306, |
| "rewards/margins": 0.20409394800662994, |
| "rewards/rejected": -0.152415469288826, |
| "step": 38 |
| }, |
| { |
| "epoch": 0.07, |
| "learning_rate": 1.638655462184874e-07, |
| "logits/chosen": -2.1789205074310303, |
| "logits/rejected": -1.1509499549865723, |
| "logps/chosen": -295.66265869140625, |
| "logps/rejected": -394.5150451660156, |
| "loss": 0.6517, |
| "rewards/accuracies": 0.5, |
| "rewards/chosen": -0.12732239067554474, |
| "rewards/margins": 0.1430404633283615, |
| "rewards/rejected": -0.27036285400390625, |
| "step": 39 |
| }, |
| { |
| "epoch": 0.07, |
| "learning_rate": 1.680672268907563e-07, |
| "logits/chosen": -2.1448452472686768, |
| "logits/rejected": -2.1956920623779297, |
| "logps/chosen": -62.93687438964844, |
| "logps/rejected": -84.88615417480469, |
| "loss": 0.6534, |
| "rewards/accuracies": 0.5, |
| "rewards/chosen": -0.008682060055434704, |
| "rewards/margins": 0.0031255725771188736, |
| "rewards/rejected": -0.011807632632553577, |
| "step": 40 |
| }, |
| { |
| "epoch": 0.07, |
| "learning_rate": 1.722689075630252e-07, |
| "logits/chosen": -1.4040545225143433, |
| "logits/rejected": -0.7300827503204346, |
| "logps/chosen": -326.945068359375, |
| "logps/rejected": -324.3778076171875, |
| "loss": 0.6435, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": -0.008136749267578125, |
| "rewards/margins": 0.2589103579521179, |
| "rewards/rejected": -0.26704710721969604, |
| "step": 41 |
| }, |
| { |
| "epoch": 0.07, |
| "learning_rate": 1.764705882352941e-07, |
| "logits/chosen": -1.4858357906341553, |
| "logits/rejected": -2.0196330547332764, |
| "logps/chosen": -459.7484130859375, |
| "logps/rejected": -210.59243774414062, |
| "loss": 0.6423, |
| "rewards/accuracies": 0.5, |
| "rewards/chosen": -0.13091735541820526, |
| "rewards/margins": -0.0402679406106472, |
| "rewards/rejected": -0.09064941853284836, |
| "step": 42 |
| }, |
| { |
| "epoch": 0.07, |
| "learning_rate": 1.8067226890756302e-07, |
| "logits/chosen": -1.4100688695907593, |
| "logits/rejected": -2.2512903213500977, |
| "logps/chosen": -263.8143615722656, |
| "logps/rejected": -82.95572662353516, |
| "loss": 0.6408, |
| "rewards/accuracies": 0.0, |
| "rewards/chosen": -0.052451327443122864, |
| "rewards/margins": -0.04398571699857712, |
| "rewards/rejected": -0.008465608581900597, |
| "step": 43 |
| }, |
| { |
| "epoch": 0.07, |
| "learning_rate": 1.8487394957983192e-07, |
| "logits/chosen": -1.654313325881958, |
| "logits/rejected": -1.3700717687606812, |
| "logps/chosen": -167.07334899902344, |
| "logps/rejected": -133.7058868408203, |
| "loss": 0.646, |
| "rewards/accuracies": 0.5, |
| "rewards/chosen": -0.036783602088689804, |
| "rewards/margins": 0.10394057631492615, |
| "rewards/rejected": -0.14072418212890625, |
| "step": 44 |
| }, |
| { |
| "epoch": 0.08, |
| "learning_rate": 1.8907563025210083e-07, |
| "logits/chosen": -2.3346948623657227, |
| "logits/rejected": -1.4270800352096558, |
| "logps/chosen": -363.33868408203125, |
| "logps/rejected": -202.8612060546875, |
| "loss": 0.6329, |
| "rewards/accuracies": 0.5, |
| "rewards/chosen": -0.11517754197120667, |
| "rewards/margins": -0.01095886155962944, |
| "rewards/rejected": -0.10421867668628693, |
| "step": 45 |
| }, |
| { |
| "epoch": 0.08, |
| "learning_rate": 1.9327731092436976e-07, |
| "logits/chosen": -1.5132286548614502, |
| "logits/rejected": -0.9802812933921814, |
| "logps/chosen": -463.6932067871094, |
| "logps/rejected": -284.0732421875, |
| "loss": 0.6186, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": -0.11024780571460724, |
| "rewards/margins": 0.5847091674804688, |
| "rewards/rejected": -0.6949569582939148, |
| "step": 46 |
| }, |
| { |
| "epoch": 0.08, |
| "learning_rate": 1.9747899159663864e-07, |
| "logits/chosen": -1.640755295753479, |
| "logits/rejected": -2.064528465270996, |
| "logps/chosen": -141.10398864746094, |
| "logps/rejected": -74.3631362915039, |
| "loss": 0.6133, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 0.003640557639300823, |
| "rewards/margins": 0.031121447682380676, |
| "rewards/rejected": -0.02748088911175728, |
| "step": 47 |
| }, |
| { |
| "epoch": 0.08, |
| "learning_rate": 2.0168067226890757e-07, |
| "logits/chosen": -1.3043317794799805, |
| "logits/rejected": -0.7944495677947998, |
| "logps/chosen": -236.87294006347656, |
| "logps/rejected": -140.55502319335938, |
| "loss": 0.6065, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 0.0874466672539711, |
| "rewards/margins": 0.6055868864059448, |
| "rewards/rejected": -0.5181402564048767, |
| "step": 48 |
| }, |
| { |
| "epoch": 0.08, |
| "learning_rate": 2.0588235294117645e-07, |
| "logits/chosen": -2.2910568714141846, |
| "logits/rejected": -1.5469049215316772, |
| "logps/chosen": -50.14934539794922, |
| "logps/rejected": -230.92745971679688, |
| "loss": 0.6062, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": -0.012336349114775658, |
| "rewards/margins": 0.3008907437324524, |
| "rewards/rejected": -0.3132270872592926, |
| "step": 49 |
| }, |
| { |
| "epoch": 0.08, |
| "learning_rate": 2.1008403361344538e-07, |
| "logits/chosen": -1.6005315780639648, |
| "logits/rejected": -2.0069663524627686, |
| "logps/chosen": -300.32855224609375, |
| "logps/rejected": -70.41799926757812, |
| "loss": 0.5983, |
| "rewards/accuracies": 0.5, |
| "rewards/chosen": -0.16448670625686646, |
| "rewards/margins": -0.07991065829992294, |
| "rewards/rejected": -0.08457604050636292, |
| "step": 50 |
| }, |
| { |
| "epoch": 0.09, |
| "learning_rate": 2.1428571428571426e-07, |
| "logits/chosen": -1.6963545083999634, |
| "logits/rejected": -1.8104299306869507, |
| "logps/chosen": -224.7620391845703, |
| "logps/rejected": -106.64592742919922, |
| "loss": 0.5888, |
| "rewards/accuracies": 0.5, |
| "rewards/chosen": -0.040007784962654114, |
| "rewards/margins": 0.2745014429092407, |
| "rewards/rejected": -0.31450921297073364, |
| "step": 51 |
| }, |
| { |
| "epoch": 0.09, |
| "learning_rate": 2.184873949579832e-07, |
| "logits/chosen": -1.519837737083435, |
| "logits/rejected": -1.6336404085159302, |
| "logps/chosen": -302.5843505859375, |
| "logps/rejected": -260.89599609375, |
| "loss": 0.5738, |
| "rewards/accuracies": 0.0, |
| "rewards/chosen": -0.00217361468821764, |
| "rewards/margins": -0.08065643161535263, |
| "rewards/rejected": 0.07848282158374786, |
| "step": 52 |
| }, |
| { |
| "epoch": 0.09, |
| "learning_rate": 2.226890756302521e-07, |
| "logits/chosen": -0.7233390212059021, |
| "logits/rejected": -0.5498945116996765, |
| "logps/chosen": -314.1108093261719, |
| "logps/rejected": -156.276611328125, |
| "loss": 0.5757, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": -0.03915100172162056, |
| "rewards/margins": 0.5292686223983765, |
| "rewards/rejected": -0.5684196352958679, |
| "step": 53 |
| }, |
| { |
| "epoch": 0.09, |
| "learning_rate": 2.26890756302521e-07, |
| "logits/chosen": -2.079944610595703, |
| "logits/rejected": -2.1164326667785645, |
| "logps/chosen": -363.44049072265625, |
| "logps/rejected": -204.23228454589844, |
| "loss": 0.5763, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 0.03126373142004013, |
| "rewards/margins": 0.17057648301124573, |
| "rewards/rejected": -0.139312744140625, |
| "step": 54 |
| }, |
| { |
| "epoch": 0.09, |
| "learning_rate": 2.3109243697478993e-07, |
| "logits/chosen": -0.8852956891059875, |
| "logits/rejected": -1.17733633518219, |
| "logps/chosen": -229.51646423339844, |
| "logps/rejected": -123.25852966308594, |
| "loss": 0.5658, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 0.012698173522949219, |
| "rewards/margins": 0.3284967541694641, |
| "rewards/rejected": -0.3157985806465149, |
| "step": 55 |
| }, |
| { |
| "epoch": 0.09, |
| "learning_rate": 2.352941176470588e-07, |
| "logits/chosen": -1.2436057329177856, |
| "logits/rejected": -1.3107479810714722, |
| "logps/chosen": -507.35784912109375, |
| "logps/rejected": -224.55007934570312, |
| "loss": 0.5549, |
| "rewards/accuracies": 0.5, |
| "rewards/chosen": -0.12623444199562073, |
| "rewards/margins": 0.4832092523574829, |
| "rewards/rejected": -0.6094436645507812, |
| "step": 56 |
| }, |
| { |
| "epoch": 0.1, |
| "learning_rate": 2.394957983193277e-07, |
| "logits/chosen": -1.765979290008545, |
| "logits/rejected": -2.5764899253845215, |
| "logps/chosen": -305.29486083984375, |
| "logps/rejected": -71.30033874511719, |
| "loss": 0.5634, |
| "rewards/accuracies": 0.5, |
| "rewards/chosen": 0.14705120027065277, |
| "rewards/margins": 0.1404399424791336, |
| "rewards/rejected": 0.00661125173792243, |
| "step": 57 |
| }, |
| { |
| "epoch": 0.1, |
| "learning_rate": 2.4369747899159664e-07, |
| "logits/chosen": -0.8349874019622803, |
| "logits/rejected": -0.4467710256576538, |
| "logps/chosen": -396.75067138671875, |
| "logps/rejected": -238.7788848876953, |
| "loss": 0.5588, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": -0.2361343502998352, |
| "rewards/margins": 0.7512519955635071, |
| "rewards/rejected": -0.9873863458633423, |
| "step": 58 |
| }, |
| { |
| "epoch": 0.1, |
| "learning_rate": 2.478991596638655e-07, |
| "logits/chosen": -1.1839135885238647, |
| "logits/rejected": -1.342013955116272, |
| "logps/chosen": -506.8759460449219, |
| "logps/rejected": -188.46533203125, |
| "loss": 0.5478, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": -0.19339600205421448, |
| "rewards/margins": 0.9577789306640625, |
| "rewards/rejected": -1.1511750221252441, |
| "step": 59 |
| }, |
| { |
| "epoch": 0.1, |
| "learning_rate": 2.5210084033613445e-07, |
| "logits/chosen": -1.238139271736145, |
| "logits/rejected": -1.305624008178711, |
| "logps/chosen": -354.024169921875, |
| "logps/rejected": -150.709228515625, |
| "loss": 0.5474, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": -0.03719634935259819, |
| "rewards/margins": 0.4611190855503082, |
| "rewards/rejected": -0.4983154535293579, |
| "step": 60 |
| }, |
| { |
| "epoch": 0.1, |
| "learning_rate": 2.5630252100840333e-07, |
| "logits/chosen": -1.0698193311691284, |
| "logits/rejected": -1.1871693134307861, |
| "logps/chosen": -533.863525390625, |
| "logps/rejected": -207.14418029785156, |
| "loss": 0.5121, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": -0.06107788532972336, |
| "rewards/margins": 1.0981537103652954, |
| "rewards/rejected": -1.1592315435409546, |
| "step": 61 |
| }, |
| { |
| "epoch": 0.1, |
| "learning_rate": 2.6050420168067226e-07, |
| "logits/chosen": -1.856410026550293, |
| "logits/rejected": -1.372816801071167, |
| "logps/chosen": -205.7808074951172, |
| "logps/rejected": -189.2294464111328, |
| "loss": 0.5077, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": -0.07348620891571045, |
| "rewards/margins": 0.7908002138137817, |
| "rewards/rejected": -0.8642864227294922, |
| "step": 62 |
| }, |
| { |
| "epoch": 0.11, |
| "learning_rate": 2.6470588235294114e-07, |
| "logits/chosen": -1.718478798866272, |
| "logits/rejected": -1.663999080657959, |
| "logps/chosen": -604.8311767578125, |
| "logps/rejected": -742.4992065429688, |
| "loss": 0.518, |
| "rewards/accuracies": 0.5, |
| "rewards/chosen": -0.17543946206569672, |
| "rewards/margins": -0.012347415089607239, |
| "rewards/rejected": -0.16309204697608948, |
| "step": 63 |
| }, |
| { |
| "epoch": 0.11, |
| "learning_rate": 2.689075630252101e-07, |
| "logits/chosen": -2.534972906112671, |
| "logits/rejected": -2.4803988933563232, |
| "logps/chosen": -27.763654708862305, |
| "logps/rejected": -65.91168975830078, |
| "loss": 0.4941, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 0.041327860206365585, |
| "rewards/margins": 0.3402096629142761, |
| "rewards/rejected": -0.2988818287849426, |
| "step": 64 |
| }, |
| { |
| "epoch": 0.11, |
| "learning_rate": 2.7310924369747895e-07, |
| "logits/chosen": -1.866020679473877, |
| "logits/rejected": -1.5979124307632446, |
| "logps/chosen": -348.8237609863281, |
| "logps/rejected": -415.2561950683594, |
| "loss": 0.4814, |
| "rewards/accuracies": 0.5, |
| "rewards/chosen": -0.09458465874195099, |
| "rewards/margins": 0.18892823159694672, |
| "rewards/rejected": -0.2835128903388977, |
| "step": 65 |
| }, |
| { |
| "epoch": 0.11, |
| "learning_rate": 2.773109243697479e-07, |
| "logits/chosen": -1.7812227010726929, |
| "logits/rejected": -1.2362346649169922, |
| "logps/chosen": -210.58091735839844, |
| "logps/rejected": -221.3896026611328, |
| "loss": 0.4545, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": -0.13596276938915253, |
| "rewards/margins": 1.656264305114746, |
| "rewards/rejected": -1.7922271490097046, |
| "step": 66 |
| }, |
| { |
| "epoch": 0.11, |
| "learning_rate": 2.815126050420168e-07, |
| "logits/chosen": -1.5426255464553833, |
| "logits/rejected": -1.5356191396713257, |
| "logps/chosen": -36.993186950683594, |
| "logps/rejected": -77.3095474243164, |
| "loss": 0.4601, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 0.021850014105439186, |
| "rewards/margins": 0.5636359453201294, |
| "rewards/rejected": -0.5417859554290771, |
| "step": 67 |
| }, |
| { |
| "epoch": 0.11, |
| "learning_rate": 2.857142857142857e-07, |
| "logits/chosen": -1.814134955406189, |
| "logits/rejected": -1.493807077407837, |
| "logps/chosen": -214.3492431640625, |
| "logps/rejected": -331.2470397949219, |
| "loss": 0.466, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 0.041826628148555756, |
| "rewards/margins": 0.37629854679107666, |
| "rewards/rejected": -0.3344719111919403, |
| "step": 68 |
| }, |
| { |
| "epoch": 0.12, |
| "learning_rate": 2.899159663865546e-07, |
| "logits/chosen": -1.104387640953064, |
| "logits/rejected": -1.4887744188308716, |
| "logps/chosen": -790.798583984375, |
| "logps/rejected": -575.5951538085938, |
| "loss": 0.4556, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 0.17354126274585724, |
| "rewards/margins": 0.449990838766098, |
| "rewards/rejected": -0.276449590921402, |
| "step": 69 |
| }, |
| { |
| "epoch": 0.12, |
| "learning_rate": 2.941176470588235e-07, |
| "logits/chosen": -1.1406750679016113, |
| "logits/rejected": -1.7379848957061768, |
| "logps/chosen": -677.4329833984375, |
| "logps/rejected": -125.09095001220703, |
| "loss": 0.4216, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 0.29751741886138916, |
| "rewards/margins": 1.025307536125183, |
| "rewards/rejected": -0.727790117263794, |
| "step": 70 |
| }, |
| { |
| "epoch": 0.12, |
| "learning_rate": 2.9831932773109244e-07, |
| "logits/chosen": -1.548018455505371, |
| "logits/rejected": -1.9434715509414673, |
| "logps/chosen": -60.39828109741211, |
| "logps/rejected": -38.36094665527344, |
| "loss": 0.435, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": -0.07226741313934326, |
| "rewards/margins": 0.6180351972579956, |
| "rewards/rejected": -0.6903026103973389, |
| "step": 71 |
| }, |
| { |
| "epoch": 0.12, |
| "learning_rate": 3.025210084033613e-07, |
| "logits/chosen": -1.4888752698898315, |
| "logits/rejected": -1.3269966840744019, |
| "logps/chosen": -333.23468017578125, |
| "logps/rejected": -181.92431640625, |
| "loss": 0.4167, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": -0.046831514686346054, |
| "rewards/margins": 2.105104923248291, |
| "rewards/rejected": -2.1519362926483154, |
| "step": 72 |
| }, |
| { |
| "epoch": 0.12, |
| "learning_rate": 3.0672268907563024e-07, |
| "logits/chosen": -1.5784661769866943, |
| "logits/rejected": -1.5319206714630127, |
| "logps/chosen": -37.48617172241211, |
| "logps/rejected": -38.057193756103516, |
| "loss": 0.4306, |
| "rewards/accuracies": 0.5, |
| "rewards/chosen": -0.07858496159315109, |
| "rewards/margins": 0.3954930901527405, |
| "rewards/rejected": -0.47407805919647217, |
| "step": 73 |
| }, |
| { |
| "epoch": 0.12, |
| "learning_rate": 3.109243697478992e-07, |
| "logits/chosen": -2.385610580444336, |
| "logits/rejected": -2.365718364715576, |
| "logps/chosen": -31.291919708251953, |
| "logps/rejected": -60.941036224365234, |
| "loss": 0.3708, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 0.0072297099977731705, |
| "rewards/margins": 0.6486601829528809, |
| "rewards/rejected": -0.6414304971694946, |
| "step": 74 |
| }, |
| { |
| "epoch": 0.13, |
| "learning_rate": 3.1512605042016805e-07, |
| "logits/chosen": -1.8049649000167847, |
| "logits/rejected": -1.474593162536621, |
| "logps/chosen": -145.8429412841797, |
| "logps/rejected": -114.68021392822266, |
| "loss": 0.3998, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": -0.15717864036560059, |
| "rewards/margins": 0.8213388919830322, |
| "rewards/rejected": -0.9785175919532776, |
| "step": 75 |
| }, |
| { |
| "epoch": 0.13, |
| "learning_rate": 3.1932773109243693e-07, |
| "logits/chosen": -1.4387171268463135, |
| "logits/rejected": -1.3081284761428833, |
| "logps/chosen": -72.28972625732422, |
| "logps/rejected": -99.7930679321289, |
| "loss": 0.4351, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 0.03587455675005913, |
| "rewards/margins": 0.7939237356185913, |
| "rewards/rejected": -0.7580491900444031, |
| "step": 76 |
| }, |
| { |
| "epoch": 0.13, |
| "learning_rate": 3.2352941176470586e-07, |
| "logits/chosen": -1.4761199951171875, |
| "logits/rejected": -2.109046459197998, |
| "logps/chosen": -155.40235900878906, |
| "logps/rejected": -155.92733764648438, |
| "loss": 0.395, |
| "rewards/accuracies": 0.5, |
| "rewards/chosen": -0.06405086815357208, |
| "rewards/margins": 0.329689621925354, |
| "rewards/rejected": -0.3937404751777649, |
| "step": 77 |
| }, |
| { |
| "epoch": 0.13, |
| "learning_rate": 3.277310924369748e-07, |
| "logits/chosen": -2.038198232650757, |
| "logits/rejected": -2.2060189247131348, |
| "logps/chosen": -148.52001953125, |
| "logps/rejected": -190.42556762695312, |
| "loss": 0.3962, |
| "rewards/accuracies": 0.5, |
| "rewards/chosen": -0.5522751212120056, |
| "rewards/margins": 0.17331847548484802, |
| "rewards/rejected": -0.725593626499176, |
| "step": 78 |
| }, |
| { |
| "epoch": 0.13, |
| "learning_rate": 3.319327731092437e-07, |
| "logits/chosen": -1.1491724252700806, |
| "logits/rejected": -0.9036651849746704, |
| "logps/chosen": -343.11529541015625, |
| "logps/rejected": -174.84776306152344, |
| "loss": 0.4158, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": -0.14253844320774078, |
| "rewards/margins": 2.43456768989563, |
| "rewards/rejected": -2.577106237411499, |
| "step": 79 |
| }, |
| { |
| "epoch": 0.13, |
| "learning_rate": 3.361344537815126e-07, |
| "logits/chosen": -1.5111039876937866, |
| "logits/rejected": -1.9596521854400635, |
| "logps/chosen": -572.5203857421875, |
| "logps/rejected": -325.9815368652344, |
| "loss": 0.373, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 0.3365722894668579, |
| "rewards/margins": 2.104917287826538, |
| "rewards/rejected": -1.7683449983596802, |
| "step": 80 |
| }, |
| { |
| "epoch": 0.14, |
| "learning_rate": 3.403361344537815e-07, |
| "logits/chosen": -1.8409173488616943, |
| "logits/rejected": -1.9764267206192017, |
| "logps/chosen": -352.3498840332031, |
| "logps/rejected": -321.4953308105469, |
| "loss": 0.3865, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 0.22799532115459442, |
| "rewards/margins": 0.7164055109024048, |
| "rewards/rejected": -0.48841017484664917, |
| "step": 81 |
| }, |
| { |
| "epoch": 0.14, |
| "learning_rate": 3.445378151260504e-07, |
| "logits/chosen": -1.8073253631591797, |
| "logits/rejected": -2.74298357963562, |
| "logps/chosen": -222.5423583984375, |
| "logps/rejected": -52.44717788696289, |
| "loss": 0.3857, |
| "rewards/accuracies": 0.5, |
| "rewards/chosen": -0.19195251166820526, |
| "rewards/margins": 0.3153046667575836, |
| "rewards/rejected": -0.5072571635246277, |
| "step": 82 |
| }, |
| { |
| "epoch": 0.14, |
| "learning_rate": 3.487394957983193e-07, |
| "logits/chosen": -2.154622793197632, |
| "logits/rejected": -1.6847541332244873, |
| "logps/chosen": -208.72132873535156, |
| "logps/rejected": -175.53054809570312, |
| "loss": 0.3637, |
| "rewards/accuracies": 0.5, |
| "rewards/chosen": -0.23619385063648224, |
| "rewards/margins": 1.846364140510559, |
| "rewards/rejected": -2.0825579166412354, |
| "step": 83 |
| }, |
| { |
| "epoch": 0.14, |
| "learning_rate": 3.529411764705882e-07, |
| "logits/chosen": -1.876042127609253, |
| "logits/rejected": -1.3837803602218628, |
| "logps/chosen": -53.90886306762695, |
| "logps/rejected": -144.5306396484375, |
| "loss": 0.364, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": -0.01596364937722683, |
| "rewards/margins": 0.7440950274467468, |
| "rewards/rejected": -0.7600586414337158, |
| "step": 84 |
| }, |
| { |
| "epoch": 0.14, |
| "learning_rate": 3.5714285714285716e-07, |
| "logits/chosen": -1.4703798294067383, |
| "logits/rejected": -1.7445605993270874, |
| "logps/chosen": -269.6485900878906, |
| "logps/rejected": -158.86940002441406, |
| "loss": 0.3672, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 0.10937881469726562, |
| "rewards/margins": 2.3677597045898438, |
| "rewards/rejected": -2.258380889892578, |
| "step": 85 |
| }, |
| { |
| "epoch": 0.15, |
| "learning_rate": 3.6134453781512604e-07, |
| "logits/chosen": -1.6020888090133667, |
| "logits/rejected": -1.6218175888061523, |
| "logps/chosen": -152.8944091796875, |
| "logps/rejected": -168.00990295410156, |
| "loss": 0.3729, |
| "rewards/accuracies": 0.5, |
| "rewards/chosen": -0.5375404357910156, |
| "rewards/margins": 0.09828647971153259, |
| "rewards/rejected": -0.6358269453048706, |
| "step": 86 |
| }, |
| { |
| "epoch": 0.15, |
| "learning_rate": 3.655462184873949e-07, |
| "logits/chosen": -1.5672448873519897, |
| "logits/rejected": -1.6510668992996216, |
| "logps/chosen": -447.45806884765625, |
| "logps/rejected": -239.46371459960938, |
| "loss": 0.3704, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": -0.2388494461774826, |
| "rewards/margins": 3.1040165424346924, |
| "rewards/rejected": -3.3428659439086914, |
| "step": 87 |
| }, |
| { |
| "epoch": 0.15, |
| "learning_rate": 3.6974789915966385e-07, |
| "logits/chosen": -0.6570608615875244, |
| "logits/rejected": -0.8279274702072144, |
| "logps/chosen": -395.8023376464844, |
| "logps/rejected": -155.96295166015625, |
| "loss": 0.3316, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 0.0513305589556694, |
| "rewards/margins": 3.0571579933166504, |
| "rewards/rejected": -3.0058274269104004, |
| "step": 88 |
| }, |
| { |
| "epoch": 0.15, |
| "learning_rate": 3.739495798319328e-07, |
| "logits/chosen": -1.7863593101501465, |
| "logits/rejected": -2.064410924911499, |
| "logps/chosen": -333.698486328125, |
| "logps/rejected": -122.79313659667969, |
| "loss": 0.3579, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": -0.06812648475170135, |
| "rewards/margins": 0.5757344365119934, |
| "rewards/rejected": -0.643860936164856, |
| "step": 89 |
| }, |
| { |
| "epoch": 0.15, |
| "learning_rate": 3.7815126050420166e-07, |
| "logits/chosen": -1.7475690841674805, |
| "logits/rejected": -2.228104591369629, |
| "logps/chosen": -287.8011169433594, |
| "logps/rejected": -31.779052734375, |
| "loss": 0.3574, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 0.38655588030815125, |
| "rewards/margins": 0.9471727609634399, |
| "rewards/rejected": -0.5606168508529663, |
| "step": 90 |
| }, |
| { |
| "epoch": 0.15, |
| "learning_rate": 3.8235294117647053e-07, |
| "logits/chosen": -1.7612462043762207, |
| "logits/rejected": -1.4105805158615112, |
| "logps/chosen": -122.49490356445312, |
| "logps/rejected": -187.72357177734375, |
| "loss": 0.3251, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": -0.22412091493606567, |
| "rewards/margins": 2.9434642791748047, |
| "rewards/rejected": -3.1675851345062256, |
| "step": 91 |
| }, |
| { |
| "epoch": 0.16, |
| "learning_rate": 3.865546218487395e-07, |
| "logits/chosen": -1.4474364519119263, |
| "logits/rejected": -1.5902643203735352, |
| "logps/chosen": -239.24310302734375, |
| "logps/rejected": -135.27609252929688, |
| "loss": 0.3662, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": -0.21128883957862854, |
| "rewards/margins": 1.6379708051681519, |
| "rewards/rejected": -1.849259614944458, |
| "step": 92 |
| }, |
| { |
| "epoch": 0.16, |
| "learning_rate": 3.907563025210084e-07, |
| "logits/chosen": -1.1318254470825195, |
| "logits/rejected": -1.4229687452316284, |
| "logps/chosen": -260.1365966796875, |
| "logps/rejected": -74.00960540771484, |
| "loss": 0.3516, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 0.2649814486503601, |
| "rewards/margins": 1.8917981386184692, |
| "rewards/rejected": -1.6268166303634644, |
| "step": 93 |
| }, |
| { |
| "epoch": 0.16, |
| "learning_rate": 3.949579831932773e-07, |
| "logits/chosen": -1.8826991319656372, |
| "logits/rejected": -2.357274055480957, |
| "logps/chosen": -311.9720458984375, |
| "logps/rejected": -158.02745056152344, |
| "loss": 0.3471, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 0.05377950519323349, |
| "rewards/margins": 1.7474021911621094, |
| "rewards/rejected": -1.6936227083206177, |
| "step": 94 |
| }, |
| { |
| "epoch": 0.16, |
| "learning_rate": 3.991596638655462e-07, |
| "logits/chosen": -1.2974653244018555, |
| "logits/rejected": -1.5636136531829834, |
| "logps/chosen": -478.4207763671875, |
| "logps/rejected": -169.5061492919922, |
| "loss": 0.2943, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 0.32736513018608093, |
| "rewards/margins": 3.4351882934570312, |
| "rewards/rejected": -3.107823133468628, |
| "step": 95 |
| }, |
| { |
| "epoch": 0.16, |
| "learning_rate": 4.0336134453781514e-07, |
| "logits/chosen": -1.6629210710525513, |
| "logits/rejected": -1.3563766479492188, |
| "logps/chosen": -302.283447265625, |
| "logps/rejected": -185.8943328857422, |
| "loss": 0.3216, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 0.08565587550401688, |
| "rewards/margins": 2.939385414123535, |
| "rewards/rejected": -2.853729486465454, |
| "step": 96 |
| }, |
| { |
| "epoch": 0.16, |
| "learning_rate": 4.07563025210084e-07, |
| "logits/chosen": -1.579493761062622, |
| "logits/rejected": -1.9858088493347168, |
| "logps/chosen": -166.11209106445312, |
| "logps/rejected": -93.21321868896484, |
| "loss": 0.2999, |
| "rewards/accuracies": 0.5, |
| "rewards/chosen": -0.39952126145362854, |
| "rewards/margins": 0.800835371017456, |
| "rewards/rejected": -1.2003566026687622, |
| "step": 97 |
| }, |
| { |
| "epoch": 0.17, |
| "learning_rate": 4.117647058823529e-07, |
| "logits/chosen": -1.9741665124893188, |
| "logits/rejected": -1.3600385189056396, |
| "logps/chosen": -258.8650817871094, |
| "logps/rejected": -185.20352172851562, |
| "loss": 0.3346, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": -0.09575500339269638, |
| "rewards/margins": 2.4661808013916016, |
| "rewards/rejected": -2.5619359016418457, |
| "step": 98 |
| }, |
| { |
| "epoch": 0.17, |
| "learning_rate": 4.159663865546218e-07, |
| "logits/chosen": -1.7311229705810547, |
| "logits/rejected": -2.162808418273926, |
| "logps/chosen": -423.9176940917969, |
| "logps/rejected": -175.74667358398438, |
| "loss": 0.2905, |
| "rewards/accuracies": 0.0, |
| "rewards/chosen": -1.4093338251113892, |
| "rewards/margins": -0.9930892586708069, |
| "rewards/rejected": -0.4162445068359375, |
| "step": 99 |
| }, |
| { |
| "epoch": 0.17, |
| "learning_rate": 4.2016806722689076e-07, |
| "logits/chosen": -1.7871568202972412, |
| "logits/rejected": -1.6674413681030273, |
| "logps/chosen": -133.9112548828125, |
| "logps/rejected": -180.4711456298828, |
| "loss": 0.2967, |
| "rewards/accuracies": 0.5, |
| "rewards/chosen": -0.17781352996826172, |
| "rewards/margins": 1.3768540620803833, |
| "rewards/rejected": -1.554667592048645, |
| "step": 100 |
| }, |
| { |
| "epoch": 0.17, |
| "learning_rate": 4.2436974789915964e-07, |
| "logits/chosen": -1.9930833578109741, |
| "logits/rejected": -2.3848719596862793, |
| "logps/chosen": -248.69406127929688, |
| "logps/rejected": -160.950927734375, |
| "loss": 0.2864, |
| "rewards/accuracies": 0.5, |
| "rewards/chosen": -0.7147164344787598, |
| "rewards/margins": 1.7294585704803467, |
| "rewards/rejected": -2.4441750049591064, |
| "step": 101 |
| }, |
| { |
| "epoch": 0.17, |
| "learning_rate": 4.285714285714285e-07, |
| "logits/chosen": -1.812693476676941, |
| "logits/rejected": -1.6050926446914673, |
| "logps/chosen": -277.7933654785156, |
| "logps/rejected": -483.4625549316406, |
| "loss": 0.3317, |
| "rewards/accuracies": 0.5, |
| "rewards/chosen": -0.6971309781074524, |
| "rewards/margins": -0.4464415907859802, |
| "rewards/rejected": -0.2506893575191498, |
| "step": 102 |
| }, |
| { |
| "epoch": 0.17, |
| "learning_rate": 4.327731092436975e-07, |
| "logits/chosen": -1.461750864982605, |
| "logits/rejected": -1.7464590072631836, |
| "logps/chosen": -173.95265197753906, |
| "logps/rejected": -135.8446807861328, |
| "loss": 0.2714, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": -0.12809354066848755, |
| "rewards/margins": 3.298431396484375, |
| "rewards/rejected": -3.4265246391296387, |
| "step": 103 |
| }, |
| { |
| "epoch": 0.18, |
| "learning_rate": 4.369747899159664e-07, |
| "logits/chosen": -1.5056589841842651, |
| "logits/rejected": -1.9312864542007446, |
| "logps/chosen": -201.46270751953125, |
| "logps/rejected": -267.52276611328125, |
| "loss": 0.3103, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 0.1706157773733139, |
| "rewards/margins": 0.6676197052001953, |
| "rewards/rejected": -0.4970039427280426, |
| "step": 104 |
| }, |
| { |
| "epoch": 0.18, |
| "learning_rate": 4.4117647058823526e-07, |
| "logits/chosen": -2.0907585620880127, |
| "logits/rejected": -1.849104642868042, |
| "logps/chosen": -21.733829498291016, |
| "logps/rejected": -76.71643829345703, |
| "loss": 0.2871, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": -0.0892881453037262, |
| "rewards/margins": 1.409407138824463, |
| "rewards/rejected": -1.4986952543258667, |
| "step": 105 |
| }, |
| { |
| "epoch": 0.18, |
| "learning_rate": 4.453781512605042e-07, |
| "logits/chosen": -0.9986115097999573, |
| "logits/rejected": -0.6594001054763794, |
| "logps/chosen": -368.1170654296875, |
| "logps/rejected": -226.82830810546875, |
| "loss": 0.3077, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": -0.4677414000034332, |
| "rewards/margins": 4.347979545593262, |
| "rewards/rejected": -4.815721035003662, |
| "step": 106 |
| }, |
| { |
| "epoch": 0.18, |
| "learning_rate": 4.495798319327731e-07, |
| "logits/chosen": -1.8972766399383545, |
| "logits/rejected": -2.1591522693634033, |
| "logps/chosen": -341.3930358886719, |
| "logps/rejected": -195.02679443359375, |
| "loss": 0.28, |
| "rewards/accuracies": 0.5, |
| "rewards/chosen": -0.6874268054962158, |
| "rewards/margins": 2.0455260276794434, |
| "rewards/rejected": -2.732952833175659, |
| "step": 107 |
| }, |
| { |
| "epoch": 0.18, |
| "learning_rate": 4.53781512605042e-07, |
| "logits/chosen": -1.1454424858093262, |
| "logits/rejected": -1.2674638032913208, |
| "logps/chosen": -141.8154754638672, |
| "logps/rejected": -19.07546043395996, |
| "loss": 0.3023, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 0.4041498303413391, |
| "rewards/margins": 1.1306045055389404, |
| "rewards/rejected": -0.7264547348022461, |
| "step": 108 |
| }, |
| { |
| "epoch": 0.18, |
| "learning_rate": 4.579831932773109e-07, |
| "logits/chosen": -2.07033109664917, |
| "logits/rejected": -2.380950450897217, |
| "logps/chosen": -80.41409301757812, |
| "logps/rejected": -98.79011535644531, |
| "loss": 0.2933, |
| "rewards/accuracies": 0.0, |
| "rewards/chosen": -0.43403300642967224, |
| "rewards/margins": -0.3926330506801605, |
| "rewards/rejected": -0.04139995574951172, |
| "step": 109 |
| }, |
| { |
| "epoch": 0.19, |
| "learning_rate": 4.6218487394957986e-07, |
| "logits/chosen": -2.5270440578460693, |
| "logits/rejected": -2.435595750808716, |
| "logps/chosen": -24.47152328491211, |
| "logps/rejected": -148.6935577392578, |
| "loss": 0.3021, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": -0.11162052303552628, |
| "rewards/margins": 3.680636405944824, |
| "rewards/rejected": -3.7922568321228027, |
| "step": 110 |
| }, |
| { |
| "epoch": 0.19, |
| "learning_rate": 4.6638655462184874e-07, |
| "logits/chosen": -1.4159996509552002, |
| "logits/rejected": -1.1079641580581665, |
| "logps/chosen": -213.67677307128906, |
| "logps/rejected": -146.0835418701172, |
| "loss": 0.2737, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": -0.6399146914482117, |
| "rewards/margins": 2.671748161315918, |
| "rewards/rejected": -3.3116626739501953, |
| "step": 111 |
| }, |
| { |
| "epoch": 0.19, |
| "learning_rate": 4.705882352941176e-07, |
| "logits/chosen": -1.859910488128662, |
| "logits/rejected": -2.269141435623169, |
| "logps/chosen": -270.6617126464844, |
| "logps/rejected": -149.7200164794922, |
| "loss": 0.2958, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": -0.3810228407382965, |
| "rewards/margins": 2.3572652339935303, |
| "rewards/rejected": -2.738288164138794, |
| "step": 112 |
| }, |
| { |
| "epoch": 0.19, |
| "learning_rate": 4.747899159663865e-07, |
| "logits/chosen": -1.615531325340271, |
| "logits/rejected": -2.3673205375671387, |
| "logps/chosen": -264.86578369140625, |
| "logps/rejected": -242.48330688476562, |
| "loss": 0.292, |
| "rewards/accuracies": 0.5, |
| "rewards/chosen": -0.4372093081474304, |
| "rewards/margins": 2.0082688331604004, |
| "rewards/rejected": -2.4454782009124756, |
| "step": 113 |
| }, |
| { |
| "epoch": 0.19, |
| "learning_rate": 4.789915966386554e-07, |
| "logits/chosen": -1.3283716440200806, |
| "logits/rejected": -1.4000985622406006, |
| "logps/chosen": -751.4788818359375, |
| "logps/rejected": -504.08892822265625, |
| "loss": 0.2725, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 0.6655944585800171, |
| "rewards/margins": 4.143014907836914, |
| "rewards/rejected": -3.4774200916290283, |
| "step": 114 |
| }, |
| { |
| "epoch": 0.19, |
| "learning_rate": 4.831932773109244e-07, |
| "logits/chosen": -1.942575216293335, |
| "logits/rejected": -1.5764302015304565, |
| "logps/chosen": -49.1660270690918, |
| "logps/rejected": -56.93123245239258, |
| "loss": 0.2784, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": -0.0690576359629631, |
| "rewards/margins": 1.2642771005630493, |
| "rewards/rejected": -1.3333348035812378, |
| "step": 115 |
| }, |
| { |
| "epoch": 0.2, |
| "learning_rate": 4.873949579831933e-07, |
| "logits/chosen": -2.2477641105651855, |
| "logits/rejected": -2.076430559158325, |
| "logps/chosen": -29.411571502685547, |
| "logps/rejected": -121.75186920166016, |
| "loss": 0.3125, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": -0.06398458778858185, |
| "rewards/margins": 3.124544143676758, |
| "rewards/rejected": -3.1885287761688232, |
| "step": 116 |
| }, |
| { |
| "epoch": 0.2, |
| "learning_rate": 4.915966386554621e-07, |
| "logits/chosen": -2.0187594890594482, |
| "logits/rejected": -1.1486027240753174, |
| "logps/chosen": -329.5198974609375, |
| "logps/rejected": -279.2951354980469, |
| "loss": 0.2825, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 0.12247312068939209, |
| "rewards/margins": 6.082241058349609, |
| "rewards/rejected": -5.959768295288086, |
| "step": 117 |
| }, |
| { |
| "epoch": 0.2, |
| "learning_rate": 4.95798319327731e-07, |
| "logits/chosen": -1.4514484405517578, |
| "logits/rejected": -2.005096435546875, |
| "logps/chosen": -317.6128845214844, |
| "logps/rejected": -207.64822387695312, |
| "loss": 0.2737, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 0.06410064548254013, |
| "rewards/margins": 2.704306125640869, |
| "rewards/rejected": -2.6402053833007812, |
| "step": 118 |
| }, |
| { |
| "epoch": 0.2, |
| "learning_rate": 5e-07, |
| "logits/chosen": -0.627937376499176, |
| "logits/rejected": -0.6839653253555298, |
| "logps/chosen": -91.22163391113281, |
| "logps/rejected": -55.82908630371094, |
| "loss": 0.2844, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 0.14227086305618286, |
| "rewards/margins": 2.2958028316497803, |
| "rewards/rejected": -2.153531789779663, |
| "step": 119 |
| }, |
| { |
| "epoch": 0.2, |
| "learning_rate": 5.042016806722689e-07, |
| "logits/chosen": -2.05309796333313, |
| "logits/rejected": -1.3187203407287598, |
| "logps/chosen": -375.8322448730469, |
| "logps/rejected": -427.10931396484375, |
| "loss": 0.2755, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 0.019181065261363983, |
| "rewards/margins": 2.3469948768615723, |
| "rewards/rejected": -2.3278136253356934, |
| "step": 120 |
| }, |
| { |
| "epoch": 0.2, |
| "learning_rate": 5.084033613445377e-07, |
| "logits/chosen": -1.6020848751068115, |
| "logits/rejected": -1.890777826309204, |
| "logps/chosen": -389.67840576171875, |
| "logps/rejected": -156.53250122070312, |
| "loss": 0.2691, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": -0.7378628253936768, |
| "rewards/margins": 2.929511308670044, |
| "rewards/rejected": -3.6673741340637207, |
| "step": 121 |
| }, |
| { |
| "epoch": 0.21, |
| "learning_rate": 5.126050420168067e-07, |
| "logits/chosen": -1.5457667112350464, |
| "logits/rejected": -0.9591537714004517, |
| "logps/chosen": -232.89480590820312, |
| "logps/rejected": -236.02783203125, |
| "loss": 0.2301, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": -0.7060562372207642, |
| "rewards/margins": 4.284882545471191, |
| "rewards/rejected": -4.990938663482666, |
| "step": 122 |
| }, |
| { |
| "epoch": 0.21, |
| "learning_rate": 5.168067226890757e-07, |
| "logits/chosen": -1.6671092510223389, |
| "logits/rejected": -1.6428896188735962, |
| "logps/chosen": -63.35133361816406, |
| "logps/rejected": -109.79218292236328, |
| "loss": 0.2499, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": -0.2081298828125, |
| "rewards/margins": 2.169139862060547, |
| "rewards/rejected": -2.3772695064544678, |
| "step": 123 |
| }, |
| { |
| "epoch": 0.21, |
| "learning_rate": 5.210084033613445e-07, |
| "logits/chosen": -1.5807249546051025, |
| "logits/rejected": -1.7926443815231323, |
| "logps/chosen": -74.63159942626953, |
| "logps/rejected": -134.3237762451172, |
| "loss": 0.2469, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": -0.030628204345703125, |
| "rewards/margins": 1.1621196269989014, |
| "rewards/rejected": -1.1927478313446045, |
| "step": 124 |
| }, |
| { |
| "epoch": 0.21, |
| "learning_rate": 5.252100840336135e-07, |
| "logits/chosen": -1.318616509437561, |
| "logits/rejected": -1.6264910697937012, |
| "logps/chosen": -527.2734375, |
| "logps/rejected": -174.54412841796875, |
| "loss": 0.2468, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": -0.34924012422561646, |
| "rewards/margins": 3.5974647998809814, |
| "rewards/rejected": -3.946704864501953, |
| "step": 125 |
| }, |
| { |
| "epoch": 0.21, |
| "learning_rate": 5.294117647058823e-07, |
| "logits/chosen": -1.1573264598846436, |
| "logits/rejected": -0.8850076198577881, |
| "logps/chosen": -82.6595458984375, |
| "logps/rejected": -150.1305389404297, |
| "loss": 0.2477, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": -0.006247520446777344, |
| "rewards/margins": 2.6717464923858643, |
| "rewards/rejected": -2.6779940128326416, |
| "step": 126 |
| }, |
| { |
| "epoch": 0.21, |
| "learning_rate": 5.336134453781512e-07, |
| "logits/chosen": -1.5155869722366333, |
| "logits/rejected": -1.3886268138885498, |
| "logps/chosen": -13.588849067687988, |
| "logps/rejected": -60.8436279296875, |
| "loss": 0.2575, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": -0.14050476253032684, |
| "rewards/margins": 1.282322883605957, |
| "rewards/rejected": -1.4228277206420898, |
| "step": 127 |
| }, |
| { |
| "epoch": 0.22, |
| "learning_rate": 5.378151260504201e-07, |
| "logits/chosen": -2.142261505126953, |
| "logits/rejected": -2.022604465484619, |
| "logps/chosen": -36.83582305908203, |
| "logps/rejected": -84.54644775390625, |
| "loss": 0.2476, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": -0.06873762607574463, |
| "rewards/margins": 1.3124231100082397, |
| "rewards/rejected": -1.3811607360839844, |
| "step": 128 |
| }, |
| { |
| "epoch": 0.22, |
| "learning_rate": 5.42016806722689e-07, |
| "logits/chosen": -2.0003206729888916, |
| "logits/rejected": -2.7086338996887207, |
| "logps/chosen": -297.49810791015625, |
| "logps/rejected": -121.93182373046875, |
| "loss": 0.2778, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 0.6414843797683716, |
| "rewards/margins": 2.057905673980713, |
| "rewards/rejected": -1.4164212942123413, |
| "step": 129 |
| }, |
| { |
| "epoch": 0.22, |
| "learning_rate": 5.462184873949579e-07, |
| "logits/chosen": -2.3803634643554688, |
| "logits/rejected": -1.4992303848266602, |
| "logps/chosen": -57.217220306396484, |
| "logps/rejected": -203.0693817138672, |
| "loss": 0.2675, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": -0.25870370864868164, |
| "rewards/margins": 2.8911209106445312, |
| "rewards/rejected": -3.149824619293213, |
| "step": 130 |
| }, |
| { |
| "epoch": 0.22, |
| "learning_rate": 5.504201680672269e-07, |
| "logits/chosen": -1.2806719541549683, |
| "logits/rejected": -2.390531063079834, |
| "logps/chosen": -454.8437194824219, |
| "logps/rejected": -94.0916748046875, |
| "loss": 0.2616, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 0.13427734375, |
| "rewards/margins": 3.2612316608428955, |
| "rewards/rejected": -3.1269543170928955, |
| "step": 131 |
| }, |
| { |
| "epoch": 0.22, |
| "learning_rate": 5.546218487394958e-07, |
| "logits/chosen": -1.9892646074295044, |
| "logits/rejected": -1.4233769178390503, |
| "logps/chosen": -108.95271301269531, |
| "logps/rejected": -160.64715576171875, |
| "loss": 0.2424, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": -0.15306320786476135, |
| "rewards/margins": 3.4583911895751953, |
| "rewards/rejected": -3.611454486846924, |
| "step": 132 |
| }, |
| { |
| "epoch": 0.22, |
| "learning_rate": 5.588235294117647e-07, |
| "logits/chosen": -1.2769445180892944, |
| "logits/rejected": -1.524395227432251, |
| "logps/chosen": -52.44013214111328, |
| "logps/rejected": -91.19084167480469, |
| "loss": 0.2597, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": -0.10794153809547424, |
| "rewards/margins": 0.9474404454231262, |
| "rewards/rejected": -1.0553820133209229, |
| "step": 133 |
| }, |
| { |
| "epoch": 0.23, |
| "learning_rate": 5.630252100840336e-07, |
| "logits/chosen": -2.26499080657959, |
| "logits/rejected": -1.1651140451431274, |
| "logps/chosen": -140.05178833007812, |
| "logps/rejected": -229.3851318359375, |
| "loss": 0.2595, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": -0.13012734055519104, |
| "rewards/margins": 3.768279790878296, |
| "rewards/rejected": -3.898406982421875, |
| "step": 134 |
| }, |
| { |
| "epoch": 0.23, |
| "learning_rate": 5.672268907563025e-07, |
| "logits/chosen": -1.6903553009033203, |
| "logits/rejected": -1.663693904876709, |
| "logps/chosen": -70.8027572631836, |
| "logps/rejected": -179.33489990234375, |
| "loss": 0.2193, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 0.4184946119785309, |
| "rewards/margins": 0.8598314523696899, |
| "rewards/rejected": -0.44133681058883667, |
| "step": 135 |
| }, |
| { |
| "epoch": 0.23, |
| "learning_rate": 5.714285714285714e-07, |
| "logits/chosen": -1.894440770149231, |
| "logits/rejected": -2.020301342010498, |
| "logps/chosen": -215.9483642578125, |
| "logps/rejected": -198.383544921875, |
| "loss": 0.2796, |
| "rewards/accuracies": 0.5, |
| "rewards/chosen": -0.6141928434371948, |
| "rewards/margins": 0.986687183380127, |
| "rewards/rejected": -1.6008800268173218, |
| "step": 136 |
| }, |
| { |
| "epoch": 0.23, |
| "learning_rate": 5.756302521008402e-07, |
| "logits/chosen": -2.016982316970825, |
| "logits/rejected": -1.272426962852478, |
| "logps/chosen": -51.863426208496094, |
| "logps/rejected": -149.52340698242188, |
| "loss": 0.252, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": -0.017465591430664062, |
| "rewards/margins": 2.352057695388794, |
| "rewards/rejected": -2.369523286819458, |
| "step": 137 |
| }, |
| { |
| "epoch": 0.23, |
| "learning_rate": 5.798319327731093e-07, |
| "logits/chosen": -0.8563526272773743, |
| "logits/rejected": -0.9021680355072021, |
| "logps/chosen": -510.8074951171875, |
| "logps/rejected": -266.77423095703125, |
| "loss": 0.2882, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": -0.9366821050643921, |
| "rewards/margins": 5.248723030090332, |
| "rewards/rejected": -6.1854047775268555, |
| "step": 138 |
| }, |
| { |
| "epoch": 0.23, |
| "learning_rate": 5.840336134453782e-07, |
| "logits/chosen": -1.8311724662780762, |
| "logits/rejected": -1.8810319900512695, |
| "logps/chosen": -106.62870788574219, |
| "logps/rejected": -123.30711364746094, |
| "loss": 0.2667, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": -0.28059542179107666, |
| "rewards/margins": 3.491508960723877, |
| "rewards/rejected": -3.772104263305664, |
| "step": 139 |
| }, |
| { |
| "epoch": 0.24, |
| "learning_rate": 5.88235294117647e-07, |
| "logits/chosen": -2.032766103744507, |
| "logits/rejected": -1.9613983631134033, |
| "logps/chosen": -239.09063720703125, |
| "logps/rejected": -421.301513671875, |
| "loss": 0.267, |
| "rewards/accuracies": 0.5, |
| "rewards/chosen": -0.27995407581329346, |
| "rewards/margins": 5.965301990509033, |
| "rewards/rejected": -6.245256423950195, |
| "step": 140 |
| }, |
| { |
| "epoch": 0.24, |
| "learning_rate": 5.924369747899159e-07, |
| "logits/chosen": -0.8632844090461731, |
| "logits/rejected": -1.520354151725769, |
| "logps/chosen": -345.11065673828125, |
| "logps/rejected": -139.1497039794922, |
| "loss": 0.2711, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 0.5095192193984985, |
| "rewards/margins": 2.5539865493774414, |
| "rewards/rejected": -2.0444672107696533, |
| "step": 141 |
| }, |
| { |
| "epoch": 0.24, |
| "learning_rate": 5.966386554621849e-07, |
| "logits/chosen": -1.3275768756866455, |
| "logits/rejected": -2.037048816680908, |
| "logps/chosen": -461.4246826171875, |
| "logps/rejected": -711.2941284179688, |
| "loss": 0.2599, |
| "rewards/accuracies": 0.5, |
| "rewards/chosen": -1.077467441558838, |
| "rewards/margins": 0.06723290681838989, |
| "rewards/rejected": -1.144700288772583, |
| "step": 142 |
| }, |
| { |
| "epoch": 0.24, |
| "learning_rate": 6.008403361344537e-07, |
| "logits/chosen": -2.1924614906311035, |
| "logits/rejected": -2.12215256690979, |
| "logps/chosen": -35.62915802001953, |
| "logps/rejected": -112.1571273803711, |
| "loss": 0.2408, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": -0.3445836305618286, |
| "rewards/margins": 3.033930540084839, |
| "rewards/rejected": -3.378514289855957, |
| "step": 143 |
| }, |
| { |
| "epoch": 0.24, |
| "learning_rate": 6.050420168067226e-07, |
| "logits/chosen": -1.556571125984192, |
| "logits/rejected": -1.026197910308838, |
| "logps/chosen": -352.4817810058594, |
| "logps/rejected": -479.5821838378906, |
| "loss": 0.2467, |
| "rewards/accuracies": 0.5, |
| "rewards/chosen": -0.581763505935669, |
| "rewards/margins": 3.661806583404541, |
| "rewards/rejected": -4.243570327758789, |
| "step": 144 |
| }, |
| { |
| "epoch": 0.24, |
| "learning_rate": 6.092436974789916e-07, |
| "logits/chosen": -1.3160314559936523, |
| "logits/rejected": -1.45841646194458, |
| "logps/chosen": -36.064300537109375, |
| "logps/rejected": -15.627889633178711, |
| "loss": 0.2421, |
| "rewards/accuracies": 0.5, |
| "rewards/chosen": -0.07876625657081604, |
| "rewards/margins": 0.7412266731262207, |
| "rewards/rejected": -0.8199928998947144, |
| "step": 145 |
| }, |
| { |
| "epoch": 0.25, |
| "learning_rate": 6.134453781512605e-07, |
| "logits/chosen": -1.2557013034820557, |
| "logits/rejected": -1.1368392705917358, |
| "logps/chosen": -427.95294189453125, |
| "logps/rejected": -396.66290283203125, |
| "loss": 0.2784, |
| "rewards/accuracies": 0.5, |
| "rewards/chosen": -1.8088486194610596, |
| "rewards/margins": 1.1350128650665283, |
| "rewards/rejected": -2.943861484527588, |
| "step": 146 |
| }, |
| { |
| "epoch": 0.25, |
| "learning_rate": 6.176470588235294e-07, |
| "logits/chosen": -2.3703060150146484, |
| "logits/rejected": -1.596415638923645, |
| "logps/chosen": -37.992149353027344, |
| "logps/rejected": -99.8422622680664, |
| "loss": 0.2201, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": -0.14648981392383575, |
| "rewards/margins": 0.39361295104026794, |
| "rewards/rejected": -0.5401027798652649, |
| "step": 147 |
| }, |
| { |
| "epoch": 0.25, |
| "learning_rate": 6.218487394957984e-07, |
| "logits/chosen": -2.017554759979248, |
| "logits/rejected": -1.885864019393921, |
| "logps/chosen": -31.028438568115234, |
| "logps/rejected": -165.63868713378906, |
| "loss": 0.2205, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": -0.28271618485450745, |
| "rewards/margins": 5.170090675354004, |
| "rewards/rejected": -5.4528069496154785, |
| "step": 148 |
| }, |
| { |
| "epoch": 0.25, |
| "learning_rate": 6.260504201680672e-07, |
| "logits/chosen": -1.8670669794082642, |
| "logits/rejected": -1.4380172491073608, |
| "logps/chosen": -174.096435546875, |
| "logps/rejected": -223.5670166015625, |
| "loss": 0.2268, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": -1.1435142755508423, |
| "rewards/margins": 5.261943817138672, |
| "rewards/rejected": -6.405458450317383, |
| "step": 149 |
| }, |
| { |
| "epoch": 0.25, |
| "learning_rate": 6.302521008403361e-07, |
| "logits/chosen": -2.312112808227539, |
| "logits/rejected": -1.3749383687973022, |
| "logps/chosen": -48.91535949707031, |
| "logps/rejected": -204.19570922851562, |
| "loss": 0.205, |
| "rewards/accuracies": 0.5, |
| "rewards/chosen": -0.1308881789445877, |
| "rewards/margins": 1.4819360971450806, |
| "rewards/rejected": -1.6128243207931519, |
| "step": 150 |
| }, |
| { |
| "epoch": 0.25, |
| "learning_rate": 6.344537815126049e-07, |
| "logits/chosen": -2.0252039432525635, |
| "logits/rejected": -2.22589111328125, |
| "logps/chosen": -223.1410369873047, |
| "logps/rejected": -64.30872344970703, |
| "loss": 0.2547, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 0.0106048583984375, |
| "rewards/margins": 0.5256061553955078, |
| "rewards/rejected": -0.5150012969970703, |
| "step": 151 |
| }, |
| { |
| "epoch": 0.26, |
| "learning_rate": 6.386554621848739e-07, |
| "logits/chosen": -1.5374224185943604, |
| "logits/rejected": -1.8345128297805786, |
| "logps/chosen": -300.93792724609375, |
| "logps/rejected": -145.31222534179688, |
| "loss": 0.2391, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": -0.8454994559288025, |
| "rewards/margins": 2.8806023597717285, |
| "rewards/rejected": -3.726101875305176, |
| "step": 152 |
| }, |
| { |
| "epoch": 0.26, |
| "learning_rate": 6.428571428571429e-07, |
| "logits/chosen": -1.6726059913635254, |
| "logits/rejected": -1.3613877296447754, |
| "logps/chosen": -693.0863037109375, |
| "logps/rejected": -1098.9571533203125, |
| "loss": 0.2422, |
| "rewards/accuracies": 0.0, |
| "rewards/chosen": -0.5227920413017273, |
| "rewards/margins": -0.413046270608902, |
| "rewards/rejected": -0.10974578559398651, |
| "step": 153 |
| }, |
| { |
| "epoch": 0.26, |
| "learning_rate": 6.470588235294117e-07, |
| "logits/chosen": -2.3774003982543945, |
| "logits/rejected": -1.5442438125610352, |
| "logps/chosen": -41.62322998046875, |
| "logps/rejected": -237.86154174804688, |
| "loss": 0.237, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": -0.06168098375201225, |
| "rewards/margins": 5.083809852600098, |
| "rewards/rejected": -5.145491123199463, |
| "step": 154 |
| }, |
| { |
| "epoch": 0.26, |
| "learning_rate": 6.512605042016807e-07, |
| "logits/chosen": -2.0312511920928955, |
| "logits/rejected": -1.431885004043579, |
| "logps/chosen": -149.83917236328125, |
| "logps/rejected": -129.43667602539062, |
| "loss": 0.2291, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": -0.30399322509765625, |
| "rewards/margins": 2.70412540435791, |
| "rewards/rejected": -3.0081186294555664, |
| "step": 155 |
| }, |
| { |
| "epoch": 0.26, |
| "learning_rate": 6.554621848739496e-07, |
| "logits/chosen": -0.6298438310623169, |
| "logits/rejected": -0.9561706781387329, |
| "logps/chosen": -303.7615051269531, |
| "logps/rejected": -206.84622192382812, |
| "loss": 0.215, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": -1.0302143096923828, |
| "rewards/margins": 2.9986958503723145, |
| "rewards/rejected": -4.028910160064697, |
| "step": 156 |
| }, |
| { |
| "epoch": 0.26, |
| "learning_rate": 6.596638655462184e-07, |
| "logits/chosen": -1.0504183769226074, |
| "logits/rejected": -2.596862316131592, |
| "logps/chosen": -604.466552734375, |
| "logps/rejected": -95.1014633178711, |
| "loss": 0.2044, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 0.6258544921875, |
| "rewards/margins": 3.6576082706451416, |
| "rewards/rejected": -3.0317537784576416, |
| "step": 157 |
| }, |
| { |
| "epoch": 0.27, |
| "learning_rate": 6.638655462184873e-07, |
| "logits/chosen": -1.4746941328048706, |
| "logits/rejected": -2.3198976516723633, |
| "logps/chosen": -510.818115234375, |
| "logps/rejected": -121.24671936035156, |
| "loss": 0.2315, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 0.42745667695999146, |
| "rewards/margins": 2.9992709159851074, |
| "rewards/rejected": -2.5718140602111816, |
| "step": 158 |
| }, |
| { |
| "epoch": 0.27, |
| "learning_rate": 6.680672268907563e-07, |
| "logits/chosen": -1.1884299516677856, |
| "logits/rejected": -0.7777690291404724, |
| "logps/chosen": -96.32203674316406, |
| "logps/rejected": -142.47738647460938, |
| "loss": 0.2078, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": -0.33403682708740234, |
| "rewards/margins": 1.1259114742279053, |
| "rewards/rejected": -1.4599483013153076, |
| "step": 159 |
| }, |
| { |
| "epoch": 0.27, |
| "learning_rate": 6.722689075630252e-07, |
| "logits/chosen": -1.337453007698059, |
| "logits/rejected": -1.9868876934051514, |
| "logps/chosen": -307.2751159667969, |
| "logps/rejected": -128.35006713867188, |
| "loss": 0.2637, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 0.154842808842659, |
| "rewards/margins": 0.5152485370635986, |
| "rewards/rejected": -0.36040574312210083, |
| "step": 160 |
| }, |
| { |
| "epoch": 0.27, |
| "learning_rate": 6.764705882352941e-07, |
| "logits/chosen": -1.0279209613800049, |
| "logits/rejected": -2.354536533355713, |
| "logps/chosen": -310.1678466796875, |
| "logps/rejected": -82.68232727050781, |
| "loss": 0.2187, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 0.6246879696846008, |
| "rewards/margins": 4.095552921295166, |
| "rewards/rejected": -3.470865249633789, |
| "step": 161 |
| }, |
| { |
| "epoch": 0.27, |
| "learning_rate": 6.80672268907563e-07, |
| "logits/chosen": -2.222411870956421, |
| "logits/rejected": -2.0266168117523193, |
| "logps/chosen": -31.795406341552734, |
| "logps/rejected": -155.34320068359375, |
| "loss": 0.1976, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": -0.33064308762550354, |
| "rewards/margins": 5.187854766845703, |
| "rewards/rejected": -5.518497943878174, |
| "step": 162 |
| }, |
| { |
| "epoch": 0.27, |
| "learning_rate": 6.848739495798319e-07, |
| "logits/chosen": -1.2088196277618408, |
| "logits/rejected": -2.6081087589263916, |
| "logps/chosen": -340.25689697265625, |
| "logps/rejected": -129.95571899414062, |
| "loss": 0.2151, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 0.44170382618904114, |
| "rewards/margins": 1.4245266914367676, |
| "rewards/rejected": -0.982822835445404, |
| "step": 163 |
| }, |
| { |
| "epoch": 0.28, |
| "learning_rate": 6.890756302521008e-07, |
| "logits/chosen": -1.2510286569595337, |
| "logits/rejected": -1.489122748374939, |
| "logps/chosen": -358.14013671875, |
| "logps/rejected": -482.1717529296875, |
| "loss": 0.2434, |
| "rewards/accuracies": 0.5, |
| "rewards/chosen": -0.6463592648506165, |
| "rewards/margins": -0.21972429752349854, |
| "rewards/rejected": -0.4266350269317627, |
| "step": 164 |
| }, |
| { |
| "epoch": 0.28, |
| "learning_rate": 6.932773109243697e-07, |
| "logits/chosen": -1.6255576610565186, |
| "logits/rejected": -2.694222927093506, |
| "logps/chosen": -328.84320068359375, |
| "logps/rejected": -102.6138687133789, |
| "loss": 0.2171, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 0.14793242514133453, |
| "rewards/margins": 2.839181661605835, |
| "rewards/rejected": -2.691249132156372, |
| "step": 165 |
| }, |
| { |
| "epoch": 0.28, |
| "learning_rate": 6.974789915966386e-07, |
| "logits/chosen": -1.9384331703186035, |
| "logits/rejected": -1.580994725227356, |
| "logps/chosen": -276.12921142578125, |
| "logps/rejected": -316.6767578125, |
| "loss": 0.2836, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": -0.9446266889572144, |
| "rewards/margins": 9.762857437133789, |
| "rewards/rejected": -10.707484245300293, |
| "step": 166 |
| }, |
| { |
| "epoch": 0.28, |
| "learning_rate": 7.016806722689075e-07, |
| "logits/chosen": -1.6407663822174072, |
| "logits/rejected": -1.5907646417617798, |
| "logps/chosen": -514.5374145507812, |
| "logps/rejected": -433.6203308105469, |
| "loss": 0.2494, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 0.24427911639213562, |
| "rewards/margins": 2.523643970489502, |
| "rewards/rejected": -2.279364824295044, |
| "step": 167 |
| }, |
| { |
| "epoch": 0.28, |
| "learning_rate": 7.058823529411765e-07, |
| "logits/chosen": -1.674712061882019, |
| "logits/rejected": -1.6826207637786865, |
| "logps/chosen": -47.03229522705078, |
| "logps/rejected": -77.42449951171875, |
| "loss": 0.2308, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 0.0016428008675575256, |
| "rewards/margins": 1.911266565322876, |
| "rewards/rejected": -1.9096237421035767, |
| "step": 168 |
| }, |
| { |
| "epoch": 0.28, |
| "learning_rate": 7.100840336134454e-07, |
| "logits/chosen": -1.5853824615478516, |
| "logits/rejected": -1.8151202201843262, |
| "logps/chosen": -343.2923889160156, |
| "logps/rejected": -209.4898681640625, |
| "loss": 0.2194, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": -0.006474226713180542, |
| "rewards/margins": 7.342705726623535, |
| "rewards/rejected": -7.349180221557617, |
| "step": 169 |
| }, |
| { |
| "epoch": 0.29, |
| "learning_rate": 7.142857142857143e-07, |
| "logits/chosen": -1.7506383657455444, |
| "logits/rejected": -1.3358906507492065, |
| "logps/chosen": -263.41241455078125, |
| "logps/rejected": -275.55706787109375, |
| "loss": 0.2147, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": -0.8642104864120483, |
| "rewards/margins": 5.5388641357421875, |
| "rewards/rejected": -6.403074741363525, |
| "step": 170 |
| }, |
| { |
| "epoch": 0.29, |
| "learning_rate": 7.184873949579831e-07, |
| "logits/chosen": -1.4059754610061646, |
| "logits/rejected": -1.2317900657653809, |
| "logps/chosen": -191.6216278076172, |
| "logps/rejected": -363.2991027832031, |
| "loss": 0.2269, |
| "rewards/accuracies": 0.5, |
| "rewards/chosen": -0.5392516851425171, |
| "rewards/margins": 1.2023521661758423, |
| "rewards/rejected": -1.7416038513183594, |
| "step": 171 |
| }, |
| { |
| "epoch": 0.29, |
| "learning_rate": 7.226890756302521e-07, |
| "logits/chosen": -1.632811188697815, |
| "logits/rejected": -1.3805952072143555, |
| "logps/chosen": -133.94142150878906, |
| "logps/rejected": -269.5859680175781, |
| "loss": 0.2391, |
| "rewards/accuracies": 0.5, |
| "rewards/chosen": -0.4022440016269684, |
| "rewards/margins": 4.070891857147217, |
| "rewards/rejected": -4.473135948181152, |
| "step": 172 |
| }, |
| { |
| "epoch": 0.29, |
| "learning_rate": 7.268907563025209e-07, |
| "logits/chosen": -1.6576902866363525, |
| "logits/rejected": -1.4377576112747192, |
| "logps/chosen": -43.066829681396484, |
| "logps/rejected": -67.72735595703125, |
| "loss": 0.2507, |
| "rewards/accuracies": 0.5, |
| "rewards/chosen": -0.6717521548271179, |
| "rewards/margins": 0.4190084934234619, |
| "rewards/rejected": -1.0907607078552246, |
| "step": 173 |
| }, |
| { |
| "epoch": 0.29, |
| "learning_rate": 7.310924369747898e-07, |
| "logits/chosen": -1.5274075269699097, |
| "logits/rejected": -2.076913833618164, |
| "logps/chosen": -260.2308349609375, |
| "logps/rejected": -114.42774963378906, |
| "loss": 0.1975, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 1.2202541828155518, |
| "rewards/margins": 3.1372241973876953, |
| "rewards/rejected": -1.916969895362854, |
| "step": 174 |
| }, |
| { |
| "epoch": 0.3, |
| "learning_rate": 7.352941176470589e-07, |
| "logits/chosen": -1.3671880960464478, |
| "logits/rejected": -1.7833813428878784, |
| "logps/chosen": -409.7170715332031, |
| "logps/rejected": -275.7892150878906, |
| "loss": 0.2064, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": -0.522778332233429, |
| "rewards/margins": 5.268821716308594, |
| "rewards/rejected": -5.791600227355957, |
| "step": 175 |
| }, |
| { |
| "epoch": 0.3, |
| "learning_rate": 7.394957983193277e-07, |
| "logits/chosen": -2.0044264793395996, |
| "logits/rejected": -2.814589023590088, |
| "logps/chosen": -286.0587158203125, |
| "logps/rejected": -112.9316177368164, |
| "loss": 0.2077, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 0.33617284893989563, |
| "rewards/margins": 1.9382390975952148, |
| "rewards/rejected": -1.6020662784576416, |
| "step": 176 |
| }, |
| { |
| "epoch": 0.3, |
| "learning_rate": 7.436974789915966e-07, |
| "logits/chosen": -2.030181646347046, |
| "logits/rejected": -1.3561756610870361, |
| "logps/chosen": -192.53549194335938, |
| "logps/rejected": -259.68597412109375, |
| "loss": 0.2328, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": -0.3000236749649048, |
| "rewards/margins": 1.729806900024414, |
| "rewards/rejected": -2.0298304557800293, |
| "step": 177 |
| }, |
| { |
| "epoch": 0.3, |
| "learning_rate": 7.478991596638656e-07, |
| "logits/chosen": -1.314343810081482, |
| "logits/rejected": -0.948784351348877, |
| "logps/chosen": -419.88946533203125, |
| "logps/rejected": -249.33352661132812, |
| "loss": 0.2275, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": -0.001434326171875, |
| "rewards/margins": 8.745747566223145, |
| "rewards/rejected": -8.74718189239502, |
| "step": 178 |
| }, |
| { |
| "epoch": 0.3, |
| "learning_rate": 7.521008403361344e-07, |
| "logits/chosen": -1.3746141195297241, |
| "logits/rejected": -1.2344566583633423, |
| "logps/chosen": -33.07182693481445, |
| "logps/rejected": -67.85247802734375, |
| "loss": 0.2025, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": -0.7276212573051453, |
| "rewards/margins": 2.376377820968628, |
| "rewards/rejected": -3.103999137878418, |
| "step": 179 |
| }, |
| { |
| "epoch": 0.3, |
| "learning_rate": 7.563025210084033e-07, |
| "logits/chosen": -1.0103384256362915, |
| "logits/rejected": -2.038599967956543, |
| "logps/chosen": -425.00360107421875, |
| "logps/rejected": -136.74205017089844, |
| "loss": 0.2085, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 0.21104279160499573, |
| "rewards/margins": 5.775498390197754, |
| "rewards/rejected": -5.564455986022949, |
| "step": 180 |
| }, |
| { |
| "epoch": 0.31, |
| "learning_rate": 7.605042016806722e-07, |
| "logits/chosen": -1.7550321817398071, |
| "logits/rejected": -2.278670310974121, |
| "logps/chosen": -90.49088287353516, |
| "logps/rejected": -64.57603454589844, |
| "loss": 0.2221, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": -0.6065499782562256, |
| "rewards/margins": 2.4836509227752686, |
| "rewards/rejected": -3.090200901031494, |
| "step": 181 |
| }, |
| { |
| "epoch": 0.31, |
| "learning_rate": 7.647058823529411e-07, |
| "logits/chosen": -1.554534912109375, |
| "logits/rejected": -2.033510684967041, |
| "logps/chosen": -275.0732116699219, |
| "logps/rejected": -270.5262145996094, |
| "loss": 0.2019, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 0.631757378578186, |
| "rewards/margins": 5.426800727844238, |
| "rewards/rejected": -4.795043468475342, |
| "step": 182 |
| }, |
| { |
| "epoch": 0.31, |
| "learning_rate": 7.689075630252101e-07, |
| "logits/chosen": -1.0876420736312866, |
| "logits/rejected": -1.9512230157852173, |
| "logps/chosen": -462.33001708984375, |
| "logps/rejected": -397.541259765625, |
| "loss": 0.2168, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 0.3109893798828125, |
| "rewards/margins": 2.645329475402832, |
| "rewards/rejected": -2.3343400955200195, |
| "step": 183 |
| }, |
| { |
| "epoch": 0.31, |
| "learning_rate": 7.73109243697479e-07, |
| "logits/chosen": -2.234156847000122, |
| "logits/rejected": -2.153066873550415, |
| "logps/chosen": -59.26795196533203, |
| "logps/rejected": -123.72718811035156, |
| "loss": 0.2057, |
| "rewards/accuracies": 0.5, |
| "rewards/chosen": -0.2919696867465973, |
| "rewards/margins": 0.5463926196098328, |
| "rewards/rejected": -0.8383622765541077, |
| "step": 184 |
| }, |
| { |
| "epoch": 0.31, |
| "learning_rate": 7.773109243697479e-07, |
| "logits/chosen": -1.892674446105957, |
| "logits/rejected": -0.7566059827804565, |
| "logps/chosen": -128.3382110595703, |
| "logps/rejected": -352.1267395019531, |
| "loss": 0.2409, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": -1.1412032842636108, |
| "rewards/margins": 3.8913497924804688, |
| "rewards/rejected": -5.032553195953369, |
| "step": 185 |
| }, |
| { |
| "epoch": 0.31, |
| "learning_rate": 7.815126050420168e-07, |
| "logits/chosen": -1.3167263269424438, |
| "logits/rejected": -1.3082215785980225, |
| "logps/chosen": -188.31219482421875, |
| "logps/rejected": -194.53448486328125, |
| "loss": 0.2037, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": -0.14720916748046875, |
| "rewards/margins": 4.053531169891357, |
| "rewards/rejected": -4.200739860534668, |
| "step": 186 |
| }, |
| { |
| "epoch": 0.32, |
| "learning_rate": 7.857142857142856e-07, |
| "logits/chosen": -1.758721113204956, |
| "logits/rejected": -1.5897212028503418, |
| "logps/chosen": -490.67822265625, |
| "logps/rejected": -506.51971435546875, |
| "loss": 0.1959, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": -0.8040527701377869, |
| "rewards/margins": 1.7363312244415283, |
| "rewards/rejected": -2.540383815765381, |
| "step": 187 |
| }, |
| { |
| "epoch": 0.32, |
| "learning_rate": 7.899159663865545e-07, |
| "logits/chosen": -1.2402632236480713, |
| "logits/rejected": -2.1518731117248535, |
| "logps/chosen": -440.912353515625, |
| "logps/rejected": -190.54396057128906, |
| "loss": 0.201, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 0.4817703366279602, |
| "rewards/margins": 2.1288094520568848, |
| "rewards/rejected": -1.6470390558242798, |
| "step": 188 |
| }, |
| { |
| "epoch": 0.32, |
| "learning_rate": 7.941176470588235e-07, |
| "logits/chosen": -1.8251270055770874, |
| "logits/rejected": -1.9880740642547607, |
| "logps/chosen": -525.2092895507812, |
| "logps/rejected": -373.4764404296875, |
| "loss": 0.2224, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 0.25654488801956177, |
| "rewards/margins": 3.9607901573181152, |
| "rewards/rejected": -3.704245090484619, |
| "step": 189 |
| }, |
| { |
| "epoch": 0.32, |
| "learning_rate": 7.983193277310924e-07, |
| "logits/chosen": -0.7393491864204407, |
| "logits/rejected": -1.706693172454834, |
| "logps/chosen": -540.9542236328125, |
| "logps/rejected": -206.2239990234375, |
| "loss": 0.1797, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 0.6728149652481079, |
| "rewards/margins": 7.774383544921875, |
| "rewards/rejected": -7.101568222045898, |
| "step": 190 |
| }, |
| { |
| "epoch": 0.32, |
| "learning_rate": 8.025210084033613e-07, |
| "logits/chosen": -1.3957598209381104, |
| "logits/rejected": -1.6105530261993408, |
| "logps/chosen": -451.7230224609375, |
| "logps/rejected": -323.118408203125, |
| "loss": 0.1768, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": -0.5888229608535767, |
| "rewards/margins": 6.918619155883789, |
| "rewards/rejected": -7.507441997528076, |
| "step": 191 |
| }, |
| { |
| "epoch": 0.32, |
| "learning_rate": 8.067226890756303e-07, |
| "logits/chosen": -2.012829065322876, |
| "logits/rejected": -1.7003165483474731, |
| "logps/chosen": -231.9615936279297, |
| "logps/rejected": -416.9687805175781, |
| "loss": 0.2369, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": -0.45652008056640625, |
| "rewards/margins": 4.130596160888672, |
| "rewards/rejected": -4.587116241455078, |
| "step": 192 |
| }, |
| { |
| "epoch": 0.33, |
| "learning_rate": 8.109243697478991e-07, |
| "logits/chosen": -1.829034447669983, |
| "logits/rejected": -1.623055338859558, |
| "logps/chosen": -405.52508544921875, |
| "logps/rejected": -334.392578125, |
| "loss": 0.2274, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 0.20874272286891937, |
| "rewards/margins": 1.027557611465454, |
| "rewards/rejected": -0.8188148736953735, |
| "step": 193 |
| }, |
| { |
| "epoch": 0.33, |
| "learning_rate": 8.15126050420168e-07, |
| "logits/chosen": -0.5686680674552917, |
| "logits/rejected": -0.6403495669364929, |
| "logps/chosen": -654.5108642578125, |
| "logps/rejected": -387.836669921875, |
| "loss": 0.2104, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": -0.738049328327179, |
| "rewards/margins": 5.746264457702637, |
| "rewards/rejected": -6.48431396484375, |
| "step": 194 |
| }, |
| { |
| "epoch": 0.33, |
| "learning_rate": 8.19327731092437e-07, |
| "logits/chosen": -1.5858798027038574, |
| "logits/rejected": -1.6420996189117432, |
| "logps/chosen": -116.9679946899414, |
| "logps/rejected": -202.39669799804688, |
| "loss": 0.1847, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": -0.6328601837158203, |
| "rewards/margins": 3.9840362071990967, |
| "rewards/rejected": -4.616896629333496, |
| "step": 195 |
| }, |
| { |
| "epoch": 0.33, |
| "learning_rate": 8.235294117647058e-07, |
| "logits/chosen": -1.6367160081863403, |
| "logits/rejected": -1.6892379522323608, |
| "logps/chosen": -175.56951904296875, |
| "logps/rejected": -154.72865295410156, |
| "loss": 0.1826, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": -0.052147675305604935, |
| "rewards/margins": 6.1080498695373535, |
| "rewards/rejected": -6.1601972579956055, |
| "step": 196 |
| }, |
| { |
| "epoch": 0.33, |
| "learning_rate": 8.277310924369747e-07, |
| "logits/chosen": -1.7774677276611328, |
| "logits/rejected": -2.791229248046875, |
| "logps/chosen": -339.4785461425781, |
| "logps/rejected": -127.16014099121094, |
| "loss": 0.2728, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": -0.5950691103935242, |
| "rewards/margins": 5.04873514175415, |
| "rewards/rejected": -5.64380407333374, |
| "step": 197 |
| }, |
| { |
| "epoch": 0.33, |
| "learning_rate": 8.319327731092437e-07, |
| "logits/chosen": -1.5082173347473145, |
| "logits/rejected": -1.6745432615280151, |
| "logps/chosen": -78.62190246582031, |
| "logps/rejected": -35.42887496948242, |
| "loss": 0.2192, |
| "rewards/accuracies": 0.5, |
| "rewards/chosen": -0.4798523187637329, |
| "rewards/margins": 1.3703386783599854, |
| "rewards/rejected": -1.8501909971237183, |
| "step": 198 |
| }, |
| { |
| "epoch": 0.34, |
| "learning_rate": 8.361344537815126e-07, |
| "logits/chosen": -1.651850938796997, |
| "logits/rejected": -2.2229156494140625, |
| "logps/chosen": -417.2205810546875, |
| "logps/rejected": -179.9000244140625, |
| "loss": 0.1873, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": -0.32609206438064575, |
| "rewards/margins": 2.4329936504364014, |
| "rewards/rejected": -2.7590856552124023, |
| "step": 199 |
| }, |
| { |
| "epoch": 0.34, |
| "learning_rate": 8.403361344537815e-07, |
| "logits/chosen": -1.3835643529891968, |
| "logits/rejected": -1.4247071743011475, |
| "logps/chosen": -68.0342025756836, |
| "logps/rejected": -78.13129425048828, |
| "loss": 0.2554, |
| "rewards/accuracies": 0.5, |
| "rewards/chosen": -0.7295857667922974, |
| "rewards/margins": 2.134474277496338, |
| "rewards/rejected": -2.8640599250793457, |
| "step": 200 |
| }, |
| { |
| "epoch": 0.34, |
| "learning_rate": 8.445378151260503e-07, |
| "logits/chosen": -1.367997169494629, |
| "logits/rejected": -1.0557571649551392, |
| "logps/chosen": -353.5372314453125, |
| "logps/rejected": -270.9771728515625, |
| "loss": 0.1961, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": -0.3901122808456421, |
| "rewards/margins": 8.15034294128418, |
| "rewards/rejected": -8.540454864501953, |
| "step": 201 |
| }, |
| { |
| "epoch": 0.34, |
| "learning_rate": 8.487394957983193e-07, |
| "logits/chosen": -1.7191728353500366, |
| "logits/rejected": -1.4392447471618652, |
| "logps/chosen": -22.586790084838867, |
| "logps/rejected": -131.69461059570312, |
| "loss": 0.1911, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": -0.18322506546974182, |
| "rewards/margins": 4.684540748596191, |
| "rewards/rejected": -4.867766380310059, |
| "step": 202 |
| }, |
| { |
| "epoch": 0.34, |
| "learning_rate": 8.529411764705882e-07, |
| "logits/chosen": -1.5832895040512085, |
| "logits/rejected": -1.645804524421692, |
| "logps/chosen": -67.92455291748047, |
| "logps/rejected": -73.83755493164062, |
| "loss": 0.2242, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": -0.22629156708717346, |
| "rewards/margins": 1.8930432796478271, |
| "rewards/rejected": -2.1193346977233887, |
| "step": 203 |
| }, |
| { |
| "epoch": 0.34, |
| "learning_rate": 8.57142857142857e-07, |
| "logits/chosen": -1.3173104524612427, |
| "logits/rejected": -1.7601145505905151, |
| "logps/chosen": -541.1436767578125, |
| "logps/rejected": -157.87823486328125, |
| "loss": 0.2101, |
| "rewards/accuracies": 0.5, |
| "rewards/chosen": -1.8486511707305908, |
| "rewards/margins": 1.4881985187530518, |
| "rewards/rejected": -3.3368496894836426, |
| "step": 204 |
| }, |
| { |
| "epoch": 0.35, |
| "learning_rate": 8.613445378151261e-07, |
| "logits/chosen": -0.26975634694099426, |
| "logits/rejected": -0.21657763421535492, |
| "logps/chosen": -448.43878173828125, |
| "logps/rejected": -328.8587646484375, |
| "loss": 0.1599, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": -2.2071120738983154, |
| "rewards/margins": 9.672096252441406, |
| "rewards/rejected": -11.879209518432617, |
| "step": 205 |
| }, |
| { |
| "epoch": 0.35, |
| "learning_rate": 8.65546218487395e-07, |
| "logits/chosen": -1.3258733749389648, |
| "logits/rejected": -1.7010501623153687, |
| "logps/chosen": -280.8769836425781, |
| "logps/rejected": -82.33454132080078, |
| "loss": 0.2117, |
| "rewards/accuracies": 0.5, |
| "rewards/chosen": -0.4332069754600525, |
| "rewards/margins": 1.0438206195831299, |
| "rewards/rejected": -1.4770275354385376, |
| "step": 206 |
| }, |
| { |
| "epoch": 0.35, |
| "learning_rate": 8.697478991596638e-07, |
| "logits/chosen": -1.833431601524353, |
| "logits/rejected": -1.4919263124465942, |
| "logps/chosen": -550.7933959960938, |
| "logps/rejected": -366.83819580078125, |
| "loss": 0.2718, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": -0.9451126456260681, |
| "rewards/margins": 9.382735252380371, |
| "rewards/rejected": -10.327848434448242, |
| "step": 207 |
| }, |
| { |
| "epoch": 0.35, |
| "learning_rate": 8.739495798319328e-07, |
| "logits/chosen": -1.6695424318313599, |
| "logits/rejected": -2.533496618270874, |
| "logps/chosen": -193.40089416503906, |
| "logps/rejected": -144.17640686035156, |
| "loss": 0.1894, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": -0.4194018244743347, |
| "rewards/margins": 3.9117088317871094, |
| "rewards/rejected": -4.33111047744751, |
| "step": 208 |
| }, |
| { |
| "epoch": 0.35, |
| "learning_rate": 8.781512605042016e-07, |
| "logits/chosen": -1.0884276628494263, |
| "logits/rejected": -1.7498663663864136, |
| "logps/chosen": -236.72494506835938, |
| "logps/rejected": -124.72262573242188, |
| "loss": 0.1856, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": -0.06562767922878265, |
| "rewards/margins": 4.05698823928833, |
| "rewards/rejected": -4.122615814208984, |
| "step": 209 |
| }, |
| { |
| "epoch": 0.35, |
| "learning_rate": 8.823529411764705e-07, |
| "logits/chosen": -1.2553131580352783, |
| "logits/rejected": -1.654006838798523, |
| "logps/chosen": -549.10400390625, |
| "logps/rejected": -261.51226806640625, |
| "loss": 0.1827, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": -0.3053421378135681, |
| "rewards/margins": 7.806386947631836, |
| "rewards/rejected": -8.11172866821289, |
| "step": 210 |
| }, |
| { |
| "epoch": 0.36, |
| "learning_rate": 8.865546218487394e-07, |
| "logits/chosen": -1.1624391078948975, |
| "logits/rejected": -1.2052842378616333, |
| "logps/chosen": -47.10140609741211, |
| "logps/rejected": -77.38719177246094, |
| "loss": 0.1972, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": -0.18456250429153442, |
| "rewards/margins": 4.566642761230469, |
| "rewards/rejected": -4.7512054443359375, |
| "step": 211 |
| }, |
| { |
| "epoch": 0.36, |
| "learning_rate": 8.907563025210084e-07, |
| "logits/chosen": -2.755701780319214, |
| "logits/rejected": -1.7558363676071167, |
| "logps/chosen": -249.60147094726562, |
| "logps/rejected": -118.43754577636719, |
| "loss": 0.1763, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": -0.39346620440483093, |
| "rewards/margins": 4.152596473693848, |
| "rewards/rejected": -4.546062469482422, |
| "step": 212 |
| }, |
| { |
| "epoch": 0.36, |
| "learning_rate": 8.949579831932773e-07, |
| "logits/chosen": -1.4844564199447632, |
| "logits/rejected": -2.2958929538726807, |
| "logps/chosen": -372.4450988769531, |
| "logps/rejected": -301.98016357421875, |
| "loss": 0.215, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": -0.4042709469795227, |
| "rewards/margins": 3.7409682273864746, |
| "rewards/rejected": -4.145238876342773, |
| "step": 213 |
| }, |
| { |
| "epoch": 0.36, |
| "learning_rate": 8.991596638655462e-07, |
| "logits/chosen": -1.6278800964355469, |
| "logits/rejected": -2.122774600982666, |
| "logps/chosen": -185.77044677734375, |
| "logps/rejected": -173.4629364013672, |
| "loss": 0.2481, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 0.15187864005565643, |
| "rewards/margins": 5.104752540588379, |
| "rewards/rejected": -4.952874183654785, |
| "step": 214 |
| }, |
| { |
| "epoch": 0.36, |
| "learning_rate": 9.033613445378151e-07, |
| "logits/chosen": -1.2420963048934937, |
| "logits/rejected": -1.2279390096664429, |
| "logps/chosen": -56.64626693725586, |
| "logps/rejected": -95.80750274658203, |
| "loss": 0.2378, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 0.10336628556251526, |
| "rewards/margins": 4.724967956542969, |
| "rewards/rejected": -4.6216020584106445, |
| "step": 215 |
| }, |
| { |
| "epoch": 0.36, |
| "learning_rate": 9.07563025210084e-07, |
| "logits/chosen": -0.6830325126647949, |
| "logits/rejected": -0.7307128310203552, |
| "logps/chosen": -509.64990234375, |
| "logps/rejected": -261.376220703125, |
| "loss": 0.2494, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": -1.0829544067382812, |
| "rewards/margins": 7.50957727432251, |
| "rewards/rejected": -8.592531204223633, |
| "step": 216 |
| }, |
| { |
| "epoch": 0.37, |
| "learning_rate": 9.117647058823529e-07, |
| "logits/chosen": -1.6173702478408813, |
| "logits/rejected": -1.7928811311721802, |
| "logps/chosen": -288.9893798828125, |
| "logps/rejected": -148.32977294921875, |
| "loss": 0.2141, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": -0.27213987708091736, |
| "rewards/margins": 6.079885482788086, |
| "rewards/rejected": -6.352025032043457, |
| "step": 217 |
| }, |
| { |
| "epoch": 0.37, |
| "learning_rate": 9.159663865546218e-07, |
| "logits/chosen": -1.3861663341522217, |
| "logits/rejected": -1.4028596878051758, |
| "logps/chosen": -72.13826751708984, |
| "logps/rejected": -156.20257568359375, |
| "loss": 0.2067, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": -0.7177965044975281, |
| "rewards/margins": 5.9151411056518555, |
| "rewards/rejected": -6.632937908172607, |
| "step": 218 |
| }, |
| { |
| "epoch": 0.37, |
| "learning_rate": 9.201680672268907e-07, |
| "logits/chosen": -1.943166971206665, |
| "logits/rejected": -1.703479290008545, |
| "logps/chosen": -254.5187530517578, |
| "logps/rejected": -391.06610107421875, |
| "loss": 0.198, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": -0.8562338948249817, |
| "rewards/margins": 6.539711952209473, |
| "rewards/rejected": -7.395946025848389, |
| "step": 219 |
| }, |
| { |
| "epoch": 0.37, |
| "learning_rate": 9.243697478991597e-07, |
| "logits/chosen": -1.8535621166229248, |
| "logits/rejected": -2.3986668586730957, |
| "logps/chosen": -221.98687744140625, |
| "logps/rejected": -164.90716552734375, |
| "loss": 0.2149, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": -0.19046229124069214, |
| "rewards/margins": 3.5672414302825928, |
| "rewards/rejected": -3.7577037811279297, |
| "step": 220 |
| }, |
| { |
| "epoch": 0.37, |
| "learning_rate": 9.285714285714285e-07, |
| "logits/chosen": -1.5400798320770264, |
| "logits/rejected": -0.977436900138855, |
| "logps/chosen": -38.834285736083984, |
| "logps/rejected": -214.7268524169922, |
| "loss": 0.2612, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": -0.6614927649497986, |
| "rewards/margins": 6.089022636413574, |
| "rewards/rejected": -6.750515460968018, |
| "step": 221 |
| }, |
| { |
| "epoch": 0.37, |
| "learning_rate": 9.327731092436975e-07, |
| "logits/chosen": -1.2816520929336548, |
| "logits/rejected": -1.7735843658447266, |
| "logps/chosen": -653.9671630859375, |
| "logps/rejected": -604.3455810546875, |
| "loss": 0.2203, |
| "rewards/accuracies": 0.5, |
| "rewards/chosen": -0.04913024976849556, |
| "rewards/margins": 0.8042449355125427, |
| "rewards/rejected": -0.8533751964569092, |
| "step": 222 |
| }, |
| { |
| "epoch": 0.38, |
| "learning_rate": 9.369747899159663e-07, |
| "logits/chosen": -0.8977038264274597, |
| "logits/rejected": -1.0373615026474, |
| "logps/chosen": -559.4537963867188, |
| "logps/rejected": -400.3348388671875, |
| "loss": 0.2109, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": -0.4869690239429474, |
| "rewards/margins": 4.854780673980713, |
| "rewards/rejected": -5.341749668121338, |
| "step": 223 |
| }, |
| { |
| "epoch": 0.38, |
| "learning_rate": 9.411764705882352e-07, |
| "logits/chosen": -1.5731453895568848, |
| "logits/rejected": -1.3540092706680298, |
| "logps/chosen": -362.89971923828125, |
| "logps/rejected": -213.01864624023438, |
| "loss": 0.2251, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": -0.5867255330085754, |
| "rewards/margins": 7.263546466827393, |
| "rewards/rejected": -7.850271701812744, |
| "step": 224 |
| }, |
| { |
| "epoch": 0.38, |
| "learning_rate": 9.453781512605042e-07, |
| "logits/chosen": -0.8027037978172302, |
| "logits/rejected": -1.3039056062698364, |
| "logps/chosen": -191.79257202148438, |
| "logps/rejected": -130.3656768798828, |
| "loss": 0.2174, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 0.5622925162315369, |
| "rewards/margins": 2.7290549278259277, |
| "rewards/rejected": -2.166762590408325, |
| "step": 225 |
| }, |
| { |
| "epoch": 0.38, |
| "learning_rate": 9.49579831932773e-07, |
| "logits/chosen": -1.7100781202316284, |
| "logits/rejected": -1.5751913785934448, |
| "logps/chosen": -51.673133850097656, |
| "logps/rejected": -87.66356658935547, |
| "loss": 0.1912, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": -0.03124428540468216, |
| "rewards/margins": 3.016190528869629, |
| "rewards/rejected": -3.0474348068237305, |
| "step": 226 |
| }, |
| { |
| "epoch": 0.38, |
| "learning_rate": 9.53781512605042e-07, |
| "logits/chosen": -1.7360178232192993, |
| "logits/rejected": -1.5387942790985107, |
| "logps/chosen": -623.2257690429688, |
| "logps/rejected": -446.02789306640625, |
| "loss": 0.2341, |
| "rewards/accuracies": 0.5, |
| "rewards/chosen": -0.8850250244140625, |
| "rewards/margins": 0.6927383542060852, |
| "rewards/rejected": -1.577763319015503, |
| "step": 227 |
| }, |
| { |
| "epoch": 0.38, |
| "learning_rate": 9.579831932773109e-07, |
| "logits/chosen": -1.3128571510314941, |
| "logits/rejected": -1.2455374002456665, |
| "logps/chosen": -491.56341552734375, |
| "logps/rejected": -306.54974365234375, |
| "loss": 0.2017, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": -0.8349395990371704, |
| "rewards/margins": 7.316784381866455, |
| "rewards/rejected": -8.151723861694336, |
| "step": 228 |
| }, |
| { |
| "epoch": 0.39, |
| "learning_rate": 9.621848739495798e-07, |
| "logits/chosen": -0.9198746681213379, |
| "logits/rejected": -1.0064659118652344, |
| "logps/chosen": -170.07423400878906, |
| "logps/rejected": -151.16380310058594, |
| "loss": 0.2215, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": -0.42428553104400635, |
| "rewards/margins": 4.414795398712158, |
| "rewards/rejected": -4.839080810546875, |
| "step": 229 |
| }, |
| { |
| "epoch": 0.39, |
| "learning_rate": 9.663865546218487e-07, |
| "logits/chosen": -0.19439470767974854, |
| "logits/rejected": -0.1546761840581894, |
| "logps/chosen": -443.8742980957031, |
| "logps/rejected": -319.8472595214844, |
| "loss": 0.2317, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": -1.7987457513809204, |
| "rewards/margins": 9.688015937805176, |
| "rewards/rejected": -11.486761093139648, |
| "step": 230 |
| }, |
| { |
| "epoch": 0.39, |
| "learning_rate": 9.705882352941176e-07, |
| "logits/chosen": -0.9490076303482056, |
| "logits/rejected": -0.48910650610923767, |
| "logps/chosen": -513.2789306640625, |
| "logps/rejected": -340.21929931640625, |
| "loss": 0.2053, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": -1.3714752197265625, |
| "rewards/margins": 9.171676635742188, |
| "rewards/rejected": -10.54315185546875, |
| "step": 231 |
| }, |
| { |
| "epoch": 0.39, |
| "learning_rate": 9.747899159663866e-07, |
| "logits/chosen": -1.1433497667312622, |
| "logits/rejected": -1.3219791650772095, |
| "logps/chosen": -505.0430908203125, |
| "logps/rejected": -255.84214782714844, |
| "loss": 0.2021, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": -0.42404481768608093, |
| "rewards/margins": 8.78639030456543, |
| "rewards/rejected": -9.21043586730957, |
| "step": 232 |
| }, |
| { |
| "epoch": 0.39, |
| "learning_rate": 9.789915966386553e-07, |
| "logits/chosen": -1.0191929340362549, |
| "logits/rejected": -1.4857556819915771, |
| "logps/chosen": -178.77206420898438, |
| "logps/rejected": -129.4377899169922, |
| "loss": 0.2355, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": -0.5842880606651306, |
| "rewards/margins": 3.4605538845062256, |
| "rewards/rejected": -4.044841766357422, |
| "step": 233 |
| }, |
| { |
| "epoch": 0.39, |
| "learning_rate": 9.831932773109242e-07, |
| "logits/chosen": -1.518091082572937, |
| "logits/rejected": -1.1380579471588135, |
| "logps/chosen": -144.8524627685547, |
| "logps/rejected": -124.58405303955078, |
| "loss": 0.1858, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": -0.12543268501758575, |
| "rewards/margins": 5.242918014526367, |
| "rewards/rejected": -5.368350982666016, |
| "step": 234 |
| }, |
| { |
| "epoch": 0.4, |
| "learning_rate": 9.873949579831934e-07, |
| "logits/chosen": -1.8797619342803955, |
| "logits/rejected": -1.4644118547439575, |
| "logps/chosen": -70.80252838134766, |
| "logps/rejected": -90.79109191894531, |
| "loss": 0.2319, |
| "rewards/accuracies": 0.5, |
| "rewards/chosen": -1.3334128856658936, |
| "rewards/margins": 1.8264957666397095, |
| "rewards/rejected": -3.1599087715148926, |
| "step": 235 |
| }, |
| { |
| "epoch": 0.4, |
| "learning_rate": 9.91596638655462e-07, |
| "logits/chosen": -0.9166591763496399, |
| "logits/rejected": -0.93157559633255, |
| "logps/chosen": -12.770793914794922, |
| "logps/rejected": -55.15324783325195, |
| "loss": 0.1993, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": -0.37832537293434143, |
| "rewards/margins": 3.1305532455444336, |
| "rewards/rejected": -3.508878707885742, |
| "step": 236 |
| }, |
| { |
| "epoch": 0.4, |
| "learning_rate": 9.95798319327731e-07, |
| "logits/chosen": -1.6806684732437134, |
| "logits/rejected": -2.278653144836426, |
| "logps/chosen": -293.66644287109375, |
| "logps/rejected": -160.80221557617188, |
| "loss": 0.1972, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 0.09649372100830078, |
| "rewards/margins": 8.160371780395508, |
| "rewards/rejected": -8.063878059387207, |
| "step": 237 |
| }, |
| { |
| "epoch": 0.4, |
| "learning_rate": 1e-06, |
| "logits/chosen": -0.28713756799697876, |
| "logits/rejected": -0.2611769437789917, |
| "logps/chosen": -21.57666778564453, |
| "logps/rejected": -82.406982421875, |
| "loss": 0.2085, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": -0.08337266743183136, |
| "rewards/margins": 4.99355936050415, |
| "rewards/rejected": -5.076931953430176, |
| "step": 238 |
| }, |
| { |
| "epoch": 0.4, |
| "learning_rate": 9.99999458185223e-07, |
| "logits/chosen": -2.185724973678589, |
| "logits/rejected": -2.134838104248047, |
| "logps/chosen": -37.73735427856445, |
| "logps/rejected": -141.9013671875, |
| "loss": 0.1863, |
| "rewards/accuracies": 0.5, |
| "rewards/chosen": -1.4596128463745117, |
| "rewards/margins": 5.266613960266113, |
| "rewards/rejected": -6.726226806640625, |
| "step": 239 |
| }, |
| { |
| "epoch": 0.4, |
| "learning_rate": 9.999978327420662e-07, |
| "logits/chosen": -1.203190565109253, |
| "logits/rejected": -2.1939120292663574, |
| "logps/chosen": -514.1233520507812, |
| "logps/rejected": -75.5627670288086, |
| "loss": 0.2014, |
| "rewards/accuracies": 0.5, |
| "rewards/chosen": 2.2211365699768066, |
| "rewards/margins": 3.9060745239257812, |
| "rewards/rejected": -1.684937834739685, |
| "step": 240 |
| }, |
| { |
| "epoch": 0.41, |
| "learning_rate": 9.999951236740525e-07, |
| "logits/chosen": -1.3303395509719849, |
| "logits/rejected": -1.934448480606079, |
| "logps/chosen": -135.79515075683594, |
| "logps/rejected": -107.17416381835938, |
| "loss": 0.1887, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 0.06375274807214737, |
| "rewards/margins": 2.7002902030944824, |
| "rewards/rejected": -2.636537551879883, |
| "step": 241 |
| }, |
| { |
| "epoch": 0.41, |
| "learning_rate": 9.999913309870528e-07, |
| "logits/chosen": -1.44986891746521, |
| "logits/rejected": -1.9651787281036377, |
| "logps/chosen": -200.12448120117188, |
| "logps/rejected": -163.63800048828125, |
| "loss": 0.1916, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 0.016937255859375, |
| "rewards/margins": 2.1956772804260254, |
| "rewards/rejected": -2.1787400245666504, |
| "step": 242 |
| }, |
| { |
| "epoch": 0.41, |
| "learning_rate": 9.999864546892874e-07, |
| "logits/chosen": -1.2548986673355103, |
| "logits/rejected": -1.1517094373703003, |
| "logps/chosen": -21.825084686279297, |
| "logps/rejected": -100.38935852050781, |
| "loss": 0.2183, |
| "rewards/accuracies": 0.5, |
| "rewards/chosen": -1.235842227935791, |
| "rewards/margins": 4.932180404663086, |
| "rewards/rejected": -6.168022155761719, |
| "step": 243 |
| }, |
| { |
| "epoch": 0.41, |
| "learning_rate": 9.99980494791324e-07, |
| "logits/chosen": -1.3461451530456543, |
| "logits/rejected": -2.2621426582336426, |
| "logps/chosen": -529.697998046875, |
| "logps/rejected": -53.855751037597656, |
| "loss": 0.2415, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": -0.11737975478172302, |
| "rewards/margins": 2.4394328594207764, |
| "rewards/rejected": -2.5568125247955322, |
| "step": 244 |
| }, |
| { |
| "epoch": 0.41, |
| "learning_rate": 9.999734513060793e-07, |
| "logits/chosen": -1.6569421291351318, |
| "logits/rejected": -1.4833012819290161, |
| "logps/chosen": -21.329483032226562, |
| "logps/rejected": -213.5611572265625, |
| "loss": 0.1858, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": -0.2380591332912445, |
| "rewards/margins": 10.061112403869629, |
| "rewards/rejected": -10.299171447753906, |
| "step": 245 |
| }, |
| { |
| "epoch": 0.41, |
| "learning_rate": 9.999653242488186e-07, |
| "logits/chosen": -1.3467878103256226, |
| "logits/rejected": -1.1173731088638306, |
| "logps/chosen": -388.6378173828125, |
| "logps/rejected": -294.2225341796875, |
| "loss": 0.1943, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": -0.8519317507743835, |
| "rewards/margins": 9.176179885864258, |
| "rewards/rejected": -10.028111457824707, |
| "step": 246 |
| }, |
| { |
| "epoch": 0.42, |
| "learning_rate": 9.999561136371554e-07, |
| "logits/chosen": -1.8872058391571045, |
| "logits/rejected": -1.6369519233703613, |
| "logps/chosen": -214.09727478027344, |
| "logps/rejected": -376.61334228515625, |
| "loss": 0.1797, |
| "rewards/accuracies": 0.0, |
| "rewards/chosen": -0.4823310971260071, |
| "rewards/margins": -0.6333345770835876, |
| "rewards/rejected": 0.15100345015525818, |
| "step": 247 |
| }, |
| { |
| "epoch": 0.42, |
| "learning_rate": 9.99945819491051e-07, |
| "logits/chosen": -1.6549506187438965, |
| "logits/rejected": -1.558489441871643, |
| "logps/chosen": -263.10980224609375, |
| "logps/rejected": -266.67388916015625, |
| "loss": 0.1845, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": -1.2903716564178467, |
| "rewards/margins": 5.641432285308838, |
| "rewards/rejected": -6.931804180145264, |
| "step": 248 |
| }, |
| { |
| "epoch": 0.42, |
| "learning_rate": 9.99934441832816e-07, |
| "logits/chosen": -1.7296139001846313, |
| "logits/rejected": -1.958460807800293, |
| "logps/chosen": -219.89266967773438, |
| "logps/rejected": -296.84906005859375, |
| "loss": 0.1885, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": -0.706303060054779, |
| "rewards/margins": 5.47901725769043, |
| "rewards/rejected": -6.1853203773498535, |
| "step": 249 |
| }, |
| { |
| "epoch": 0.42, |
| "learning_rate": 9.999219806871085e-07, |
| "logits/chosen": -1.1240818500518799, |
| "logits/rejected": -1.4093561172485352, |
| "logps/chosen": -481.0801086425781, |
| "logps/rejected": -516.7035522460938, |
| "loss": 0.2172, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 0.33460843563079834, |
| "rewards/margins": 6.402099609375, |
| "rewards/rejected": -6.067491054534912, |
| "step": 250 |
| }, |
| { |
| "epoch": 0.42, |
| "learning_rate": 9.99908436080935e-07, |
| "logits/chosen": -0.8841766119003296, |
| "logits/rejected": -1.7438700199127197, |
| "logps/chosen": -250.23077392578125, |
| "logps/rejected": -161.5799560546875, |
| "loss": 0.2042, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 0.4004615843296051, |
| "rewards/margins": 6.907259941101074, |
| "rewards/rejected": -6.50679874420166, |
| "step": 251 |
| }, |
| { |
| "epoch": 0.42, |
| "learning_rate": 9.998938080436503e-07, |
| "logits/chosen": -1.61567223072052, |
| "logits/rejected": -2.236949920654297, |
| "logps/chosen": -63.57852554321289, |
| "logps/rejected": -129.1857452392578, |
| "loss": 0.2339, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": -1.2066521644592285, |
| "rewards/margins": 4.170039176940918, |
| "rewards/rejected": -5.3766913414001465, |
| "step": 252 |
| }, |
| { |
| "epoch": 0.43, |
| "learning_rate": 9.998780966069568e-07, |
| "logits/chosen": -1.5731325149536133, |
| "logits/rejected": -1.5780061483383179, |
| "logps/chosen": -24.21834373474121, |
| "logps/rejected": -83.41648864746094, |
| "loss": 0.1516, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": -0.8808462023735046, |
| "rewards/margins": 3.9870688915252686, |
| "rewards/rejected": -4.867915153503418, |
| "step": 253 |
| }, |
| { |
| "epoch": 0.43, |
| "learning_rate": 9.998613018049058e-07, |
| "logits/chosen": -1.2458865642547607, |
| "logits/rejected": -1.5161921977996826, |
| "logps/chosen": -137.61996459960938, |
| "logps/rejected": -198.40377807617188, |
| "loss": 0.1805, |
| "rewards/accuracies": 0.5, |
| "rewards/chosen": -1.1367048025131226, |
| "rewards/margins": 5.306751251220703, |
| "rewards/rejected": -6.443456172943115, |
| "step": 254 |
| }, |
| { |
| "epoch": 0.43, |
| "learning_rate": 9.998434236738956e-07, |
| "logits/chosen": -1.3364038467407227, |
| "logits/rejected": -1.633857011795044, |
| "logps/chosen": -427.1192626953125, |
| "logps/rejected": -423.90826416015625, |
| "loss": 0.1867, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": -0.897076427936554, |
| "rewards/margins": 5.636436462402344, |
| "rewards/rejected": -6.533513069152832, |
| "step": 255 |
| }, |
| { |
| "epoch": 0.43, |
| "learning_rate": 9.998244622526728e-07, |
| "logits/chosen": -1.3373851776123047, |
| "logits/rejected": -1.0651671886444092, |
| "logps/chosen": -123.55149841308594, |
| "logps/rejected": -188.85494995117188, |
| "loss": 0.1508, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": -0.2550186216831207, |
| "rewards/margins": 4.371315002441406, |
| "rewards/rejected": -4.626333713531494, |
| "step": 256 |
| }, |
| { |
| "epoch": 0.43, |
| "learning_rate": 9.99804417582332e-07, |
| "logits/chosen": -1.8455286026000977, |
| "logits/rejected": -1.6597044467926025, |
| "logps/chosen": -37.02387619018555, |
| "logps/rejected": -157.0921630859375, |
| "loss": 0.1598, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": -0.6415446400642395, |
| "rewards/margins": 7.187655448913574, |
| "rewards/rejected": -7.829200267791748, |
| "step": 257 |
| }, |
| { |
| "epoch": 0.44, |
| "learning_rate": 9.997832897063147e-07, |
| "logits/chosen": -1.3792277574539185, |
| "logits/rejected": -1.3170826435089111, |
| "logps/chosen": -268.0335388183594, |
| "logps/rejected": -193.27032470703125, |
| "loss": 0.2239, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": -0.12644805014133453, |
| "rewards/margins": 7.42259407043457, |
| "rewards/rejected": -7.549041748046875, |
| "step": 258 |
| }, |
| { |
| "epoch": 0.44, |
| "learning_rate": 9.99761078670411e-07, |
| "logits/chosen": -2.141418218612671, |
| "logits/rejected": -2.0980770587921143, |
| "logps/chosen": -36.05842590332031, |
| "logps/rejected": -130.27944946289062, |
| "loss": 0.1816, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": -0.2582319378852844, |
| "rewards/margins": 4.223054885864258, |
| "rewards/rejected": -4.481287002563477, |
| "step": 259 |
| }, |
| { |
| "epoch": 0.44, |
| "learning_rate": 9.997377845227574e-07, |
| "logits/chosen": -1.0465216636657715, |
| "logits/rejected": -0.9531089663505554, |
| "logps/chosen": -123.76052856445312, |
| "logps/rejected": -127.10284423828125, |
| "loss": 0.1821, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": -0.5870941281318665, |
| "rewards/margins": 1.3501648902893066, |
| "rewards/rejected": -1.9372591972351074, |
| "step": 260 |
| }, |
| { |
| "epoch": 0.44, |
| "learning_rate": 9.997134073138388e-07, |
| "logits/chosen": -2.0905027389526367, |
| "logits/rejected": -2.080312490463257, |
| "logps/chosen": -161.46063232421875, |
| "logps/rejected": -82.06962585449219, |
| "loss": 0.1953, |
| "rewards/accuracies": 0.0, |
| "rewards/chosen": -2.4451663494110107, |
| "rewards/margins": -1.771939754486084, |
| "rewards/rejected": -0.6732265949249268, |
| "step": 261 |
| }, |
| { |
| "epoch": 0.44, |
| "learning_rate": 9.996879470964867e-07, |
| "logits/chosen": -1.8112472295761108, |
| "logits/rejected": -2.4045462608337402, |
| "logps/chosen": -167.5275421142578, |
| "logps/rejected": -174.94483947753906, |
| "loss": 0.2188, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": -0.22896921634674072, |
| "rewards/margins": 5.413414478302002, |
| "rewards/rejected": -5.642383575439453, |
| "step": 262 |
| }, |
| { |
| "epoch": 0.44, |
| "learning_rate": 9.996614039258803e-07, |
| "logits/chosen": -0.665377676486969, |
| "logits/rejected": -0.4620656967163086, |
| "logps/chosen": -148.4291229248047, |
| "logps/rejected": -186.41522216796875, |
| "loss": 0.2179, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": -0.5123512744903564, |
| "rewards/margins": 7.561125755310059, |
| "rewards/rejected": -8.073476791381836, |
| "step": 263 |
| }, |
| { |
| "epoch": 0.45, |
| "learning_rate": 9.996337778595453e-07, |
| "logits/chosen": -1.246445894241333, |
| "logits/rejected": -1.9943310022354126, |
| "logps/chosen": -311.7137451171875, |
| "logps/rejected": -163.29208374023438, |
| "loss": 0.1784, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 0.3149963617324829, |
| "rewards/margins": 7.973663330078125, |
| "rewards/rejected": -7.658667087554932, |
| "step": 264 |
| }, |
| { |
| "epoch": 0.45, |
| "learning_rate": 9.996050689573542e-07, |
| "logits/chosen": -0.8231534361839294, |
| "logits/rejected": -1.4796454906463623, |
| "logps/chosen": -392.57366943359375, |
| "logps/rejected": -215.98666381835938, |
| "loss": 0.1828, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": -0.50887531042099, |
| "rewards/margins": 4.215329647064209, |
| "rewards/rejected": -4.724205017089844, |
| "step": 265 |
| }, |
| { |
| "epoch": 0.45, |
| "learning_rate": 9.995752772815274e-07, |
| "logits/chosen": -1.0359983444213867, |
| "logits/rejected": -1.4785257577896118, |
| "logps/chosen": -98.88191223144531, |
| "logps/rejected": -250.07452392578125, |
| "loss": 0.1721, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": -0.7754969000816345, |
| "rewards/margins": 5.748414516448975, |
| "rewards/rejected": -6.523911476135254, |
| "step": 266 |
| }, |
| { |
| "epoch": 0.45, |
| "learning_rate": 9.995444028966306e-07, |
| "logits/chosen": -1.2062805891036987, |
| "logits/rejected": -0.6490817666053772, |
| "logps/chosen": -66.56135559082031, |
| "logps/rejected": -173.89833068847656, |
| "loss": 0.2002, |
| "rewards/accuracies": 0.5, |
| "rewards/chosen": -0.7681030631065369, |
| "rewards/margins": 3.30619478225708, |
| "rewards/rejected": -4.074297904968262, |
| "step": 267 |
| }, |
| { |
| "epoch": 0.45, |
| "learning_rate": 9.995124458695768e-07, |
| "logits/chosen": -1.8022315502166748, |
| "logits/rejected": -1.7330073118209839, |
| "logps/chosen": -119.78292846679688, |
| "logps/rejected": -185.3957061767578, |
| "loss": 0.1853, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": -0.318962961435318, |
| "rewards/margins": 3.6459078788757324, |
| "rewards/rejected": -3.9648709297180176, |
| "step": 268 |
| }, |
| { |
| "epoch": 0.45, |
| "learning_rate": 9.99479406269625e-07, |
| "logits/chosen": -0.64534592628479, |
| "logits/rejected": -1.2948215007781982, |
| "logps/chosen": -386.3092956542969, |
| "logps/rejected": -148.838134765625, |
| "loss": 0.2189, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": -0.32144472002983093, |
| "rewards/margins": 4.571913242340088, |
| "rewards/rejected": -4.89335823059082, |
| "step": 269 |
| }, |
| { |
| "epoch": 0.46, |
| "learning_rate": 9.994452841683807e-07, |
| "logits/chosen": -1.8895020484924316, |
| "logits/rejected": -1.8781219720840454, |
| "logps/chosen": -109.0746841430664, |
| "logps/rejected": -128.44271850585938, |
| "loss": 0.1739, |
| "rewards/accuracies": 0.5, |
| "rewards/chosen": -0.8171417713165283, |
| "rewards/margins": -0.010423451662063599, |
| "rewards/rejected": -0.8067182898521423, |
| "step": 270 |
| }, |
| { |
| "epoch": 0.46, |
| "learning_rate": 9.994100796397953e-07, |
| "logits/chosen": -2.2890195846557617, |
| "logits/rejected": -2.143787145614624, |
| "logps/chosen": -67.44159698486328, |
| "logps/rejected": -293.24310302734375, |
| "loss": 0.2057, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": -0.6767939329147339, |
| "rewards/margins": 12.486234664916992, |
| "rewards/rejected": -13.163028717041016, |
| "step": 271 |
| }, |
| { |
| "epoch": 0.46, |
| "learning_rate": 9.993737927601663e-07, |
| "logits/chosen": -1.2766467332839966, |
| "logits/rejected": -0.8370835781097412, |
| "logps/chosen": -126.51022338867188, |
| "logps/rejected": -241.4452362060547, |
| "loss": 0.1861, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": -0.14905127882957458, |
| "rewards/margins": 8.128352165222168, |
| "rewards/rejected": -8.277403831481934, |
| "step": 272 |
| }, |
| { |
| "epoch": 0.46, |
| "learning_rate": 9.993364236081366e-07, |
| "logits/chosen": -2.052964925765991, |
| "logits/rejected": -1.607141375541687, |
| "logps/chosen": -103.60296630859375, |
| "logps/rejected": -158.26458740234375, |
| "loss": 0.154, |
| "rewards/accuracies": 0.5, |
| "rewards/chosen": -1.312964916229248, |
| "rewards/margins": 1.991333246231079, |
| "rewards/rejected": -3.304298162460327, |
| "step": 273 |
| }, |
| { |
| "epoch": 0.46, |
| "learning_rate": 9.992979722646948e-07, |
| "logits/chosen": -2.079749584197998, |
| "logits/rejected": -2.0677154064178467, |
| "logps/chosen": -41.83177947998047, |
| "logps/rejected": -55.48749542236328, |
| "loss": 0.1965, |
| "rewards/accuracies": 0.5, |
| "rewards/chosen": -0.5781872272491455, |
| "rewards/margins": 1.497841477394104, |
| "rewards/rejected": -2.076028823852539, |
| "step": 274 |
| }, |
| { |
| "epoch": 0.46, |
| "learning_rate": 9.992584388131748e-07, |
| "logits/chosen": -2.3774337768554688, |
| "logits/rejected": -1.4596751928329468, |
| "logps/chosen": -47.4880256652832, |
| "logps/rejected": -253.26637268066406, |
| "loss": 0.1938, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": -0.3620060980319977, |
| "rewards/margins": 8.875134468078613, |
| "rewards/rejected": -9.237140655517578, |
| "step": 275 |
| }, |
| { |
| "epoch": 0.47, |
| "learning_rate": 9.992178233392562e-07, |
| "logits/chosen": -1.6642234325408936, |
| "logits/rejected": -1.6687958240509033, |
| "logps/chosen": -307.078369140625, |
| "logps/rejected": -324.7652587890625, |
| "loss": 0.2655, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 0.5620359182357788, |
| "rewards/margins": 5.345144748687744, |
| "rewards/rejected": -4.783108711242676, |
| "step": 276 |
| }, |
| { |
| "epoch": 0.47, |
| "learning_rate": 9.991761259309633e-07, |
| "logits/chosen": -1.3904876708984375, |
| "logits/rejected": -1.6669962406158447, |
| "logps/chosen": -400.28887939453125, |
| "logps/rejected": -245.69497680664062, |
| "loss": 0.181, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": -0.673797070980072, |
| "rewards/margins": 7.228074550628662, |
| "rewards/rejected": -7.901871681213379, |
| "step": 277 |
| }, |
| { |
| "epoch": 0.47, |
| "learning_rate": 9.991333466786648e-07, |
| "logits/chosen": -1.216492772102356, |
| "logits/rejected": -1.1512091159820557, |
| "logps/chosen": -16.977386474609375, |
| "logps/rejected": -128.8122100830078, |
| "loss": 0.1701, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": -0.0717000961303711, |
| "rewards/margins": 7.391216278076172, |
| "rewards/rejected": -7.462916374206543, |
| "step": 278 |
| }, |
| { |
| "epoch": 0.47, |
| "learning_rate": 9.990894856750744e-07, |
| "logits/chosen": -0.9838640689849854, |
| "logits/rejected": -2.3961727619171143, |
| "logps/chosen": -650.3472900390625, |
| "logps/rejected": -48.605430603027344, |
| "loss": 0.1742, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 0.338510125875473, |
| "rewards/margins": 1.6768007278442383, |
| "rewards/rejected": -1.3382906913757324, |
| "step": 279 |
| }, |
| { |
| "epoch": 0.47, |
| "learning_rate": 9.990445430152506e-07, |
| "logits/chosen": -0.4278978407382965, |
| "logits/rejected": -0.6988143920898438, |
| "logps/chosen": -533.7452392578125, |
| "logps/rejected": -292.7690734863281, |
| "loss": 0.2038, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": -0.9284515380859375, |
| "rewards/margins": 10.271871566772461, |
| "rewards/rejected": -11.200323104858398, |
| "step": 280 |
| }, |
| { |
| "epoch": 0.47, |
| "learning_rate": 9.989985187965955e-07, |
| "logits/chosen": -1.6768834590911865, |
| "logits/rejected": -1.5711687803268433, |
| "logps/chosen": -439.8897705078125, |
| "logps/rejected": -265.1025695800781, |
| "loss": 0.2195, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": -1.3489418029785156, |
| "rewards/margins": 7.014950752258301, |
| "rewards/rejected": -8.363892555236816, |
| "step": 281 |
| }, |
| { |
| "epoch": 0.48, |
| "learning_rate": 9.989514131188558e-07, |
| "logits/chosen": -1.4727314710617065, |
| "logits/rejected": -1.679413914680481, |
| "logps/chosen": -47.99615478515625, |
| "logps/rejected": -121.50358581542969, |
| "loss": 0.1682, |
| "rewards/accuracies": 0.5, |
| "rewards/chosen": -0.8484709858894348, |
| "rewards/margins": 4.306775093078613, |
| "rewards/rejected": -5.155246257781982, |
| "step": 282 |
| }, |
| { |
| "epoch": 0.48, |
| "learning_rate": 9.989032260841215e-07, |
| "logits/chosen": -1.3292348384857178, |
| "logits/rejected": -1.4934347867965698, |
| "logps/chosen": -443.295654296875, |
| "logps/rejected": -279.7898254394531, |
| "loss": 0.176, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": -0.13922274112701416, |
| "rewards/margins": 7.711603164672852, |
| "rewards/rejected": -7.850825786590576, |
| "step": 283 |
| }, |
| { |
| "epoch": 0.48, |
| "learning_rate": 9.988539577968264e-07, |
| "logits/chosen": -1.694291591644287, |
| "logits/rejected": -1.0821882486343384, |
| "logps/chosen": -46.26044845581055, |
| "logps/rejected": -100.78984069824219, |
| "loss": 0.199, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": -0.683262825012207, |
| "rewards/margins": 1.5582889318466187, |
| "rewards/rejected": -2.2415518760681152, |
| "step": 284 |
| }, |
| { |
| "epoch": 0.48, |
| "learning_rate": 9.988036083637477e-07, |
| "logits/chosen": -1.4071030616760254, |
| "logits/rejected": -1.7171040773391724, |
| "logps/chosen": -215.1892852783203, |
| "logps/rejected": -162.13401794433594, |
| "loss": 0.2222, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": -0.22356048226356506, |
| "rewards/margins": 8.27348518371582, |
| "rewards/rejected": -8.497045516967773, |
| "step": 285 |
| }, |
| { |
| "epoch": 0.48, |
| "learning_rate": 9.987521778940057e-07, |
| "logits/chosen": -1.219684362411499, |
| "logits/rejected": -1.615993618965149, |
| "logps/chosen": -529.4940185546875, |
| "logps/rejected": -260.98486328125, |
| "loss": 0.187, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": -0.5555427670478821, |
| "rewards/margins": 7.262081623077393, |
| "rewards/rejected": -7.817624092102051, |
| "step": 286 |
| }, |
| { |
| "epoch": 0.48, |
| "learning_rate": 9.986996664990635e-07, |
| "logits/chosen": -1.1856704950332642, |
| "logits/rejected": -1.439422607421875, |
| "logps/chosen": -128.1553192138672, |
| "logps/rejected": -44.11671447753906, |
| "loss": 0.1936, |
| "rewards/accuracies": 0.5, |
| "rewards/chosen": -1.0148143768310547, |
| "rewards/margins": -0.19398105144500732, |
| "rewards/rejected": -0.8208333253860474, |
| "step": 287 |
| }, |
| { |
| "epoch": 0.49, |
| "learning_rate": 9.986460742927269e-07, |
| "logits/chosen": -0.8404701948165894, |
| "logits/rejected": -0.4110315442085266, |
| "logps/chosen": -421.8138122558594, |
| "logps/rejected": -330.5446472167969, |
| "loss": 0.225, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": -1.2713227272033691, |
| "rewards/margins": 11.016674041748047, |
| "rewards/rejected": -12.287996292114258, |
| "step": 288 |
| }, |
| { |
| "epoch": 0.49, |
| "learning_rate": 9.985914013911442e-07, |
| "logits/chosen": -1.3006629943847656, |
| "logits/rejected": -1.1097196340560913, |
| "logps/chosen": -624.8807983398438, |
| "logps/rejected": -328.3757019042969, |
| "loss": 0.1815, |
| "rewards/accuracies": 0.5, |
| "rewards/chosen": -1.4635086059570312, |
| "rewards/margins": 3.9749832153320312, |
| "rewards/rejected": -5.4384918212890625, |
| "step": 289 |
| }, |
| { |
| "epoch": 0.49, |
| "learning_rate": 9.985356479128056e-07, |
| "logits/chosen": -0.30210334062576294, |
| "logits/rejected": -0.34102773666381836, |
| "logps/chosen": -203.22467041015625, |
| "logps/rejected": -202.3996124267578, |
| "loss": 0.2037, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": -0.25408071279525757, |
| "rewards/margins": 9.476703643798828, |
| "rewards/rejected": -9.73078441619873, |
| "step": 290 |
| }, |
| { |
| "epoch": 0.49, |
| "learning_rate": 9.984788139785432e-07, |
| "logits/chosen": -1.5046318769454956, |
| "logits/rejected": -1.4954370260238647, |
| "logps/chosen": -128.044189453125, |
| "logps/rejected": -212.79881286621094, |
| "loss": 0.1563, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 0.5831112265586853, |
| "rewards/margins": 4.677426338195801, |
| "rewards/rejected": -4.094315052032471, |
| "step": 291 |
| }, |
| { |
| "epoch": 0.49, |
| "learning_rate": 9.984208997115311e-07, |
| "logits/chosen": -1.3097823858261108, |
| "logits/rejected": -0.7972382307052612, |
| "logps/chosen": -61.33431625366211, |
| "logps/rejected": -224.305908203125, |
| "loss": 0.1748, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": -0.20676542818546295, |
| "rewards/margins": 7.611514091491699, |
| "rewards/rejected": -7.81827974319458, |
| "step": 292 |
| }, |
| { |
| "epoch": 0.49, |
| "learning_rate": 9.983619052372847e-07, |
| "logits/chosen": -1.4994029998779297, |
| "logits/rejected": -0.9353764653205872, |
| "logps/chosen": -454.6905517578125, |
| "logps/rejected": -354.83721923828125, |
| "loss": 0.1724, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": -0.6514847278594971, |
| "rewards/margins": 12.14840030670166, |
| "rewards/rejected": -12.799884796142578, |
| "step": 293 |
| }, |
| { |
| "epoch": 0.5, |
| "learning_rate": 9.983018306836599e-07, |
| "logits/chosen": -1.3215970993041992, |
| "logits/rejected": -1.484683632850647, |
| "logps/chosen": -519.1898803710938, |
| "logps/rejected": -226.5242919921875, |
| "loss": 0.1852, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 0.4192039668560028, |
| "rewards/margins": 8.631196975708008, |
| "rewards/rejected": -8.211993217468262, |
| "step": 294 |
| }, |
| { |
| "epoch": 0.5, |
| "learning_rate": 9.98240676180854e-07, |
| "logits/chosen": -2.4572155475616455, |
| "logits/rejected": -2.3966965675354004, |
| "logps/chosen": -40.4495849609375, |
| "logps/rejected": -193.80703735351562, |
| "loss": 0.1768, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": -0.9751855134963989, |
| "rewards/margins": 9.661815643310547, |
| "rewards/rejected": -10.637001037597656, |
| "step": 295 |
| }, |
| { |
| "epoch": 0.5, |
| "learning_rate": 9.981784418614046e-07, |
| "logits/chosen": -1.1194053888320923, |
| "logits/rejected": -1.3947285413742065, |
| "logps/chosen": -282.65728759765625, |
| "logps/rejected": -197.75747680664062, |
| "loss": 0.1737, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": -0.5431690216064453, |
| "rewards/margins": 4.192901134490967, |
| "rewards/rejected": -4.736070156097412, |
| "step": 296 |
| }, |
| { |
| "epoch": 0.5, |
| "learning_rate": 9.981151278601899e-07, |
| "logits/chosen": -1.8385300636291504, |
| "logits/rejected": -0.9504812359809875, |
| "logps/chosen": -119.73028564453125, |
| "logps/rejected": -262.2828674316406, |
| "loss": 0.1592, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": -1.2197556495666504, |
| "rewards/margins": 6.548429489135742, |
| "rewards/rejected": -7.768185615539551, |
| "step": 297 |
| }, |
| { |
| "epoch": 0.5, |
| "learning_rate": 9.980507343144271e-07, |
| "logits/chosen": -0.959502637386322, |
| "logits/rejected": -0.6492790579795837, |
| "logps/chosen": -598.463623046875, |
| "logps/rejected": -408.7165222167969, |
| "loss": 0.2004, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": -1.5735183954238892, |
| "rewards/margins": 13.148619651794434, |
| "rewards/rejected": -14.722137451171875, |
| "step": 298 |
| }, |
| { |
| "epoch": 0.5, |
| "learning_rate": 9.979852613636743e-07, |
| "logits/chosen": -1.758123755455017, |
| "logits/rejected": -2.5972981452941895, |
| "logps/chosen": -210.6790771484375, |
| "logps/rejected": -183.95455932617188, |
| "loss": 0.2032, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": -0.4298916161060333, |
| "rewards/margins": 4.640817642211914, |
| "rewards/rejected": -5.070709228515625, |
| "step": 299 |
| }, |
| { |
| "epoch": 0.51, |
| "learning_rate": 9.979187091498283e-07, |
| "logits/chosen": -1.9287923574447632, |
| "logits/rejected": -2.3071420192718506, |
| "logps/chosen": -224.43728637695312, |
| "logps/rejected": -252.1556396484375, |
| "loss": 0.1592, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": -0.8604675531387329, |
| "rewards/margins": 4.382769584655762, |
| "rewards/rejected": -5.243237018585205, |
| "step": 300 |
| }, |
| { |
| "epoch": 0.51, |
| "learning_rate": 9.978510778171245e-07, |
| "logits/chosen": -1.4522650241851807, |
| "logits/rejected": -0.9249738454818726, |
| "logps/chosen": -410.66632080078125, |
| "logps/rejected": -477.5390625, |
| "loss": 0.1934, |
| "rewards/accuracies": 0.5, |
| "rewards/chosen": -0.16993102431297302, |
| "rewards/margins": 6.357221603393555, |
| "rewards/rejected": -6.5271525382995605, |
| "step": 301 |
| }, |
| { |
| "epoch": 0.51, |
| "learning_rate": 9.977823675121382e-07, |
| "logits/chosen": -1.6546478271484375, |
| "logits/rejected": -2.3277413845062256, |
| "logps/chosen": -207.47552490234375, |
| "logps/rejected": -181.27479553222656, |
| "loss": 0.1846, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": -0.2778858244419098, |
| "rewards/margins": 6.3818769454956055, |
| "rewards/rejected": -6.659762382507324, |
| "step": 302 |
| }, |
| { |
| "epoch": 0.51, |
| "learning_rate": 9.977125783837818e-07, |
| "logits/chosen": -1.469759464263916, |
| "logits/rejected": -2.041001319885254, |
| "logps/chosen": -286.0638427734375, |
| "logps/rejected": -141.27078247070312, |
| "loss": 0.1763, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 0.4046391546726227, |
| "rewards/margins": 7.348243713378906, |
| "rewards/rejected": -6.943604469299316, |
| "step": 303 |
| }, |
| { |
| "epoch": 0.51, |
| "learning_rate": 9.97641710583307e-07, |
| "logits/chosen": -1.4234567880630493, |
| "logits/rejected": -1.6683655977249146, |
| "logps/chosen": -362.9468078613281, |
| "logps/rejected": -260.8007507324219, |
| "loss": 0.1771, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": -1.304555892944336, |
| "rewards/margins": 6.251507759094238, |
| "rewards/rejected": -7.556063652038574, |
| "step": 304 |
| }, |
| { |
| "epoch": 0.51, |
| "learning_rate": 9.975697642643022e-07, |
| "logits/chosen": -1.9733335971832275, |
| "logits/rejected": -1.0282042026519775, |
| "logps/chosen": -142.15017700195312, |
| "logps/rejected": -305.75054931640625, |
| "loss": 0.1982, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": -0.6309067010879517, |
| "rewards/margins": 5.164991855621338, |
| "rewards/rejected": -5.7958984375, |
| "step": 305 |
| }, |
| { |
| "epoch": 0.52, |
| "learning_rate": 9.97496739582694e-07, |
| "logits/chosen": -0.8187223076820374, |
| "logits/rejected": -1.733450174331665, |
| "logps/chosen": -683.83056640625, |
| "logps/rejected": -257.8089599609375, |
| "loss": 0.2229, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": -0.4965972900390625, |
| "rewards/margins": 11.01584243774414, |
| "rewards/rejected": -11.512439727783203, |
| "step": 306 |
| }, |
| { |
| "epoch": 0.52, |
| "learning_rate": 9.974226366967457e-07, |
| "logits/chosen": -1.2671740055084229, |
| "logits/rejected": -1.1369811296463013, |
| "logps/chosen": -495.198974609375, |
| "logps/rejected": -387.591064453125, |
| "loss": 0.2071, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": -0.4078201353549957, |
| "rewards/margins": 7.06597900390625, |
| "rewards/rejected": -7.473799228668213, |
| "step": 307 |
| }, |
| { |
| "epoch": 0.52, |
| "learning_rate": 9.973474557670574e-07, |
| "logits/chosen": -1.4290441274642944, |
| "logits/rejected": -2.188762903213501, |
| "logps/chosen": -75.417724609375, |
| "logps/rejected": -70.07405853271484, |
| "loss": 0.176, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": -0.36977919936180115, |
| "rewards/margins": 4.8193769454956055, |
| "rewards/rejected": -5.1891560554504395, |
| "step": 308 |
| }, |
| { |
| "epoch": 0.52, |
| "learning_rate": 9.972711969565658e-07, |
| "logits/chosen": -0.476540207862854, |
| "logits/rejected": -1.4027069807052612, |
| "logps/chosen": -1183.3697509765625, |
| "logps/rejected": -414.2918701171875, |
| "loss": 0.1684, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 0.6394989490509033, |
| "rewards/margins": 6.497945308685303, |
| "rewards/rejected": -5.8584465980529785, |
| "step": 309 |
| }, |
| { |
| "epoch": 0.52, |
| "learning_rate": 9.971938604305434e-07, |
| "logits/chosen": -1.002270221710205, |
| "logits/rejected": -0.6929762363433838, |
| "logps/chosen": -175.58682250976562, |
| "logps/rejected": -347.9329833984375, |
| "loss": 0.1936, |
| "rewards/accuracies": 0.5, |
| "rewards/chosen": 0.08384094387292862, |
| "rewards/margins": 1.912644863128662, |
| "rewards/rejected": -1.8288038969039917, |
| "step": 310 |
| }, |
| { |
| "epoch": 0.52, |
| "learning_rate": 9.971154463565984e-07, |
| "logits/chosen": -0.7919758558273315, |
| "logits/rejected": -1.6887415647506714, |
| "logps/chosen": -293.739990234375, |
| "logps/rejected": -134.64825439453125, |
| "loss": 0.163, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": -0.24404069781303406, |
| "rewards/margins": 7.300686836242676, |
| "rewards/rejected": -7.544727325439453, |
| "step": 311 |
| }, |
| { |
| "epoch": 0.53, |
| "learning_rate": 9.97035954904675e-07, |
| "logits/chosen": -1.3530901670455933, |
| "logits/rejected": -1.727453589439392, |
| "logps/chosen": -433.71282958984375, |
| "logps/rejected": -299.36175537109375, |
| "loss": 0.1831, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": -1.0365535020828247, |
| "rewards/margins": 6.750691890716553, |
| "rewards/rejected": -7.787245273590088, |
| "step": 312 |
| }, |
| { |
| "epoch": 0.53, |
| "learning_rate": 9.969553862470508e-07, |
| "logits/chosen": -0.9584515690803528, |
| "logits/rejected": -0.9624962210655212, |
| "logps/chosen": -32.61267852783203, |
| "logps/rejected": -126.69062042236328, |
| "loss": 0.1925, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": -0.5699091553688049, |
| "rewards/margins": 6.840811729431152, |
| "rewards/rejected": -7.4107208251953125, |
| "step": 313 |
| }, |
| { |
| "epoch": 0.53, |
| "learning_rate": 9.968737405583395e-07, |
| "logits/chosen": -2.1836318969726562, |
| "logits/rejected": -1.6158103942871094, |
| "logps/chosen": -131.1636962890625, |
| "logps/rejected": -316.4326171875, |
| "loss": 0.1549, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": -0.3803081512451172, |
| "rewards/margins": 7.161952972412109, |
| "rewards/rejected": -7.542261600494385, |
| "step": 314 |
| }, |
| { |
| "epoch": 0.53, |
| "learning_rate": 9.967910180154888e-07, |
| "logits/chosen": -0.774591326713562, |
| "logits/rejected": -1.062556505203247, |
| "logps/chosen": -555.9920043945312, |
| "logps/rejected": -338.88055419921875, |
| "loss": 0.2006, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": -0.0430755615234375, |
| "rewards/margins": 7.494574069976807, |
| "rewards/rejected": -7.537649631500244, |
| "step": 315 |
| }, |
| { |
| "epoch": 0.53, |
| "learning_rate": 9.967072187977793e-07, |
| "logits/chosen": -1.3581414222717285, |
| "logits/rejected": -1.6791399717330933, |
| "logps/chosen": -354.8208312988281, |
| "logps/rejected": -287.4419860839844, |
| "loss": 0.1415, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 0.2476089596748352, |
| "rewards/margins": 4.651924133300781, |
| "rewards/rejected": -4.404314994812012, |
| "step": 316 |
| }, |
| { |
| "epoch": 0.53, |
| "learning_rate": 9.96622343086826e-07, |
| "logits/chosen": -1.4797134399414062, |
| "logits/rejected": -1.6615569591522217, |
| "logps/chosen": -264.7172546386719, |
| "logps/rejected": -219.28221130371094, |
| "loss": 0.1911, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 0.6093296408653259, |
| "rewards/margins": 10.906174659729004, |
| "rewards/rejected": -10.296845436096191, |
| "step": 317 |
| }, |
| { |
| "epoch": 0.54, |
| "learning_rate": 9.96536391066576e-07, |
| "logits/chosen": -1.7635356187820435, |
| "logits/rejected": -1.4284019470214844, |
| "logps/chosen": -262.9644775390625, |
| "logps/rejected": -328.27642822265625, |
| "loss": 0.1773, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 0.7439472675323486, |
| "rewards/margins": 5.8976640701293945, |
| "rewards/rejected": -5.153717041015625, |
| "step": 318 |
| }, |
| { |
| "epoch": 0.54, |
| "learning_rate": 9.964493629233104e-07, |
| "logits/chosen": -1.0009286403656006, |
| "logits/rejected": -0.9932736158370972, |
| "logps/chosen": -289.8218994140625, |
| "logps/rejected": -227.1639404296875, |
| "loss": 0.144, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": -1.4354541301727295, |
| "rewards/margins": 8.911469459533691, |
| "rewards/rejected": -10.346923828125, |
| "step": 319 |
| }, |
| { |
| "epoch": 0.54, |
| "learning_rate": 9.963612588456412e-07, |
| "logits/chosen": -2.250784397125244, |
| "logits/rejected": -1.9884320497512817, |
| "logps/chosen": -46.93716049194336, |
| "logps/rejected": -284.6944274902344, |
| "loss": 0.1901, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": -0.18621844053268433, |
| "rewards/margins": 10.04560661315918, |
| "rewards/rejected": -10.231825828552246, |
| "step": 320 |
| }, |
| { |
| "epoch": 0.54, |
| "learning_rate": 9.962720790245126e-07, |
| "logits/chosen": -1.6217896938323975, |
| "logits/rejected": -1.0232822895050049, |
| "logps/chosen": -193.26486206054688, |
| "logps/rejected": -346.9532470703125, |
| "loss": 0.1927, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": -0.14425279200077057, |
| "rewards/margins": 2.2471230030059814, |
| "rewards/rejected": -2.391375780105591, |
| "step": 321 |
| }, |
| { |
| "epoch": 0.54, |
| "learning_rate": 9.96181823653201e-07, |
| "logits/chosen": -1.9973036050796509, |
| "logits/rejected": -2.215181827545166, |
| "logps/chosen": -267.81097412109375, |
| "logps/rejected": -295.62298583984375, |
| "loss": 0.1495, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": -0.1725495457649231, |
| "rewards/margins": 7.604098320007324, |
| "rewards/rejected": -7.776648044586182, |
| "step": 322 |
| }, |
| { |
| "epoch": 0.54, |
| "learning_rate": 9.96090492927313e-07, |
| "logits/chosen": -1.6204432249069214, |
| "logits/rejected": -1.2365188598632812, |
| "logps/chosen": -46.48271942138672, |
| "logps/rejected": -140.6979217529297, |
| "loss": 0.1883, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": -0.3501051366329193, |
| "rewards/margins": 7.698967456817627, |
| "rewards/rejected": -8.049072265625, |
| "step": 323 |
| }, |
| { |
| "epoch": 0.55, |
| "learning_rate": 9.959980870447852e-07, |
| "logits/chosen": -0.840675950050354, |
| "logits/rejected": -0.4675593972206116, |
| "logps/chosen": -480.7996826171875, |
| "logps/rejected": -337.56854248046875, |
| "loss": 0.1888, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": -1.6151520013809204, |
| "rewards/margins": 12.177379608154297, |
| "rewards/rejected": -13.792531967163086, |
| "step": 324 |
| }, |
| { |
| "epoch": 0.55, |
| "learning_rate": 9.959046062058862e-07, |
| "logits/chosen": -1.4950480461120605, |
| "logits/rejected": -1.5916494131088257, |
| "logps/chosen": -64.40080261230469, |
| "logps/rejected": -71.2299575805664, |
| "loss": 0.2506, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": -0.16438627243041992, |
| "rewards/margins": 4.567226886749268, |
| "rewards/rejected": -4.7316131591796875, |
| "step": 325 |
| }, |
| { |
| "epoch": 0.55, |
| "learning_rate": 9.958100506132126e-07, |
| "logits/chosen": -0.7526825666427612, |
| "logits/rejected": -1.9301047325134277, |
| "logps/chosen": -560.9881591796875, |
| "logps/rejected": -72.82947540283203, |
| "loss": 0.1853, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 0.4727003276348114, |
| "rewards/margins": 5.001248359680176, |
| "rewards/rejected": -4.528548240661621, |
| "step": 326 |
| }, |
| { |
| "epoch": 0.55, |
| "learning_rate": 9.957144204716907e-07, |
| "logits/chosen": -0.9634856581687927, |
| "logits/rejected": -1.3579816818237305, |
| "logps/chosen": -301.60650634765625, |
| "logps/rejected": -186.40699768066406, |
| "loss": 0.1644, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": -0.7037521600723267, |
| "rewards/margins": 7.330875873565674, |
| "rewards/rejected": -8.034627914428711, |
| "step": 327 |
| }, |
| { |
| "epoch": 0.55, |
| "learning_rate": 9.956177159885764e-07, |
| "logits/chosen": -1.8041788339614868, |
| "logits/rejected": -2.096233367919922, |
| "logps/chosen": -149.9138946533203, |
| "logps/rejected": -199.65846252441406, |
| "loss": 0.1742, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 0.1912795603275299, |
| "rewards/margins": 6.335195541381836, |
| "rewards/rejected": -6.143916130065918, |
| "step": 328 |
| }, |
| { |
| "epoch": 0.55, |
| "learning_rate": 9.955199373734528e-07, |
| "logits/chosen": -2.210163116455078, |
| "logits/rejected": -1.5051549673080444, |
| "logps/chosen": -43.63890838623047, |
| "logps/rejected": -231.96681213378906, |
| "loss": 0.1834, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": -0.6588637828826904, |
| "rewards/margins": 4.09450101852417, |
| "rewards/rejected": -4.753364562988281, |
| "step": 329 |
| }, |
| { |
| "epoch": 0.56, |
| "learning_rate": 9.954210848382317e-07, |
| "logits/chosen": -1.917798399925232, |
| "logits/rejected": -1.8297691345214844, |
| "logps/chosen": -28.441646575927734, |
| "logps/rejected": -159.19583129882812, |
| "loss": 0.1769, |
| "rewards/accuracies": 0.5, |
| "rewards/chosen": -0.4995257258415222, |
| "rewards/margins": 7.739633560180664, |
| "rewards/rejected": -8.23915958404541, |
| "step": 330 |
| }, |
| { |
| "epoch": 0.56, |
| "learning_rate": 9.953211585971522e-07, |
| "logits/chosen": -1.6009342670440674, |
| "logits/rejected": -1.1614232063293457, |
| "logps/chosen": -143.366455078125, |
| "logps/rejected": -187.1925048828125, |
| "loss": 0.1742, |
| "rewards/accuracies": 0.5, |
| "rewards/chosen": -0.8375290036201477, |
| "rewards/margins": 0.36481326818466187, |
| "rewards/rejected": -1.2023422718048096, |
| "step": 331 |
| }, |
| { |
| "epoch": 0.56, |
| "learning_rate": 9.952201588667803e-07, |
| "logits/chosen": -1.2768915891647339, |
| "logits/rejected": -2.192704916000366, |
| "logps/chosen": -347.0742492675781, |
| "logps/rejected": -107.85566711425781, |
| "loss": 0.2237, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": -0.9857234954833984, |
| "rewards/margins": 4.846242904663086, |
| "rewards/rejected": -5.831965923309326, |
| "step": 332 |
| }, |
| { |
| "epoch": 0.56, |
| "learning_rate": 9.951180858660089e-07, |
| "logits/chosen": -1.8204140663146973, |
| "logits/rejected": -1.5513927936553955, |
| "logps/chosen": -409.3429260253906, |
| "logps/rejected": -370.9957275390625, |
| "loss": 0.2061, |
| "rewards/accuracies": 0.5, |
| "rewards/chosen": -0.23614689707756042, |
| "rewards/margins": 0.9891689419746399, |
| "rewards/rejected": -1.225315809249878, |
| "step": 333 |
| }, |
| { |
| "epoch": 0.56, |
| "learning_rate": 9.95014939816056e-07, |
| "logits/chosen": -0.7975092530250549, |
| "logits/rejected": -1.1003104448318481, |
| "logps/chosen": -433.71661376953125, |
| "logps/rejected": -278.63995361328125, |
| "loss": 0.1544, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": -1.7607848644256592, |
| "rewards/margins": 9.997198104858398, |
| "rewards/rejected": -11.75798225402832, |
| "step": 334 |
| }, |
| { |
| "epoch": 0.56, |
| "learning_rate": 9.949107209404663e-07, |
| "logits/chosen": -2.2883524894714355, |
| "logits/rejected": -2.550262212753296, |
| "logps/chosen": -122.60182189941406, |
| "logps/rejected": -206.03915405273438, |
| "loss": 0.2065, |
| "rewards/accuracies": 0.5, |
| "rewards/chosen": -1.2694628238677979, |
| "rewards/margins": 6.3892903327941895, |
| "rewards/rejected": -7.658753395080566, |
| "step": 335 |
| }, |
| { |
| "epoch": 0.57, |
| "learning_rate": 9.948054294651088e-07, |
| "logits/chosen": -1.41136634349823, |
| "logits/rejected": -2.039381265640259, |
| "logps/chosen": -242.87060546875, |
| "logps/rejected": -118.76246643066406, |
| "loss": 0.1629, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": -0.8472713232040405, |
| "rewards/margins": 0.7751063704490662, |
| "rewards/rejected": -1.6223777532577515, |
| "step": 336 |
| }, |
| { |
| "epoch": 0.57, |
| "learning_rate": 9.946990656181779e-07, |
| "logits/chosen": -1.0350347757339478, |
| "logits/rejected": -0.8846843838691711, |
| "logps/chosen": -52.651214599609375, |
| "logps/rejected": -150.87210083007812, |
| "loss": 0.2258, |
| "rewards/accuracies": 0.5, |
| "rewards/chosen": -1.9379146099090576, |
| "rewards/margins": 7.0258870124816895, |
| "rewards/rejected": -8.963801383972168, |
| "step": 337 |
| }, |
| { |
| "epoch": 0.57, |
| "learning_rate": 9.945916296301912e-07, |
| "logits/chosen": -2.120410442352295, |
| "logits/rejected": -1.4654765129089355, |
| "logps/chosen": -81.31796264648438, |
| "logps/rejected": -198.11753845214844, |
| "loss": 0.1973, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": -0.5237701535224915, |
| "rewards/margins": 3.80403470993042, |
| "rewards/rejected": -4.3278045654296875, |
| "step": 338 |
| }, |
| { |
| "epoch": 0.57, |
| "learning_rate": 9.944831217339903e-07, |
| "logits/chosen": -1.9489809274673462, |
| "logits/rejected": -1.5405217409133911, |
| "logps/chosen": -335.79010009765625, |
| "logps/rejected": -635.9193115234375, |
| "loss": 0.1915, |
| "rewards/accuracies": 0.5, |
| "rewards/chosen": -1.2987374067306519, |
| "rewards/margins": 1.2891318798065186, |
| "rewards/rejected": -2.587869167327881, |
| "step": 339 |
| }, |
| { |
| "epoch": 0.57, |
| "learning_rate": 9.943735421647404e-07, |
| "logits/chosen": -1.1065800189971924, |
| "logits/rejected": -1.074052095413208, |
| "logps/chosen": -33.51839828491211, |
| "logps/rejected": -160.16390991210938, |
| "loss": 0.2031, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": -1.1291143894195557, |
| "rewards/margins": 8.39620304107666, |
| "rewards/rejected": -9.525317192077637, |
| "step": 340 |
| }, |
| { |
| "epoch": 0.58, |
| "learning_rate": 9.94262891159928e-07, |
| "logits/chosen": -1.4195408821105957, |
| "logits/rejected": -1.2427754402160645, |
| "logps/chosen": -230.10208129882812, |
| "logps/rejected": -262.679931640625, |
| "loss": 0.2108, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": -1.9072086811065674, |
| "rewards/margins": 10.54747200012207, |
| "rewards/rejected": -12.454681396484375, |
| "step": 341 |
| }, |
| { |
| "epoch": 0.58, |
| "learning_rate": 9.941511689593633e-07, |
| "logits/chosen": -1.1351226568222046, |
| "logits/rejected": -1.9858900308609009, |
| "logps/chosen": -298.932861328125, |
| "logps/rejected": -142.6973876953125, |
| "loss": 0.1527, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 1.044623613357544, |
| "rewards/margins": 9.147809982299805, |
| "rewards/rejected": -8.10318660736084, |
| "step": 342 |
| }, |
| { |
| "epoch": 0.58, |
| "learning_rate": 9.940383758051767e-07, |
| "logits/chosen": -1.8453896045684814, |
| "logits/rejected": -1.5388239622116089, |
| "logps/chosen": -197.114990234375, |
| "logps/rejected": -312.2078552246094, |
| "loss": 0.2152, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": -0.8707555532455444, |
| "rewards/margins": 12.464103698730469, |
| "rewards/rejected": -13.334858894348145, |
| "step": 343 |
| }, |
| { |
| "epoch": 0.58, |
| "learning_rate": 9.939245119418206e-07, |
| "logits/chosen": -1.4721529483795166, |
| "logits/rejected": -1.445346713066101, |
| "logps/chosen": -178.68853759765625, |
| "logps/rejected": -172.24676513671875, |
| "loss": 0.1945, |
| "rewards/accuracies": 0.5, |
| "rewards/chosen": -1.9977798461914062, |
| "rewards/margins": 2.5249760150909424, |
| "rewards/rejected": -4.522756099700928, |
| "step": 344 |
| }, |
| { |
| "epoch": 0.58, |
| "learning_rate": 9.938095776160674e-07, |
| "logits/chosen": -0.6039644479751587, |
| "logits/rejected": -0.8809584975242615, |
| "logps/chosen": -337.0422058105469, |
| "logps/rejected": -204.4918212890625, |
| "loss": 0.1941, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": -0.2001136839389801, |
| "rewards/margins": 9.865687370300293, |
| "rewards/rejected": -10.065800666809082, |
| "step": 345 |
| }, |
| { |
| "epoch": 0.58, |
| "learning_rate": 9.936935730770093e-07, |
| "logits/chosen": -1.719544768333435, |
| "logits/rejected": -1.678938865661621, |
| "logps/chosen": -548.88916015625, |
| "logps/rejected": -351.3948669433594, |
| "loss": 0.166, |
| "rewards/accuracies": 0.5, |
| "rewards/chosen": -0.9289765357971191, |
| "rewards/margins": 1.596639633178711, |
| "rewards/rejected": -2.52561616897583, |
| "step": 346 |
| }, |
| { |
| "epoch": 0.59, |
| "learning_rate": 9.935764985760582e-07, |
| "logits/chosen": -1.567973256111145, |
| "logits/rejected": -1.7810003757476807, |
| "logps/chosen": -64.80793762207031, |
| "logps/rejected": -145.93380737304688, |
| "loss": 0.1616, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": -0.15498466789722443, |
| "rewards/margins": 4.86065149307251, |
| "rewards/rejected": -5.015635967254639, |
| "step": 347 |
| }, |
| { |
| "epoch": 0.59, |
| "learning_rate": 9.934583543669453e-07, |
| "logits/chosen": -2.2390925884246826, |
| "logits/rejected": -1.3963329792022705, |
| "logps/chosen": -86.66363525390625, |
| "logps/rejected": -205.68148803710938, |
| "loss": 0.1828, |
| "rewards/accuracies": 0.5, |
| "rewards/chosen": -0.016691789031028748, |
| "rewards/margins": 5.741617202758789, |
| "rewards/rejected": -5.7583088874816895, |
| "step": 348 |
| }, |
| { |
| "epoch": 0.59, |
| "learning_rate": 9.933391407057195e-07, |
| "logits/chosen": -1.3134796619415283, |
| "logits/rejected": -1.861419439315796, |
| "logps/chosen": -224.83663940429688, |
| "logps/rejected": -265.0384521484375, |
| "loss": 0.1408, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 0.34677210450172424, |
| "rewards/margins": 13.738977432250977, |
| "rewards/rejected": -13.392204284667969, |
| "step": 349 |
| }, |
| { |
| "epoch": 0.59, |
| "learning_rate": 9.932188578507474e-07, |
| "logits/chosen": -1.9301025867462158, |
| "logits/rejected": -1.5889010429382324, |
| "logps/chosen": -50.467098236083984, |
| "logps/rejected": -331.0249938964844, |
| "loss": 0.1679, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": -0.6283357739448547, |
| "rewards/margins": 14.762405395507812, |
| "rewards/rejected": -15.390741348266602, |
| "step": 350 |
| }, |
| { |
| "epoch": 0.59, |
| "learning_rate": 9.930975060627136e-07, |
| "logits/chosen": -2.602487325668335, |
| "logits/rejected": -1.6359155178070068, |
| "logps/chosen": -654.8057861328125, |
| "logps/rejected": -385.56781005859375, |
| "loss": 0.2003, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": -0.9707611203193665, |
| "rewards/margins": 4.78594970703125, |
| "rewards/rejected": -5.756711006164551, |
| "step": 351 |
| }, |
| { |
| "epoch": 0.59, |
| "learning_rate": 9.929750856046187e-07, |
| "logits/chosen": -0.7864140868186951, |
| "logits/rejected": -1.5045098066329956, |
| "logps/chosen": -400.8957214355469, |
| "logps/rejected": -302.7469787597656, |
| "loss": 0.1987, |
| "rewards/accuracies": 0.5, |
| "rewards/chosen": 0.44732972979545593, |
| "rewards/margins": 1.3191795349121094, |
| "rewards/rejected": -0.8718498945236206, |
| "step": 352 |
| }, |
| { |
| "epoch": 0.6, |
| "learning_rate": 9.928515967417792e-07, |
| "logits/chosen": -0.948026716709137, |
| "logits/rejected": -1.3471962213516235, |
| "logps/chosen": -238.0382843017578, |
| "logps/rejected": -111.223876953125, |
| "loss": 0.1799, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 0.48494261503219604, |
| "rewards/margins": 7.656557083129883, |
| "rewards/rejected": -7.171614170074463, |
| "step": 353 |
| }, |
| { |
| "epoch": 0.6, |
| "learning_rate": 9.927270397418278e-07, |
| "logits/chosen": -1.5953532457351685, |
| "logits/rejected": -1.2294915914535522, |
| "logps/chosen": -99.09436798095703, |
| "logps/rejected": -213.32522583007812, |
| "loss": 0.1767, |
| "rewards/accuracies": 0.5, |
| "rewards/chosen": -0.6239818930625916, |
| "rewards/margins": 4.957241058349609, |
| "rewards/rejected": -5.581223011016846, |
| "step": 354 |
| }, |
| { |
| "epoch": 0.6, |
| "learning_rate": 9.92601414874712e-07, |
| "logits/chosen": -0.9315654635429382, |
| "logits/rejected": -1.4781970977783203, |
| "logps/chosen": -360.9663391113281, |
| "logps/rejected": -193.1742401123047, |
| "loss": 0.1714, |
| "rewards/accuracies": 0.5, |
| "rewards/chosen": -0.39375075697898865, |
| "rewards/margins": 6.650259971618652, |
| "rewards/rejected": -7.044010639190674, |
| "step": 355 |
| }, |
| { |
| "epoch": 0.6, |
| "learning_rate": 9.924747224126931e-07, |
| "logits/chosen": -1.1415091753005981, |
| "logits/rejected": -1.5295865535736084, |
| "logps/chosen": -136.76544189453125, |
| "logps/rejected": -136.62179565429688, |
| "loss": 0.1877, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": -0.1295880377292633, |
| "rewards/margins": 4.632694244384766, |
| "rewards/rejected": -4.762282371520996, |
| "step": 356 |
| }, |
| { |
| "epoch": 0.6, |
| "learning_rate": 9.923469626303464e-07, |
| "logits/chosen": -2.146008014678955, |
| "logits/rejected": -1.7241287231445312, |
| "logps/chosen": -200.16453552246094, |
| "logps/rejected": -284.6964416503906, |
| "loss": 0.1901, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": -0.12157487869262695, |
| "rewards/margins": 12.299015045166016, |
| "rewards/rejected": -12.420589447021484, |
| "step": 357 |
| }, |
| { |
| "epoch": 0.6, |
| "learning_rate": 9.922181358045606e-07, |
| "logits/chosen": -2.4690566062927246, |
| "logits/rejected": -2.171621084213257, |
| "logps/chosen": -12.845178604125977, |
| "logps/rejected": -186.6353302001953, |
| "loss": 0.1612, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": -0.3692449927330017, |
| "rewards/margins": 6.611391544342041, |
| "rewards/rejected": -6.980636119842529, |
| "step": 358 |
| }, |
| { |
| "epoch": 0.61, |
| "learning_rate": 9.92088242214537e-07, |
| "logits/chosen": -1.0186376571655273, |
| "logits/rejected": -1.3315547704696655, |
| "logps/chosen": -394.67236328125, |
| "logps/rejected": -266.37738037109375, |
| "loss": 0.1621, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 0.5413925647735596, |
| "rewards/margins": 7.737855434417725, |
| "rewards/rejected": -7.196463108062744, |
| "step": 359 |
| }, |
| { |
| "epoch": 0.61, |
| "learning_rate": 9.919572821417885e-07, |
| "logits/chosen": -0.9612762928009033, |
| "logits/rejected": -1.0460320711135864, |
| "logps/chosen": -69.64960479736328, |
| "logps/rejected": -178.71676635742188, |
| "loss": 0.1798, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": -0.11741314828395844, |
| "rewards/margins": 11.80695629119873, |
| "rewards/rejected": -11.924369812011719, |
| "step": 360 |
| }, |
| { |
| "epoch": 0.61, |
| "learning_rate": 9.918252558701396e-07, |
| "logits/chosen": -1.4163517951965332, |
| "logits/rejected": -1.2571159601211548, |
| "logps/chosen": -48.93925857543945, |
| "logps/rejected": -165.11422729492188, |
| "loss": 0.1627, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": -0.40845078229904175, |
| "rewards/margins": 8.386886596679688, |
| "rewards/rejected": -8.795337677001953, |
| "step": 361 |
| }, |
| { |
| "epoch": 0.61, |
| "learning_rate": 9.91692163685725e-07, |
| "logits/chosen": -1.6064996719360352, |
| "logits/rejected": -1.1707127094268799, |
| "logps/chosen": -13.273357391357422, |
| "logps/rejected": -70.61612701416016, |
| "loss": 0.1802, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 0.01451602578163147, |
| "rewards/margins": 3.4827561378479004, |
| "rewards/rejected": -3.468240261077881, |
| "step": 362 |
| }, |
| { |
| "epoch": 0.61, |
| "learning_rate": 9.915580058769908e-07, |
| "logits/chosen": -1.8302160501480103, |
| "logits/rejected": -1.5408596992492676, |
| "logps/chosen": -49.54063034057617, |
| "logps/rejected": -165.20664978027344, |
| "loss": 0.1484, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": -0.45899465680122375, |
| "rewards/margins": 0.5188831686973572, |
| "rewards/rejected": -0.9778778553009033, |
| "step": 363 |
| }, |
| { |
| "epoch": 0.61, |
| "learning_rate": 9.914227827346908e-07, |
| "logits/chosen": -1.4065834283828735, |
| "logits/rejected": -1.661447525024414, |
| "logps/chosen": -73.02536010742188, |
| "logps/rejected": -106.48794555664062, |
| "loss": 0.1977, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": -0.30404195189476013, |
| "rewards/margins": 2.008908987045288, |
| "rewards/rejected": -2.31295108795166, |
| "step": 364 |
| }, |
| { |
| "epoch": 0.62, |
| "learning_rate": 9.912864945518893e-07, |
| "logits/chosen": -1.741304636001587, |
| "logits/rejected": -2.134251356124878, |
| "logps/chosen": -274.7901916503906, |
| "logps/rejected": -197.7181396484375, |
| "loss": 0.2159, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": -0.4863889813423157, |
| "rewards/margins": 1.1557424068450928, |
| "rewards/rejected": -1.6421314477920532, |
| "step": 365 |
| }, |
| { |
| "epoch": 0.62, |
| "learning_rate": 9.911491416239577e-07, |
| "logits/chosen": -0.297260582447052, |
| "logits/rejected": -0.43519172072410583, |
| "logps/chosen": -465.78167724609375, |
| "logps/rejected": -288.87237548828125, |
| "loss": 0.1746, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": -1.9665650129318237, |
| "rewards/margins": 12.498331069946289, |
| "rewards/rejected": -14.464896202087402, |
| "step": 366 |
| }, |
| { |
| "epoch": 0.62, |
| "learning_rate": 9.910107242485756e-07, |
| "logits/chosen": -2.1462013721466064, |
| "logits/rejected": -1.7071665525436401, |
| "logps/chosen": -158.352783203125, |
| "logps/rejected": -259.8519287109375, |
| "loss": 0.1644, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": -0.8491896390914917, |
| "rewards/margins": 12.058333396911621, |
| "rewards/rejected": -12.907523155212402, |
| "step": 367 |
| }, |
| { |
| "epoch": 0.62, |
| "learning_rate": 9.908712427257291e-07, |
| "logits/chosen": -1.6921484470367432, |
| "logits/rejected": -1.673211932182312, |
| "logps/chosen": -38.84759521484375, |
| "logps/rejected": -308.70318603515625, |
| "loss": 0.1658, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": -0.669171929359436, |
| "rewards/margins": 10.507699966430664, |
| "rewards/rejected": -11.176872253417969, |
| "step": 368 |
| }, |
| { |
| "epoch": 0.62, |
| "learning_rate": 9.907306973577109e-07, |
| "logits/chosen": -1.1072365045547485, |
| "logits/rejected": -1.6484124660491943, |
| "logps/chosen": -140.98245239257812, |
| "logps/rejected": -191.68093872070312, |
| "loss": 0.1682, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": -0.5839079022407532, |
| "rewards/margins": 9.572250366210938, |
| "rewards/rejected": -10.156158447265625, |
| "step": 369 |
| }, |
| { |
| "epoch": 0.62, |
| "learning_rate": 9.905890884491194e-07, |
| "logits/chosen": -2.0668201446533203, |
| "logits/rejected": -1.8850473165512085, |
| "logps/chosen": -19.376293182373047, |
| "logps/rejected": -101.66358184814453, |
| "loss": 0.1931, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": -0.12364569306373596, |
| "rewards/margins": 4.169041633605957, |
| "rewards/rejected": -4.29268741607666, |
| "step": 370 |
| }, |
| { |
| "epoch": 0.63, |
| "learning_rate": 9.904464163068577e-07, |
| "logits/chosen": -1.7173949480056763, |
| "logits/rejected": -2.278022289276123, |
| "logps/chosen": -288.80584716796875, |
| "logps/rejected": -257.0757141113281, |
| "loss": 0.2352, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": -0.6069202423095703, |
| "rewards/margins": 8.09630298614502, |
| "rewards/rejected": -8.70322322845459, |
| "step": 371 |
| }, |
| { |
| "epoch": 0.63, |
| "learning_rate": 9.903026812401332e-07, |
| "logits/chosen": -1.8909093141555786, |
| "logits/rejected": -1.652140498161316, |
| "logps/chosen": -170.0011444091797, |
| "logps/rejected": -244.4969482421875, |
| "loss": 0.172, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": -0.49996358156204224, |
| "rewards/margins": 10.814790725708008, |
| "rewards/rejected": -11.314754486083984, |
| "step": 372 |
| }, |
| { |
| "epoch": 0.63, |
| "learning_rate": 9.90157883560457e-07, |
| "logits/chosen": -0.7032025456428528, |
| "logits/rejected": -0.7244059443473816, |
| "logps/chosen": -17.019268035888672, |
| "logps/rejected": -98.73320770263672, |
| "loss": 0.2018, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 0.16112473607063293, |
| "rewards/margins": 7.4662017822265625, |
| "rewards/rejected": -7.305077075958252, |
| "step": 373 |
| }, |
| { |
| "epoch": 0.63, |
| "learning_rate": 9.900120235816433e-07, |
| "logits/chosen": -1.4089502096176147, |
| "logits/rejected": -1.385013461112976, |
| "logps/chosen": -298.27459716796875, |
| "logps/rejected": -263.1510314941406, |
| "loss": 0.1913, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": -1.2739990949630737, |
| "rewards/margins": 6.35389518737793, |
| "rewards/rejected": -7.627894401550293, |
| "step": 374 |
| }, |
| { |
| "epoch": 0.63, |
| "learning_rate": 9.898651016198085e-07, |
| "logits/chosen": -1.6036759614944458, |
| "logits/rejected": -2.3823535442352295, |
| "logps/chosen": -85.63179016113281, |
| "logps/rejected": -93.74755859375, |
| "loss": 0.1946, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 0.04974517598748207, |
| "rewards/margins": 4.797137260437012, |
| "rewards/rejected": -4.747392177581787, |
| "step": 375 |
| }, |
| { |
| "epoch": 0.63, |
| "learning_rate": 9.897171179933706e-07, |
| "logits/chosen": -1.0535942316055298, |
| "logits/rejected": -2.1571121215820312, |
| "logps/chosen": -717.8565673828125, |
| "logps/rejected": -162.77194213867188, |
| "loss": 0.2117, |
| "rewards/accuracies": 0.5, |
| "rewards/chosen": -0.7600006461143494, |
| "rewards/margins": 0.8016586899757385, |
| "rewards/rejected": -1.561659336090088, |
| "step": 376 |
| }, |
| { |
| "epoch": 0.64, |
| "learning_rate": 9.895680730230483e-07, |
| "logits/chosen": -1.6556205749511719, |
| "logits/rejected": -1.703450083732605, |
| "logps/chosen": -159.5177764892578, |
| "logps/rejected": -124.55549621582031, |
| "loss": 0.2291, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": -0.6513794660568237, |
| "rewards/margins": 5.128384113311768, |
| "rewards/rejected": -5.779763221740723, |
| "step": 377 |
| }, |
| { |
| "epoch": 0.64, |
| "learning_rate": 9.894179670318606e-07, |
| "logits/chosen": -1.3000423908233643, |
| "logits/rejected": -2.175767183303833, |
| "logps/chosen": -206.19947814941406, |
| "logps/rejected": -79.72200012207031, |
| "loss": 0.1892, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": -0.14822006225585938, |
| "rewards/margins": 1.46049165725708, |
| "rewards/rejected": -1.608711838722229, |
| "step": 378 |
| }, |
| { |
| "epoch": 0.64, |
| "learning_rate": 9.892668003451264e-07, |
| "logits/chosen": -1.3806241750717163, |
| "logits/rejected": -2.067390203475952, |
| "logps/chosen": -281.26068115234375, |
| "logps/rejected": -130.72952270507812, |
| "loss": 0.1605, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 0.3341960906982422, |
| "rewards/margins": 7.966523170471191, |
| "rewards/rejected": -7.632327079772949, |
| "step": 379 |
| }, |
| { |
| "epoch": 0.64, |
| "learning_rate": 9.891145732904626e-07, |
| "logits/chosen": -2.4080684185028076, |
| "logits/rejected": -2.4052534103393555, |
| "logps/chosen": -23.60405731201172, |
| "logps/rejected": -156.3341522216797, |
| "loss": 0.1842, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": -0.3208608627319336, |
| "rewards/margins": 9.061665534973145, |
| "rewards/rejected": -9.382526397705078, |
| "step": 380 |
| }, |
| { |
| "epoch": 0.64, |
| "learning_rate": 9.889612861977853e-07, |
| "logits/chosen": -1.899614691734314, |
| "logits/rejected": -1.3512424230575562, |
| "logps/chosen": -52.9254035949707, |
| "logps/rejected": -142.5703125, |
| "loss": 0.195, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 0.25636160373687744, |
| "rewards/margins": 4.048177242279053, |
| "rewards/rejected": -3.791815757751465, |
| "step": 381 |
| }, |
| { |
| "epoch": 0.64, |
| "learning_rate": 9.888069393993068e-07, |
| "logits/chosen": -1.6391324996948242, |
| "logits/rejected": -2.6511809825897217, |
| "logps/chosen": -357.7758483886719, |
| "logps/rejected": -136.27767944335938, |
| "loss": 0.1648, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 0.7596569061279297, |
| "rewards/margins": 3.358733892440796, |
| "rewards/rejected": -2.599076986312866, |
| "step": 382 |
| }, |
| { |
| "epoch": 0.65, |
| "learning_rate": 9.886515332295368e-07, |
| "logits/chosen": -1.600348711013794, |
| "logits/rejected": -2.2982192039489746, |
| "logps/chosen": -228.30441284179688, |
| "logps/rejected": -234.28158569335938, |
| "loss": 0.1545, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 0.35866472125053406, |
| "rewards/margins": 5.956897735595703, |
| "rewards/rejected": -5.598233222961426, |
| "step": 383 |
| }, |
| { |
| "epoch": 0.65, |
| "learning_rate": 9.88495068025281e-07, |
| "logits/chosen": -1.3065029382705688, |
| "logits/rejected": -1.0549356937408447, |
| "logps/chosen": -226.62106323242188, |
| "logps/rejected": -404.517822265625, |
| "loss": 0.2052, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": -1.2328369617462158, |
| "rewards/margins": 17.27766227722168, |
| "rewards/rejected": -18.510498046875, |
| "step": 384 |
| }, |
| { |
| "epoch": 0.65, |
| "learning_rate": 9.883375441256397e-07, |
| "logits/chosen": -2.497364044189453, |
| "logits/rejected": -2.2807140350341797, |
| "logps/chosen": -79.43330383300781, |
| "logps/rejected": -1763.1298828125, |
| "loss": 0.2064, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": -0.6368292570114136, |
| "rewards/margins": 4.40083646774292, |
| "rewards/rejected": -5.037665843963623, |
| "step": 385 |
| }, |
| { |
| "epoch": 0.65, |
| "learning_rate": 9.88178961872008e-07, |
| "logits/chosen": -2.6438419818878174, |
| "logits/rejected": -1.569690227508545, |
| "logps/chosen": -492.58660888671875, |
| "logps/rejected": -267.66607666015625, |
| "loss": 0.1912, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": -1.091853380203247, |
| "rewards/margins": 10.429098129272461, |
| "rewards/rejected": -11.520952224731445, |
| "step": 386 |
| }, |
| { |
| "epoch": 0.65, |
| "learning_rate": 9.880193216080748e-07, |
| "logits/chosen": -1.5667064189910889, |
| "logits/rejected": -0.8056033253669739, |
| "logps/chosen": -272.8934020996094, |
| "logps/rejected": -404.6851806640625, |
| "loss": 0.1471, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": -0.8850148916244507, |
| "rewards/margins": 8.984339714050293, |
| "rewards/rejected": -9.869355201721191, |
| "step": 387 |
| }, |
| { |
| "epoch": 0.65, |
| "learning_rate": 9.878586236798221e-07, |
| "logits/chosen": -1.8617156744003296, |
| "logits/rejected": -1.3088810443878174, |
| "logps/chosen": -240.18605041503906, |
| "logps/rejected": -271.2889099121094, |
| "loss": 0.1661, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": -0.44331783056259155, |
| "rewards/margins": 11.63255500793457, |
| "rewards/rejected": -12.075872421264648, |
| "step": 388 |
| }, |
| { |
| "epoch": 0.66, |
| "learning_rate": 9.876968684355238e-07, |
| "logits/chosen": -0.60749751329422, |
| "logits/rejected": -0.9327036738395691, |
| "logps/chosen": -248.83444213867188, |
| "logps/rejected": -199.20504760742188, |
| "loss": 0.179, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": -0.9339897632598877, |
| "rewards/margins": 9.368557929992676, |
| "rewards/rejected": -10.302547454833984, |
| "step": 389 |
| }, |
| { |
| "epoch": 0.66, |
| "learning_rate": 9.875340562257452e-07, |
| "logits/chosen": -1.6634929180145264, |
| "logits/rejected": -1.8427734375, |
| "logps/chosen": -225.20516967773438, |
| "logps/rejected": -224.0525665283203, |
| "loss": 0.159, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 0.1540244221687317, |
| "rewards/margins": 4.2535014152526855, |
| "rewards/rejected": -4.099477291107178, |
| "step": 390 |
| }, |
| { |
| "epoch": 0.66, |
| "learning_rate": 9.87370187403343e-07, |
| "logits/chosen": -1.5186893939971924, |
| "logits/rejected": -1.5003349781036377, |
| "logps/chosen": -355.7366638183594, |
| "logps/rejected": -268.9139404296875, |
| "loss": 0.1805, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": -1.4280953407287598, |
| "rewards/margins": 10.353462219238281, |
| "rewards/rejected": -11.781557083129883, |
| "step": 391 |
| }, |
| { |
| "epoch": 0.66, |
| "learning_rate": 9.872052623234631e-07, |
| "logits/chosen": -1.2197469472885132, |
| "logits/rejected": -2.3034727573394775, |
| "logps/chosen": -305.0716552734375, |
| "logps/rejected": -119.30422973632812, |
| "loss": 0.1617, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": -0.9030243158340454, |
| "rewards/margins": 6.021801471710205, |
| "rewards/rejected": -6.924825668334961, |
| "step": 392 |
| }, |
| { |
| "epoch": 0.66, |
| "learning_rate": 9.870392813435408e-07, |
| "logits/chosen": -1.6853370666503906, |
| "logits/rejected": -1.8906601667404175, |
| "logps/chosen": -54.065086364746094, |
| "logps/rejected": -109.49345397949219, |
| "loss": 0.183, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": -1.0523476600646973, |
| "rewards/margins": 4.095767021179199, |
| "rewards/rejected": -5.1481146812438965, |
| "step": 393 |
| }, |
| { |
| "epoch": 0.66, |
| "learning_rate": 9.868722448233003e-07, |
| "logits/chosen": -1.3388574123382568, |
| "logits/rejected": -1.6647964715957642, |
| "logps/chosen": -239.39515686035156, |
| "logps/rejected": -119.59913635253906, |
| "loss": 0.1716, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": -1.1848201751708984, |
| "rewards/margins": 4.421016693115234, |
| "rewards/rejected": -5.605837345123291, |
| "step": 394 |
| }, |
| { |
| "epoch": 0.67, |
| "learning_rate": 9.867041531247524e-07, |
| "logits/chosen": -1.7558951377868652, |
| "logits/rejected": -1.7084178924560547, |
| "logps/chosen": -49.69265365600586, |
| "logps/rejected": -83.90230560302734, |
| "loss": 0.2227, |
| "rewards/accuracies": 0.5, |
| "rewards/chosen": -0.1897386610507965, |
| "rewards/margins": 3.7224905490875244, |
| "rewards/rejected": -3.912229061126709, |
| "step": 395 |
| }, |
| { |
| "epoch": 0.67, |
| "learning_rate": 9.86535006612196e-07, |
| "logits/chosen": -1.0474004745483398, |
| "logits/rejected": -0.7698359489440918, |
| "logps/chosen": -255.73220825195312, |
| "logps/rejected": -224.53810119628906, |
| "loss": 0.129, |
| "rewards/accuracies": 0.5, |
| "rewards/chosen": -3.990817070007324, |
| "rewards/margins": 3.028542995452881, |
| "rewards/rejected": -7.019360065460205, |
| "step": 396 |
| }, |
| { |
| "epoch": 0.67, |
| "learning_rate": 9.86364805652215e-07, |
| "logits/chosen": -1.4001511335372925, |
| "logits/rejected": -0.2056565284729004, |
| "logps/chosen": -338.36224365234375, |
| "logps/rejected": -943.743408203125, |
| "loss": 0.1818, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": -0.8232328295707703, |
| "rewards/margins": 8.953399658203125, |
| "rewards/rejected": -9.776632308959961, |
| "step": 397 |
| }, |
| { |
| "epoch": 0.67, |
| "learning_rate": 9.861935506136793e-07, |
| "logits/chosen": -2.1997809410095215, |
| "logits/rejected": -2.1212539672851562, |
| "logps/chosen": -115.73619079589844, |
| "logps/rejected": -155.65628051757812, |
| "loss": 0.1635, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": -1.3197250366210938, |
| "rewards/margins": 5.0713791847229, |
| "rewards/rejected": -6.391103744506836, |
| "step": 398 |
| }, |
| { |
| "epoch": 0.67, |
| "learning_rate": 9.860212418677425e-07, |
| "logits/chosen": -1.3245618343353271, |
| "logits/rejected": -1.5838465690612793, |
| "logps/chosen": -192.42564392089844, |
| "logps/rejected": -121.29484558105469, |
| "loss": 0.1951, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 0.7826123237609863, |
| "rewards/margins": 6.154439926147461, |
| "rewards/rejected": -5.371828079223633, |
| "step": 399 |
| }, |
| { |
| "epoch": 0.67, |
| "learning_rate": 9.858478797878428e-07, |
| "logits/chosen": -1.1259046792984009, |
| "logits/rejected": -1.549862265586853, |
| "logps/chosen": -599.236328125, |
| "logps/rejected": -300.1288757324219, |
| "loss": 0.1937, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 0.30740663409233093, |
| "rewards/margins": 7.251522064208984, |
| "rewards/rejected": -6.94411563873291, |
| "step": 400 |
| }, |
| { |
| "epoch": 0.68, |
| "learning_rate": 9.856734647497004e-07, |
| "logits/chosen": -1.8423173427581787, |
| "logits/rejected": -1.7820255756378174, |
| "logps/chosen": -338.8928527832031, |
| "logps/rejected": -427.98590087890625, |
| "loss": 0.1951, |
| "rewards/accuracies": 0.5, |
| "rewards/chosen": -0.7935646176338196, |
| "rewards/margins": 1.3270835876464844, |
| "rewards/rejected": -2.120648145675659, |
| "step": 401 |
| }, |
| { |
| "epoch": 0.68, |
| "learning_rate": 9.854979971313182e-07, |
| "logits/chosen": -1.5276292562484741, |
| "logits/rejected": -1.362746000289917, |
| "logps/chosen": -153.15682983398438, |
| "logps/rejected": -178.7244873046875, |
| "loss": 0.1921, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": -1.658955693244934, |
| "rewards/margins": 6.521320343017578, |
| "rewards/rejected": -8.180275917053223, |
| "step": 402 |
| }, |
| { |
| "epoch": 0.68, |
| "learning_rate": 9.853214773129795e-07, |
| "logits/chosen": -1.574330449104309, |
| "logits/rejected": -1.6447815895080566, |
| "logps/chosen": -203.70518493652344, |
| "logps/rejected": -186.30174255371094, |
| "loss": 0.1799, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": -0.5951843857765198, |
| "rewards/margins": 10.695902824401855, |
| "rewards/rejected": -11.29108715057373, |
| "step": 403 |
| }, |
| { |
| "epoch": 0.68, |
| "learning_rate": 9.851439056772488e-07, |
| "logits/chosen": -2.2148728370666504, |
| "logits/rejected": -0.5473410487174988, |
| "logps/chosen": -114.89361572265625, |
| "logps/rejected": -270.749755859375, |
| "loss": 0.1814, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": -0.34365734457969666, |
| "rewards/margins": 10.512922286987305, |
| "rewards/rejected": -10.856579780578613, |
| "step": 404 |
| }, |
| { |
| "epoch": 0.68, |
| "learning_rate": 9.8496528260897e-07, |
| "logits/chosen": -1.523105502128601, |
| "logits/rejected": -0.9805685877799988, |
| "logps/chosen": -263.91705322265625, |
| "logps/rejected": -362.019287109375, |
| "loss": 0.1232, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": -1.9856665134429932, |
| "rewards/margins": 13.921882629394531, |
| "rewards/rejected": -15.907548904418945, |
| "step": 405 |
| }, |
| { |
| "epoch": 0.68, |
| "learning_rate": 9.847856084952652e-07, |
| "logits/chosen": -1.4170777797698975, |
| "logits/rejected": -1.4437755346298218, |
| "logps/chosen": -71.64923858642578, |
| "logps/rejected": -111.52011108398438, |
| "loss": 0.1337, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": -0.24141913652420044, |
| "rewards/margins": 1.8372013568878174, |
| "rewards/rejected": -2.078620433807373, |
| "step": 406 |
| }, |
| { |
| "epoch": 0.69, |
| "learning_rate": 9.846048837255353e-07, |
| "logits/chosen": -1.8761231899261475, |
| "logits/rejected": -1.4860190153121948, |
| "logps/chosen": -321.606689453125, |
| "logps/rejected": -248.3543701171875, |
| "loss": 0.1918, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": -1.1618309020996094, |
| "rewards/margins": 8.921380996704102, |
| "rewards/rejected": -10.083211898803711, |
| "step": 407 |
| }, |
| { |
| "epoch": 0.69, |
| "learning_rate": 9.84423108691457e-07, |
| "logits/chosen": -1.91605544090271, |
| "logits/rejected": -1.6970082521438599, |
| "logps/chosen": -59.132293701171875, |
| "logps/rejected": -167.10081481933594, |
| "loss": 0.2158, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": -1.300628423690796, |
| "rewards/margins": 9.486973762512207, |
| "rewards/rejected": -10.787602424621582, |
| "step": 408 |
| }, |
| { |
| "epoch": 0.69, |
| "learning_rate": 9.842402837869842e-07, |
| "logits/chosen": -0.6720188856124878, |
| "logits/rejected": -1.0757673978805542, |
| "logps/chosen": -532.5537109375, |
| "logps/rejected": -276.9455261230469, |
| "loss": 0.1982, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": -0.42199867963790894, |
| "rewards/margins": 12.428057670593262, |
| "rewards/rejected": -12.850056648254395, |
| "step": 409 |
| }, |
| { |
| "epoch": 0.69, |
| "learning_rate": 9.84056409408346e-07, |
| "logits/chosen": -1.7832905054092407, |
| "logits/rejected": -2.1222946643829346, |
| "logps/chosen": -311.7086181640625, |
| "logps/rejected": -320.260009765625, |
| "loss": 0.2174, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": -1.5798829793930054, |
| "rewards/margins": 5.759591102600098, |
| "rewards/rejected": -7.339474201202393, |
| "step": 410 |
| }, |
| { |
| "epoch": 0.69, |
| "learning_rate": 9.838714859540458e-07, |
| "logits/chosen": -1.6252554655075073, |
| "logits/rejected": -2.4704833030700684, |
| "logps/chosen": -273.0162048339844, |
| "logps/rejected": -206.291015625, |
| "loss": 0.1782, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": -0.6414878964424133, |
| "rewards/margins": 7.542596817016602, |
| "rewards/rejected": -8.18408489227295, |
| "step": 411 |
| }, |
| { |
| "epoch": 0.69, |
| "learning_rate": 9.836855138248602e-07, |
| "logits/chosen": -1.5945271253585815, |
| "logits/rejected": -1.039263129234314, |
| "logps/chosen": -213.08566284179688, |
| "logps/rejected": -276.2701416015625, |
| "loss": 0.2015, |
| "rewards/accuracies": 0.5, |
| "rewards/chosen": -1.5801552534103394, |
| "rewards/margins": 4.361068248748779, |
| "rewards/rejected": -5.941223621368408, |
| "step": 412 |
| }, |
| { |
| "epoch": 0.7, |
| "learning_rate": 9.834984934238397e-07, |
| "logits/chosen": -2.0274336338043213, |
| "logits/rejected": -1.1639341115951538, |
| "logps/chosen": -224.14974975585938, |
| "logps/rejected": -492.0933532714844, |
| "loss": 0.1541, |
| "rewards/accuracies": 0.0, |
| "rewards/chosen": -1.5109009742736816, |
| "rewards/margins": -0.4450409412384033, |
| "rewards/rejected": -1.0658600330352783, |
| "step": 413 |
| }, |
| { |
| "epoch": 0.7, |
| "learning_rate": 9.833104251563055e-07, |
| "logits/chosen": -1.1577857732772827, |
| "logits/rejected": -1.7103009223937988, |
| "logps/chosen": -294.07470703125, |
| "logps/rejected": -189.33216857910156, |
| "loss": 0.1796, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": -1.6231157779693604, |
| "rewards/margins": 8.794317245483398, |
| "rewards/rejected": -10.41743278503418, |
| "step": 414 |
| }, |
| { |
| "epoch": 0.7, |
| "learning_rate": 9.831213094298504e-07, |
| "logits/chosen": -2.4059667587280273, |
| "logits/rejected": -2.134012222290039, |
| "logps/chosen": -89.21797180175781, |
| "logps/rejected": -205.1034393310547, |
| "loss": 0.2001, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": -0.15212784707546234, |
| "rewards/margins": 1.837868571281433, |
| "rewards/rejected": -1.9899964332580566, |
| "step": 415 |
| }, |
| { |
| "epoch": 0.7, |
| "learning_rate": 9.829311466543372e-07, |
| "logits/chosen": -0.7545611262321472, |
| "logits/rejected": -2.312044382095337, |
| "logps/chosen": -298.3918151855469, |
| "logps/rejected": -100.84245300292969, |
| "loss": 0.1934, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": -1.2649837732315063, |
| "rewards/margins": 4.35391902923584, |
| "rewards/rejected": -5.618902683258057, |
| "step": 416 |
| }, |
| { |
| "epoch": 0.7, |
| "learning_rate": 9.827399372418978e-07, |
| "logits/chosen": -1.689001202583313, |
| "logits/rejected": -1.7924022674560547, |
| "logps/chosen": -490.21630859375, |
| "logps/rejected": -572.5187377929688, |
| "loss": 0.1589, |
| "rewards/accuracies": 0.5, |
| "rewards/chosen": -0.4802818298339844, |
| "rewards/margins": 1.4142119884490967, |
| "rewards/rejected": -1.894493818283081, |
| "step": 417 |
| }, |
| { |
| "epoch": 0.7, |
| "learning_rate": 9.825476816069325e-07, |
| "logits/chosen": -1.687720537185669, |
| "logits/rejected": -2.245692014694214, |
| "logps/chosen": -252.95947265625, |
| "logps/rejected": -272.66729736328125, |
| "loss": 0.1749, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": -1.0029993057250977, |
| "rewards/margins": 6.877737522125244, |
| "rewards/rejected": -7.880736827850342, |
| "step": 418 |
| }, |
| { |
| "epoch": 0.71, |
| "learning_rate": 9.823543801661093e-07, |
| "logits/chosen": -1.327947735786438, |
| "logits/rejected": -2.4989848136901855, |
| "logps/chosen": -171.38914489746094, |
| "logps/rejected": -114.95437622070312, |
| "loss": 0.1847, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": -1.1186927556991577, |
| "rewards/margins": 3.9151296615600586, |
| "rewards/rejected": -5.033822536468506, |
| "step": 419 |
| }, |
| { |
| "epoch": 0.71, |
| "learning_rate": 9.821600333383624e-07, |
| "logits/chosen": -1.2999866008758545, |
| "logits/rejected": -1.940646767616272, |
| "logps/chosen": -404.37384033203125, |
| "logps/rejected": -225.85601806640625, |
| "loss": 0.18, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": -1.6930325031280518, |
| "rewards/margins": 9.95132827758789, |
| "rewards/rejected": -11.64436149597168, |
| "step": 420 |
| }, |
| { |
| "epoch": 0.71, |
| "learning_rate": 9.819646415448917e-07, |
| "logits/chosen": -0.8139424324035645, |
| "logits/rejected": -1.0725926160812378, |
| "logps/chosen": -449.7269287109375, |
| "logps/rejected": -254.24598693847656, |
| "loss": 0.1662, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": -1.6415162086486816, |
| "rewards/margins": 9.845947265625, |
| "rewards/rejected": -11.487462997436523, |
| "step": 421 |
| }, |
| { |
| "epoch": 0.71, |
| "learning_rate": 9.817682052091617e-07, |
| "logits/chosen": -1.257497787475586, |
| "logits/rejected": -1.384131908416748, |
| "logps/chosen": -306.87060546875, |
| "logps/rejected": -237.9322509765625, |
| "loss": 0.165, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 0.43299102783203125, |
| "rewards/margins": 6.2627716064453125, |
| "rewards/rejected": -5.829780578613281, |
| "step": 422 |
| }, |
| { |
| "epoch": 0.71, |
| "learning_rate": 9.815707247569012e-07, |
| "logits/chosen": -1.3054028749465942, |
| "logits/rejected": -1.9904706478118896, |
| "logps/chosen": -199.9625244140625, |
| "logps/rejected": -174.94766235351562, |
| "loss": 0.2305, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": -1.0188324451446533, |
| "rewards/margins": 9.714458465576172, |
| "rewards/rejected": -10.733290672302246, |
| "step": 423 |
| }, |
| { |
| "epoch": 0.72, |
| "learning_rate": 9.81372200616101e-07, |
| "logits/chosen": -1.711554765701294, |
| "logits/rejected": -1.9116215705871582, |
| "logps/chosen": -244.9205780029297, |
| "logps/rejected": -235.5703125, |
| "loss": 0.1702, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": -0.49955570697784424, |
| "rewards/margins": 6.994556427001953, |
| "rewards/rejected": -7.494112014770508, |
| "step": 424 |
| }, |
| { |
| "epoch": 0.72, |
| "learning_rate": 9.81172633217015e-07, |
| "logits/chosen": -1.4319877624511719, |
| "logits/rejected": -1.4931282997131348, |
| "logps/chosen": -227.3060760498047, |
| "logps/rejected": -265.25616455078125, |
| "loss": 0.2099, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": -1.4414575099945068, |
| "rewards/margins": 11.510025024414062, |
| "rewards/rejected": -12.951482772827148, |
| "step": 425 |
| }, |
| { |
| "epoch": 0.72, |
| "learning_rate": 9.809720229921572e-07, |
| "logits/chosen": -1.3125286102294922, |
| "logits/rejected": -1.061862587928772, |
| "logps/chosen": -175.32794189453125, |
| "logps/rejected": -224.84396362304688, |
| "loss": 0.1759, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": -0.008734322153031826, |
| "rewards/margins": 4.605374336242676, |
| "rewards/rejected": -4.614108562469482, |
| "step": 426 |
| }, |
| { |
| "epoch": 0.72, |
| "learning_rate": 9.807703703763015e-07, |
| "logits/chosen": -1.8269639015197754, |
| "logits/rejected": -1.7482253313064575, |
| "logps/chosen": -33.005889892578125, |
| "logps/rejected": -88.10383605957031, |
| "loss": 0.1665, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": -0.19627046585083008, |
| "rewards/margins": 3.915902614593506, |
| "rewards/rejected": -4.112173080444336, |
| "step": 427 |
| }, |
| { |
| "epoch": 0.72, |
| "learning_rate": 9.80567675806482e-07, |
| "logits/chosen": -1.4262571334838867, |
| "logits/rejected": -1.5540329217910767, |
| "logps/chosen": -73.27861785888672, |
| "logps/rejected": -102.44043731689453, |
| "loss": 0.165, |
| "rewards/accuracies": 0.5, |
| "rewards/chosen": -1.6107984781265259, |
| "rewards/margins": 0.42136502265930176, |
| "rewards/rejected": -2.032163619995117, |
| "step": 428 |
| }, |
| { |
| "epoch": 0.72, |
| "learning_rate": 9.8036393972199e-07, |
| "logits/chosen": -2.002448320388794, |
| "logits/rejected": -2.299851655960083, |
| "logps/chosen": -87.68385314941406, |
| "logps/rejected": -85.51641845703125, |
| "loss": 0.1533, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": -1.3282948732376099, |
| "rewards/margins": 2.7130818367004395, |
| "rewards/rejected": -4.04137659072876, |
| "step": 429 |
| }, |
| { |
| "epoch": 0.73, |
| "learning_rate": 9.801591625643743e-07, |
| "logits/chosen": -0.7014894485473633, |
| "logits/rejected": -1.4072990417480469, |
| "logps/chosen": -695.2222900390625, |
| "logps/rejected": -328.1853942871094, |
| "loss": 0.1957, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": -1.7655518054962158, |
| "rewards/margins": 12.59501838684082, |
| "rewards/rejected": -14.360569953918457, |
| "step": 430 |
| }, |
| { |
| "epoch": 0.73, |
| "learning_rate": 9.799533447774404e-07, |
| "logits/chosen": -1.8441977500915527, |
| "logits/rejected": -2.370758533477783, |
| "logps/chosen": -488.61370849609375, |
| "logps/rejected": -225.34573364257812, |
| "loss": 0.1664, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": -0.5413681268692017, |
| "rewards/margins": 6.072797775268555, |
| "rewards/rejected": -6.614165782928467, |
| "step": 431 |
| }, |
| { |
| "epoch": 0.73, |
| "learning_rate": 9.797464868072486e-07, |
| "logits/chosen": -1.492543339729309, |
| "logits/rejected": -1.383135437965393, |
| "logps/chosen": -128.7510223388672, |
| "logps/rejected": -252.252685546875, |
| "loss": 0.1783, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": -0.5648261904716492, |
| "rewards/margins": 2.220522880554199, |
| "rewards/rejected": -2.7853493690490723, |
| "step": 432 |
| }, |
| { |
| "epoch": 0.73, |
| "learning_rate": 9.795385891021136e-07, |
| "logits/chosen": -1.3558893203735352, |
| "logits/rejected": -2.4246795177459717, |
| "logps/chosen": -88.99250793457031, |
| "logps/rejected": -69.09440612792969, |
| "loss": 0.137, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": -0.3191506564617157, |
| "rewards/margins": 4.714672565460205, |
| "rewards/rejected": -5.033823013305664, |
| "step": 433 |
| }, |
| { |
| "epoch": 0.73, |
| "learning_rate": 9.79329652112604e-07, |
| "logits/chosen": -2.187133312225342, |
| "logits/rejected": -2.0523133277893066, |
| "logps/chosen": -237.15127563476562, |
| "logps/rejected": -257.36614990234375, |
| "loss": 0.2559, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": -0.08025474846363068, |
| "rewards/margins": 6.414705276489258, |
| "rewards/rejected": -6.494959831237793, |
| "step": 434 |
| }, |
| { |
| "epoch": 0.73, |
| "learning_rate": 9.7911967629154e-07, |
| "logits/chosen": -0.5673585534095764, |
| "logits/rejected": -0.7198299169540405, |
| "logps/chosen": -272.7919921875, |
| "logps/rejected": -180.43826293945312, |
| "loss": 0.1932, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 0.3921341001987457, |
| "rewards/margins": 10.290606498718262, |
| "rewards/rejected": -9.89847183227539, |
| "step": 435 |
| }, |
| { |
| "epoch": 0.74, |
| "learning_rate": 9.789086620939935e-07, |
| "logits/chosen": -1.5646958351135254, |
| "logits/rejected": -1.3017683029174805, |
| "logps/chosen": -246.85928344726562, |
| "logps/rejected": -270.7761535644531, |
| "loss": 0.2036, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": -0.7477285265922546, |
| "rewards/margins": 11.448813438415527, |
| "rewards/rejected": -12.196542739868164, |
| "step": 436 |
| }, |
| { |
| "epoch": 0.74, |
| "learning_rate": 9.786966099772873e-07, |
| "logits/chosen": -1.1668096780776978, |
| "logits/rejected": -1.150396466255188, |
| "logps/chosen": -214.652099609375, |
| "logps/rejected": -179.91836547851562, |
| "loss": 0.1923, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": -1.1188222169876099, |
| "rewards/margins": 8.251716613769531, |
| "rewards/rejected": -9.370538711547852, |
| "step": 437 |
| }, |
| { |
| "epoch": 0.74, |
| "learning_rate": 9.784835204009932e-07, |
| "logits/chosen": -0.9191622734069824, |
| "logits/rejected": -0.8720898032188416, |
| "logps/chosen": -184.92555236816406, |
| "logps/rejected": -170.7421112060547, |
| "loss": 0.1958, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": -1.486758828163147, |
| "rewards/margins": 5.754380702972412, |
| "rewards/rejected": -7.2411394119262695, |
| "step": 438 |
| }, |
| { |
| "epoch": 0.74, |
| "learning_rate": 9.782693938269312e-07, |
| "logits/chosen": -1.8319122791290283, |
| "logits/rejected": -1.2807798385620117, |
| "logps/chosen": -209.416259765625, |
| "logps/rejected": -269.36041259765625, |
| "loss": 0.1844, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": -1.37221360206604, |
| "rewards/margins": 13.217639923095703, |
| "rewards/rejected": -14.589853286743164, |
| "step": 439 |
| }, |
| { |
| "epoch": 0.74, |
| "learning_rate": 9.780542307191697e-07, |
| "logits/chosen": -1.5643036365509033, |
| "logits/rejected": -1.2528153657913208, |
| "logps/chosen": -608.1365356445312, |
| "logps/rejected": -491.44134521484375, |
| "loss": 0.1582, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": -0.6126511096954346, |
| "rewards/margins": 10.805754661560059, |
| "rewards/rejected": -11.418405532836914, |
| "step": 440 |
| }, |
| { |
| "epoch": 0.74, |
| "learning_rate": 9.778380315440223e-07, |
| "logits/chosen": -1.5918906927108765, |
| "logits/rejected": -1.0028258562088013, |
| "logps/chosen": -284.98736572265625, |
| "logps/rejected": -302.61846923828125, |
| "loss": 0.187, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": -0.8316802978515625, |
| "rewards/margins": 8.248760223388672, |
| "rewards/rejected": -9.080440521240234, |
| "step": 441 |
| }, |
| { |
| "epoch": 0.75, |
| "learning_rate": 9.776207967700489e-07, |
| "logits/chosen": -2.523466110229492, |
| "logits/rejected": -1.5323551893234253, |
| "logps/chosen": -88.10340881347656, |
| "logps/rejected": -281.467041015625, |
| "loss": 0.1714, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": -0.7970980405807495, |
| "rewards/margins": 10.560657501220703, |
| "rewards/rejected": -11.357755661010742, |
| "step": 442 |
| }, |
| { |
| "epoch": 0.75, |
| "learning_rate": 9.774025268680538e-07, |
| "logits/chosen": -1.2854011058807373, |
| "logits/rejected": -1.61070716381073, |
| "logps/chosen": -567.8692016601562, |
| "logps/rejected": -314.9006042480469, |
| "loss": 0.1934, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": -1.4001374244689941, |
| "rewards/margins": 9.382172584533691, |
| "rewards/rejected": -10.782309532165527, |
| "step": 443 |
| }, |
| { |
| "epoch": 0.75, |
| "learning_rate": 9.77183222311084e-07, |
| "logits/chosen": -1.8872003555297852, |
| "logits/rejected": -1.3685364723205566, |
| "logps/chosen": -155.8271942138672, |
| "logps/rejected": -398.8939208984375, |
| "loss": 0.1818, |
| "rewards/accuracies": 0.5, |
| "rewards/chosen": -0.7372512817382812, |
| "rewards/margins": 0.28054046630859375, |
| "rewards/rejected": -1.017791748046875, |
| "step": 444 |
| }, |
| { |
| "epoch": 0.75, |
| "learning_rate": 9.769628835744292e-07, |
| "logits/chosen": -2.153257131576538, |
| "logits/rejected": -2.538587808609009, |
| "logps/chosen": -219.20547485351562, |
| "logps/rejected": -285.2276611328125, |
| "loss": 0.1725, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": -1.1647541522979736, |
| "rewards/margins": 6.780641555786133, |
| "rewards/rejected": -7.945395469665527, |
| "step": 445 |
| }, |
| { |
| "epoch": 0.75, |
| "learning_rate": 9.767415111356208e-07, |
| "logits/chosen": -1.6822693347930908, |
| "logits/rejected": -1.125065565109253, |
| "logps/chosen": -213.1407928466797, |
| "logps/rejected": -248.02450561523438, |
| "loss": 0.1815, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": -1.2097599506378174, |
| "rewards/margins": 7.717904090881348, |
| "rewards/rejected": -8.927663803100586, |
| "step": 446 |
| }, |
| { |
| "epoch": 0.75, |
| "learning_rate": 9.765191054744304e-07, |
| "logits/chosen": -0.9882490038871765, |
| "logits/rejected": -1.6791247129440308, |
| "logps/chosen": -272.322998046875, |
| "logps/rejected": -146.1826171875, |
| "loss": 0.1619, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": -0.7433132529258728, |
| "rewards/margins": 5.633927345275879, |
| "rewards/rejected": -6.377241134643555, |
| "step": 447 |
| }, |
| { |
| "epoch": 0.76, |
| "learning_rate": 9.762956670728683e-07, |
| "logits/chosen": -1.6359366178512573, |
| "logits/rejected": -1.28961181640625, |
| "logps/chosen": -62.76860046386719, |
| "logps/rejected": -229.94815063476562, |
| "loss": 0.1326, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": -0.22195053100585938, |
| "rewards/margins": 12.753116607666016, |
| "rewards/rejected": -12.975067138671875, |
| "step": 448 |
| }, |
| { |
| "epoch": 0.76, |
| "learning_rate": 9.76071196415184e-07, |
| "logits/chosen": -1.9846500158309937, |
| "logits/rejected": -1.8391332626342773, |
| "logps/chosen": -223.3437042236328, |
| "logps/rejected": -206.13235473632812, |
| "loss": 0.1572, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": -0.34529823064804077, |
| "rewards/margins": 8.844955444335938, |
| "rewards/rejected": -9.190253257751465, |
| "step": 449 |
| }, |
| { |
| "epoch": 0.76, |
| "learning_rate": 9.758456939878629e-07, |
| "logits/chosen": -0.6676144003868103, |
| "logits/rejected": -0.5327748656272888, |
| "logps/chosen": -334.7253723144531, |
| "logps/rejected": -288.2399597167969, |
| "loss": 0.1739, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": -0.5323821902275085, |
| "rewards/margins": 7.189955234527588, |
| "rewards/rejected": -7.72233772277832, |
| "step": 450 |
| }, |
| { |
| "epoch": 0.76, |
| "learning_rate": 9.756191602796275e-07, |
| "logits/chosen": -0.7316077947616577, |
| "logits/rejected": -1.6688975095748901, |
| "logps/chosen": -526.944580078125, |
| "logps/rejected": -288.75018310546875, |
| "loss": 0.1576, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": -0.8874114751815796, |
| "rewards/margins": 8.355914115905762, |
| "rewards/rejected": -9.243326187133789, |
| "step": 451 |
| }, |
| { |
| "epoch": 0.76, |
| "learning_rate": 9.753915957814352e-07, |
| "logits/chosen": -1.136466145515442, |
| "logits/rejected": -1.8164993524551392, |
| "logps/chosen": -503.8302307128906, |
| "logps/rejected": -425.62176513671875, |
| "loss": 0.1423, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 0.10046082735061646, |
| "rewards/margins": 1.3869491815567017, |
| "rewards/rejected": -1.2864882946014404, |
| "step": 452 |
| }, |
| { |
| "epoch": 0.76, |
| "learning_rate": 9.751630009864768e-07, |
| "logits/chosen": -0.9611995816230774, |
| "logits/rejected": -1.4471514225006104, |
| "logps/chosen": -530.7092895507812, |
| "logps/rejected": -166.03952026367188, |
| "loss": 0.2416, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": -0.7246857285499573, |
| "rewards/margins": 6.8713698387146, |
| "rewards/rejected": -7.596055507659912, |
| "step": 453 |
| }, |
| { |
| "epoch": 0.77, |
| "learning_rate": 9.74933376390177e-07, |
| "logits/chosen": -1.8776335716247559, |
| "logits/rejected": -2.2231943607330322, |
| "logps/chosen": -299.9205627441406, |
| "logps/rejected": -328.975341796875, |
| "loss": 0.1723, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": -0.1313549131155014, |
| "rewards/margins": 5.5653581619262695, |
| "rewards/rejected": -5.696713447570801, |
| "step": 454 |
| }, |
| { |
| "epoch": 0.77, |
| "learning_rate": 9.747027224901912e-07, |
| "logits/chosen": -1.4517310857772827, |
| "logits/rejected": -1.5876500606536865, |
| "logps/chosen": -20.67554473876953, |
| "logps/rejected": -94.16972351074219, |
| "loss": 0.2211, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": -0.09321515262126923, |
| "rewards/margins": 4.643195629119873, |
| "rewards/rejected": -4.736410617828369, |
| "step": 455 |
| }, |
| { |
| "epoch": 0.77, |
| "learning_rate": 9.744710397864066e-07, |
| "logits/chosen": -2.7332839965820312, |
| "logits/rejected": -1.3261853456497192, |
| "logps/chosen": -23.798072814941406, |
| "logps/rejected": -210.81732177734375, |
| "loss": 0.1521, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": -0.3627491295337677, |
| "rewards/margins": 9.340432167053223, |
| "rewards/rejected": -9.703181266784668, |
| "step": 456 |
| }, |
| { |
| "epoch": 0.77, |
| "learning_rate": 9.742383287809396e-07, |
| "logits/chosen": -1.964377999305725, |
| "logits/rejected": -1.4948861598968506, |
| "logps/chosen": -281.6620788574219, |
| "logps/rejected": -280.5074157714844, |
| "loss": 0.2045, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": -0.4957534670829773, |
| "rewards/margins": 11.540742874145508, |
| "rewards/rejected": -12.036495208740234, |
| "step": 457 |
| }, |
| { |
| "epoch": 0.77, |
| "learning_rate": 9.740045899781352e-07, |
| "logits/chosen": -0.5110257863998413, |
| "logits/rejected": -0.6007272601127625, |
| "logps/chosen": -226.26109313964844, |
| "logps/rejected": -171.11524963378906, |
| "loss": 0.168, |
| "rewards/accuracies": 0.0, |
| "rewards/chosen": -2.9013428688049316, |
| "rewards/margins": -1.4838972091674805, |
| "rewards/rejected": -1.4174456596374512, |
| "step": 458 |
| }, |
| { |
| "epoch": 0.77, |
| "learning_rate": 9.737698238845658e-07, |
| "logits/chosen": -1.7447842359542847, |
| "logits/rejected": -2.5760953426361084, |
| "logps/chosen": -257.5279846191406, |
| "logps/rejected": -195.7313995361328, |
| "loss": 0.1909, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": -0.032274626195430756, |
| "rewards/margins": 7.554751396179199, |
| "rewards/rejected": -7.5870256423950195, |
| "step": 459 |
| }, |
| { |
| "epoch": 0.78, |
| "learning_rate": 9.735340310090306e-07, |
| "logits/chosen": -1.7694307565689087, |
| "logits/rejected": -2.6311557292938232, |
| "logps/chosen": -307.8857116699219, |
| "logps/rejected": -171.5406494140625, |
| "loss": 0.1577, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": -0.29933395981788635, |
| "rewards/margins": 8.812219619750977, |
| "rewards/rejected": -9.111554145812988, |
| "step": 460 |
| }, |
| { |
| "epoch": 0.78, |
| "learning_rate": 9.732972118625536e-07, |
| "logits/chosen": -1.408591866493225, |
| "logits/rejected": -1.5779300928115845, |
| "logps/chosen": -169.06919860839844, |
| "logps/rejected": -177.49819946289062, |
| "loss": 0.1833, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": -0.2286117523908615, |
| "rewards/margins": 0.6607635617256165, |
| "rewards/rejected": -0.8893753290176392, |
| "step": 461 |
| }, |
| { |
| "epoch": 0.78, |
| "learning_rate": 9.730593669583835e-07, |
| "logits/chosen": -1.7068212032318115, |
| "logits/rejected": -1.9184765815734863, |
| "logps/chosen": -70.21788024902344, |
| "logps/rejected": -134.25051879882812, |
| "loss": 0.1484, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 0.1888481080532074, |
| "rewards/margins": 5.6896843910217285, |
| "rewards/rejected": -5.500836372375488, |
| "step": 462 |
| }, |
| { |
| "epoch": 0.78, |
| "learning_rate": 9.728204968119915e-07, |
| "logits/chosen": -1.1536794900894165, |
| "logits/rejected": -1.827487826347351, |
| "logps/chosen": -354.2060546875, |
| "logps/rejected": -361.4664001464844, |
| "loss": 0.1828, |
| "rewards/accuracies": 0.5, |
| "rewards/chosen": -2.5781145095825195, |
| "rewards/margins": 4.080938339233398, |
| "rewards/rejected": -6.659052848815918, |
| "step": 463 |
| }, |
| { |
| "epoch": 0.78, |
| "learning_rate": 9.725806019410717e-07, |
| "logits/chosen": -0.7136868834495544, |
| "logits/rejected": -0.650567352771759, |
| "logps/chosen": -158.14785766601562, |
| "logps/rejected": -164.43289184570312, |
| "loss": 0.1672, |
| "rewards/accuracies": 0.0, |
| "rewards/chosen": -2.082798719406128, |
| "rewards/margins": -0.6656165719032288, |
| "rewards/rejected": -1.417182207107544, |
| "step": 464 |
| }, |
| { |
| "epoch": 0.78, |
| "learning_rate": 9.723396828655376e-07, |
| "logits/chosen": -1.1137374639511108, |
| "logits/rejected": -0.8390330076217651, |
| "logps/chosen": -389.09088134765625, |
| "logps/rejected": -321.860107421875, |
| "loss": 0.1468, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": -0.09591063857078552, |
| "rewards/margins": 12.648015022277832, |
| "rewards/rejected": -12.743925094604492, |
| "step": 465 |
| }, |
| { |
| "epoch": 0.79, |
| "learning_rate": 9.72097740107524e-07, |
| "logits/chosen": -1.2167657613754272, |
| "logits/rejected": -1.5186375379562378, |
| "logps/chosen": -69.07684326171875, |
| "logps/rejected": -136.32656860351562, |
| "loss": 0.1958, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 0.024441910907626152, |
| "rewards/margins": 7.298024654388428, |
| "rewards/rejected": -7.273582458496094, |
| "step": 466 |
| }, |
| { |
| "epoch": 0.79, |
| "learning_rate": 9.718547741913833e-07, |
| "logits/chosen": -1.3299391269683838, |
| "logits/rejected": -1.1457806825637817, |
| "logps/chosen": -497.37103271484375, |
| "logps/rejected": -386.53143310546875, |
| "loss": 0.186, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 1.3952667713165283, |
| "rewards/margins": 8.335859298706055, |
| "rewards/rejected": -6.940592288970947, |
| "step": 467 |
| }, |
| { |
| "epoch": 0.79, |
| "learning_rate": 9.716107856436855e-07, |
| "logits/chosen": -1.1360148191452026, |
| "logits/rejected": -0.6608507037162781, |
| "logps/chosen": -247.17117309570312, |
| "logps/rejected": -258.4505920410156, |
| "loss": 0.2208, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": -1.1492900848388672, |
| "rewards/margins": 6.689465045928955, |
| "rewards/rejected": -7.838755130767822, |
| "step": 468 |
| }, |
| { |
| "epoch": 0.79, |
| "learning_rate": 9.713657749932171e-07, |
| "logits/chosen": -0.8903838396072388, |
| "logits/rejected": -1.1991342306137085, |
| "logps/chosen": -475.362548828125, |
| "logps/rejected": -285.0642395019531, |
| "loss": 0.1696, |
| "rewards/accuracies": 0.5, |
| "rewards/chosen": -2.755786180496216, |
| "rewards/margins": 7.545994281768799, |
| "rewards/rejected": -10.301780700683594, |
| "step": 469 |
| }, |
| { |
| "epoch": 0.79, |
| "learning_rate": 9.711197427709795e-07, |
| "logits/chosen": -0.6181639432907104, |
| "logits/rejected": -0.9941724538803101, |
| "logps/chosen": -130.269775390625, |
| "logps/rejected": -202.236083984375, |
| "loss": 0.1921, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": -0.045879364013671875, |
| "rewards/margins": 6.135862827301025, |
| "rewards/rejected": -6.181741714477539, |
| "step": 470 |
| }, |
| { |
| "epoch": 0.79, |
| "learning_rate": 9.708726895101885e-07, |
| "logits/chosen": -0.6759887337684631, |
| "logits/rejected": -1.0811114311218262, |
| "logps/chosen": -349.7430725097656, |
| "logps/rejected": -202.4884490966797, |
| "loss": 0.1463, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": -0.6895374059677124, |
| "rewards/margins": 10.514432907104492, |
| "rewards/rejected": -11.203970909118652, |
| "step": 471 |
| }, |
| { |
| "epoch": 0.8, |
| "learning_rate": 9.706246157462726e-07, |
| "logits/chosen": -1.6232417821884155, |
| "logits/rejected": -1.6543527841567993, |
| "logps/chosen": -154.53765869140625, |
| "logps/rejected": -209.14459228515625, |
| "loss": 0.2433, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": -1.1862388849258423, |
| "rewards/margins": 2.6417124271392822, |
| "rewards/rejected": -3.827951431274414, |
| "step": 472 |
| }, |
| { |
| "epoch": 0.8, |
| "learning_rate": 9.703755220168714e-07, |
| "logits/chosen": -2.7903904914855957, |
| "logits/rejected": -1.3351569175720215, |
| "logps/chosen": -257.2842712402344, |
| "logps/rejected": -224.76751708984375, |
| "loss": 0.1928, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 0.061550140380859375, |
| "rewards/margins": 2.485539197921753, |
| "rewards/rejected": -2.4239890575408936, |
| "step": 473 |
| }, |
| { |
| "epoch": 0.8, |
| "learning_rate": 9.701254088618362e-07, |
| "logits/chosen": -0.9130764007568359, |
| "logits/rejected": -1.3383204936981201, |
| "logps/chosen": -381.96124267578125, |
| "logps/rejected": -193.50051879882812, |
| "loss": 0.1515, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": -0.1674940139055252, |
| "rewards/margins": 8.00719928741455, |
| "rewards/rejected": -8.17469310760498, |
| "step": 474 |
| }, |
| { |
| "epoch": 0.8, |
| "learning_rate": 9.698742768232265e-07, |
| "logits/chosen": -0.8350385427474976, |
| "logits/rejected": -0.8087922930717468, |
| "logps/chosen": -260.8985900878906, |
| "logps/rejected": -207.9868927001953, |
| "loss": 0.1985, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": -2.3379669189453125, |
| "rewards/margins": 6.639181137084961, |
| "rewards/rejected": -8.977148056030273, |
| "step": 475 |
| }, |
| { |
| "epoch": 0.8, |
| "learning_rate": 9.696221264453108e-07, |
| "logits/chosen": -1.8277158737182617, |
| "logits/rejected": -1.8809436559677124, |
| "logps/chosen": -102.64655303955078, |
| "logps/rejected": -81.0972671508789, |
| "loss": 0.1846, |
| "rewards/accuracies": 0.5, |
| "rewards/chosen": -1.0536823272705078, |
| "rewards/margins": -0.04416823387145996, |
| "rewards/rejected": -1.0095140933990479, |
| "step": 476 |
| }, |
| { |
| "epoch": 0.8, |
| "learning_rate": 9.693689582745643e-07, |
| "logits/chosen": -2.200521230697632, |
| "logits/rejected": -2.2031776905059814, |
| "logps/chosen": -78.0069808959961, |
| "logps/rejected": -148.51446533203125, |
| "loss": 0.213, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": -0.6362529397010803, |
| "rewards/margins": 3.200962781906128, |
| "rewards/rejected": -3.8372156620025635, |
| "step": 477 |
| }, |
| { |
| "epoch": 0.81, |
| "learning_rate": 9.691147728596681e-07, |
| "logits/chosen": -0.9372101426124573, |
| "logits/rejected": -0.8971385359764099, |
| "logps/chosen": -158.72555541992188, |
| "logps/rejected": -178.3800506591797, |
| "loss": 0.1383, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": -0.3784370422363281, |
| "rewards/margins": 5.4618988037109375, |
| "rewards/rejected": -5.840336322784424, |
| "step": 478 |
| }, |
| { |
| "epoch": 0.81, |
| "learning_rate": 9.688595707515076e-07, |
| "logits/chosen": -2.139923572540283, |
| "logits/rejected": -1.6465723514556885, |
| "logps/chosen": -119.42794799804688, |
| "logps/rejected": -210.23606872558594, |
| "loss": 0.2018, |
| "rewards/accuracies": 0.5, |
| "rewards/chosen": -2.1089653968811035, |
| "rewards/margins": 4.937000274658203, |
| "rewards/rejected": -7.045965671539307, |
| "step": 479 |
| }, |
| { |
| "epoch": 0.81, |
| "learning_rate": 9.686033525031719e-07, |
| "logits/chosen": -1.4923748970031738, |
| "logits/rejected": -1.2859959602355957, |
| "logps/chosen": -226.31365966796875, |
| "logps/rejected": -233.12567138671875, |
| "loss": 0.1772, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": -1.537055492401123, |
| "rewards/margins": 6.823518753051758, |
| "rewards/rejected": -8.360574722290039, |
| "step": 480 |
| }, |
| { |
| "epoch": 0.81, |
| "learning_rate": 9.683461186699524e-07, |
| "logits/chosen": -0.9652807116508484, |
| "logits/rejected": -1.830547571182251, |
| "logps/chosen": -774.6458129882812, |
| "logps/rejected": -302.51702880859375, |
| "loss": 0.185, |
| "rewards/accuracies": 0.5, |
| "rewards/chosen": -2.865536689758301, |
| "rewards/margins": 6.375579833984375, |
| "rewards/rejected": -9.241116523742676, |
| "step": 481 |
| }, |
| { |
| "epoch": 0.81, |
| "learning_rate": 9.680878698093415e-07, |
| "logits/chosen": -0.6591046452522278, |
| "logits/rejected": -1.157971739768982, |
| "logps/chosen": -304.5384216308594, |
| "logps/rejected": -185.4188232421875, |
| "loss": 0.1882, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": -0.8371291756629944, |
| "rewards/margins": 6.621988296508789, |
| "rewards/rejected": -7.4591169357299805, |
| "step": 482 |
| }, |
| { |
| "epoch": 0.81, |
| "learning_rate": 9.678286064810316e-07, |
| "logits/chosen": -1.630854845046997, |
| "logits/rejected": -1.1944377422332764, |
| "logps/chosen": -63.372398376464844, |
| "logps/rejected": -118.29740905761719, |
| "loss": 0.2208, |
| "rewards/accuracies": 0.5, |
| "rewards/chosen": -0.9332462549209595, |
| "rewards/margins": -0.14584654569625854, |
| "rewards/rejected": -0.7873997092247009, |
| "step": 483 |
| }, |
| { |
| "epoch": 0.82, |
| "learning_rate": 9.67568329246913e-07, |
| "logits/chosen": -1.5651147365570068, |
| "logits/rejected": -2.2136974334716797, |
| "logps/chosen": -356.625244140625, |
| "logps/rejected": -257.2489013671875, |
| "loss": 0.1678, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": -2.220522880554199, |
| "rewards/margins": 11.928407669067383, |
| "rewards/rejected": -14.148930549621582, |
| "step": 484 |
| }, |
| { |
| "epoch": 0.82, |
| "learning_rate": 9.673070386710745e-07, |
| "logits/chosen": -0.5516372323036194, |
| "logits/rejected": -0.5296367406845093, |
| "logps/chosen": -12.047179222106934, |
| "logps/rejected": -111.52848052978516, |
| "loss": 0.1797, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": -0.2791111469268799, |
| "rewards/margins": 8.235075950622559, |
| "rewards/rejected": -8.51418685913086, |
| "step": 485 |
| }, |
| { |
| "epoch": 0.82, |
| "learning_rate": 9.670447353197998e-07, |
| "logits/chosen": -1.5218621492385864, |
| "logits/rejected": -1.7300869226455688, |
| "logps/chosen": -215.33021545410156, |
| "logps/rejected": -353.5644226074219, |
| "loss": 0.1917, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": -1.7259873151779175, |
| "rewards/margins": 16.542556762695312, |
| "rewards/rejected": -18.268545150756836, |
| "step": 486 |
| }, |
| { |
| "epoch": 0.82, |
| "learning_rate": 9.66781419761569e-07, |
| "logits/chosen": -1.3317276239395142, |
| "logits/rejected": -1.3344420194625854, |
| "logps/chosen": -116.13327026367188, |
| "logps/rejected": -97.93529510498047, |
| "loss": 0.1949, |
| "rewards/accuracies": 0.5, |
| "rewards/chosen": -3.2147207260131836, |
| "rewards/margins": -0.9069676399230957, |
| "rewards/rejected": -2.307753086090088, |
| "step": 487 |
| }, |
| { |
| "epoch": 0.82, |
| "learning_rate": 9.665170925670546e-07, |
| "logits/chosen": -1.387438416481018, |
| "logits/rejected": -1.5614135265350342, |
| "logps/chosen": -156.07591247558594, |
| "logps/rejected": -155.57327270507812, |
| "loss": 0.1721, |
| "rewards/accuracies": 0.5, |
| "rewards/chosen": -1.0742095708847046, |
| "rewards/margins": 3.273390293121338, |
| "rewards/rejected": -4.347599983215332, |
| "step": 488 |
| }, |
| { |
| "epoch": 0.82, |
| "learning_rate": 9.662517543091224e-07, |
| "logits/chosen": -0.29915913939476013, |
| "logits/rejected": -1.622786521911621, |
| "logps/chosen": -332.6976013183594, |
| "logps/rejected": -172.04876708984375, |
| "loss": 0.1802, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 2.1141693592071533, |
| "rewards/margins": 10.140617370605469, |
| "rewards/rejected": -8.026447296142578, |
| "step": 489 |
| }, |
| { |
| "epoch": 0.83, |
| "learning_rate": 9.659854055628289e-07, |
| "logits/chosen": -2.113335371017456, |
| "logits/rejected": -1.7780506610870361, |
| "logps/chosen": -116.96385192871094, |
| "logps/rejected": -280.6581115722656, |
| "loss": 0.1313, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": -0.7822120785713196, |
| "rewards/margins": 15.220739364624023, |
| "rewards/rejected": -16.00295066833496, |
| "step": 490 |
| }, |
| { |
| "epoch": 0.83, |
| "learning_rate": 9.657180469054212e-07, |
| "logits/chosen": -1.7113397121429443, |
| "logits/rejected": -1.1981744766235352, |
| "logps/chosen": -214.93051147460938, |
| "logps/rejected": -303.4566650390625, |
| "loss": 0.1392, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": -1.7917399406433105, |
| "rewards/margins": 11.92519760131836, |
| "rewards/rejected": -13.716938018798828, |
| "step": 491 |
| }, |
| { |
| "epoch": 0.83, |
| "learning_rate": 9.654496789163343e-07, |
| "logits/chosen": -1.5774062871932983, |
| "logits/rejected": -1.3951590061187744, |
| "logps/chosen": -136.52076721191406, |
| "logps/rejected": -191.61000061035156, |
| "loss": 0.1411, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": -0.8900161981582642, |
| "rewards/margins": 10.306234359741211, |
| "rewards/rejected": -11.196249961853027, |
| "step": 492 |
| }, |
| { |
| "epoch": 0.83, |
| "learning_rate": 9.651803021771917e-07, |
| "logits/chosen": -2.1784653663635254, |
| "logits/rejected": -1.2229230403900146, |
| "logps/chosen": -72.89522552490234, |
| "logps/rejected": -309.3623046875, |
| "loss": 0.1811, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": -0.6526200771331787, |
| "rewards/margins": 7.078956604003906, |
| "rewards/rejected": -7.731576919555664, |
| "step": 493 |
| }, |
| { |
| "epoch": 0.83, |
| "learning_rate": 9.64909917271802e-07, |
| "logits/chosen": -0.9013615250587463, |
| "logits/rejected": -1.0959047079086304, |
| "logps/chosen": -426.6611022949219, |
| "logps/rejected": -216.3023681640625, |
| "loss": 0.1746, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": -0.8864956498146057, |
| "rewards/margins": 9.015869140625, |
| "rewards/rejected": -9.902364730834961, |
| "step": 494 |
| }, |
| { |
| "epoch": 0.83, |
| "learning_rate": 9.6463852478616e-07, |
| "logits/chosen": -1.6150933504104614, |
| "logits/rejected": -2.258601427078247, |
| "logps/chosen": -96.07403564453125, |
| "logps/rejected": -62.08420181274414, |
| "loss": 0.1906, |
| "rewards/accuracies": 0.5, |
| "rewards/chosen": -1.1480413675308228, |
| "rewards/margins": 1.0792427062988281, |
| "rewards/rejected": -2.2272841930389404, |
| "step": 495 |
| }, |
| { |
| "epoch": 0.84, |
| "learning_rate": 9.643661253084429e-07, |
| "logits/chosen": -0.35942748188972473, |
| "logits/rejected": -1.3206900358200073, |
| "logps/chosen": -472.96929931640625, |
| "logps/rejected": -234.10690307617188, |
| "loss": 0.1597, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 0.2673812806606293, |
| "rewards/margins": 13.789386749267578, |
| "rewards/rejected": -13.522006034851074, |
| "step": 496 |
| }, |
| { |
| "epoch": 0.84, |
| "learning_rate": 9.640927194290116e-07, |
| "logits/chosen": -1.594843864440918, |
| "logits/rejected": -1.8921819925308228, |
| "logps/chosen": -260.1965026855469, |
| "logps/rejected": -257.98199462890625, |
| "loss": 0.1985, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": -0.7323309183120728, |
| "rewards/margins": 6.5982561111450195, |
| "rewards/rejected": -7.330586910247803, |
| "step": 497 |
| }, |
| { |
| "epoch": 0.84, |
| "learning_rate": 9.638183077404068e-07, |
| "logits/chosen": -2.052978754043579, |
| "logits/rejected": -1.9758281707763672, |
| "logps/chosen": -19.00727081298828, |
| "logps/rejected": -73.09623718261719, |
| "loss": 0.1491, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": -0.739374041557312, |
| "rewards/margins": 2.3671977519989014, |
| "rewards/rejected": -3.106571674346924, |
| "step": 498 |
| }, |
| { |
| "epoch": 0.84, |
| "learning_rate": 9.635428908373502e-07, |
| "logits/chosen": -1.1580097675323486, |
| "logits/rejected": -1.896628975868225, |
| "logps/chosen": -570.9942626953125, |
| "logps/rejected": -265.4372863769531, |
| "loss": 0.1812, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": -1.3030930757522583, |
| "rewards/margins": 11.787357330322266, |
| "rewards/rejected": -13.090450286865234, |
| "step": 499 |
| }, |
| { |
| "epoch": 0.84, |
| "learning_rate": 9.632664693167416e-07, |
| "logits/chosen": -1.720935583114624, |
| "logits/rejected": -1.7201050519943237, |
| "logps/chosen": -355.47833251953125, |
| "logps/rejected": -418.269775390625, |
| "loss": 0.1801, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": -1.297267198562622, |
| "rewards/margins": 9.594414710998535, |
| "rewards/rejected": -10.891682624816895, |
| "step": 500 |
| }, |
| { |
| "epoch": 0.84, |
| "learning_rate": 9.629890437776579e-07, |
| "logits/chosen": -1.9035245180130005, |
| "logits/rejected": -1.9480910301208496, |
| "logps/chosen": -131.1505889892578, |
| "logps/rejected": -121.52857208251953, |
| "loss": 0.1662, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 0.5765511989593506, |
| "rewards/margins": 5.626952171325684, |
| "rewards/rejected": -5.050400733947754, |
| "step": 501 |
| }, |
| { |
| "epoch": 0.85, |
| "learning_rate": 9.62710614821352e-07, |
| "logits/chosen": -0.8639505505561829, |
| "logits/rejected": -0.9645960927009583, |
| "logps/chosen": -76.16290283203125, |
| "logps/rejected": -115.89288330078125, |
| "loss": 0.2082, |
| "rewards/accuracies": 0.5, |
| "rewards/chosen": -2.1743850708007812, |
| "rewards/margins": 3.6736721992492676, |
| "rewards/rejected": -5.848057270050049, |
| "step": 502 |
| }, |
| { |
| "epoch": 0.85, |
| "learning_rate": 9.624311830512519e-07, |
| "logits/chosen": -1.9524139165878296, |
| "logits/rejected": -2.4563539028167725, |
| "logps/chosen": -120.10502624511719, |
| "logps/rejected": -73.08750915527344, |
| "loss": 0.194, |
| "rewards/accuracies": 0.5, |
| "rewards/chosen": -0.409701943397522, |
| "rewards/margins": 1.4130560159683228, |
| "rewards/rejected": -1.8227579593658447, |
| "step": 503 |
| }, |
| { |
| "epoch": 0.85, |
| "learning_rate": 9.621507490729584e-07, |
| "logits/chosen": -1.4694656133651733, |
| "logits/rejected": -1.635840892791748, |
| "logps/chosen": -73.20475769042969, |
| "logps/rejected": -231.72525024414062, |
| "loss": 0.1675, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": -0.7220495343208313, |
| "rewards/margins": 7.296611309051514, |
| "rewards/rejected": -8.018661499023438, |
| "step": 504 |
| }, |
| { |
| "epoch": 0.85, |
| "learning_rate": 9.618693134942448e-07, |
| "logits/chosen": -0.5826085805892944, |
| "logits/rejected": -1.8275094032287598, |
| "logps/chosen": -112.27894592285156, |
| "logps/rejected": -76.41281127929688, |
| "loss": 0.197, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": -0.1780475676059723, |
| "rewards/margins": 6.233481407165527, |
| "rewards/rejected": -6.411529064178467, |
| "step": 505 |
| }, |
| { |
| "epoch": 0.85, |
| "learning_rate": 9.615868769250545e-07, |
| "logits/chosen": -1.1828463077545166, |
| "logits/rejected": -1.4431803226470947, |
| "logps/chosen": -299.8600158691406, |
| "logps/rejected": -209.68878173828125, |
| "loss": 0.1821, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": -1.3483529090881348, |
| "rewards/margins": 10.797613143920898, |
| "rewards/rejected": -12.145965576171875, |
| "step": 506 |
| }, |
| { |
| "epoch": 0.85, |
| "learning_rate": 9.613034399775013e-07, |
| "logits/chosen": -1.7455992698669434, |
| "logits/rejected": -1.877217173576355, |
| "logps/chosen": -80.41563415527344, |
| "logps/rejected": -180.38148498535156, |
| "loss": 0.1738, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": -1.0337793827056885, |
| "rewards/margins": 5.948246955871582, |
| "rewards/rejected": -6.982026100158691, |
| "step": 507 |
| }, |
| { |
| "epoch": 0.86, |
| "learning_rate": 9.610190032658663e-07, |
| "logits/chosen": -1.9526875019073486, |
| "logits/rejected": -1.7264868021011353, |
| "logps/chosen": -117.76008605957031, |
| "logps/rejected": -175.2601318359375, |
| "loss": 0.1698, |
| "rewards/accuracies": 0.5, |
| "rewards/chosen": -1.3269020318984985, |
| "rewards/margins": 6.980993747711182, |
| "rewards/rejected": -8.30789566040039, |
| "step": 508 |
| }, |
| { |
| "epoch": 0.86, |
| "learning_rate": 9.607335674065975e-07, |
| "logits/chosen": -0.6679888963699341, |
| "logits/rejected": -0.14870330691337585, |
| "logps/chosen": -339.99151611328125, |
| "logps/rejected": -369.12860107421875, |
| "loss": 0.1847, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": -1.8299164772033691, |
| "rewards/margins": 15.75944709777832, |
| "rewards/rejected": -17.58936309814453, |
| "step": 509 |
| }, |
| { |
| "epoch": 0.86, |
| "learning_rate": 9.604471330183081e-07, |
| "logits/chosen": -1.387742042541504, |
| "logits/rejected": -1.4050238132476807, |
| "logps/chosen": -387.7100524902344, |
| "logps/rejected": -354.6739807128906, |
| "loss": 0.1736, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": -1.4351615905761719, |
| "rewards/margins": 15.635486602783203, |
| "rewards/rejected": -17.070648193359375, |
| "step": 510 |
| }, |
| { |
| "epoch": 0.86, |
| "learning_rate": 9.601597007217761e-07, |
| "logits/chosen": -1.01943039894104, |
| "logits/rejected": -1.701319932937622, |
| "logps/chosen": -526.007568359375, |
| "logps/rejected": -369.4110107421875, |
| "loss": 0.2055, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": -0.7693207263946533, |
| "rewards/margins": 2.1190032958984375, |
| "rewards/rejected": -2.888324022293091, |
| "step": 511 |
| }, |
| { |
| "epoch": 0.86, |
| "learning_rate": 9.598712711399415e-07, |
| "logits/chosen": -1.298659086227417, |
| "logits/rejected": -1.1038153171539307, |
| "logps/chosen": -530.2131958007812, |
| "logps/rejected": -275.9097595214844, |
| "loss": 0.2014, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 0.26557284593582153, |
| "rewards/margins": 12.243782997131348, |
| "rewards/rejected": -11.97821044921875, |
| "step": 512 |
| }, |
| { |
| "epoch": 0.87, |
| "learning_rate": 9.59581844897906e-07, |
| "logits/chosen": -1.3148828744888306, |
| "logits/rejected": -1.9683585166931152, |
| "logps/chosen": -369.1336364746094, |
| "logps/rejected": -188.69871520996094, |
| "loss": 0.1784, |
| "rewards/accuracies": 0.5, |
| "rewards/chosen": -1.694780945777893, |
| "rewards/margins": 6.680124759674072, |
| "rewards/rejected": -8.374905586242676, |
| "step": 513 |
| }, |
| { |
| "epoch": 0.87, |
| "learning_rate": 9.592914226229314e-07, |
| "logits/chosen": -1.4996843338012695, |
| "logits/rejected": -1.4723166227340698, |
| "logps/chosen": -149.41529846191406, |
| "logps/rejected": -236.1160888671875, |
| "loss": 0.1857, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": -0.2980178892612457, |
| "rewards/margins": 12.551896095275879, |
| "rewards/rejected": -12.849913597106934, |
| "step": 514 |
| }, |
| { |
| "epoch": 0.87, |
| "learning_rate": 9.590000049444376e-07, |
| "logits/chosen": -1.6375008821487427, |
| "logits/rejected": -2.431309461593628, |
| "logps/chosen": -514.4171752929688, |
| "logps/rejected": -307.718017578125, |
| "loss": 0.1961, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": -1.7196776866912842, |
| "rewards/margins": 6.7644124031066895, |
| "rewards/rejected": -8.484090805053711, |
| "step": 515 |
| }, |
| { |
| "epoch": 0.87, |
| "learning_rate": 9.587075924940028e-07, |
| "logits/chosen": -1.0333702564239502, |
| "logits/rejected": -1.2659507989883423, |
| "logps/chosen": -454.99566650390625, |
| "logps/rejected": -293.19573974609375, |
| "loss": 0.1689, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": -0.113037109375, |
| "rewards/margins": 9.598712921142578, |
| "rewards/rejected": -9.711750030517578, |
| "step": 516 |
| }, |
| { |
| "epoch": 0.87, |
| "learning_rate": 9.5841418590536e-07, |
| "logits/chosen": -1.6277176141738892, |
| "logits/rejected": -2.497152090072632, |
| "logps/chosen": -325.90850830078125, |
| "logps/rejected": -145.48583984375, |
| "loss": 0.2187, |
| "rewards/accuracies": 0.5, |
| "rewards/chosen": -1.105952501296997, |
| "rewards/margins": 0.280051052570343, |
| "rewards/rejected": -1.3860034942626953, |
| "step": 517 |
| }, |
| { |
| "epoch": 0.87, |
| "learning_rate": 9.581197858143977e-07, |
| "logits/chosen": -2.2153377532958984, |
| "logits/rejected": -1.4255385398864746, |
| "logps/chosen": -80.20429229736328, |
| "logps/rejected": -332.54937744140625, |
| "loss": 0.1985, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": -1.3072952032089233, |
| "rewards/margins": 13.6825532913208, |
| "rewards/rejected": -14.989848136901855, |
| "step": 518 |
| }, |
| { |
| "epoch": 0.88, |
| "learning_rate": 9.578243928591569e-07, |
| "logits/chosen": -1.0782465934753418, |
| "logits/rejected": -1.0369610786437988, |
| "logps/chosen": -295.9336853027344, |
| "logps/rejected": -218.00802612304688, |
| "loss": 0.1549, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 0.03710126876831055, |
| "rewards/margins": 10.719480514526367, |
| "rewards/rejected": -10.682379722595215, |
| "step": 519 |
| }, |
| { |
| "epoch": 0.88, |
| "learning_rate": 9.57528007679831e-07, |
| "logits/chosen": -1.1398615837097168, |
| "logits/rejected": -1.8708375692367554, |
| "logps/chosen": -540.9119873046875, |
| "logps/rejected": -327.2794189453125, |
| "loss": 0.2091, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 0.22789306938648224, |
| "rewards/margins": 5.93986701965332, |
| "rewards/rejected": -5.711973667144775, |
| "step": 520 |
| }, |
| { |
| "epoch": 0.88, |
| "learning_rate": 9.57230630918763e-07, |
| "logits/chosen": -2.1814510822296143, |
| "logits/rejected": -2.2879159450531006, |
| "logps/chosen": -105.28890991210938, |
| "logps/rejected": -185.7814178466797, |
| "loss": 0.1897, |
| "rewards/accuracies": 0.5, |
| "rewards/chosen": -1.7397704124450684, |
| "rewards/margins": 6.833057403564453, |
| "rewards/rejected": -8.572827339172363, |
| "step": 521 |
| }, |
| { |
| "epoch": 0.88, |
| "learning_rate": 9.569322632204458e-07, |
| "logits/chosen": -2.073073148727417, |
| "logits/rejected": -1.4473570585250854, |
| "logps/chosen": -314.480712890625, |
| "logps/rejected": -325.2982482910156, |
| "loss": 0.21, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": -0.3166259825229645, |
| "rewards/margins": 14.919228553771973, |
| "rewards/rejected": -15.235854148864746, |
| "step": 522 |
| }, |
| { |
| "epoch": 0.88, |
| "learning_rate": 9.566329052315194e-07, |
| "logits/chosen": -1.8998284339904785, |
| "logits/rejected": -1.8589746952056885, |
| "logps/chosen": -63.7332763671875, |
| "logps/rejected": -274.74609375, |
| "loss": 0.1681, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": -1.378976821899414, |
| "rewards/margins": 12.973987579345703, |
| "rewards/rejected": -14.352964401245117, |
| "step": 523 |
| }, |
| { |
| "epoch": 0.88, |
| "learning_rate": 9.5633255760077e-07, |
| "logits/chosen": -2.28558611869812, |
| "logits/rejected": -2.275768280029297, |
| "logps/chosen": -40.30331039428711, |
| "logps/rejected": -180.61302185058594, |
| "loss": 0.1904, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": -0.8475544452667236, |
| "rewards/margins": 9.063840866088867, |
| "rewards/rejected": -9.911395072937012, |
| "step": 524 |
| }, |
| { |
| "epoch": 0.89, |
| "learning_rate": 9.56031220979129e-07, |
| "logits/chosen": -0.7669763565063477, |
| "logits/rejected": -1.784839391708374, |
| "logps/chosen": -198.1832275390625, |
| "logps/rejected": -123.54266357421875, |
| "loss": 0.1687, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": -0.18603363633155823, |
| "rewards/margins": 5.508733749389648, |
| "rewards/rejected": -5.694766998291016, |
| "step": 525 |
| }, |
| { |
| "epoch": 0.89, |
| "learning_rate": 9.557288960196707e-07, |
| "logits/chosen": -1.1866172552108765, |
| "logits/rejected": -1.2076904773712158, |
| "logps/chosen": -36.310211181640625, |
| "logps/rejected": -181.77130126953125, |
| "loss": 0.1558, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": -0.9400992393493652, |
| "rewards/margins": 10.733392715454102, |
| "rewards/rejected": -11.673492431640625, |
| "step": 526 |
| }, |
| { |
| "epoch": 0.89, |
| "learning_rate": 9.554255833776117e-07, |
| "logits/chosen": -2.55730938911438, |
| "logits/rejected": -1.6672946214675903, |
| "logps/chosen": -80.0410385131836, |
| "logps/rejected": -193.53387451171875, |
| "loss": 0.2318, |
| "rewards/accuracies": 0.5, |
| "rewards/chosen": -1.1338022947311401, |
| "rewards/margins": 2.3030526638031006, |
| "rewards/rejected": -3.436854839324951, |
| "step": 527 |
| }, |
| { |
| "epoch": 0.89, |
| "learning_rate": 9.551212837103091e-07, |
| "logits/chosen": -2.760472059249878, |
| "logits/rejected": -2.06662917137146, |
| "logps/chosen": -160.0579071044922, |
| "logps/rejected": -215.00347900390625, |
| "loss": 0.1786, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": -0.42213478684425354, |
| "rewards/margins": 5.234758377075195, |
| "rewards/rejected": -5.656893253326416, |
| "step": 528 |
| }, |
| { |
| "epoch": 0.89, |
| "learning_rate": 9.548159976772592e-07, |
| "logits/chosen": -2.156421661376953, |
| "logits/rejected": -1.3467743396759033, |
| "logps/chosen": -69.24724578857422, |
| "logps/rejected": -327.8988037109375, |
| "loss": 0.192, |
| "rewards/accuracies": 0.5, |
| "rewards/chosen": -1.53495192527771, |
| "rewards/margins": 7.171606540679932, |
| "rewards/rejected": -8.706559181213379, |
| "step": 529 |
| }, |
| { |
| "epoch": 0.89, |
| "learning_rate": 9.545097259400958e-07, |
| "logits/chosen": -1.4128961563110352, |
| "logits/rejected": -0.9324668645858765, |
| "logps/chosen": -280.76806640625, |
| "logps/rejected": -290.4639892578125, |
| "loss": 0.1677, |
| "rewards/accuracies": 0.5, |
| "rewards/chosen": -1.6329319477081299, |
| "rewards/margins": 7.9638519287109375, |
| "rewards/rejected": -9.596783638000488, |
| "step": 530 |
| }, |
| { |
| "epoch": 0.9, |
| "learning_rate": 9.54202469162589e-07, |
| "logits/chosen": -2.057682991027832, |
| "logits/rejected": -1.9787952899932861, |
| "logps/chosen": -197.595703125, |
| "logps/rejected": -351.2636413574219, |
| "loss": 0.2056, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": -2.0103564262390137, |
| "rewards/margins": 4.732174396514893, |
| "rewards/rejected": -6.742530822753906, |
| "step": 531 |
| }, |
| { |
| "epoch": 0.9, |
| "learning_rate": 9.538942280106441e-07, |
| "logits/chosen": -0.3838121294975281, |
| "logits/rejected": -0.7845942378044128, |
| "logps/chosen": -481.9730224609375, |
| "logps/rejected": -301.693115234375, |
| "loss": 0.1884, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": -1.7501007318496704, |
| "rewards/margins": 12.653074264526367, |
| "rewards/rejected": -14.403175354003906, |
| "step": 532 |
| }, |
| { |
| "epoch": 0.9, |
| "learning_rate": 9.535850031522996e-07, |
| "logits/chosen": -2.2089288234710693, |
| "logits/rejected": -2.154285430908203, |
| "logps/chosen": -34.082950592041016, |
| "logps/rejected": -219.37469482421875, |
| "loss": 0.1833, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": -0.9477675557136536, |
| "rewards/margins": 11.680444717407227, |
| "rewards/rejected": -12.628212928771973, |
| "step": 533 |
| }, |
| { |
| "epoch": 0.9, |
| "learning_rate": 9.532747952577259e-07, |
| "logits/chosen": -1.166057825088501, |
| "logits/rejected": -1.6293365955352783, |
| "logps/chosen": -674.6851806640625, |
| "logps/rejected": -405.37255859375, |
| "loss": 0.1572, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": -0.8522399663925171, |
| "rewards/margins": 10.60324478149414, |
| "rewards/rejected": -11.455485343933105, |
| "step": 534 |
| }, |
| { |
| "epoch": 0.9, |
| "learning_rate": 9.529636049992233e-07, |
| "logits/chosen": -2.216061592102051, |
| "logits/rejected": -1.3257899284362793, |
| "logps/chosen": -267.9684143066406, |
| "logps/rejected": -269.8423156738281, |
| "loss": 0.1965, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": -0.39237213134765625, |
| "rewards/margins": 8.582952499389648, |
| "rewards/rejected": -8.975324630737305, |
| "step": 535 |
| }, |
| { |
| "epoch": 0.9, |
| "learning_rate": 9.526514330512224e-07, |
| "logits/chosen": -1.805991291999817, |
| "logits/rejected": -2.062978744506836, |
| "logps/chosen": -129.35073852539062, |
| "logps/rejected": -154.8126220703125, |
| "loss": 0.2185, |
| "rewards/accuracies": 0.5, |
| "rewards/chosen": -1.0079419612884521, |
| "rewards/margins": 2.9576416015625, |
| "rewards/rejected": -3.965583562850952, |
| "step": 536 |
| }, |
| { |
| "epoch": 0.91, |
| "learning_rate": 9.523382800902804e-07, |
| "logits/chosen": -0.9184517860412598, |
| "logits/rejected": -1.1293007135391235, |
| "logps/chosen": -497.22930908203125, |
| "logps/rejected": -318.4179382324219, |
| "loss": 0.1635, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": -2.249415636062622, |
| "rewards/margins": 13.987029075622559, |
| "rewards/rejected": -16.2364444732666, |
| "step": 537 |
| }, |
| { |
| "epoch": 0.91, |
| "learning_rate": 9.52024146795081e-07, |
| "logits/chosen": -1.5992757081985474, |
| "logits/rejected": -1.759086012840271, |
| "logps/chosen": -143.24261474609375, |
| "logps/rejected": -166.47059631347656, |
| "loss": 0.1576, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": -0.6290268898010254, |
| "rewards/margins": 6.777127742767334, |
| "rewards/rejected": -7.406154632568359, |
| "step": 538 |
| }, |
| { |
| "epoch": 0.91, |
| "learning_rate": 9.517090338464324e-07, |
| "logits/chosen": -1.8976322412490845, |
| "logits/rejected": -1.2034521102905273, |
| "logps/chosen": -247.3936004638672, |
| "logps/rejected": -549.493408203125, |
| "loss": 0.1994, |
| "rewards/accuracies": 0.0, |
| "rewards/chosen": -0.5727836489677429, |
| "rewards/margins": -0.548413872718811, |
| "rewards/rejected": -0.024369820952415466, |
| "step": 539 |
| }, |
| { |
| "epoch": 0.91, |
| "learning_rate": 9.51392941927266e-07, |
| "logits/chosen": -1.9126521348953247, |
| "logits/rejected": -1.6490702629089355, |
| "logps/chosen": -350.37139892578125, |
| "logps/rejected": -361.5531005859375, |
| "loss": 0.2141, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": -0.25605812668800354, |
| "rewards/margins": 1.3470127582550049, |
| "rewards/rejected": -1.603070855140686, |
| "step": 540 |
| }, |
| { |
| "epoch": 0.91, |
| "learning_rate": 9.510758717226351e-07, |
| "logits/chosen": -1.297040581703186, |
| "logits/rejected": -2.394585132598877, |
| "logps/chosen": -318.63446044921875, |
| "logps/rejected": -72.8540267944336, |
| "loss": 0.1844, |
| "rewards/accuracies": 0.5, |
| "rewards/chosen": -0.1780809909105301, |
| "rewards/margins": 1.2357361316680908, |
| "rewards/rejected": -1.413817048072815, |
| "step": 541 |
| }, |
| { |
| "epoch": 0.91, |
| "learning_rate": 9.507578239197125e-07, |
| "logits/chosen": -1.683724284172058, |
| "logits/rejected": -2.4566798210144043, |
| "logps/chosen": -198.73486328125, |
| "logps/rejected": -102.812255859375, |
| "loss": 0.1964, |
| "rewards/accuracies": 0.5, |
| "rewards/chosen": -1.182813048362732, |
| "rewards/margins": 3.7211146354675293, |
| "rewards/rejected": -4.903927803039551, |
| "step": 542 |
| }, |
| { |
| "epoch": 0.92, |
| "learning_rate": 9.504387992077906e-07, |
| "logits/chosen": -1.7871900796890259, |
| "logits/rejected": -2.684126377105713, |
| "logps/chosen": -830.5599365234375, |
| "logps/rejected": -110.56852722167969, |
| "loss": 0.1876, |
| "rewards/accuracies": 0.5, |
| "rewards/chosen": -0.6030117273330688, |
| "rewards/margins": 1.5069835186004639, |
| "rewards/rejected": -2.1099953651428223, |
| "step": 543 |
| }, |
| { |
| "epoch": 0.92, |
| "learning_rate": 9.501187982782784e-07, |
| "logits/chosen": -1.393845558166504, |
| "logits/rejected": -2.000667095184326, |
| "logps/chosen": -319.4396667480469, |
| "logps/rejected": -176.52003479003906, |
| "loss": 0.1713, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": -0.090196892619133, |
| "rewards/margins": 11.154555320739746, |
| "rewards/rejected": -11.244751930236816, |
| "step": 544 |
| }, |
| { |
| "epoch": 0.92, |
| "learning_rate": 9.497978218247012e-07, |
| "logits/chosen": -1.0957725048065186, |
| "logits/rejected": -1.6756318807601929, |
| "logps/chosen": -336.61602783203125, |
| "logps/rejected": -199.86419677734375, |
| "loss": 0.1575, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": -0.11384735256433487, |
| "rewards/margins": 7.248149871826172, |
| "rewards/rejected": -7.361997127532959, |
| "step": 545 |
| }, |
| { |
| "epoch": 0.92, |
| "learning_rate": 9.494758705426976e-07, |
| "logits/chosen": -1.4453366994857788, |
| "logits/rejected": -2.0633018016815186, |
| "logps/chosen": -223.99911499023438, |
| "logps/rejected": -254.41709899902344, |
| "loss": 0.2214, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": -0.6794521808624268, |
| "rewards/margins": 8.287046432495117, |
| "rewards/rejected": -8.966497421264648, |
| "step": 546 |
| }, |
| { |
| "epoch": 0.92, |
| "learning_rate": 9.491529451300199e-07, |
| "logits/chosen": -0.6172839999198914, |
| "logits/rejected": -0.6064785718917847, |
| "logps/chosen": -255.4965362548828, |
| "logps/rejected": -213.30917358398438, |
| "loss": 0.2023, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": -1.845526099205017, |
| "rewards/margins": 9.538248062133789, |
| "rewards/rejected": -11.383773803710938, |
| "step": 547 |
| }, |
| { |
| "epoch": 0.92, |
| "learning_rate": 9.48829046286531e-07, |
| "logits/chosen": -1.9111276865005493, |
| "logits/rejected": -2.0904080867767334, |
| "logps/chosen": -132.1181640625, |
| "logps/rejected": -127.7751235961914, |
| "loss": 0.148, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 0.23484620451927185, |
| "rewards/margins": 7.176847457885742, |
| "rewards/rejected": -6.9420013427734375, |
| "step": 548 |
| }, |
| { |
| "epoch": 0.93, |
| "learning_rate": 9.485041747142033e-07, |
| "logits/chosen": -1.5261189937591553, |
| "logits/rejected": -1.6879130601882935, |
| "logps/chosen": -272.7811279296875, |
| "logps/rejected": -296.8751525878906, |
| "loss": 0.1803, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 0.28870850801467896, |
| "rewards/margins": 5.03480339050293, |
| "rewards/rejected": -4.746094703674316, |
| "step": 549 |
| }, |
| { |
| "epoch": 0.93, |
| "learning_rate": 9.481783311171182e-07, |
| "logits/chosen": -0.9267941117286682, |
| "logits/rejected": -0.5881129503250122, |
| "logps/chosen": -127.84021759033203, |
| "logps/rejected": -182.4854736328125, |
| "loss": 0.2082, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 0.14871902763843536, |
| "rewards/margins": 10.788543701171875, |
| "rewards/rejected": -10.639824867248535, |
| "step": 550 |
| }, |
| { |
| "epoch": 0.93, |
| "learning_rate": 9.478515162014628e-07, |
| "logits/chosen": -0.44782042503356934, |
| "logits/rejected": -0.7973653078079224, |
| "logps/chosen": -449.32598876953125, |
| "logps/rejected": -296.8812255859375, |
| "loss": 0.2314, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": -0.5391403436660767, |
| "rewards/margins": 8.781317710876465, |
| "rewards/rejected": -9.32045841217041, |
| "step": 551 |
| }, |
| { |
| "epoch": 0.93, |
| "learning_rate": 9.475237306755302e-07, |
| "logits/chosen": -2.074190855026245, |
| "logits/rejected": -1.8858578205108643, |
| "logps/chosen": -194.01026916503906, |
| "logps/rejected": -407.46392822265625, |
| "loss": 0.1754, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": -0.1868438720703125, |
| "rewards/margins": 1.9931914806365967, |
| "rewards/rejected": -2.180035352706909, |
| "step": 552 |
| }, |
| { |
| "epoch": 0.93, |
| "learning_rate": 9.471949752497159e-07, |
| "logits/chosen": -1.0973830223083496, |
| "logits/rejected": -1.0407088994979858, |
| "logps/chosen": -387.7185363769531, |
| "logps/rejected": -300.52764892578125, |
| "loss": 0.2544, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": -1.558343529701233, |
| "rewards/margins": 11.005720138549805, |
| "rewards/rejected": -12.564064025878906, |
| "step": 553 |
| }, |
| { |
| "epoch": 0.93, |
| "learning_rate": 9.468652506365186e-07, |
| "logits/chosen": -0.8757210373878479, |
| "logits/rejected": -0.9368937611579895, |
| "logps/chosen": -13.649900436401367, |
| "logps/rejected": -88.50205993652344, |
| "loss": 0.1295, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": -0.2053590714931488, |
| "rewards/margins": 5.989328384399414, |
| "rewards/rejected": -6.1946868896484375, |
| "step": 554 |
| }, |
| { |
| "epoch": 0.94, |
| "learning_rate": 9.465345575505365e-07, |
| "logits/chosen": -1.1155973672866821, |
| "logits/rejected": -1.2391748428344727, |
| "logps/chosen": -156.2330322265625, |
| "logps/rejected": -168.20909118652344, |
| "loss": 0.1828, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": -0.7171291708946228, |
| "rewards/margins": 8.67363452911377, |
| "rewards/rejected": -9.390764236450195, |
| "step": 555 |
| }, |
| { |
| "epoch": 0.94, |
| "learning_rate": 9.462028967084678e-07, |
| "logits/chosen": -2.5009000301361084, |
| "logits/rejected": -1.359316110610962, |
| "logps/chosen": -88.84703063964844, |
| "logps/rejected": -606.462158203125, |
| "loss": 0.178, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": -1.0266014337539673, |
| "rewards/margins": 5.369706153869629, |
| "rewards/rejected": -6.396307468414307, |
| "step": 556 |
| }, |
| { |
| "epoch": 0.94, |
| "learning_rate": 9.458702688291071e-07, |
| "logits/chosen": -1.570953369140625, |
| "logits/rejected": -1.3593248128890991, |
| "logps/chosen": -759.3133544921875, |
| "logps/rejected": -677.512939453125, |
| "loss": 0.217, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": -0.607739269733429, |
| "rewards/margins": 1.023413062095642, |
| "rewards/rejected": -1.6311523914337158, |
| "step": 557 |
| }, |
| { |
| "epoch": 0.94, |
| "learning_rate": 9.455366746333453e-07, |
| "logits/chosen": -1.4975221157073975, |
| "logits/rejected": -1.9799575805664062, |
| "logps/chosen": -238.45115661621094, |
| "logps/rejected": -126.26002502441406, |
| "loss": 0.1842, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": -0.838726818561554, |
| "rewards/margins": 0.9599689841270447, |
| "rewards/rejected": -1.7986958026885986, |
| "step": 558 |
| }, |
| { |
| "epoch": 0.94, |
| "learning_rate": 9.452021148441674e-07, |
| "logits/chosen": -2.3967480659484863, |
| "logits/rejected": -1.919357180595398, |
| "logps/chosen": -183.41903686523438, |
| "logps/rejected": -302.5267028808594, |
| "loss": 0.1945, |
| "rewards/accuracies": 0.5, |
| "rewards/chosen": -0.8161871433258057, |
| "rewards/margins": 1.833760142326355, |
| "rewards/rejected": -2.649947166442871, |
| "step": 559 |
| }, |
| { |
| "epoch": 0.94, |
| "learning_rate": 9.448665901866513e-07, |
| "logits/chosen": -1.308738112449646, |
| "logits/rejected": -1.4025413990020752, |
| "logps/chosen": -420.19940185546875, |
| "logps/rejected": -352.2750244140625, |
| "loss": 0.2113, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": -1.1311897039413452, |
| "rewards/margins": 6.021875858306885, |
| "rewards/rejected": -7.1530656814575195, |
| "step": 560 |
| }, |
| { |
| "epoch": 0.95, |
| "learning_rate": 9.445301013879656e-07, |
| "logits/chosen": -1.9166899919509888, |
| "logits/rejected": -1.5457202196121216, |
| "logps/chosen": -236.6947479248047, |
| "logps/rejected": -277.1458435058594, |
| "loss": 0.1837, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": -1.5052580833435059, |
| "rewards/margins": 12.255561828613281, |
| "rewards/rejected": -13.760820388793945, |
| "step": 561 |
| }, |
| { |
| "epoch": 0.95, |
| "learning_rate": 9.441926491773689e-07, |
| "logits/chosen": -1.066957712173462, |
| "logits/rejected": -1.658841848373413, |
| "logps/chosen": -255.17095947265625, |
| "logps/rejected": -266.31304931640625, |
| "loss": 0.188, |
| "rewards/accuracies": 0.5, |
| "rewards/chosen": -0.5748581290245056, |
| "rewards/margins": 0.31342390179634094, |
| "rewards/rejected": -0.888282060623169, |
| "step": 562 |
| }, |
| { |
| "epoch": 0.95, |
| "learning_rate": 9.438542342862075e-07, |
| "logits/chosen": -1.350300908088684, |
| "logits/rejected": -2.001142740249634, |
| "logps/chosen": -323.86114501953125, |
| "logps/rejected": -88.59988403320312, |
| "loss": 0.1661, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": -0.39666903018951416, |
| "rewards/margins": 5.692585468292236, |
| "rewards/rejected": -6.089254856109619, |
| "step": 563 |
| }, |
| { |
| "epoch": 0.95, |
| "learning_rate": 9.435148574479144e-07, |
| "logits/chosen": -1.2990124225616455, |
| "logits/rejected": -1.5610456466674805, |
| "logps/chosen": -318.2091979980469, |
| "logps/rejected": -176.14430236816406, |
| "loss": 0.1185, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": -0.6469208002090454, |
| "rewards/margins": 9.246925354003906, |
| "rewards/rejected": -9.893845558166504, |
| "step": 564 |
| }, |
| { |
| "epoch": 0.95, |
| "learning_rate": 9.431745193980068e-07, |
| "logits/chosen": -1.8961067199707031, |
| "logits/rejected": -1.6770416498184204, |
| "logps/chosen": -308.45989990234375, |
| "logps/rejected": -212.64981079101562, |
| "loss": 0.1747, |
| "rewards/accuracies": 0.5, |
| "rewards/chosen": -2.435619831085205, |
| "rewards/margins": 1.2442288398742676, |
| "rewards/rejected": -3.6798486709594727, |
| "step": 565 |
| }, |
| { |
| "epoch": 0.95, |
| "learning_rate": 9.428332208740857e-07, |
| "logits/chosen": -0.562650740146637, |
| "logits/rejected": -1.6526992321014404, |
| "logps/chosen": -225.20419311523438, |
| "logps/rejected": -123.10130310058594, |
| "loss": 0.1821, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": -0.8313095569610596, |
| "rewards/margins": 5.300149917602539, |
| "rewards/rejected": -6.1314592361450195, |
| "step": 566 |
| }, |
| { |
| "epoch": 0.96, |
| "learning_rate": 9.424909626158332e-07, |
| "logits/chosen": -1.3460386991500854, |
| "logits/rejected": -1.8868428468704224, |
| "logps/chosen": -541.9500122070312, |
| "logps/rejected": -282.341552734375, |
| "loss": 0.1658, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": -2.592463970184326, |
| "rewards/margins": 13.076055526733398, |
| "rewards/rejected": -15.668519973754883, |
| "step": 567 |
| }, |
| { |
| "epoch": 0.96, |
| "learning_rate": 9.421477453650117e-07, |
| "logits/chosen": -1.3395308256149292, |
| "logits/rejected": -2.100599527359009, |
| "logps/chosen": -186.7269287109375, |
| "logps/rejected": -117.01820373535156, |
| "loss": 0.1475, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": -1.069182276725769, |
| "rewards/margins": 6.430233955383301, |
| "rewards/rejected": -7.499416351318359, |
| "step": 568 |
| }, |
| { |
| "epoch": 0.96, |
| "learning_rate": 9.41803569865462e-07, |
| "logits/chosen": -0.560535192489624, |
| "logits/rejected": -0.5541735291481018, |
| "logps/chosen": -407.7998962402344, |
| "logps/rejected": -330.052001953125, |
| "loss": 0.1814, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": -1.46812903881073, |
| "rewards/margins": 9.190093994140625, |
| "rewards/rejected": -10.658222198486328, |
| "step": 569 |
| }, |
| { |
| "epoch": 0.96, |
| "learning_rate": 9.414584368631018e-07, |
| "logits/chosen": -0.2689858376979828, |
| "logits/rejected": -0.6253905892372131, |
| "logps/chosen": -302.06964111328125, |
| "logps/rejected": -183.74813842773438, |
| "loss": 0.1558, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": -0.3590884506702423, |
| "rewards/margins": 10.315533638000488, |
| "rewards/rejected": -10.67462158203125, |
| "step": 570 |
| }, |
| { |
| "epoch": 0.96, |
| "learning_rate": 9.411123471059232e-07, |
| "logits/chosen": -1.1581571102142334, |
| "logits/rejected": -1.2208583354949951, |
| "logps/chosen": -386.98779296875, |
| "logps/rejected": -328.5216064453125, |
| "loss": 0.1739, |
| "rewards/accuracies": 0.5, |
| "rewards/chosen": -0.2733596861362457, |
| "rewards/margins": 0.4076034724712372, |
| "rewards/rejected": -0.6809631586074829, |
| "step": 571 |
| }, |
| { |
| "epoch": 0.96, |
| "learning_rate": 9.407653013439927e-07, |
| "logits/chosen": -1.914874792098999, |
| "logits/rejected": -1.633007526397705, |
| "logps/chosen": -196.557373046875, |
| "logps/rejected": -189.84039306640625, |
| "loss": 0.1601, |
| "rewards/accuracies": 0.5, |
| "rewards/chosen": -1.1366727352142334, |
| "rewards/margins": 0.7186151742935181, |
| "rewards/rejected": -1.8552879095077515, |
| "step": 572 |
| }, |
| { |
| "epoch": 0.97, |
| "learning_rate": 9.404173003294485e-07, |
| "logits/chosen": -1.0885298252105713, |
| "logits/rejected": -1.620314359664917, |
| "logps/chosen": -390.4488525390625, |
| "logps/rejected": -219.35308837890625, |
| "loss": 0.1959, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": -0.2608696222305298, |
| "rewards/margins": 8.13377857208252, |
| "rewards/rejected": -8.394648551940918, |
| "step": 573 |
| }, |
| { |
| "epoch": 0.97, |
| "learning_rate": 9.400683448164986e-07, |
| "logits/chosen": -1.303099274635315, |
| "logits/rejected": -1.4152745008468628, |
| "logps/chosen": -225.566650390625, |
| "logps/rejected": -119.57087707519531, |
| "loss": 0.1869, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": -1.179052710533142, |
| "rewards/margins": 2.7054755687713623, |
| "rewards/rejected": -3.884528160095215, |
| "step": 574 |
| }, |
| { |
| "epoch": 0.97, |
| "learning_rate": 9.397184355614205e-07, |
| "logits/chosen": -1.2585117816925049, |
| "logits/rejected": -1.849805474281311, |
| "logps/chosen": -247.82049560546875, |
| "logps/rejected": -223.6149139404297, |
| "loss": 0.1884, |
| "rewards/accuracies": 0.5, |
| "rewards/chosen": -0.9630714654922485, |
| "rewards/margins": -0.37028050422668457, |
| "rewards/rejected": -0.592790961265564, |
| "step": 575 |
| }, |
| { |
| "epoch": 0.97, |
| "learning_rate": 9.393675733225576e-07, |
| "logits/chosen": -2.14628005027771, |
| "logits/rejected": -1.4466516971588135, |
| "logps/chosen": -36.28599548339844, |
| "logps/rejected": -156.9398651123047, |
| "loss": 0.2136, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": -1.0615413188934326, |
| "rewards/margins": 4.366576194763184, |
| "rewards/rejected": -5.428117752075195, |
| "step": 576 |
| }, |
| { |
| "epoch": 0.97, |
| "learning_rate": 9.390157588603201e-07, |
| "logits/chosen": -1.2252075672149658, |
| "logits/rejected": -1.802139401435852, |
| "logps/chosen": -250.9557647705078, |
| "logps/rejected": -223.22103881835938, |
| "loss": 0.1422, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": -0.9214485287666321, |
| "rewards/margins": 1.491241455078125, |
| "rewards/rejected": -2.4126901626586914, |
| "step": 577 |
| }, |
| { |
| "epoch": 0.97, |
| "learning_rate": 9.386629929371804e-07, |
| "logits/chosen": -1.6203854084014893, |
| "logits/rejected": -1.964041829109192, |
| "logps/chosen": -233.42408752441406, |
| "logps/rejected": -223.70074462890625, |
| "loss": 0.1747, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": -0.3521421551704407, |
| "rewards/margins": 12.760381698608398, |
| "rewards/rejected": -13.112524032592773, |
| "step": 578 |
| }, |
| { |
| "epoch": 0.98, |
| "learning_rate": 9.383092763176738e-07, |
| "logits/chosen": -1.3921282291412354, |
| "logits/rejected": -1.1160563230514526, |
| "logps/chosen": -242.66835021972656, |
| "logps/rejected": -384.66790771484375, |
| "loss": 0.196, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": -1.593625783920288, |
| "rewards/margins": 17.484588623046875, |
| "rewards/rejected": -19.078216552734375, |
| "step": 579 |
| }, |
| { |
| "epoch": 0.98, |
| "learning_rate": 9.379546097683962e-07, |
| "logits/chosen": -1.1282007694244385, |
| "logits/rejected": -1.4131447076797485, |
| "logps/chosen": -137.69473266601562, |
| "logps/rejected": -102.01011657714844, |
| "loss": 0.1704, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": -0.27938154339790344, |
| "rewards/margins": 0.8095016479492188, |
| "rewards/rejected": -1.0888831615447998, |
| "step": 580 |
| }, |
| { |
| "epoch": 0.98, |
| "learning_rate": 9.375989940580014e-07, |
| "logits/chosen": -1.014125943183899, |
| "logits/rejected": -0.8712400197982788, |
| "logps/chosen": -333.4993896484375, |
| "logps/rejected": -282.827880859375, |
| "loss": 0.1759, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": -0.6404533386230469, |
| "rewards/margins": 12.329509735107422, |
| "rewards/rejected": -12.969963073730469, |
| "step": 581 |
| }, |
| { |
| "epoch": 0.98, |
| "learning_rate": 9.372424299572013e-07, |
| "logits/chosen": -1.137288212776184, |
| "logits/rejected": -1.7328457832336426, |
| "logps/chosen": -630.9047241210938, |
| "logps/rejected": -265.96551513671875, |
| "loss": 0.181, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": -0.6024795770645142, |
| "rewards/margins": 13.53583812713623, |
| "rewards/rejected": -14.138317108154297, |
| "step": 582 |
| }, |
| { |
| "epoch": 0.98, |
| "learning_rate": 9.368849182387624e-07, |
| "logits/chosen": -1.1567295789718628, |
| "logits/rejected": -1.5108009576797485, |
| "logps/chosen": -423.7227478027344, |
| "logps/rejected": -177.82542419433594, |
| "loss": 0.1898, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": -1.013415813446045, |
| "rewards/margins": 5.7657599449157715, |
| "rewards/rejected": -6.779175758361816, |
| "step": 583 |
| }, |
| { |
| "epoch": 0.98, |
| "learning_rate": 9.365264596775051e-07, |
| "logits/chosen": -1.427332878112793, |
| "logits/rejected": -1.6145710945129395, |
| "logps/chosen": -719.35400390625, |
| "logps/rejected": -739.6787109375, |
| "loss": 0.1735, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": -0.9353516101837158, |
| "rewards/margins": 8.291926383972168, |
| "rewards/rejected": -9.227277755737305, |
| "step": 584 |
| }, |
| { |
| "epoch": 0.99, |
| "learning_rate": 9.361670550503024e-07, |
| "logits/chosen": -0.7215293049812317, |
| "logits/rejected": -1.509445071220398, |
| "logps/chosen": -111.42332458496094, |
| "logps/rejected": -59.27985763549805, |
| "loss": 0.1395, |
| "rewards/accuracies": 0.5, |
| "rewards/chosen": -0.639510452747345, |
| "rewards/margins": 3.702336549758911, |
| "rewards/rejected": -4.341846942901611, |
| "step": 585 |
| }, |
| { |
| "epoch": 0.99, |
| "learning_rate": 9.35806705136077e-07, |
| "logits/chosen": -1.5403887033462524, |
| "logits/rejected": -1.7206377983093262, |
| "logps/chosen": -47.813472747802734, |
| "logps/rejected": -116.00591278076172, |
| "loss": 0.1867, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": -1.1005914211273193, |
| "rewards/margins": 2.5221846103668213, |
| "rewards/rejected": -3.6227760314941406, |
| "step": 586 |
| }, |
| { |
| "epoch": 0.99, |
| "learning_rate": 9.354454107158003e-07, |
| "logits/chosen": -1.4039901494979858, |
| "logits/rejected": -1.2692376375198364, |
| "logps/chosen": -35.25469207763672, |
| "logps/rejected": -236.85321044921875, |
| "loss": 0.1741, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": -0.9473680853843689, |
| "rewards/margins": 11.905098915100098, |
| "rewards/rejected": -12.852466583251953, |
| "step": 587 |
| }, |
| { |
| "epoch": 0.99, |
| "learning_rate": 9.350831725724915e-07, |
| "logits/chosen": -1.3343538045883179, |
| "logits/rejected": -1.4938243627548218, |
| "logps/chosen": -328.1713562011719, |
| "logps/rejected": -222.6204376220703, |
| "loss": 0.2049, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": -1.934698462486267, |
| "rewards/margins": 7.677361011505127, |
| "rewards/rejected": -9.612059593200684, |
| "step": 588 |
| }, |
| { |
| "epoch": 0.99, |
| "learning_rate": 9.347199914912139e-07, |
| "logits/chosen": -1.4835619926452637, |
| "logits/rejected": -1.2078652381896973, |
| "logps/chosen": -425.0323791503906, |
| "logps/rejected": -336.0191955566406, |
| "loss": 0.1452, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": -1.6457017660140991, |
| "rewards/margins": 12.339007377624512, |
| "rewards/rejected": -13.984708786010742, |
| "step": 589 |
| }, |
| { |
| "epoch": 0.99, |
| "learning_rate": 9.343558682590755e-07, |
| "logits/chosen": -1.440277338027954, |
| "logits/rejected": -1.2249367237091064, |
| "logps/chosen": -213.87991333007812, |
| "logps/rejected": -248.79725646972656, |
| "loss": 0.1673, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": -1.332812786102295, |
| "rewards/margins": 9.270461082458496, |
| "rewards/rejected": -10.60327434539795, |
| "step": 590 |
| }, |
| { |
| "epoch": 1.0, |
| "learning_rate": 9.339908036652254e-07, |
| "logits/chosen": -1.4295828342437744, |
| "logits/rejected": -0.9476127624511719, |
| "logps/chosen": -167.48570251464844, |
| "logps/rejected": -194.9608154296875, |
| "loss": 0.1506, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 0.5273482799530029, |
| "rewards/margins": 11.93600845336914, |
| "rewards/rejected": -11.408660888671875, |
| "step": 591 |
| }, |
| { |
| "epoch": 1.0, |
| "learning_rate": 9.336247985008533e-07, |
| "logits/chosen": -1.3463596105575562, |
| "logits/rejected": -1.6185994148254395, |
| "logps/chosen": -462.23626708984375, |
| "logps/rejected": -235.42453002929688, |
| "loss": 0.1343, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": -0.8855430483818054, |
| "rewards/margins": 5.946411609649658, |
| "rewards/rejected": -6.831954479217529, |
| "step": 592 |
| }, |
| { |
| "epoch": 1.0, |
| "learning_rate": 9.33257853559187e-07, |
| "logits/chosen": -1.8037328720092773, |
| "logits/rejected": -1.4643278121948242, |
| "logps/chosen": -556.474609375, |
| "logps/rejected": -393.30126953125, |
| "loss": 0.2315, |
| "rewards/accuracies": 1.0, |
| "rewards/chosen": 2.759026527404785, |
| "rewards/margins": 9.786253929138184, |
| "rewards/rejected": -7.02722692489624, |
| "step": 593 |
| } |
| ], |
| "logging_steps": 1.0, |
| "max_steps": 2372, |
| "num_input_tokens_seen": 0, |
| "num_train_epochs": 4, |
| "save_steps": 500, |
| "total_flos": 0.0, |
| "train_batch_size": 2, |
| "trial_name": null, |
| "trial_params": null |
| } |
|
|