diff --git "a/modpo/use_reward/0.9helpful_0.1harmless/checkpoint-12000/trainer_state.json" "b/modpo/use_reward/0.9helpful_0.1harmless/checkpoint-12000/trainer_state.json" new file mode 100644--- /dev/null +++ "b/modpo/use_reward/0.9helpful_0.1harmless/checkpoint-12000/trainer_state.json" @@ -0,0 +1,15679 @@ +{ + "best_metric": 0.7306671142578125, + "best_model_checkpoint": "./output/modpo/lm/(0.9)helpful+(1-0.9)harmless/checkpoint-12000", + "epoch": 3.0, + "eval_steps": 3000, + "global_step": 12000, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "accuracy": 0.48750001192092896, + "epoch": 0.0, + "learning_rate": 9.999991842021366e-06, + "logps/chosen": -93.38034057617188, + "logps/margins": 11.913398742675781, + "logps/rejected": -105.29373931884766, + "loss": 1.8336, + "rewards/chosen": 5.109144687652588, + "rewards/margins": 0.11058555543422699, + "rewards/rejected": 4.99855899810791, + "step": 10 + }, + { + "accuracy": 0.5, + "epoch": 0.01, + "learning_rate": 9.999951060762224e-06, + "logps/chosen": -135.76004028320312, + "logps/margins": -6.75750732421875, + "logps/rejected": -129.00253295898438, + "loss": 1.902, + "rewards/chosen": 6.5460524559021, + "rewards/margins": -0.06892473995685577, + "rewards/rejected": 6.614976406097412, + "step": 20 + }, + { + "accuracy": 0.5375000238418579, + "epoch": 0.01, + "learning_rate": 9.999885057043291e-06, + "logps/chosen": -114.8847427368164, + "logps/margins": -1.0581402778625488, + "logps/rejected": -113.82661437988281, + "loss": 1.8892, + "rewards/chosen": 5.887767791748047, + "rewards/margins": 0.1564546674489975, + "rewards/rejected": 5.731313705444336, + "step": 30 + }, + { + "accuracy": 0.5, + "epoch": 0.01, + "learning_rate": 9.999779163833078e-06, + "logps/chosen": -120.33304595947266, + "logps/margins": 2.6488749980926514, + "logps/rejected": -122.98191833496094, + "loss": 1.8469, + "rewards/chosen": 6.306436538696289, + "rewards/margins": 0.2671842575073242, + "rewards/rejected": 6.039252281188965, + "step": 40 + }, + { + "accuracy": 0.5, + "epoch": 0.01, + "learning_rate": 9.999639002125162e-06, + "logps/chosen": -111.88919830322266, + "logps/margins": 2.995131015777588, + "logps/rejected": -114.88432312011719, + "loss": 1.5436, + "rewards/chosen": 6.004732608795166, + "rewards/margins": 0.041681695729494095, + "rewards/rejected": 5.963050842285156, + "step": 50 + }, + { + "accuracy": 0.6000000238418579, + "epoch": 0.01, + "learning_rate": 9.999464572880208e-06, + "logps/chosen": -102.3431167602539, + "logps/margins": -4.245372772216797, + "logps/rejected": -98.09774017333984, + "loss": 1.6289, + "rewards/chosen": 5.343186855316162, + "rewards/margins": 0.5225377678871155, + "rewards/rejected": 4.82064962387085, + "step": 60 + }, + { + "accuracy": 0.4375, + "epoch": 0.02, + "learning_rate": 9.999255877293756e-06, + "logps/chosen": -94.01886749267578, + "logps/margins": 10.370506286621094, + "logps/rejected": -104.3893814086914, + "loss": 1.6697, + "rewards/chosen": 4.6346635818481445, + "rewards/margins": 0.017035793513059616, + "rewards/rejected": 4.6176276206970215, + "step": 70 + }, + { + "accuracy": 0.574999988079071, + "epoch": 0.02, + "learning_rate": 9.999012916796205e-06, + "logps/chosen": -106.21893310546875, + "logps/margins": -1.2109102010726929, + "logps/rejected": -105.00801849365234, + "loss": 1.5927, + "rewards/chosen": 5.781027317047119, + "rewards/margins": 0.2608667016029358, + "rewards/rejected": 5.52016019821167, + "step": 80 + }, + { + "accuracy": 0.48750001192092896, + "epoch": 0.02, + "learning_rate": 9.998735693052809e-06, + "logps/chosen": -122.74066162109375, + "logps/margins": -12.237990379333496, + "logps/rejected": -110.5026626586914, + "loss": 1.8353, + "rewards/chosen": 6.233822822570801, + "rewards/margins": 0.4213257431983948, + "rewards/rejected": 5.812496662139893, + "step": 90 + }, + { + "accuracy": 0.44999998807907104, + "epoch": 0.03, + "learning_rate": 9.998424207963658e-06, + "logps/chosen": -114.6319580078125, + "logps/margins": 10.312301635742188, + "logps/rejected": -124.94425964355469, + "loss": 1.9814, + "rewards/chosen": 5.596517086029053, + "rewards/margins": -0.6197859644889832, + "rewards/rejected": 6.216302871704102, + "step": 100 + }, + { + "accuracy": 0.5249999761581421, + "epoch": 0.03, + "learning_rate": 9.998114579692461e-06, + "logps/chosen": -118.44438171386719, + "logps/margins": -1.2328144311904907, + "logps/rejected": -117.21158599853516, + "loss": 1.8886, + "rewards/chosen": 5.977663993835449, + "rewards/margins": 0.4643685221672058, + "rewards/rejected": 5.513296604156494, + "step": 110 + }, + { + "accuracy": 0.5, + "epoch": 0.03, + "learning_rate": 9.997738004122153e-06, + "logps/chosen": -123.51136779785156, + "logps/margins": -12.292215347290039, + "logps/rejected": -111.2191390991211, + "loss": 1.8606, + "rewards/chosen": 6.552567958831787, + "rewards/margins": 0.5511636734008789, + "rewards/rejected": 6.001404762268066, + "step": 120 + }, + { + "accuracy": 0.48750001192092896, + "epoch": 0.03, + "learning_rate": 9.997327174044255e-06, + "logps/chosen": -120.5638656616211, + "logps/margins": 4.9105987548828125, + "logps/rejected": -125.47447204589844, + "loss": 1.6104, + "rewards/chosen": 6.336331367492676, + "rewards/margins": 0.21816179156303406, + "rewards/rejected": 6.118170261383057, + "step": 130 + }, + { + "accuracy": 0.4375, + "epoch": 0.04, + "learning_rate": 9.996882092274593e-06, + "logps/chosen": -130.65631103515625, + "logps/margins": -0.399627685546875, + "logps/rejected": -130.2566680908203, + "loss": 1.6149, + "rewards/chosen": 6.756723880767822, + "rewards/margins": 0.40364760160446167, + "rewards/rejected": 6.353075981140137, + "step": 140 + }, + { + "accuracy": 0.550000011920929, + "epoch": 0.04, + "learning_rate": 9.996402761863761e-06, + "logps/chosen": -126.5574951171875, + "logps/margins": -22.661352157592773, + "logps/rejected": -103.89615631103516, + "loss": 1.9684, + "rewards/chosen": 6.470550537109375, + "rewards/margins": 1.592828392982483, + "rewards/rejected": 4.877722263336182, + "step": 150 + }, + { + "accuracy": 0.5249999761581421, + "epoch": 0.04, + "learning_rate": 9.995889186097093e-06, + "logps/chosen": -116.50477600097656, + "logps/margins": -6.241939544677734, + "logps/rejected": -110.26285552978516, + "loss": 2.0023, + "rewards/chosen": 6.077791213989258, + "rewards/margins": 0.7048730850219727, + "rewards/rejected": 5.372918128967285, + "step": 160 + }, + { + "accuracy": 0.5249999761581421, + "epoch": 0.04, + "learning_rate": 9.995341368494632e-06, + "logps/chosen": -107.73106384277344, + "logps/margins": 2.258758544921875, + "logps/rejected": -109.98982238769531, + "loss": 1.9799, + "rewards/chosen": 5.525664329528809, + "rewards/margins": 0.11249864101409912, + "rewards/rejected": 5.41316556930542, + "step": 170 + }, + { + "accuracy": 0.4749999940395355, + "epoch": 0.04, + "learning_rate": 9.994759312811127e-06, + "logps/chosen": -108.63682556152344, + "logps/margins": 8.882070541381836, + "logps/rejected": -117.5188980102539, + "loss": 1.9222, + "rewards/chosen": 5.461306095123291, + "rewards/margins": -0.3686821460723877, + "rewards/rejected": 5.8299880027771, + "step": 180 + }, + { + "accuracy": 0.44999998807907104, + "epoch": 0.05, + "learning_rate": 9.994143023035987e-06, + "logps/chosen": -127.97639465332031, + "logps/margins": -1.8207848072052002, + "logps/rejected": -126.1556167602539, + "loss": 2.1024, + "rewards/chosen": 6.416459560394287, + "rewards/margins": 0.29830387234687805, + "rewards/rejected": 6.118155479431152, + "step": 190 + }, + { + "accuracy": 0.512499988079071, + "epoch": 0.05, + "learning_rate": 9.993492503393263e-06, + "logps/chosen": -103.14668273925781, + "logps/margins": 0.341788113117218, + "logps/rejected": -103.48848724365234, + "loss": 1.8638, + "rewards/chosen": 5.473768711090088, + "rewards/margins": 0.33015722036361694, + "rewards/rejected": 5.143611431121826, + "step": 200 + }, + { + "accuracy": 0.612500011920929, + "epoch": 0.05, + "learning_rate": 9.992807758341618e-06, + "logps/chosen": -133.42172241210938, + "logps/margins": -10.071492195129395, + "logps/rejected": -123.3502197265625, + "loss": 1.9081, + "rewards/chosen": 7.246514320373535, + "rewards/margins": 1.0005252361297607, + "rewards/rejected": 6.2459893226623535, + "step": 210 + }, + { + "accuracy": 0.5, + "epoch": 0.06, + "learning_rate": 9.992088792574298e-06, + "logps/chosen": -118.6468505859375, + "logps/margins": -12.179803848266602, + "logps/rejected": -106.46702575683594, + "loss": 1.6112, + "rewards/chosen": 5.844839572906494, + "rewards/margins": 0.9412676692008972, + "rewards/rejected": 4.903571605682373, + "step": 220 + }, + { + "accuracy": 0.4625000059604645, + "epoch": 0.06, + "learning_rate": 9.991335611019095e-06, + "logps/chosen": -117.39437103271484, + "logps/margins": 8.628446578979492, + "logps/rejected": -126.0228271484375, + "loss": 1.6355, + "rewards/chosen": 6.054928779602051, + "rewards/margins": 0.04293825477361679, + "rewards/rejected": 6.011991024017334, + "step": 230 + }, + { + "accuracy": 0.4749999940395355, + "epoch": 0.06, + "learning_rate": 9.990548218838316e-06, + "logps/chosen": -139.26937866210938, + "logps/margins": 0.9801594018936157, + "logps/rejected": -140.24954223632812, + "loss": 1.8812, + "rewards/chosen": 7.323525905609131, + "rewards/margins": 0.39287251234054565, + "rewards/rejected": 6.9306535720825195, + "step": 240 + }, + { + "accuracy": 0.4375, + "epoch": 0.06, + "learning_rate": 9.989726621428749e-06, + "logps/chosen": -120.38094329833984, + "logps/margins": 6.584187984466553, + "logps/rejected": -126.96512603759766, + "loss": 2.05, + "rewards/chosen": 5.8485941886901855, + "rewards/margins": -0.35422176122665405, + "rewards/rejected": 6.202816009521484, + "step": 250 + }, + { + "accuracy": 0.375, + "epoch": 0.07, + "learning_rate": 9.988870824421626e-06, + "logps/chosen": -107.37776947021484, + "logps/margins": 21.43488121032715, + "logps/rejected": -128.81265258789062, + "loss": 1.9466, + "rewards/chosen": 5.4471845626831055, + "rewards/margins": -0.8748068809509277, + "rewards/rejected": 6.321991443634033, + "step": 260 + }, + { + "accuracy": 0.5625, + "epoch": 0.07, + "learning_rate": 9.98798083368258e-06, + "logps/chosen": -123.7821044921875, + "logps/margins": 3.1715950965881348, + "logps/rejected": -126.95368957519531, + "loss": 1.891, + "rewards/chosen": 6.255987167358398, + "rewards/margins": 0.27523988485336304, + "rewards/rejected": 5.980746269226074, + "step": 270 + }, + { + "accuracy": 0.42500001192092896, + "epoch": 0.07, + "learning_rate": 9.987056655311611e-06, + "logps/chosen": -85.76194763183594, + "logps/margins": 25.137910842895508, + "logps/rejected": -110.89986419677734, + "loss": 2.0658, + "rewards/chosen": 4.518533706665039, + "rewards/margins": -0.9006655812263489, + "rewards/rejected": 5.419199466705322, + "step": 280 + }, + { + "accuracy": 0.574999988079071, + "epoch": 0.07, + "learning_rate": 9.986098295643039e-06, + "logps/chosen": -97.52912902832031, + "logps/margins": -6.8081374168396, + "logps/rejected": -90.72099304199219, + "loss": 1.9598, + "rewards/chosen": 4.835160255432129, + "rewards/margins": 0.4367671608924866, + "rewards/rejected": 4.398393154144287, + "step": 290 + }, + { + "accuracy": 0.574999988079071, + "epoch": 0.07, + "learning_rate": 9.985105761245461e-06, + "logps/chosen": -166.57162475585938, + "logps/margins": -12.8536958694458, + "logps/rejected": -153.71792602539062, + "loss": 1.8972, + "rewards/chosen": 7.88947057723999, + "rewards/margins": 0.5230575203895569, + "rewards/rejected": 7.3664140701293945, + "step": 300 + }, + { + "accuracy": 0.5874999761581421, + "epoch": 0.08, + "learning_rate": 9.984183266512048e-06, + "logps/chosen": -99.32716369628906, + "logps/margins": -9.03243350982666, + "logps/rejected": -90.29472351074219, + "loss": 1.797, + "rewards/chosen": 5.267740726470947, + "rewards/margins": 0.7921133041381836, + "rewards/rejected": 4.4756269454956055, + "step": 310 + }, + { + "accuracy": 0.574999988079071, + "epoch": 0.08, + "learning_rate": 9.983125819064725e-06, + "logps/chosen": -121.1572265625, + "logps/margins": -7.001384735107422, + "logps/rejected": -114.15582275390625, + "loss": 1.5913, + "rewards/chosen": 6.0754194259643555, + "rewards/margins": 0.7616773843765259, + "rewards/rejected": 5.313742160797119, + "step": 320 + }, + { + "accuracy": 0.6000000238418579, + "epoch": 0.08, + "learning_rate": 9.98203421726176e-06, + "logps/chosen": -141.78111267089844, + "logps/margins": -14.702291488647461, + "logps/rejected": -127.07881927490234, + "loss": 1.6056, + "rewards/chosen": 7.1940436363220215, + "rewards/margins": 1.245888590812683, + "rewards/rejected": 5.948155879974365, + "step": 330 + }, + { + "accuracy": 0.42500001192092896, + "epoch": 0.09, + "learning_rate": 9.980908468584996e-06, + "logps/chosen": -113.09223937988281, + "logps/margins": 12.976480484008789, + "logps/rejected": -126.06871032714844, + "loss": 2.0549, + "rewards/chosen": 6.2704925537109375, + "rewards/margins": 0.014498258009552956, + "rewards/rejected": 6.2559943199157715, + "step": 340 + }, + { + "accuracy": 0.4749999940395355, + "epoch": 0.09, + "learning_rate": 9.979748580750312e-06, + "logps/chosen": -123.32918548583984, + "logps/margins": -6.59561014175415, + "logps/rejected": -116.73358154296875, + "loss": 1.6623, + "rewards/chosen": 6.254411697387695, + "rewards/margins": 0.42041030526161194, + "rewards/rejected": 5.834001064300537, + "step": 350 + }, + { + "accuracy": 0.550000011920929, + "epoch": 0.09, + "learning_rate": 9.978554561707585e-06, + "logps/chosen": -108.12400817871094, + "logps/margins": -3.0909698009490967, + "logps/rejected": -105.03304290771484, + "loss": 1.5683, + "rewards/chosen": 5.455635070800781, + "rewards/margins": 0.2906327247619629, + "rewards/rejected": 5.165002346038818, + "step": 360 + }, + { + "accuracy": 0.5375000238418579, + "epoch": 0.09, + "learning_rate": 9.977326419640625e-06, + "logps/chosen": -119.7560806274414, + "logps/margins": -12.51281452178955, + "logps/rejected": -107.24327087402344, + "loss": 1.8813, + "rewards/chosen": 5.812981605529785, + "rewards/margins": 0.7436602711677551, + "rewards/rejected": 5.069321632385254, + "step": 370 + }, + { + "accuracy": 0.4625000059604645, + "epoch": 0.1, + "learning_rate": 9.976064162967119e-06, + "logps/chosen": -133.084716796875, + "logps/margins": -29.818653106689453, + "logps/rejected": -103.26605224609375, + "loss": 1.9166, + "rewards/chosen": 6.449902534484863, + "rewards/margins": 1.5401164293289185, + "rewards/rejected": 4.909786224365234, + "step": 380 + }, + { + "accuracy": 0.48750001192092896, + "epoch": 0.1, + "learning_rate": 9.974767800338575e-06, + "logps/chosen": -127.6087646484375, + "logps/margins": 6.958619594573975, + "logps/rejected": -134.5673828125, + "loss": 2.1036, + "rewards/chosen": 6.486325740814209, + "rewards/margins": 0.15423394739627838, + "rewards/rejected": 6.332091808319092, + "step": 390 + }, + { + "accuracy": 0.5625, + "epoch": 0.1, + "learning_rate": 9.97343734064027e-06, + "logps/chosen": -113.54975891113281, + "logps/margins": 3.7485451698303223, + "logps/rejected": -117.2983169555664, + "loss": 1.6023, + "rewards/chosen": 5.503276348114014, + "rewards/margins": -0.21575994789600372, + "rewards/rejected": 5.719037055969238, + "step": 400 + }, + { + "accuracy": 0.512499988079071, + "epoch": 0.1, + "learning_rate": 9.972072792991174e-06, + "logps/chosen": -119.14912414550781, + "logps/margins": 2.4335358142852783, + "logps/rejected": -121.5826644897461, + "loss": 1.7093, + "rewards/chosen": 5.5079240798950195, + "rewards/margins": 0.09962362051010132, + "rewards/rejected": 5.408300399780273, + "step": 410 + }, + { + "accuracy": 0.5, + "epoch": 0.1, + "learning_rate": 9.970674166743902e-06, + "logps/chosen": -111.63450622558594, + "logps/margins": 17.147808074951172, + "logps/rejected": -128.7823028564453, + "loss": 1.6341, + "rewards/chosen": 5.540095329284668, + "rewards/margins": -0.38307538628578186, + "rewards/rejected": 5.923171043395996, + "step": 420 + }, + { + "accuracy": 0.5625, + "epoch": 0.11, + "learning_rate": 9.96924147148464e-06, + "logps/chosen": -136.34762573242188, + "logps/margins": -22.253097534179688, + "logps/rejected": -114.09454345703125, + "loss": 1.3059, + "rewards/chosen": 6.375910758972168, + "rewards/margins": 1.2692499160766602, + "rewards/rejected": 5.106661796569824, + "step": 430 + }, + { + "accuracy": 0.550000011920929, + "epoch": 0.11, + "learning_rate": 9.967774717033087e-06, + "logps/chosen": -124.4723892211914, + "logps/margins": -15.860456466674805, + "logps/rejected": -108.6119384765625, + "loss": 1.5872, + "rewards/chosen": 5.366484642028809, + "rewards/margins": 0.8536316156387329, + "rewards/rejected": 4.512853145599365, + "step": 440 + }, + { + "accuracy": 0.512499988079071, + "epoch": 0.11, + "learning_rate": 9.966273913442378e-06, + "logps/chosen": -133.6871337890625, + "logps/margins": 1.6693938970565796, + "logps/rejected": -135.3565216064453, + "loss": 1.6193, + "rewards/chosen": 6.031922340393066, + "rewards/margins": -0.014263915829360485, + "rewards/rejected": 6.0461859703063965, + "step": 450 + }, + { + "accuracy": 0.5375000238418579, + "epoch": 0.12, + "learning_rate": 9.964739070999025e-06, + "logps/chosen": -118.98359680175781, + "logps/margins": -11.851815223693848, + "logps/rejected": -107.13179016113281, + "loss": 1.6527, + "rewards/chosen": 5.6537957191467285, + "rewards/margins": 1.0875290632247925, + "rewards/rejected": 4.566267013549805, + "step": 460 + }, + { + "accuracy": 0.5249999761581421, + "epoch": 0.12, + "learning_rate": 9.963170200222842e-06, + "logps/chosen": -142.10147094726562, + "logps/margins": 4.2556023597717285, + "logps/rejected": -146.35708618164062, + "loss": 1.7692, + "rewards/chosen": 5.802268028259277, + "rewards/margins": -0.14006440341472626, + "rewards/rejected": 5.9423322677612305, + "step": 470 + }, + { + "accuracy": 0.550000011920929, + "epoch": 0.12, + "learning_rate": 9.961567311866875e-06, + "logps/chosen": -125.03279113769531, + "logps/margins": -12.079760551452637, + "logps/rejected": -112.95301818847656, + "loss": 1.7471, + "rewards/chosen": 5.379918575286865, + "rewards/margins": 0.44170933961868286, + "rewards/rejected": 4.938209533691406, + "step": 480 + }, + { + "accuracy": 0.5375000238418579, + "epoch": 0.12, + "learning_rate": 9.959930416917323e-06, + "logps/chosen": -118.28129577636719, + "logps/margins": 4.664868354797363, + "logps/rejected": -122.94615173339844, + "loss": 1.563, + "rewards/chosen": 4.769405364990234, + "rewards/margins": -0.01859002187848091, + "rewards/rejected": 4.787995338439941, + "step": 490 + }, + { + "accuracy": 0.44999998807907104, + "epoch": 0.12, + "learning_rate": 9.958259526593465e-06, + "logps/chosen": -120.03038024902344, + "logps/margins": 13.22276496887207, + "logps/rejected": -133.25315856933594, + "loss": 1.6885, + "rewards/chosen": 4.757286071777344, + "rewards/margins": -0.2749316394329071, + "rewards/rejected": 5.032217979431152, + "step": 500 + }, + { + "accuracy": 0.5375000238418579, + "epoch": 0.13, + "learning_rate": 9.956554652347591e-06, + "logps/chosen": -124.68904876708984, + "logps/margins": 8.64969539642334, + "logps/rejected": -133.3387451171875, + "loss": 1.6476, + "rewards/chosen": 4.9756550788879395, + "rewards/margins": 0.06309598684310913, + "rewards/rejected": 4.912558555603027, + "step": 510 + }, + { + "accuracy": 0.5874999761581421, + "epoch": 0.13, + "learning_rate": 9.954815805864911e-06, + "logps/chosen": -136.79421997070312, + "logps/margins": -9.166430473327637, + "logps/rejected": -127.6278076171875, + "loss": 1.4208, + "rewards/chosen": 5.172430515289307, + "rewards/margins": 0.9554030299186707, + "rewards/rejected": 4.217027187347412, + "step": 520 + }, + { + "accuracy": 0.512499988079071, + "epoch": 0.13, + "learning_rate": 9.953042999063482e-06, + "logps/chosen": -117.29981994628906, + "logps/margins": -2.5968971252441406, + "logps/rejected": -114.70291900634766, + "loss": 1.1629, + "rewards/chosen": 3.88728666305542, + "rewards/margins": 0.352276086807251, + "rewards/rejected": 3.535010576248169, + "step": 530 + }, + { + "accuracy": 0.550000011920929, + "epoch": 0.14, + "learning_rate": 9.951236244094127e-06, + "logps/chosen": -147.85391235351562, + "logps/margins": -23.046194076538086, + "logps/rejected": -124.8077163696289, + "loss": 1.1118, + "rewards/chosen": 4.093080997467041, + "rewards/margins": 0.7096647024154663, + "rewards/rejected": 3.383415937423706, + "step": 540 + }, + { + "accuracy": 0.5249999761581421, + "epoch": 0.14, + "learning_rate": 9.949395553340349e-06, + "logps/chosen": -137.44674682617188, + "logps/margins": -6.84769344329834, + "logps/rejected": -130.5990753173828, + "loss": 1.1553, + "rewards/chosen": 3.6311817169189453, + "rewards/margins": 0.5994359254837036, + "rewards/rejected": 3.0317459106445312, + "step": 550 + }, + { + "accuracy": 0.5249999761581421, + "epoch": 0.14, + "learning_rate": 9.947520939418245e-06, + "logps/chosen": -131.57958984375, + "logps/margins": 6.997740268707275, + "logps/rejected": -138.57733154296875, + "loss": 1.2034, + "rewards/chosen": 3.0507590770721436, + "rewards/margins": 0.17111361026763916, + "rewards/rejected": 2.879645824432373, + "step": 560 + }, + { + "accuracy": 0.4375, + "epoch": 0.14, + "learning_rate": 9.945612415176426e-06, + "logps/chosen": -140.58132934570312, + "logps/margins": -19.616926193237305, + "logps/rejected": -120.96439361572266, + "loss": 1.0881, + "rewards/chosen": 2.8486058712005615, + "rewards/margins": 0.25832873582839966, + "rewards/rejected": 2.5902771949768066, + "step": 570 + }, + { + "accuracy": 0.4625000059604645, + "epoch": 0.14, + "learning_rate": 9.943669993695919e-06, + "logps/chosen": -151.5093231201172, + "logps/margins": -18.318321228027344, + "logps/rejected": -133.1909942626953, + "loss": 1.1161, + "rewards/chosen": 2.3254706859588623, + "rewards/margins": 0.2963281571865082, + "rewards/rejected": 2.029142379760742, + "step": 580 + }, + { + "accuracy": 0.5625, + "epoch": 0.15, + "learning_rate": 9.941693688290085e-06, + "logps/chosen": -147.8477020263672, + "logps/margins": 4.324751377105713, + "logps/rejected": -152.1724395751953, + "loss": 1.0862, + "rewards/chosen": 2.0108516216278076, + "rewards/margins": 0.4088834226131439, + "rewards/rejected": 1.6019681692123413, + "step": 590 + }, + { + "accuracy": 0.48750001192092896, + "epoch": 0.15, + "learning_rate": 9.939683512504528e-06, + "logps/chosen": -149.92286682128906, + "logps/margins": -1.7963898181915283, + "logps/rejected": -148.12648010253906, + "loss": 1.1338, + "rewards/chosen": 1.4565500020980835, + "rewards/margins": -0.2254774123430252, + "rewards/rejected": 1.6820274591445923, + "step": 600 + }, + { + "accuracy": 0.4000000059604645, + "epoch": 0.15, + "learning_rate": 9.937639480116993e-06, + "logps/chosen": -149.3422393798828, + "logps/margins": 5.6771135330200195, + "logps/rejected": -155.01934814453125, + "loss": 1.122, + "rewards/chosen": 1.8707154989242554, + "rewards/margins": -0.23694105446338654, + "rewards/rejected": 2.107656717300415, + "step": 610 + }, + { + "accuracy": 0.574999988079071, + "epoch": 0.15, + "learning_rate": 9.935561605137289e-06, + "logps/chosen": -154.9318389892578, + "logps/margins": -26.140798568725586, + "logps/rejected": -128.79103088378906, + "loss": 1.0635, + "rewards/chosen": 1.8631260395050049, + "rewards/margins": 0.3234782814979553, + "rewards/rejected": 1.5396478176116943, + "step": 620 + }, + { + "accuracy": 0.512499988079071, + "epoch": 0.16, + "learning_rate": 9.933449901807171e-06, + "logps/chosen": -161.14865112304688, + "logps/margins": 23.34848403930664, + "logps/rejected": -184.49713134765625, + "loss": 0.9723, + "rewards/chosen": 2.019709587097168, + "rewards/margins": -0.17707836627960205, + "rewards/rejected": 2.1967883110046387, + "step": 630 + }, + { + "accuracy": 0.5375000238418579, + "epoch": 0.16, + "learning_rate": 9.931520457528119e-06, + "logps/chosen": -189.56741333007812, + "logps/margins": -4.537158012390137, + "logps/rejected": -185.0302276611328, + "loss": 0.9951, + "rewards/chosen": 2.1462416648864746, + "rewards/margins": 0.35901370644569397, + "rewards/rejected": 1.7872282266616821, + "step": 640 + }, + { + "accuracy": 0.5, + "epoch": 0.16, + "learning_rate": 9.92934452039859e-06, + "logps/chosen": -164.42535400390625, + "logps/margins": -43.8677864074707, + "logps/rejected": -120.55757904052734, + "loss": 1.1578, + "rewards/chosen": 1.4558143615722656, + "rewards/margins": -0.1470411866903305, + "rewards/rejected": 1.6028554439544678, + "step": 650 + }, + { + "accuracy": 0.512499988079071, + "epoch": 0.17, + "learning_rate": 9.927134797530561e-06, + "logps/chosen": -144.9272003173828, + "logps/margins": 6.433265686035156, + "logps/rejected": -151.36045837402344, + "loss": 1.0144, + "rewards/chosen": 1.3912265300750732, + "rewards/margins": 0.060493774712085724, + "rewards/rejected": 1.3307329416275024, + "step": 660 + }, + { + "accuracy": 0.512499988079071, + "epoch": 0.17, + "learning_rate": 9.924891304069477e-06, + "logps/chosen": -139.90447998046875, + "logps/margins": -4.661585330963135, + "logps/rejected": -135.24290466308594, + "loss": 0.9865, + "rewards/chosen": 1.7835805416107178, + "rewards/margins": -0.09399493783712387, + "rewards/rejected": 1.8775756359100342, + "step": 670 + }, + { + "accuracy": 0.512499988079071, + "epoch": 0.17, + "learning_rate": 9.92261405539225e-06, + "logps/chosen": -162.5684814453125, + "logps/margins": -37.87208557128906, + "logps/rejected": -124.6963882446289, + "loss": 1.0156, + "rewards/chosen": 1.867201566696167, + "rewards/margins": 0.2595711350440979, + "rewards/rejected": 1.6076304912567139, + "step": 680 + }, + { + "accuracy": 0.550000011920929, + "epoch": 0.17, + "learning_rate": 9.920303067107145e-06, + "logps/chosen": -157.85446166992188, + "logps/margins": -10.372437477111816, + "logps/rejected": -147.48202514648438, + "loss": 1.0458, + "rewards/chosen": 2.28122615814209, + "rewards/margins": 0.5393844842910767, + "rewards/rejected": 1.7418416738510132, + "step": 690 + }, + { + "accuracy": 0.625, + "epoch": 0.17, + "learning_rate": 9.917958355053681e-06, + "logps/chosen": -168.91946411132812, + "logps/margins": -25.6357479095459, + "logps/rejected": -143.28372192382812, + "loss": 1.0233, + "rewards/chosen": 2.2451210021972656, + "rewards/margins": 0.22837433218955994, + "rewards/rejected": 2.016746997833252, + "step": 700 + }, + { + "accuracy": 0.48750001192092896, + "epoch": 0.18, + "learning_rate": 9.915579935302521e-06, + "logps/chosen": -142.89210510253906, + "logps/margins": 1.3719890117645264, + "logps/rejected": -144.2640838623047, + "loss": 1.0359, + "rewards/chosen": 1.5756165981292725, + "rewards/margins": 0.19118764996528625, + "rewards/rejected": 1.3844289779663086, + "step": 710 + }, + { + "accuracy": 0.4375, + "epoch": 0.18, + "learning_rate": 9.913167824155358e-06, + "logps/chosen": -140.5428924560547, + "logps/margins": -9.356057167053223, + "logps/rejected": -131.1868438720703, + "loss": 0.9801, + "rewards/chosen": 1.5717885494232178, + "rewards/margins": -0.061538565903902054, + "rewards/rejected": 1.6333271265029907, + "step": 720 + }, + { + "accuracy": 0.48750001192092896, + "epoch": 0.18, + "learning_rate": 9.910722038144809e-06, + "logps/chosen": -147.37010192871094, + "logps/margins": 3.831080675125122, + "logps/rejected": -151.201171875, + "loss": 0.9843, + "rewards/chosen": 1.8470379114151, + "rewards/margins": 0.03712614253163338, + "rewards/rejected": 1.8099114894866943, + "step": 730 + }, + { + "accuracy": 0.44999998807907104, + "epoch": 0.18, + "learning_rate": 9.908242594034293e-06, + "logps/chosen": -140.7128448486328, + "logps/margins": 5.858675479888916, + "logps/rejected": -146.57150268554688, + "loss": 1.0252, + "rewards/chosen": 1.9413522481918335, + "rewards/margins": -0.4406326413154602, + "rewards/rejected": 2.3819847106933594, + "step": 740 + }, + { + "accuracy": 0.574999988079071, + "epoch": 0.19, + "learning_rate": 9.905729508817931e-06, + "logps/chosen": -159.4577178955078, + "logps/margins": -2.965106248855591, + "logps/rejected": -156.4925994873047, + "loss": 1.0375, + "rewards/chosen": 1.6923128366470337, + "rewards/margins": 0.30112066864967346, + "rewards/rejected": 1.3911921977996826, + "step": 750 + }, + { + "accuracy": 0.48750001192092896, + "epoch": 0.19, + "learning_rate": 9.90318279972041e-06, + "logps/chosen": -146.78695678710938, + "logps/margins": 10.177630424499512, + "logps/rejected": -156.96456909179688, + "loss": 0.9355, + "rewards/chosen": 1.7363805770874023, + "rewards/margins": 0.20642626285552979, + "rewards/rejected": 1.5299543142318726, + "step": 760 + }, + { + "accuracy": 0.6000000238418579, + "epoch": 0.19, + "learning_rate": 9.900862027536175e-06, + "logps/chosen": -180.95803833007812, + "logps/margins": -19.27467918395996, + "logps/rejected": -161.683349609375, + "loss": 1.0141, + "rewards/chosen": 2.0001168251037598, + "rewards/margins": 0.4025591015815735, + "rewards/rejected": 1.597557783126831, + "step": 770 + }, + { + "accuracy": 0.5249999761581421, + "epoch": 0.2, + "learning_rate": 9.898251481343773e-06, + "logps/chosen": -177.97366333007812, + "logps/margins": 13.344332695007324, + "logps/rejected": -191.31797790527344, + "loss": 1.0125, + "rewards/chosen": 1.648980736732483, + "rewards/margins": 0.008140301331877708, + "rewards/rejected": 1.6408401727676392, + "step": 780 + }, + { + "accuracy": 0.4375, + "epoch": 0.2, + "learning_rate": 9.895607362524631e-06, + "logps/chosen": -181.28817749023438, + "logps/margins": -24.692012786865234, + "logps/rejected": -156.59619140625, + "loss": 0.9706, + "rewards/chosen": 1.7742103338241577, + "rewards/margins": -0.08051016181707382, + "rewards/rejected": 1.8547203540802002, + "step": 790 + }, + { + "accuracy": 0.5249999761581421, + "epoch": 0.2, + "learning_rate": 9.892929689201545e-06, + "logps/chosen": -178.93397521972656, + "logps/margins": -11.522749900817871, + "logps/rejected": -167.41122436523438, + "loss": 0.9685, + "rewards/chosen": 1.7500654458999634, + "rewards/margins": -0.028841054067015648, + "rewards/rejected": 1.7789065837860107, + "step": 800 + }, + { + "accuracy": 0.574999988079071, + "epoch": 0.2, + "learning_rate": 9.890218479727294e-06, + "logps/chosen": -177.42764282226562, + "logps/margins": -15.491392135620117, + "logps/rejected": -161.93624877929688, + "loss": 0.9007, + "rewards/chosen": 2.0393176078796387, + "rewards/margins": 0.34074297547340393, + "rewards/rejected": 1.6985746622085571, + "step": 810 + }, + { + "accuracy": 0.4375, + "epoch": 0.2, + "learning_rate": 9.887473752684515e-06, + "logps/chosen": -161.78909301757812, + "logps/margins": -2.778823137283325, + "logps/rejected": -159.01028442382812, + "loss": 1.0436, + "rewards/chosen": 1.682908058166504, + "rewards/margins": -0.168172687292099, + "rewards/rejected": 1.8510808944702148, + "step": 820 + }, + { + "accuracy": 0.38749998807907104, + "epoch": 0.21, + "learning_rate": 9.884695526885574e-06, + "logps/chosen": -151.64749145507812, + "logps/margins": -0.1611984223127365, + "logps/rejected": -151.48629760742188, + "loss": 0.9551, + "rewards/chosen": 1.3659042119979858, + "rewards/margins": -0.38882842659950256, + "rewards/rejected": 1.7547328472137451, + "step": 830 + }, + { + "accuracy": 0.4749999940395355, + "epoch": 0.21, + "learning_rate": 9.881883821372436e-06, + "logps/chosen": -140.57131958007812, + "logps/margins": -5.530011177062988, + "logps/rejected": -135.0413055419922, + "loss": 0.9358, + "rewards/chosen": 1.2723175287246704, + "rewards/margins": -0.11282980442047119, + "rewards/rejected": 1.3851473331451416, + "step": 840 + }, + { + "accuracy": 0.4124999940395355, + "epoch": 0.21, + "learning_rate": 9.879038655416536e-06, + "logps/chosen": -179.29159545898438, + "logps/margins": -5.520005226135254, + "logps/rejected": -173.77159118652344, + "loss": 1.0266, + "rewards/chosen": 1.6179869174957275, + "rewards/margins": -0.3334459364414215, + "rewards/rejected": 1.9514325857162476, + "step": 850 + }, + { + "accuracy": 0.4625000059604645, + "epoch": 0.21, + "learning_rate": 9.876160048518646e-06, + "logps/chosen": -146.00396728515625, + "logps/margins": -1.2213438749313354, + "logps/rejected": -144.7826385498047, + "loss": 0.8996, + "rewards/chosen": 1.306060791015625, + "rewards/margins": -0.033823203295469284, + "rewards/rejected": 1.3398840427398682, + "step": 860 + }, + { + "accuracy": 0.4749999940395355, + "epoch": 0.22, + "learning_rate": 9.87324802040875e-06, + "logps/chosen": -150.81924438476562, + "logps/margins": -2.4973397254943848, + "logps/rejected": -148.3218994140625, + "loss": 1.0671, + "rewards/chosen": 1.421643853187561, + "rewards/margins": 0.08586404472589493, + "rewards/rejected": 1.3357797861099243, + "step": 870 + }, + { + "accuracy": 0.5375000238418579, + "epoch": 0.22, + "learning_rate": 9.870302591045892e-06, + "logps/chosen": -180.90975952148438, + "logps/margins": -19.462383270263672, + "logps/rejected": -161.44740295410156, + "loss": 0.8835, + "rewards/chosen": 1.8036632537841797, + "rewards/margins": 0.12437786906957626, + "rewards/rejected": 1.6792854070663452, + "step": 880 + }, + { + "accuracy": 0.4000000059604645, + "epoch": 0.22, + "learning_rate": 9.86732378061805e-06, + "logps/chosen": -160.3754119873047, + "logps/margins": -9.997312545776367, + "logps/rejected": -150.37808227539062, + "loss": 0.9699, + "rewards/chosen": 1.781908392906189, + "rewards/margins": -0.18765194714069366, + "rewards/rejected": 1.9695602655410767, + "step": 890 + }, + { + "accuracy": 0.4000000059604645, + "epoch": 0.23, + "learning_rate": 9.864311609542002e-06, + "logps/chosen": -127.0546646118164, + "logps/margins": 17.639873504638672, + "logps/rejected": -144.69454956054688, + "loss": 0.9206, + "rewards/chosen": 1.5364660024642944, + "rewards/margins": -0.029984693974256516, + "rewards/rejected": 1.566450834274292, + "step": 900 + }, + { + "accuracy": 0.5, + "epoch": 0.23, + "learning_rate": 9.861266098463169e-06, + "logps/chosen": -147.8013153076172, + "logps/margins": -11.108197212219238, + "logps/rejected": -136.693115234375, + "loss": 0.9103, + "rewards/chosen": 2.1111650466918945, + "rewards/margins": 0.1486923098564148, + "rewards/rejected": 1.962472677230835, + "step": 910 + }, + { + "accuracy": 0.512499988079071, + "epoch": 0.23, + "learning_rate": 9.858187268255496e-06, + "logps/chosen": -140.09344482421875, + "logps/margins": -4.577602386474609, + "logps/rejected": -135.51583862304688, + "loss": 0.9162, + "rewards/chosen": 2.0921835899353027, + "rewards/margins": 0.14385966956615448, + "rewards/rejected": 1.9483239650726318, + "step": 920 + }, + { + "accuracy": 0.574999988079071, + "epoch": 0.23, + "learning_rate": 9.85507514002129e-06, + "logps/chosen": -196.1652374267578, + "logps/margins": -25.233646392822266, + "logps/rejected": -170.93161010742188, + "loss": 0.8237, + "rewards/chosen": 2.5406811237335205, + "rewards/margins": 0.5653001070022583, + "rewards/rejected": 1.9753808975219727, + "step": 930 + }, + { + "accuracy": 0.6000000238418579, + "epoch": 0.23, + "learning_rate": 9.851929735091086e-06, + "logps/chosen": -163.54891967773438, + "logps/margins": -39.696372985839844, + "logps/rejected": -123.85255432128906, + "loss": 0.8814, + "rewards/chosen": 1.9840795993804932, + "rewards/margins": 0.36643046140670776, + "rewards/rejected": 1.6176488399505615, + "step": 940 + }, + { + "accuracy": 0.5249999761581421, + "epoch": 0.24, + "learning_rate": 9.848751075023494e-06, + "logps/chosen": -152.39613342285156, + "logps/margins": -6.493553161621094, + "logps/rejected": -145.90255737304688, + "loss": 0.9086, + "rewards/chosen": 2.0433950424194336, + "rewards/margins": 0.1634274423122406, + "rewards/rejected": 1.8799673318862915, + "step": 950 + }, + { + "accuracy": 0.5375000238418579, + "epoch": 0.24, + "learning_rate": 9.84553918160506e-06, + "logps/chosen": -157.0005645751953, + "logps/margins": 5.283432960510254, + "logps/rejected": -162.2840118408203, + "loss": 0.9004, + "rewards/chosen": 2.185926914215088, + "rewards/margins": 0.35132330656051636, + "rewards/rejected": 1.8346033096313477, + "step": 960 + }, + { + "accuracy": 0.5375000238418579, + "epoch": 0.24, + "learning_rate": 9.842294076850113e-06, + "logps/chosen": -161.79470825195312, + "logps/margins": 6.562838077545166, + "logps/rejected": -168.35755920410156, + "loss": 0.9151, + "rewards/chosen": 1.789467453956604, + "rewards/margins": 0.3674093782901764, + "rewards/rejected": 1.4220579862594604, + "step": 970 + }, + { + "accuracy": 0.5625, + "epoch": 0.24, + "learning_rate": 9.839015783000597e-06, + "logps/chosen": -175.66624450683594, + "logps/margins": -25.768230438232422, + "logps/rejected": -149.89797973632812, + "loss": 0.8288, + "rewards/chosen": 1.8941459655761719, + "rewards/margins": 0.39253947138786316, + "rewards/rejected": 1.5016063451766968, + "step": 980 + }, + { + "accuracy": 0.625, + "epoch": 0.25, + "learning_rate": 9.835704322525954e-06, + "logps/chosen": -167.6163787841797, + "logps/margins": -3.1078941822052, + "logps/rejected": -164.5084991455078, + "loss": 0.8941, + "rewards/chosen": 1.8167266845703125, + "rewards/margins": 0.3749098479747772, + "rewards/rejected": 1.441817045211792, + "step": 990 + }, + { + "accuracy": 0.550000011920929, + "epoch": 0.25, + "learning_rate": 9.832359718122939e-06, + "logps/chosen": -158.74981689453125, + "logps/margins": -21.72418785095215, + "logps/rejected": -137.025634765625, + "loss": 0.8179, + "rewards/chosen": 1.9152021408081055, + "rewards/margins": 0.3552405536174774, + "rewards/rejected": 1.5599615573883057, + "step": 1000 + }, + { + "accuracy": 0.48750001192092896, + "epoch": 0.25, + "learning_rate": 9.828981992715479e-06, + "logps/chosen": -145.79006958007812, + "logps/margins": -7.068762302398682, + "logps/rejected": -138.72128295898438, + "loss": 0.8845, + "rewards/chosen": 2.168658494949341, + "rewards/margins": 0.2922838628292084, + "rewards/rejected": 1.87637460231781, + "step": 1010 + }, + { + "accuracy": 0.612500011920929, + "epoch": 0.26, + "learning_rate": 9.825571169454511e-06, + "logps/chosen": -168.78416442871094, + "logps/margins": -13.750897407531738, + "logps/rejected": -155.0332489013672, + "loss": 0.8311, + "rewards/chosen": 2.1091105937957764, + "rewards/margins": 0.5144209861755371, + "rewards/rejected": 1.5946893692016602, + "step": 1020 + }, + { + "accuracy": 0.550000011920929, + "epoch": 0.26, + "learning_rate": 9.822127271717825e-06, + "logps/chosen": -136.5363311767578, + "logps/margins": 1.720564842224121, + "logps/rejected": -138.2569122314453, + "loss": 0.8649, + "rewards/chosen": 1.9865185022354126, + "rewards/margins": 0.4811386466026306, + "rewards/rejected": 1.5053794384002686, + "step": 1030 + }, + { + "accuracy": 0.550000011920929, + "epoch": 0.26, + "learning_rate": 9.818650323109904e-06, + "logps/chosen": -170.8836669921875, + "logps/margins": 10.731355667114258, + "logps/rejected": -181.61502075195312, + "loss": 0.8166, + "rewards/chosen": 1.8703758716583252, + "rewards/margins": 0.29043930768966675, + "rewards/rejected": 1.5799365043640137, + "step": 1040 + }, + { + "accuracy": 0.574999988079071, + "epoch": 0.26, + "learning_rate": 9.815140347461764e-06, + "logps/chosen": -140.1612091064453, + "logps/margins": -8.15324592590332, + "logps/rejected": -132.00796508789062, + "loss": 0.8072, + "rewards/chosen": 1.5995724201202393, + "rewards/margins": 0.1591363251209259, + "rewards/rejected": 1.4404361248016357, + "step": 1050 + }, + { + "accuracy": 0.4625000059604645, + "epoch": 0.27, + "learning_rate": 9.811597368830783e-06, + "logps/chosen": -139.02783203125, + "logps/margins": 1.2959182262420654, + "logps/rejected": -140.32376098632812, + "loss": 0.9571, + "rewards/chosen": 1.4544225931167603, + "rewards/margins": -0.052755583077669144, + "rewards/rejected": 1.5071781873703003, + "step": 1060 + }, + { + "accuracy": 0.48750001192092896, + "epoch": 0.27, + "learning_rate": 9.808021411500546e-06, + "logps/chosen": -136.28179931640625, + "logps/margins": -6.942041873931885, + "logps/rejected": -129.3397674560547, + "loss": 0.861, + "rewards/chosen": 1.8051027059555054, + "rewards/margins": 0.35802793502807617, + "rewards/rejected": 1.4470746517181396, + "step": 1070 + }, + { + "accuracy": 0.5249999761581421, + "epoch": 0.27, + "learning_rate": 9.80441249998067e-06, + "logps/chosen": -162.54974365234375, + "logps/margins": -6.373490333557129, + "logps/rejected": -156.17626953125, + "loss": 0.8857, + "rewards/chosen": 1.5471500158309937, + "rewards/margins": 0.1165885329246521, + "rewards/rejected": 1.4305615425109863, + "step": 1080 + }, + { + "accuracy": 0.512499988079071, + "epoch": 0.27, + "learning_rate": 9.800770659006646e-06, + "logps/chosen": -178.4582977294922, + "logps/margins": -22.30353355407715, + "logps/rejected": -156.15475463867188, + "loss": 0.8928, + "rewards/chosen": 1.782330870628357, + "rewards/margins": 0.24549439549446106, + "rewards/rejected": 1.5368363857269287, + "step": 1090 + }, + { + "accuracy": 0.4625000059604645, + "epoch": 0.28, + "learning_rate": 9.79709591353966e-06, + "logps/chosen": -138.05850219726562, + "logps/margins": 25.72446060180664, + "logps/rejected": -163.782958984375, + "loss": 0.9741, + "rewards/chosen": 1.8192415237426758, + "rewards/margins": -0.07427935302257538, + "rewards/rejected": 1.8935210704803467, + "step": 1100 + }, + { + "accuracy": 0.550000011920929, + "epoch": 0.28, + "learning_rate": 9.793388288766426e-06, + "logps/chosen": -162.1980743408203, + "logps/margins": -8.0133638381958, + "logps/rejected": -154.18472290039062, + "loss": 0.823, + "rewards/chosen": 1.9859386682510376, + "rewards/margins": 0.21398241817951202, + "rewards/rejected": 1.771956205368042, + "step": 1110 + }, + { + "accuracy": 0.512499988079071, + "epoch": 0.28, + "learning_rate": 9.78964781009901e-06, + "logps/chosen": -154.9339599609375, + "logps/margins": 22.017454147338867, + "logps/rejected": -176.951416015625, + "loss": 0.8848, + "rewards/chosen": 1.8680915832519531, + "rewards/margins": 0.042823903262615204, + "rewards/rejected": 1.8252675533294678, + "step": 1120 + }, + { + "accuracy": 0.5249999761581421, + "epoch": 0.28, + "learning_rate": 9.785874503174662e-06, + "logps/chosen": -144.37045288085938, + "logps/margins": -26.884754180908203, + "logps/rejected": -117.4857177734375, + "loss": 0.9758, + "rewards/chosen": 1.4734928607940674, + "rewards/margins": 0.02336726151406765, + "rewards/rejected": 1.4501255750656128, + "step": 1130 + }, + { + "accuracy": 0.5, + "epoch": 0.28, + "learning_rate": 9.782068393855638e-06, + "logps/chosen": -136.8752899169922, + "logps/margins": 5.071300983428955, + "logps/rejected": -141.9465789794922, + "loss": 0.9041, + "rewards/chosen": 1.4244577884674072, + "rewards/margins": -0.05741150304675102, + "rewards/rejected": 1.4818692207336426, + "step": 1140 + }, + { + "accuracy": 0.5, + "epoch": 0.29, + "learning_rate": 9.778229508229018e-06, + "logps/chosen": -163.36964416503906, + "logps/margins": -4.506762504577637, + "logps/rejected": -158.86288452148438, + "loss": 0.8715, + "rewards/chosen": 1.357474684715271, + "rewards/margins": -0.045003920793533325, + "rewards/rejected": 1.402478575706482, + "step": 1150 + }, + { + "accuracy": 0.512499988079071, + "epoch": 0.29, + "learning_rate": 9.774357872606535e-06, + "logps/chosen": -186.77279663085938, + "logps/margins": -25.582441329956055, + "logps/rejected": -161.19036865234375, + "loss": 0.9185, + "rewards/chosen": 1.7494417428970337, + "rewards/margins": 0.10148294270038605, + "rewards/rejected": 1.647958755493164, + "step": 1160 + }, + { + "accuracy": 0.5, + "epoch": 0.29, + "learning_rate": 9.770453513524386e-06, + "logps/chosen": -146.69717407226562, + "logps/margins": -8.776657104492188, + "logps/rejected": -137.92051696777344, + "loss": 0.9027, + "rewards/chosen": 1.8887031078338623, + "rewards/margins": -0.03893054649233818, + "rewards/rejected": 1.9276340007781982, + "step": 1170 + }, + { + "accuracy": 0.5, + "epoch": 0.29, + "learning_rate": 9.766516457743058e-06, + "logps/chosen": -160.04222106933594, + "logps/margins": 5.474094390869141, + "logps/rejected": -165.5163116455078, + "loss": 0.9204, + "rewards/chosen": 1.7510411739349365, + "rewards/margins": 0.10351963341236115, + "rewards/rejected": 1.6475216150283813, + "step": 1180 + }, + { + "accuracy": 0.5375000238418579, + "epoch": 0.3, + "learning_rate": 9.762546732247141e-06, + "logps/chosen": -142.36509704589844, + "logps/margins": -12.649444580078125, + "logps/rejected": -129.7156524658203, + "loss": 0.8391, + "rewards/chosen": 1.8815829753875732, + "rewards/margins": 0.42103347182273865, + "rewards/rejected": 1.4605494737625122, + "step": 1190 + }, + { + "accuracy": 0.4625000059604645, + "epoch": 0.3, + "learning_rate": 9.758544364245142e-06, + "logps/chosen": -183.9442138671875, + "logps/margins": -11.90736198425293, + "logps/rejected": -172.03683471679688, + "loss": 0.8927, + "rewards/chosen": 2.1199896335601807, + "rewards/margins": -0.1310155689716339, + "rewards/rejected": 2.251004934310913, + "step": 1200 + }, + { + "accuracy": 0.4625000059604645, + "epoch": 0.3, + "learning_rate": 9.7545093811693e-06, + "logps/chosen": -144.1505126953125, + "logps/margins": -5.138739109039307, + "logps/rejected": -139.0117950439453, + "loss": 0.8977, + "rewards/chosen": 1.6401937007904053, + "rewards/margins": -0.007590307388454676, + "rewards/rejected": 1.6477839946746826, + "step": 1210 + }, + { + "accuracy": 0.5375000238418579, + "epoch": 0.3, + "learning_rate": 9.750441810675398e-06, + "logps/chosen": -144.27529907226562, + "logps/margins": 1.4390274286270142, + "logps/rejected": -145.71432495117188, + "loss": 0.8574, + "rewards/chosen": 1.7657264471054077, + "rewards/margins": -0.02588719129562378, + "rewards/rejected": 1.7916133403778076, + "step": 1220 + }, + { + "accuracy": 0.574999988079071, + "epoch": 0.31, + "learning_rate": 9.746341680642572e-06, + "logps/chosen": -144.88253784179688, + "logps/margins": -5.341070652008057, + "logps/rejected": -139.54147338867188, + "loss": 0.8674, + "rewards/chosen": 1.7323579788208008, + "rewards/margins": 0.19564639031887054, + "rewards/rejected": 1.5367116928100586, + "step": 1230 + }, + { + "accuracy": 0.4749999940395355, + "epoch": 0.31, + "learning_rate": 9.742209019173125e-06, + "logps/chosen": -157.19650268554688, + "logps/margins": -16.224735260009766, + "logps/rejected": -140.97178649902344, + "loss": 0.9133, + "rewards/chosen": 1.8641220331192017, + "rewards/margins": -0.02793651446700096, + "rewards/rejected": 1.8920583724975586, + "step": 1240 + }, + { + "accuracy": 0.5874999761581421, + "epoch": 0.31, + "learning_rate": 9.738043854592327e-06, + "logps/chosen": -186.19308471679688, + "logps/margins": 13.080360412597656, + "logps/rejected": -199.27345275878906, + "loss": 0.8, + "rewards/chosen": 2.0433402061462402, + "rewards/margins": 0.1339813470840454, + "rewards/rejected": 1.9093587398529053, + "step": 1250 + }, + { + "accuracy": 0.612500011920929, + "epoch": 0.32, + "learning_rate": 9.733846215448226e-06, + "logps/chosen": -166.62429809570312, + "logps/margins": -30.089427947998047, + "logps/rejected": -136.5348663330078, + "loss": 0.8331, + "rewards/chosen": 1.8805596828460693, + "rewards/margins": 0.24542757868766785, + "rewards/rejected": 1.6351318359375, + "step": 1260 + }, + { + "accuracy": 0.5874999761581421, + "epoch": 0.32, + "learning_rate": 9.72961613051145e-06, + "logps/chosen": -171.31813049316406, + "logps/margins": -6.958993434906006, + "logps/rejected": -164.35914611816406, + "loss": 0.852, + "rewards/chosen": 2.167374610900879, + "rewards/margins": 0.27035635709762573, + "rewards/rejected": 1.8970181941986084, + "step": 1270 + }, + { + "accuracy": 0.6000000238418579, + "epoch": 0.32, + "learning_rate": 9.72535362877501e-06, + "logps/chosen": -154.11856079101562, + "logps/margins": -23.472820281982422, + "logps/rejected": -130.64576721191406, + "loss": 0.775, + "rewards/chosen": 1.6765483617782593, + "rewards/margins": 0.23718352615833282, + "rewards/rejected": 1.439365029335022, + "step": 1280 + }, + { + "accuracy": 0.512499988079071, + "epoch": 0.32, + "learning_rate": 9.721058739454104e-06, + "logps/chosen": -145.79397583007812, + "logps/margins": -10.878741264343262, + "logps/rejected": -134.9152374267578, + "loss": 1.0619, + "rewards/chosen": 2.1911463737487793, + "rewards/margins": 0.34823596477508545, + "rewards/rejected": 1.8429104089736938, + "step": 1290 + }, + { + "accuracy": 0.48750001192092896, + "epoch": 0.33, + "learning_rate": 9.716731491985912e-06, + "logps/chosen": -170.170166015625, + "logps/margins": -16.27873992919922, + "logps/rejected": -153.89141845703125, + "loss": 0.8089, + "rewards/chosen": 1.8226245641708374, + "rewards/margins": 0.2551000118255615, + "rewards/rejected": 1.5675245523452759, + "step": 1300 + }, + { + "accuracy": 0.512499988079071, + "epoch": 0.33, + "learning_rate": 9.712371916029398e-06, + "logps/chosen": -122.67264556884766, + "logps/margins": -7.548191070556641, + "logps/rejected": -115.12443542480469, + "loss": 0.8954, + "rewards/chosen": 1.513864278793335, + "rewards/margins": 0.12121949344873428, + "rewards/rejected": 1.392645001411438, + "step": 1310 + }, + { + "accuracy": 0.550000011920929, + "epoch": 0.33, + "learning_rate": 9.707980041465107e-06, + "logps/chosen": -140.32586669921875, + "logps/margins": 39.533992767333984, + "logps/rejected": -179.85984802246094, + "loss": 0.8803, + "rewards/chosen": 1.7652714252471924, + "rewards/margins": 0.18915463984012604, + "rewards/rejected": 1.5761165618896484, + "step": 1320 + }, + { + "accuracy": 0.550000011920929, + "epoch": 0.33, + "learning_rate": 9.703555898394953e-06, + "logps/chosen": -129.5006103515625, + "logps/margins": 2.545534610748291, + "logps/rejected": -132.04612731933594, + "loss": 0.8371, + "rewards/chosen": 1.8392177820205688, + "rewards/margins": 0.05049833655357361, + "rewards/rejected": 1.7887195348739624, + "step": 1330 + }, + { + "accuracy": 0.4625000059604645, + "epoch": 0.34, + "learning_rate": 9.699099517142028e-06, + "logps/chosen": -156.70346069335938, + "logps/margins": 4.285260200500488, + "logps/rejected": -160.9887237548828, + "loss": 0.8255, + "rewards/chosen": 1.8164058923721313, + "rewards/margins": -0.15966984629631042, + "rewards/rejected": 1.9760758876800537, + "step": 1340 + }, + { + "accuracy": 0.44999998807907104, + "epoch": 0.34, + "learning_rate": 9.694610928250374e-06, + "logps/chosen": -164.55393981933594, + "logps/margins": 1.9614883661270142, + "logps/rejected": -166.5154266357422, + "loss": 0.8349, + "rewards/chosen": 1.6731479167938232, + "rewards/margins": -0.0794978216290474, + "rewards/rejected": 1.752645492553711, + "step": 1350 + }, + { + "accuracy": 0.5625, + "epoch": 0.34, + "learning_rate": 9.690090162484795e-06, + "logps/chosen": -150.24972534179688, + "logps/margins": 22.32929039001465, + "logps/rejected": -172.57901000976562, + "loss": 0.8213, + "rewards/chosen": 1.6465768814086914, + "rewards/margins": 0.19454586505889893, + "rewards/rejected": 1.452030897140503, + "step": 1360 + }, + { + "accuracy": 0.574999988079071, + "epoch": 0.34, + "learning_rate": 9.685537250830625e-06, + "logps/chosen": -147.19168090820312, + "logps/margins": -8.287646293640137, + "logps/rejected": -138.90403747558594, + "loss": 0.8349, + "rewards/chosen": 2.058687925338745, + "rewards/margins": 0.2557194232940674, + "rewards/rejected": 1.802968978881836, + "step": 1370 + }, + { + "accuracy": 0.5625, + "epoch": 0.34, + "learning_rate": 9.680952224493534e-06, + "logps/chosen": -178.67041015625, + "logps/margins": -31.229793548583984, + "logps/rejected": -147.44061279296875, + "loss": 0.7511, + "rewards/chosen": 2.066157817840576, + "rewards/margins": 0.2523805499076843, + "rewards/rejected": 1.8137773275375366, + "step": 1380 + }, + { + "accuracy": 0.637499988079071, + "epoch": 0.35, + "learning_rate": 9.676335114899301e-06, + "logps/chosen": -139.00100708007812, + "logps/margins": 17.114728927612305, + "logps/rejected": -156.11573791503906, + "loss": 0.8169, + "rewards/chosen": 1.6733551025390625, + "rewards/margins": 0.125624418258667, + "rewards/rejected": 1.547730803489685, + "step": 1390 + }, + { + "accuracy": 0.550000011920929, + "epoch": 0.35, + "learning_rate": 9.671685953693606e-06, + "logps/chosen": -146.45240783691406, + "logps/margins": -3.9639840126037598, + "logps/rejected": -142.4884033203125, + "loss": 0.824, + "rewards/chosen": 1.6949371099472046, + "rewards/margins": 0.1053905338048935, + "rewards/rejected": 1.58954656124115, + "step": 1400 + }, + { + "accuracy": 0.6000000238418579, + "epoch": 0.35, + "learning_rate": 9.66700477274181e-06, + "logps/chosen": -191.86099243164062, + "logps/margins": -63.5861701965332, + "logps/rejected": -128.27481079101562, + "loss": 0.7761, + "rewards/chosen": 1.8620039224624634, + "rewards/margins": 0.4337918758392334, + "rewards/rejected": 1.4282119274139404, + "step": 1410 + }, + { + "accuracy": 0.44999998807907104, + "epoch": 0.35, + "learning_rate": 9.662291604128739e-06, + "logps/chosen": -151.45095825195312, + "logps/margins": -31.19417381286621, + "logps/rejected": -120.25679016113281, + "loss": 0.8699, + "rewards/chosen": 2.004847288131714, + "rewards/margins": 0.2191537618637085, + "rewards/rejected": 1.7856934070587158, + "step": 1420 + }, + { + "accuracy": 0.48750001192092896, + "epoch": 0.36, + "learning_rate": 9.65754648015846e-06, + "logps/chosen": -161.12332153320312, + "logps/margins": -21.93655014038086, + "logps/rejected": -139.18673706054688, + "loss": 0.9375, + "rewards/chosen": 1.6424764394760132, + "rewards/margins": -0.1259138137102127, + "rewards/rejected": 1.7683902978897095, + "step": 1430 + }, + { + "accuracy": 0.675000011920929, + "epoch": 0.36, + "learning_rate": 9.652769433354065e-06, + "logps/chosen": -161.1542510986328, + "logps/margins": -8.030868530273438, + "logps/rejected": -153.12338256835938, + "loss": 0.8442, + "rewards/chosen": 1.9806150197982788, + "rewards/margins": 0.5255073308944702, + "rewards/rejected": 1.4551074504852295, + "step": 1440 + }, + { + "accuracy": 0.4375, + "epoch": 0.36, + "learning_rate": 9.647960496457444e-06, + "logps/chosen": -166.8245086669922, + "logps/margins": -14.479487419128418, + "logps/rejected": -152.34503173828125, + "loss": 0.9106, + "rewards/chosen": 1.7062251567840576, + "rewards/margins": -0.04488358646631241, + "rewards/rejected": 1.7511088848114014, + "step": 1450 + }, + { + "accuracy": 0.512499988079071, + "epoch": 0.36, + "learning_rate": 9.643119702429057e-06, + "logps/chosen": -173.20440673828125, + "logps/margins": -28.684823989868164, + "logps/rejected": -144.5195770263672, + "loss": 0.824, + "rewards/chosen": 1.960404634475708, + "rewards/margins": 0.31568005681037903, + "rewards/rejected": 1.6447244882583618, + "step": 1460 + }, + { + "accuracy": 0.6000000238418579, + "epoch": 0.37, + "learning_rate": 9.638247084447723e-06, + "logps/chosen": -164.98948669433594, + "logps/margins": -8.795886039733887, + "logps/rejected": -156.193603515625, + "loss": 0.8262, + "rewards/chosen": 2.0697386264801025, + "rewards/margins": 0.5673502087593079, + "rewards/rejected": 1.50238835811615, + "step": 1470 + }, + { + "accuracy": 0.550000011920929, + "epoch": 0.37, + "learning_rate": 9.633342675910374e-06, + "logps/chosen": -153.81039428710938, + "logps/margins": -25.131587982177734, + "logps/rejected": -128.67880249023438, + "loss": 0.8208, + "rewards/chosen": 1.3760143518447876, + "rewards/margins": 0.07183907926082611, + "rewards/rejected": 1.3041752576828003, + "step": 1480 + }, + { + "accuracy": 0.5625, + "epoch": 0.37, + "learning_rate": 9.628406510431836e-06, + "logps/chosen": -151.3573760986328, + "logps/margins": -28.149948120117188, + "logps/rejected": -123.2074203491211, + "loss": 0.8767, + "rewards/chosen": 1.5835094451904297, + "rewards/margins": 0.44074511528015137, + "rewards/rejected": 1.1427643299102783, + "step": 1490 + }, + { + "accuracy": 0.574999988079071, + "epoch": 0.38, + "learning_rate": 9.6234386218446e-06, + "logps/chosen": -129.8954620361328, + "logps/margins": 9.33775520324707, + "logps/rejected": -139.23321533203125, + "loss": 0.8792, + "rewards/chosen": 1.6803743839263916, + "rewards/margins": 0.04734629765152931, + "rewards/rejected": 1.6330280303955078, + "step": 1500 + }, + { + "accuracy": 0.5375000238418579, + "epoch": 0.38, + "learning_rate": 9.618439044198587e-06, + "logps/chosen": -145.76075744628906, + "logps/margins": -8.094935417175293, + "logps/rejected": -137.6658172607422, + "loss": 0.8854, + "rewards/chosen": 1.7104465961456299, + "rewards/margins": 0.07147760689258575, + "rewards/rejected": 1.638968825340271, + "step": 1510 + }, + { + "accuracy": 0.5, + "epoch": 0.38, + "learning_rate": 9.61340781176091e-06, + "logps/chosen": -149.36721801757812, + "logps/margins": -21.91195297241211, + "logps/rejected": -127.45526123046875, + "loss": 0.7673, + "rewards/chosen": 1.8591524362564087, + "rewards/margins": 0.3141605257987976, + "rewards/rejected": 1.5449917316436768, + "step": 1520 + }, + { + "accuracy": 0.4749999940395355, + "epoch": 0.38, + "learning_rate": 9.608344959015649e-06, + "logps/chosen": -147.9269561767578, + "logps/margins": 7.347940921783447, + "logps/rejected": -155.27488708496094, + "loss": 0.8568, + "rewards/chosen": 1.672999382019043, + "rewards/margins": -0.042715318500995636, + "rewards/rejected": 1.715714693069458, + "step": 1530 + }, + { + "accuracy": 0.44999998807907104, + "epoch": 0.39, + "learning_rate": 9.60325052066361e-06, + "logps/chosen": -151.431396484375, + "logps/margins": 4.354189395904541, + "logps/rejected": -155.78561401367188, + "loss": 0.848, + "rewards/chosen": 1.6201680898666382, + "rewards/margins": 0.04147641360759735, + "rewards/rejected": 1.5786917209625244, + "step": 1540 + }, + { + "accuracy": 0.512499988079071, + "epoch": 0.39, + "learning_rate": 9.598124531622084e-06, + "logps/chosen": -187.59036254882812, + "logps/margins": -24.431259155273438, + "logps/rejected": -163.15908813476562, + "loss": 0.8519, + "rewards/chosen": 1.748784065246582, + "rewards/margins": 0.050236739218235016, + "rewards/rejected": 1.69854736328125, + "step": 1550 + }, + { + "accuracy": 0.6000000238418579, + "epoch": 0.39, + "learning_rate": 9.592967027024609e-06, + "logps/chosen": -166.96377563476562, + "logps/margins": -7.707032203674316, + "logps/rejected": -159.25674438476562, + "loss": 0.835, + "rewards/chosen": 1.425602674484253, + "rewards/margins": 0.29081496596336365, + "rewards/rejected": 1.1347877979278564, + "step": 1560 + }, + { + "accuracy": 0.5375000238418579, + "epoch": 0.39, + "learning_rate": 9.587778042220736e-06, + "logps/chosen": -141.1848602294922, + "logps/margins": 20.280067443847656, + "logps/rejected": -161.4649200439453, + "loss": 0.8764, + "rewards/chosen": 1.8093258142471313, + "rewards/margins": -0.010090534575283527, + "rewards/rejected": 1.8194164037704468, + "step": 1570 + }, + { + "accuracy": 0.44999998807907104, + "epoch": 0.4, + "learning_rate": 9.582557612775778e-06, + "logps/chosen": -164.12722778320312, + "logps/margins": -4.764612674713135, + "logps/rejected": -159.36260986328125, + "loss": 0.7378, + "rewards/chosen": 1.9242103099822998, + "rewards/margins": 0.3095531761646271, + "rewards/rejected": 1.6146570444107056, + "step": 1580 + }, + { + "accuracy": 0.5375000238418579, + "epoch": 0.4, + "learning_rate": 9.577305774470568e-06, + "logps/chosen": -186.56993103027344, + "logps/margins": -32.362693786621094, + "logps/rejected": -154.20726013183594, + "loss": 0.7723, + "rewards/chosen": 1.6604382991790771, + "rewards/margins": 0.27905914187431335, + "rewards/rejected": 1.3813793659210205, + "step": 1590 + }, + { + "accuracy": 0.625, + "epoch": 0.4, + "learning_rate": 9.572022563301222e-06, + "logps/chosen": -151.0451202392578, + "logps/margins": -11.142895698547363, + "logps/rejected": -139.90220642089844, + "loss": 0.7617, + "rewards/chosen": 1.8053767681121826, + "rewards/margins": 0.327472448348999, + "rewards/rejected": 1.4779040813446045, + "step": 1600 + }, + { + "accuracy": 0.550000011920929, + "epoch": 0.4, + "learning_rate": 9.566708015478878e-06, + "logps/chosen": -178.99655151367188, + "logps/margins": 4.333693027496338, + "logps/rejected": -183.33023071289062, + "loss": 0.8338, + "rewards/chosen": 1.819528579711914, + "rewards/margins": 0.2881702780723572, + "rewards/rejected": 1.5313583612442017, + "step": 1610 + }, + { + "accuracy": 0.512499988079071, + "epoch": 0.41, + "learning_rate": 9.56136216742946e-06, + "logps/chosen": -155.27374267578125, + "logps/margins": 9.862548828125, + "logps/rejected": -165.1362762451172, + "loss": 0.8148, + "rewards/chosen": 1.8142101764678955, + "rewards/margins": 0.33326777815818787, + "rewards/rejected": 1.4809424877166748, + "step": 1620 + }, + { + "accuracy": 0.625, + "epoch": 0.41, + "learning_rate": 9.555985055793422e-06, + "logps/chosen": -133.46585083007812, + "logps/margins": -10.51818561553955, + "logps/rejected": -122.94764709472656, + "loss": 0.8442, + "rewards/chosen": 1.9428646564483643, + "rewards/margins": 0.40966564416885376, + "rewards/rejected": 1.5331989526748657, + "step": 1630 + }, + { + "accuracy": 0.4749999940395355, + "epoch": 0.41, + "learning_rate": 9.550576717425501e-06, + "logps/chosen": -177.87936401367188, + "logps/margins": -38.132808685302734, + "logps/rejected": -139.74655151367188, + "loss": 0.8373, + "rewards/chosen": 1.8098485469818115, + "rewards/margins": 0.018934685736894608, + "rewards/rejected": 1.7909138202667236, + "step": 1640 + }, + { + "accuracy": 0.5375000238418579, + "epoch": 0.41, + "learning_rate": 9.545137189394459e-06, + "logps/chosen": -176.143798828125, + "logps/margins": -5.887171745300293, + "logps/rejected": -170.2566375732422, + "loss": 0.9063, + "rewards/chosen": 1.7588393688201904, + "rewards/margins": -0.03469324856996536, + "rewards/rejected": 1.7935327291488647, + "step": 1650 + }, + { + "accuracy": 0.4749999940395355, + "epoch": 0.41, + "learning_rate": 9.53966650898283e-06, + "logps/chosen": -145.28883361816406, + "logps/margins": -11.124507904052734, + "logps/rejected": -134.164306640625, + "loss": 0.8712, + "rewards/chosen": 1.575321912765503, + "rewards/margins": 0.09707476943731308, + "rewards/rejected": 1.4782471656799316, + "step": 1660 + }, + { + "accuracy": 0.5, + "epoch": 0.42, + "learning_rate": 9.534164713686677e-06, + "logps/chosen": -144.61868286132812, + "logps/margins": 25.188129425048828, + "logps/rejected": -169.8068389892578, + "loss": 0.8506, + "rewards/chosen": 1.3740068674087524, + "rewards/margins": -0.22523269057273865, + "rewards/rejected": 1.599239468574524, + "step": 1670 + }, + { + "accuracy": 0.5625, + "epoch": 0.42, + "learning_rate": 9.528631841215312e-06, + "logps/chosen": -137.08810424804688, + "logps/margins": 1.6299006938934326, + "logps/rejected": -138.71800231933594, + "loss": 0.8598, + "rewards/chosen": 1.5404658317565918, + "rewards/margins": 0.2575059235095978, + "rewards/rejected": 1.2829598188400269, + "step": 1680 + }, + { + "accuracy": 0.625, + "epoch": 0.42, + "learning_rate": 9.52306792949106e-06, + "logps/chosen": -158.11549377441406, + "logps/margins": -13.466079711914062, + "logps/rejected": -144.64942932128906, + "loss": 0.793, + "rewards/chosen": 1.9619176387786865, + "rewards/margins": 0.30079352855682373, + "rewards/rejected": 1.6611239910125732, + "step": 1690 + }, + { + "accuracy": 0.4749999940395355, + "epoch": 0.42, + "learning_rate": 9.517473016648977e-06, + "logps/chosen": -157.74221801757812, + "logps/margins": 13.248675346374512, + "logps/rejected": -170.99087524414062, + "loss": 0.8689, + "rewards/chosen": 1.416425108909607, + "rewards/margins": -0.04252084344625473, + "rewards/rejected": 1.458945870399475, + "step": 1700 + }, + { + "accuracy": 0.5375000238418579, + "epoch": 0.43, + "learning_rate": 9.511847141036616e-06, + "logps/chosen": -184.86428833007812, + "logps/margins": -30.957168579101562, + "logps/rejected": -153.9071044921875, + "loss": 0.7791, + "rewards/chosen": 1.7445602416992188, + "rewards/margins": 0.12616640329360962, + "rewards/rejected": 1.6183936595916748, + "step": 1710 + }, + { + "accuracy": 0.5375000238418579, + "epoch": 0.43, + "learning_rate": 9.50619034121374e-06, + "logps/chosen": -155.2756805419922, + "logps/margins": 5.653558731079102, + "logps/rejected": -160.9292449951172, + "loss": 0.857, + "rewards/chosen": 1.6249401569366455, + "rewards/margins": 0.06854955852031708, + "rewards/rejected": 1.5563905239105225, + "step": 1720 + }, + { + "accuracy": 0.4375, + "epoch": 0.43, + "learning_rate": 9.500502655952064e-06, + "logps/chosen": -154.45733642578125, + "logps/margins": 3.067988634109497, + "logps/rejected": -157.52532958984375, + "loss": 0.9216, + "rewards/chosen": 1.165889024734497, + "rewards/margins": -0.16678759455680847, + "rewards/rejected": 1.3326765298843384, + "step": 1730 + }, + { + "accuracy": 0.4375, + "epoch": 0.43, + "learning_rate": 9.494784124234999e-06, + "logps/chosen": -179.69869995117188, + "logps/margins": -22.62604331970215, + "logps/rejected": -157.07266235351562, + "loss": 0.8273, + "rewards/chosen": 1.5295450687408447, + "rewards/margins": -0.042375583201646805, + "rewards/rejected": 1.57192063331604, + "step": 1740 + }, + { + "accuracy": 0.42500001192092896, + "epoch": 0.44, + "learning_rate": 9.489034785257372e-06, + "logps/chosen": -179.21920776367188, + "logps/margins": -34.96796417236328, + "logps/rejected": -144.251220703125, + "loss": 0.9165, + "rewards/chosen": 1.5738520622253418, + "rewards/margins": -0.12565232813358307, + "rewards/rejected": 1.6995042562484741, + "step": 1750 + }, + { + "accuracy": 0.550000011920929, + "epoch": 0.44, + "learning_rate": 9.483254678425166e-06, + "logps/chosen": -162.9087371826172, + "logps/margins": -13.896394729614258, + "logps/rejected": -149.01234436035156, + "loss": 0.7608, + "rewards/chosen": 2.1526412963867188, + "rewards/margins": 0.2928185760974884, + "rewards/rejected": 1.8598226308822632, + "step": 1760 + }, + { + "accuracy": 0.5625, + "epoch": 0.44, + "learning_rate": 9.477443843355248e-06, + "logps/chosen": -171.17800903320312, + "logps/margins": -12.703057289123535, + "logps/rejected": -158.47496032714844, + "loss": 0.9419, + "rewards/chosen": 2.2187118530273438, + "rewards/margins": 0.2649349570274353, + "rewards/rejected": 1.9537767171859741, + "step": 1770 + }, + { + "accuracy": 0.5874999761581421, + "epoch": 0.45, + "learning_rate": 9.471602319875092e-06, + "logps/chosen": -163.45155334472656, + "logps/margins": -3.0442728996276855, + "logps/rejected": -160.40728759765625, + "loss": 0.8717, + "rewards/chosen": 2.1406242847442627, + "rewards/margins": 0.17071543633937836, + "rewards/rejected": 1.9699089527130127, + "step": 1780 + }, + { + "accuracy": 0.550000011920929, + "epoch": 0.45, + "learning_rate": 9.465730148022516e-06, + "logps/chosen": -162.66729736328125, + "logps/margins": -4.228088855743408, + "logps/rejected": -158.439208984375, + "loss": 0.7452, + "rewards/chosen": 1.8978217840194702, + "rewards/margins": 0.3565039336681366, + "rewards/rejected": 1.5413178205490112, + "step": 1790 + }, + { + "accuracy": 0.4625000059604645, + "epoch": 0.45, + "learning_rate": 9.459827368045398e-06, + "logps/chosen": -157.06532287597656, + "logps/margins": 12.102384567260742, + "logps/rejected": -169.16770935058594, + "loss": 0.8809, + "rewards/chosen": 1.3664157390594482, + "rewards/margins": -0.11095012724399567, + "rewards/rejected": 1.4773657321929932, + "step": 1800 + }, + { + "accuracy": 0.5625, + "epoch": 0.45, + "learning_rate": 9.453894020401405e-06, + "logps/chosen": -170.4252166748047, + "logps/margins": -24.851381301879883, + "logps/rejected": -145.57382202148438, + "loss": 0.854, + "rewards/chosen": 1.6877434253692627, + "rewards/margins": 0.0690455287694931, + "rewards/rejected": 1.6186978816986084, + "step": 1810 + }, + { + "accuracy": 0.5249999761581421, + "epoch": 0.46, + "learning_rate": 9.447930145757714e-06, + "logps/chosen": -159.48239135742188, + "logps/margins": -30.633930206298828, + "logps/rejected": -128.84844970703125, + "loss": 0.8265, + "rewards/chosen": 1.6804593801498413, + "rewards/margins": 0.17598025500774384, + "rewards/rejected": 1.504479169845581, + "step": 1820 + }, + { + "accuracy": 0.5249999761581421, + "epoch": 0.46, + "learning_rate": 9.44193578499074e-06, + "logps/chosen": -153.574951171875, + "logps/margins": 16.195648193359375, + "logps/rejected": -169.77061462402344, + "loss": 0.8313, + "rewards/chosen": 2.1873607635498047, + "rewards/margins": 0.2899624705314636, + "rewards/rejected": 1.8973983526229858, + "step": 1830 + }, + { + "accuracy": 0.5375000238418579, + "epoch": 0.46, + "learning_rate": 9.435910979185838e-06, + "logps/chosen": -165.07798767089844, + "logps/margins": -10.375341415405273, + "logps/rejected": -154.70262145996094, + "loss": 0.775, + "rewards/chosen": 2.0324389934539795, + "rewards/margins": 0.29568594694137573, + "rewards/rejected": 1.7367531061172485, + "step": 1840 + }, + { + "accuracy": 0.550000011920929, + "epoch": 0.46, + "learning_rate": 9.429855769637046e-06, + "logps/chosen": -167.1012725830078, + "logps/margins": -13.259292602539062, + "logps/rejected": -153.8419647216797, + "loss": 0.7373, + "rewards/chosen": 1.9836927652359009, + "rewards/margins": 0.2919122576713562, + "rewards/rejected": 1.691780686378479, + "step": 1850 + }, + { + "accuracy": 0.574999988079071, + "epoch": 0.47, + "learning_rate": 9.423770197846782e-06, + "logps/chosen": -163.0049285888672, + "logps/margins": -33.225242614746094, + "logps/rejected": -129.77969360351562, + "loss": 0.7788, + "rewards/chosen": 1.8734190464019775, + "rewards/margins": 0.4123324453830719, + "rewards/rejected": 1.4610862731933594, + "step": 1860 + }, + { + "accuracy": 0.675000011920929, + "epoch": 0.47, + "learning_rate": 9.41765430552557e-06, + "logps/chosen": -149.49099731445312, + "logps/margins": -6.378541946411133, + "logps/rejected": -143.11245727539062, + "loss": 0.7847, + "rewards/chosen": 2.011195182800293, + "rewards/margins": 0.5788583159446716, + "rewards/rejected": 1.4323368072509766, + "step": 1870 + }, + { + "accuracy": 0.5249999761581421, + "epoch": 0.47, + "learning_rate": 9.41150813459175e-06, + "logps/chosen": -166.43115234375, + "logps/margins": -41.93206024169922, + "logps/rejected": -124.49909973144531, + "loss": 0.8225, + "rewards/chosen": 2.0830183029174805, + "rewards/margins": 0.31741800904273987, + "rewards/rejected": 1.7656002044677734, + "step": 1880 + }, + { + "accuracy": 0.637499988079071, + "epoch": 0.47, + "learning_rate": 9.405331727171188e-06, + "logps/chosen": -167.52420043945312, + "logps/margins": -13.432635307312012, + "logps/rejected": -154.09158325195312, + "loss": 0.7891, + "rewards/chosen": 1.6620880365371704, + "rewards/margins": 0.29735979437828064, + "rewards/rejected": 1.3647282123565674, + "step": 1890 + }, + { + "accuracy": 0.5375000238418579, + "epoch": 0.47, + "learning_rate": 9.399125125596998e-06, + "logps/chosen": -126.37520599365234, + "logps/margins": -0.17889738082885742, + "logps/rejected": -126.19630432128906, + "loss": 0.8321, + "rewards/chosen": 1.6267598867416382, + "rewards/margins": 0.07309775054454803, + "rewards/rejected": 1.5536620616912842, + "step": 1900 + }, + { + "accuracy": 0.5249999761581421, + "epoch": 0.48, + "learning_rate": 9.392888372409241e-06, + "logps/chosen": -140.39883422851562, + "logps/margins": -4.906195640563965, + "logps/rejected": -135.4926300048828, + "loss": 0.9009, + "rewards/chosen": 1.4131845235824585, + "rewards/margins": -0.08801586925983429, + "rewards/rejected": 1.5012004375457764, + "step": 1910 + }, + { + "accuracy": 0.5, + "epoch": 0.48, + "learning_rate": 9.386621510354637e-06, + "logps/chosen": -161.00296020507812, + "logps/margins": -9.53292179107666, + "logps/rejected": -151.47003173828125, + "loss": 0.8814, + "rewards/chosen": 1.7925631999969482, + "rewards/margins": -0.0049194516614079475, + "rewards/rejected": 1.7974828481674194, + "step": 1920 + }, + { + "accuracy": 0.6000000238418579, + "epoch": 0.48, + "learning_rate": 9.380324582386271e-06, + "logps/chosen": -177.1588134765625, + "logps/margins": -4.3390045166015625, + "logps/rejected": -172.81980895996094, + "loss": 0.7887, + "rewards/chosen": 2.100454330444336, + "rewards/margins": -0.054205410182476044, + "rewards/rejected": 2.1546597480773926, + "step": 1930 + }, + { + "accuracy": 0.5375000238418579, + "epoch": 0.48, + "learning_rate": 9.373997631663305e-06, + "logps/chosen": -171.18698120117188, + "logps/margins": 7.4255876541137695, + "logps/rejected": -178.61257934570312, + "loss": 0.7667, + "rewards/chosen": 1.9432332515716553, + "rewards/margins": 0.1751747578382492, + "rewards/rejected": 1.7680585384368896, + "step": 1940 + }, + { + "accuracy": 0.612500011920929, + "epoch": 0.49, + "learning_rate": 9.36764070155067e-06, + "logps/chosen": -159.49673461914062, + "logps/margins": -13.000417709350586, + "logps/rejected": -146.49630737304688, + "loss": 0.8482, + "rewards/chosen": 1.8596569299697876, + "rewards/margins": 0.3872297406196594, + "rewards/rejected": 1.4724268913269043, + "step": 1950 + }, + { + "accuracy": 0.5, + "epoch": 0.49, + "learning_rate": 9.361253835618781e-06, + "logps/chosen": -156.4537811279297, + "logps/margins": -0.1926700621843338, + "logps/rejected": -156.26109313964844, + "loss": 0.8376, + "rewards/chosen": 1.8370577096939087, + "rewards/margins": 0.30063915252685547, + "rewards/rejected": 1.5364184379577637, + "step": 1960 + }, + { + "accuracy": 0.5874999761581421, + "epoch": 0.49, + "learning_rate": 9.354837077643233e-06, + "logps/chosen": -190.13194274902344, + "logps/margins": -39.07542419433594, + "logps/rejected": -151.05653381347656, + "loss": 0.8207, + "rewards/chosen": 1.7069008350372314, + "rewards/margins": 0.30920490622520447, + "rewards/rejected": 1.3976958990097046, + "step": 1970 + }, + { + "accuracy": 0.625, + "epoch": 0.49, + "learning_rate": 9.348390471604495e-06, + "logps/chosen": -136.9534912109375, + "logps/margins": 15.227800369262695, + "logps/rejected": -152.18128967285156, + "loss": 0.8268, + "rewards/chosen": 1.5990936756134033, + "rewards/margins": 0.10957352817058563, + "rewards/rejected": 1.4895203113555908, + "step": 1980 + }, + { + "accuracy": 0.6000000238418579, + "epoch": 0.5, + "learning_rate": 9.341914061687627e-06, + "logps/chosen": -166.63194274902344, + "logps/margins": -14.423248291015625, + "logps/rejected": -152.2086944580078, + "loss": 0.7765, + "rewards/chosen": 1.6230442523956299, + "rewards/margins": 0.3474811017513275, + "rewards/rejected": 1.2755630016326904, + "step": 1990 + }, + { + "accuracy": 0.5874999761581421, + "epoch": 0.5, + "learning_rate": 9.33540789228195e-06, + "logps/chosen": -120.91500091552734, + "logps/margins": 8.692987442016602, + "logps/rejected": -129.60800170898438, + "loss": 0.8261, + "rewards/chosen": 1.6760156154632568, + "rewards/margins": 0.18651649355888367, + "rewards/rejected": 1.4894990921020508, + "step": 2000 + }, + { + "accuracy": 0.5874999761581421, + "epoch": 0.5, + "learning_rate": 9.328872007980766e-06, + "logps/chosen": -167.70864868164062, + "logps/margins": -10.464682579040527, + "logps/rejected": -157.24395751953125, + "loss": 0.7936, + "rewards/chosen": 1.7660713195800781, + "rewards/margins": 0.13920119404792786, + "rewards/rejected": 1.6268701553344727, + "step": 2010 + }, + { + "accuracy": 0.5625, + "epoch": 0.51, + "learning_rate": 9.322306453581044e-06, + "logps/chosen": -158.08399963378906, + "logps/margins": -10.94042682647705, + "logps/rejected": -147.14356994628906, + "loss": 0.8144, + "rewards/chosen": 1.985290765762329, + "rewards/margins": 0.20295262336730957, + "rewards/rejected": 1.7823379039764404, + "step": 2020 + }, + { + "accuracy": 0.48750001192092896, + "epoch": 0.51, + "learning_rate": 9.315711274083104e-06, + "logps/chosen": -146.8514862060547, + "logps/margins": 36.926002502441406, + "logps/rejected": -183.77749633789062, + "loss": 0.839, + "rewards/chosen": 1.5715597867965698, + "rewards/margins": -0.21107907593250275, + "rewards/rejected": 1.7826389074325562, + "step": 2030 + }, + { + "accuracy": 0.5375000238418579, + "epoch": 0.51, + "learning_rate": 9.309086514690325e-06, + "logps/chosen": -165.84413146972656, + "logps/margins": -4.257134437561035, + "logps/rejected": -161.58700561523438, + "loss": 0.7648, + "rewards/chosen": 1.877524971961975, + "rewards/margins": 0.29181772470474243, + "rewards/rejected": 1.5857069492340088, + "step": 2040 + }, + { + "accuracy": 0.512499988079071, + "epoch": 0.51, + "learning_rate": 9.30243222080882e-06, + "logps/chosen": -162.48243713378906, + "logps/margins": -14.9164457321167, + "logps/rejected": -147.5659942626953, + "loss": 0.7444, + "rewards/chosen": 2.0277488231658936, + "rewards/margins": 0.3224651515483856, + "rewards/rejected": 1.705283761024475, + "step": 2050 + }, + { + "accuracy": 0.550000011920929, + "epoch": 0.52, + "learning_rate": 9.295748438047135e-06, + "logps/chosen": -167.5883026123047, + "logps/margins": -22.718687057495117, + "logps/rejected": -144.86959838867188, + "loss": 0.7477, + "rewards/chosen": 1.823158860206604, + "rewards/margins": 0.20282845199108124, + "rewards/rejected": 1.6203302145004272, + "step": 2060 + }, + { + "accuracy": 0.5625, + "epoch": 0.52, + "learning_rate": 9.289035212215934e-06, + "logps/chosen": -142.55181884765625, + "logps/margins": -10.19264030456543, + "logps/rejected": -132.35916137695312, + "loss": 0.7773, + "rewards/chosen": 1.799380898475647, + "rewards/margins": 0.1436457335948944, + "rewards/rejected": 1.6557352542877197, + "step": 2070 + }, + { + "accuracy": 0.6000000238418579, + "epoch": 0.52, + "learning_rate": 9.28229258932768e-06, + "logps/chosen": -169.29144287109375, + "logps/margins": -27.8690185546875, + "logps/rejected": -141.42242431640625, + "loss": 0.7503, + "rewards/chosen": 1.7960469722747803, + "rewards/margins": 0.26247820258140564, + "rewards/rejected": 1.5335689783096313, + "step": 2080 + }, + { + "accuracy": 0.48750001192092896, + "epoch": 0.52, + "learning_rate": 9.275520615596327e-06, + "logps/chosen": -152.72715759277344, + "logps/margins": -0.012831497006118298, + "logps/rejected": -152.71432495117188, + "loss": 0.7708, + "rewards/chosen": 1.7320560216903687, + "rewards/margins": 0.003945007920265198, + "rewards/rejected": 1.728110909461975, + "step": 2090 + }, + { + "accuracy": 0.5874999761581421, + "epoch": 0.53, + "learning_rate": 9.268719337436996e-06, + "logps/chosen": -162.87667846679688, + "logps/margins": 2.2590041160583496, + "logps/rejected": -165.1356964111328, + "loss": 0.8203, + "rewards/chosen": 1.7294566631317139, + "rewards/margins": 0.255315363407135, + "rewards/rejected": 1.4741413593292236, + "step": 2100 + }, + { + "accuracy": 0.512499988079071, + "epoch": 0.53, + "learning_rate": 9.261888801465665e-06, + "logps/chosen": -159.4783477783203, + "logps/margins": -2.8032524585723877, + "logps/rejected": -156.67507934570312, + "loss": 0.8294, + "rewards/chosen": 1.7062733173370361, + "rewards/margins": 0.056447289884090424, + "rewards/rejected": 1.6498260498046875, + "step": 2110 + }, + { + "accuracy": 0.5, + "epoch": 0.53, + "learning_rate": 9.255029054498845e-06, + "logps/chosen": -152.52377319335938, + "logps/margins": 2.105691432952881, + "logps/rejected": -154.62945556640625, + "loss": 0.7723, + "rewards/chosen": 1.4842714071273804, + "rewards/margins": 0.08453354984521866, + "rewards/rejected": 1.39973783493042, + "step": 2120 + }, + { + "accuracy": 0.550000011920929, + "epoch": 0.53, + "learning_rate": 9.248140143553253e-06, + "logps/chosen": -125.8509521484375, + "logps/margins": 7.844496726989746, + "logps/rejected": -133.69544982910156, + "loss": 0.7172, + "rewards/chosen": 1.461629867553711, + "rewards/margins": 0.22742874920368195, + "rewards/rejected": 1.234201192855835, + "step": 2130 + }, + { + "accuracy": 0.512499988079071, + "epoch": 0.54, + "learning_rate": 9.2412221158455e-06, + "logps/chosen": -158.1907958984375, + "logps/margins": -40.407691955566406, + "logps/rejected": -117.7831039428711, + "loss": 0.8188, + "rewards/chosen": 1.3421199321746826, + "rewards/margins": -0.09975908696651459, + "rewards/rejected": 1.4418790340423584, + "step": 2140 + }, + { + "accuracy": 0.574999988079071, + "epoch": 0.54, + "learning_rate": 9.234275018791769e-06, + "logps/chosen": -148.055908203125, + "logps/margins": -5.959725379943848, + "logps/rejected": -142.09622192382812, + "loss": 0.8061, + "rewards/chosen": 1.774924635887146, + "rewards/margins": 0.25871556997299194, + "rewards/rejected": 1.5162090063095093, + "step": 2150 + }, + { + "accuracy": 0.5375000238418579, + "epoch": 0.54, + "learning_rate": 9.227298900007474e-06, + "logps/chosen": -137.8963165283203, + "logps/margins": 13.159306526184082, + "logps/rejected": -151.05563354492188, + "loss": 0.8412, + "rewards/chosen": 1.4557017087936401, + "rewards/margins": 0.05740945786237717, + "rewards/rejected": 1.3982923030853271, + "step": 2160 + }, + { + "accuracy": 0.48750001192092896, + "epoch": 0.54, + "learning_rate": 9.220293807306948e-06, + "logps/chosen": -178.25088500976562, + "logps/margins": -25.431116104125977, + "logps/rejected": -152.8197479248047, + "loss": 0.8634, + "rewards/chosen": 1.5417152643203735, + "rewards/margins": -0.1308407038450241, + "rewards/rejected": 1.672555923461914, + "step": 2170 + }, + { + "accuracy": 0.5375000238418579, + "epoch": 0.55, + "learning_rate": 9.213259788703118e-06, + "logps/chosen": -182.5172576904297, + "logps/margins": -21.872121810913086, + "logps/rejected": -160.6451416015625, + "loss": 0.8366, + "rewards/chosen": 1.80950129032135, + "rewards/margins": 0.14453944563865662, + "rewards/rejected": 1.664961814880371, + "step": 2180 + }, + { + "accuracy": 0.612500011920929, + "epoch": 0.55, + "learning_rate": 9.206196892407158e-06, + "logps/chosen": -168.9132537841797, + "logps/margins": -35.30933380126953, + "logps/rejected": -133.6039276123047, + "loss": 0.7842, + "rewards/chosen": 1.8597646951675415, + "rewards/margins": 0.14125025272369385, + "rewards/rejected": 1.7185144424438477, + "step": 2190 + }, + { + "accuracy": 0.4375, + "epoch": 0.55, + "learning_rate": 9.199105166828179e-06, + "logps/chosen": -139.2642364501953, + "logps/margins": 16.201541900634766, + "logps/rejected": -155.46578979492188, + "loss": 0.8081, + "rewards/chosen": 1.5757122039794922, + "rewards/margins": -0.13621333241462708, + "rewards/rejected": 1.711925745010376, + "step": 2200 + }, + { + "accuracy": 0.5874999761581421, + "epoch": 0.55, + "learning_rate": 9.19198466057288e-06, + "logps/chosen": -134.79681396484375, + "logps/margins": 3.2388598918914795, + "logps/rejected": -138.03567504882812, + "loss": 0.8749, + "rewards/chosen": 1.8258867263793945, + "rewards/margins": 0.27075594663619995, + "rewards/rejected": 1.555130958557129, + "step": 2210 + }, + { + "accuracy": 0.550000011920929, + "epoch": 0.56, + "learning_rate": 9.18483542244523e-06, + "logps/chosen": -178.60345458984375, + "logps/margins": -18.947158813476562, + "logps/rejected": -159.6562957763672, + "loss": 0.7734, + "rewards/chosen": 1.7505261898040771, + "rewards/margins": 0.2822708785533905, + "rewards/rejected": 1.4682552814483643, + "step": 2220 + }, + { + "accuracy": 0.574999988079071, + "epoch": 0.56, + "learning_rate": 9.177657501446125e-06, + "logps/chosen": -160.14938354492188, + "logps/margins": -24.04581069946289, + "logps/rejected": -136.1035614013672, + "loss": 0.7546, + "rewards/chosen": 1.8193641901016235, + "rewards/margins": 0.5835358500480652, + "rewards/rejected": 1.2358283996582031, + "step": 2230 + }, + { + "accuracy": 0.4625000059604645, + "epoch": 0.56, + "learning_rate": 9.170450946773047e-06, + "logps/chosen": -139.8365936279297, + "logps/margins": 10.286653518676758, + "logps/rejected": -150.12326049804688, + "loss": 0.877, + "rewards/chosen": 1.694026231765747, + "rewards/margins": -0.005369952414184809, + "rewards/rejected": 1.6993963718414307, + "step": 2240 + }, + { + "accuracy": 0.5874999761581421, + "epoch": 0.56, + "learning_rate": 9.163215807819742e-06, + "logps/chosen": -141.0439453125, + "logps/margins": -5.759264945983887, + "logps/rejected": -135.28466796875, + "loss": 0.8019, + "rewards/chosen": 1.4926296472549438, + "rewards/margins": 0.10668959468603134, + "rewards/rejected": 1.3859398365020752, + "step": 2250 + }, + { + "accuracy": 0.512499988079071, + "epoch": 0.56, + "learning_rate": 9.155952134175866e-06, + "logps/chosen": -147.4812469482422, + "logps/margins": 25.673351287841797, + "logps/rejected": -173.1545867919922, + "loss": 0.788, + "rewards/chosen": 1.9739952087402344, + "rewards/margins": -0.09373383224010468, + "rewards/rejected": 2.0677289962768555, + "step": 2260 + }, + { + "accuracy": 0.6000000238418579, + "epoch": 0.57, + "learning_rate": 9.148659975626657e-06, + "logps/chosen": -143.13723754882812, + "logps/margins": -16.106327056884766, + "logps/rejected": -127.0309066772461, + "loss": 0.7857, + "rewards/chosen": 1.7816076278686523, + "rewards/margins": 0.26208606362342834, + "rewards/rejected": 1.5195214748382568, + "step": 2270 + }, + { + "accuracy": 0.48750001192092896, + "epoch": 0.57, + "learning_rate": 9.141339382152584e-06, + "logps/chosen": -162.29852294921875, + "logps/margins": -20.860191345214844, + "logps/rejected": -141.43833923339844, + "loss": 0.7919, + "rewards/chosen": 1.8563302755355835, + "rewards/margins": 0.21059107780456543, + "rewards/rejected": 1.645738959312439, + "step": 2280 + }, + { + "accuracy": 0.637499988079071, + "epoch": 0.57, + "learning_rate": 9.133990403929013e-06, + "logps/chosen": -156.83230590820312, + "logps/margins": -16.177400588989258, + "logps/rejected": -140.6549072265625, + "loss": 0.7496, + "rewards/chosen": 1.7892961502075195, + "rewards/margins": 0.25427499413490295, + "rewards/rejected": 1.5350210666656494, + "step": 2290 + }, + { + "accuracy": 0.4749999940395355, + "epoch": 0.57, + "learning_rate": 9.126613091325856e-06, + "logps/chosen": -159.8083953857422, + "logps/margins": -0.3494918942451477, + "logps/rejected": -159.45889282226562, + "loss": 0.7925, + "rewards/chosen": 1.592434287071228, + "rewards/margins": -0.017310649156570435, + "rewards/rejected": 1.6097447872161865, + "step": 2300 + }, + { + "accuracy": 0.5375000238418579, + "epoch": 0.58, + "learning_rate": 9.119207494907233e-06, + "logps/chosen": -151.39175415039062, + "logps/margins": 18.52858543395996, + "logps/rejected": -169.9203338623047, + "loss": 0.7989, + "rewards/chosen": 1.9851707220077515, + "rewards/margins": 0.09721750766038895, + "rewards/rejected": 1.8879531621932983, + "step": 2310 + }, + { + "accuracy": 0.48750001192092896, + "epoch": 0.58, + "learning_rate": 9.111773665431114e-06, + "logps/chosen": -145.84954833984375, + "logps/margins": 8.649538040161133, + "logps/rejected": -154.4990997314453, + "loss": 0.8464, + "rewards/chosen": 1.5865757465362549, + "rewards/margins": -0.05357605218887329, + "rewards/rejected": 1.6401517391204834, + "step": 2320 + }, + { + "accuracy": 0.625, + "epoch": 0.58, + "learning_rate": 9.104311653848988e-06, + "logps/chosen": -165.7419891357422, + "logps/margins": -16.4604434967041, + "logps/rejected": -149.2815704345703, + "loss": 0.7278, + "rewards/chosen": 2.0504238605499268, + "rewards/margins": 0.5390018224716187, + "rewards/rejected": 1.511421799659729, + "step": 2330 + }, + { + "accuracy": 0.5249999761581421, + "epoch": 0.58, + "learning_rate": 9.096821511305494e-06, + "logps/chosen": -175.6136932373047, + "logps/margins": -11.866948127746582, + "logps/rejected": -163.7467498779297, + "loss": 0.8684, + "rewards/chosen": 1.8378212451934814, + "rewards/margins": 0.17367655038833618, + "rewards/rejected": 1.664144515991211, + "step": 2340 + }, + { + "accuracy": 0.4749999940395355, + "epoch": 0.59, + "learning_rate": 9.089303289138091e-06, + "logps/chosen": -150.4571533203125, + "logps/margins": -9.216192245483398, + "logps/rejected": -141.24095153808594, + "loss": 0.7793, + "rewards/chosen": 1.6893303394317627, + "rewards/margins": -0.04371819272637367, + "rewards/rejected": 1.7330482006072998, + "step": 2350 + }, + { + "accuracy": 0.5, + "epoch": 0.59, + "learning_rate": 9.081757038876689e-06, + "logps/chosen": -144.45582580566406, + "logps/margins": 5.97637414932251, + "logps/rejected": -150.4322052001953, + "loss": 0.7776, + "rewards/chosen": 1.6179802417755127, + "rewards/margins": 0.029508357867598534, + "rewards/rejected": 1.5884718894958496, + "step": 2360 + }, + { + "accuracy": 0.5625, + "epoch": 0.59, + "learning_rate": 9.074182812243301e-06, + "logps/chosen": -143.5775909423828, + "logps/margins": -18.096410751342773, + "logps/rejected": -125.48116302490234, + "loss": 0.7718, + "rewards/chosen": 1.6561262607574463, + "rewards/margins": 0.27639085054397583, + "rewards/rejected": 1.3797352313995361, + "step": 2370 + }, + { + "accuracy": 0.4749999940395355, + "epoch": 0.59, + "learning_rate": 9.0665806611517e-06, + "logps/chosen": -140.8172607421875, + "logps/margins": -11.822988510131836, + "logps/rejected": -128.99429321289062, + "loss": 0.821, + "rewards/chosen": 1.6826212406158447, + "rewards/margins": 0.013142955489456654, + "rewards/rejected": 1.669478178024292, + "step": 2380 + }, + { + "accuracy": 0.6499999761581421, + "epoch": 0.6, + "learning_rate": 9.058950637707043e-06, + "logps/chosen": -162.18556213378906, + "logps/margins": -21.36416244506836, + "logps/rejected": -140.82139587402344, + "loss": 0.8002, + "rewards/chosen": 1.8264729976654053, + "rewards/margins": 0.42425379157066345, + "rewards/rejected": 1.4022191762924194, + "step": 2390 + }, + { + "accuracy": 0.5874999761581421, + "epoch": 0.6, + "learning_rate": 9.051292794205526e-06, + "logps/chosen": -139.4217071533203, + "logps/margins": 7.258561134338379, + "logps/rejected": -146.68026733398438, + "loss": 0.8115, + "rewards/chosen": 1.5714809894561768, + "rewards/margins": 0.19860979914665222, + "rewards/rejected": 1.3728711605072021, + "step": 2400 + }, + { + "accuracy": 0.550000011920929, + "epoch": 0.6, + "learning_rate": 9.043607183134029e-06, + "logps/chosen": -170.65122985839844, + "logps/margins": -13.439526557922363, + "logps/rejected": -157.2117156982422, + "loss": 0.854, + "rewards/chosen": 1.9065945148468018, + "rewards/margins": 0.10046534240245819, + "rewards/rejected": 1.8061290979385376, + "step": 2410 + }, + { + "accuracy": 0.5625, + "epoch": 0.6, + "learning_rate": 9.035893857169746e-06, + "logps/chosen": -143.10107421875, + "logps/margins": 4.725135803222656, + "logps/rejected": -147.8262176513672, + "loss": 0.7349, + "rewards/chosen": 1.816597580909729, + "rewards/margins": 0.16226616501808167, + "rewards/rejected": 1.6543312072753906, + "step": 2420 + }, + { + "accuracy": 0.5, + "epoch": 0.61, + "learning_rate": 9.028152869179831e-06, + "logps/chosen": -153.50521850585938, + "logps/margins": 3.902616024017334, + "logps/rejected": -157.4078369140625, + "loss": 0.754, + "rewards/chosen": 1.8235887289047241, + "rewards/margins": 0.17439498007297516, + "rewards/rejected": 1.6491937637329102, + "step": 2430 + }, + { + "accuracy": 0.637499988079071, + "epoch": 0.61, + "learning_rate": 9.020384272221035e-06, + "logps/chosen": -144.56881713867188, + "logps/margins": 0.3802814483642578, + "logps/rejected": -144.9490966796875, + "loss": 0.7922, + "rewards/chosen": 2.1358447074890137, + "rewards/margins": 0.341838538646698, + "rewards/rejected": 1.794006109237671, + "step": 2440 + }, + { + "accuracy": 0.6000000238418579, + "epoch": 0.61, + "learning_rate": 9.012588119539337e-06, + "logps/chosen": -174.88278198242188, + "logps/margins": -21.781993865966797, + "logps/rejected": -153.1007843017578, + "loss": 0.8242, + "rewards/chosen": 1.9694465398788452, + "rewards/margins": 0.4216902256011963, + "rewards/rejected": 1.5477564334869385, + "step": 2450 + }, + { + "accuracy": 0.550000011920929, + "epoch": 0.61, + "learning_rate": 9.004764464569584e-06, + "logps/chosen": -142.15045166015625, + "logps/margins": -4.6801066398620605, + "logps/rejected": -137.47035217285156, + "loss": 0.82, + "rewards/chosen": 1.8222211599349976, + "rewards/margins": 0.20227757096290588, + "rewards/rejected": 1.619943618774414, + "step": 2460 + }, + { + "accuracy": 0.5874999761581421, + "epoch": 0.62, + "learning_rate": 8.996913360935129e-06, + "logps/chosen": -147.394775390625, + "logps/margins": -2.0654754638671875, + "logps/rejected": -145.3292999267578, + "loss": 0.8503, + "rewards/chosen": 1.5807874202728271, + "rewards/margins": 0.2154150754213333, + "rewards/rejected": 1.3653721809387207, + "step": 2470 + }, + { + "accuracy": 0.48750001192092896, + "epoch": 0.62, + "learning_rate": 8.98903486244745e-06, + "logps/chosen": -180.33253479003906, + "logps/margins": -11.23771858215332, + "logps/rejected": -169.09481811523438, + "loss": 0.8198, + "rewards/chosen": 1.9343461990356445, + "rewards/margins": 0.06712658703327179, + "rewards/rejected": 1.8672195672988892, + "step": 2480 + }, + { + "accuracy": 0.4625000059604645, + "epoch": 0.62, + "learning_rate": 8.981129023105795e-06, + "logps/chosen": -166.7818145751953, + "logps/margins": -7.165487766265869, + "logps/rejected": -159.61634826660156, + "loss": 0.8073, + "rewards/chosen": 1.6975730657577515, + "rewards/margins": -0.007704681716859341, + "rewards/rejected": 1.7052780389785767, + "step": 2490 + }, + { + "accuracy": 0.48750001192092896, + "epoch": 0.62, + "learning_rate": 8.973195897096806e-06, + "logps/chosen": -147.55386352539062, + "logps/margins": -2.1087851524353027, + "logps/rejected": -145.445068359375, + "loss": 0.7808, + "rewards/chosen": 1.3104054927825928, + "rewards/margins": -0.2292346954345703, + "rewards/rejected": 1.5396400690078735, + "step": 2500 + }, + { + "accuracy": 0.5, + "epoch": 0.63, + "learning_rate": 8.965235538794145e-06, + "logps/chosen": -153.317138671875, + "logps/margins": -13.002995491027832, + "logps/rejected": -140.3141632080078, + "loss": 0.8174, + "rewards/chosen": 1.7759571075439453, + "rewards/margins": 0.1918468177318573, + "rewards/rejected": 1.5841103792190552, + "step": 2510 + }, + { + "accuracy": 0.4749999940395355, + "epoch": 0.63, + "learning_rate": 8.957248002758125e-06, + "logps/chosen": -160.869384765625, + "logps/margins": 21.718250274658203, + "logps/rejected": -182.58761596679688, + "loss": 0.844, + "rewards/chosen": 1.6739715337753296, + "rewards/margins": -0.03881732374429703, + "rewards/rejected": 1.7127888202667236, + "step": 2520 + }, + { + "accuracy": 0.6499999761581421, + "epoch": 0.63, + "learning_rate": 8.949233343735339e-06, + "logps/chosen": -172.44351196289062, + "logps/margins": -13.763504028320312, + "logps/rejected": -158.67999267578125, + "loss": 0.8251, + "rewards/chosen": 1.7791717052459717, + "rewards/margins": 0.35052114725112915, + "rewards/rejected": 1.4286506175994873, + "step": 2530 + }, + { + "accuracy": 0.550000011920929, + "epoch": 0.64, + "learning_rate": 8.941191616658275e-06, + "logps/chosen": -155.2109832763672, + "logps/margins": -3.1990745067596436, + "logps/rejected": -152.01187133789062, + "loss": 0.8031, + "rewards/chosen": 1.6604912281036377, + "rewards/margins": 0.10237333923578262, + "rewards/rejected": 1.5581179857254028, + "step": 2540 + }, + { + "accuracy": 0.612500011920929, + "epoch": 0.64, + "learning_rate": 8.933122876644953e-06, + "logps/chosen": -178.10671997070312, + "logps/margins": -14.75318431854248, + "logps/rejected": -163.3535614013672, + "loss": 0.8632, + "rewards/chosen": 1.9766952991485596, + "rewards/margins": 0.22956128418445587, + "rewards/rejected": 1.7471338510513306, + "step": 2550 + }, + { + "accuracy": 0.5375000238418579, + "epoch": 0.64, + "learning_rate": 8.925027178998528e-06, + "logps/chosen": -170.93984985351562, + "logps/margins": 4.341712951660156, + "logps/rejected": -175.2815704345703, + "loss": 0.7558, + "rewards/chosen": 1.8331180810928345, + "rewards/margins": 0.30583077669143677, + "rewards/rejected": 1.527287244796753, + "step": 2560 + }, + { + "accuracy": 0.512499988079071, + "epoch": 0.64, + "learning_rate": 8.916904579206937e-06, + "logps/chosen": -183.05691528320312, + "logps/margins": 8.751852989196777, + "logps/rejected": -191.8087615966797, + "loss": 0.7759, + "rewards/chosen": 1.9488487243652344, + "rewards/margins": 0.5405310392379761, + "rewards/rejected": 1.4083175659179688, + "step": 2570 + }, + { + "accuracy": 0.612500011920929, + "epoch": 0.65, + "learning_rate": 8.908755132942494e-06, + "logps/chosen": -136.88320922851562, + "logps/margins": -11.020042419433594, + "logps/rejected": -125.8631591796875, + "loss": 0.7794, + "rewards/chosen": 1.530692458152771, + "rewards/margins": 0.2433866560459137, + "rewards/rejected": 1.2873058319091797, + "step": 2580 + }, + { + "accuracy": 0.6000000238418579, + "epoch": 0.65, + "learning_rate": 8.900578896061524e-06, + "logps/chosen": -146.7117919921875, + "logps/margins": -11.104670524597168, + "logps/rejected": -135.6071319580078, + "loss": 0.8857, + "rewards/chosen": 1.2212272882461548, + "rewards/margins": 0.07540423423051834, + "rewards/rejected": 1.1458228826522827, + "step": 2590 + }, + { + "accuracy": 0.512499988079071, + "epoch": 0.65, + "learning_rate": 8.89237592460397e-06, + "logps/chosen": -138.83975219726562, + "logps/margins": -2.215001344680786, + "logps/rejected": -136.624755859375, + "loss": 0.8233, + "rewards/chosen": 1.7345571517944336, + "rewards/margins": -0.03856398165225983, + "rewards/rejected": 1.7731212377548218, + "step": 2600 + }, + { + "accuracy": 0.5249999761581421, + "epoch": 0.65, + "learning_rate": 8.884146274793022e-06, + "logps/chosen": -160.31529235839844, + "logps/margins": 14.85515308380127, + "logps/rejected": -175.1704559326172, + "loss": 0.8134, + "rewards/chosen": 1.4943859577178955, + "rewards/margins": -0.018301963806152344, + "rewards/rejected": 1.5126876831054688, + "step": 2610 + }, + { + "accuracy": 0.6499999761581421, + "epoch": 0.66, + "learning_rate": 8.875890003034713e-06, + "logps/chosen": -146.08267211914062, + "logps/margins": 40.0573616027832, + "logps/rejected": -186.1400146484375, + "loss": 0.7523, + "rewards/chosen": 1.6075210571289062, + "rewards/margins": 0.4434809684753418, + "rewards/rejected": 1.164040207862854, + "step": 2620 + }, + { + "accuracy": 0.637499988079071, + "epoch": 0.66, + "learning_rate": 8.86760716591755e-06, + "logps/chosen": -155.77761840820312, + "logps/margins": 11.430095672607422, + "logps/rejected": -167.20773315429688, + "loss": 0.8126, + "rewards/chosen": 1.6409187316894531, + "rewards/margins": 0.4476473927497864, + "rewards/rejected": 1.1932713985443115, + "step": 2630 + }, + { + "accuracy": 0.42500001192092896, + "epoch": 0.66, + "learning_rate": 8.859297820212118e-06, + "logps/chosen": -188.03599548339844, + "logps/margins": -8.601591110229492, + "logps/rejected": -179.43441772460938, + "loss": 0.792, + "rewards/chosen": 1.3785462379455566, + "rewards/margins": -0.18648740649223328, + "rewards/rejected": 1.5650336742401123, + "step": 2640 + }, + { + "accuracy": 0.5375000238418579, + "epoch": 0.66, + "learning_rate": 8.850962022870692e-06, + "logps/chosen": -151.62315368652344, + "logps/margins": -5.000039100646973, + "logps/rejected": -146.62313842773438, + "loss": 0.8002, + "rewards/chosen": 1.631996512413025, + "rewards/margins": 0.4164932668209076, + "rewards/rejected": 1.2155033349990845, + "step": 2650 + }, + { + "accuracy": 0.550000011920929, + "epoch": 0.67, + "learning_rate": 8.842599831026846e-06, + "logps/chosen": -197.48471069335938, + "logps/margins": -24.33269691467285, + "logps/rejected": -173.15200805664062, + "loss": 0.7711, + "rewards/chosen": 1.63047194480896, + "rewards/margins": 0.1193113699555397, + "rewards/rejected": 1.5111606121063232, + "step": 2660 + }, + { + "accuracy": 0.5249999761581421, + "epoch": 0.67, + "learning_rate": 8.83421130199506e-06, + "logps/chosen": -148.03115844726562, + "logps/margins": 3.6409599781036377, + "logps/rejected": -151.67210388183594, + "loss": 0.7547, + "rewards/chosen": 1.974037766456604, + "rewards/margins": 0.2518533766269684, + "rewards/rejected": 1.722184181213379, + "step": 2670 + }, + { + "accuracy": 0.574999988079071, + "epoch": 0.67, + "learning_rate": 8.825796493270332e-06, + "logps/chosen": -147.1207275390625, + "logps/margins": -1.126124620437622, + "logps/rejected": -145.99459838867188, + "loss": 0.7721, + "rewards/chosen": 1.8635289669036865, + "rewards/margins": 0.28442710638046265, + "rewards/rejected": 1.579101800918579, + "step": 2680 + }, + { + "accuracy": 0.4749999940395355, + "epoch": 0.67, + "learning_rate": 8.817355462527779e-06, + "logps/chosen": -141.459716796875, + "logps/margins": 5.246458530426025, + "logps/rejected": -146.7061767578125, + "loss": 0.774, + "rewards/chosen": 1.573882818222046, + "rewards/margins": 0.14829522371292114, + "rewards/rejected": 1.4255876541137695, + "step": 2690 + }, + { + "accuracy": 0.625, + "epoch": 0.68, + "learning_rate": 8.808888267622243e-06, + "logps/chosen": -156.78652954101562, + "logps/margins": -9.923416137695312, + "logps/rejected": -146.8631134033203, + "loss": 0.7625, + "rewards/chosen": 1.564741611480713, + "rewards/margins": 0.2138366401195526, + "rewards/rejected": 1.3509048223495483, + "step": 2700 + }, + { + "accuracy": 0.5874999761581421, + "epoch": 0.68, + "learning_rate": 8.800394966587905e-06, + "logps/chosen": -161.2109375, + "logps/margins": -22.525259017944336, + "logps/rejected": -138.68565368652344, + "loss": 0.7972, + "rewards/chosen": 1.6655502319335938, + "rewards/margins": 0.21709592640399933, + "rewards/rejected": 1.4484546184539795, + "step": 2710 + }, + { + "accuracy": 0.512499988079071, + "epoch": 0.68, + "learning_rate": 8.791875617637861e-06, + "logps/chosen": -167.04269409179688, + "logps/margins": -10.494729995727539, + "logps/rejected": -156.54795837402344, + "loss": 0.8381, + "rewards/chosen": 1.614736557006836, + "rewards/margins": 0.13579224050045013, + "rewards/rejected": 1.4789444208145142, + "step": 2720 + }, + { + "accuracy": 0.5249999761581421, + "epoch": 0.68, + "learning_rate": 8.783330279163753e-06, + "logps/chosen": -123.72306060791016, + "logps/margins": -10.220602035522461, + "logps/rejected": -113.5024642944336, + "loss": 0.7595, + "rewards/chosen": 1.708743691444397, + "rewards/margins": 0.11826448142528534, + "rewards/rejected": 1.5904791355133057, + "step": 2730 + }, + { + "accuracy": 0.550000011920929, + "epoch": 0.69, + "learning_rate": 8.77475900973535e-06, + "logps/chosen": -192.87059020996094, + "logps/margins": -20.477632522583008, + "logps/rejected": -172.3929443359375, + "loss": 0.7957, + "rewards/chosen": 1.6575543880462646, + "rewards/margins": 0.050416022539138794, + "rewards/rejected": 1.6071382761001587, + "step": 2740 + }, + { + "accuracy": 0.5249999761581421, + "epoch": 0.69, + "learning_rate": 8.766161868100147e-06, + "logps/chosen": -183.8955078125, + "logps/margins": 1.8340591192245483, + "logps/rejected": -185.72958374023438, + "loss": 0.767, + "rewards/chosen": 1.5932084321975708, + "rewards/margins": -0.040756307542324066, + "rewards/rejected": 1.6339647769927979, + "step": 2750 + }, + { + "accuracy": 0.48750001192092896, + "epoch": 0.69, + "learning_rate": 8.757538913182977e-06, + "logps/chosen": -129.14199829101562, + "logps/margins": 30.8277645111084, + "logps/rejected": -159.96975708007812, + "loss": 0.81, + "rewards/chosen": 1.3314985036849976, + "rewards/margins": -0.10257778316736221, + "rewards/rejected": 1.4340763092041016, + "step": 2760 + }, + { + "accuracy": 0.5625, + "epoch": 0.69, + "learning_rate": 8.748890204085593e-06, + "logps/chosen": -130.80186462402344, + "logps/margins": 19.50129508972168, + "logps/rejected": -150.30316162109375, + "loss": 0.8098, + "rewards/chosen": 1.486842393875122, + "rewards/margins": 0.145850270986557, + "rewards/rejected": 1.3409919738769531, + "step": 2770 + }, + { + "accuracy": 0.574999988079071, + "epoch": 0.69, + "learning_rate": 8.740215800086262e-06, + "logps/chosen": -150.61740112304688, + "logps/margins": 2.5855257511138916, + "logps/rejected": -153.20294189453125, + "loss": 0.7317, + "rewards/chosen": 1.428755521774292, + "rewards/margins": 0.2611759305000305, + "rewards/rejected": 1.1675795316696167, + "step": 2780 + }, + { + "accuracy": 0.637499988079071, + "epoch": 0.7, + "learning_rate": 8.731515760639375e-06, + "logps/chosen": -149.7344207763672, + "logps/margins": -28.57843017578125, + "logps/rejected": -121.1559829711914, + "loss": 0.7053, + "rewards/chosen": 1.700934648513794, + "rewards/margins": 0.31255167722702026, + "rewards/rejected": 1.388382911682129, + "step": 2790 + }, + { + "accuracy": 0.6499999761581421, + "epoch": 0.7, + "learning_rate": 8.722790145375018e-06, + "logps/chosen": -165.7180938720703, + "logps/margins": -15.831774711608887, + "logps/rejected": -149.8863067626953, + "loss": 0.8156, + "rewards/chosen": 2.0403616428375244, + "rewards/margins": 0.31420427560806274, + "rewards/rejected": 1.7261574268341064, + "step": 2800 + }, + { + "accuracy": 0.550000011920929, + "epoch": 0.7, + "learning_rate": 8.714039014098577e-06, + "logps/chosen": -138.14642333984375, + "logps/margins": 24.827730178833008, + "logps/rejected": -162.97415161132812, + "loss": 0.7578, + "rewards/chosen": 1.554478645324707, + "rewards/margins": 0.22009606659412384, + "rewards/rejected": 1.3343826532363892, + "step": 2810 + }, + { + "accuracy": 0.4749999940395355, + "epoch": 0.7, + "learning_rate": 8.705262426790328e-06, + "logps/chosen": -144.08914184570312, + "logps/margins": 11.892007827758789, + "logps/rejected": -155.9811553955078, + "loss": 0.8661, + "rewards/chosen": 1.545195460319519, + "rewards/margins": -0.10685305297374725, + "rewards/rejected": 1.6520484685897827, + "step": 2820 + }, + { + "accuracy": 0.5249999761581421, + "epoch": 0.71, + "learning_rate": 8.69646044360502e-06, + "logps/chosen": -156.28567504882812, + "logps/margins": -11.327496528625488, + "logps/rejected": -144.95816040039062, + "loss": 0.8077, + "rewards/chosen": 1.528448462486267, + "rewards/margins": -0.09929704666137695, + "rewards/rejected": 1.6277453899383545, + "step": 2830 + }, + { + "accuracy": 0.5375000238418579, + "epoch": 0.71, + "learning_rate": 8.687633124871462e-06, + "logps/chosen": -150.17543029785156, + "logps/margins": 6.499469757080078, + "logps/rejected": -156.67489624023438, + "loss": 0.7569, + "rewards/chosen": 1.5286836624145508, + "rewards/margins": 0.01687273196876049, + "rewards/rejected": 1.5118108987808228, + "step": 2840 + }, + { + "accuracy": 0.550000011920929, + "epoch": 0.71, + "learning_rate": 8.678780531092122e-06, + "logps/chosen": -164.65780639648438, + "logps/margins": -2.193402051925659, + "logps/rejected": -162.4644012451172, + "loss": 0.7836, + "rewards/chosen": 1.6761707067489624, + "rewards/margins": 0.13601139187812805, + "rewards/rejected": 1.5401592254638672, + "step": 2850 + }, + { + "accuracy": 0.512499988079071, + "epoch": 0.71, + "learning_rate": 8.669902722942695e-06, + "logps/chosen": -179.60025024414062, + "logps/margins": 8.734827041625977, + "logps/rejected": -188.33505249023438, + "loss": 0.8083, + "rewards/chosen": 1.4502613544464111, + "rewards/margins": 0.0022860795725136995, + "rewards/rejected": 1.4479749202728271, + "step": 2860 + }, + { + "accuracy": 0.574999988079071, + "epoch": 0.72, + "learning_rate": 8.6609997612717e-06, + "logps/chosen": -185.20169067382812, + "logps/margins": -14.612690925598145, + "logps/rejected": -170.5889892578125, + "loss": 0.8609, + "rewards/chosen": 1.4797416925430298, + "rewards/margins": 0.02350521646440029, + "rewards/rejected": 1.4562366008758545, + "step": 2870 + }, + { + "accuracy": 0.625, + "epoch": 0.72, + "learning_rate": 8.652071707100054e-06, + "logps/chosen": -139.81324768066406, + "logps/margins": -10.358976364135742, + "logps/rejected": -129.45425415039062, + "loss": 0.7497, + "rewards/chosen": 1.483909010887146, + "rewards/margins": 0.2548331618309021, + "rewards/rejected": 1.2290759086608887, + "step": 2880 + }, + { + "accuracy": 0.5, + "epoch": 0.72, + "learning_rate": 8.64311862162066e-06, + "logps/chosen": -153.6216278076172, + "logps/margins": -6.6676788330078125, + "logps/rejected": -146.9539337158203, + "loss": 0.7835, + "rewards/chosen": 1.508525013923645, + "rewards/margins": 0.02880988083779812, + "rewards/rejected": 1.47971510887146, + "step": 2890 + }, + { + "accuracy": 0.574999988079071, + "epoch": 0.72, + "learning_rate": 8.63414056619799e-06, + "logps/chosen": -144.7294921875, + "logps/margins": 7.182837009429932, + "logps/rejected": -151.91233825683594, + "loss": 0.8166, + "rewards/chosen": 1.8214155435562134, + "rewards/margins": 0.23993360996246338, + "rewards/rejected": 1.58148193359375, + "step": 2900 + }, + { + "accuracy": 0.612500011920929, + "epoch": 0.73, + "learning_rate": 8.625137602367653e-06, + "logps/chosen": -163.61148071289062, + "logps/margins": 3.5040078163146973, + "logps/rejected": -167.115478515625, + "loss": 0.7534, + "rewards/chosen": 1.5655899047851562, + "rewards/margins": 0.2786282002925873, + "rewards/rejected": 1.2869617938995361, + "step": 2910 + }, + { + "accuracy": 0.5375000238418579, + "epoch": 0.73, + "learning_rate": 8.616109791835984e-06, + "logps/chosen": -163.35218811035156, + "logps/margins": -1.1953474283218384, + "logps/rejected": -162.15682983398438, + "loss": 0.7732, + "rewards/chosen": 1.7934242486953735, + "rewards/margins": 0.08110041171312332, + "rewards/rejected": 1.712323784828186, + "step": 2920 + }, + { + "accuracy": 0.5625, + "epoch": 0.73, + "learning_rate": 8.607057196479617e-06, + "logps/chosen": -148.98927307128906, + "logps/margins": -19.40814781188965, + "logps/rejected": -129.5811309814453, + "loss": 0.8467, + "rewards/chosen": 1.4410368204116821, + "rewards/margins": -0.014748156070709229, + "rewards/rejected": 1.4557850360870361, + "step": 2930 + }, + { + "accuracy": 0.612500011920929, + "epoch": 0.73, + "learning_rate": 8.597979878345062e-06, + "logps/chosen": -162.67930603027344, + "logps/margins": -0.1245359405875206, + "logps/rejected": -162.5547637939453, + "loss": 0.7511, + "rewards/chosen": 1.7473443746566772, + "rewards/margins": 0.48985424637794495, + "rewards/rejected": 1.2574901580810547, + "step": 2940 + }, + { + "accuracy": 0.6000000238418579, + "epoch": 0.74, + "learning_rate": 8.588877899648276e-06, + "logps/chosen": -175.69664001464844, + "logps/margins": -20.07693862915039, + "logps/rejected": -155.6197052001953, + "loss": 0.828, + "rewards/chosen": 1.8812503814697266, + "rewards/margins": 0.3405774235725403, + "rewards/rejected": 1.540673017501831, + "step": 2950 + }, + { + "accuracy": 0.512499988079071, + "epoch": 0.74, + "learning_rate": 8.579751322774244e-06, + "logps/chosen": -147.95489501953125, + "logps/margins": -2.609814405441284, + "logps/rejected": -145.34506225585938, + "loss": 0.8053, + "rewards/chosen": 1.5963928699493408, + "rewards/margins": 0.03403860330581665, + "rewards/rejected": 1.562354326248169, + "step": 2960 + }, + { + "accuracy": 0.550000011920929, + "epoch": 0.74, + "learning_rate": 8.570600210276541e-06, + "logps/chosen": -136.46490478515625, + "logps/margins": -5.8197021484375, + "logps/rejected": -130.6452178955078, + "loss": 0.7715, + "rewards/chosen": 1.5920674800872803, + "rewards/margins": 0.10355864465236664, + "rewards/rejected": 1.4885088205337524, + "step": 2970 + }, + { + "accuracy": 0.4124999940395355, + "epoch": 0.74, + "learning_rate": 8.561424624876917e-06, + "logps/chosen": -138.11569213867188, + "logps/margins": 10.209531784057617, + "logps/rejected": -148.32522583007812, + "loss": 0.8056, + "rewards/chosen": 1.659637212753296, + "rewards/margins": -0.0970558226108551, + "rewards/rejected": 1.7566931247711182, + "step": 2980 + }, + { + "accuracy": 0.4749999940395355, + "epoch": 0.75, + "learning_rate": 8.552224629464854e-06, + "logps/chosen": -155.06893920898438, + "logps/margins": -21.292409896850586, + "logps/rejected": -133.77651977539062, + "loss": 0.7958, + "rewards/chosen": 1.474408507347107, + "rewards/margins": -0.11505673080682755, + "rewards/rejected": 1.5894651412963867, + "step": 2990 + }, + { + "accuracy": 0.6875, + "epoch": 0.75, + "learning_rate": 8.543000287097141e-06, + "logps/chosen": -156.76683044433594, + "logps/margins": 13.566311836242676, + "logps/rejected": -170.33316040039062, + "loss": 0.7411, + "rewards/chosen": 1.4821707010269165, + "rewards/margins": 0.5528401136398315, + "rewards/rejected": 0.9293305277824402, + "step": 3000 + }, + { + "epoch": 0.75, + "eval_accuracy": 0.5486257928118393, + "eval_logps/chosen": -155.9010772705078, + "eval_logps/margins": -6.521376609802246, + "eval_logps/rejected": -149.37969970703125, + "eval_loss": 0.8003174066543579, + "eval_rewards/chosen": 1.7650580406188965, + "eval_rewards/margins": 0.16896478831768036, + "eval_rewards/rejected": 1.5960932970046997, + "eval_runtime": 1294.3661, + "eval_samples_per_second": 10.963, + "eval_steps_per_second": 1.371, + "step": 3000 + }, + { + "accuracy": 0.5375000238418579, + "epoch": 0.75, + "learning_rate": 8.53375166099744e-06, + "logps/chosen": -161.08810424804688, + "logps/margins": -0.3329780697822571, + "logps/rejected": -160.755126953125, + "loss": 0.8118, + "rewards/chosen": 1.634568452835083, + "rewards/margins": 0.07734532654285431, + "rewards/rejected": 1.5572230815887451, + "step": 3010 + }, + { + "accuracy": 0.6000000238418579, + "epoch": 0.76, + "learning_rate": 8.524478814555855e-06, + "logps/chosen": -185.60482788085938, + "logps/margins": -4.625077247619629, + "logps/rejected": -180.97975158691406, + "loss": 0.8838, + "rewards/chosen": 2.093295097351074, + "rewards/margins": 0.30580079555511475, + "rewards/rejected": 1.787494421005249, + "step": 3020 + }, + { + "accuracy": 0.5874999761581421, + "epoch": 0.76, + "learning_rate": 8.515181811328498e-06, + "logps/chosen": -150.15780639648438, + "logps/margins": 7.099617004394531, + "logps/rejected": -157.25741577148438, + "loss": 0.7537, + "rewards/chosen": 1.8199164867401123, + "rewards/margins": 0.25283852219581604, + "rewards/rejected": 1.5670779943466187, + "step": 3030 + }, + { + "accuracy": 0.574999988079071, + "epoch": 0.76, + "learning_rate": 8.505860715037054e-06, + "logps/chosen": -144.1829833984375, + "logps/margins": -16.597780227661133, + "logps/rejected": -127.58522033691406, + "loss": 0.7326, + "rewards/chosen": 1.6525452136993408, + "rewards/margins": 0.22734513878822327, + "rewards/rejected": 1.4252002239227295, + "step": 3040 + }, + { + "accuracy": 0.5375000238418579, + "epoch": 0.76, + "learning_rate": 8.49651558956833e-06, + "logps/chosen": -167.9637451171875, + "logps/margins": -9.961614608764648, + "logps/rejected": -158.00213623046875, + "loss": 0.8065, + "rewards/chosen": 1.675106406211853, + "rewards/margins": 0.17599111795425415, + "rewards/rejected": 1.4991153478622437, + "step": 3050 + }, + { + "accuracy": 0.4625000059604645, + "epoch": 0.77, + "learning_rate": 8.487146498973841e-06, + "logps/chosen": -174.6428680419922, + "logps/margins": -25.455190658569336, + "logps/rejected": -149.1876678466797, + "loss": 0.8235, + "rewards/chosen": 2.4818341732025146, + "rewards/margins": 0.07841117680072784, + "rewards/rejected": 2.4034228324890137, + "step": 3060 + }, + { + "accuracy": 0.574999988079071, + "epoch": 0.77, + "learning_rate": 8.477753507469357e-06, + "logps/chosen": -174.24676513671875, + "logps/margins": -13.05420970916748, + "logps/rejected": -161.19256591796875, + "loss": 0.8725, + "rewards/chosen": 1.5876998901367188, + "rewards/margins": 0.03590340167284012, + "rewards/rejected": 1.5517964363098145, + "step": 3070 + }, + { + "accuracy": 0.6625000238418579, + "epoch": 0.77, + "learning_rate": 8.468336679434461e-06, + "logps/chosen": -192.79833984375, + "logps/margins": -43.42304992675781, + "logps/rejected": -149.3752899169922, + "loss": 0.7302, + "rewards/chosen": 1.7570441961288452, + "rewards/margins": 0.40183025598526, + "rewards/rejected": 1.3552137613296509, + "step": 3080 + }, + { + "accuracy": 0.6000000238418579, + "epoch": 0.77, + "learning_rate": 8.458896079412114e-06, + "logps/chosen": -192.26625061035156, + "logps/margins": -50.38763427734375, + "logps/rejected": -141.87860107421875, + "loss": 0.7633, + "rewards/chosen": 2.1395068168640137, + "rewards/margins": 0.3148060441017151, + "rewards/rejected": 1.8247007131576538, + "step": 3090 + }, + { + "accuracy": 0.5249999761581421, + "epoch": 0.78, + "learning_rate": 8.44943177210821e-06, + "logps/chosen": -146.1753387451172, + "logps/margins": 11.312209129333496, + "logps/rejected": -157.487548828125, + "loss": 0.7106, + "rewards/chosen": 2.024766683578491, + "rewards/margins": 0.24736681580543518, + "rewards/rejected": 1.7773997783660889, + "step": 3100 + }, + { + "accuracy": 0.4749999940395355, + "epoch": 0.78, + "learning_rate": 8.439943822391132e-06, + "logps/chosen": -183.7640838623047, + "logps/margins": -50.22600173950195, + "logps/rejected": -133.53811645507812, + "loss": 0.7581, + "rewards/chosen": 1.8503338098526, + "rewards/margins": 0.027590256184339523, + "rewards/rejected": 1.8227436542510986, + "step": 3110 + }, + { + "accuracy": 0.550000011920929, + "epoch": 0.78, + "learning_rate": 8.43043229529131e-06, + "logps/chosen": -122.57987213134766, + "logps/margins": 15.709098815917969, + "logps/rejected": -138.28897094726562, + "loss": 0.7385, + "rewards/chosen": 1.671735405921936, + "rewards/margins": 0.1854889839887619, + "rewards/rejected": 1.4862463474273682, + "step": 3120 + }, + { + "accuracy": 0.5, + "epoch": 0.78, + "learning_rate": 8.420897256000771e-06, + "logps/chosen": -143.1592254638672, + "logps/margins": -12.776369094848633, + "logps/rejected": -130.3828582763672, + "loss": 0.7978, + "rewards/chosen": 1.4812790155410767, + "rewards/margins": -0.010179603472352028, + "rewards/rejected": 1.491458535194397, + "step": 3130 + }, + { + "accuracy": 0.5625, + "epoch": 0.79, + "learning_rate": 8.411338769872697e-06, + "logps/chosen": -158.930908203125, + "logps/margins": 1.2776397466659546, + "logps/rejected": -160.2085418701172, + "loss": 0.8615, + "rewards/chosen": 1.7491512298583984, + "rewards/margins": -0.10457686334848404, + "rewards/rejected": 1.85372793674469, + "step": 3140 + }, + { + "accuracy": 0.5625, + "epoch": 0.79, + "learning_rate": 8.401756902420975e-06, + "logps/chosen": -134.58473205566406, + "logps/margins": 25.656137466430664, + "logps/rejected": -160.24087524414062, + "loss": 0.7601, + "rewards/chosen": 1.51541268825531, + "rewards/margins": 0.3289361000061035, + "rewards/rejected": 1.186476707458496, + "step": 3150 + }, + { + "accuracy": 0.5625, + "epoch": 0.79, + "learning_rate": 8.39215171931974e-06, + "logps/chosen": -155.1742706298828, + "logps/margins": -8.295679092407227, + "logps/rejected": -146.8785858154297, + "loss": 0.7783, + "rewards/chosen": 1.5980865955352783, + "rewards/margins": 0.29294320940971375, + "rewards/rejected": 1.3051433563232422, + "step": 3160 + }, + { + "accuracy": 0.6000000238418579, + "epoch": 0.79, + "learning_rate": 8.382523286402947e-06, + "logps/chosen": -183.69366455078125, + "logps/margins": -3.2288055419921875, + "logps/rejected": -180.46484375, + "loss": 0.7377, + "rewards/chosen": 1.7234938144683838, + "rewards/margins": 0.37495914101600647, + "rewards/rejected": 1.3485344648361206, + "step": 3170 + }, + { + "accuracy": 0.5625, + "epoch": 0.8, + "learning_rate": 8.37287166966389e-06, + "logps/chosen": -145.9056854248047, + "logps/margins": -2.2215399742126465, + "logps/rejected": -143.68414306640625, + "loss": 0.7742, + "rewards/chosen": 1.5664255619049072, + "rewards/margins": 0.25297456979751587, + "rewards/rejected": 1.3134510517120361, + "step": 3180 + }, + { + "accuracy": 0.5625, + "epoch": 0.8, + "learning_rate": 8.363196935254778e-06, + "logps/chosen": -126.447998046875, + "logps/margins": 23.140649795532227, + "logps/rejected": -149.58863830566406, + "loss": 0.8042, + "rewards/chosen": 1.3213412761688232, + "rewards/margins": 0.06879094988107681, + "rewards/rejected": 1.252550482749939, + "step": 3190 + }, + { + "accuracy": 0.5874999761581421, + "epoch": 0.8, + "learning_rate": 8.35349914948626e-06, + "logps/chosen": -153.10629272460938, + "logps/margins": -7.21134090423584, + "logps/rejected": -145.8949432373047, + "loss": 0.8203, + "rewards/chosen": 1.3600586652755737, + "rewards/margins": 0.21659858524799347, + "rewards/rejected": 1.1434600353240967, + "step": 3200 + }, + { + "accuracy": 0.637499988079071, + "epoch": 0.8, + "learning_rate": 8.34377837882698e-06, + "logps/chosen": -162.39382934570312, + "logps/margins": 7.168750762939453, + "logps/rejected": -169.5625762939453, + "loss": 0.7405, + "rewards/chosen": 1.3610525131225586, + "rewards/margins": 0.3161638379096985, + "rewards/rejected": 1.0448886156082153, + "step": 3210 + }, + { + "accuracy": 0.625, + "epoch": 0.81, + "learning_rate": 8.334034689903121e-06, + "logps/chosen": -160.90206909179688, + "logps/margins": -21.66839027404785, + "logps/rejected": -139.2336883544922, + "loss": 0.7945, + "rewards/chosen": 1.3712489604949951, + "rewards/margins": 0.28245019912719727, + "rewards/rejected": 1.0887987613677979, + "step": 3220 + }, + { + "accuracy": 0.5874999761581421, + "epoch": 0.81, + "learning_rate": 8.324268149497954e-06, + "logps/chosen": -158.0965576171875, + "logps/margins": -7.221676826477051, + "logps/rejected": -150.8748779296875, + "loss": 0.7716, + "rewards/chosen": 1.2841920852661133, + "rewards/margins": 0.15127582848072052, + "rewards/rejected": 1.1329162120819092, + "step": 3230 + }, + { + "accuracy": 0.550000011920929, + "epoch": 0.81, + "learning_rate": 8.314478824551364e-06, + "logps/chosen": -128.3480987548828, + "logps/margins": 14.737256050109863, + "logps/rejected": -143.08535766601562, + "loss": 0.7549, + "rewards/chosen": 1.4126204252243042, + "rewards/margins": 0.26671385765075684, + "rewards/rejected": 1.1459064483642578, + "step": 3240 + }, + { + "accuracy": 0.5249999761581421, + "epoch": 0.81, + "learning_rate": 8.30466678215941e-06, + "logps/chosen": -193.5419921875, + "logps/margins": -31.942550659179688, + "logps/rejected": -161.5994110107422, + "loss": 0.8155, + "rewards/chosen": 1.6444562673568726, + "rewards/margins": 0.03188382461667061, + "rewards/rejected": 1.612572431564331, + "step": 3250 + }, + { + "accuracy": 0.512499988079071, + "epoch": 0.81, + "learning_rate": 8.294832089573853e-06, + "logps/chosen": -161.07583618164062, + "logps/margins": -4.5086164474487305, + "logps/rejected": -156.5672149658203, + "loss": 0.7744, + "rewards/chosen": 1.5193272829055786, + "rewards/margins": 0.2577361464500427, + "rewards/rejected": 1.2615910768508911, + "step": 3260 + }, + { + "accuracy": 0.5375000238418579, + "epoch": 0.82, + "learning_rate": 8.284974814201694e-06, + "logps/chosen": -146.56365966796875, + "logps/margins": 14.04448413848877, + "logps/rejected": -160.608154296875, + "loss": 0.8143, + "rewards/chosen": 1.751704216003418, + "rewards/margins": 0.15864914655685425, + "rewards/rejected": 1.5930547714233398, + "step": 3270 + }, + { + "accuracy": 0.5375000238418579, + "epoch": 0.82, + "learning_rate": 8.275095023604724e-06, + "logps/chosen": -179.9011993408203, + "logps/margins": -13.970852851867676, + "logps/rejected": -165.93035888671875, + "loss": 0.7845, + "rewards/chosen": 1.7380592823028564, + "rewards/margins": 0.36463767290115356, + "rewards/rejected": 1.3734214305877686, + "step": 3280 + }, + { + "accuracy": 0.5, + "epoch": 0.82, + "learning_rate": 8.26519278549905e-06, + "logps/chosen": -169.5118865966797, + "logps/margins": 15.470805168151855, + "logps/rejected": -184.98269653320312, + "loss": 0.8802, + "rewards/chosen": 1.6773399114608765, + "rewards/margins": -0.0064588068053126335, + "rewards/rejected": 1.6837987899780273, + "step": 3290 + }, + { + "accuracy": 0.48750001192092896, + "epoch": 0.82, + "learning_rate": 8.255268167754632e-06, + "logps/chosen": -138.21778869628906, + "logps/margins": -0.12071685492992401, + "logps/rejected": -138.09707641601562, + "loss": 0.7784, + "rewards/chosen": 1.7837855815887451, + "rewards/margins": 0.18200163543224335, + "rewards/rejected": 1.601784110069275, + "step": 3300 + }, + { + "accuracy": 0.550000011920929, + "epoch": 0.83, + "learning_rate": 8.245321238394827e-06, + "logps/chosen": -168.6961669921875, + "logps/margins": -19.171979904174805, + "logps/rejected": -149.52420043945312, + "loss": 0.7968, + "rewards/chosen": 2.0667340755462646, + "rewards/margins": 0.529453694820404, + "rewards/rejected": 1.5372803211212158, + "step": 3310 + }, + { + "accuracy": 0.550000011920929, + "epoch": 0.83, + "learning_rate": 8.235352065595908e-06, + "logps/chosen": -173.46902465820312, + "logps/margins": -6.0329108238220215, + "logps/rejected": -167.4361114501953, + "loss": 0.7435, + "rewards/chosen": 1.6746422052383423, + "rewards/margins": 0.10841004550457001, + "rewards/rejected": 1.5662320852279663, + "step": 3320 + }, + { + "accuracy": 0.550000011920929, + "epoch": 0.83, + "learning_rate": 8.225360717686606e-06, + "logps/chosen": -156.3230743408203, + "logps/margins": -6.365569114685059, + "logps/rejected": -149.95748901367188, + "loss": 0.7835, + "rewards/chosen": 1.6193897724151611, + "rewards/margins": 0.1265874207019806, + "rewards/rejected": 1.4928020238876343, + "step": 3330 + }, + { + "accuracy": 0.512499988079071, + "epoch": 0.83, + "learning_rate": 8.215347263147649e-06, + "logps/chosen": -210.0120086669922, + "logps/margins": -30.997217178344727, + "logps/rejected": -179.01480102539062, + "loss": 0.8086, + "rewards/chosen": 1.8879203796386719, + "rewards/margins": 0.15007278323173523, + "rewards/rejected": 1.7378475666046143, + "step": 3340 + }, + { + "accuracy": 0.5625, + "epoch": 0.84, + "learning_rate": 8.205311770611269e-06, + "logps/chosen": -149.6796417236328, + "logps/margins": 11.477659225463867, + "logps/rejected": -161.1573028564453, + "loss": 0.7589, + "rewards/chosen": 1.603755235671997, + "rewards/margins": 0.21250459551811218, + "rewards/rejected": 1.3912506103515625, + "step": 3350 + }, + { + "accuracy": 0.574999988079071, + "epoch": 0.84, + "learning_rate": 8.19525430886076e-06, + "logps/chosen": -175.27981567382812, + "logps/margins": -27.390121459960938, + "logps/rejected": -147.88967895507812, + "loss": 0.7069, + "rewards/chosen": 2.0394558906555176, + "rewards/margins": 0.36864930391311646, + "rewards/rejected": 1.6708062887191772, + "step": 3360 + }, + { + "accuracy": 0.637499988079071, + "epoch": 0.84, + "learning_rate": 8.185174946829986e-06, + "logps/chosen": -175.32638549804688, + "logps/margins": -30.108501434326172, + "logps/rejected": -145.2178955078125, + "loss": 0.734, + "rewards/chosen": 1.8845609426498413, + "rewards/margins": 0.43025127053260803, + "rewards/rejected": 1.4543099403381348, + "step": 3370 + }, + { + "accuracy": 0.5249999761581421, + "epoch": 0.84, + "learning_rate": 8.175073753602918e-06, + "logps/chosen": -154.2449493408203, + "logps/margins": -1.7641388177871704, + "logps/rejected": -152.4807891845703, + "loss": 0.74, + "rewards/chosen": 1.7165447473526, + "rewards/margins": 0.28324785828590393, + "rewards/rejected": 1.433296799659729, + "step": 3380 + }, + { + "accuracy": 0.550000011920929, + "epoch": 0.85, + "learning_rate": 8.164950798413153e-06, + "logps/chosen": -153.0354461669922, + "logps/margins": 1.106896996498108, + "logps/rejected": -154.14234924316406, + "loss": 0.7639, + "rewards/chosen": 1.8354475498199463, + "rewards/margins": 0.21832025051116943, + "rewards/rejected": 1.6171271800994873, + "step": 3390 + }, + { + "accuracy": 0.48750001192092896, + "epoch": 0.85, + "learning_rate": 8.154806150643456e-06, + "logps/chosen": -177.07888793945312, + "logps/margins": -6.0497541427612305, + "logps/rejected": -171.0291290283203, + "loss": 0.7419, + "rewards/chosen": 1.5892397165298462, + "rewards/margins": 0.21471253037452698, + "rewards/rejected": 1.3745272159576416, + "step": 3400 + }, + { + "accuracy": 0.5874999761581421, + "epoch": 0.85, + "learning_rate": 8.144639879825262e-06, + "logps/chosen": -175.3135528564453, + "logps/margins": -18.72549057006836, + "logps/rejected": -156.5880584716797, + "loss": 0.7904, + "rewards/chosen": 2.0188117027282715, + "rewards/margins": 0.30751943588256836, + "rewards/rejected": 1.7112922668457031, + "step": 3410 + }, + { + "accuracy": 0.625, + "epoch": 0.85, + "learning_rate": 8.134452055638211e-06, + "logps/chosen": -165.5194854736328, + "logps/margins": -20.98834800720215, + "logps/rejected": -144.5311279296875, + "loss": 0.6977, + "rewards/chosen": 1.8513362407684326, + "rewards/margins": 0.3126828372478485, + "rewards/rejected": 1.5386536121368408, + "step": 3420 + }, + { + "accuracy": 0.5625, + "epoch": 0.86, + "learning_rate": 8.124242747909678e-06, + "logps/chosen": -186.35317993164062, + "logps/margins": -33.190338134765625, + "logps/rejected": -153.162841796875, + "loss": 0.7514, + "rewards/chosen": 2.014575719833374, + "rewards/margins": 0.35961395502090454, + "rewards/rejected": 1.6549618244171143, + "step": 3430 + }, + { + "accuracy": 0.5625, + "epoch": 0.86, + "learning_rate": 8.11401202661428e-06, + "logps/chosen": -154.21261596679688, + "logps/margins": 1.2508293390274048, + "logps/rejected": -155.46347045898438, + "loss": 0.7763, + "rewards/chosen": 1.75021493434906, + "rewards/margins": 0.11648325622081757, + "rewards/rejected": 1.6337318420410156, + "step": 3440 + }, + { + "accuracy": 0.48750001192092896, + "epoch": 0.86, + "learning_rate": 8.103759961873403e-06, + "logps/chosen": -131.79953002929688, + "logps/margins": 12.465619087219238, + "logps/rejected": -144.26516723632812, + "loss": 0.7711, + "rewards/chosen": 1.8068921566009521, + "rewards/margins": -0.018017753958702087, + "rewards/rejected": 1.8249099254608154, + "step": 3450 + }, + { + "accuracy": 0.6000000238418579, + "epoch": 0.86, + "learning_rate": 8.093486623954723e-06, + "logps/chosen": -146.01734924316406, + "logps/margins": 10.133278846740723, + "logps/rejected": -156.150634765625, + "loss": 0.7455, + "rewards/chosen": 1.706298828125, + "rewards/margins": 0.08658869564533234, + "rewards/rejected": 1.6197102069854736, + "step": 3460 + }, + { + "accuracy": 0.5249999761581421, + "epoch": 0.87, + "learning_rate": 8.08319208327172e-06, + "logps/chosen": -191.7599639892578, + "logps/margins": 2.3573758602142334, + "logps/rejected": -194.11734008789062, + "loss": 0.7013, + "rewards/chosen": 1.9074863195419312, + "rewards/margins": 0.4116293787956238, + "rewards/rejected": 1.4958570003509521, + "step": 3470 + }, + { + "accuracy": 0.6499999761581421, + "epoch": 0.87, + "learning_rate": 8.072876410383199e-06, + "logps/chosen": -147.97479248046875, + "logps/margins": -15.253326416015625, + "logps/rejected": -132.72146606445312, + "loss": 0.708, + "rewards/chosen": 1.3487708568572998, + "rewards/margins": 0.23281045258045197, + "rewards/rejected": 1.1159604787826538, + "step": 3480 + }, + { + "accuracy": 0.5375000238418579, + "epoch": 0.87, + "learning_rate": 8.062539675992807e-06, + "logps/chosen": -138.44216918945312, + "logps/margins": 36.819217681884766, + "logps/rejected": -175.26136779785156, + "loss": 0.8048, + "rewards/chosen": 1.5128448009490967, + "rewards/margins": 0.09899057447910309, + "rewards/rejected": 1.4138542413711548, + "step": 3490 + }, + { + "accuracy": 0.5375000238418579, + "epoch": 0.88, + "learning_rate": 8.052181950948544e-06, + "logps/chosen": -171.80516052246094, + "logps/margins": -51.980369567871094, + "logps/rejected": -119.8248062133789, + "loss": 0.7365, + "rewards/chosen": 1.331697702407837, + "rewards/margins": 0.1663578301668167, + "rewards/rejected": 1.1653398275375366, + "step": 3500 + }, + { + "accuracy": 0.612500011920929, + "epoch": 0.88, + "learning_rate": 8.041803306242282e-06, + "logps/chosen": -168.68783569335938, + "logps/margins": -8.771944046020508, + "logps/rejected": -159.91590881347656, + "loss": 0.8014, + "rewards/chosen": 1.6454660892486572, + "rewards/margins": 0.2971499562263489, + "rewards/rejected": 1.3483160734176636, + "step": 3510 + }, + { + "accuracy": 0.574999988079071, + "epoch": 0.88, + "learning_rate": 8.031403813009273e-06, + "logps/chosen": -150.67105102539062, + "logps/margins": 19.517623901367188, + "logps/rejected": -170.1886749267578, + "loss": 0.8019, + "rewards/chosen": 2.0029842853546143, + "rewards/margins": 0.2589420974254608, + "rewards/rejected": 1.744042158126831, + "step": 3520 + }, + { + "accuracy": 0.5249999761581421, + "epoch": 0.88, + "learning_rate": 8.020983542527669e-06, + "logps/chosen": -161.2472381591797, + "logps/margins": 6.7901434898376465, + "logps/rejected": -168.03738403320312, + "loss": 0.7681, + "rewards/chosen": 1.5617696046829224, + "rewards/margins": 0.023355094715952873, + "rewards/rejected": 1.5384143590927124, + "step": 3530 + }, + { + "accuracy": 0.5375000238418579, + "epoch": 0.89, + "learning_rate": 8.010542566218026e-06, + "logps/chosen": -125.4128189086914, + "logps/margins": 8.50660514831543, + "logps/rejected": -133.91943359375, + "loss": 0.7969, + "rewards/chosen": 1.4624067544937134, + "rewards/margins": 0.07958179712295532, + "rewards/rejected": 1.3828251361846924, + "step": 3540 + }, + { + "accuracy": 0.625, + "epoch": 0.89, + "learning_rate": 8.000080955642819e-06, + "logps/chosen": -186.24436950683594, + "logps/margins": -36.11909103393555, + "logps/rejected": -150.12530517578125, + "loss": 0.7742, + "rewards/chosen": 1.564686894416809, + "rewards/margins": 0.33360210061073303, + "rewards/rejected": 1.2310845851898193, + "step": 3550 + }, + { + "accuracy": 0.48750001192092896, + "epoch": 0.89, + "learning_rate": 7.989598782505954e-06, + "logps/chosen": -143.9381866455078, + "logps/margins": 32.61821746826172, + "logps/rejected": -176.55642700195312, + "loss": 0.838, + "rewards/chosen": 1.272360920906067, + "rewards/margins": -0.05570871755480766, + "rewards/rejected": 1.3280696868896484, + "step": 3560 + }, + { + "accuracy": 0.6000000238418579, + "epoch": 0.89, + "learning_rate": 7.979096118652267e-06, + "logps/chosen": -183.2843780517578, + "logps/margins": 1.9387588500976562, + "logps/rejected": -185.22312927246094, + "loss": 0.7738, + "rewards/chosen": 1.6671069860458374, + "rewards/margins": 0.4146422743797302, + "rewards/rejected": 1.252464771270752, + "step": 3570 + }, + { + "accuracy": 0.5625, + "epoch": 0.9, + "learning_rate": 7.96857303606704e-06, + "logps/chosen": -153.29017639160156, + "logps/margins": -39.799129486083984, + "logps/rejected": -113.49104309082031, + "loss": 0.7831, + "rewards/chosen": 1.3374580144882202, + "rewards/margins": 0.10456130653619766, + "rewards/rejected": 1.2328965663909912, + "step": 3580 + }, + { + "accuracy": 0.6000000238418579, + "epoch": 0.9, + "learning_rate": 7.958029606875503e-06, + "logps/chosen": -128.26589965820312, + "logps/margins": 15.221063613891602, + "logps/rejected": -143.48695373535156, + "loss": 0.7696, + "rewards/chosen": 1.7023261785507202, + "rewards/margins": 0.2918190360069275, + "rewards/rejected": 1.4105072021484375, + "step": 3590 + }, + { + "accuracy": 0.5, + "epoch": 0.9, + "learning_rate": 7.947465903342348e-06, + "logps/chosen": -153.58782958984375, + "logps/margins": 15.419158935546875, + "logps/rejected": -169.00698852539062, + "loss": 0.8004, + "rewards/chosen": 1.6826026439666748, + "rewards/margins": -0.01102372445166111, + "rewards/rejected": 1.6936264038085938, + "step": 3600 + }, + { + "accuracy": 0.550000011920929, + "epoch": 0.9, + "learning_rate": 7.936881997871217e-06, + "logps/chosen": -138.57852172851562, + "logps/margins": -13.574409484863281, + "logps/rejected": -125.00411224365234, + "loss": 0.7191, + "rewards/chosen": 1.5303868055343628, + "rewards/margins": 0.2576327919960022, + "rewards/rejected": 1.2727539539337158, + "step": 3610 + }, + { + "accuracy": 0.625, + "epoch": 0.91, + "learning_rate": 7.926277963004227e-06, + "logps/chosen": -185.51663208007812, + "logps/margins": -20.25797462463379, + "logps/rejected": -165.25863647460938, + "loss": 0.7274, + "rewards/chosen": 2.054831027984619, + "rewards/margins": 0.3326185345649719, + "rewards/rejected": 1.7222124338150024, + "step": 3620 + }, + { + "accuracy": 0.5625, + "epoch": 0.91, + "learning_rate": 7.915653871421458e-06, + "logps/chosen": -152.35568237304688, + "logps/margins": 16.110992431640625, + "logps/rejected": -168.46670532226562, + "loss": 0.7801, + "rewards/chosen": 1.678218126296997, + "rewards/margins": 0.03800428658723831, + "rewards/rejected": 1.640213966369629, + "step": 3630 + }, + { + "accuracy": 0.612500011920929, + "epoch": 0.91, + "learning_rate": 7.905009795940451e-06, + "logps/chosen": -157.21725463867188, + "logps/margins": 0.5339088439941406, + "logps/rejected": -157.7511444091797, + "loss": 0.771, + "rewards/chosen": 1.8164851665496826, + "rewards/margins": 0.3296557068824768, + "rewards/rejected": 1.4868295192718506, + "step": 3640 + }, + { + "accuracy": 0.5874999761581421, + "epoch": 0.91, + "learning_rate": 7.894345809515728e-06, + "logps/chosen": -143.79592895507812, + "logps/margins": -19.141103744506836, + "logps/rejected": -124.65482330322266, + "loss": 0.747, + "rewards/chosen": 1.6596828699111938, + "rewards/margins": 0.2845556139945984, + "rewards/rejected": 1.3751273155212402, + "step": 3650 + }, + { + "accuracy": 0.612500011920929, + "epoch": 0.92, + "learning_rate": 7.883661985238277e-06, + "logps/chosen": -142.795166015625, + "logps/margins": 24.80935287475586, + "logps/rejected": -167.60450744628906, + "loss": 0.7595, + "rewards/chosen": 1.5907258987426758, + "rewards/margins": 0.24582405388355255, + "rewards/rejected": 1.3449019193649292, + "step": 3660 + }, + { + "accuracy": 0.5375000238418579, + "epoch": 0.92, + "learning_rate": 7.872958396335052e-06, + "logps/chosen": -144.52029418945312, + "logps/margins": 11.030893325805664, + "logps/rejected": -155.55117797851562, + "loss": 0.8323, + "rewards/chosen": 1.3688607215881348, + "rewards/margins": -0.032265789806842804, + "rewards/rejected": 1.4011263847351074, + "step": 3670 + }, + { + "accuracy": 0.5, + "epoch": 0.92, + "learning_rate": 7.862235116168476e-06, + "logps/chosen": -162.84774780273438, + "logps/margins": 13.034228324890137, + "logps/rejected": -175.8819580078125, + "loss": 0.7692, + "rewards/chosen": 1.3408589363098145, + "rewards/margins": -0.12653210759162903, + "rewards/rejected": 1.467391014099121, + "step": 3680 + }, + { + "accuracy": 0.5625, + "epoch": 0.92, + "learning_rate": 7.851492218235936e-06, + "logps/chosen": -145.5940704345703, + "logps/margins": -8.614776611328125, + "logps/rejected": -136.97927856445312, + "loss": 0.8084, + "rewards/chosen": 1.659777283668518, + "rewards/margins": 0.23170706629753113, + "rewards/rejected": 1.428070306777954, + "step": 3690 + }, + { + "accuracy": 0.5874999761581421, + "epoch": 0.93, + "learning_rate": 7.840729776169277e-06, + "logps/chosen": -144.14027404785156, + "logps/margins": 8.698692321777344, + "logps/rejected": -152.83897399902344, + "loss": 0.753, + "rewards/chosen": 1.6009562015533447, + "rewards/margins": 0.2810685634613037, + "rewards/rejected": 1.319887638092041, + "step": 3700 + }, + { + "accuracy": 0.625, + "epoch": 0.93, + "learning_rate": 7.829947863734302e-06, + "logps/chosen": -148.00088500976562, + "logps/margins": -4.441626071929932, + "logps/rejected": -143.55926513671875, + "loss": 0.6968, + "rewards/chosen": 1.6761270761489868, + "rewards/margins": 0.43606337904930115, + "rewards/rejected": 1.2400636672973633, + "step": 3710 + }, + { + "accuracy": 0.5625, + "epoch": 0.93, + "learning_rate": 7.819146554830265e-06, + "logps/chosen": -177.2362060546875, + "logps/margins": -36.520179748535156, + "logps/rejected": -140.7160186767578, + "loss": 0.7705, + "rewards/chosen": 1.4532076120376587, + "rewards/margins": 0.19381985068321228, + "rewards/rejected": 1.259387731552124, + "step": 3720 + }, + { + "accuracy": 0.5874999761581421, + "epoch": 0.93, + "learning_rate": 7.808325923489361e-06, + "logps/chosen": -180.3607635498047, + "logps/margins": -6.179944038391113, + "logps/rejected": -174.18081665039062, + "loss": 0.7403, + "rewards/chosen": 2.095268487930298, + "rewards/margins": 0.3455480635166168, + "rewards/rejected": 1.7497203350067139, + "step": 3730 + }, + { + "accuracy": 0.625, + "epoch": 0.94, + "learning_rate": 7.79748604387622e-06, + "logps/chosen": -145.1133575439453, + "logps/margins": -23.09562110900879, + "logps/rejected": -122.01773834228516, + "loss": 0.7348, + "rewards/chosen": 1.813936471939087, + "rewards/margins": 0.3782232701778412, + "rewards/rejected": 1.4357131719589233, + "step": 3740 + }, + { + "accuracy": 0.5375000238418579, + "epoch": 0.94, + "learning_rate": 7.786626990287403e-06, + "logps/chosen": -160.62728881835938, + "logps/margins": -16.29179573059082, + "logps/rejected": -144.33551025390625, + "loss": 0.7393, + "rewards/chosen": 1.7499898672103882, + "rewards/margins": 0.27588844299316406, + "rewards/rejected": 1.4741013050079346, + "step": 3750 + }, + { + "accuracy": 0.4625000059604645, + "epoch": 0.94, + "learning_rate": 7.775748837150887e-06, + "logps/chosen": -138.15023803710938, + "logps/margins": 11.254546165466309, + "logps/rejected": -149.40476989746094, + "loss": 0.7397, + "rewards/chosen": 1.5744158029556274, + "rewards/margins": -0.11831261217594147, + "rewards/rejected": 1.6927284002304077, + "step": 3760 + }, + { + "accuracy": 0.5625, + "epoch": 0.94, + "learning_rate": 7.764851659025557e-06, + "logps/chosen": -182.69862365722656, + "logps/margins": -28.24844741821289, + "logps/rejected": -154.45018005371094, + "loss": 0.8041, + "rewards/chosen": 1.814875602722168, + "rewards/margins": 0.14602433145046234, + "rewards/rejected": 1.6688512563705444, + "step": 3770 + }, + { + "accuracy": 0.5249999761581421, + "epoch": 0.94, + "learning_rate": 7.753935530600701e-06, + "logps/chosen": -148.72219848632812, + "logps/margins": 26.870590209960938, + "logps/rejected": -175.59280395507812, + "loss": 0.7701, + "rewards/chosen": 1.6487045288085938, + "rewards/margins": 0.2798656225204468, + "rewards/rejected": 1.3688390254974365, + "step": 3780 + }, + { + "accuracy": 0.512499988079071, + "epoch": 0.95, + "learning_rate": 7.743000526695483e-06, + "logps/chosen": -154.10372924804688, + "logps/margins": -19.46083641052246, + "logps/rejected": -134.6428985595703, + "loss": 0.814, + "rewards/chosen": 1.5845978260040283, + "rewards/margins": 0.22639739513397217, + "rewards/rejected": 1.3582004308700562, + "step": 3790 + }, + { + "accuracy": 0.5375000238418579, + "epoch": 0.95, + "learning_rate": 7.732046722258449e-06, + "logps/chosen": -128.5774383544922, + "logps/margins": 44.12200927734375, + "logps/rejected": -172.69947814941406, + "loss": 0.7305, + "rewards/chosen": 1.4345492124557495, + "rewards/margins": 0.1706922948360443, + "rewards/rejected": 1.2638570070266724, + "step": 3800 + }, + { + "accuracy": 0.5375000238418579, + "epoch": 0.95, + "learning_rate": 7.721074192366995e-06, + "logps/chosen": -182.56686401367188, + "logps/margins": -63.1977424621582, + "logps/rejected": -119.369140625, + "loss": 0.753, + "rewards/chosen": 1.6460587978363037, + "rewards/margins": 0.32916659116744995, + "rewards/rejected": 1.3168920278549194, + "step": 3810 + }, + { + "accuracy": 0.574999988079071, + "epoch": 0.95, + "learning_rate": 7.710083012226867e-06, + "logps/chosen": -153.7138671875, + "logps/margins": 25.897411346435547, + "logps/rejected": -179.61126708984375, + "loss": 0.7474, + "rewards/chosen": 1.5397789478302002, + "rewards/margins": 0.23249849677085876, + "rewards/rejected": 1.3072805404663086, + "step": 3820 + }, + { + "accuracy": 0.637499988079071, + "epoch": 0.96, + "learning_rate": 7.69907325717164e-06, + "logps/chosen": -141.46884155273438, + "logps/margins": -4.953783988952637, + "logps/rejected": -136.51507568359375, + "loss": 0.6873, + "rewards/chosen": 1.8768560886383057, + "rewards/margins": 0.4181802272796631, + "rewards/rejected": 1.4586756229400635, + "step": 3830 + }, + { + "accuracy": 0.625, + "epoch": 0.96, + "learning_rate": 7.6880450026622e-06, + "logps/chosen": -154.4681854248047, + "logps/margins": 18.503726959228516, + "logps/rejected": -172.97189331054688, + "loss": 0.7152, + "rewards/chosen": 1.681918740272522, + "rewards/margins": 0.39809325337409973, + "rewards/rejected": 1.2838256359100342, + "step": 3840 + }, + { + "accuracy": 0.5874999761581421, + "epoch": 0.96, + "learning_rate": 7.676998324286227e-06, + "logps/chosen": -146.54550170898438, + "logps/margins": 2.264216899871826, + "logps/rejected": -148.80972290039062, + "loss": 0.7201, + "rewards/chosen": 1.6331093311309814, + "rewards/margins": 0.2702735960483551, + "rewards/rejected": 1.3628356456756592, + "step": 3850 + }, + { + "accuracy": 0.550000011920929, + "epoch": 0.96, + "learning_rate": 7.665933297757681e-06, + "logps/chosen": -188.87542724609375, + "logps/margins": -4.7278594970703125, + "logps/rejected": -184.14756774902344, + "loss": 0.8157, + "rewards/chosen": 1.738433599472046, + "rewards/margins": 0.28007808327674866, + "rewards/rejected": 1.4583555459976196, + "step": 3860 + }, + { + "accuracy": 0.5249999761581421, + "epoch": 0.97, + "learning_rate": 7.654849998916279e-06, + "logps/chosen": -137.9560546875, + "logps/margins": 15.675437927246094, + "logps/rejected": -153.63150024414062, + "loss": 0.7777, + "rewards/chosen": 1.5735784769058228, + "rewards/margins": 0.2500787377357483, + "rewards/rejected": 1.3234997987747192, + "step": 3870 + }, + { + "accuracy": 0.512499988079071, + "epoch": 0.97, + "learning_rate": 7.643748503726972e-06, + "logps/chosen": -148.77391052246094, + "logps/margins": 11.068714141845703, + "logps/rejected": -159.84262084960938, + "loss": 0.7857, + "rewards/chosen": 1.5443099737167358, + "rewards/margins": 0.19855117797851562, + "rewards/rejected": 1.3457587957382202, + "step": 3880 + }, + { + "accuracy": 0.6499999761581421, + "epoch": 0.97, + "learning_rate": 7.63262888827944e-06, + "logps/chosen": -172.26815795898438, + "logps/margins": -22.975425720214844, + "logps/rejected": -149.29270935058594, + "loss": 0.7007, + "rewards/chosen": 1.791285753250122, + "rewards/margins": 0.5659340023994446, + "rewards/rejected": 1.2253516912460327, + "step": 3890 + }, + { + "accuracy": 0.5249999761581421, + "epoch": 0.97, + "learning_rate": 7.621491228787549e-06, + "logps/chosen": -135.41494750976562, + "logps/margins": 11.223118782043457, + "logps/rejected": -146.63807678222656, + "loss": 0.749, + "rewards/chosen": 1.467739462852478, + "rewards/margins": 0.27108117938041687, + "rewards/rejected": 1.1966582536697388, + "step": 3900 + }, + { + "accuracy": 0.5874999761581421, + "epoch": 0.98, + "learning_rate": 7.610335601588841e-06, + "logps/chosen": -139.9392547607422, + "logps/margins": -2.3184967041015625, + "logps/rejected": -137.62075805664062, + "loss": 0.72, + "rewards/chosen": 1.1532618999481201, + "rewards/margins": 0.17669668793678284, + "rewards/rejected": 0.9765651822090149, + "step": 3910 + }, + { + "accuracy": 0.6000000238418579, + "epoch": 0.98, + "learning_rate": 7.5991620831440115e-06, + "logps/chosen": -151.81661987304688, + "logps/margins": -13.098960876464844, + "logps/rejected": -138.7176513671875, + "loss": 0.7655, + "rewards/chosen": 1.327757716178894, + "rewards/margins": 0.21372659504413605, + "rewards/rejected": 1.1140310764312744, + "step": 3920 + }, + { + "accuracy": 0.5, + "epoch": 0.98, + "learning_rate": 7.587970750036382e-06, + "logps/chosen": -139.4608917236328, + "logps/margins": 43.11005401611328, + "logps/rejected": -182.57093811035156, + "loss": 0.7331, + "rewards/chosen": 1.573331594467163, + "rewards/margins": 0.09147181361913681, + "rewards/rejected": 1.481859803199768, + "step": 3930 + }, + { + "accuracy": 0.5375000238418579, + "epoch": 0.98, + "learning_rate": 7.576761678971374e-06, + "logps/chosen": -185.39028930664062, + "logps/margins": -18.38593292236328, + "logps/rejected": -167.00436401367188, + "loss": 0.7898, + "rewards/chosen": 1.3942126035690308, + "rewards/margins": -0.03146573528647423, + "rewards/rejected": 1.4256784915924072, + "step": 3940 + }, + { + "accuracy": 0.574999988079071, + "epoch": 0.99, + "learning_rate": 7.565534946775987e-06, + "logps/chosen": -149.90420532226562, + "logps/margins": 9.147459030151367, + "logps/rejected": -159.05166625976562, + "loss": 0.7562, + "rewards/chosen": 1.4092555046081543, + "rewards/margins": 0.2562856078147888, + "rewards/rejected": 1.1529698371887207, + "step": 3950 + }, + { + "accuracy": 0.625, + "epoch": 0.99, + "learning_rate": 7.554290630398266e-06, + "logps/chosen": -192.16375732421875, + "logps/margins": -16.23194694519043, + "logps/rejected": -175.93179321289062, + "loss": 0.7734, + "rewards/chosen": 1.9337066411972046, + "rewards/margins": 0.36547115445137024, + "rewards/rejected": 1.5682355165481567, + "step": 3960 + }, + { + "accuracy": 0.550000011920929, + "epoch": 0.99, + "learning_rate": 7.5430288069067845e-06, + "logps/chosen": -148.0521240234375, + "logps/margins": 13.075075149536133, + "logps/rejected": -161.12718200683594, + "loss": 0.7528, + "rewards/chosen": 1.4726979732513428, + "rewards/margins": 0.19843187928199768, + "rewards/rejected": 1.2742661237716675, + "step": 3970 + }, + { + "accuracy": 0.512499988079071, + "epoch": 0.99, + "learning_rate": 7.531749553490104e-06, + "logps/chosen": -164.43051147460938, + "logps/margins": -4.897665977478027, + "logps/rejected": -159.53282165527344, + "loss": 0.904, + "rewards/chosen": 1.2066371440887451, + "rewards/margins": 0.09104237705469131, + "rewards/rejected": 1.115594744682312, + "step": 3980 + }, + { + "accuracy": 0.574999988079071, + "epoch": 1.0, + "learning_rate": 7.520452947456253e-06, + "logps/chosen": -136.290283203125, + "logps/margins": -15.671384811401367, + "logps/rejected": -120.618896484375, + "loss": 0.8051, + "rewards/chosen": 1.5731546878814697, + "rewards/margins": 0.22923961281776428, + "rewards/rejected": 1.3439149856567383, + "step": 3990 + }, + { + "accuracy": 0.637499988079071, + "epoch": 1.0, + "learning_rate": 7.509139066232199e-06, + "logps/chosen": -177.18353271484375, + "logps/margins": -12.226296424865723, + "logps/rejected": -164.95724487304688, + "loss": 0.7727, + "rewards/chosen": 1.8766725063323975, + "rewards/margins": 0.3457058370113373, + "rewards/rejected": 1.5309667587280273, + "step": 4000 + }, + { + "accuracy": 0.42500001192092896, + "epoch": 1.0, + "learning_rate": 7.497807987363302e-06, + "logps/chosen": -174.06900024414062, + "logps/margins": 15.997894287109375, + "logps/rejected": -190.06689453125, + "loss": 0.7832, + "rewards/chosen": 1.6984426975250244, + "rewards/margins": 0.02764594554901123, + "rewards/rejected": 1.6707966327667236, + "step": 4010 + }, + { + "accuracy": 0.5625, + "epoch": 1.0, + "learning_rate": 7.486459788512807e-06, + "logps/chosen": -172.26309204101562, + "logps/margins": 13.097381591796875, + "logps/rejected": -185.3604736328125, + "loss": 0.7865, + "rewards/chosen": 1.7627757787704468, + "rewards/margins": 0.2671945095062256, + "rewards/rejected": 1.4955812692642212, + "step": 4020 + }, + { + "accuracy": 0.550000011920929, + "epoch": 1.01, + "learning_rate": 7.475094547461292e-06, + "logps/chosen": -160.47525024414062, + "logps/margins": -0.2944812774658203, + "logps/rejected": -160.18075561523438, + "loss": 0.737, + "rewards/chosen": 1.5085080862045288, + "rewards/margins": 0.24001213908195496, + "rewards/rejected": 1.2684959173202515, + "step": 4030 + }, + { + "accuracy": 0.637499988079071, + "epoch": 1.01, + "learning_rate": 7.46371234210615e-06, + "logps/chosen": -167.26171875, + "logps/margins": 2.0640063285827637, + "logps/rejected": -169.3257293701172, + "loss": 0.7642, + "rewards/chosen": 1.8065824508666992, + "rewards/margins": 0.19954144954681396, + "rewards/rejected": 1.6070410013198853, + "step": 4040 + }, + { + "accuracy": 0.6000000238418579, + "epoch": 1.01, + "learning_rate": 7.4523132504610385e-06, + "logps/chosen": -176.74020385742188, + "logps/margins": 2.258197546005249, + "logps/rejected": -178.9984130859375, + "loss": 0.7187, + "rewards/chosen": 1.6201982498168945, + "rewards/margins": 0.30109286308288574, + "rewards/rejected": 1.3191055059432983, + "step": 4050 + }, + { + "accuracy": 0.637499988079071, + "epoch": 1.01, + "learning_rate": 7.440897350655356e-06, + "logps/chosen": -169.9674530029297, + "logps/margins": 3.152961015701294, + "logps/rejected": -173.12039184570312, + "loss": 0.686, + "rewards/chosen": 1.5426915884017944, + "rewards/margins": 0.5120700001716614, + "rewards/rejected": 1.0306216478347778, + "step": 4060 + }, + { + "accuracy": 0.5, + "epoch": 1.02, + "learning_rate": 7.429464720933708e-06, + "logps/chosen": -172.28445434570312, + "logps/margins": 5.591904640197754, + "logps/rejected": -177.8763427734375, + "loss": 0.7539, + "rewards/chosen": 1.620485544204712, + "rewards/margins": 0.15299120545387268, + "rewards/rejected": 1.467494249343872, + "step": 4070 + }, + { + "accuracy": 0.550000011920929, + "epoch": 1.02, + "learning_rate": 7.4180154396553635e-06, + "logps/chosen": -182.07839965820312, + "logps/margins": -23.49332046508789, + "logps/rejected": -158.5850830078125, + "loss": 0.734, + "rewards/chosen": 1.6804590225219727, + "rewards/margins": 0.20921507477760315, + "rewards/rejected": 1.4712437391281128, + "step": 4080 + }, + { + "accuracy": 0.6625000238418579, + "epoch": 1.02, + "learning_rate": 7.406549585293723e-06, + "logps/chosen": -141.35646057128906, + "logps/margins": -2.136355400085449, + "logps/rejected": -139.2200927734375, + "loss": 0.6839, + "rewards/chosen": 1.5659420490264893, + "rewards/margins": 0.4100092053413391, + "rewards/rejected": 1.155932903289795, + "step": 4090 + }, + { + "accuracy": 0.512499988079071, + "epoch": 1.02, + "learning_rate": 7.395067236435779e-06, + "logps/chosen": -166.07357788085938, + "logps/margins": -18.360103607177734, + "logps/rejected": -147.71347045898438, + "loss": 0.7753, + "rewards/chosen": 1.3077415227890015, + "rewards/margins": -0.0014196217525750399, + "rewards/rejected": 1.3091611862182617, + "step": 4100 + }, + { + "accuracy": 0.550000011920929, + "epoch": 1.03, + "learning_rate": 7.383568471781575e-06, + "logps/chosen": -154.1851043701172, + "logps/margins": -15.830960273742676, + "logps/rejected": -138.35415649414062, + "loss": 0.7385, + "rewards/chosen": 1.4739139080047607, + "rewards/margins": 0.2409222573041916, + "rewards/rejected": 1.2329918146133423, + "step": 4110 + }, + { + "accuracy": 0.550000011920929, + "epoch": 1.03, + "learning_rate": 7.372053370143671e-06, + "logps/chosen": -171.49594116210938, + "logps/margins": -2.2147507667541504, + "logps/rejected": -169.2811737060547, + "loss": 0.7531, + "rewards/chosen": 1.4803273677825928, + "rewards/margins": 0.22889061272144318, + "rewards/rejected": 1.251436710357666, + "step": 4120 + }, + { + "accuracy": 0.4375, + "epoch": 1.03, + "learning_rate": 7.360522010446598e-06, + "logps/chosen": -172.559814453125, + "logps/margins": -4.311659812927246, + "logps/rejected": -168.2481689453125, + "loss": 0.7692, + "rewards/chosen": 1.453960657119751, + "rewards/margins": 0.009570717811584473, + "rewards/rejected": 1.4443899393081665, + "step": 4130 + }, + { + "accuracy": 0.550000011920929, + "epoch": 1.03, + "learning_rate": 7.348974471726324e-06, + "logps/chosen": -169.0891876220703, + "logps/margins": 14.942840576171875, + "logps/rejected": -184.03204345703125, + "loss": 0.7398, + "rewards/chosen": 2.0780797004699707, + "rewards/margins": 0.4016752243041992, + "rewards/rejected": 1.676404595375061, + "step": 4140 + }, + { + "accuracy": 0.4625000059604645, + "epoch": 1.04, + "learning_rate": 7.337410833129702e-06, + "logps/chosen": -177.57479858398438, + "logps/margins": -49.22270584106445, + "logps/rejected": -128.35211181640625, + "loss": 0.7239, + "rewards/chosen": 1.6473169326782227, + "rewards/margins": 0.14460408687591553, + "rewards/rejected": 1.5027129650115967, + "step": 4150 + }, + { + "accuracy": 0.5874999761581421, + "epoch": 1.04, + "learning_rate": 7.325831173913941e-06, + "logps/chosen": -146.4510955810547, + "logps/margins": -3.8535125255584717, + "logps/rejected": -142.59756469726562, + "loss": 0.7712, + "rewards/chosen": 1.5185668468475342, + "rewards/margins": 0.22370903193950653, + "rewards/rejected": 1.2948578596115112, + "step": 4160 + }, + { + "accuracy": 0.550000011920929, + "epoch": 1.04, + "learning_rate": 7.3142355734460505e-06, + "logps/chosen": -149.79026794433594, + "logps/margins": 1.4765808582305908, + "logps/rejected": -151.26686096191406, + "loss": 0.7457, + "rewards/chosen": 1.6303132772445679, + "rewards/margins": 0.054227955639362335, + "rewards/rejected": 1.5760853290557861, + "step": 4170 + }, + { + "accuracy": 0.5375000238418579, + "epoch": 1.04, + "learning_rate": 7.3026241112023e-06, + "logps/chosen": -144.48402404785156, + "logps/margins": 1.231209397315979, + "logps/rejected": -145.71524047851562, + "loss": 0.7475, + "rewards/chosen": 1.5024534463882446, + "rewards/margins": 0.03385138511657715, + "rewards/rejected": 1.4686022996902466, + "step": 4180 + }, + { + "accuracy": 0.6000000238418579, + "epoch": 1.05, + "learning_rate": 7.290996866767679e-06, + "logps/chosen": -164.1485137939453, + "logps/margins": -20.309356689453125, + "logps/rejected": -143.8391571044922, + "loss": 0.7513, + "rewards/chosen": 1.7277233600616455, + "rewards/margins": 0.5337013602256775, + "rewards/rejected": 1.1940219402313232, + "step": 4190 + }, + { + "accuracy": 0.612500011920929, + "epoch": 1.05, + "learning_rate": 7.279353919835349e-06, + "logps/chosen": -177.1483612060547, + "logps/margins": -28.922061920166016, + "logps/rejected": -148.226318359375, + "loss": 0.6995, + "rewards/chosen": 1.7972309589385986, + "rewards/margins": 0.4310983717441559, + "rewards/rejected": 1.366132378578186, + "step": 4200 + }, + { + "accuracy": 0.5375000238418579, + "epoch": 1.05, + "learning_rate": 7.267695350206094e-06, + "logps/chosen": -144.38973999023438, + "logps/margins": 10.877660751342773, + "logps/rejected": -155.2673797607422, + "loss": 0.8322, + "rewards/chosen": 1.438655138015747, + "rewards/margins": 0.23612526059150696, + "rewards/rejected": 1.2025299072265625, + "step": 4210 + }, + { + "accuracy": 0.699999988079071, + "epoch": 1.05, + "learning_rate": 7.256021237787775e-06, + "logps/chosen": -155.82058715820312, + "logps/margins": 0.8385831117630005, + "logps/rejected": -156.65916442871094, + "loss": 0.728, + "rewards/chosen": 1.9981924295425415, + "rewards/margins": 0.5415070652961731, + "rewards/rejected": 1.4566853046417236, + "step": 4220 + }, + { + "accuracy": 0.44999998807907104, + "epoch": 1.06, + "learning_rate": 7.244331662594787e-06, + "logps/chosen": -167.79994201660156, + "logps/margins": -6.7847795486450195, + "logps/rejected": -161.01515197753906, + "loss": 0.8022, + "rewards/chosen": 1.7649271488189697, + "rewards/margins": 0.01399625837802887, + "rewards/rejected": 1.7509310245513916, + "step": 4230 + }, + { + "accuracy": 0.6000000238418579, + "epoch": 1.06, + "learning_rate": 7.232626704747502e-06, + "logps/chosen": -125.04264831542969, + "logps/margins": 12.3101806640625, + "logps/rejected": -137.3528289794922, + "loss": 0.7487, + "rewards/chosen": 1.5529954433441162, + "rewards/margins": 0.21140269935131073, + "rewards/rejected": 1.341592788696289, + "step": 4240 + }, + { + "accuracy": 0.5249999761581421, + "epoch": 1.06, + "learning_rate": 7.22090644447173e-06, + "logps/chosen": -137.6590118408203, + "logps/margins": 24.768798828125, + "logps/rejected": -162.42779541015625, + "loss": 0.8526, + "rewards/chosen": 1.300793170928955, + "rewards/margins": -0.022921044379472733, + "rewards/rejected": 1.323714256286621, + "step": 4250 + }, + { + "accuracy": 0.5625, + "epoch": 1.06, + "learning_rate": 7.209170962098161e-06, + "logps/chosen": -157.95462036132812, + "logps/margins": -33.26263427734375, + "logps/rejected": -124.69200134277344, + "loss": 0.7308, + "rewards/chosen": 1.5474984645843506, + "rewards/margins": 0.1565721333026886, + "rewards/rejected": 1.3909262418746948, + "step": 4260 + }, + { + "accuracy": 0.625, + "epoch": 1.07, + "learning_rate": 7.197420338061818e-06, + "logps/chosen": -163.55955505371094, + "logps/margins": -10.182560920715332, + "logps/rejected": -153.3769989013672, + "loss": 0.7416, + "rewards/chosen": 1.7411181926727295, + "rewards/margins": 0.4385937750339508, + "rewards/rejected": 1.3025243282318115, + "step": 4270 + }, + { + "accuracy": 0.5375000238418579, + "epoch": 1.07, + "learning_rate": 7.1856546529015055e-06, + "logps/chosen": -123.6063003540039, + "logps/margins": 13.551968574523926, + "logps/rejected": -137.1582794189453, + "loss": 0.7675, + "rewards/chosen": 1.3951785564422607, + "rewards/margins": 0.17083539068698883, + "rewards/rejected": 1.2243432998657227, + "step": 4280 + }, + { + "accuracy": 0.612500011920929, + "epoch": 1.07, + "learning_rate": 7.173873987259254e-06, + "logps/chosen": -179.62075805664062, + "logps/margins": -38.27435302734375, + "logps/rejected": -141.34640502929688, + "loss": 0.7412, + "rewards/chosen": 1.5116660594940186, + "rewards/margins": 0.26735085248947144, + "rewards/rejected": 1.2443150281906128, + "step": 4290 + }, + { + "accuracy": 0.574999988079071, + "epoch": 1.07, + "learning_rate": 7.1620784218797724e-06, + "logps/chosen": -170.80270385742188, + "logps/margins": -0.8492755889892578, + "logps/rejected": -169.95346069335938, + "loss": 0.7269, + "rewards/chosen": 1.8155040740966797, + "rewards/margins": 0.3146992325782776, + "rewards/rejected": 1.5008049011230469, + "step": 4300 + }, + { + "accuracy": 0.6000000238418579, + "epoch": 1.08, + "learning_rate": 7.150268037609893e-06, + "logps/chosen": -145.50621032714844, + "logps/margins": -4.444832801818848, + "logps/rejected": -141.06137084960938, + "loss": 0.743, + "rewards/chosen": 1.6890052556991577, + "rewards/margins": 0.23061080276966095, + "rewards/rejected": 1.4583944082260132, + "step": 4310 + }, + { + "accuracy": 0.625, + "epoch": 1.08, + "learning_rate": 7.138442915398021e-06, + "logps/chosen": -149.55722045898438, + "logps/margins": -9.289469718933105, + "logps/rejected": -140.2677459716797, + "loss": 0.7399, + "rewards/chosen": 1.3576009273529053, + "rewards/margins": 0.11820483207702637, + "rewards/rejected": 1.239396095275879, + "step": 4320 + }, + { + "accuracy": 0.4749999940395355, + "epoch": 1.08, + "learning_rate": 7.126603136293564e-06, + "logps/chosen": -181.40245056152344, + "logps/margins": -5.146039009094238, + "logps/rejected": -176.2564239501953, + "loss": 0.7677, + "rewards/chosen": 1.7800486087799072, + "rewards/margins": -0.07062134891748428, + "rewards/rejected": 1.8506698608398438, + "step": 4330 + }, + { + "accuracy": 0.512499988079071, + "epoch": 1.08, + "learning_rate": 7.114748781446397e-06, + "logps/chosen": -163.51194763183594, + "logps/margins": -5.786983489990234, + "logps/rejected": -157.7249755859375, + "loss": 0.7623, + "rewards/chosen": 1.6984193325042725, + "rewards/margins": 0.06969909369945526, + "rewards/rejected": 1.6287205219268799, + "step": 4340 + }, + { + "accuracy": 0.5, + "epoch": 1.09, + "learning_rate": 7.102879932106296e-06, + "logps/chosen": -161.2931365966797, + "logps/margins": 5.0004563331604, + "logps/rejected": -166.29360961914062, + "loss": 0.7716, + "rewards/chosen": 1.6429948806762695, + "rewards/margins": -0.09241975098848343, + "rewards/rejected": 1.735414743423462, + "step": 4350 + }, + { + "accuracy": 0.6499999761581421, + "epoch": 1.09, + "learning_rate": 7.090996669622381e-06, + "logps/chosen": -154.58047485351562, + "logps/margins": -6.111647129058838, + "logps/rejected": -148.46884155273438, + "loss": 0.7228, + "rewards/chosen": 1.878414511680603, + "rewards/margins": 0.49233540892601013, + "rewards/rejected": 1.3860793113708496, + "step": 4360 + }, + { + "accuracy": 0.5249999761581421, + "epoch": 1.09, + "learning_rate": 7.079099075442559e-06, + "logps/chosen": -169.3294677734375, + "logps/margins": -27.80454444885254, + "logps/rejected": -141.52493286132812, + "loss": 0.7339, + "rewards/chosen": 1.7300500869750977, + "rewards/margins": 0.06426367908716202, + "rewards/rejected": 1.6657863855361938, + "step": 4370 + }, + { + "accuracy": 0.6625000238418579, + "epoch": 1.09, + "learning_rate": 7.067187231112969e-06, + "logps/chosen": -134.58961486816406, + "logps/margins": -16.743486404418945, + "logps/rejected": -117.84611511230469, + "loss": 0.6995, + "rewards/chosen": 1.6379743814468384, + "rewards/margins": 0.5354331135749817, + "rewards/rejected": 1.102541208267212, + "step": 4380 + }, + { + "accuracy": 0.699999988079071, + "epoch": 1.1, + "learning_rate": 7.055261218277418e-06, + "logps/chosen": -149.31375122070312, + "logps/margins": -3.707923412322998, + "logps/rejected": -145.6058349609375, + "loss": 0.67, + "rewards/chosen": 1.926243543624878, + "rewards/margins": 0.724867582321167, + "rewards/rejected": 1.2013760805130005, + "step": 4390 + }, + { + "accuracy": 0.637499988079071, + "epoch": 1.1, + "learning_rate": 7.043321118676826e-06, + "logps/chosen": -187.04171752929688, + "logps/margins": -15.380317687988281, + "logps/rejected": -171.66140747070312, + "loss": 0.6769, + "rewards/chosen": 1.7952638864517212, + "rewards/margins": 0.5859988927841187, + "rewards/rejected": 1.2092649936676025, + "step": 4400 + }, + { + "accuracy": 0.5874999761581421, + "epoch": 1.1, + "learning_rate": 7.03136701414866e-06, + "logps/chosen": -156.5198516845703, + "logps/margins": -12.496341705322266, + "logps/rejected": -144.02349853515625, + "loss": 0.7027, + "rewards/chosen": 1.772512674331665, + "rewards/margins": 0.2056950330734253, + "rewards/rejected": 1.5668176412582397, + "step": 4410 + }, + { + "accuracy": 0.5625, + "epoch": 1.1, + "learning_rate": 7.019398986626381e-06, + "logps/chosen": -163.7215118408203, + "logps/margins": -28.276840209960938, + "logps/rejected": -135.44467163085938, + "loss": 0.7138, + "rewards/chosen": 1.6778713464736938, + "rewards/margins": 0.12970879673957825, + "rewards/rejected": 1.548162579536438, + "step": 4420 + }, + { + "accuracy": 0.5249999761581421, + "epoch": 1.11, + "learning_rate": 7.007417118138879e-06, + "logps/chosen": -154.2147979736328, + "logps/margins": -1.8716331720352173, + "logps/rejected": -152.34317016601562, + "loss": 0.7362, + "rewards/chosen": 1.58733332157135, + "rewards/margins": 0.19882899522781372, + "rewards/rejected": 1.3885042667388916, + "step": 4430 + }, + { + "accuracy": 0.612500011920929, + "epoch": 1.11, + "learning_rate": 6.9954214908099064e-06, + "logps/chosen": -141.78665161132812, + "logps/margins": -21.271657943725586, + "logps/rejected": -120.51497650146484, + "loss": 0.7502, + "rewards/chosen": 1.447582483291626, + "rewards/margins": 0.28070706129074097, + "rewards/rejected": 1.1668753623962402, + "step": 4440 + }, + { + "accuracy": 0.6499999761581421, + "epoch": 1.11, + "learning_rate": 6.9834121868575165e-06, + "logps/chosen": -182.0108642578125, + "logps/margins": -37.0256233215332, + "logps/rejected": -144.98524475097656, + "loss": 0.717, + "rewards/chosen": 1.8737337589263916, + "rewards/margins": 0.47903576493263245, + "rewards/rejected": 1.394697666168213, + "step": 4450 + }, + { + "accuracy": 0.4749999940395355, + "epoch": 1.11, + "learning_rate": 6.97138928859351e-06, + "logps/chosen": -147.23580932617188, + "logps/margins": 17.645111083984375, + "logps/rejected": -164.8809051513672, + "loss": 0.7096, + "rewards/chosen": 1.4505765438079834, + "rewards/margins": 0.0757400244474411, + "rewards/rejected": 1.3748365640640259, + "step": 4460 + }, + { + "accuracy": 0.5375000238418579, + "epoch": 1.12, + "learning_rate": 6.959352878422856e-06, + "logps/chosen": -186.58334350585938, + "logps/margins": -21.638961791992188, + "logps/rejected": -164.94439697265625, + "loss": 0.6958, + "rewards/chosen": 1.5360299348831177, + "rewards/margins": 0.12578359246253967, + "rewards/rejected": 1.4102461338043213, + "step": 4470 + }, + { + "accuracy": 0.550000011920929, + "epoch": 1.12, + "learning_rate": 6.947303038843141e-06, + "logps/chosen": -158.3206787109375, + "logps/margins": 2.165442943572998, + "logps/rejected": -160.4861297607422, + "loss": 0.7901, + "rewards/chosen": 1.6433191299438477, + "rewards/margins": 0.22361640632152557, + "rewards/rejected": 1.4197026491165161, + "step": 4480 + }, + { + "accuracy": 0.512499988079071, + "epoch": 1.12, + "learning_rate": 6.935239852443989e-06, + "logps/chosen": -159.32958984375, + "logps/margins": -23.872114181518555, + "logps/rejected": -135.4574737548828, + "loss": 0.7102, + "rewards/chosen": 1.581380009651184, + "rewards/margins": 0.24028635025024414, + "rewards/rejected": 1.34109365940094, + "step": 4490 + }, + { + "accuracy": 0.574999988079071, + "epoch": 1.12, + "learning_rate": 6.92316340190651e-06, + "logps/chosen": -198.69090270996094, + "logps/margins": -23.158008575439453, + "logps/rejected": -175.5328826904297, + "loss": 0.7108, + "rewards/chosen": 1.8736053705215454, + "rewards/margins": 0.3234180808067322, + "rewards/rejected": 1.550187349319458, + "step": 4500 + }, + { + "accuracy": 0.625, + "epoch": 1.13, + "learning_rate": 6.911073770002718e-06, + "logps/chosen": -157.5779266357422, + "logps/margins": -30.79327392578125, + "logps/rejected": -126.78465270996094, + "loss": 0.7329, + "rewards/chosen": 1.625817894935608, + "rewards/margins": 0.45610690116882324, + "rewards/rejected": 1.1697109937667847, + "step": 4510 + }, + { + "accuracy": 0.6000000238418579, + "epoch": 1.13, + "learning_rate": 6.898971039594983e-06, + "logps/chosen": -153.0753173828125, + "logps/margins": -8.957990646362305, + "logps/rejected": -144.11732482910156, + "loss": 0.7981, + "rewards/chosen": 1.60171639919281, + "rewards/margins": 0.17321477830410004, + "rewards/rejected": 1.4285017251968384, + "step": 4520 + }, + { + "accuracy": 0.6499999761581421, + "epoch": 1.13, + "learning_rate": 6.886855293635444e-06, + "logps/chosen": -164.86683654785156, + "logps/margins": -13.63398265838623, + "logps/rejected": -151.2328643798828, + "loss": 0.7408, + "rewards/chosen": 1.8711875677108765, + "rewards/margins": 0.42857685685157776, + "rewards/rejected": 1.4426108598709106, + "step": 4530 + }, + { + "accuracy": 0.48750001192092896, + "epoch": 1.14, + "learning_rate": 6.874726615165453e-06, + "logps/chosen": -138.4912109375, + "logps/margins": -21.73735809326172, + "logps/rejected": -116.75386047363281, + "loss": 0.7853, + "rewards/chosen": 1.2705638408660889, + "rewards/margins": -0.026124369353055954, + "rewards/rejected": 1.296688199043274, + "step": 4540 + }, + { + "accuracy": 0.6499999761581421, + "epoch": 1.14, + "learning_rate": 6.862585087314998e-06, + "logps/chosen": -157.77963256835938, + "logps/margins": -24.891592025756836, + "logps/rejected": -132.88804626464844, + "loss": 0.7262, + "rewards/chosen": 1.5259357690811157, + "rewards/margins": 0.2608209252357483, + "rewards/rejected": 1.2651147842407227, + "step": 4550 + }, + { + "accuracy": 0.550000011920929, + "epoch": 1.14, + "learning_rate": 6.850430793302137e-06, + "logps/chosen": -134.70266723632812, + "logps/margins": 32.7626838684082, + "logps/rejected": -167.46533203125, + "loss": 0.778, + "rewards/chosen": 1.5278652906417847, + "rewards/margins": 0.14634697139263153, + "rewards/rejected": 1.3815182447433472, + "step": 4560 + }, + { + "accuracy": 0.5625, + "epoch": 1.14, + "learning_rate": 6.838263816432433e-06, + "logps/chosen": -144.55160522460938, + "logps/margins": -26.96480941772461, + "logps/rejected": -117.58680725097656, + "loss": 0.7416, + "rewards/chosen": 1.9209918975830078, + "rewards/margins": 0.3755248785018921, + "rewards/rejected": 1.5454667806625366, + "step": 4570 + }, + { + "accuracy": 0.512499988079071, + "epoch": 1.15, + "learning_rate": 6.826084240098369e-06, + "logps/chosen": -157.5438690185547, + "logps/margins": 4.361745834350586, + "logps/rejected": -161.90560913085938, + "loss": 0.7426, + "rewards/chosen": 1.524963140487671, + "rewards/margins": 0.08754493296146393, + "rewards/rejected": 1.4374183416366577, + "step": 4580 + }, + { + "accuracy": 0.5625, + "epoch": 1.15, + "learning_rate": 6.813892147778789e-06, + "logps/chosen": -156.1680450439453, + "logps/margins": -16.739023208618164, + "logps/rejected": -139.42901611328125, + "loss": 0.758, + "rewards/chosen": 1.4516359567642212, + "rewards/margins": 0.1226608008146286, + "rewards/rejected": 1.328974962234497, + "step": 4590 + }, + { + "accuracy": 0.6000000238418579, + "epoch": 1.15, + "learning_rate": 6.801687623038324e-06, + "logps/chosen": -158.90335083007812, + "logps/margins": 2.980297803878784, + "logps/rejected": -161.88365173339844, + "loss": 0.7656, + "rewards/chosen": 1.615722417831421, + "rewards/margins": 0.21170561015605927, + "rewards/rejected": 1.4040168523788452, + "step": 4600 + }, + { + "accuracy": 0.5625, + "epoch": 1.15, + "learning_rate": 6.789470749526814e-06, + "logps/chosen": -164.73312377929688, + "logps/margins": -19.807016372680664, + "logps/rejected": -144.92611694335938, + "loss": 0.7119, + "rewards/chosen": 1.5639922618865967, + "rewards/margins": 0.19748859107494354, + "rewards/rejected": 1.3665037155151367, + "step": 4610 + }, + { + "accuracy": 0.5375000238418579, + "epoch": 1.16, + "learning_rate": 6.777241610978736e-06, + "logps/chosen": -167.4090576171875, + "logps/margins": -9.23045539855957, + "logps/rejected": -158.17860412597656, + "loss": 0.7487, + "rewards/chosen": 1.492275595664978, + "rewards/margins": 0.11757062375545502, + "rewards/rejected": 1.3747050762176514, + "step": 4620 + }, + { + "accuracy": 0.574999988079071, + "epoch": 1.16, + "learning_rate": 6.765000291212632e-06, + "logps/chosen": -145.50167846679688, + "logps/margins": 26.4615535736084, + "logps/rejected": -171.96322631835938, + "loss": 0.7526, + "rewards/chosen": 1.7057723999023438, + "rewards/margins": 0.305040568113327, + "rewards/rejected": 1.4007318019866943, + "step": 4630 + }, + { + "accuracy": 0.6499999761581421, + "epoch": 1.16, + "learning_rate": 6.752746874130538e-06, + "logps/chosen": -151.9162139892578, + "logps/margins": -17.291059494018555, + "logps/rejected": -134.62515258789062, + "loss": 0.7187, + "rewards/chosen": 1.5880920886993408, + "rewards/margins": 0.4144747853279114, + "rewards/rejected": 1.1736172437667847, + "step": 4640 + }, + { + "accuracy": 0.5375000238418579, + "epoch": 1.16, + "learning_rate": 6.740481443717403e-06, + "logps/chosen": -147.86163330078125, + "logps/margins": -6.114023685455322, + "logps/rejected": -141.74758911132812, + "loss": 0.7385, + "rewards/chosen": 1.6136209964752197, + "rewards/margins": 0.17959676682949066, + "rewards/rejected": 1.4340239763259888, + "step": 4650 + }, + { + "accuracy": 0.512499988079071, + "epoch": 1.17, + "learning_rate": 6.728204084040513e-06, + "logps/chosen": -184.00587463378906, + "logps/margins": -40.49638366699219, + "logps/rejected": -143.50949096679688, + "loss": 0.7737, + "rewards/chosen": 1.5602309703826904, + "rewards/margins": 0.15864290297031403, + "rewards/rejected": 1.4015882015228271, + "step": 4660 + }, + { + "accuracy": 0.4749999940395355, + "epoch": 1.17, + "learning_rate": 6.715914879248918e-06, + "logps/chosen": -178.43313598632812, + "logps/margins": -22.022329330444336, + "logps/rejected": -156.41079711914062, + "loss": 0.8003, + "rewards/chosen": 1.5573582649230957, + "rewards/margins": 0.1599697321653366, + "rewards/rejected": 1.3973884582519531, + "step": 4670 + }, + { + "accuracy": 0.6499999761581421, + "epoch": 1.17, + "learning_rate": 6.703613913572857e-06, + "logps/chosen": -159.41444396972656, + "logps/margins": -21.514019012451172, + "logps/rejected": -137.90042114257812, + "loss": 0.7118, + "rewards/chosen": 1.942657470703125, + "rewards/margins": 0.40954598784446716, + "rewards/rejected": 1.533111572265625, + "step": 4680 + }, + { + "accuracy": 0.5874999761581421, + "epoch": 1.17, + "learning_rate": 6.691301271323178e-06, + "logps/chosen": -140.05186462402344, + "logps/margins": -15.965902328491211, + "logps/rejected": -124.08597564697266, + "loss": 0.7296, + "rewards/chosen": 1.7811996936798096, + "rewards/margins": 0.3652705252170563, + "rewards/rejected": 1.4159290790557861, + "step": 4690 + }, + { + "accuracy": 0.48750001192092896, + "epoch": 1.18, + "learning_rate": 6.678977036890754e-06, + "logps/chosen": -157.95156860351562, + "logps/margins": 6.859846591949463, + "logps/rejected": -164.8114013671875, + "loss": 0.7019, + "rewards/chosen": 1.6833372116088867, + "rewards/margins": 0.11221089214086533, + "rewards/rejected": 1.5711263418197632, + "step": 4700 + }, + { + "accuracy": 0.737500011920929, + "epoch": 1.18, + "learning_rate": 6.666641294745923e-06, + "logps/chosen": -146.15841674804688, + "logps/margins": 13.32597541809082, + "logps/rejected": -159.48439025878906, + "loss": 0.7545, + "rewards/chosen": 1.709477186203003, + "rewards/margins": 0.25687000155448914, + "rewards/rejected": 1.452607274055481, + "step": 4710 + }, + { + "accuracy": 0.550000011920929, + "epoch": 1.18, + "learning_rate": 6.654294129437885e-06, + "logps/chosen": -151.46261596679688, + "logps/margins": -9.845657348632812, + "logps/rejected": -141.61697387695312, + "loss": 0.7667, + "rewards/chosen": 1.5328562259674072, + "rewards/margins": 0.08666034042835236, + "rewards/rejected": 1.4461958408355713, + "step": 4720 + }, + { + "accuracy": 0.5874999761581421, + "epoch": 1.18, + "learning_rate": 6.641935625594138e-06, + "logps/chosen": -178.7781982421875, + "logps/margins": 28.1187801361084, + "logps/rejected": -206.89700317382812, + "loss": 0.7624, + "rewards/chosen": 1.5296825170516968, + "rewards/margins": 0.3762456774711609, + "rewards/rejected": 1.1534368991851807, + "step": 4730 + }, + { + "accuracy": 0.6000000238418579, + "epoch": 1.19, + "learning_rate": 6.629565867919897e-06, + "logps/chosen": -153.3395538330078, + "logps/margins": -14.405820846557617, + "logps/rejected": -138.9337158203125, + "loss": 0.732, + "rewards/chosen": 1.5936164855957031, + "rewards/margins": 0.404587984085083, + "rewards/rejected": 1.1890285015106201, + "step": 4740 + }, + { + "accuracy": 0.6625000238418579, + "epoch": 1.19, + "learning_rate": 6.617184941197509e-06, + "logps/chosen": -158.10215759277344, + "logps/margins": 1.7747207880020142, + "logps/rejected": -159.87689208984375, + "loss": 0.7268, + "rewards/chosen": 1.8418605327606201, + "rewards/margins": 0.3878012001514435, + "rewards/rejected": 1.454059362411499, + "step": 4750 + }, + { + "accuracy": 0.6499999761581421, + "epoch": 1.19, + "learning_rate": 6.604792930285875e-06, + "logps/chosen": -157.2828826904297, + "logps/margins": -21.42497444152832, + "logps/rejected": -135.85789489746094, + "loss": 0.7455, + "rewards/chosen": 1.6213194131851196, + "rewards/margins": 0.3128574788570404, + "rewards/rejected": 1.3084617853164673, + "step": 4760 + }, + { + "accuracy": 0.5375000238418579, + "epoch": 1.19, + "learning_rate": 6.592389920119864e-06, + "logps/chosen": -171.2572021484375, + "logps/margins": -20.275569915771484, + "logps/rejected": -150.98162841796875, + "loss": 0.8146, + "rewards/chosen": 1.721016526222229, + "rewards/margins": 0.12398360669612885, + "rewards/rejected": 1.597032904624939, + "step": 4770 + }, + { + "accuracy": 0.550000011920929, + "epoch": 1.2, + "learning_rate": 6.579975995709736e-06, + "logps/chosen": -142.03707885742188, + "logps/margins": -21.605384826660156, + "logps/rejected": -120.43168640136719, + "loss": 0.7, + "rewards/chosen": 1.5253794193267822, + "rewards/margins": 0.2672179639339447, + "rewards/rejected": 1.2581614255905151, + "step": 4780 + }, + { + "accuracy": 0.612500011920929, + "epoch": 1.2, + "learning_rate": 6.567551242140555e-06, + "logps/chosen": -157.65887451171875, + "logps/margins": 5.36818265914917, + "logps/rejected": -163.02706909179688, + "loss": 0.8064, + "rewards/chosen": 1.6819689273834229, + "rewards/margins": 0.06451436132192612, + "rewards/rejected": 1.6174545288085938, + "step": 4790 + }, + { + "accuracy": 0.625, + "epoch": 1.2, + "learning_rate": 6.555115744571615e-06, + "logps/chosen": -147.1795654296875, + "logps/margins": 21.539039611816406, + "logps/rejected": -168.71861267089844, + "loss": 0.7622, + "rewards/chosen": 1.353946566581726, + "rewards/margins": 0.3413313627243042, + "rewards/rejected": 1.0126152038574219, + "step": 4800 + }, + { + "accuracy": 0.625, + "epoch": 1.2, + "learning_rate": 6.542669588235841e-06, + "logps/chosen": -161.65402221679688, + "logps/margins": 7.2505202293396, + "logps/rejected": -168.90457153320312, + "loss": 0.7022, + "rewards/chosen": 1.7740504741668701, + "rewards/margins": 0.35845574736595154, + "rewards/rejected": 1.4155948162078857, + "step": 4810 + }, + { + "accuracy": 0.6499999761581421, + "epoch": 1.21, + "learning_rate": 6.53021285843922e-06, + "logps/chosen": -159.63368225097656, + "logps/margins": -11.669804573059082, + "logps/rejected": -147.96389770507812, + "loss": 0.7178, + "rewards/chosen": 1.607292890548706, + "rewards/margins": 0.34129029512405396, + "rewards/rejected": 1.2660025358200073, + "step": 4820 + }, + { + "accuracy": 0.5874999761581421, + "epoch": 1.21, + "learning_rate": 6.517745640560201e-06, + "logps/chosen": -159.4026336669922, + "logps/margins": -11.522550582885742, + "logps/rejected": -147.88009643554688, + "loss": 0.7043, + "rewards/chosen": 1.55764901638031, + "rewards/margins": 0.2731221318244934, + "rewards/rejected": 1.284527063369751, + "step": 4830 + }, + { + "accuracy": 0.5625, + "epoch": 1.21, + "learning_rate": 6.505268020049127e-06, + "logps/chosen": -143.4359588623047, + "logps/margins": 10.558084487915039, + "logps/rejected": -153.99404907226562, + "loss": 0.7497, + "rewards/chosen": 1.4685932397842407, + "rewards/margins": 0.2947949469089508, + "rewards/rejected": 1.1737983226776123, + "step": 4840 + }, + { + "accuracy": 0.574999988079071, + "epoch": 1.21, + "learning_rate": 6.492780082427637e-06, + "logps/chosen": -189.3929901123047, + "logps/margins": -49.71562957763672, + "logps/rejected": -139.6773681640625, + "loss": 0.7285, + "rewards/chosen": 1.7643178701400757, + "rewards/margins": 0.4061128497123718, + "rewards/rejected": 1.3582050800323486, + "step": 4850 + }, + { + "accuracy": 0.48750001192092896, + "epoch": 1.22, + "learning_rate": 6.480281913288083e-06, + "logps/chosen": -160.0203399658203, + "logps/margins": -16.416759490966797, + "logps/rejected": -143.60357666015625, + "loss": 0.6886, + "rewards/chosen": 1.394260287284851, + "rewards/margins": 0.09722080826759338, + "rewards/rejected": 1.2970393896102905, + "step": 4860 + }, + { + "accuracy": 0.5874999761581421, + "epoch": 1.22, + "learning_rate": 6.467773598292946e-06, + "logps/chosen": -155.70057678222656, + "logps/margins": 16.508405685424805, + "logps/rejected": -172.20895385742188, + "loss": 0.7475, + "rewards/chosen": 1.60134756565094, + "rewards/margins": 0.18941007554531097, + "rewards/rejected": 1.4119374752044678, + "step": 4870 + }, + { + "accuracy": 0.38749998807907104, + "epoch": 1.22, + "learning_rate": 6.455255223174243e-06, + "logps/chosen": -157.41146850585938, + "logps/margins": 18.128217697143555, + "logps/rejected": -175.53970336914062, + "loss": 0.824, + "rewards/chosen": 1.5767641067504883, + "rewards/margins": -0.034236349165439606, + "rewards/rejected": 1.611000418663025, + "step": 4880 + }, + { + "accuracy": 0.5874999761581421, + "epoch": 1.22, + "learning_rate": 6.442726873732947e-06, + "logps/chosen": -152.3926544189453, + "logps/margins": -25.8064022064209, + "logps/rejected": -126.58624267578125, + "loss": 0.706, + "rewards/chosen": 1.5150153636932373, + "rewards/margins": 0.406000554561615, + "rewards/rejected": 1.109014868736267, + "step": 4890 + }, + { + "accuracy": 0.625, + "epoch": 1.23, + "learning_rate": 6.430188635838392e-06, + "logps/chosen": -172.89962768554688, + "logps/margins": -40.604454040527344, + "logps/rejected": -132.29519653320312, + "loss": 0.7495, + "rewards/chosen": 1.4899805784225464, + "rewards/margins": 0.25234854221343994, + "rewards/rejected": 1.237632155418396, + "step": 4900 + }, + { + "accuracy": 0.637499988079071, + "epoch": 1.23, + "learning_rate": 6.41764059542769e-06, + "logps/chosen": -141.91195678710938, + "logps/margins": 7.650518894195557, + "logps/rejected": -149.56246948242188, + "loss": 0.7188, + "rewards/chosen": 1.6412073373794556, + "rewards/margins": 0.34154170751571655, + "rewards/rejected": 1.2996656894683838, + "step": 4910 + }, + { + "accuracy": 0.612500011920929, + "epoch": 1.23, + "learning_rate": 6.40508283850514e-06, + "logps/chosen": -169.83633422851562, + "logps/margins": -10.035348892211914, + "logps/rejected": -159.8009796142578, + "loss": 0.7436, + "rewards/chosen": 1.6321032047271729, + "rewards/margins": 0.18890276551246643, + "rewards/rejected": 1.4432004690170288, + "step": 4920 + }, + { + "accuracy": 0.637499988079071, + "epoch": 1.23, + "learning_rate": 6.3925154511416345e-06, + "logps/chosen": -127.94288635253906, + "logps/margins": 8.8143310546875, + "logps/rejected": -136.75721740722656, + "loss": 0.7437, + "rewards/chosen": 1.6725658178329468, + "rewards/margins": 0.2667856812477112, + "rewards/rejected": 1.4057800769805908, + "step": 4930 + }, + { + "accuracy": 0.6000000238418579, + "epoch": 1.23, + "learning_rate": 6.3799385194740735e-06, + "logps/chosen": -122.64558410644531, + "logps/margins": 12.790786743164062, + "logps/rejected": -135.43637084960938, + "loss": 0.7081, + "rewards/chosen": 1.461777925491333, + "rewards/margins": 0.3406018614768982, + "rewards/rejected": 1.12117600440979, + "step": 4940 + }, + { + "accuracy": 0.6000000238418579, + "epoch": 1.24, + "learning_rate": 6.367352129704777e-06, + "logps/chosen": -149.3013916015625, + "logps/margins": 16.090312957763672, + "logps/rejected": -165.3917236328125, + "loss": 0.725, + "rewards/chosen": 1.5528943538665771, + "rewards/margins": 0.3267061114311218, + "rewards/rejected": 1.2261884212493896, + "step": 4950 + }, + { + "accuracy": 0.4625000059604645, + "epoch": 1.24, + "learning_rate": 6.354756368100888e-06, + "logps/chosen": -179.0587615966797, + "logps/margins": -17.813461303710938, + "logps/rejected": -161.2452850341797, + "loss": 0.7819, + "rewards/chosen": 1.5369234085083008, + "rewards/margins": -0.058191489428281784, + "rewards/rejected": 1.5951149463653564, + "step": 4960 + }, + { + "accuracy": 0.5625, + "epoch": 1.24, + "learning_rate": 6.342151320993788e-06, + "logps/chosen": -122.87980651855469, + "logps/margins": 21.917587280273438, + "logps/rejected": -144.7974090576172, + "loss": 0.7713, + "rewards/chosen": 1.431814432144165, + "rewards/margins": 0.16931791603565216, + "rewards/rejected": 1.2624963521957397, + "step": 4970 + }, + { + "accuracy": 0.5625, + "epoch": 1.25, + "learning_rate": 6.329537074778494e-06, + "logps/chosen": -179.00137329101562, + "logps/margins": -8.288309097290039, + "logps/rejected": -170.71307373046875, + "loss": 0.7655, + "rewards/chosen": 1.9044349193572998, + "rewards/margins": 0.2563362717628479, + "rewards/rejected": 1.6480985879898071, + "step": 4980 + }, + { + "accuracy": 0.699999988079071, + "epoch": 1.25, + "learning_rate": 6.316913715913082e-06, + "logps/chosen": -142.3838348388672, + "logps/margins": 5.806654930114746, + "logps/rejected": -148.19049072265625, + "loss": 0.7863, + "rewards/chosen": 1.7120916843414307, + "rewards/margins": 0.5562489628791809, + "rewards/rejected": 1.1558425426483154, + "step": 4990 + }, + { + "accuracy": 0.5874999761581421, + "epoch": 1.25, + "learning_rate": 6.304281330918079e-06, + "logps/chosen": -130.83453369140625, + "logps/margins": 2.3205955028533936, + "logps/rejected": -133.15513610839844, + "loss": 0.7871, + "rewards/chosen": 1.5653244256973267, + "rewards/margins": 0.38230443000793457, + "rewards/rejected": 1.1830198764801025, + "step": 5000 + }, + { + "accuracy": 0.625, + "epoch": 1.25, + "learning_rate": 6.291640006375882e-06, + "logps/chosen": -151.2340850830078, + "logps/margins": -10.201196670532227, + "logps/rejected": -141.0328826904297, + "loss": 0.8121, + "rewards/chosen": 1.6454102993011475, + "rewards/margins": 0.23128366470336914, + "rewards/rejected": 1.4141267538070679, + "step": 5010 + }, + { + "accuracy": 0.637499988079071, + "epoch": 1.25, + "learning_rate": 6.27898982893016e-06, + "logps/chosen": -171.12440490722656, + "logps/margins": -1.9663136005401611, + "logps/rejected": -169.1580810546875, + "loss": 0.7431, + "rewards/chosen": 1.7115182876586914, + "rewards/margins": 0.37104958295822144, + "rewards/rejected": 1.3404687643051147, + "step": 5020 + }, + { + "accuracy": 0.5625, + "epoch": 1.26, + "learning_rate": 6.2663308852852525e-06, + "logps/chosen": -155.58973693847656, + "logps/margins": -4.143744468688965, + "logps/rejected": -151.4459991455078, + "loss": 0.7712, + "rewards/chosen": 1.5143144130706787, + "rewards/margins": 0.32183441519737244, + "rewards/rejected": 1.1924798488616943, + "step": 5030 + }, + { + "accuracy": 0.550000011920929, + "epoch": 1.26, + "learning_rate": 6.253663262205593e-06, + "logps/chosen": -177.77529907226562, + "logps/margins": -58.832725524902344, + "logps/rejected": -118.94258117675781, + "loss": 0.788, + "rewards/chosen": 1.6441223621368408, + "rewards/margins": 0.2770431637763977, + "rewards/rejected": 1.3670790195465088, + "step": 5040 + }, + { + "accuracy": 0.5375000238418579, + "epoch": 1.26, + "learning_rate": 6.240987046515096e-06, + "logps/chosen": -131.17593383789062, + "logps/margins": 10.835644721984863, + "logps/rejected": -142.01156616210938, + "loss": 0.7118, + "rewards/chosen": 1.3152341842651367, + "rewards/margins": 0.2057148963212967, + "rewards/rejected": 1.109519362449646, + "step": 5050 + }, + { + "accuracy": 0.6000000238418579, + "epoch": 1.27, + "learning_rate": 6.228302325096574e-06, + "logps/chosen": -158.71505737304688, + "logps/margins": -12.218968391418457, + "logps/rejected": -146.49607849121094, + "loss": 0.7528, + "rewards/chosen": 2.0128684043884277, + "rewards/margins": 0.41734856367111206, + "rewards/rejected": 1.5955199003219604, + "step": 5060 + }, + { + "accuracy": 0.637499988079071, + "epoch": 1.27, + "learning_rate": 6.215609184891133e-06, + "logps/chosen": -174.6935272216797, + "logps/margins": -22.03658103942871, + "logps/rejected": -152.65695190429688, + "loss": 0.7107, + "rewards/chosen": 1.7201868295669556, + "rewards/margins": 0.35576948523521423, + "rewards/rejected": 1.3644174337387085, + "step": 5070 + }, + { + "accuracy": 0.5625, + "epoch": 1.27, + "learning_rate": 6.202907712897591e-06, + "logps/chosen": -148.1775665283203, + "logps/margins": 13.041943550109863, + "logps/rejected": -161.21949768066406, + "loss": 0.8075, + "rewards/chosen": 1.9800361394882202, + "rewards/margins": 0.23238544166088104, + "rewards/rejected": 1.7476507425308228, + "step": 5080 + }, + { + "accuracy": 0.5874999761581421, + "epoch": 1.27, + "learning_rate": 6.190197996171861e-06, + "logps/chosen": -169.82705688476562, + "logps/margins": 31.24056053161621, + "logps/rejected": -201.06761169433594, + "loss": 0.7652, + "rewards/chosen": 1.46670663356781, + "rewards/margins": 0.1525311917066574, + "rewards/rejected": 1.3141753673553467, + "step": 5090 + }, + { + "accuracy": 0.5249999761581421, + "epoch": 1.27, + "learning_rate": 6.177480121826372e-06, + "logps/chosen": -176.33309936523438, + "logps/margins": -26.30515480041504, + "logps/rejected": -150.02793884277344, + "loss": 0.7385, + "rewards/chosen": 1.586025595664978, + "rewards/margins": 0.0009313821792602539, + "rewards/rejected": 1.5850943326950073, + "step": 5100 + }, + { + "accuracy": 0.6625000238418579, + "epoch": 1.28, + "learning_rate": 6.1647541770294614e-06, + "logps/chosen": -157.5048828125, + "logps/margins": 11.542614936828613, + "logps/rejected": -169.04751586914062, + "loss": 0.7027, + "rewards/chosen": 1.8223068714141846, + "rewards/margins": 0.4998514652252197, + "rewards/rejected": 1.3224552869796753, + "step": 5110 + }, + { + "accuracy": 0.6499999761581421, + "epoch": 1.28, + "learning_rate": 6.152020249004786e-06, + "logps/chosen": -157.6554412841797, + "logps/margins": -12.17426586151123, + "logps/rejected": -145.48118591308594, + "loss": 0.6801, + "rewards/chosen": 1.6239858865737915, + "rewards/margins": 0.3804410696029663, + "rewards/rejected": 1.2435449361801147, + "step": 5120 + }, + { + "accuracy": 0.6625000238418579, + "epoch": 1.28, + "learning_rate": 6.139278425030717e-06, + "logps/chosen": -159.12953186035156, + "logps/margins": -6.698736667633057, + "logps/rejected": -152.4307861328125, + "loss": 0.6877, + "rewards/chosen": 1.6081244945526123, + "rewards/margins": 0.40253764390945435, + "rewards/rejected": 1.2055867910385132, + "step": 5130 + }, + { + "accuracy": 0.5249999761581421, + "epoch": 1.28, + "learning_rate": 6.126528792439743e-06, + "logps/chosen": -189.52659606933594, + "logps/margins": -40.89472579956055, + "logps/rejected": -148.63186645507812, + "loss": 0.7399, + "rewards/chosen": 1.3909708261489868, + "rewards/margins": 0.011243830434978008, + "rewards/rejected": 1.3797270059585571, + "step": 5140 + }, + { + "accuracy": 0.625, + "epoch": 1.29, + "learning_rate": 6.113771438617877e-06, + "logps/chosen": -150.76974487304688, + "logps/margins": 18.153573989868164, + "logps/rejected": -168.92333984375, + "loss": 0.7704, + "rewards/chosen": 1.578174114227295, + "rewards/margins": 0.29279306530952454, + "rewards/rejected": 1.2853810787200928, + "step": 5150 + }, + { + "accuracy": 0.6625000238418579, + "epoch": 1.29, + "learning_rate": 6.101006451004049e-06, + "logps/chosen": -142.00057983398438, + "logps/margins": -0.3727971911430359, + "logps/rejected": -141.62779235839844, + "loss": 0.7128, + "rewards/chosen": 1.6239397525787354, + "rewards/margins": 0.49854373931884766, + "rewards/rejected": 1.1253958940505981, + "step": 5160 + }, + { + "accuracy": 0.612500011920929, + "epoch": 1.29, + "learning_rate": 6.088233917089513e-06, + "logps/chosen": -141.65484619140625, + "logps/margins": -8.830339431762695, + "logps/rejected": -132.8245086669922, + "loss": 0.6555, + "rewards/chosen": 1.7338759899139404, + "rewards/margins": 0.5189031362533569, + "rewards/rejected": 1.2149730920791626, + "step": 5170 + }, + { + "accuracy": 0.550000011920929, + "epoch": 1.29, + "learning_rate": 6.075453924417248e-06, + "logps/chosen": -168.49122619628906, + "logps/margins": -42.011863708496094, + "logps/rejected": -126.4793472290039, + "loss": 0.7436, + "rewards/chosen": 1.810417890548706, + "rewards/margins": 0.20606985688209534, + "rewards/rejected": 1.6043481826782227, + "step": 5180 + }, + { + "accuracy": 0.625, + "epoch": 1.3, + "learning_rate": 6.06266656058135e-06, + "logps/chosen": -157.50521850585938, + "logps/margins": -12.730794906616211, + "logps/rejected": -144.7744140625, + "loss": 0.6994, + "rewards/chosen": 1.4022176265716553, + "rewards/margins": 0.22986188530921936, + "rewards/rejected": 1.1723556518554688, + "step": 5190 + }, + { + "accuracy": 0.6625000238418579, + "epoch": 1.3, + "learning_rate": 6.04987191322644e-06, + "logps/chosen": -173.8582763671875, + "logps/margins": -15.063423156738281, + "logps/rejected": -158.7948455810547, + "loss": 0.691, + "rewards/chosen": 1.5703957080841064, + "rewards/margins": 0.2514503598213196, + "rewards/rejected": 1.3189454078674316, + "step": 5200 + }, + { + "accuracy": 0.612500011920929, + "epoch": 1.3, + "learning_rate": 6.037070070047063e-06, + "logps/chosen": -175.35873413085938, + "logps/margins": -50.71623992919922, + "logps/rejected": -124.64250183105469, + "loss": 0.7064, + "rewards/chosen": 2.007868766784668, + "rewards/margins": 0.5471641421318054, + "rewards/rejected": 1.4607045650482178, + "step": 5210 + }, + { + "accuracy": 0.512499988079071, + "epoch": 1.3, + "learning_rate": 6.0242611187870756e-06, + "logps/chosen": -135.90957641601562, + "logps/margins": -12.127862930297852, + "logps/rejected": -123.78172302246094, + "loss": 0.8495, + "rewards/chosen": 1.7278273105621338, + "rewards/margins": 0.31238415837287903, + "rewards/rejected": 1.4154431819915771, + "step": 5220 + }, + { + "accuracy": 0.625, + "epoch": 1.31, + "learning_rate": 6.011445147239063e-06, + "logps/chosen": -146.40159606933594, + "logps/margins": -18.971403121948242, + "logps/rejected": -127.43019104003906, + "loss": 0.7001, + "rewards/chosen": 1.6193259954452515, + "rewards/margins": 0.3653852045536041, + "rewards/rejected": 1.2539408206939697, + "step": 5230 + }, + { + "accuracy": 0.5874999761581421, + "epoch": 1.31, + "learning_rate": 5.998622243243723e-06, + "logps/chosen": -187.36436462402344, + "logps/margins": 1.5915634632110596, + "logps/rejected": -188.95594787597656, + "loss": 0.6876, + "rewards/chosen": 1.7392375469207764, + "rewards/margins": 0.4315492510795593, + "rewards/rejected": 1.3076883554458618, + "step": 5240 + }, + { + "accuracy": 0.5625, + "epoch": 1.31, + "learning_rate": 5.985792494689265e-06, + "logps/chosen": -145.41183471679688, + "logps/margins": 3.6851959228515625, + "logps/rejected": -149.09703063964844, + "loss": 0.7216, + "rewards/chosen": 1.6869869232177734, + "rewards/margins": 0.29782360792160034, + "rewards/rejected": 1.3891632556915283, + "step": 5250 + }, + { + "accuracy": 0.625, + "epoch": 1.31, + "learning_rate": 5.97295598951082e-06, + "logps/chosen": -164.96902465820312, + "logps/margins": -0.19436874985694885, + "logps/rejected": -164.77464294433594, + "loss": 0.7062, + "rewards/chosen": 1.6675602197647095, + "rewards/margins": 0.4102051854133606, + "rewards/rejected": 1.2573550939559937, + "step": 5260 + }, + { + "accuracy": 0.6000000238418579, + "epoch": 1.32, + "learning_rate": 5.960112815689819e-06, + "logps/chosen": -145.9226837158203, + "logps/margins": -0.42312487959861755, + "logps/rejected": -145.49954223632812, + "loss": 0.7508, + "rewards/chosen": 1.3987632989883423, + "rewards/margins": 0.17990709841251373, + "rewards/rejected": 1.2188560962677002, + "step": 5270 + }, + { + "accuracy": 0.5625, + "epoch": 1.32, + "learning_rate": 5.9472630612534055e-06, + "logps/chosen": -135.58197021484375, + "logps/margins": -0.3163507580757141, + "logps/rejected": -135.265625, + "loss": 0.7564, + "rewards/chosen": 1.4992902278900146, + "rewards/margins": 0.23811094462871552, + "rewards/rejected": 1.2611792087554932, + "step": 5280 + }, + { + "accuracy": 0.5874999761581421, + "epoch": 1.32, + "learning_rate": 5.934406814273829e-06, + "logps/chosen": -167.3329620361328, + "logps/margins": -12.887275695800781, + "logps/rejected": -154.44570922851562, + "loss": 0.756, + "rewards/chosen": 1.5215914249420166, + "rewards/margins": 0.21805009245872498, + "rewards/rejected": 1.3035413026809692, + "step": 5290 + }, + { + "accuracy": 0.5874999761581421, + "epoch": 1.32, + "learning_rate": 5.921544162867829e-06, + "logps/chosen": -161.7301788330078, + "logps/margins": -26.398529052734375, + "logps/rejected": -135.33164978027344, + "loss": 0.7043, + "rewards/chosen": 1.704332709312439, + "rewards/margins": 0.3039229214191437, + "rewards/rejected": 1.4004098176956177, + "step": 5300 + }, + { + "accuracy": 0.5625, + "epoch": 1.33, + "learning_rate": 5.908675195196053e-06, + "logps/chosen": -148.4464111328125, + "logps/margins": 16.045602798461914, + "logps/rejected": -164.4920196533203, + "loss": 0.7021, + "rewards/chosen": 1.5473792552947998, + "rewards/margins": 0.17474989593029022, + "rewards/rejected": 1.3726295232772827, + "step": 5310 + }, + { + "accuracy": 0.5874999761581421, + "epoch": 1.33, + "learning_rate": 5.895799999462433e-06, + "logps/chosen": -134.166015625, + "logps/margins": 1.5058866739273071, + "logps/rejected": -135.67190551757812, + "loss": 0.8065, + "rewards/chosen": 1.5211851596832275, + "rewards/margins": 0.44475632905960083, + "rewards/rejected": 1.076428771018982, + "step": 5320 + }, + { + "accuracy": 0.48750001192092896, + "epoch": 1.33, + "learning_rate": 5.882918663913587e-06, + "logps/chosen": -171.63241577148438, + "logps/margins": 27.762609481811523, + "logps/rejected": -199.39501953125, + "loss": 0.7176, + "rewards/chosen": 1.5653254985809326, + "rewards/margins": 0.18104076385498047, + "rewards/rejected": 1.3842847347259521, + "step": 5330 + }, + { + "accuracy": 0.5874999761581421, + "epoch": 1.33, + "learning_rate": 5.870031276838223e-06, + "logps/chosen": -137.03872680664062, + "logps/margins": -9.805635452270508, + "logps/rejected": -127.23310852050781, + "loss": 0.7345, + "rewards/chosen": 1.5989949703216553, + "rewards/margins": 0.30325326323509216, + "rewards/rejected": 1.2957415580749512, + "step": 5340 + }, + { + "accuracy": 0.574999988079071, + "epoch": 1.34, + "learning_rate": 5.857137926566516e-06, + "logps/chosen": -160.0192108154297, + "logps/margins": 25.99019432067871, + "logps/rejected": -186.0093994140625, + "loss": 0.7601, + "rewards/chosen": 1.7115033864974976, + "rewards/margins": -0.004513204097747803, + "rewards/rejected": 1.716016411781311, + "step": 5350 + }, + { + "accuracy": 0.6625000238418579, + "epoch": 1.34, + "learning_rate": 5.84423870146952e-06, + "logps/chosen": -182.6631622314453, + "logps/margins": 1.3142541646957397, + "logps/rejected": -183.97738647460938, + "loss": 0.7329, + "rewards/chosen": 1.8308374881744385, + "rewards/margins": 0.31798094511032104, + "rewards/rejected": 1.5128564834594727, + "step": 5360 + }, + { + "accuracy": 0.5, + "epoch": 1.34, + "learning_rate": 5.831333689958555e-06, + "logps/chosen": -172.5367889404297, + "logps/margins": -6.605926513671875, + "logps/rejected": -165.93087768554688, + "loss": 0.7356, + "rewards/chosen": 1.5800632238388062, + "rewards/margins": 0.2258564978837967, + "rewards/rejected": 1.3542068004608154, + "step": 5370 + }, + { + "accuracy": 0.625, + "epoch": 1.34, + "learning_rate": 5.818422980484597e-06, + "logps/chosen": -134.66600036621094, + "logps/margins": 29.83827781677246, + "logps/rejected": -164.5042724609375, + "loss": 0.704, + "rewards/chosen": 1.3328368663787842, + "rewards/margins": 0.3366268575191498, + "rewards/rejected": 0.9962100982666016, + "step": 5380 + }, + { + "accuracy": 0.5375000238418579, + "epoch": 1.35, + "learning_rate": 5.805506661537678e-06, + "logps/chosen": -176.3031463623047, + "logps/margins": -45.98903274536133, + "logps/rejected": -130.31411743164062, + "loss": 0.7184, + "rewards/chosen": 1.4929618835449219, + "rewards/margins": 0.0719807967543602, + "rewards/rejected": 1.4209811687469482, + "step": 5390 + }, + { + "accuracy": 0.6625000238418579, + "epoch": 1.35, + "learning_rate": 5.792584821646278e-06, + "logps/chosen": -129.8586883544922, + "logps/margins": -2.243013381958008, + "logps/rejected": -127.61567687988281, + "loss": 0.6933, + "rewards/chosen": 1.510141134262085, + "rewards/margins": 0.6418448686599731, + "rewards/rejected": 0.8682962656021118, + "step": 5400 + }, + { + "accuracy": 0.675000011920929, + "epoch": 1.35, + "learning_rate": 5.779657549376716e-06, + "logps/chosen": -189.4259033203125, + "logps/margins": -22.510643005371094, + "logps/rejected": -166.91526794433594, + "loss": 0.6758, + "rewards/chosen": 1.813315749168396, + "rewards/margins": 0.5916121006011963, + "rewards/rejected": 1.2217038869857788, + "step": 5410 + }, + { + "accuracy": 0.612500011920929, + "epoch": 1.35, + "learning_rate": 5.766724933332545e-06, + "logps/chosen": -186.01217651367188, + "logps/margins": -31.01999282836914, + "logps/rejected": -154.9921875, + "loss": 0.7017, + "rewards/chosen": 1.8551937341690063, + "rewards/margins": 0.42761191725730896, + "rewards/rejected": 1.4275819063186646, + "step": 5420 + }, + { + "accuracy": 0.5625, + "epoch": 1.36, + "learning_rate": 5.753787062153947e-06, + "logps/chosen": -128.2163543701172, + "logps/margins": 39.49677658081055, + "logps/rejected": -167.713134765625, + "loss": 0.7297, + "rewards/chosen": 1.3552345037460327, + "rewards/margins": 0.11643465608358383, + "rewards/rejected": 1.2387999296188354, + "step": 5430 + }, + { + "accuracy": 0.5874999761581421, + "epoch": 1.36, + "learning_rate": 5.7408440245171185e-06, + "logps/chosen": -170.32110595703125, + "logps/margins": -10.857378005981445, + "logps/rejected": -159.46372985839844, + "loss": 0.7105, + "rewards/chosen": 2.0298686027526855, + "rewards/margins": 0.4292038083076477, + "rewards/rejected": 1.6006648540496826, + "step": 5440 + }, + { + "accuracy": 0.6000000238418579, + "epoch": 1.36, + "learning_rate": 5.72789590913367e-06, + "logps/chosen": -166.81875610351562, + "logps/margins": 28.72918128967285, + "logps/rejected": -195.54794311523438, + "loss": 0.7614, + "rewards/chosen": 2.0543618202209473, + "rewards/margins": 0.49189358949661255, + "rewards/rejected": 1.562468409538269, + "step": 5450 + }, + { + "accuracy": 0.6000000238418579, + "epoch": 1.36, + "learning_rate": 5.714942804750012e-06, + "logps/chosen": -173.94088745117188, + "logps/margins": 4.52652645111084, + "logps/rejected": -178.46742248535156, + "loss": 0.8208, + "rewards/chosen": 1.8147386312484741, + "rewards/margins": 0.3165207803249359, + "rewards/rejected": 1.4982178211212158, + "step": 5460 + }, + { + "accuracy": 0.550000011920929, + "epoch": 1.37, + "learning_rate": 5.7019848001467516e-06, + "logps/chosen": -205.4329376220703, + "logps/margins": -48.174537658691406, + "logps/rejected": -157.25839233398438, + "loss": 0.718, + "rewards/chosen": 1.7369821071624756, + "rewards/margins": 0.18487076461315155, + "rewards/rejected": 1.5521115064620972, + "step": 5470 + }, + { + "accuracy": 0.5625, + "epoch": 1.37, + "learning_rate": 5.6890219841380835e-06, + "logps/chosen": -137.05026245117188, + "logps/margins": 24.883108139038086, + "logps/rejected": -161.93338012695312, + "loss": 0.6838, + "rewards/chosen": 1.8056952953338623, + "rewards/margins": 0.3463909924030304, + "rewards/rejected": 1.4593042135238647, + "step": 5480 + }, + { + "accuracy": 0.48750001192092896, + "epoch": 1.37, + "learning_rate": 5.676054445571175e-06, + "logps/chosen": -145.9791259765625, + "logps/margins": 8.895891189575195, + "logps/rejected": -154.87503051757812, + "loss": 0.7424, + "rewards/chosen": 1.5609519481658936, + "rewards/margins": 0.13086891174316406, + "rewards/rejected": 1.430083155632019, + "step": 5490 + }, + { + "accuracy": 0.550000011920929, + "epoch": 1.38, + "learning_rate": 5.663082273325568e-06, + "logps/chosen": -160.75375366210938, + "logps/margins": -1.261610984802246, + "logps/rejected": -159.4921417236328, + "loss": 0.7634, + "rewards/chosen": 1.6964995861053467, + "rewards/margins": 0.3085925281047821, + "rewards/rejected": 1.3879071474075317, + "step": 5500 + }, + { + "accuracy": 0.512499988079071, + "epoch": 1.38, + "learning_rate": 5.6501055563125574e-06, + "logps/chosen": -163.81549072265625, + "logps/margins": 1.4159587621688843, + "logps/rejected": -165.2314453125, + "loss": 0.7543, + "rewards/chosen": 1.4497385025024414, + "rewards/margins": 0.10968782752752304, + "rewards/rejected": 1.3400506973266602, + "step": 5510 + }, + { + "accuracy": 0.550000011920929, + "epoch": 1.38, + "learning_rate": 5.637124383474592e-06, + "logps/chosen": -207.0496063232422, + "logps/margins": -26.114797592163086, + "logps/rejected": -180.93484497070312, + "loss": 0.7674, + "rewards/chosen": 1.6942847967147827, + "rewards/margins": 0.32077234983444214, + "rewards/rejected": 1.3735123872756958, + "step": 5520 + }, + { + "accuracy": 0.612500011920929, + "epoch": 1.38, + "learning_rate": 5.624138843784662e-06, + "logps/chosen": -183.96987915039062, + "logps/margins": -3.3738930225372314, + "logps/rejected": -180.59597778320312, + "loss": 0.7426, + "rewards/chosen": 1.6988093852996826, + "rewards/margins": 0.48210129141807556, + "rewards/rejected": 1.2167081832885742, + "step": 5530 + }, + { + "accuracy": 0.550000011920929, + "epoch": 1.39, + "learning_rate": 5.611149026245683e-06, + "logps/chosen": -149.31764221191406, + "logps/margins": -3.167083263397217, + "logps/rejected": -146.15054321289062, + "loss": 0.7478, + "rewards/chosen": 1.484277606010437, + "rewards/margins": 0.0687042698264122, + "rewards/rejected": 1.415573239326477, + "step": 5540 + }, + { + "accuracy": 0.5625, + "epoch": 1.39, + "learning_rate": 5.598155019889896e-06, + "logps/chosen": -156.6920928955078, + "logps/margins": 10.109004020690918, + "logps/rejected": -166.8011016845703, + "loss": 0.7236, + "rewards/chosen": 1.4883196353912354, + "rewards/margins": 0.16127082705497742, + "rewards/rejected": 1.327048659324646, + "step": 5550 + }, + { + "accuracy": 0.48750001192092896, + "epoch": 1.39, + "learning_rate": 5.5851569137782465e-06, + "logps/chosen": -154.56295776367188, + "logps/margins": -15.01945686340332, + "logps/rejected": -139.54348754882812, + "loss": 0.797, + "rewards/chosen": 1.2589267492294312, + "rewards/margins": -0.056695032864809036, + "rewards/rejected": 1.3156219720840454, + "step": 5560 + }, + { + "accuracy": 0.612500011920929, + "epoch": 1.39, + "learning_rate": 5.5721547969997866e-06, + "logps/chosen": -133.49951171875, + "logps/margins": 6.751640319824219, + "logps/rejected": -140.2511444091797, + "loss": 0.7185, + "rewards/chosen": 1.2730587720870972, + "rewards/margins": 0.09053818881511688, + "rewards/rejected": 1.1825206279754639, + "step": 5570 + }, + { + "accuracy": 0.5874999761581421, + "epoch": 1.4, + "learning_rate": 5.5604495364333085e-06, + "logps/chosen": -152.4004669189453, + "logps/margins": -5.293655872344971, + "logps/rejected": -147.10678100585938, + "loss": 0.7691, + "rewards/chosen": 1.4804632663726807, + "rewards/margins": 0.14566269516944885, + "rewards/rejected": 1.3348006010055542, + "step": 5580 + }, + { + "accuracy": 0.574999988079071, + "epoch": 1.4, + "learning_rate": 5.547440044926198e-06, + "logps/chosen": -140.5923614501953, + "logps/margins": 6.542186737060547, + "logps/rejected": -147.13455200195312, + "loss": 0.782, + "rewards/chosen": 1.136541724205017, + "rewards/margins": 0.13275261223316193, + "rewards/rejected": 1.0037891864776611, + "step": 5590 + }, + { + "accuracy": 0.5625, + "epoch": 1.4, + "learning_rate": 5.534426801263779e-06, + "logps/chosen": -169.9979248046875, + "logps/margins": -22.774517059326172, + "logps/rejected": -147.22340393066406, + "loss": 0.7946, + "rewards/chosen": 1.4123528003692627, + "rewards/margins": 0.162205770611763, + "rewards/rejected": 1.2501471042633057, + "step": 5600 + }, + { + "accuracy": 0.6625000238418579, + "epoch": 1.4, + "learning_rate": 5.521409894638854e-06, + "logps/chosen": -178.98597717285156, + "logps/margins": -35.57769775390625, + "logps/rejected": -143.4082794189453, + "loss": 0.6927, + "rewards/chosen": 1.6132652759552002, + "rewards/margins": 0.3370493948459625, + "rewards/rejected": 1.27621591091156, + "step": 5610 + }, + { + "accuracy": 0.550000011920929, + "epoch": 1.41, + "learning_rate": 5.5083894142693315e-06, + "logps/chosen": -144.6505126953125, + "logps/margins": 1.944797158241272, + "logps/rejected": -146.59530639648438, + "loss": 0.7037, + "rewards/chosen": 1.405866026878357, + "rewards/margins": 0.2136956751346588, + "rewards/rejected": 1.192170262336731, + "step": 5620 + }, + { + "accuracy": 0.5874999761581421, + "epoch": 1.41, + "learning_rate": 5.495365449397613e-06, + "logps/chosen": -149.2288055419922, + "logps/margins": -12.753352165222168, + "logps/rejected": -136.47544860839844, + "loss": 0.7394, + "rewards/chosen": 1.3791271448135376, + "rewards/margins": 0.18719109892845154, + "rewards/rejected": 1.1919360160827637, + "step": 5630 + }, + { + "accuracy": 0.5874999761581421, + "epoch": 1.41, + "learning_rate": 5.482338089289982e-06, + "logps/chosen": -183.57162475585938, + "logps/margins": -28.113910675048828, + "logps/rejected": -155.45773315429688, + "loss": 0.7237, + "rewards/chosen": 1.741651177406311, + "rewards/margins": 0.24294829368591309, + "rewards/rejected": 1.4987030029296875, + "step": 5640 + }, + { + "accuracy": 0.5375000238418579, + "epoch": 1.41, + "learning_rate": 5.469307423235997e-06, + "logps/chosen": -157.01309204101562, + "logps/margins": -14.2610445022583, + "logps/rejected": -142.7520294189453, + "loss": 0.7091, + "rewards/chosen": 1.6885029077529907, + "rewards/margins": 0.3885074257850647, + "rewards/rejected": 1.2999956607818604, + "step": 5650 + }, + { + "accuracy": 0.4625000059604645, + "epoch": 1.42, + "learning_rate": 5.456273540547871e-06, + "logps/chosen": -162.21449279785156, + "logps/margins": -14.72693157196045, + "logps/rejected": -147.48757934570312, + "loss": 0.8143, + "rewards/chosen": 1.530364990234375, + "rewards/margins": -0.1417171210050583, + "rewards/rejected": 1.6720821857452393, + "step": 5660 + }, + { + "accuracy": 0.6000000238418579, + "epoch": 1.42, + "learning_rate": 5.443236530559863e-06, + "logps/chosen": -170.67613220214844, + "logps/margins": -4.8558807373046875, + "logps/rejected": -165.8202667236328, + "loss": 0.7288, + "rewards/chosen": 1.4650720357894897, + "rewards/margins": 0.26263687014579773, + "rewards/rejected": 1.2024351358413696, + "step": 5670 + }, + { + "accuracy": 0.6499999761581421, + "epoch": 1.42, + "learning_rate": 5.430196482627673e-06, + "logps/chosen": -158.334228515625, + "logps/margins": 11.41904354095459, + "logps/rejected": -169.75326538085938, + "loss": 0.765, + "rewards/chosen": 1.6002607345581055, + "rewards/margins": 0.24975450336933136, + "rewards/rejected": 1.35050630569458, + "step": 5680 + }, + { + "accuracy": 0.574999988079071, + "epoch": 1.42, + "learning_rate": 5.417153486127818e-06, + "logps/chosen": -151.82003784179688, + "logps/margins": -10.875391960144043, + "logps/rejected": -140.9446258544922, + "loss": 0.723, + "rewards/chosen": 1.6592031717300415, + "rewards/margins": 0.2735028862953186, + "rewards/rejected": 1.3857002258300781, + "step": 5690 + }, + { + "accuracy": 0.574999988079071, + "epoch": 1.43, + "learning_rate": 5.404107630457024e-06, + "logps/chosen": -161.78799438476562, + "logps/margins": -13.234598159790039, + "logps/rejected": -148.5534210205078, + "loss": 0.754, + "rewards/chosen": 1.5500133037567139, + "rewards/margins": 0.30869191884994507, + "rewards/rejected": 1.2413215637207031, + "step": 5700 + }, + { + "accuracy": 0.625, + "epoch": 1.43, + "learning_rate": 5.3910590050316165e-06, + "logps/chosen": -160.0657196044922, + "logps/margins": -13.919367790222168, + "logps/rejected": -146.14637756347656, + "loss": 0.7357, + "rewards/chosen": 1.483858585357666, + "rewards/margins": 0.419452041387558, + "rewards/rejected": 1.064406394958496, + "step": 5710 + }, + { + "accuracy": 0.574999988079071, + "epoch": 1.43, + "learning_rate": 5.378007699286904e-06, + "logps/chosen": -154.39894104003906, + "logps/margins": -9.410590171813965, + "logps/rejected": -144.9883575439453, + "loss": 0.7421, + "rewards/chosen": 1.5918755531311035, + "rewards/margins": 0.2058297097682953, + "rewards/rejected": 1.3860459327697754, + "step": 5720 + }, + { + "accuracy": 0.5375000238418579, + "epoch": 1.43, + "learning_rate": 5.364953802676563e-06, + "logps/chosen": -139.1897735595703, + "logps/margins": -17.51617431640625, + "logps/rejected": -121.673583984375, + "loss": 0.7099, + "rewards/chosen": 1.3949676752090454, + "rewards/margins": 0.2602354884147644, + "rewards/rejected": 1.1347322463989258, + "step": 5730 + }, + { + "accuracy": 0.574999988079071, + "epoch": 1.44, + "learning_rate": 5.351897404672033e-06, + "logps/chosen": -136.61865234375, + "logps/margins": -13.783647537231445, + "logps/rejected": -122.8349838256836, + "loss": 0.7439, + "rewards/chosen": 1.2710367441177368, + "rewards/margins": 0.032409533858299255, + "rewards/rejected": 1.2386271953582764, + "step": 5740 + }, + { + "accuracy": 0.625, + "epoch": 1.44, + "learning_rate": 5.338838594761896e-06, + "logps/chosen": -168.38601684570312, + "logps/margins": -28.6624698638916, + "logps/rejected": -139.72354125976562, + "loss": 0.7537, + "rewards/chosen": 1.7158035039901733, + "rewards/margins": 0.3373875916004181, + "rewards/rejected": 1.378415822982788, + "step": 5750 + }, + { + "accuracy": 0.5625, + "epoch": 1.44, + "learning_rate": 5.325777462451262e-06, + "logps/chosen": -146.7727813720703, + "logps/margins": 12.768696784973145, + "logps/rejected": -159.54147338867188, + "loss": 0.8007, + "rewards/chosen": 1.3416997194290161, + "rewards/margins": 0.13176873326301575, + "rewards/rejected": 1.2099311351776123, + "step": 5760 + }, + { + "accuracy": 0.5249999761581421, + "epoch": 1.44, + "learning_rate": 5.3127140972611654e-06, + "logps/chosen": -156.87179565429688, + "logps/margins": 8.120512008666992, + "logps/rejected": -164.99227905273438, + "loss": 0.7656, + "rewards/chosen": 1.3799902200698853, + "rewards/margins": 0.03770449757575989, + "rewards/rejected": 1.3422856330871582, + "step": 5770 + }, + { + "accuracy": 0.6000000238418579, + "epoch": 1.45, + "learning_rate": 5.299648588727937e-06, + "logps/chosen": -153.733642578125, + "logps/margins": -9.389876365661621, + "logps/rejected": -144.34378051757812, + "loss": 0.7328, + "rewards/chosen": 1.4924728870391846, + "rewards/margins": 0.3293708562850952, + "rewards/rejected": 1.163102149963379, + "step": 5780 + }, + { + "accuracy": 0.5874999761581421, + "epoch": 1.45, + "learning_rate": 5.286581026402603e-06, + "logps/chosen": -153.72698974609375, + "logps/margins": -9.940195083618164, + "logps/rejected": -143.7867889404297, + "loss": 0.6922, + "rewards/chosen": 1.8377565145492554, + "rewards/margins": 0.35638314485549927, + "rewards/rejected": 1.4813735485076904, + "step": 5790 + }, + { + "accuracy": 0.5625, + "epoch": 1.45, + "learning_rate": 5.273511499850267e-06, + "logps/chosen": -160.94082641601562, + "logps/margins": -29.102447509765625, + "logps/rejected": -131.83839416503906, + "loss": 0.7204, + "rewards/chosen": 1.5206208229064941, + "rewards/margins": 0.4060136675834656, + "rewards/rejected": 1.1146070957183838, + "step": 5800 + }, + { + "accuracy": 0.625, + "epoch": 1.45, + "learning_rate": 5.26044009864949e-06, + "logps/chosen": -171.05935668945312, + "logps/margins": 5.292855262756348, + "logps/rejected": -176.35220336914062, + "loss": 0.7462, + "rewards/chosen": 1.9989153146743774, + "rewards/margins": 0.5080349445343018, + "rewards/rejected": 1.4908804893493652, + "step": 5810 + }, + { + "accuracy": 0.512499988079071, + "epoch": 1.46, + "learning_rate": 5.247366912391689e-06, + "logps/chosen": -185.25357055664062, + "logps/margins": -32.15409469604492, + "logps/rejected": -153.0994873046875, + "loss": 0.75, + "rewards/chosen": 1.4722042083740234, + "rewards/margins": 0.14152461290359497, + "rewards/rejected": 1.3306795358657837, + "step": 5820 + }, + { + "accuracy": 0.6000000238418579, + "epoch": 1.46, + "learning_rate": 5.234292030680509e-06, + "logps/chosen": -157.81483459472656, + "logps/margins": -3.082869052886963, + "logps/rejected": -154.73196411132812, + "loss": 0.7286, + "rewards/chosen": 1.8751846551895142, + "rewards/margins": 0.24605628848075867, + "rewards/rejected": 1.6291286945343018, + "step": 5830 + }, + { + "accuracy": 0.6875, + "epoch": 1.46, + "learning_rate": 5.221215543131221e-06, + "logps/chosen": -163.16171264648438, + "logps/margins": -11.626922607421875, + "logps/rejected": -151.53477478027344, + "loss": 0.6477, + "rewards/chosen": 1.732105016708374, + "rewards/margins": 0.3726832866668701, + "rewards/rejected": 1.359421730041504, + "step": 5840 + }, + { + "accuracy": 0.5249999761581421, + "epoch": 1.46, + "learning_rate": 5.208137539370101e-06, + "logps/chosen": -149.0413360595703, + "logps/margins": 13.026446342468262, + "logps/rejected": -162.06777954101562, + "loss": 0.7375, + "rewards/chosen": 1.8997814655303955, + "rewards/margins": 0.2037171870470047, + "rewards/rejected": 1.6960642337799072, + "step": 5850 + }, + { + "accuracy": 0.625, + "epoch": 1.47, + "learning_rate": 5.195058109033813e-06, + "logps/chosen": -162.4295654296875, + "logps/margins": -4.865842342376709, + "logps/rejected": -157.56370544433594, + "loss": 0.735, + "rewards/chosen": 1.7543100118637085, + "rewards/margins": 0.43926873803138733, + "rewards/rejected": 1.3150413036346436, + "step": 5860 + }, + { + "accuracy": 0.5249999761581421, + "epoch": 1.47, + "learning_rate": 5.181977341768805e-06, + "logps/chosen": -140.98873901367188, + "logps/margins": 17.297122955322266, + "logps/rejected": -158.285888671875, + "loss": 0.742, + "rewards/chosen": 1.3781960010528564, + "rewards/margins": 0.1770985871553421, + "rewards/rejected": 1.2010974884033203, + "step": 5870 + }, + { + "accuracy": 0.574999988079071, + "epoch": 1.47, + "learning_rate": 5.1688953272306855e-06, + "logps/chosen": -154.7940216064453, + "logps/margins": -24.776287078857422, + "logps/rejected": -130.01773071289062, + "loss": 0.6457, + "rewards/chosen": 1.7411186695098877, + "rewards/margins": 0.4411458373069763, + "rewards/rejected": 1.2999727725982666, + "step": 5880 + }, + { + "accuracy": 0.7250000238418579, + "epoch": 1.47, + "learning_rate": 5.155812155083609e-06, + "logps/chosen": -173.3274383544922, + "logps/margins": -27.61617088317871, + "logps/rejected": -145.71127319335938, + "loss": 0.6555, + "rewards/chosen": 1.591841459274292, + "rewards/margins": 0.47192493081092834, + "rewards/rejected": 1.1199166774749756, + "step": 5890 + }, + { + "accuracy": 0.5874999761581421, + "epoch": 1.48, + "learning_rate": 5.142727914999669e-06, + "logps/chosen": -150.91864013671875, + "logps/margins": 19.20001220703125, + "logps/rejected": -170.11865234375, + "loss": 0.7354, + "rewards/chosen": 1.711281418800354, + "rewards/margins": 0.2782706320285797, + "rewards/rejected": 1.4330108165740967, + "step": 5900 + }, + { + "accuracy": 0.6000000238418579, + "epoch": 1.48, + "learning_rate": 5.129642696658279e-06, + "logps/chosen": -154.39859008789062, + "logps/margins": 27.85101318359375, + "logps/rejected": -182.24960327148438, + "loss": 0.7114, + "rewards/chosen": 1.7411203384399414, + "rewards/margins": 0.5508637428283691, + "rewards/rejected": 1.1902567148208618, + "step": 5910 + }, + { + "accuracy": 0.5625, + "epoch": 1.48, + "learning_rate": 5.11655658974555e-06, + "logps/chosen": -168.35702514648438, + "logps/margins": -27.318897247314453, + "logps/rejected": -141.03811645507812, + "loss": 0.7338, + "rewards/chosen": 1.6101325750350952, + "rewards/margins": 0.3776584267616272, + "rewards/rejected": 1.2324742078781128, + "step": 5920 + }, + { + "accuracy": 0.6875, + "epoch": 1.48, + "learning_rate": 5.103469683953694e-06, + "logps/chosen": -144.71237182617188, + "logps/margins": -13.303802490234375, + "logps/rejected": -131.40859985351562, + "loss": 0.7454, + "rewards/chosen": 1.5730059146881104, + "rewards/margins": 0.43756207823753357, + "rewards/rejected": 1.135443925857544, + "step": 5930 + }, + { + "accuracy": 0.6499999761581421, + "epoch": 1.48, + "learning_rate": 5.0903820689803874e-06, + "logps/chosen": -162.18429565429688, + "logps/margins": -10.229942321777344, + "logps/rejected": -151.95436096191406, + "loss": 0.7485, + "rewards/chosen": 1.626556396484375, + "rewards/margins": 0.3839971423149109, + "rewards/rejected": 1.2425591945648193, + "step": 5940 + }, + { + "accuracy": 0.612500011920929, + "epoch": 1.49, + "learning_rate": 5.077293834528175e-06, + "logps/chosen": -175.92588806152344, + "logps/margins": -4.947401523590088, + "logps/rejected": -170.978515625, + "loss": 0.723, + "rewards/chosen": 2.0475027561187744, + "rewards/margins": 0.44176197052001953, + "rewards/rejected": 1.6057409048080444, + "step": 5950 + }, + { + "accuracy": 0.6000000238418579, + "epoch": 1.49, + "learning_rate": 5.064205070303848e-06, + "logps/chosen": -152.53414916992188, + "logps/margins": -5.0932488441467285, + "logps/rejected": -147.44091796875, + "loss": 0.7117, + "rewards/chosen": 1.7001174688339233, + "rewards/margins": 0.4611106812953949, + "rewards/rejected": 1.2390069961547852, + "step": 5960 + }, + { + "accuracy": 0.612500011920929, + "epoch": 1.49, + "learning_rate": 5.051115866017823e-06, + "logps/chosen": -164.5131072998047, + "logps/margins": 13.223596572875977, + "logps/rejected": -177.7366943359375, + "loss": 0.7198, + "rewards/chosen": 1.655583381652832, + "rewards/margins": 0.4308537542819977, + "rewards/rejected": 1.2247296571731567, + "step": 5970 + }, + { + "accuracy": 0.5, + "epoch": 1.5, + "learning_rate": 5.038026311383536e-06, + "logps/chosen": -146.76358032226562, + "logps/margins": -16.049083709716797, + "logps/rejected": -130.71450805664062, + "loss": 0.7681, + "rewards/chosen": 1.3532880544662476, + "rewards/margins": -0.13548573851585388, + "rewards/rejected": 1.4887738227844238, + "step": 5980 + }, + { + "accuracy": 0.5249999761581421, + "epoch": 1.5, + "learning_rate": 5.0249364961168255e-06, + "logps/chosen": -153.42922973632812, + "logps/margins": 18.665233612060547, + "logps/rejected": -172.09446716308594, + "loss": 0.7755, + "rewards/chosen": 1.3128784894943237, + "rewards/margins": 0.15676501393318176, + "rewards/rejected": 1.1561133861541748, + "step": 5990 + }, + { + "accuracy": 0.574999988079071, + "epoch": 1.5, + "learning_rate": 5.011846509935314e-06, + "logps/chosen": -145.02706909179688, + "logps/margins": 3.740046739578247, + "logps/rejected": -148.76710510253906, + "loss": 0.7661, + "rewards/chosen": 1.772761583328247, + "rewards/margins": 0.23846474289894104, + "rewards/rejected": 1.5342966318130493, + "step": 6000 + }, + { + "epoch": 1.5, + "eval_accuracy": 0.580338266384778, + "eval_logps/chosen": -159.01666259765625, + "eval_logps/margins": -5.753211975097656, + "eval_logps/rejected": -153.26345825195312, + "eval_loss": 0.7478973269462585, + "eval_rewards/chosen": 1.418882131576538, + "eval_rewards/margins": 0.2543162703514099, + "eval_rewards/rejected": 1.1645658016204834, + "eval_runtime": 1290.8664, + "eval_samples_per_second": 10.993, + "eval_steps_per_second": 1.374, + "step": 6000 + }, + { + "accuracy": 0.7124999761581421, + "epoch": 1.5, + "learning_rate": 4.998756442557797e-06, + "logps/chosen": -178.22705078125, + "logps/margins": 13.54114055633545, + "logps/rejected": -191.7681884765625, + "loss": 0.7574, + "rewards/chosen": 1.5041896104812622, + "rewards/margins": 0.35466068983078003, + "rewards/rejected": 1.149528980255127, + "step": 6010 + }, + { + "accuracy": 0.5625, + "epoch": 1.5, + "learning_rate": 4.985666383703625e-06, + "logps/chosen": -166.67965698242188, + "logps/margins": -10.14887523651123, + "logps/rejected": -156.53079223632812, + "loss": 0.7234, + "rewards/chosen": 1.4498412609100342, + "rewards/margins": 0.267724871635437, + "rewards/rejected": 1.1821165084838867, + "step": 6020 + }, + { + "accuracy": 0.5249999761581421, + "epoch": 1.51, + "learning_rate": 4.972576423092091e-06, + "logps/chosen": -145.42662048339844, + "logps/margins": 0.4987010955810547, + "logps/rejected": -145.92532348632812, + "loss": 0.7257, + "rewards/chosen": 1.2325624227523804, + "rewards/margins": 0.26328903436660767, + "rewards/rejected": 0.9692733883857727, + "step": 6030 + }, + { + "accuracy": 0.612500011920929, + "epoch": 1.51, + "learning_rate": 4.959486650441817e-06, + "logps/chosen": -175.24191284179688, + "logps/margins": -32.371551513671875, + "logps/rejected": -142.87037658691406, + "loss": 0.6858, + "rewards/chosen": 1.460433006286621, + "rewards/margins": 0.19694818556308746, + "rewards/rejected": 1.2634848356246948, + "step": 6040 + }, + { + "accuracy": 0.6625000238418579, + "epoch": 1.51, + "learning_rate": 4.94639715547013e-06, + "logps/chosen": -177.6490020751953, + "logps/margins": -31.388757705688477, + "logps/rejected": -146.26022338867188, + "loss": 0.758, + "rewards/chosen": 1.4562304019927979, + "rewards/margins": 0.5268961191177368, + "rewards/rejected": 0.9293343424797058, + "step": 6050 + }, + { + "accuracy": 0.574999988079071, + "epoch": 1.52, + "learning_rate": 4.933308027892462e-06, + "logps/chosen": -136.9001007080078, + "logps/margins": 24.11701202392578, + "logps/rejected": -161.01710510253906, + "loss": 0.7236, + "rewards/chosen": 1.3791011571884155, + "rewards/margins": 0.026722168549895287, + "rewards/rejected": 1.3523789644241333, + "step": 6060 + }, + { + "accuracy": 0.612500011920929, + "epoch": 1.52, + "learning_rate": 4.920219357421721e-06, + "logps/chosen": -188.46273803710938, + "logps/margins": -60.37711715698242, + "logps/rejected": -128.08563232421875, + "loss": 0.74, + "rewards/chosen": 1.4501302242279053, + "rewards/margins": 0.22862792015075684, + "rewards/rejected": 1.2215025424957275, + "step": 6070 + }, + { + "accuracy": 0.5249999761581421, + "epoch": 1.52, + "learning_rate": 4.9071312337676855e-06, + "logps/chosen": -158.39907836914062, + "logps/margins": -4.17291259765625, + "logps/rejected": -154.22616577148438, + "loss": 0.7114, + "rewards/chosen": 1.3499590158462524, + "rewards/margins": 0.2717706859111786, + "rewards/rejected": 1.078188180923462, + "step": 6080 + }, + { + "accuracy": 0.5625, + "epoch": 1.52, + "learning_rate": 4.894043746636386e-06, + "logps/chosen": -120.92205810546875, + "logps/margins": 42.211761474609375, + "logps/rejected": -163.13381958007812, + "loss": 0.6848, + "rewards/chosen": 1.407041311264038, + "rewards/margins": 0.12416472285985947, + "rewards/rejected": 1.28287672996521, + "step": 6090 + }, + { + "accuracy": 0.5249999761581421, + "epoch": 1.52, + "learning_rate": 4.880956985729485e-06, + "logps/chosen": -155.20899963378906, + "logps/margins": -4.366304874420166, + "logps/rejected": -150.8426971435547, + "loss": 0.8152, + "rewards/chosen": 1.2876375913619995, + "rewards/margins": 0.004538728389889002, + "rewards/rejected": 1.2830989360809326, + "step": 6100 + }, + { + "accuracy": 0.4375, + "epoch": 1.53, + "learning_rate": 4.867871040743673e-06, + "logps/chosen": -141.95993041992188, + "logps/margins": 12.730494499206543, + "logps/rejected": -154.69044494628906, + "loss": 0.7232, + "rewards/chosen": 1.311213731765747, + "rewards/margins": -0.05168135091662407, + "rewards/rejected": 1.3628950119018555, + "step": 6110 + }, + { + "accuracy": 0.574999988079071, + "epoch": 1.53, + "learning_rate": 4.854786001370047e-06, + "logps/chosen": -156.51925659179688, + "logps/margins": -13.266352653503418, + "logps/rejected": -143.25291442871094, + "loss": 0.7146, + "rewards/chosen": 1.493833065032959, + "rewards/margins": 0.42274075746536255, + "rewards/rejected": 1.071092128753662, + "step": 6120 + }, + { + "accuracy": 0.5625, + "epoch": 1.53, + "learning_rate": 4.841701957293496e-06, + "logps/chosen": -135.60165405273438, + "logps/margins": 28.352550506591797, + "logps/rejected": -163.9542236328125, + "loss": 0.6861, + "rewards/chosen": 1.4635498523712158, + "rewards/margins": 0.3221639394760132, + "rewards/rejected": 1.1413861513137817, + "step": 6130 + }, + { + "accuracy": 0.625, + "epoch": 1.54, + "learning_rate": 4.828618998192088e-06, + "logps/chosen": -154.69845581054688, + "logps/margins": -28.32016372680664, + "logps/rejected": -126.37828063964844, + "loss": 0.6862, + "rewards/chosen": 1.4652659893035889, + "rewards/margins": 0.3927001357078552, + "rewards/rejected": 1.0725657939910889, + "step": 6140 + }, + { + "accuracy": 0.637499988079071, + "epoch": 1.54, + "learning_rate": 4.8155372137364554e-06, + "logps/chosen": -146.55516052246094, + "logps/margins": 19.447429656982422, + "logps/rejected": -166.00259399414062, + "loss": 0.7414, + "rewards/chosen": 1.2500078678131104, + "rewards/margins": 0.2703130543231964, + "rewards/rejected": 0.9796948432922363, + "step": 6150 + }, + { + "accuracy": 0.6000000238418579, + "epoch": 1.54, + "learning_rate": 4.8024566935891755e-06, + "logps/chosen": -169.6123809814453, + "logps/margins": -19.6403751373291, + "logps/rejected": -149.9720001220703, + "loss": 0.6893, + "rewards/chosen": 1.5016560554504395, + "rewards/margins": 0.2581610679626465, + "rewards/rejected": 1.2434948682785034, + "step": 6160 + }, + { + "accuracy": 0.6000000238418579, + "epoch": 1.54, + "learning_rate": 4.789377527404166e-06, + "logps/chosen": -163.14315795898438, + "logps/margins": 4.642525672912598, + "logps/rejected": -167.78567504882812, + "loss": 0.7218, + "rewards/chosen": 1.9313108921051025, + "rewards/margins": 0.39638620615005493, + "rewards/rejected": 1.5349247455596924, + "step": 6170 + }, + { + "accuracy": 0.5874999761581421, + "epoch": 1.54, + "learning_rate": 4.7762998048260605e-06, + "logps/chosen": -153.6059112548828, + "logps/margins": 12.890775680541992, + "logps/rejected": -166.49668884277344, + "loss": 0.7049, + "rewards/chosen": 1.5185697078704834, + "rewards/margins": 0.4051073491573334, + "rewards/rejected": 1.113462209701538, + "step": 6180 + }, + { + "accuracy": 0.574999988079071, + "epoch": 1.55, + "learning_rate": 4.763223615489598e-06, + "logps/chosen": -164.93531799316406, + "logps/margins": -14.905157089233398, + "logps/rejected": -150.03018188476562, + "loss": 0.7331, + "rewards/chosen": 1.7619644403457642, + "rewards/margins": 0.38960522413253784, + "rewards/rejected": 1.372359275817871, + "step": 6190 + }, + { + "accuracy": 0.612500011920929, + "epoch": 1.55, + "learning_rate": 4.750149049019013e-06, + "logps/chosen": -171.9315643310547, + "logps/margins": -25.17544174194336, + "logps/rejected": -146.75611877441406, + "loss": 0.7463, + "rewards/chosen": 1.5869166851043701, + "rewards/margins": 0.22739450633525848, + "rewards/rejected": 1.3595221042633057, + "step": 6200 + }, + { + "accuracy": 0.5874999761581421, + "epoch": 1.55, + "learning_rate": 4.73707619502741e-06, + "logps/chosen": -183.80325317382812, + "logps/margins": -21.5816593170166, + "logps/rejected": -162.2216033935547, + "loss": 0.6754, + "rewards/chosen": 1.7000925540924072, + "rewards/margins": 0.3392341434955597, + "rewards/rejected": 1.36085844039917, + "step": 6210 + }, + { + "accuracy": 0.48750001192092896, + "epoch": 1.56, + "learning_rate": 4.724005143116162e-06, + "logps/chosen": -161.692626953125, + "logps/margins": 10.203815460205078, + "logps/rejected": -171.8964385986328, + "loss": 0.7576, + "rewards/chosen": 1.521255373954773, + "rewards/margins": 0.22446303069591522, + "rewards/rejected": 1.2967925071716309, + "step": 6220 + }, + { + "accuracy": 0.550000011920929, + "epoch": 1.56, + "learning_rate": 4.7109359828742874e-06, + "logps/chosen": -174.71807861328125, + "logps/margins": -2.7351412773132324, + "logps/rejected": -171.9829559326172, + "loss": 0.7753, + "rewards/chosen": 1.4995148181915283, + "rewards/margins": 0.18564018607139587, + "rewards/rejected": 1.3138749599456787, + "step": 6230 + }, + { + "accuracy": 0.637499988079071, + "epoch": 1.56, + "learning_rate": 4.6978688038778395e-06, + "logps/chosen": -137.96676635742188, + "logps/margins": -22.68442726135254, + "logps/rejected": -115.28233337402344, + "loss": 0.7517, + "rewards/chosen": 1.7156333923339844, + "rewards/margins": 0.4992446005344391, + "rewards/rejected": 1.2163885831832886, + "step": 6240 + }, + { + "accuracy": 0.625, + "epoch": 1.56, + "learning_rate": 4.684803695689294e-06, + "logps/chosen": -160.47088623046875, + "logps/margins": 12.839515686035156, + "logps/rejected": -173.31040954589844, + "loss": 0.7435, + "rewards/chosen": 1.685003638267517, + "rewards/margins": 0.4182893633842468, + "rewards/rejected": 1.2667142152786255, + "step": 6250 + }, + { + "accuracy": 0.5625, + "epoch": 1.56, + "learning_rate": 4.671740747856933e-06, + "logps/chosen": -166.97100830078125, + "logps/margins": -29.862524032592773, + "logps/rejected": -137.10848999023438, + "loss": 0.7047, + "rewards/chosen": 1.390038013458252, + "rewards/margins": 0.24917110800743103, + "rewards/rejected": 1.140866994857788, + "step": 6260 + }, + { + "accuracy": 0.625, + "epoch": 1.57, + "learning_rate": 4.658680049914228e-06, + "logps/chosen": -143.7956085205078, + "logps/margins": -16.204912185668945, + "logps/rejected": -127.5906982421875, + "loss": 0.6779, + "rewards/chosen": 1.5141279697418213, + "rewards/margins": 0.271851122379303, + "rewards/rejected": 1.242276906967163, + "step": 6270 + }, + { + "accuracy": 0.5625, + "epoch": 1.57, + "learning_rate": 4.645621691379234e-06, + "logps/chosen": -155.58114624023438, + "logps/margins": 28.6274356842041, + "logps/rejected": -184.2085723876953, + "loss": 0.7522, + "rewards/chosen": 1.3851292133331299, + "rewards/margins": 0.21756787598133087, + "rewards/rejected": 1.1675612926483154, + "step": 6280 + }, + { + "accuracy": 0.4625000059604645, + "epoch": 1.57, + "learning_rate": 4.632565761753968e-06, + "logps/chosen": -172.4317169189453, + "logps/margins": -22.572383880615234, + "logps/rejected": -149.85934448242188, + "loss": 0.757, + "rewards/chosen": 1.4730703830718994, + "rewards/margins": 0.21463827788829803, + "rewards/rejected": 1.2584320306777954, + "step": 6290 + }, + { + "accuracy": 0.574999988079071, + "epoch": 1.57, + "learning_rate": 4.619512350523806e-06, + "logps/chosen": -167.3540802001953, + "logps/margins": -2.197544813156128, + "logps/rejected": -165.15652465820312, + "loss": 0.7139, + "rewards/chosen": 1.8999391794204712, + "rewards/margins": 0.2117716521024704, + "rewards/rejected": 1.6881673336029053, + "step": 6300 + }, + { + "accuracy": 0.612500011920929, + "epoch": 1.58, + "learning_rate": 4.606461547156852e-06, + "logps/chosen": -159.94677734375, + "logps/margins": 9.751134872436523, + "logps/rejected": -169.69790649414062, + "loss": 0.7119, + "rewards/chosen": 1.5084736347198486, + "rewards/margins": 0.29665184020996094, + "rewards/rejected": 1.2118217945098877, + "step": 6310 + }, + { + "accuracy": 0.6499999761581421, + "epoch": 1.58, + "learning_rate": 4.593413441103346e-06, + "logps/chosen": -150.86587524414062, + "logps/margins": -9.911309242248535, + "logps/rejected": -140.95458984375, + "loss": 0.7016, + "rewards/chosen": 1.7114589214324951, + "rewards/margins": 0.4001246392726898, + "rewards/rejected": 1.311334490776062, + "step": 6320 + }, + { + "accuracy": 0.5625, + "epoch": 1.58, + "learning_rate": 4.580368121795033e-06, + "logps/chosen": -172.2426300048828, + "logps/margins": -4.372363090515137, + "logps/rejected": -167.87026977539062, + "loss": 0.7369, + "rewards/chosen": 1.6780755519866943, + "rewards/margins": 0.2720969319343567, + "rewards/rejected": 1.4059786796569824, + "step": 6330 + }, + { + "accuracy": 0.512499988079071, + "epoch": 1.58, + "learning_rate": 4.567325678644564e-06, + "logps/chosen": -155.9556121826172, + "logps/margins": -31.65757179260254, + "logps/rejected": -124.29804992675781, + "loss": 0.7605, + "rewards/chosen": 1.5564334392547607, + "rewards/margins": 0.18370254337787628, + "rewards/rejected": 1.3727308511734009, + "step": 6340 + }, + { + "accuracy": 0.637499988079071, + "epoch": 1.59, + "learning_rate": 4.5542862010448716e-06, + "logps/chosen": -175.94134521484375, + "logps/margins": -22.17601776123047, + "logps/rejected": -153.76531982421875, + "loss": 0.7236, + "rewards/chosen": 1.6951125860214233, + "rewards/margins": 0.6090446710586548, + "rewards/rejected": 1.0860679149627686, + "step": 6350 + }, + { + "accuracy": 0.7124999761581421, + "epoch": 1.59, + "learning_rate": 4.5412497783685675e-06, + "logps/chosen": -162.08810424804688, + "logps/margins": -4.337316989898682, + "logps/rejected": -157.7508087158203, + "loss": 0.6953, + "rewards/chosen": 1.6564887762069702, + "rewards/margins": 0.47260332107543945, + "rewards/rejected": 1.1838856935501099, + "step": 6360 + }, + { + "accuracy": 0.5375000238418579, + "epoch": 1.59, + "learning_rate": 4.528216499967321e-06, + "logps/chosen": -172.24362182617188, + "logps/margins": -18.937410354614258, + "logps/rejected": -153.30618286132812, + "loss": 0.7187, + "rewards/chosen": 1.6501439809799194, + "rewards/margins": 0.35545700788497925, + "rewards/rejected": 1.294687032699585, + "step": 6370 + }, + { + "accuracy": 0.675000011920929, + "epoch": 1.59, + "learning_rate": 4.515186455171251e-06, + "logps/chosen": -158.17214965820312, + "logps/margins": -18.50545310974121, + "logps/rejected": -139.6666717529297, + "loss": 0.7196, + "rewards/chosen": 1.683708906173706, + "rewards/margins": 0.38084477186203003, + "rewards/rejected": 1.3028640747070312, + "step": 6380 + }, + { + "accuracy": 0.637499988079071, + "epoch": 1.6, + "learning_rate": 4.502159733288314e-06, + "logps/chosen": -204.3793487548828, + "logps/margins": -21.53754997253418, + "logps/rejected": -182.84181213378906, + "loss": 0.7352, + "rewards/chosen": 2.1964669227600098, + "rewards/margins": 0.5059443712234497, + "rewards/rejected": 1.6905224323272705, + "step": 6390 + }, + { + "accuracy": 0.6000000238418579, + "epoch": 1.6, + "learning_rate": 4.489136423603692e-06, + "logps/chosen": -173.08242797851562, + "logps/margins": -1.3702410459518433, + "logps/rejected": -171.71218872070312, + "loss": 0.7127, + "rewards/chosen": 1.6480600833892822, + "rewards/margins": 0.29389628767967224, + "rewards/rejected": 1.354163646697998, + "step": 6400 + }, + { + "accuracy": 0.5874999761581421, + "epoch": 1.6, + "learning_rate": 4.476116615379181e-06, + "logps/chosen": -133.32424926757812, + "logps/margins": 41.56899642944336, + "logps/rejected": -174.89324951171875, + "loss": 0.7386, + "rewards/chosen": 1.4775035381317139, + "rewards/margins": 0.4425325393676758, + "rewards/rejected": 1.034970998764038, + "step": 6410 + }, + { + "accuracy": 0.6875, + "epoch": 1.6, + "learning_rate": 4.463100397852572e-06, + "logps/chosen": -163.25204467773438, + "logps/margins": -8.757268905639648, + "logps/rejected": -154.49478149414062, + "loss": 0.7366, + "rewards/chosen": 1.5446226596832275, + "rewards/margins": 0.4463632106781006, + "rewards/rejected": 1.0982593297958374, + "step": 6420 + }, + { + "accuracy": 0.550000011920929, + "epoch": 1.61, + "learning_rate": 4.450087860237052e-06, + "logps/chosen": -132.59994506835938, + "logps/margins": 13.911750793457031, + "logps/rejected": -146.51168823242188, + "loss": 0.6689, + "rewards/chosen": 1.6332676410675049, + "rewards/margins": 0.224029541015625, + "rewards/rejected": 1.4092381000518799, + "step": 6430 + }, + { + "accuracy": 0.574999988079071, + "epoch": 1.61, + "learning_rate": 4.437079091720583e-06, + "logps/chosen": -163.89395141601562, + "logps/margins": 5.634055137634277, + "logps/rejected": -169.52801513671875, + "loss": 0.734, + "rewards/chosen": 1.4734240770339966, + "rewards/margins": 0.2688307762145996, + "rewards/rejected": 1.2045934200286865, + "step": 6440 + }, + { + "accuracy": 0.5874999761581421, + "epoch": 1.61, + "learning_rate": 4.424074181465291e-06, + "logps/chosen": -149.08279418945312, + "logps/margins": -2.024761199951172, + "logps/rejected": -147.0580291748047, + "loss": 0.7076, + "rewards/chosen": 1.5936577320098877, + "rewards/margins": 0.35599249601364136, + "rewards/rejected": 1.2376651763916016, + "step": 6450 + }, + { + "accuracy": 0.6000000238418579, + "epoch": 1.61, + "learning_rate": 4.411073218606864e-06, + "logps/chosen": -132.3000030517578, + "logps/margins": 15.349029541015625, + "logps/rejected": -147.64901733398438, + "loss": 0.757, + "rewards/chosen": 1.3727271556854248, + "rewards/margins": 0.33733969926834106, + "rewards/rejected": 1.0353872776031494, + "step": 6460 + }, + { + "accuracy": 0.612500011920929, + "epoch": 1.62, + "learning_rate": 4.398076292253931e-06, + "logps/chosen": -158.4666748046875, + "logps/margins": -13.679998397827148, + "logps/rejected": -144.78668212890625, + "loss": 0.7257, + "rewards/chosen": 1.7065582275390625, + "rewards/margins": 0.2765595316886902, + "rewards/rejected": 1.429998755455017, + "step": 6470 + }, + { + "accuracy": 0.550000011920929, + "epoch": 1.62, + "learning_rate": 4.385083491487452e-06, + "logps/chosen": -119.96173095703125, + "logps/margins": 6.474104404449463, + "logps/rejected": -126.43583679199219, + "loss": 0.6644, + "rewards/chosen": 1.2856088876724243, + "rewards/margins": 0.2256680279970169, + "rewards/rejected": 1.0599409341812134, + "step": 6480 + }, + { + "accuracy": 0.637499988079071, + "epoch": 1.62, + "learning_rate": 4.372094905360115e-06, + "logps/chosen": -150.596923828125, + "logps/margins": -18.646228790283203, + "logps/rejected": -131.95071411132812, + "loss": 0.6948, + "rewards/chosen": 1.5138792991638184, + "rewards/margins": 0.4565415382385254, + "rewards/rejected": 1.0573378801345825, + "step": 6490 + }, + { + "accuracy": 0.550000011920929, + "epoch": 1.62, + "learning_rate": 4.359110622895716e-06, + "logps/chosen": -170.7530517578125, + "logps/margins": -2.3616690635681152, + "logps/rejected": -168.39138793945312, + "loss": 0.7172, + "rewards/chosen": 1.4929420948028564, + "rewards/margins": 0.22217953205108643, + "rewards/rejected": 1.2707626819610596, + "step": 6500 + }, + { + "accuracy": 0.574999988079071, + "epoch": 1.63, + "learning_rate": 4.346130733088559e-06, + "logps/chosen": -128.15896606445312, + "logps/margins": 1.079858422279358, + "logps/rejected": -129.2388153076172, + "loss": 0.7233, + "rewards/chosen": 1.5765819549560547, + "rewards/margins": 0.23462390899658203, + "rewards/rejected": 1.3419581651687622, + "step": 6510 + }, + { + "accuracy": 0.48750001192092896, + "epoch": 1.63, + "learning_rate": 4.33315532490284e-06, + "logps/chosen": -138.62637329101562, + "logps/margins": 11.888833999633789, + "logps/rejected": -150.51519775390625, + "loss": 0.7445, + "rewards/chosen": 1.2448725700378418, + "rewards/margins": 0.3661494255065918, + "rewards/rejected": 0.87872314453125, + "step": 6520 + }, + { + "accuracy": 0.675000011920929, + "epoch": 1.63, + "learning_rate": 4.320184487272031e-06, + "logps/chosen": -168.5774383544922, + "logps/margins": -12.81226921081543, + "logps/rejected": -155.76519775390625, + "loss": 0.6611, + "rewards/chosen": 1.9139267206192017, + "rewards/margins": 0.47948408126831055, + "rewards/rejected": 1.4344426393508911, + "step": 6530 + }, + { + "accuracy": 0.625, + "epoch": 1.64, + "learning_rate": 4.307218309098287e-06, + "logps/chosen": -160.67478942871094, + "logps/margins": 3.058276414871216, + "logps/rejected": -163.73306274414062, + "loss": 0.7379, + "rewards/chosen": 1.4978049993515015, + "rewards/margins": 0.4167202115058899, + "rewards/rejected": 1.0810847282409668, + "step": 6540 + }, + { + "accuracy": 0.48750001192092896, + "epoch": 1.64, + "learning_rate": 4.294256879251818e-06, + "logps/chosen": -140.1722412109375, + "logps/margins": 0.2024724930524826, + "logps/rejected": -140.37472534179688, + "loss": 0.7479, + "rewards/chosen": 1.663670301437378, + "rewards/margins": 0.15399914979934692, + "rewards/rejected": 1.5096709728240967, + "step": 6550 + }, + { + "accuracy": 0.6499999761581421, + "epoch": 1.64, + "learning_rate": 4.281300286570297e-06, + "logps/chosen": -178.0838623046875, + "logps/margins": -5.541223049163818, + "logps/rejected": -172.54263305664062, + "loss": 0.6847, + "rewards/chosen": 1.7769016027450562, + "rewards/margins": 0.5859988927841187, + "rewards/rejected": 1.1909029483795166, + "step": 6560 + }, + { + "accuracy": 0.550000011920929, + "epoch": 1.64, + "learning_rate": 4.268348619858237e-06, + "logps/chosen": -140.5729217529297, + "logps/margins": -2.8682024478912354, + "logps/rejected": -137.7047119140625, + "loss": 0.7349, + "rewards/chosen": 1.4616376161575317, + "rewards/margins": 0.276151180267334, + "rewards/rejected": 1.1854865550994873, + "step": 6570 + }, + { + "accuracy": 0.6499999761581421, + "epoch": 1.65, + "learning_rate": 4.255401967886394e-06, + "logps/chosen": -182.50552368164062, + "logps/margins": -11.391947746276855, + "logps/rejected": -171.11358642578125, + "loss": 0.6849, + "rewards/chosen": 1.7051641941070557, + "rewards/margins": 0.24314716458320618, + "rewards/rejected": 1.4620170593261719, + "step": 6580 + }, + { + "accuracy": 0.550000011920929, + "epoch": 1.65, + "learning_rate": 4.242460419391148e-06, + "logps/chosen": -181.9458770751953, + "logps/margins": -32.073097229003906, + "logps/rejected": -149.87277221679688, + "loss": 0.7871, + "rewards/chosen": 1.7225593328475952, + "rewards/margins": 0.1881454586982727, + "rewards/rejected": 1.5344138145446777, + "step": 6590 + }, + { + "accuracy": 0.6499999761581421, + "epoch": 1.65, + "learning_rate": 4.229524063073902e-06, + "logps/chosen": -140.6343994140625, + "logps/margins": 12.676996231079102, + "logps/rejected": -153.3114013671875, + "loss": 0.6545, + "rewards/chosen": 1.5459976196289062, + "rewards/margins": 0.3303530216217041, + "rewards/rejected": 1.2156445980072021, + "step": 6600 + }, + { + "accuracy": 0.5874999761581421, + "epoch": 1.65, + "learning_rate": 4.2165929876004715e-06, + "logps/chosen": -171.68870544433594, + "logps/margins": 13.917673110961914, + "logps/rejected": -185.6063690185547, + "loss": 0.7618, + "rewards/chosen": 1.8194377422332764, + "rewards/margins": 0.3546406626701355, + "rewards/rejected": 1.4647972583770752, + "step": 6610 + }, + { + "accuracy": 0.5874999761581421, + "epoch": 1.66, + "learning_rate": 4.20366728160048e-06, + "logps/chosen": -127.8653564453125, + "logps/margins": 4.835513114929199, + "logps/rejected": -132.70083618164062, + "loss": 0.7321, + "rewards/chosen": 1.4538450241088867, + "rewards/margins": 0.2261325865983963, + "rewards/rejected": 1.227712631225586, + "step": 6620 + }, + { + "accuracy": 0.4625000059604645, + "epoch": 1.66, + "learning_rate": 4.19074703366674e-06, + "logps/chosen": -172.46337890625, + "logps/margins": -12.375957489013672, + "logps/rejected": -160.08740234375, + "loss": 0.7754, + "rewards/chosen": 1.7319765090942383, + "rewards/margins": 0.16227373480796814, + "rewards/rejected": 1.5697027444839478, + "step": 6630 + }, + { + "accuracy": 0.637499988079071, + "epoch": 1.66, + "learning_rate": 4.177832332354662e-06, + "logps/chosen": -143.63906860351562, + "logps/margins": 15.220006942749023, + "logps/rejected": -158.85906982421875, + "loss": 0.7262, + "rewards/chosen": 1.6866559982299805, + "rewards/margins": 0.4321692883968353, + "rewards/rejected": 1.2544866800308228, + "step": 6640 + }, + { + "accuracy": 0.5625, + "epoch": 1.66, + "learning_rate": 4.164923266181641e-06, + "logps/chosen": -166.7517547607422, + "logps/margins": -13.072016716003418, + "logps/rejected": -153.6797637939453, + "loss": 0.7263, + "rewards/chosen": 1.7802671194076538, + "rewards/margins": 0.17927296459674835, + "rewards/rejected": 1.600994348526001, + "step": 6650 + }, + { + "accuracy": 0.612500011920929, + "epoch": 1.67, + "learning_rate": 4.1520199236264425e-06, + "logps/chosen": -146.63943481445312, + "logps/margins": -9.242958068847656, + "logps/rejected": -137.396484375, + "loss": 0.757, + "rewards/chosen": 1.5669875144958496, + "rewards/margins": 0.24951672554016113, + "rewards/rejected": 1.317470908164978, + "step": 6660 + }, + { + "accuracy": 0.6625000238418579, + "epoch": 1.67, + "learning_rate": 4.139122393128607e-06, + "logps/chosen": -154.74659729003906, + "logps/margins": 10.962950706481934, + "logps/rejected": -165.7095489501953, + "loss": 0.7323, + "rewards/chosen": 1.7030436992645264, + "rewards/margins": 0.196172833442688, + "rewards/rejected": 1.5068708658218384, + "step": 6670 + }, + { + "accuracy": 0.6000000238418579, + "epoch": 1.67, + "learning_rate": 4.126230763087837e-06, + "logps/chosen": -204.58815002441406, + "logps/margins": -25.492568969726562, + "logps/rejected": -179.0955810546875, + "loss": 0.6681, + "rewards/chosen": 2.1701443195343018, + "rewards/margins": 0.5017116665840149, + "rewards/rejected": 1.6684324741363525, + "step": 6680 + }, + { + "accuracy": 0.6000000238418579, + "epoch": 1.67, + "learning_rate": 4.113345121863395e-06, + "logps/chosen": -151.72512817382812, + "logps/margins": -2.0030715465545654, + "logps/rejected": -149.72203063964844, + "loss": 0.735, + "rewards/chosen": 1.427788496017456, + "rewards/margins": 0.1302785873413086, + "rewards/rejected": 1.2975099086761475, + "step": 6690 + }, + { + "accuracy": 0.675000011920929, + "epoch": 1.68, + "learning_rate": 4.100465557773495e-06, + "logps/chosen": -157.68789672851562, + "logps/margins": -29.917308807373047, + "logps/rejected": -127.77056884765625, + "loss": 0.6968, + "rewards/chosen": 1.65158212184906, + "rewards/margins": 0.5463930368423462, + "rewards/rejected": 1.1051890850067139, + "step": 6700 + }, + { + "accuracy": 0.6000000238418579, + "epoch": 1.68, + "learning_rate": 4.087592159094697e-06, + "logps/chosen": -169.71957397460938, + "logps/margins": 6.895395755767822, + "logps/rejected": -176.61495971679688, + "loss": 0.7232, + "rewards/chosen": 2.012377977371216, + "rewards/margins": 0.34723567962646484, + "rewards/rejected": 1.6651424169540405, + "step": 6710 + }, + { + "accuracy": 0.5249999761581421, + "epoch": 1.68, + "learning_rate": 4.074725014061306e-06, + "logps/chosen": -177.43174743652344, + "logps/margins": -17.387361526489258, + "logps/rejected": -160.04440307617188, + "loss": 0.6892, + "rewards/chosen": 1.7255293130874634, + "rewards/margins": 0.36370953917503357, + "rewards/rejected": 1.361819863319397, + "step": 6720 + }, + { + "accuracy": 0.6000000238418579, + "epoch": 1.68, + "learning_rate": 4.061864210864765e-06, + "logps/chosen": -178.18478393554688, + "logps/margins": -20.677087783813477, + "logps/rejected": -157.50772094726562, + "loss": 0.7731, + "rewards/chosen": 1.7428748607635498, + "rewards/margins": 0.3560285270214081, + "rewards/rejected": 1.3868463039398193, + "step": 6730 + }, + { + "accuracy": 0.612500011920929, + "epoch": 1.69, + "learning_rate": 4.049009837653044e-06, + "logps/chosen": -163.3809051513672, + "logps/margins": 11.287524223327637, + "logps/rejected": -174.66842651367188, + "loss": 0.6908, + "rewards/chosen": 1.9405453205108643, + "rewards/margins": 0.49984535574913025, + "rewards/rejected": 1.4406999349594116, + "step": 6740 + }, + { + "accuracy": 0.512499988079071, + "epoch": 1.69, + "learning_rate": 4.036161982530048e-06, + "logps/chosen": -174.5832061767578, + "logps/margins": -25.94174575805664, + "logps/rejected": -148.64146423339844, + "loss": 0.7442, + "rewards/chosen": 1.6726661920547485, + "rewards/margins": 0.25984472036361694, + "rewards/rejected": 1.4128214120864868, + "step": 6750 + }, + { + "accuracy": 0.550000011920929, + "epoch": 1.69, + "learning_rate": 4.023320733555006e-06, + "logps/chosen": -173.62986755371094, + "logps/margins": -17.39786720275879, + "logps/rejected": -156.2320098876953, + "loss": 0.6969, + "rewards/chosen": 1.32326340675354, + "rewards/margins": 0.23298314213752747, + "rewards/rejected": 1.0902801752090454, + "step": 6760 + }, + { + "accuracy": 0.637499988079071, + "epoch": 1.69, + "learning_rate": 4.010486178741867e-06, + "logps/chosen": -149.34742736816406, + "logps/margins": -8.556588172912598, + "logps/rejected": -140.79083251953125, + "loss": 0.7489, + "rewards/chosen": 1.3939416408538818, + "rewards/margins": 0.19151577353477478, + "rewards/rejected": 1.2024257183074951, + "step": 6770 + }, + { + "accuracy": 0.637499988079071, + "epoch": 1.69, + "learning_rate": 3.997658406058697e-06, + "logps/chosen": -146.2511749267578, + "logps/margins": 0.07511825859546661, + "logps/rejected": -146.3262939453125, + "loss": 0.6919, + "rewards/chosen": 1.6005375385284424, + "rewards/margins": 0.4136505126953125, + "rewards/rejected": 1.1868870258331299, + "step": 6780 + }, + { + "accuracy": 0.550000011920929, + "epoch": 1.7, + "learning_rate": 3.984837503427081e-06, + "logps/chosen": -159.47991943359375, + "logps/margins": -14.165512084960938, + "logps/rejected": -145.3144073486328, + "loss": 0.7238, + "rewards/chosen": 1.6023223400115967, + "rewards/margins": 0.31503981351852417, + "rewards/rejected": 1.2872823476791382, + "step": 6790 + }, + { + "accuracy": 0.512499988079071, + "epoch": 1.7, + "learning_rate": 3.972023558721515e-06, + "logps/chosen": -156.34718322753906, + "logps/margins": -9.087708473205566, + "logps/rejected": -147.2594757080078, + "loss": 0.7132, + "rewards/chosen": 1.4691150188446045, + "rewards/margins": 0.17879317700862885, + "rewards/rejected": 1.290321946144104, + "step": 6800 + }, + { + "accuracy": 0.612500011920929, + "epoch": 1.7, + "learning_rate": 3.959216659768805e-06, + "logps/chosen": -161.23997497558594, + "logps/margins": -8.713390350341797, + "logps/rejected": -152.52658081054688, + "loss": 0.7283, + "rewards/chosen": 1.4847370386123657, + "rewards/margins": 0.2641260325908661, + "rewards/rejected": 1.2206110954284668, + "step": 6810 + }, + { + "accuracy": 0.6625000238418579, + "epoch": 1.71, + "learning_rate": 3.946416894347463e-06, + "logps/chosen": -157.5177001953125, + "logps/margins": -9.977945327758789, + "logps/rejected": -147.53976440429688, + "loss": 0.7337, + "rewards/chosen": 1.6254346370697021, + "rewards/margins": 0.4399493336677551, + "rewards/rejected": 1.1854854822158813, + "step": 6820 + }, + { + "accuracy": 0.550000011920929, + "epoch": 1.71, + "learning_rate": 3.933624350187114e-06, + "logps/chosen": -185.5244140625, + "logps/margins": -22.026912689208984, + "logps/rejected": -163.49749755859375, + "loss": 0.6719, + "rewards/chosen": 1.424984335899353, + "rewards/margins": 0.26903867721557617, + "rewards/rejected": 1.1559455394744873, + "step": 6830 + }, + { + "accuracy": 0.550000011920929, + "epoch": 1.71, + "learning_rate": 3.92083911496788e-06, + "logps/chosen": -143.92501831054688, + "logps/margins": -7.3387131690979, + "logps/rejected": -136.58631896972656, + "loss": 0.6917, + "rewards/chosen": 1.9015741348266602, + "rewards/margins": 0.5276426672935486, + "rewards/rejected": 1.3739315271377563, + "step": 6840 + }, + { + "accuracy": 0.574999988079071, + "epoch": 1.71, + "learning_rate": 3.908061276319795e-06, + "logps/chosen": -155.42800903320312, + "logps/margins": -16.35663604736328, + "logps/rejected": -139.0713653564453, + "loss": 0.7645, + "rewards/chosen": 1.8137505054473877, + "rewards/margins": 0.22690951824188232, + "rewards/rejected": 1.5868409872055054, + "step": 6850 + }, + { + "accuracy": 0.4000000059604645, + "epoch": 1.71, + "learning_rate": 3.895290921822195e-06, + "logps/chosen": -114.3714370727539, + "logps/margins": 4.784496784210205, + "logps/rejected": -119.15592193603516, + "loss": 0.7151, + "rewards/chosen": 1.5914109945297241, + "rewards/margins": 0.09470517933368683, + "rewards/rejected": 1.4967057704925537, + "step": 6860 + }, + { + "accuracy": 0.6000000238418579, + "epoch": 1.72, + "learning_rate": 3.8825281390031164e-06, + "logps/chosen": -150.35653686523438, + "logps/margins": 25.39633560180664, + "logps/rejected": -175.7528839111328, + "loss": 0.6928, + "rewards/chosen": 1.9147886037826538, + "rewards/margins": 0.3604622185230255, + "rewards/rejected": 1.5543262958526611, + "step": 6870 + }, + { + "accuracy": 0.574999988079071, + "epoch": 1.72, + "learning_rate": 3.869773015338702e-06, + "logps/chosen": -129.58197021484375, + "logps/margins": 31.118057250976562, + "logps/rejected": -160.70004272460938, + "loss": 0.7631, + "rewards/chosen": 1.5291416645050049, + "rewards/margins": 0.10039478540420532, + "rewards/rejected": 1.4287469387054443, + "step": 6880 + }, + { + "accuracy": 0.574999988079071, + "epoch": 1.72, + "learning_rate": 3.8570256382525985e-06, + "logps/chosen": -167.4872589111328, + "logps/margins": -20.09029769897461, + "logps/rejected": -147.3969268798828, + "loss": 0.77, + "rewards/chosen": 1.617287278175354, + "rewards/margins": 0.2320544272661209, + "rewards/rejected": 1.385232925415039, + "step": 6890 + }, + { + "accuracy": 0.6499999761581421, + "epoch": 1.73, + "learning_rate": 3.844286095115357e-06, + "logps/chosen": -151.36456298828125, + "logps/margins": -2.995993137359619, + "logps/rejected": -148.36856079101562, + "loss": 0.8008, + "rewards/chosen": 1.8141626119613647, + "rewards/margins": 0.20993950963020325, + "rewards/rejected": 1.6042228937149048, + "step": 6900 + }, + { + "accuracy": 0.5249999761581421, + "epoch": 1.73, + "learning_rate": 3.831554473243836e-06, + "logps/chosen": -169.8393096923828, + "logps/margins": -5.376766204833984, + "logps/rejected": -164.46255493164062, + "loss": 0.7145, + "rewards/chosen": 1.713913917541504, + "rewards/margins": 0.13852782547473907, + "rewards/rejected": 1.5753860473632812, + "step": 6910 + }, + { + "accuracy": 0.6875, + "epoch": 1.73, + "learning_rate": 3.818830859900601e-06, + "logps/chosen": -158.87396240234375, + "logps/margins": -5.6355695724487305, + "logps/rejected": -153.23838806152344, + "loss": 0.7063, + "rewards/chosen": 1.5138578414916992, + "rewards/margins": 0.35583415627479553, + "rewards/rejected": 1.1580235958099365, + "step": 6920 + }, + { + "accuracy": 0.5874999761581421, + "epoch": 1.73, + "learning_rate": 3.806115342293324e-06, + "logps/chosen": -161.66049194335938, + "logps/margins": 14.587471008300781, + "logps/rejected": -176.24794006347656, + "loss": 0.7241, + "rewards/chosen": 1.5252536535263062, + "rewards/margins": 0.1867510974407196, + "rewards/rejected": 1.3385025262832642, + "step": 6930 + }, + { + "accuracy": 0.574999988079071, + "epoch": 1.73, + "learning_rate": 3.793408007574196e-06, + "logps/chosen": -160.85671997070312, + "logps/margins": -26.460430145263672, + "logps/rejected": -134.39627075195312, + "loss": 0.8073, + "rewards/chosen": 1.5750184059143066, + "rewards/margins": 0.1284082680940628, + "rewards/rejected": 1.4466100931167603, + "step": 6940 + }, + { + "accuracy": 0.550000011920929, + "epoch": 1.74, + "learning_rate": 3.7807089428393124e-06, + "logps/chosen": -142.08981323242188, + "logps/margins": -0.10599174350500107, + "logps/rejected": -141.98379516601562, + "loss": 0.7571, + "rewards/chosen": 1.3943493366241455, + "rewards/margins": 0.3656498193740845, + "rewards/rejected": 1.028699517250061, + "step": 6950 + }, + { + "accuracy": 0.4625000059604645, + "epoch": 1.74, + "learning_rate": 3.768018235128094e-06, + "logps/chosen": -146.45840454101562, + "logps/margins": 2.0057597160339355, + "logps/rejected": -148.46417236328125, + "loss": 0.7569, + "rewards/chosen": 1.7569948434829712, + "rewards/margins": 0.07248908281326294, + "rewards/rejected": 1.6845057010650635, + "step": 6960 + }, + { + "accuracy": 0.5249999761581421, + "epoch": 1.74, + "learning_rate": 3.7553359714226813e-06, + "logps/chosen": -126.7012939453125, + "logps/margins": 1.2685275077819824, + "logps/rejected": -127.9698257446289, + "loss": 0.7574, + "rewards/chosen": 1.5858689546585083, + "rewards/margins": 0.26632317900657654, + "rewards/rejected": 1.3195457458496094, + "step": 6970 + }, + { + "accuracy": 0.6625000238418579, + "epoch": 1.75, + "learning_rate": 3.7426622386473384e-06, + "logps/chosen": -173.5938262939453, + "logps/margins": -11.32087516784668, + "logps/rejected": -162.27294921875, + "loss": 0.6462, + "rewards/chosen": 2.126638650894165, + "rewards/margins": 0.5569209456443787, + "rewards/rejected": 1.5697177648544312, + "step": 6980 + }, + { + "accuracy": 0.550000011920929, + "epoch": 1.75, + "learning_rate": 3.729997123667857e-06, + "logps/chosen": -175.18601989746094, + "logps/margins": -14.659891128540039, + "logps/rejected": -160.52613830566406, + "loss": 0.7242, + "rewards/chosen": 2.047013521194458, + "rewards/margins": 0.23422813415527344, + "rewards/rejected": 1.8127855062484741, + "step": 6990 + }, + { + "accuracy": 0.6000000238418579, + "epoch": 1.75, + "learning_rate": 3.717340713290963e-06, + "logps/chosen": -161.9573974609375, + "logps/margins": -0.8324821591377258, + "logps/rejected": -161.12490844726562, + "loss": 0.7393, + "rewards/chosen": 1.7015488147735596, + "rewards/margins": 0.34370309114456177, + "rewards/rejected": 1.357845664024353, + "step": 7000 + }, + { + "accuracy": 0.675000011920929, + "epoch": 1.75, + "learning_rate": 3.704693094263725e-06, + "logps/chosen": -173.98716735839844, + "logps/margins": -16.46856117248535, + "logps/rejected": -157.5186004638672, + "loss": 0.7112, + "rewards/chosen": 1.6857118606567383, + "rewards/margins": 0.5741534233093262, + "rewards/rejected": 1.111558437347412, + "step": 7010 + }, + { + "accuracy": 0.637499988079071, + "epoch": 1.75, + "learning_rate": 3.692054353272951e-06, + "logps/chosen": -190.32464599609375, + "logps/margins": -15.618194580078125, + "logps/rejected": -174.70645141601562, + "loss": 0.7052, + "rewards/chosen": 1.516990303993225, + "rewards/margins": 0.436454713344574, + "rewards/rejected": 1.0805355310440063, + "step": 7020 + }, + { + "accuracy": 0.44999998807907104, + "epoch": 1.76, + "learning_rate": 3.679424576944599e-06, + "logps/chosen": -182.04928588867188, + "logps/margins": -14.429516792297363, + "logps/rejected": -167.6197509765625, + "loss": 0.7327, + "rewards/chosen": 1.5069899559020996, + "rewards/margins": 0.008828687481582165, + "rewards/rejected": 1.4981614351272583, + "step": 7030 + }, + { + "accuracy": 0.574999988079071, + "epoch": 1.76, + "learning_rate": 3.666803851843185e-06, + "logps/chosen": -177.2584686279297, + "logps/margins": -27.902135848999023, + "logps/rejected": -149.35635375976562, + "loss": 0.6726, + "rewards/chosen": 1.4496058225631714, + "rewards/margins": 0.15264640748500824, + "rewards/rejected": 1.2969595193862915, + "step": 7040 + }, + { + "accuracy": 0.612500011920929, + "epoch": 1.76, + "learning_rate": 3.6541922644711913e-06, + "logps/chosen": -154.53219604492188, + "logps/margins": -13.419695854187012, + "logps/rejected": -141.1124725341797, + "loss": 0.7169, + "rewards/chosen": 1.6939529180526733, + "rewards/margins": 0.3008750081062317, + "rewards/rejected": 1.3930778503417969, + "step": 7050 + }, + { + "accuracy": 0.550000011920929, + "epoch": 1.77, + "learning_rate": 3.6415899012684596e-06, + "logps/chosen": -125.5202407836914, + "logps/margins": 7.046398162841797, + "logps/rejected": -132.56663513183594, + "loss": 0.6977, + "rewards/chosen": 1.4478719234466553, + "rewards/margins": 0.23979803919792175, + "rewards/rejected": 1.2080737352371216, + "step": 7060 + }, + { + "accuracy": 0.550000011920929, + "epoch": 1.77, + "learning_rate": 3.62899684861162e-06, + "logps/chosen": -147.63552856445312, + "logps/margins": 9.005585670471191, + "logps/rejected": -156.64111328125, + "loss": 0.7496, + "rewards/chosen": 1.376513123512268, + "rewards/margins": 0.21242213249206543, + "rewards/rejected": 1.164090871810913, + "step": 7070 + }, + { + "accuracy": 0.6875, + "epoch": 1.77, + "learning_rate": 3.616413192813483e-06, + "logps/chosen": -146.91473388671875, + "logps/margins": 16.642072677612305, + "logps/rejected": -163.55679321289062, + "loss": 0.6547, + "rewards/chosen": 1.6972068548202515, + "rewards/margins": 0.5289878845214844, + "rewards/rejected": 1.1682188510894775, + "step": 7080 + }, + { + "accuracy": 0.512499988079071, + "epoch": 1.77, + "learning_rate": 3.603839020122455e-06, + "logps/chosen": -126.9017562866211, + "logps/margins": 23.529781341552734, + "logps/rejected": -150.4315185546875, + "loss": 0.8431, + "rewards/chosen": 0.9939861297607422, + "rewards/margins": -0.13155114650726318, + "rewards/rejected": 1.1255372762680054, + "step": 7090 + }, + { + "accuracy": 0.574999988079071, + "epoch": 1.77, + "learning_rate": 3.5912744167219425e-06, + "logps/chosen": -163.22808837890625, + "logps/margins": -15.318765640258789, + "logps/rejected": -147.90933227539062, + "loss": 0.7084, + "rewards/chosen": 1.3736497163772583, + "rewards/margins": 0.22370870411396027, + "rewards/rejected": 1.1499409675598145, + "step": 7100 + }, + { + "accuracy": 0.612500011920929, + "epoch": 1.78, + "learning_rate": 3.5787194687297655e-06, + "logps/chosen": -134.70509338378906, + "logps/margins": 3.340815782546997, + "logps/rejected": -138.04591369628906, + "loss": 0.7796, + "rewards/chosen": 1.5358192920684814, + "rewards/margins": 0.3581804931163788, + "rewards/rejected": 1.1776387691497803, + "step": 7110 + }, + { + "accuracy": 0.574999988079071, + "epoch": 1.78, + "learning_rate": 3.5661742621975678e-06, + "logps/chosen": -132.65538024902344, + "logps/margins": 27.655193328857422, + "logps/rejected": -160.31057739257812, + "loss": 0.7843, + "rewards/chosen": 1.3225517272949219, + "rewards/margins": 0.1296599805355072, + "rewards/rejected": 1.1928919553756714, + "step": 7120 + }, + { + "accuracy": 0.5874999761581421, + "epoch": 1.78, + "learning_rate": 3.553638883110222e-06, + "logps/chosen": -161.3286895751953, + "logps/margins": -3.5587971210479736, + "logps/rejected": -157.7698974609375, + "loss": 0.6904, + "rewards/chosen": 1.3315457105636597, + "rewards/margins": 0.2465551346540451, + "rewards/rejected": 1.0849906206130981, + "step": 7130 + }, + { + "accuracy": 0.5874999761581421, + "epoch": 1.79, + "learning_rate": 3.5411134173852447e-06, + "logps/chosen": -153.1470184326172, + "logps/margins": -3.9751434326171875, + "logps/rejected": -149.17189025878906, + "loss": 0.6446, + "rewards/chosen": 1.6182762384414673, + "rewards/margins": 0.3828577399253845, + "rewards/rejected": 1.2354180812835693, + "step": 7140 + }, + { + "accuracy": 0.5249999761581421, + "epoch": 1.79, + "learning_rate": 3.528597950872209e-06, + "logps/chosen": -156.3068084716797, + "logps/margins": -12.240486145019531, + "logps/rejected": -144.0663299560547, + "loss": 0.7563, + "rewards/chosen": 1.4026468992233276, + "rewards/margins": 0.2039092481136322, + "rewards/rejected": 1.1987375020980835, + "step": 7150 + }, + { + "accuracy": 0.637499988079071, + "epoch": 1.79, + "learning_rate": 3.5160925693521456e-06, + "logps/chosen": -169.03414916992188, + "logps/margins": -20.8201904296875, + "logps/rejected": -148.21395874023438, + "loss": 0.6627, + "rewards/chosen": 1.734702467918396, + "rewards/margins": 0.43718546628952026, + "rewards/rejected": 1.29751718044281, + "step": 7160 + }, + { + "accuracy": 0.625, + "epoch": 1.79, + "learning_rate": 3.5035973585369708e-06, + "logps/chosen": -151.37301635742188, + "logps/margins": -30.596887588500977, + "logps/rejected": -120.77613830566406, + "loss": 0.7454, + "rewards/chosen": 1.382054090499878, + "rewards/margins": 0.22107401490211487, + "rewards/rejected": 1.160979986190796, + "step": 7170 + }, + { + "accuracy": 0.637499988079071, + "epoch": 1.79, + "learning_rate": 3.4911124040688884e-06, + "logps/chosen": -135.38656616210938, + "logps/margins": -19.591060638427734, + "logps/rejected": -115.7955322265625, + "loss": 0.7324, + "rewards/chosen": 1.5287854671478271, + "rewards/margins": 0.18112435936927795, + "rewards/rejected": 1.3476612567901611, + "step": 7180 + }, + { + "accuracy": 0.637499988079071, + "epoch": 1.8, + "learning_rate": 3.478637791519802e-06, + "logps/chosen": -163.11178588867188, + "logps/margins": 8.439321517944336, + "logps/rejected": -171.55111694335938, + "loss": 0.6682, + "rewards/chosen": 1.768151044845581, + "rewards/margins": 0.5765558481216431, + "rewards/rejected": 1.1915953159332275, + "step": 7190 + }, + { + "accuracy": 0.6499999761581421, + "epoch": 1.8, + "learning_rate": 3.466173606390738e-06, + "logps/chosen": -137.94178771972656, + "logps/margins": 13.02086067199707, + "logps/rejected": -150.96266174316406, + "loss": 0.7419, + "rewards/chosen": 1.6535203456878662, + "rewards/margins": 0.20561671257019043, + "rewards/rejected": 1.4479037523269653, + "step": 7200 + }, + { + "accuracy": 0.612500011920929, + "epoch": 1.8, + "learning_rate": 3.4537199341112465e-06, + "logps/chosen": -133.9405059814453, + "logps/margins": -12.144119262695312, + "logps/rejected": -121.79638671875, + "loss": 0.6755, + "rewards/chosen": 1.4897305965423584, + "rewards/margins": 0.42974597215652466, + "rewards/rejected": 1.059984803199768, + "step": 7210 + }, + { + "accuracy": 0.550000011920929, + "epoch": 1.81, + "learning_rate": 3.441276860038826e-06, + "logps/chosen": -177.4734649658203, + "logps/margins": -13.882000923156738, + "logps/rejected": -163.5914764404297, + "loss": 0.7373, + "rewards/chosen": 1.7223703861236572, + "rewards/margins": 0.42813006043434143, + "rewards/rejected": 1.2942403554916382, + "step": 7220 + }, + { + "accuracy": 0.6625000238418579, + "epoch": 1.81, + "learning_rate": 3.428844469458335e-06, + "logps/chosen": -149.46212768554688, + "logps/margins": -3.995753049850464, + "logps/rejected": -145.4663848876953, + "loss": 0.7155, + "rewards/chosen": 1.6451709270477295, + "rewards/margins": 0.5497690439224243, + "rewards/rejected": 1.0954020023345947, + "step": 7230 + }, + { + "accuracy": 0.612500011920929, + "epoch": 1.81, + "learning_rate": 3.4164228475814076e-06, + "logps/chosen": -156.49032592773438, + "logps/margins": -10.410895347595215, + "logps/rejected": -146.07945251464844, + "loss": 0.6875, + "rewards/chosen": 1.5609855651855469, + "rewards/margins": 0.19640588760375977, + "rewards/rejected": 1.3645799160003662, + "step": 7240 + }, + { + "accuracy": 0.5375000238418579, + "epoch": 1.81, + "learning_rate": 3.404012079545866e-06, + "logps/chosen": -133.50485229492188, + "logps/margins": 13.032699584960938, + "logps/rejected": -146.5375518798828, + "loss": 0.7677, + "rewards/chosen": 1.5706121921539307, + "rewards/margins": 0.23015134036540985, + "rewards/rejected": 1.3404607772827148, + "step": 7250 + }, + { + "accuracy": 0.550000011920929, + "epoch": 1.81, + "learning_rate": 3.3916122504151457e-06, + "logps/chosen": -138.97213745117188, + "logps/margins": -0.2785850465297699, + "logps/rejected": -138.6935577392578, + "loss": 0.724, + "rewards/chosen": 1.515358805656433, + "rewards/margins": 0.16556455194950104, + "rewards/rejected": 1.3497942686080933, + "step": 7260 + }, + { + "accuracy": 0.699999988079071, + "epoch": 1.82, + "learning_rate": 3.379223445177699e-06, + "logps/chosen": -149.6521453857422, + "logps/margins": -13.424753189086914, + "logps/rejected": -136.22738647460938, + "loss": 0.7055, + "rewards/chosen": 1.4431949853897095, + "rewards/margins": 0.376304566860199, + "rewards/rejected": 1.0668903589248657, + "step": 7270 + }, + { + "accuracy": 0.574999988079071, + "epoch": 1.82, + "learning_rate": 3.366845748746428e-06, + "logps/chosen": -144.44488525390625, + "logps/margins": -3.6491057872772217, + "logps/rejected": -140.79579162597656, + "loss": 0.7194, + "rewards/chosen": 1.4644749164581299, + "rewards/margins": 0.1887628734111786, + "rewards/rejected": 1.275712251663208, + "step": 7280 + }, + { + "accuracy": 0.637499988079071, + "epoch": 1.82, + "learning_rate": 3.354479245958091e-06, + "logps/chosen": -152.7649383544922, + "logps/margins": -8.675726890563965, + "logps/rejected": -144.08921813964844, + "loss": 0.6728, + "rewards/chosen": 1.4421701431274414, + "rewards/margins": 0.4940800666809082, + "rewards/rejected": 0.9480901956558228, + "step": 7290 + }, + { + "accuracy": 0.5625, + "epoch": 1.82, + "learning_rate": 3.3421240215727268e-06, + "logps/chosen": -174.30563354492188, + "logps/margins": -25.59578514099121, + "logps/rejected": -148.70985412597656, + "loss": 0.6996, + "rewards/chosen": 1.5213866233825684, + "rewards/margins": 0.3365876078605652, + "rewards/rejected": 1.1847991943359375, + "step": 7300 + }, + { + "accuracy": 0.550000011920929, + "epoch": 1.83, + "learning_rate": 3.329780160273072e-06, + "logps/chosen": -179.99588012695312, + "logps/margins": -3.168114423751831, + "logps/rejected": -176.82777404785156, + "loss": 0.773, + "rewards/chosen": 1.61554753780365, + "rewards/margins": 0.0479532890021801, + "rewards/rejected": 1.5675941705703735, + "step": 7310 + }, + { + "accuracy": 0.574999988079071, + "epoch": 1.83, + "learning_rate": 3.317447746663978e-06, + "logps/chosen": -193.88619995117188, + "logps/margins": -26.073741912841797, + "logps/rejected": -167.81246948242188, + "loss": 0.7224, + "rewards/chosen": 1.7221057415008545, + "rewards/margins": 0.34187543392181396, + "rewards/rejected": 1.380230188369751, + "step": 7320 + }, + { + "accuracy": 0.550000011920929, + "epoch": 1.83, + "learning_rate": 3.3051268652718373e-06, + "logps/chosen": -153.88182067871094, + "logps/margins": -43.78116989135742, + "logps/rejected": -110.10066223144531, + "loss": 0.7298, + "rewards/chosen": 1.573335886001587, + "rewards/margins": 0.19779759645462036, + "rewards/rejected": 1.3755383491516113, + "step": 7330 + }, + { + "accuracy": 0.5249999761581421, + "epoch": 1.83, + "learning_rate": 3.292817600543997e-06, + "logps/chosen": -161.3231201171875, + "logps/margins": -8.46288776397705, + "logps/rejected": -152.86021423339844, + "loss": 0.7454, + "rewards/chosen": 1.800299048423767, + "rewards/margins": 0.3501875102519989, + "rewards/rejected": 1.4501116275787354, + "step": 7340 + }, + { + "accuracy": 0.48750001192092896, + "epoch": 1.84, + "learning_rate": 3.2805200368481887e-06, + "logps/chosen": -152.79110717773438, + "logps/margins": 8.526643753051758, + "logps/rejected": -161.31773376464844, + "loss": 0.7519, + "rewards/chosen": 1.5041614770889282, + "rewards/margins": 0.025880539789795876, + "rewards/rejected": 1.478280782699585, + "step": 7350 + }, + { + "accuracy": 0.612500011920929, + "epoch": 1.84, + "learning_rate": 3.2682342584719382e-06, + "logps/chosen": -160.1204833984375, + "logps/margins": 16.51639175415039, + "logps/rejected": -176.63687133789062, + "loss": 0.7704, + "rewards/chosen": 1.9906885623931885, + "rewards/margins": 0.3690589666366577, + "rewards/rejected": 1.6216293573379517, + "step": 7360 + }, + { + "accuracy": 0.675000011920929, + "epoch": 1.84, + "learning_rate": 3.2559603496220016e-06, + "logps/chosen": -169.76174926757812, + "logps/margins": 2.596442461013794, + "logps/rejected": -172.3582000732422, + "loss": 0.7545, + "rewards/chosen": 1.3869796991348267, + "rewards/margins": 0.34900856018066406, + "rewards/rejected": 1.0379711389541626, + "step": 7370 + }, + { + "accuracy": 0.574999988079071, + "epoch": 1.84, + "learning_rate": 3.2436983944237742e-06, + "logps/chosen": -173.19410705566406, + "logps/margins": -10.458559036254883, + "logps/rejected": -162.7355499267578, + "loss": 0.85, + "rewards/chosen": 1.7418943643569946, + "rewards/margins": 0.2610716223716736, + "rewards/rejected": 1.4808228015899658, + "step": 7380 + }, + { + "accuracy": 0.6625000238418579, + "epoch": 1.85, + "learning_rate": 3.231448476920727e-06, + "logps/chosen": -153.5210723876953, + "logps/margins": -10.992345809936523, + "logps/rejected": -142.5287322998047, + "loss": 0.7832, + "rewards/chosen": 1.739044427871704, + "rewards/margins": 0.3704701066017151, + "rewards/rejected": 1.3685743808746338, + "step": 7390 + }, + { + "accuracy": 0.574999988079071, + "epoch": 1.85, + "learning_rate": 3.2192106810738216e-06, + "logps/chosen": -163.16371154785156, + "logps/margins": 3.3779468536376953, + "logps/rejected": -166.54165649414062, + "loss": 0.7241, + "rewards/chosen": 1.388930320739746, + "rewards/margins": 0.3345949649810791, + "rewards/rejected": 1.054335355758667, + "step": 7400 + }, + { + "accuracy": 0.574999988079071, + "epoch": 1.85, + "learning_rate": 3.206985090760939e-06, + "logps/chosen": -155.855712890625, + "logps/margins": -10.50706672668457, + "logps/rejected": -145.34866333007812, + "loss": 0.6717, + "rewards/chosen": 1.5462801456451416, + "rewards/margins": 0.27139636874198914, + "rewards/rejected": 1.27488374710083, + "step": 7410 + }, + { + "accuracy": 0.550000011920929, + "epoch": 1.85, + "learning_rate": 3.194771789776303e-06, + "logps/chosen": -178.34249877929688, + "logps/margins": -32.75047302246094, + "logps/rejected": -145.59202575683594, + "loss": 0.6604, + "rewards/chosen": 1.558485746383667, + "rewards/margins": 0.25661927461624146, + "rewards/rejected": 1.3018665313720703, + "step": 7420 + }, + { + "accuracy": 0.574999988079071, + "epoch": 1.86, + "learning_rate": 3.1825708618299054e-06, + "logps/chosen": -137.3893585205078, + "logps/margins": 8.994932174682617, + "logps/rejected": -146.38429260253906, + "loss": 0.7289, + "rewards/chosen": 1.5379340648651123, + "rewards/margins": 0.3739892542362213, + "rewards/rejected": 1.163944959640503, + "step": 7430 + }, + { + "accuracy": 0.637499988079071, + "epoch": 1.86, + "learning_rate": 3.170382390546934e-06, + "logps/chosen": -176.42198181152344, + "logps/margins": 15.362886428833008, + "logps/rejected": -191.78488159179688, + "loss": 0.7304, + "rewards/chosen": 1.5599313974380493, + "rewards/margins": 0.45862922072410583, + "rewards/rejected": 1.1013023853302002, + "step": 7440 + }, + { + "accuracy": 0.6499999761581421, + "epoch": 1.86, + "learning_rate": 3.158206459467199e-06, + "logps/chosen": -146.7998504638672, + "logps/margins": -16.68153953552246, + "logps/rejected": -130.11831665039062, + "loss": 0.7677, + "rewards/chosen": 1.5185534954071045, + "rewards/margins": 0.1395501345396042, + "rewards/rejected": 1.3790035247802734, + "step": 7450 + }, + { + "accuracy": 0.637499988079071, + "epoch": 1.86, + "learning_rate": 3.14604315204456e-06, + "logps/chosen": -164.53750610351562, + "logps/margins": -17.98402214050293, + "logps/rejected": -146.553466796875, + "loss": 0.719, + "rewards/chosen": 1.4467166662216187, + "rewards/margins": 0.24487526714801788, + "rewards/rejected": 1.2018414735794067, + "step": 7460 + }, + { + "accuracy": 0.6499999761581421, + "epoch": 1.87, + "learning_rate": 3.133892551646354e-06, + "logps/chosen": -145.02476501464844, + "logps/margins": -25.481233596801758, + "logps/rejected": -119.54353332519531, + "loss": 0.7041, + "rewards/chosen": 1.5625585317611694, + "rewards/margins": 0.3866068720817566, + "rewards/rejected": 1.1759517192840576, + "step": 7470 + }, + { + "accuracy": 0.6499999761581421, + "epoch": 1.87, + "learning_rate": 3.1217547415528228e-06, + "logps/chosen": -158.24957275390625, + "logps/margins": -12.251078605651855, + "logps/rejected": -145.99850463867188, + "loss": 0.6623, + "rewards/chosen": 1.8684002161026, + "rewards/margins": 0.5195127725601196, + "rewards/rejected": 1.3488874435424805, + "step": 7480 + }, + { + "accuracy": 0.625, + "epoch": 1.87, + "learning_rate": 3.109629804956542e-06, + "logps/chosen": -149.3507080078125, + "logps/margins": 0.7894134521484375, + "logps/rejected": -150.14012145996094, + "loss": 0.6507, + "rewards/chosen": 1.5323169231414795, + "rewards/margins": 0.3360532820224762, + "rewards/rejected": 1.1962639093399048, + "step": 7490 + }, + { + "accuracy": 0.5375000238418579, + "epoch": 1.88, + "learning_rate": 3.0975178249618574e-06, + "logps/chosen": -171.88690185546875, + "logps/margins": -23.281326293945312, + "logps/rejected": -148.60556030273438, + "loss": 0.7338, + "rewards/chosen": 1.6133110523223877, + "rewards/margins": 0.3347011208534241, + "rewards/rejected": 1.2786099910736084, + "step": 7500 + }, + { + "accuracy": 0.5625, + "epoch": 1.88, + "learning_rate": 3.0854188845843062e-06, + "logps/chosen": -168.537353515625, + "logps/margins": -21.223506927490234, + "logps/rejected": -147.31385803222656, + "loss": 0.7198, + "rewards/chosen": 1.4604127407073975, + "rewards/margins": 0.30231982469558716, + "rewards/rejected": 1.158092975616455, + "step": 7510 + }, + { + "accuracy": 0.7124999761581421, + "epoch": 1.88, + "learning_rate": 3.0733330667500538e-06, + "logps/chosen": -157.2186737060547, + "logps/margins": 0.053977202624082565, + "logps/rejected": -157.2726593017578, + "loss": 0.6912, + "rewards/chosen": 1.9707252979278564, + "rewards/margins": 0.6227328777313232, + "rewards/rejected": 1.3479923009872437, + "step": 7520 + }, + { + "accuracy": 0.6000000238418579, + "epoch": 1.88, + "learning_rate": 3.061260454295324e-06, + "logps/chosen": -114.64295959472656, + "logps/margins": 20.90644073486328, + "logps/rejected": -135.54940795898438, + "loss": 0.7032, + "rewards/chosen": 1.645041823387146, + "rewards/margins": 0.33050480484962463, + "rewards/rejected": 1.3145370483398438, + "step": 7530 + }, + { + "accuracy": 0.612500011920929, + "epoch": 1.89, + "learning_rate": 3.049201129965827e-06, + "logps/chosen": -171.25985717773438, + "logps/margins": 12.424259185791016, + "logps/rejected": -183.68411254882812, + "loss": 0.7062, + "rewards/chosen": 1.6989891529083252, + "rewards/margins": 0.16583018004894257, + "rewards/rejected": 1.5331586599349976, + "step": 7540 + }, + { + "accuracy": 0.625, + "epoch": 1.89, + "learning_rate": 3.037155176416201e-06, + "logps/chosen": -139.49082946777344, + "logps/margins": 2.6195502281188965, + "logps/rejected": -142.11038208007812, + "loss": 0.6799, + "rewards/chosen": 1.2641832828521729, + "rewards/margins": 0.4053414463996887, + "rewards/rejected": 0.8588417768478394, + "step": 7550 + }, + { + "accuracy": 0.5375000238418579, + "epoch": 1.89, + "learning_rate": 3.0251226762094384e-06, + "logps/chosen": -132.73126220703125, + "logps/margins": 19.557737350463867, + "logps/rejected": -152.2890167236328, + "loss": 0.7621, + "rewards/chosen": 1.5577361583709717, + "rewards/margins": 0.1638454645872116, + "rewards/rejected": 1.393890619277954, + "step": 7560 + }, + { + "accuracy": 0.675000011920929, + "epoch": 1.89, + "learning_rate": 3.0131037118163238e-06, + "logps/chosen": -136.0181121826172, + "logps/margins": -12.956840515136719, + "logps/rejected": -123.0612564086914, + "loss": 0.6836, + "rewards/chosen": 1.6885261535644531, + "rewards/margins": 0.5305490493774414, + "rewards/rejected": 1.1579769849777222, + "step": 7570 + }, + { + "accuracy": 0.637499988079071, + "epoch": 1.9, + "learning_rate": 3.0010983656148653e-06, + "logps/chosen": -152.7870330810547, + "logps/margins": -33.605613708496094, + "logps/rejected": -119.18141174316406, + "loss": 0.7494, + "rewards/chosen": 1.5151035785675049, + "rewards/margins": 0.4451755881309509, + "rewards/rejected": 1.0699279308319092, + "step": 7580 + }, + { + "accuracy": 0.5375000238418579, + "epoch": 1.9, + "learning_rate": 2.989106719889734e-06, + "logps/chosen": -161.0879364013672, + "logps/margins": 3.9269020557403564, + "logps/rejected": -165.0148468017578, + "loss": 0.8964, + "rewards/chosen": 1.6523844003677368, + "rewards/margins": 0.11767072975635529, + "rewards/rejected": 1.5347135066986084, + "step": 7590 + }, + { + "accuracy": 0.612500011920929, + "epoch": 1.9, + "learning_rate": 2.9771288568316935e-06, + "logps/chosen": -147.85128784179688, + "logps/margins": -21.64183807373047, + "logps/rejected": -126.2094497680664, + "loss": 0.7033, + "rewards/chosen": 1.6057159900665283, + "rewards/margins": 0.3859025239944458, + "rewards/rejected": 1.2198134660720825, + "step": 7600 + }, + { + "accuracy": 0.612500011920929, + "epoch": 1.9, + "learning_rate": 2.965164858537048e-06, + "logps/chosen": -148.21966552734375, + "logps/margins": 22.800125122070312, + "logps/rejected": -171.019775390625, + "loss": 0.709, + "rewards/chosen": 1.5176199674606323, + "rewards/margins": 0.2852620482444763, + "rewards/rejected": 1.2323577404022217, + "step": 7610 + }, + { + "accuracy": 0.5375000238418579, + "epoch": 1.91, + "learning_rate": 2.9532148070070676e-06, + "logps/chosen": -181.43087768554688, + "logps/margins": -18.598739624023438, + "logps/rejected": -162.83213806152344, + "loss": 0.7127, + "rewards/chosen": 1.280906319618225, + "rewards/margins": 0.21598482131958008, + "rewards/rejected": 1.0649213790893555, + "step": 7620 + }, + { + "accuracy": 0.5874999761581421, + "epoch": 1.91, + "learning_rate": 2.941278784147433e-06, + "logps/chosen": -138.15829467773438, + "logps/margins": 6.86111307144165, + "logps/rejected": -145.01942443847656, + "loss": 0.6313, + "rewards/chosen": 1.6140018701553345, + "rewards/margins": 0.37931883335113525, + "rewards/rejected": 1.2346830368041992, + "step": 7630 + }, + { + "accuracy": 0.637499988079071, + "epoch": 1.91, + "learning_rate": 2.9293568717676713e-06, + "logps/chosen": -130.59201049804688, + "logps/margins": 20.620311737060547, + "logps/rejected": -151.21231079101562, + "loss": 0.6574, + "rewards/chosen": 1.3647019863128662, + "rewards/margins": 0.45029154419898987, + "rewards/rejected": 0.9144105911254883, + "step": 7640 + }, + { + "accuracy": 0.550000011920929, + "epoch": 1.91, + "learning_rate": 2.917449151580597e-06, + "logps/chosen": -165.51426696777344, + "logps/margins": -9.239412307739258, + "logps/rejected": -156.2748565673828, + "loss": 0.7084, + "rewards/chosen": 1.7278873920440674, + "rewards/margins": 0.34119173884391785, + "rewards/rejected": 1.3866956233978271, + "step": 7650 + }, + { + "accuracy": 0.612500011920929, + "epoch": 1.92, + "learning_rate": 2.905555705201751e-06, + "logps/chosen": -152.6835479736328, + "logps/margins": 8.02257251739502, + "logps/rejected": -160.70611572265625, + "loss": 0.7231, + "rewards/chosen": 1.5909334421157837, + "rewards/margins": 0.2137170135974884, + "rewards/rejected": 1.3772162199020386, + "step": 7660 + }, + { + "accuracy": 0.637499988079071, + "epoch": 1.92, + "learning_rate": 2.893676614148842e-06, + "logps/chosen": -146.96034240722656, + "logps/margins": 2.7324893474578857, + "logps/rejected": -149.69284057617188, + "loss": 0.6597, + "rewards/chosen": 1.3870294094085693, + "rewards/margins": 0.27289390563964844, + "rewards/rejected": 1.1141353845596313, + "step": 7670 + }, + { + "accuracy": 0.6499999761581421, + "epoch": 1.92, + "learning_rate": 2.8818119598411892e-06, + "logps/chosen": -174.6747283935547, + "logps/margins": -2.6159424781799316, + "logps/rejected": -172.0587921142578, + "loss": 0.6943, + "rewards/chosen": 1.6511671543121338, + "rewards/margins": 0.5539500117301941, + "rewards/rejected": 1.0972169637680054, + "step": 7680 + }, + { + "accuracy": 0.6499999761581421, + "epoch": 1.92, + "learning_rate": 2.869961823599156e-06, + "logps/chosen": -189.27664184570312, + "logps/margins": -16.507299423217773, + "logps/rejected": -172.76934814453125, + "loss": 0.7324, + "rewards/chosen": 1.9205595254898071, + "rewards/margins": 0.577292799949646, + "rewards/rejected": 1.3432666063308716, + "step": 7690 + }, + { + "accuracy": 0.4749999940395355, + "epoch": 1.93, + "learning_rate": 2.858126286643605e-06, + "logps/chosen": -140.23565673828125, + "logps/margins": 3.937549591064453, + "logps/rejected": -144.17320251464844, + "loss": 0.7201, + "rewards/chosen": 1.391732931137085, + "rewards/margins": 0.3172801733016968, + "rewards/rejected": 1.0744529962539673, + "step": 7700 + }, + { + "accuracy": 0.625, + "epoch": 1.93, + "learning_rate": 2.846305430095333e-06, + "logps/chosen": -171.41970825195312, + "logps/margins": -21.400100708007812, + "logps/rejected": -150.01959228515625, + "loss": 0.6766, + "rewards/chosen": 2.113341808319092, + "rewards/margins": 0.3781481683254242, + "rewards/rejected": 1.7351936101913452, + "step": 7710 + }, + { + "accuracy": 0.4749999940395355, + "epoch": 1.93, + "learning_rate": 2.83449933497452e-06, + "logps/chosen": -152.0800323486328, + "logps/margins": -20.473773956298828, + "logps/rejected": -131.6062469482422, + "loss": 0.7243, + "rewards/chosen": 1.2974114418029785, + "rewards/margins": -0.024049963802099228, + "rewards/rejected": 1.3214614391326904, + "step": 7720 + }, + { + "accuracy": 0.574999988079071, + "epoch": 1.93, + "learning_rate": 2.822708082200164e-06, + "logps/chosen": -138.2881622314453, + "logps/margins": 8.09184455871582, + "logps/rejected": -146.3800048828125, + "loss": 0.7466, + "rewards/chosen": 1.7134519815444946, + "rewards/margins": 0.4318350851535797, + "rewards/rejected": 1.2816169261932373, + "step": 7730 + }, + { + "accuracy": 0.5874999761581421, + "epoch": 1.94, + "learning_rate": 2.8109317525895413e-06, + "logps/chosen": -193.68228149414062, + "logps/margins": -8.259164810180664, + "logps/rejected": -185.42312622070312, + "loss": 0.7323, + "rewards/chosen": 1.9114049673080444, + "rewards/margins": 0.14855656027793884, + "rewards/rejected": 1.7628484964370728, + "step": 7740 + }, + { + "accuracy": 0.5625, + "epoch": 1.94, + "learning_rate": 2.7991704268576402e-06, + "logps/chosen": -153.97610473632812, + "logps/margins": -9.580062866210938, + "logps/rejected": -144.39605712890625, + "loss": 0.7268, + "rewards/chosen": 1.699318289756775, + "rewards/margins": 0.3191028833389282, + "rewards/rejected": 1.3802154064178467, + "step": 7750 + }, + { + "accuracy": 0.612500011920929, + "epoch": 1.94, + "learning_rate": 2.7874241856166156e-06, + "logps/chosen": -153.59385681152344, + "logps/margins": -6.709082126617432, + "logps/rejected": -146.88478088378906, + "loss": 0.7442, + "rewards/chosen": 1.7737582921981812, + "rewards/margins": 0.312399685382843, + "rewards/rejected": 1.4613585472106934, + "step": 7760 + }, + { + "accuracy": 0.574999988079071, + "epoch": 1.94, + "learning_rate": 2.7756931093752304e-06, + "logps/chosen": -152.15255737304688, + "logps/margins": 27.0755615234375, + "logps/rejected": -179.2281036376953, + "loss": 0.7101, + "rewards/chosen": 1.4687507152557373, + "rewards/margins": 0.15640540421009064, + "rewards/rejected": 1.312345266342163, + "step": 7770 + }, + { + "accuracy": 0.6000000238418579, + "epoch": 1.94, + "learning_rate": 2.7639772785383077e-06, + "logps/chosen": -147.29771423339844, + "logps/margins": 1.9709599018096924, + "logps/rejected": -149.26866149902344, + "loss": 0.7272, + "rewards/chosen": 1.3575958013534546, + "rewards/margins": 0.22117845714092255, + "rewards/rejected": 1.1364173889160156, + "step": 7780 + }, + { + "accuracy": 0.48750001192092896, + "epoch": 1.95, + "learning_rate": 2.7522767734061813e-06, + "logps/chosen": -158.07467651367188, + "logps/margins": 10.38896656036377, + "logps/rejected": -168.46363830566406, + "loss": 0.7305, + "rewards/chosen": 1.211384654045105, + "rewards/margins": 0.06727229058742523, + "rewards/rejected": 1.144112467765808, + "step": 7790 + }, + { + "accuracy": 0.637499988079071, + "epoch": 1.95, + "learning_rate": 2.740591674174135e-06, + "logps/chosen": -148.889404296875, + "logps/margins": 2.8909945487976074, + "logps/rejected": -151.7803955078125, + "loss": 0.6926, + "rewards/chosen": 1.6856343746185303, + "rewards/margins": 0.3579682409763336, + "rewards/rejected": 1.3276662826538086, + "step": 7800 + }, + { + "accuracy": 0.48750001192092896, + "epoch": 1.95, + "learning_rate": 2.7289220609318678e-06, + "logps/chosen": -178.54592895507812, + "logps/margins": -26.054874420166016, + "logps/rejected": -152.49105834960938, + "loss": 0.7516, + "rewards/chosen": 1.5361932516098022, + "rewards/margins": 0.012251311913132668, + "rewards/rejected": 1.5239421129226685, + "step": 7810 + }, + { + "accuracy": 0.44999998807907104, + "epoch": 1.96, + "learning_rate": 2.717268013662936e-06, + "logps/chosen": -184.67164611816406, + "logps/margins": 4.767717361450195, + "logps/rejected": -189.43936157226562, + "loss": 0.7128, + "rewards/chosen": 1.4181771278381348, + "rewards/margins": -0.004927635192871094, + "rewards/rejected": 1.4231046438217163, + "step": 7820 + }, + { + "accuracy": 0.5249999761581421, + "epoch": 1.96, + "learning_rate": 2.705629612244206e-06, + "logps/chosen": -167.29547119140625, + "logps/margins": -6.868272304534912, + "logps/rejected": -160.4272003173828, + "loss": 0.7088, + "rewards/chosen": 1.6981045007705688, + "rewards/margins": 0.2844163775444031, + "rewards/rejected": 1.4136881828308105, + "step": 7830 + }, + { + "accuracy": 0.550000011920929, + "epoch": 1.96, + "learning_rate": 2.6940069364453057e-06, + "logps/chosen": -147.83935546875, + "logps/margins": 7.617604732513428, + "logps/rejected": -155.45697021484375, + "loss": 0.7778, + "rewards/chosen": 1.5293800830841064, + "rewards/margins": 0.2776310443878174, + "rewards/rejected": 1.2517491579055786, + "step": 7840 + }, + { + "accuracy": 0.5375000238418579, + "epoch": 1.96, + "learning_rate": 2.6824000659280826e-06, + "logps/chosen": -152.8638458251953, + "logps/margins": 3.4601120948791504, + "logps/rejected": -156.32394409179688, + "loss": 0.6807, + "rewards/chosen": 1.6714723110198975, + "rewards/margins": 0.3548671007156372, + "rewards/rejected": 1.3166053295135498, + "step": 7850 + }, + { + "accuracy": 0.574999988079071, + "epoch": 1.96, + "learning_rate": 2.670809080246052e-06, + "logps/chosen": -142.57293701171875, + "logps/margins": 14.857213973999023, + "logps/rejected": -157.43014526367188, + "loss": 0.7071, + "rewards/chosen": 1.5352548360824585, + "rewards/margins": 0.2829630970954895, + "rewards/rejected": 1.2522917985916138, + "step": 7860 + }, + { + "accuracy": 0.6499999761581421, + "epoch": 1.97, + "learning_rate": 2.6592340588438582e-06, + "logps/chosen": -155.18411254882812, + "logps/margins": -0.6307185888290405, + "logps/rejected": -154.5533905029297, + "loss": 0.6878, + "rewards/chosen": 1.619874358177185, + "rewards/margins": 0.38393598794937134, + "rewards/rejected": 1.235938310623169, + "step": 7870 + }, + { + "accuracy": 0.5, + "epoch": 1.97, + "learning_rate": 2.6476750810567226e-06, + "logps/chosen": -177.02804565429688, + "logps/margins": -15.48786735534668, + "logps/rejected": -161.54017639160156, + "loss": 0.6915, + "rewards/chosen": 1.3247684240341187, + "rewards/margins": 0.13504883646965027, + "rewards/rejected": 1.189719557762146, + "step": 7880 + }, + { + "accuracy": 0.5874999761581421, + "epoch": 1.97, + "learning_rate": 2.6361322261099058e-06, + "logps/chosen": -160.01585388183594, + "logps/margins": 4.996635437011719, + "logps/rejected": -165.01251220703125, + "loss": 0.7447, + "rewards/chosen": 1.4600803852081299, + "rewards/margins": 0.2839662432670593, + "rewards/rejected": 1.1761140823364258, + "step": 7890 + }, + { + "accuracy": 0.5625, + "epoch": 1.98, + "learning_rate": 2.624605573118164e-06, + "logps/chosen": -174.19577026367188, + "logps/margins": -2.3253426551818848, + "logps/rejected": -171.87046813964844, + "loss": 0.7128, + "rewards/chosen": 1.6581220626831055, + "rewards/margins": 0.2280033826828003, + "rewards/rejected": 1.4301183223724365, + "step": 7900 + }, + { + "accuracy": 0.574999988079071, + "epoch": 1.98, + "learning_rate": 2.613095201085198e-06, + "logps/chosen": -159.39633178710938, + "logps/margins": -5.231651782989502, + "logps/rejected": -154.1646728515625, + "loss": 0.7807, + "rewards/chosen": 1.3933546543121338, + "rewards/margins": 0.18160532414913177, + "rewards/rejected": 1.2117493152618408, + "step": 7910 + }, + { + "accuracy": 0.6499999761581421, + "epoch": 1.98, + "learning_rate": 2.6016011889031263e-06, + "logps/chosen": -173.76036071777344, + "logps/margins": -34.854591369628906, + "logps/rejected": -138.90577697753906, + "loss": 0.7291, + "rewards/chosen": 1.5486682653427124, + "rewards/margins": 0.43231630325317383, + "rewards/rejected": 1.1163519620895386, + "step": 7920 + }, + { + "accuracy": 0.550000011920929, + "epoch": 1.98, + "learning_rate": 2.5901236153519343e-06, + "logps/chosen": -149.42263793945312, + "logps/margins": 11.698648452758789, + "logps/rejected": -161.1212615966797, + "loss": 0.7384, + "rewards/chosen": 1.705370306968689, + "rewards/margins": 0.329956591129303, + "rewards/rejected": 1.3754137754440308, + "step": 7930 + }, + { + "accuracy": 0.5874999761581421, + "epoch": 1.98, + "learning_rate": 2.5786625590989387e-06, + "logps/chosen": -155.7959747314453, + "logps/margins": -5.108429908752441, + "logps/rejected": -150.68751525878906, + "loss": 0.6982, + "rewards/chosen": 1.503784418106079, + "rewards/margins": 0.393483966588974, + "rewards/rejected": 1.1103004217147827, + "step": 7940 + }, + { + "accuracy": 0.550000011920929, + "epoch": 1.99, + "learning_rate": 2.5672180986982427e-06, + "logps/chosen": -172.7320098876953, + "logps/margins": -9.138218879699707, + "logps/rejected": -163.5937957763672, + "loss": 0.7065, + "rewards/chosen": 1.6389904022216797, + "rewards/margins": 0.21593406796455383, + "rewards/rejected": 1.4230562448501587, + "step": 7950 + }, + { + "accuracy": 0.5874999761581421, + "epoch": 1.99, + "learning_rate": 2.555790312590204e-06, + "logps/chosen": -174.55982971191406, + "logps/margins": -33.466270446777344, + "logps/rejected": -141.09356689453125, + "loss": 0.7419, + "rewards/chosen": 1.577143907546997, + "rewards/margins": 0.16334354877471924, + "rewards/rejected": 1.4138003587722778, + "step": 7960 + }, + { + "accuracy": 0.6000000238418579, + "epoch": 1.99, + "learning_rate": 2.544379279100895e-06, + "logps/chosen": -152.06436157226562, + "logps/margins": 16.437637329101562, + "logps/rejected": -168.5019989013672, + "loss": 0.7487, + "rewards/chosen": 1.5301748514175415, + "rewards/margins": 0.3579128682613373, + "rewards/rejected": 1.1722620725631714, + "step": 7970 + }, + { + "accuracy": 0.5874999761581421, + "epoch": 2.0, + "learning_rate": 2.5329850764415644e-06, + "logps/chosen": -156.33700561523438, + "logps/margins": -4.853341102600098, + "logps/rejected": -151.48367309570312, + "loss": 0.6587, + "rewards/chosen": 1.4469630718231201, + "rewards/margins": 0.2963835299015045, + "rewards/rejected": 1.150579571723938, + "step": 7980 + }, + { + "accuracy": 0.5249999761581421, + "epoch": 2.0, + "learning_rate": 2.521607782708104e-06, + "logps/chosen": -158.8189697265625, + "logps/margins": -13.295373916625977, + "logps/rejected": -145.5236053466797, + "loss": 0.7066, + "rewards/chosen": 1.4658772945404053, + "rewards/margins": 0.1531190127134323, + "rewards/rejected": 1.312758207321167, + "step": 7990 + }, + { + "accuracy": 0.574999988079071, + "epoch": 2.0, + "learning_rate": 2.5102474758805124e-06, + "logps/chosen": -162.83212280273438, + "logps/margins": 14.477209091186523, + "logps/rejected": -177.30935668945312, + "loss": 0.7615, + "rewards/chosen": 1.6065471172332764, + "rewards/margins": 0.17348530888557434, + "rewards/rejected": 1.433061957359314, + "step": 8000 + }, + { + "accuracy": 0.6000000238418579, + "epoch": 2.0, + "learning_rate": 2.4989042338223546e-06, + "logps/chosen": -174.42794799804688, + "logps/margins": -30.117450714111328, + "logps/rejected": -144.3105010986328, + "loss": 0.7701, + "rewards/chosen": 1.7789043188095093, + "rewards/margins": 0.43178433179855347, + "rewards/rejected": 1.3471200466156006, + "step": 8010 + }, + { + "accuracy": 0.5375000238418579, + "epoch": 2.0, + "learning_rate": 2.487578134280239e-06, + "logps/chosen": -140.2782440185547, + "logps/margins": 5.517704010009766, + "logps/rejected": -145.79595947265625, + "loss": 0.7067, + "rewards/chosen": 1.4794094562530518, + "rewards/margins": 0.08387868106365204, + "rewards/rejected": 1.3955308198928833, + "step": 8020 + }, + { + "accuracy": 0.550000011920929, + "epoch": 2.01, + "learning_rate": 2.476269254883278e-06, + "logps/chosen": -146.164794921875, + "logps/margins": 9.8067045211792, + "logps/rejected": -155.9714813232422, + "loss": 0.6848, + "rewards/chosen": 1.5103113651275635, + "rewards/margins": 0.28387340903282166, + "rewards/rejected": 1.226438045501709, + "step": 8030 + }, + { + "accuracy": 0.5874999761581421, + "epoch": 2.01, + "learning_rate": 2.4649776731425556e-06, + "logps/chosen": -177.56961059570312, + "logps/margins": -0.9985759854316711, + "logps/rejected": -176.57101440429688, + "loss": 0.7022, + "rewards/chosen": 1.4511569738388062, + "rewards/margins": 0.15511052310466766, + "rewards/rejected": 1.296046495437622, + "step": 8040 + }, + { + "accuracy": 0.625, + "epoch": 2.01, + "learning_rate": 2.453703466450601e-06, + "logps/chosen": -147.03701782226562, + "logps/margins": -12.085954666137695, + "logps/rejected": -134.95108032226562, + "loss": 0.7033, + "rewards/chosen": 1.4322772026062012, + "rewards/margins": 0.36984866857528687, + "rewards/rejected": 1.0624284744262695, + "step": 8050 + }, + { + "accuracy": 0.6875, + "epoch": 2.02, + "learning_rate": 2.4424467120808494e-06, + "logps/chosen": -143.1505584716797, + "logps/margins": -22.610355377197266, + "logps/rejected": -120.54020690917969, + "loss": 0.6934, + "rewards/chosen": 1.638169527053833, + "rewards/margins": 0.39775046706199646, + "rewards/rejected": 1.2404190301895142, + "step": 8060 + }, + { + "accuracy": 0.699999988079071, + "epoch": 2.02, + "learning_rate": 2.431207487187121e-06, + "logps/chosen": -160.63702392578125, + "logps/margins": -17.046031951904297, + "logps/rejected": -143.5909881591797, + "loss": 0.6522, + "rewards/chosen": 1.4349191188812256, + "rewards/margins": 0.4233461916446686, + "rewards/rejected": 1.0115729570388794, + "step": 8070 + }, + { + "accuracy": 0.612500011920929, + "epoch": 2.02, + "learning_rate": 2.4199858688030903e-06, + "logps/chosen": -181.83872985839844, + "logps/margins": -24.872325897216797, + "logps/rejected": -156.9663848876953, + "loss": 0.7151, + "rewards/chosen": 1.5881997346878052, + "rewards/margins": 0.43364739418029785, + "rewards/rejected": 1.1545523405075073, + "step": 8080 + }, + { + "accuracy": 0.5625, + "epoch": 2.02, + "learning_rate": 2.4087819338417544e-06, + "logps/chosen": -162.61141967773438, + "logps/margins": 18.381328582763672, + "logps/rejected": -180.99276733398438, + "loss": 0.7195, + "rewards/chosen": 1.6559603214263916, + "rewards/margins": 0.34932026267051697, + "rewards/rejected": 1.3066399097442627, + "step": 8090 + }, + { + "accuracy": 0.5874999761581421, + "epoch": 2.02, + "learning_rate": 2.3975957590949085e-06, + "logps/chosen": -211.8200225830078, + "logps/margins": -32.483890533447266, + "logps/rejected": -179.33615112304688, + "loss": 0.6887, + "rewards/chosen": 2.1886820793151855, + "rewards/margins": 0.5621092915534973, + "rewards/rejected": 1.626572847366333, + "step": 8100 + }, + { + "accuracy": 0.612500011920929, + "epoch": 2.03, + "learning_rate": 2.3864274212326223e-06, + "logps/chosen": -120.06292724609375, + "logps/margins": 2.796501636505127, + "logps/rejected": -122.85942077636719, + "loss": 0.7167, + "rewards/chosen": 1.658726692199707, + "rewards/margins": 0.39983028173446655, + "rewards/rejected": 1.2588964700698853, + "step": 8110 + }, + { + "accuracy": 0.574999988079071, + "epoch": 2.03, + "learning_rate": 2.375276996802704e-06, + "logps/chosen": -175.3258514404297, + "logps/margins": -31.839832305908203, + "logps/rejected": -143.4860076904297, + "loss": 0.7878, + "rewards/chosen": 1.3705482482910156, + "rewards/margins": 0.3800581097602844, + "rewards/rejected": 0.9904901385307312, + "step": 8120 + }, + { + "accuracy": 0.6499999761581421, + "epoch": 2.03, + "learning_rate": 2.3641445622301905e-06, + "logps/chosen": -174.14651489257812, + "logps/margins": 5.786733150482178, + "logps/rejected": -179.9332275390625, + "loss": 0.6465, + "rewards/chosen": 1.7012012004852295, + "rewards/margins": 0.4851377010345459, + "rewards/rejected": 1.2160634994506836, + "step": 8130 + }, + { + "accuracy": 0.5249999761581421, + "epoch": 2.04, + "learning_rate": 2.3530301938168137e-06, + "logps/chosen": -142.46383666992188, + "logps/margins": -11.864608764648438, + "logps/rejected": -130.59922790527344, + "loss": 0.749, + "rewards/chosen": 1.5138269662857056, + "rewards/margins": 0.08619476109743118, + "rewards/rejected": 1.427632212638855, + "step": 8140 + }, + { + "accuracy": 0.5625, + "epoch": 2.04, + "learning_rate": 2.3419339677404796e-06, + "logps/chosen": -166.1076202392578, + "logps/margins": -19.827808380126953, + "logps/rejected": -146.27981567382812, + "loss": 0.6781, + "rewards/chosen": 1.7410123348236084, + "rewards/margins": 0.39905327558517456, + "rewards/rejected": 1.3419592380523682, + "step": 8150 + }, + { + "accuracy": 0.550000011920929, + "epoch": 2.04, + "learning_rate": 2.3308559600547483e-06, + "logps/chosen": -158.71141052246094, + "logps/margins": -8.637177467346191, + "logps/rejected": -150.07423400878906, + "loss": 0.7064, + "rewards/chosen": 1.399379849433899, + "rewards/margins": -0.03323918581008911, + "rewards/rejected": 1.4326189756393433, + "step": 8160 + }, + { + "accuracy": 0.550000011920929, + "epoch": 2.04, + "learning_rate": 2.319796246688306e-06, + "logps/chosen": -172.4421844482422, + "logps/margins": -23.948816299438477, + "logps/rejected": -148.4933624267578, + "loss": 0.6791, + "rewards/chosen": 1.728541374206543, + "rewards/margins": 0.463245689868927, + "rewards/rejected": 1.2652957439422607, + "step": 8170 + }, + { + "accuracy": 0.4749999940395355, + "epoch": 2.04, + "learning_rate": 2.308754903444456e-06, + "logps/chosen": -161.19541931152344, + "logps/margins": -5.49510383605957, + "logps/rejected": -155.7003173828125, + "loss": 0.7849, + "rewards/chosen": 1.5838899612426758, + "rewards/margins": -0.08446411788463593, + "rewards/rejected": 1.668353796005249, + "step": 8180 + }, + { + "accuracy": 0.6499999761581421, + "epoch": 2.05, + "learning_rate": 2.2977320060005886e-06, + "logps/chosen": -151.86788940429688, + "logps/margins": -15.464696884155273, + "logps/rejected": -136.4031982421875, + "loss": 0.6147, + "rewards/chosen": 1.5466384887695312, + "rewards/margins": 0.5367838144302368, + "rewards/rejected": 1.009854793548584, + "step": 8190 + }, + { + "accuracy": 0.6000000238418579, + "epoch": 2.05, + "learning_rate": 2.2867276299076685e-06, + "logps/chosen": -145.94644165039062, + "logps/margins": 11.566511154174805, + "logps/rejected": -157.512939453125, + "loss": 0.6865, + "rewards/chosen": 1.5435899496078491, + "rewards/margins": 0.2792242765426636, + "rewards/rejected": 1.2643656730651855, + "step": 8200 + }, + { + "accuracy": 0.637499988079071, + "epoch": 2.05, + "learning_rate": 2.2757418505897163e-06, + "logps/chosen": -156.9351043701172, + "logps/margins": -6.692040920257568, + "logps/rejected": -150.24307250976562, + "loss": 0.682, + "rewards/chosen": 1.734041452407837, + "rewards/margins": 0.36988386511802673, + "rewards/rejected": 1.3641574382781982, + "step": 8210 + }, + { + "accuracy": 0.5625, + "epoch": 2.06, + "learning_rate": 2.2647747433432837e-06, + "logps/chosen": -157.39675903320312, + "logps/margins": -18.195247650146484, + "logps/rejected": -139.20147705078125, + "loss": 0.7426, + "rewards/chosen": 1.3666679859161377, + "rewards/margins": 0.20778140425682068, + "rewards/rejected": 1.1588866710662842, + "step": 8220 + }, + { + "accuracy": 0.637499988079071, + "epoch": 2.06, + "learning_rate": 2.253826383336952e-06, + "logps/chosen": -199.2397918701172, + "logps/margins": -33.6287956237793, + "logps/rejected": -165.6110076904297, + "loss": 0.7355, + "rewards/chosen": 1.8045364618301392, + "rewards/margins": 0.41030988097190857, + "rewards/rejected": 1.3942264318466187, + "step": 8230 + }, + { + "accuracy": 0.6625000238418579, + "epoch": 2.06, + "learning_rate": 2.2428968456108035e-06, + "logps/chosen": -135.59588623046875, + "logps/margins": 0.2366950958967209, + "logps/rejected": -135.8325958251953, + "loss": 0.7525, + "rewards/chosen": 1.7988193035125732, + "rewards/margins": 0.21248801052570343, + "rewards/rejected": 1.5863312482833862, + "step": 8240 + }, + { + "accuracy": 0.675000011920929, + "epoch": 2.06, + "learning_rate": 2.231986205075916e-06, + "logps/chosen": -165.7809600830078, + "logps/margins": -12.608695030212402, + "logps/rejected": -153.1722412109375, + "loss": 0.7035, + "rewards/chosen": 1.7122455835342407, + "rewards/margins": 0.674146294593811, + "rewards/rejected": 1.0380994081497192, + "step": 8250 + }, + { + "accuracy": 0.5625, + "epoch": 2.06, + "learning_rate": 2.2210945365138444e-06, + "logps/chosen": -146.78480529785156, + "logps/margins": 18.20116424560547, + "logps/rejected": -164.9859619140625, + "loss": 0.7714, + "rewards/chosen": 1.5064902305603027, + "rewards/margins": 0.29590269923210144, + "rewards/rejected": 1.2105872631072998, + "step": 8260 + }, + { + "accuracy": 0.7124999761581421, + "epoch": 2.07, + "learning_rate": 2.21022191457611e-06, + "logps/chosen": -169.4023895263672, + "logps/margins": -1.6607900857925415, + "logps/rejected": -167.74160766601562, + "loss": 0.6706, + "rewards/chosen": 1.7718967199325562, + "rewards/margins": 0.6762997508049011, + "rewards/rejected": 1.0955970287322998, + "step": 8270 + }, + { + "accuracy": 0.6000000238418579, + "epoch": 2.07, + "learning_rate": 2.1993684137836865e-06, + "logps/chosen": -155.4597625732422, + "logps/margins": -6.345142364501953, + "logps/rejected": -149.11459350585938, + "loss": 0.7244, + "rewards/chosen": 1.6106678247451782, + "rewards/margins": 0.22864112257957458, + "rewards/rejected": 1.3820265531539917, + "step": 8280 + }, + { + "accuracy": 0.5625, + "epoch": 2.07, + "learning_rate": 2.188534108526493e-06, + "logps/chosen": -134.57020568847656, + "logps/margins": 5.193431854248047, + "logps/rejected": -139.7636260986328, + "loss": 0.7861, + "rewards/chosen": 1.499381184577942, + "rewards/margins": 0.30438631772994995, + "rewards/rejected": 1.1949946880340576, + "step": 8290 + }, + { + "accuracy": 0.5625, + "epoch": 2.08, + "learning_rate": 2.1777190730628837e-06, + "logps/chosen": -171.16134643554688, + "logps/margins": -36.803409576416016, + "logps/rejected": -134.35794067382812, + "loss": 0.7446, + "rewards/chosen": 1.9232261180877686, + "rewards/margins": 0.20128774642944336, + "rewards/rejected": 1.7219384908676147, + "step": 8300 + }, + { + "accuracy": 0.5874999761581421, + "epoch": 2.08, + "learning_rate": 2.166923381519136e-06, + "logps/chosen": -176.7650604248047, + "logps/margins": -37.66004180908203, + "logps/rejected": -139.1050262451172, + "loss": 0.708, + "rewards/chosen": 1.6046323776245117, + "rewards/margins": 0.37916308641433716, + "rewards/rejected": 1.2254693508148193, + "step": 8310 + }, + { + "accuracy": 0.5874999761581421, + "epoch": 2.08, + "learning_rate": 2.1561471078889453e-06, + "logps/chosen": -166.45034790039062, + "logps/margins": -9.98550033569336, + "logps/rejected": -156.46481323242188, + "loss": 0.7441, + "rewards/chosen": 1.2456642389297485, + "rewards/margins": 0.18639352917671204, + "rewards/rejected": 1.0592706203460693, + "step": 8320 + }, + { + "accuracy": 0.5625, + "epoch": 2.08, + "learning_rate": 2.145390326032911e-06, + "logps/chosen": -186.337890625, + "logps/margins": -22.020666122436523, + "logps/rejected": -164.31723022460938, + "loss": 0.6782, + "rewards/chosen": 1.9358770847320557, + "rewards/margins": 0.42717641592025757, + "rewards/rejected": 1.5087006092071533, + "step": 8330 + }, + { + "accuracy": 0.5625, + "epoch": 2.08, + "learning_rate": 2.1346531096780445e-06, + "logps/chosen": -144.95826721191406, + "logps/margins": 14.184289932250977, + "logps/rejected": -159.14254760742188, + "loss": 0.7661, + "rewards/chosen": 1.6065034866333008, + "rewards/margins": 0.08957413583993912, + "rewards/rejected": 1.5169293880462646, + "step": 8340 + }, + { + "accuracy": 0.6625000238418579, + "epoch": 2.09, + "learning_rate": 2.1239355324172483e-06, + "logps/chosen": -130.03160095214844, + "logps/margins": 8.258233070373535, + "logps/rejected": -138.28982543945312, + "loss": 0.713, + "rewards/chosen": 1.5247821807861328, + "rewards/margins": 0.3942405581474304, + "rewards/rejected": 1.1305415630340576, + "step": 8350 + }, + { + "accuracy": 0.612500011920929, + "epoch": 2.09, + "learning_rate": 2.1143065650240386e-06, + "logps/chosen": -163.11752319335938, + "logps/margins": -27.65108299255371, + "logps/rejected": -135.46644592285156, + "loss": 0.7216, + "rewards/chosen": 1.7467950582504272, + "rewards/margins": 0.4883025288581848, + "rewards/rejected": 1.2584925889968872, + "step": 8360 + }, + { + "accuracy": 0.612500011920929, + "epoch": 2.09, + "learning_rate": 2.1036265043079347e-06, + "logps/chosen": -179.0720977783203, + "logps/margins": 0.8080665469169617, + "logps/rejected": -179.88015747070312, + "loss": 0.6717, + "rewards/chosen": 1.9240318536758423, + "rewards/margins": 0.4556439518928528, + "rewards/rejected": 1.4683878421783447, + "step": 8370 + }, + { + "accuracy": 0.5, + "epoch": 2.1, + "learning_rate": 2.0929662953423258e-06, + "logps/chosen": -164.20608520507812, + "logps/margins": 12.96038818359375, + "logps/rejected": -177.16644287109375, + "loss": 0.7469, + "rewards/chosen": 1.5864390134811401, + "rewards/margins": 0.15719899535179138, + "rewards/rejected": 1.429240107536316, + "step": 8380 + }, + { + "accuracy": 0.625, + "epoch": 2.1, + "learning_rate": 2.0823260111923078e-06, + "logps/chosen": -158.79331970214844, + "logps/margins": -13.191431999206543, + "logps/rejected": -145.6018829345703, + "loss": 0.7422, + "rewards/chosen": 1.5085413455963135, + "rewards/margins": 0.27130740880966187, + "rewards/rejected": 1.237234115600586, + "step": 8390 + }, + { + "accuracy": 0.6625000238418579, + "epoch": 2.1, + "learning_rate": 2.071705724786413e-06, + "logps/chosen": -170.298583984375, + "logps/margins": 5.528465270996094, + "logps/rejected": -175.82704162597656, + "loss": 0.6973, + "rewards/chosen": 1.665657639503479, + "rewards/margins": 0.6047636270523071, + "rewards/rejected": 1.0608941316604614, + "step": 8400 + }, + { + "accuracy": 0.5874999761581421, + "epoch": 2.1, + "learning_rate": 2.0611055089161086e-06, + "logps/chosen": -157.84571838378906, + "logps/margins": 3.660970687866211, + "logps/rejected": -161.50668334960938, + "loss": 0.7816, + "rewards/chosen": 1.6446950435638428, + "rewards/margins": 0.31027790904045105, + "rewards/rejected": 1.3344172239303589, + "step": 8410 + }, + { + "accuracy": 0.5249999761581421, + "epoch": 2.1, + "learning_rate": 2.050525436235298e-06, + "logps/chosen": -154.82022094726562, + "logps/margins": -41.35010528564453, + "logps/rejected": -113.4700927734375, + "loss": 0.7767, + "rewards/chosen": 1.57981538772583, + "rewards/margins": 0.08861640840768814, + "rewards/rejected": 1.491199016571045, + "step": 8420 + }, + { + "accuracy": 0.675000011920929, + "epoch": 2.11, + "learning_rate": 2.0399655792598233e-06, + "logps/chosen": -154.95138549804688, + "logps/margins": -18.962299346923828, + "logps/rejected": -135.9890899658203, + "loss": 0.6808, + "rewards/chosen": 1.521018147468567, + "rewards/margins": 0.42580705881118774, + "rewards/rejected": 1.095211148262024, + "step": 8430 + }, + { + "accuracy": 0.637499988079071, + "epoch": 2.11, + "learning_rate": 2.0294260103669695e-06, + "logps/chosen": -169.29391479492188, + "logps/margins": -27.63861656188965, + "logps/rejected": -141.65530395507812, + "loss": 0.7124, + "rewards/chosen": 1.4429848194122314, + "rewards/margins": 0.3141252100467682, + "rewards/rejected": 1.1288598775863647, + "step": 8440 + }, + { + "accuracy": 0.6000000238418579, + "epoch": 2.11, + "learning_rate": 2.0189068017949627e-06, + "logps/chosen": -147.8411865234375, + "logps/margins": -8.72899341583252, + "logps/rejected": -139.11221313476562, + "loss": 0.6889, + "rewards/chosen": 1.6592670679092407, + "rewards/margins": 0.468961626291275, + "rewards/rejected": 1.190305471420288, + "step": 8450 + }, + { + "accuracy": 0.574999988079071, + "epoch": 2.12, + "learning_rate": 2.0084080256424854e-06, + "logps/chosen": -165.54025268554688, + "logps/margins": -0.9891551733016968, + "logps/rejected": -164.5511016845703, + "loss": 0.6497, + "rewards/chosen": 1.5626205205917358, + "rewards/margins": 0.2419670820236206, + "rewards/rejected": 1.3206533193588257, + "step": 8460 + }, + { + "accuracy": 0.675000011920929, + "epoch": 2.12, + "learning_rate": 1.9979297538681696e-06, + "logps/chosen": -165.10464477539062, + "logps/margins": -0.2884078919887543, + "logps/rejected": -164.81625366210938, + "loss": 0.7363, + "rewards/chosen": 1.6342134475708008, + "rewards/margins": 0.5481199026107788, + "rewards/rejected": 1.0860934257507324, + "step": 8470 + }, + { + "accuracy": 0.512499988079071, + "epoch": 2.12, + "learning_rate": 1.9874720582901157e-06, + "logps/chosen": -163.23245239257812, + "logps/margins": -0.6823524236679077, + "logps/rejected": -162.55010986328125, + "loss": 0.7581, + "rewards/chosen": 1.4807817935943604, + "rewards/margins": 0.21510593593120575, + "rewards/rejected": 1.2656762599945068, + "step": 8480 + }, + { + "accuracy": 0.5874999761581421, + "epoch": 2.12, + "learning_rate": 1.9770350105853925e-06, + "logps/chosen": -129.67196655273438, + "logps/margins": -6.872628688812256, + "logps/rejected": -122.7993392944336, + "loss": 0.6872, + "rewards/chosen": 1.0559279918670654, + "rewards/margins": 0.2710602879524231, + "rewards/rejected": 0.7848676443099976, + "step": 8490 + }, + { + "accuracy": 0.5874999761581421, + "epoch": 2.12, + "learning_rate": 1.9666186822895495e-06, + "logps/chosen": -160.75991821289062, + "logps/margins": -13.621179580688477, + "logps/rejected": -147.13873291015625, + "loss": 0.7226, + "rewards/chosen": 1.7736866474151611, + "rewards/margins": 0.2954310476779938, + "rewards/rejected": 1.4782555103302002, + "step": 8500 + }, + { + "accuracy": 0.550000011920929, + "epoch": 2.13, + "learning_rate": 1.9562231447961234e-06, + "logps/chosen": -139.3221893310547, + "logps/margins": 11.047687530517578, + "logps/rejected": -150.36988830566406, + "loss": 0.7243, + "rewards/chosen": 1.5304100513458252, + "rewards/margins": 0.23825982213020325, + "rewards/rejected": 1.2921501398086548, + "step": 8510 + }, + { + "accuracy": 0.675000011920929, + "epoch": 2.13, + "learning_rate": 1.945848469356152e-06, + "logps/chosen": -167.1498565673828, + "logps/margins": -21.472043991088867, + "logps/rejected": -145.6777801513672, + "loss": 0.715, + "rewards/chosen": 1.7204921245574951, + "rewards/margins": 0.46779781579971313, + "rewards/rejected": 1.2526943683624268, + "step": 8520 + }, + { + "accuracy": 0.625, + "epoch": 2.13, + "learning_rate": 1.9354947270776843e-06, + "logps/chosen": -147.2924041748047, + "logps/margins": 14.488614082336426, + "logps/rejected": -161.78102111816406, + "loss": 0.694, + "rewards/chosen": 1.4855103492736816, + "rewards/margins": 0.5283415913581848, + "rewards/rejected": 0.9571687579154968, + "step": 8530 + }, + { + "accuracy": 0.5375000238418579, + "epoch": 2.13, + "learning_rate": 1.9251619889252933e-06, + "logps/chosen": -156.94503784179688, + "logps/margins": -14.219863891601562, + "logps/rejected": -142.7251434326172, + "loss": 0.7323, + "rewards/chosen": 1.6963449716567993, + "rewards/margins": 0.27980196475982666, + "rewards/rejected": 1.4165430068969727, + "step": 8540 + }, + { + "accuracy": 0.612500011920929, + "epoch": 2.14, + "learning_rate": 1.9148503257195915e-06, + "logps/chosen": -170.87680053710938, + "logps/margins": -28.840808868408203, + "logps/rejected": -142.03599548339844, + "loss": 0.6859, + "rewards/chosen": 1.7452952861785889, + "rewards/margins": 0.328766793012619, + "rewards/rejected": 1.416528582572937, + "step": 8550 + }, + { + "accuracy": 0.6875, + "epoch": 2.14, + "learning_rate": 1.9045598081367383e-06, + "logps/chosen": -169.31411743164062, + "logps/margins": 7.2556610107421875, + "logps/rejected": -176.5697784423828, + "loss": 0.7688, + "rewards/chosen": 1.6895349025726318, + "rewards/margins": 0.33078232407569885, + "rewards/rejected": 1.358752727508545, + "step": 8560 + }, + { + "accuracy": 0.625, + "epoch": 2.14, + "learning_rate": 1.894290506707968e-06, + "logps/chosen": -139.7373046875, + "logps/margins": -4.782748222351074, + "logps/rejected": -134.95455932617188, + "loss": 0.685, + "rewards/chosen": 1.8224594593048096, + "rewards/margins": 0.4113302230834961, + "rewards/rejected": 1.4111289978027344, + "step": 8570 + }, + { + "accuracy": 0.6000000238418579, + "epoch": 2.15, + "learning_rate": 1.884042491819093e-06, + "logps/chosen": -159.22018432617188, + "logps/margins": -21.419261932373047, + "logps/rejected": -137.80093383789062, + "loss": 0.7388, + "rewards/chosen": 1.719966173171997, + "rewards/margins": 0.3008470833301544, + "rewards/rejected": 1.419119119644165, + "step": 8580 + }, + { + "accuracy": 0.5874999761581421, + "epoch": 2.15, + "learning_rate": 1.873815833710031e-06, + "logps/chosen": -192.58792114257812, + "logps/margins": -21.861942291259766, + "logps/rejected": -170.72596740722656, + "loss": 0.7143, + "rewards/chosen": 1.7541120052337646, + "rewards/margins": 0.15374931693077087, + "rewards/rejected": 1.600362777709961, + "step": 8590 + }, + { + "accuracy": 0.4124999940395355, + "epoch": 2.15, + "learning_rate": 1.8636106024743206e-06, + "logps/chosen": -155.5521697998047, + "logps/margins": 9.907405853271484, + "logps/rejected": -165.45957946777344, + "loss": 0.7523, + "rewards/chosen": 1.7051265239715576, + "rewards/margins": -0.16903893649578094, + "rewards/rejected": 1.8741657733917236, + "step": 8600 + }, + { + "accuracy": 0.512499988079071, + "epoch": 2.15, + "learning_rate": 1.8534268680586398e-06, + "logps/chosen": -173.94158935546875, + "logps/margins": 6.447187900543213, + "logps/rejected": -180.38877868652344, + "loss": 0.7156, + "rewards/chosen": 1.6777536869049072, + "rewards/margins": 0.2259649932384491, + "rewards/rejected": 1.4517886638641357, + "step": 8610 + }, + { + "accuracy": 0.612500011920929, + "epoch": 2.15, + "learning_rate": 1.8432647002623272e-06, + "logps/chosen": -168.19534301757812, + "logps/margins": -7.878008842468262, + "logps/rejected": -160.3173370361328, + "loss": 0.6757, + "rewards/chosen": 1.5143414735794067, + "rewards/margins": 0.38952404260635376, + "rewards/rejected": 1.1248172521591187, + "step": 8620 + }, + { + "accuracy": 0.48750001192092896, + "epoch": 2.16, + "learning_rate": 1.833124168736904e-06, + "logps/chosen": -128.70266723632812, + "logps/margins": 10.291298866271973, + "logps/rejected": -138.99395751953125, + "loss": 0.7201, + "rewards/chosen": 1.4326075315475464, + "rewards/margins": 0.03472653031349182, + "rewards/rejected": 1.397881031036377, + "step": 8630 + }, + { + "accuracy": 0.5874999761581421, + "epoch": 2.16, + "learning_rate": 1.823005342985596e-06, + "logps/chosen": -157.35055541992188, + "logps/margins": 3.7781319618225098, + "logps/rejected": -161.12869262695312, + "loss": 0.7389, + "rewards/chosen": 1.2670929431915283, + "rewards/margins": 0.0621778778731823, + "rewards/rejected": 1.2049150466918945, + "step": 8640 + }, + { + "accuracy": 0.550000011920929, + "epoch": 2.16, + "learning_rate": 1.8129082923628577e-06, + "logps/chosen": -149.281982421875, + "logps/margins": -23.904254913330078, + "logps/rejected": -125.37772369384766, + "loss": 0.7203, + "rewards/chosen": 1.3377315998077393, + "rewards/margins": 0.175207257270813, + "rewards/rejected": 1.1625242233276367, + "step": 8650 + }, + { + "accuracy": 0.625, + "epoch": 2.17, + "learning_rate": 1.802833086073899e-06, + "logps/chosen": -137.95144653320312, + "logps/margins": 13.378198623657227, + "logps/rejected": -151.3296356201172, + "loss": 0.697, + "rewards/chosen": 1.4663667678833008, + "rewards/margins": 0.409593403339386, + "rewards/rejected": 1.0567734241485596, + "step": 8660 + }, + { + "accuracy": 0.5625, + "epoch": 2.17, + "learning_rate": 1.7927797931742053e-06, + "logps/chosen": -135.56573486328125, + "logps/margins": 4.933518409729004, + "logps/rejected": -140.49925231933594, + "loss": 0.7272, + "rewards/chosen": 1.4486322402954102, + "rewards/margins": 0.29147547483444214, + "rewards/rejected": 1.1571569442749023, + "step": 8670 + }, + { + "accuracy": 0.5249999761581421, + "epoch": 2.17, + "learning_rate": 1.7827484825690662e-06, + "logps/chosen": -161.00819396972656, + "logps/margins": 17.309127807617188, + "logps/rejected": -178.31732177734375, + "loss": 0.7051, + "rewards/chosen": 1.8373451232910156, + "rewards/margins": 0.28881919384002686, + "rewards/rejected": 1.5485261678695679, + "step": 8680 + }, + { + "accuracy": 0.5874999761581421, + "epoch": 2.17, + "learning_rate": 1.7727392230131092e-06, + "logps/chosen": -194.5367889404297, + "logps/margins": 9.299463272094727, + "logps/rejected": -203.8362579345703, + "loss": 0.7109, + "rewards/chosen": 2.147111177444458, + "rewards/margins": 0.31106656789779663, + "rewards/rejected": 1.8360443115234375, + "step": 8690 + }, + { + "accuracy": 0.6875, + "epoch": 2.17, + "learning_rate": 1.762752083109822e-06, + "logps/chosen": -133.9709014892578, + "logps/margins": -6.183140277862549, + "logps/rejected": -127.7877426147461, + "loss": 0.6831, + "rewards/chosen": 1.5709456205368042, + "rewards/margins": 0.4310721755027771, + "rewards/rejected": 1.1398733854293823, + "step": 8700 + }, + { + "accuracy": 0.612500011920929, + "epoch": 2.18, + "learning_rate": 1.7527871313110851e-06, + "logps/chosen": -164.20236206054688, + "logps/margins": -6.957364559173584, + "logps/rejected": -157.24497985839844, + "loss": 0.7065, + "rewards/chosen": 1.8082212209701538, + "rewards/margins": 0.4541195333003998, + "rewards/rejected": 1.3541016578674316, + "step": 8710 + }, + { + "accuracy": 0.637499988079071, + "epoch": 2.18, + "learning_rate": 1.742844435916699e-06, + "logps/chosen": -152.00375366210938, + "logps/margins": 16.378149032592773, + "logps/rejected": -168.38192749023438, + "loss": 0.7177, + "rewards/chosen": 1.6269546747207642, + "rewards/margins": 0.5087138414382935, + "rewards/rejected": 1.1182409524917603, + "step": 8720 + }, + { + "accuracy": 0.574999988079071, + "epoch": 2.18, + "learning_rate": 1.7329240650739222e-06, + "logps/chosen": -121.37681579589844, + "logps/margins": -6.030147552490234, + "logps/rejected": -115.3466796875, + "loss": 0.7001, + "rewards/chosen": 1.430532693862915, + "rewards/margins": 0.2904837727546692, + "rewards/rejected": 1.1400489807128906, + "step": 8730 + }, + { + "accuracy": 0.550000011920929, + "epoch": 2.19, + "learning_rate": 1.7230260867769971e-06, + "logps/chosen": -190.82266235351562, + "logps/margins": -22.756269454956055, + "logps/rejected": -168.06637573242188, + "loss": 0.6669, + "rewards/chosen": 1.7178049087524414, + "rewards/margins": 0.36230191588401794, + "rewards/rejected": 1.355502724647522, + "step": 8740 + }, + { + "accuracy": 0.550000011920929, + "epoch": 2.19, + "learning_rate": 1.7131505688666911e-06, + "logps/chosen": -162.74581909179688, + "logps/margins": -31.385234832763672, + "logps/rejected": -131.36056518554688, + "loss": 0.7014, + "rewards/chosen": 1.4858042001724243, + "rewards/margins": 0.3438750207424164, + "rewards/rejected": 1.1419289112091064, + "step": 8750 + }, + { + "accuracy": 0.6875, + "epoch": 2.19, + "learning_rate": 1.7032975790298266e-06, + "logps/chosen": -145.45460510253906, + "logps/margins": -7.934426307678223, + "logps/rejected": -137.52017211914062, + "loss": 0.675, + "rewards/chosen": 1.4999816417694092, + "rewards/margins": 0.4934515058994293, + "rewards/rejected": 1.0065301656723022, + "step": 8760 + }, + { + "accuracy": 0.574999988079071, + "epoch": 2.19, + "learning_rate": 1.6934671847988155e-06, + "logps/chosen": -152.04855346679688, + "logps/margins": -1.0895531177520752, + "logps/rejected": -150.95901489257812, + "loss": 0.7125, + "rewards/chosen": 1.3611030578613281, + "rewards/margins": 0.2801567614078522, + "rewards/rejected": 1.0809463262557983, + "step": 8770 + }, + { + "accuracy": 0.574999988079071, + "epoch": 2.19, + "learning_rate": 1.6836594535512057e-06, + "logps/chosen": -199.61875915527344, + "logps/margins": -12.851448059082031, + "logps/rejected": -186.76731872558594, + "loss": 0.774, + "rewards/chosen": 1.6930515766143799, + "rewards/margins": 0.2728603184223175, + "rewards/rejected": 1.4201911687850952, + "step": 8780 + }, + { + "accuracy": 0.637499988079071, + "epoch": 2.2, + "learning_rate": 1.6738744525092053e-06, + "logps/chosen": -167.1857147216797, + "logps/margins": -12.358675003051758, + "logps/rejected": -154.8270263671875, + "loss": 0.6924, + "rewards/chosen": 1.9011396169662476, + "rewards/margins": 0.4886767864227295, + "rewards/rejected": 1.4124627113342285, + "step": 8790 + }, + { + "accuracy": 0.612500011920929, + "epoch": 2.2, + "learning_rate": 1.6641122487392358e-06, + "logps/chosen": -150.05905151367188, + "logps/margins": 19.3690185546875, + "logps/rejected": -169.42807006835938, + "loss": 0.6659, + "rewards/chosen": 1.6399908065795898, + "rewards/margins": 0.39792391657829285, + "rewards/rejected": 1.2420669794082642, + "step": 8800 + }, + { + "accuracy": 0.4625000059604645, + "epoch": 2.2, + "learning_rate": 1.6543729091514638e-06, + "logps/chosen": -157.3723907470703, + "logps/margins": -22.547088623046875, + "logps/rejected": -134.82528686523438, + "loss": 0.6982, + "rewards/chosen": 1.4471495151519775, + "rewards/margins": 0.17052698135375977, + "rewards/rejected": 1.2766224145889282, + "step": 8810 + }, + { + "accuracy": 0.550000011920929, + "epoch": 2.21, + "learning_rate": 1.6446565004993453e-06, + "logps/chosen": -138.96075439453125, + "logps/margins": -5.705632209777832, + "logps/rejected": -133.255126953125, + "loss": 0.7939, + "rewards/chosen": 1.4708983898162842, + "rewards/margins": 0.4054839611053467, + "rewards/rejected": 1.0654144287109375, + "step": 8820 + }, + { + "accuracy": 0.6000000238418579, + "epoch": 2.21, + "learning_rate": 1.6349630893791663e-06, + "logps/chosen": -152.40383911132812, + "logps/margins": 10.695032119750977, + "logps/rejected": -163.09884643554688, + "loss": 0.7078, + "rewards/chosen": 1.8411426544189453, + "rewards/margins": 0.502041757106781, + "rewards/rejected": 1.3391010761260986, + "step": 8830 + }, + { + "accuracy": 0.612500011920929, + "epoch": 2.21, + "learning_rate": 1.6252927422295894e-06, + "logps/chosen": -141.5550537109375, + "logps/margins": -12.74122428894043, + "logps/rejected": -128.81381225585938, + "loss": 0.6853, + "rewards/chosen": 1.539665937423706, + "rewards/margins": 0.33341091871261597, + "rewards/rejected": 1.2062549591064453, + "step": 8840 + }, + { + "accuracy": 0.574999988079071, + "epoch": 2.21, + "learning_rate": 1.6156455253311953e-06, + "logps/chosen": -172.87245178222656, + "logps/margins": -11.500372886657715, + "logps/rejected": -161.37208557128906, + "loss": 0.7339, + "rewards/chosen": 1.6482816934585571, + "rewards/margins": 0.28576838970184326, + "rewards/rejected": 1.3625131845474243, + "step": 8850 + }, + { + "accuracy": 0.5375000238418579, + "epoch": 2.21, + "learning_rate": 1.6060215048060307e-06, + "logps/chosen": -143.3395233154297, + "logps/margins": -14.020299911499023, + "logps/rejected": -129.31922912597656, + "loss": 0.6709, + "rewards/chosen": 1.4853442907333374, + "rewards/margins": 0.35263791680336, + "rewards/rejected": 1.1327062845230103, + "step": 8860 + }, + { + "accuracy": 0.574999988079071, + "epoch": 2.22, + "learning_rate": 1.5964207466171556e-06, + "logps/chosen": -162.64041137695312, + "logps/margins": 10.456838607788086, + "logps/rejected": -173.0972442626953, + "loss": 0.6696, + "rewards/chosen": 1.5443044900894165, + "rewards/margins": 0.29026198387145996, + "rewards/rejected": 1.254042387008667, + "step": 8870 + }, + { + "accuracy": 0.637499988079071, + "epoch": 2.22, + "learning_rate": 1.586843316568185e-06, + "logps/chosen": -155.17373657226562, + "logps/margins": 2.354557752609253, + "logps/rejected": -157.52828979492188, + "loss": 0.6817, + "rewards/chosen": 1.6223640441894531, + "rewards/margins": 0.5991525053977966, + "rewards/rejected": 1.0232114791870117, + "step": 8880 + }, + { + "accuracy": 0.5625, + "epoch": 2.22, + "learning_rate": 1.57728928030285e-06, + "logps/chosen": -149.88656616210938, + "logps/margins": -14.486322402954102, + "logps/rejected": -135.4002685546875, + "loss": 0.7272, + "rewards/chosen": 1.5538326501846313, + "rewards/margins": 0.2739008069038391, + "rewards/rejected": 1.2799317836761475, + "step": 8890 + }, + { + "accuracy": 0.5625, + "epoch": 2.23, + "learning_rate": 1.5677587033045328e-06, + "logps/chosen": -145.5820770263672, + "logps/margins": 28.83034324645996, + "logps/rejected": -174.41244506835938, + "loss": 0.6808, + "rewards/chosen": 1.705649733543396, + "rewards/margins": 0.2532147765159607, + "rewards/rejected": 1.4524348974227905, + "step": 8900 + }, + { + "accuracy": 0.637499988079071, + "epoch": 2.23, + "learning_rate": 1.5582516508958317e-06, + "logps/chosen": -207.56460571289062, + "logps/margins": -35.034297943115234, + "logps/rejected": -172.53030395507812, + "loss": 0.7417, + "rewards/chosen": 1.6090505123138428, + "rewards/margins": 0.4967781901359558, + "rewards/rejected": 1.1122722625732422, + "step": 8910 + }, + { + "accuracy": 0.6625000238418579, + "epoch": 2.23, + "learning_rate": 1.548768188238105e-06, + "logps/chosen": -141.54115295410156, + "logps/margins": -19.045608520507812, + "logps/rejected": -122.49554443359375, + "loss": 0.68, + "rewards/chosen": 1.4551907777786255, + "rewards/margins": 0.43837660551071167, + "rewards/rejected": 1.0168139934539795, + "step": 8920 + }, + { + "accuracy": 0.512499988079071, + "epoch": 2.23, + "learning_rate": 1.5393083803310282e-06, + "logps/chosen": -172.00799560546875, + "logps/margins": 3.3133513927459717, + "logps/rejected": -175.32135009765625, + "loss": 0.8257, + "rewards/chosen": 1.3916940689086914, + "rewards/margins": -0.06063110753893852, + "rewards/rejected": 1.4523251056671143, + "step": 8930 + }, + { + "accuracy": 0.637499988079071, + "epoch": 2.23, + "learning_rate": 1.529872292012145e-06, + "logps/chosen": -169.62440490722656, + "logps/margins": 9.763392448425293, + "logps/rejected": -179.38778686523438, + "loss": 0.7165, + "rewards/chosen": 1.450634479522705, + "rewards/margins": 0.3236467242240906, + "rewards/rejected": 1.1269876956939697, + "step": 8940 + }, + { + "accuracy": 0.637499988079071, + "epoch": 2.24, + "learning_rate": 1.5204599879564263e-06, + "logps/chosen": -127.61555480957031, + "logps/margins": 35.28068161010742, + "logps/rejected": -162.896240234375, + "loss": 0.6621, + "rewards/chosen": 1.5860315561294556, + "rewards/margins": 0.5005285739898682, + "rewards/rejected": 1.0855028629302979, + "step": 8950 + }, + { + "accuracy": 0.612500011920929, + "epoch": 2.24, + "learning_rate": 1.5110715326758263e-06, + "logps/chosen": -172.27919006347656, + "logps/margins": -24.1689510345459, + "logps/rejected": -148.11024475097656, + "loss": 0.7476, + "rewards/chosen": 1.6849327087402344, + "rewards/margins": 0.38788866996765137, + "rewards/rejected": 1.297044038772583, + "step": 8960 + }, + { + "accuracy": 0.6499999761581421, + "epoch": 2.24, + "learning_rate": 1.501706990518838e-06, + "logps/chosen": -157.38433837890625, + "logps/margins": 19.355331420898438, + "logps/rejected": -176.7396697998047, + "loss": 0.6783, + "rewards/chosen": 1.5520120859146118, + "rewards/margins": 0.4625958800315857, + "rewards/rejected": 1.0894161462783813, + "step": 8970 + }, + { + "accuracy": 0.6625000238418579, + "epoch": 2.25, + "learning_rate": 1.492366425670056e-06, + "logps/chosen": -153.9088134765625, + "logps/margins": 6.092455863952637, + "logps/rejected": -160.0012664794922, + "loss": 0.6616, + "rewards/chosen": 1.6637213230133057, + "rewards/margins": 0.5724526643753052, + "rewards/rejected": 1.09126877784729, + "step": 8980 + }, + { + "accuracy": 0.612500011920929, + "epoch": 2.25, + "learning_rate": 1.4830499021497296e-06, + "logps/chosen": -144.7803955078125, + "logps/margins": 6.8450927734375, + "logps/rejected": -151.62550354003906, + "loss": 0.7145, + "rewards/chosen": 1.5153062343597412, + "rewards/margins": 0.3127608895301819, + "rewards/rejected": 1.202545404434204, + "step": 8990 + }, + { + "accuracy": 0.637499988079071, + "epoch": 2.25, + "learning_rate": 1.473757483813336e-06, + "logps/chosen": -167.8705291748047, + "logps/margins": -21.814117431640625, + "logps/rejected": -146.05642700195312, + "loss": 0.7089, + "rewards/chosen": 1.7602484226226807, + "rewards/margins": 0.44942083954811096, + "rewards/rejected": 1.310827612876892, + "step": 9000 + }, + { + "epoch": 2.25, + "eval_accuracy": 0.595137420718816, + "eval_logps/chosen": -158.15721130371094, + "eval_logps/margins": -5.295763969421387, + "eval_logps/rejected": -152.86143493652344, + "eval_loss": 0.733016848564148, + "eval_rewards/chosen": 1.514376163482666, + "eval_rewards/margins": 0.3051438629627228, + "eval_rewards/rejected": 1.2092324495315552, + "eval_runtime": 1292.0255, + "eval_samples_per_second": 10.983, + "eval_steps_per_second": 1.373, + "step": 9000 + }, + { + "accuracy": 0.550000011920929, + "epoch": 2.25, + "learning_rate": 1.4644892343511286e-06, + "logps/chosen": -140.15621948242188, + "logps/margins": 3.3032939434051514, + "logps/rejected": -143.4595184326172, + "loss": 0.7482, + "rewards/chosen": 1.6140540838241577, + "rewards/margins": 0.217368483543396, + "rewards/rejected": 1.3966857194900513, + "step": 9010 + }, + { + "accuracy": 0.5375000238418579, + "epoch": 2.25, + "learning_rate": 1.4552452172877103e-06, + "logps/chosen": -151.78140258789062, + "logps/margins": -17.376636505126953, + "logps/rejected": -134.40475463867188, + "loss": 0.7341, + "rewards/chosen": 1.3568575382232666, + "rewards/margins": 0.1822388768196106, + "rewards/rejected": 1.1746186017990112, + "step": 9020 + }, + { + "accuracy": 0.612500011920929, + "epoch": 2.26, + "learning_rate": 1.446025495981596e-06, + "logps/chosen": -150.89797973632812, + "logps/margins": -2.0211005210876465, + "logps/rejected": -148.8768768310547, + "loss": 0.7515, + "rewards/chosen": 1.6613954305648804, + "rewards/margins": 0.13923636078834534, + "rewards/rejected": 1.522159218788147, + "step": 9030 + }, + { + "accuracy": 0.6625000238418579, + "epoch": 2.26, + "learning_rate": 1.4368301336247765e-06, + "logps/chosen": -144.09152221679688, + "logps/margins": -3.2720589637756348, + "logps/rejected": -140.8194580078125, + "loss": 0.6602, + "rewards/chosen": 1.4038548469543457, + "rewards/margins": 0.3739367425441742, + "rewards/rejected": 1.0299179553985596, + "step": 9040 + }, + { + "accuracy": 0.574999988079071, + "epoch": 2.26, + "learning_rate": 1.4276591932422861e-06, + "logps/chosen": -175.94786071777344, + "logps/margins": -37.32986831665039, + "logps/rejected": -138.6179962158203, + "loss": 0.6815, + "rewards/chosen": 1.6072725057601929, + "rewards/margins": 0.4715591371059418, + "rewards/rejected": 1.1357133388519287, + "step": 9050 + }, + { + "accuracy": 0.550000011920929, + "epoch": 2.27, + "learning_rate": 1.41851273769177e-06, + "logps/chosen": -171.75283813476562, + "logps/margins": 9.169479370117188, + "logps/rejected": -180.92230224609375, + "loss": 0.7198, + "rewards/chosen": 1.8163988590240479, + "rewards/margins": 0.5535916090011597, + "rewards/rejected": 1.2628071308135986, + "step": 9060 + }, + { + "accuracy": 0.550000011920929, + "epoch": 2.27, + "learning_rate": 1.409390829663057e-06, + "logps/chosen": -161.4254608154297, + "logps/margins": -8.558207511901855, + "logps/rejected": -152.8672332763672, + "loss": 0.7391, + "rewards/chosen": 1.4760303497314453, + "rewards/margins": 0.15246079862117767, + "rewards/rejected": 1.3235695362091064, + "step": 9070 + }, + { + "accuracy": 0.612500011920929, + "epoch": 2.27, + "learning_rate": 1.4002935316777239e-06, + "logps/chosen": -163.16477966308594, + "logps/margins": -19.930648803710938, + "logps/rejected": -143.234130859375, + "loss": 0.718, + "rewards/chosen": 1.6364920139312744, + "rewards/margins": 0.4381323754787445, + "rewards/rejected": 1.198359727859497, + "step": 9080 + }, + { + "accuracy": 0.5874999761581421, + "epoch": 2.27, + "learning_rate": 1.391220906088674e-06, + "logps/chosen": -144.847900390625, + "logps/margins": 5.415781497955322, + "logps/rejected": -150.26370239257812, + "loss": 0.6691, + "rewards/chosen": 1.551347017288208, + "rewards/margins": 0.3882475197315216, + "rewards/rejected": 1.1630994081497192, + "step": 9090 + }, + { + "accuracy": 0.5874999761581421, + "epoch": 2.27, + "learning_rate": 1.3821730150796998e-06, + "logps/chosen": -175.1791229248047, + "logps/margins": -9.351771354675293, + "logps/rejected": -165.82736206054688, + "loss": 0.6823, + "rewards/chosen": 1.8021516799926758, + "rewards/margins": 0.4165884554386139, + "rewards/rejected": 1.3855631351470947, + "step": 9100 + }, + { + "accuracy": 0.625, + "epoch": 2.28, + "learning_rate": 1.3731499206650706e-06, + "logps/chosen": -150.88084411621094, + "logps/margins": 11.26040267944336, + "logps/rejected": -162.14125061035156, + "loss": 0.7195, + "rewards/chosen": 1.7576625347137451, + "rewards/margins": 0.1033773198723793, + "rewards/rejected": 1.6542854309082031, + "step": 9110 + }, + { + "accuracy": 0.5874999761581421, + "epoch": 2.28, + "learning_rate": 1.3641516846890923e-06, + "logps/chosen": -170.0763702392578, + "logps/margins": -2.1857364177703857, + "logps/rejected": -167.89064025878906, + "loss": 0.714, + "rewards/chosen": 1.5904673337936401, + "rewards/margins": 0.016722047701478004, + "rewards/rejected": 1.5737452507019043, + "step": 9120 + }, + { + "accuracy": 0.5375000238418579, + "epoch": 2.28, + "learning_rate": 1.3551783688256965e-06, + "logps/chosen": -169.84005737304688, + "logps/margins": 10.964398384094238, + "logps/rejected": -180.8044891357422, + "loss": 0.7183, + "rewards/chosen": 1.4091973304748535, + "rewards/margins": 0.3086218237876892, + "rewards/rejected": 1.1005754470825195, + "step": 9130 + }, + { + "accuracy": 0.625, + "epoch": 2.29, + "learning_rate": 1.3462300345780104e-06, + "logps/chosen": -147.86582946777344, + "logps/margins": 1.0733330249786377, + "logps/rejected": -148.93917846679688, + "loss": 0.6627, + "rewards/chosen": 1.7560867071151733, + "rewards/margins": 0.6477835178375244, + "rewards/rejected": 1.108303189277649, + "step": 9140 + }, + { + "accuracy": 0.5874999761581421, + "epoch": 2.29, + "learning_rate": 1.3373067432779374e-06, + "logps/chosen": -169.39144897460938, + "logps/margins": 12.25880241394043, + "logps/rejected": -181.65023803710938, + "loss": 0.7556, + "rewards/chosen": 1.6017158031463623, + "rewards/margins": 0.2665536105632782, + "rewards/rejected": 1.3351620435714722, + "step": 9150 + }, + { + "accuracy": 0.6499999761581421, + "epoch": 2.29, + "learning_rate": 1.3284085560857358e-06, + "logps/chosen": -131.6964874267578, + "logps/margins": 5.135622978210449, + "logps/rejected": -136.8321075439453, + "loss": 0.7177, + "rewards/chosen": 1.496072769165039, + "rewards/margins": 0.34981197118759155, + "rewards/rejected": 1.1462608575820923, + "step": 9160 + }, + { + "accuracy": 0.5, + "epoch": 2.29, + "learning_rate": 1.319535533989601e-06, + "logps/chosen": -140.1403045654297, + "logps/margins": -6.5386643409729, + "logps/rejected": -133.60162353515625, + "loss": 0.7139, + "rewards/chosen": 1.298241376876831, + "rewards/margins": 0.008867263793945312, + "rewards/rejected": 1.2893741130828857, + "step": 9170 + }, + { + "accuracy": 0.550000011920929, + "epoch": 2.29, + "learning_rate": 1.3106877378052468e-06, + "logps/chosen": -170.4357452392578, + "logps/margins": -13.488035202026367, + "logps/rejected": -156.94772338867188, + "loss": 0.7131, + "rewards/chosen": 1.643431305885315, + "rewards/margins": 0.22355131804943085, + "rewards/rejected": 1.4198799133300781, + "step": 9180 + }, + { + "accuracy": 0.675000011920929, + "epoch": 2.3, + "learning_rate": 1.301865228175488e-06, + "logps/chosen": -179.9849395751953, + "logps/margins": -34.471435546875, + "logps/rejected": -145.51351928710938, + "loss": 0.6515, + "rewards/chosen": 2.053356170654297, + "rewards/margins": 0.5836462378501892, + "rewards/rejected": 1.4697099924087524, + "step": 9190 + }, + { + "accuracy": 0.574999988079071, + "epoch": 2.3, + "learning_rate": 1.2930680655698264e-06, + "logps/chosen": -181.6209259033203, + "logps/margins": -29.35764503479004, + "logps/rejected": -152.26327514648438, + "loss": 0.7301, + "rewards/chosen": 1.7384264469146729, + "rewards/margins": 0.3347424268722534, + "rewards/rejected": 1.4036840200424194, + "step": 9200 + }, + { + "accuracy": 0.6625000238418579, + "epoch": 2.3, + "learning_rate": 1.2842963102840327e-06, + "logps/chosen": -156.84042358398438, + "logps/margins": 14.667080879211426, + "logps/rejected": -171.50750732421875, + "loss": 0.6687, + "rewards/chosen": 1.5941455364227295, + "rewards/margins": 0.623616099357605, + "rewards/rejected": 0.9705293774604797, + "step": 9210 + }, + { + "accuracy": 0.5375000238418579, + "epoch": 2.31, + "learning_rate": 1.27555002243974e-06, + "logps/chosen": -141.3217315673828, + "logps/margins": 15.408876419067383, + "logps/rejected": -156.73060607910156, + "loss": 0.7158, + "rewards/chosen": 1.4217623472213745, + "rewards/margins": 0.18102550506591797, + "rewards/rejected": 1.240736961364746, + "step": 9220 + }, + { + "accuracy": 0.512499988079071, + "epoch": 2.31, + "learning_rate": 1.266829261984021e-06, + "logps/chosen": -179.71163940429688, + "logps/margins": -9.987619400024414, + "logps/rejected": -169.72402954101562, + "loss": 0.7328, + "rewards/chosen": 1.552562952041626, + "rewards/margins": 0.11078063398599625, + "rewards/rejected": 1.4417824745178223, + "step": 9230 + }, + { + "accuracy": 0.5625, + "epoch": 2.31, + "learning_rate": 1.2581340886889908e-06, + "logps/chosen": -163.5059814453125, + "logps/margins": 22.396549224853516, + "logps/rejected": -185.90252685546875, + "loss": 0.7301, + "rewards/chosen": 1.6513032913208008, + "rewards/margins": 0.3200327754020691, + "rewards/rejected": 1.3312708139419556, + "step": 9240 + }, + { + "accuracy": 0.612500011920929, + "epoch": 2.31, + "learning_rate": 1.2494645621513856e-06, + "logps/chosen": -166.04409790039062, + "logps/margins": -8.708564758300781, + "logps/rejected": -157.33554077148438, + "loss": 0.7002, + "rewards/chosen": 1.5607943534851074, + "rewards/margins": 0.5162396430969238, + "rewards/rejected": 1.044554591178894, + "step": 9250 + }, + { + "accuracy": 0.5874999761581421, + "epoch": 2.31, + "learning_rate": 1.2408207417921603e-06, + "logps/chosen": -139.47116088867188, + "logps/margins": 19.832660675048828, + "logps/rejected": -159.30380249023438, + "loss": 0.7811, + "rewards/chosen": 1.3712660074234009, + "rewards/margins": 0.01880212500691414, + "rewards/rejected": 1.352463960647583, + "step": 9260 + }, + { + "accuracy": 0.612500011920929, + "epoch": 2.32, + "learning_rate": 1.2322026868560793e-06, + "logps/chosen": -152.81991577148438, + "logps/margins": -4.470402240753174, + "logps/rejected": -148.3495330810547, + "loss": 0.6658, + "rewards/chosen": 1.261376142501831, + "rewards/margins": 0.15011903643608093, + "rewards/rejected": 1.1112570762634277, + "step": 9270 + }, + { + "accuracy": 0.5874999761581421, + "epoch": 2.32, + "learning_rate": 1.2236104564113106e-06, + "logps/chosen": -157.5109405517578, + "logps/margins": -14.323707580566406, + "logps/rejected": -143.18722534179688, + "loss": 0.7353, + "rewards/chosen": 1.2595093250274658, + "rewards/margins": 0.35361140966415405, + "rewards/rejected": 0.9058979153633118, + "step": 9280 + }, + { + "accuracy": 0.637499988079071, + "epoch": 2.32, + "learning_rate": 1.2150441093490212e-06, + "logps/chosen": -164.19973754882812, + "logps/margins": -10.216934204101562, + "logps/rejected": -153.98280334472656, + "loss": 0.7226, + "rewards/chosen": 1.6225990056991577, + "rewards/margins": 0.35506927967071533, + "rewards/rejected": 1.267529845237732, + "step": 9290 + }, + { + "accuracy": 0.637499988079071, + "epoch": 2.33, + "learning_rate": 1.2065037043829725e-06, + "logps/chosen": -112.38753509521484, + "logps/margins": 19.92919921875, + "logps/rejected": -132.3167266845703, + "loss": 0.6934, + "rewards/chosen": 1.8175365924835205, + "rewards/margins": 0.4672180712223053, + "rewards/rejected": 1.3503185510635376, + "step": 9300 + }, + { + "accuracy": 0.6000000238418579, + "epoch": 2.33, + "learning_rate": 1.1979893000491221e-06, + "logps/chosen": -155.2445068359375, + "logps/margins": 16.538589477539062, + "logps/rejected": -171.78309631347656, + "loss": 0.7196, + "rewards/chosen": 1.4976298809051514, + "rewards/margins": 0.43379268050193787, + "rewards/rejected": 1.0638372898101807, + "step": 9310 + }, + { + "accuracy": 0.5625, + "epoch": 2.33, + "learning_rate": 1.1895009547052128e-06, + "logps/chosen": -162.82957458496094, + "logps/margins": -34.76596450805664, + "logps/rejected": -128.06362915039062, + "loss": 0.7376, + "rewards/chosen": 1.6303272247314453, + "rewards/margins": 0.269422709941864, + "rewards/rejected": 1.3609044551849365, + "step": 9320 + }, + { + "accuracy": 0.5375000238418579, + "epoch": 2.33, + "learning_rate": 1.1810387265303863e-06, + "logps/chosen": -139.53135681152344, + "logps/margins": 28.58318519592285, + "logps/rejected": -168.1145477294922, + "loss": 0.7065, + "rewards/chosen": 1.4115617275238037, + "rewards/margins": 0.21647624671459198, + "rewards/rejected": 1.1950855255126953, + "step": 9330 + }, + { + "accuracy": 0.6000000238418579, + "epoch": 2.33, + "learning_rate": 1.1726026735247702e-06, + "logps/chosen": -143.6156005859375, + "logps/margins": 23.734203338623047, + "logps/rejected": -167.3497772216797, + "loss": 0.7004, + "rewards/chosen": 1.5682671070098877, + "rewards/margins": 0.32487040758132935, + "rewards/rejected": 1.2433966398239136, + "step": 9340 + }, + { + "accuracy": 0.6875, + "epoch": 2.34, + "learning_rate": 1.1641928535090924e-06, + "logps/chosen": -156.2618408203125, + "logps/margins": 6.886648654937744, + "logps/rejected": -163.14849853515625, + "loss": 0.6733, + "rewards/chosen": 1.5554397106170654, + "rewards/margins": 0.4611653685569763, + "rewards/rejected": 1.0942742824554443, + "step": 9350 + }, + { + "accuracy": 0.6499999761581421, + "epoch": 2.34, + "learning_rate": 1.155809324124279e-06, + "logps/chosen": -132.93206787109375, + "logps/margins": -27.214092254638672, + "logps/rejected": -105.71797180175781, + "loss": 0.7606, + "rewards/chosen": 1.7774693965911865, + "rewards/margins": 0.47879329323768616, + "rewards/rejected": 1.2986762523651123, + "step": 9360 + }, + { + "accuracy": 0.6625000238418579, + "epoch": 2.34, + "learning_rate": 1.1474521428310586e-06, + "logps/chosen": -142.68211364746094, + "logps/margins": 12.338725090026855, + "logps/rejected": -155.02084350585938, + "loss": 0.7262, + "rewards/chosen": 1.8100168704986572, + "rewards/margins": 0.37769442796707153, + "rewards/rejected": 1.4323222637176514, + "step": 9370 + }, + { + "accuracy": 0.7124999761581421, + "epoch": 2.34, + "learning_rate": 1.1391213669095713e-06, + "logps/chosen": -173.10409545898438, + "logps/margins": -10.700777053833008, + "logps/rejected": -162.4033203125, + "loss": 0.6704, + "rewards/chosen": 1.881722092628479, + "rewards/margins": 0.5909376740455627, + "rewards/rejected": 1.2907843589782715, + "step": 9380 + }, + { + "accuracy": 0.625, + "epoch": 2.35, + "learning_rate": 1.1308170534589734e-06, + "logps/chosen": -145.08135986328125, + "logps/margins": 8.311272621154785, + "logps/rejected": -153.3926239013672, + "loss": 0.6735, + "rewards/chosen": 1.7140896320343018, + "rewards/margins": 0.33044159412384033, + "rewards/rejected": 1.383648157119751, + "step": 9390 + }, + { + "accuracy": 0.625, + "epoch": 2.35, + "learning_rate": 1.122539259397049e-06, + "logps/chosen": -179.33187866210938, + "logps/margins": -42.481727600097656, + "logps/rejected": -136.85015869140625, + "loss": 0.6573, + "rewards/chosen": 1.7291555404663086, + "rewards/margins": 0.43145304918289185, + "rewards/rejected": 1.2977025508880615, + "step": 9400 + }, + { + "accuracy": 0.612500011920929, + "epoch": 2.35, + "learning_rate": 1.1142880414598184e-06, + "logps/chosen": -157.69996643066406, + "logps/margins": -0.6335235834121704, + "logps/rejected": -157.06643676757812, + "loss": 0.7265, + "rewards/chosen": 1.5308783054351807, + "rewards/margins": 0.2028176486492157, + "rewards/rejected": 1.3280606269836426, + "step": 9410 + }, + { + "accuracy": 0.6625000238418579, + "epoch": 2.35, + "learning_rate": 1.1060634562011446e-06, + "logps/chosen": -167.25372314453125, + "logps/margins": 20.420352935791016, + "logps/rejected": -187.67410278320312, + "loss": 0.7252, + "rewards/chosen": 1.4953434467315674, + "rewards/margins": 0.7023226618766785, + "rewards/rejected": 0.7930207848548889, + "step": 9420 + }, + { + "accuracy": 0.625, + "epoch": 2.36, + "learning_rate": 1.0978655599923554e-06, + "logps/chosen": -157.29611206054688, + "logps/margins": 2.317876100540161, + "logps/rejected": -159.61398315429688, + "loss": 0.7115, + "rewards/chosen": 1.7558200359344482, + "rewards/margins": 0.4976974427700043, + "rewards/rejected": 1.2581225633621216, + "step": 9430 + }, + { + "accuracy": 0.7250000238418579, + "epoch": 2.36, + "learning_rate": 1.089694409021852e-06, + "logps/chosen": -163.4586639404297, + "logps/margins": 2.0434722900390625, + "logps/rejected": -165.50213623046875, + "loss": 0.6354, + "rewards/chosen": 1.7309486865997314, + "rewards/margins": 0.6121265292167664, + "rewards/rejected": 1.1188218593597412, + "step": 9440 + }, + { + "accuracy": 0.574999988079071, + "epoch": 2.36, + "learning_rate": 1.0815500592947176e-06, + "logps/chosen": -159.05514526367188, + "logps/margins": 15.057942390441895, + "logps/rejected": -174.11309814453125, + "loss": 0.6888, + "rewards/chosen": 1.9620682001113892, + "rewards/margins": 0.5734186768531799, + "rewards/rejected": 1.3886497020721436, + "step": 9450 + }, + { + "accuracy": 0.6499999761581421, + "epoch": 2.37, + "learning_rate": 1.0734325666323458e-06, + "logps/chosen": -150.67575073242188, + "logps/margins": -10.013188362121582, + "logps/rejected": -140.66256713867188, + "loss": 0.7439, + "rewards/chosen": 1.4265446662902832, + "rewards/margins": 0.25912731885910034, + "rewards/rejected": 1.167417287826538, + "step": 9460 + }, + { + "accuracy": 0.637499988079071, + "epoch": 2.37, + "learning_rate": 1.06534198667205e-06, + "logps/chosen": -172.04296875, + "logps/margins": -31.445383071899414, + "logps/rejected": -140.5975799560547, + "loss": 0.6317, + "rewards/chosen": 1.7565895318984985, + "rewards/margins": 0.4719395637512207, + "rewards/rejected": 1.2846500873565674, + "step": 9470 + }, + { + "accuracy": 0.675000011920929, + "epoch": 2.37, + "learning_rate": 1.0572783748666832e-06, + "logps/chosen": -159.42288208007812, + "logps/margins": -20.05147361755371, + "logps/rejected": -139.37142944335938, + "loss": 0.6643, + "rewards/chosen": 1.7694286108016968, + "rewards/margins": 0.4723431468009949, + "rewards/rejected": 1.2970855236053467, + "step": 9480 + }, + { + "accuracy": 0.625, + "epoch": 2.37, + "learning_rate": 1.0492417864842585e-06, + "logps/chosen": -131.25352478027344, + "logps/margins": 14.675390243530273, + "logps/rejected": -145.92892456054688, + "loss": 0.7295, + "rewards/chosen": 1.5723516941070557, + "rewards/margins": 0.1843392699956894, + "rewards/rejected": 1.388012409210205, + "step": 9490 + }, + { + "accuracy": 0.6499999761581421, + "epoch": 2.38, + "learning_rate": 1.0412322766075717e-06, + "logps/chosen": -146.24911499023438, + "logps/margins": 0.4702262878417969, + "logps/rejected": -146.7193603515625, + "loss": 0.6965, + "rewards/chosen": 1.6009800434112549, + "rewards/margins": 0.42105555534362793, + "rewards/rejected": 1.1799246072769165, + "step": 9500 + }, + { + "accuracy": 0.6499999761581421, + "epoch": 2.38, + "learning_rate": 1.033249900133821e-06, + "logps/chosen": -143.3681640625, + "logps/margins": 5.319980144500732, + "logps/rejected": -148.68814086914062, + "loss": 0.6874, + "rewards/chosen": 1.8423643112182617, + "rewards/margins": 0.512923002243042, + "rewards/rejected": 1.3294413089752197, + "step": 9510 + }, + { + "accuracy": 0.5874999761581421, + "epoch": 2.38, + "learning_rate": 1.0252947117742346e-06, + "logps/chosen": -159.81214904785156, + "logps/margins": 18.622472763061523, + "logps/rejected": -178.43463134765625, + "loss": 0.6539, + "rewards/chosen": 1.464560866355896, + "rewards/margins": 0.3686937987804413, + "rewards/rejected": 1.0958671569824219, + "step": 9520 + }, + { + "accuracy": 0.675000011920929, + "epoch": 2.38, + "learning_rate": 1.0173667660536885e-06, + "logps/chosen": -145.4687042236328, + "logps/margins": -0.3341163694858551, + "logps/rejected": -145.1345672607422, + "loss": 0.7107, + "rewards/chosen": 1.4643455743789673, + "rewards/margins": 0.4681021273136139, + "rewards/rejected": 0.9962433576583862, + "step": 9530 + }, + { + "accuracy": 0.574999988079071, + "epoch": 2.38, + "learning_rate": 1.0094661173103421e-06, + "logps/chosen": -161.6818084716797, + "logps/margins": -32.527442932128906, + "logps/rejected": -129.1543731689453, + "loss": 0.6565, + "rewards/chosen": 1.717831015586853, + "rewards/margins": 0.5505114197731018, + "rewards/rejected": 1.1673195362091064, + "step": 9540 + }, + { + "accuracy": 0.5375000238418579, + "epoch": 2.39, + "learning_rate": 1.0015928196952607e-06, + "logps/chosen": -165.14212036132812, + "logps/margins": -7.842411041259766, + "logps/rejected": -157.29969787597656, + "loss": 0.8084, + "rewards/chosen": 1.6911548376083374, + "rewards/margins": 0.1980780065059662, + "rewards/rejected": 1.4930768013000488, + "step": 9550 + }, + { + "accuracy": 0.5, + "epoch": 2.39, + "learning_rate": 9.937469271720417e-07, + "logps/chosen": -155.35658264160156, + "logps/margins": -6.441777229309082, + "logps/rejected": -148.91482543945312, + "loss": 0.7587, + "rewards/chosen": 1.6535637378692627, + "rewards/margins": 0.29293909668922424, + "rewards/rejected": 1.3606245517730713, + "step": 9560 + }, + { + "accuracy": 0.574999988079071, + "epoch": 2.39, + "learning_rate": 9.85928493516452e-07, + "logps/chosen": -167.6337890625, + "logps/margins": 10.566003799438477, + "logps/rejected": -178.19979858398438, + "loss": 0.795, + "rewards/chosen": 1.434712290763855, + "rewards/margins": 0.18009762465953827, + "rewards/rejected": 1.2546145915985107, + "step": 9570 + }, + { + "accuracy": 0.625, + "epoch": 2.4, + "learning_rate": 9.78137572316053e-07, + "logps/chosen": -148.00750732421875, + "logps/margins": 5.41839599609375, + "logps/rejected": -153.4259033203125, + "loss": 0.6852, + "rewards/chosen": 1.5602020025253296, + "rewards/margins": 0.3490469455718994, + "rewards/rejected": 1.2111550569534302, + "step": 9580 + }, + { + "accuracy": 0.5874999761581421, + "epoch": 2.4, + "learning_rate": 9.703742169698365e-07, + "logps/chosen": -172.73135375976562, + "logps/margins": -25.68975830078125, + "logps/rejected": -147.04159545898438, + "loss": 0.7069, + "rewards/chosen": 1.8070247173309326, + "rewards/margins": 0.42844122648239136, + "rewards/rejected": 1.378583312034607, + "step": 9590 + }, + { + "accuracy": 0.675000011920929, + "epoch": 2.4, + "learning_rate": 9.62638480687857e-07, + "logps/chosen": -185.79116821289062, + "logps/margins": -21.861568450927734, + "logps/rejected": -163.92959594726562, + "loss": 0.7477, + "rewards/chosen": 2.0217483043670654, + "rewards/margins": 0.7596826553344727, + "rewards/rejected": 1.2620656490325928, + "step": 9600 + }, + { + "accuracy": 0.574999988079071, + "epoch": 2.4, + "learning_rate": 9.549304164908691e-07, + "logps/chosen": -168.33267211914062, + "logps/margins": 3.181908369064331, + "logps/rejected": -171.5145721435547, + "loss": 0.7338, + "rewards/chosen": 1.5502103567123413, + "rewards/margins": 0.1492123007774353, + "rewards/rejected": 1.4009983539581299, + "step": 9610 + }, + { + "accuracy": 0.4000000059604645, + "epoch": 2.41, + "learning_rate": 9.472500772099624e-07, + "logps/chosen": -149.6251983642578, + "logps/margins": 4.278947353363037, + "logps/rejected": -153.90414428710938, + "loss": 0.7244, + "rewards/chosen": 1.2571920156478882, + "rewards/margins": 0.15822848677635193, + "rewards/rejected": 1.0989634990692139, + "step": 9620 + }, + { + "accuracy": 0.637499988079071, + "epoch": 2.41, + "learning_rate": 9.395975154862003e-07, + "logps/chosen": -158.68826293945312, + "logps/margins": 3.610377550125122, + "logps/rejected": -162.2986602783203, + "loss": 0.7222, + "rewards/chosen": 2.1626174449920654, + "rewards/margins": 0.3893752694129944, + "rewards/rejected": 1.7732422351837158, + "step": 9630 + }, + { + "accuracy": 0.512499988079071, + "epoch": 2.41, + "learning_rate": 9.319727837702564e-07, + "logps/chosen": -194.62660217285156, + "logps/margins": -27.15042495727539, + "logps/rejected": -167.47616577148438, + "loss": 0.7534, + "rewards/chosen": 1.588010311126709, + "rewards/margins": 0.0478978268802166, + "rewards/rejected": 1.5401123762130737, + "step": 9640 + }, + { + "accuracy": 0.612500011920929, + "epoch": 2.41, + "learning_rate": 9.243759343220599e-07, + "logps/chosen": -160.7239990234375, + "logps/margins": -17.862651824951172, + "logps/rejected": -142.86135864257812, + "loss": 0.6674, + "rewards/chosen": 1.5959705114364624, + "rewards/margins": 0.32109734416007996, + "rewards/rejected": 1.2748732566833496, + "step": 9650 + }, + { + "accuracy": 0.637499988079071, + "epoch": 2.42, + "learning_rate": 9.168070192104351e-07, + "logps/chosen": -159.25540161132812, + "logps/margins": 19.994998931884766, + "logps/rejected": -179.2504119873047, + "loss": 0.672, + "rewards/chosen": 1.6559057235717773, + "rewards/margins": 0.3565587103366852, + "rewards/rejected": 1.299346923828125, + "step": 9660 + }, + { + "accuracy": 0.6625000238418579, + "epoch": 2.42, + "learning_rate": 9.092660903127414e-07, + "logps/chosen": -145.45541381835938, + "logps/margins": -8.544962882995605, + "logps/rejected": -136.9104461669922, + "loss": 0.7431, + "rewards/chosen": 1.4745855331420898, + "rewards/margins": 0.3204536437988281, + "rewards/rejected": 1.1541321277618408, + "step": 9670 + }, + { + "accuracy": 0.6000000238418579, + "epoch": 2.42, + "learning_rate": 9.01753199314524e-07, + "logps/chosen": -178.23411560058594, + "logps/margins": -13.244361877441406, + "logps/rejected": -164.98974609375, + "loss": 0.8173, + "rewards/chosen": 1.886765718460083, + "rewards/margins": 0.2266453206539154, + "rewards/rejected": 1.6601203680038452, + "step": 9680 + }, + { + "accuracy": 0.574999988079071, + "epoch": 2.42, + "learning_rate": 8.942683977091538e-07, + "logps/chosen": -174.52378845214844, + "logps/margins": -17.57522201538086, + "logps/rejected": -156.94857788085938, + "loss": 0.7094, + "rewards/chosen": 1.793280005455017, + "rewards/margins": 0.32336562871932983, + "rewards/rejected": 1.4699143171310425, + "step": 9690 + }, + { + "accuracy": 0.574999988079071, + "epoch": 2.42, + "learning_rate": 8.868117367974788e-07, + "logps/chosen": -175.92703247070312, + "logps/margins": -24.92273712158203, + "logps/rejected": -151.00430297851562, + "loss": 0.7716, + "rewards/chosen": 1.7987258434295654, + "rewards/margins": 0.13221819698810577, + "rewards/rejected": 1.6665074825286865, + "step": 9700 + }, + { + "accuracy": 0.6000000238418579, + "epoch": 2.43, + "learning_rate": 8.793832676874697e-07, + "logps/chosen": -147.49586486816406, + "logps/margins": 0.363046258687973, + "logps/rejected": -147.85890197753906, + "loss": 0.7269, + "rewards/chosen": 1.891561508178711, + "rewards/margins": 0.29041898250579834, + "rewards/rejected": 1.6011425256729126, + "step": 9710 + }, + { + "accuracy": 0.612500011920929, + "epoch": 2.43, + "learning_rate": 8.719830412938701e-07, + "logps/chosen": -176.7687225341797, + "logps/margins": -18.616926193237305, + "logps/rejected": -158.15179443359375, + "loss": 0.6594, + "rewards/chosen": 1.8652225732803345, + "rewards/margins": 0.5006753206253052, + "rewards/rejected": 1.3645473718643188, + "step": 9720 + }, + { + "accuracy": 0.699999988079071, + "epoch": 2.43, + "learning_rate": 8.6461110833785e-07, + "logps/chosen": -158.601806640625, + "logps/margins": 14.120638847351074, + "logps/rejected": -172.72244262695312, + "loss": 0.6966, + "rewards/chosen": 1.6582660675048828, + "rewards/margins": 0.45551618933677673, + "rewards/rejected": 1.2027499675750732, + "step": 9730 + }, + { + "accuracy": 0.5625, + "epoch": 2.44, + "learning_rate": 8.572675193466523e-07, + "logps/chosen": -163.9615020751953, + "logps/margins": -17.350330352783203, + "logps/rejected": -146.6112060546875, + "loss": 0.7646, + "rewards/chosen": 1.9250911474227905, + "rewards/margins": 0.11251489073038101, + "rewards/rejected": 1.8125760555267334, + "step": 9740 + }, + { + "accuracy": 0.5625, + "epoch": 2.44, + "learning_rate": 8.499523246532532e-07, + "logps/chosen": -146.95419311523438, + "logps/margins": 13.272099494934082, + "logps/rejected": -160.22630310058594, + "loss": 0.7148, + "rewards/chosen": 1.816480278968811, + "rewards/margins": 0.2832276225090027, + "rewards/rejected": 1.5332527160644531, + "step": 9750 + }, + { + "accuracy": 0.5625, + "epoch": 2.44, + "learning_rate": 8.426655743960144e-07, + "logps/chosen": -207.29861450195312, + "logps/margins": -51.79948043823242, + "logps/rejected": -155.49913024902344, + "loss": 0.7514, + "rewards/chosen": 1.8375985622406006, + "rewards/margins": 0.24382928013801575, + "rewards/rejected": 1.5937691926956177, + "step": 9760 + }, + { + "accuracy": 0.6499999761581421, + "epoch": 2.44, + "learning_rate": 8.354073185183392e-07, + "logps/chosen": -176.23899841308594, + "logps/margins": -27.378036499023438, + "logps/rejected": -148.86097717285156, + "loss": 0.7317, + "rewards/chosen": 1.7392657995224, + "rewards/margins": 0.2857814431190491, + "rewards/rejected": 1.4534841775894165, + "step": 9770 + }, + { + "accuracy": 0.737500011920929, + "epoch": 2.44, + "learning_rate": 8.281776067683279e-07, + "logps/chosen": -165.9964141845703, + "logps/margins": -26.62957763671875, + "logps/rejected": -139.3668212890625, + "loss": 0.6582, + "rewards/chosen": 1.8514553308486938, + "rewards/margins": 0.5675183534622192, + "rewards/rejected": 1.2839370965957642, + "step": 9780 + }, + { + "accuracy": 0.637499988079071, + "epoch": 2.45, + "learning_rate": 8.209764886984423e-07, + "logps/chosen": -150.6623077392578, + "logps/margins": -9.905928611755371, + "logps/rejected": -140.75637817382812, + "loss": 0.7069, + "rewards/chosen": 1.4279701709747314, + "rewards/margins": 0.37460923194885254, + "rewards/rejected": 1.0533610582351685, + "step": 9790 + }, + { + "accuracy": 0.574999988079071, + "epoch": 2.45, + "learning_rate": 8.138040136651615e-07, + "logps/chosen": -157.9936981201172, + "logps/margins": 29.88228416442871, + "logps/rejected": -187.87599182128906, + "loss": 0.7297, + "rewards/chosen": 1.7519623041152954, + "rewards/margins": 0.22326412796974182, + "rewards/rejected": 1.5286980867385864, + "step": 9800 + }, + { + "accuracy": 0.574999988079071, + "epoch": 2.45, + "learning_rate": 8.066602308286464e-07, + "logps/chosen": -158.0318145751953, + "logps/margins": 11.26066780090332, + "logps/rejected": -169.29249572753906, + "loss": 0.7239, + "rewards/chosen": 1.934862494468689, + "rewards/margins": 0.2915184795856476, + "rewards/rejected": 1.6433439254760742, + "step": 9810 + }, + { + "accuracy": 0.574999988079071, + "epoch": 2.46, + "learning_rate": 7.995451891524003e-07, + "logps/chosen": -159.17398071289062, + "logps/margins": 7.945642948150635, + "logps/rejected": -167.11962890625, + "loss": 0.6955, + "rewards/chosen": 1.5379745960235596, + "rewards/margins": 0.15703454613685608, + "rewards/rejected": 1.3809400796890259, + "step": 9820 + }, + { + "accuracy": 0.574999988079071, + "epoch": 2.46, + "learning_rate": 7.924589374029346e-07, + "logps/chosen": -145.30796813964844, + "logps/margins": 33.14885711669922, + "logps/rejected": -178.4568328857422, + "loss": 0.7242, + "rewards/chosen": 1.6670467853546143, + "rewards/margins": 0.3576171398162842, + "rewards/rejected": 1.3094297647476196, + "step": 9830 + }, + { + "accuracy": 0.6000000238418579, + "epoch": 2.46, + "learning_rate": 7.854015241494367e-07, + "logps/chosen": -154.5379638671875, + "logps/margins": -9.783159255981445, + "logps/rejected": -144.75479125976562, + "loss": 0.6677, + "rewards/chosen": 1.5088295936584473, + "rewards/margins": 0.14709821343421936, + "rewards/rejected": 1.3617314100265503, + "step": 9840 + }, + { + "accuracy": 0.5874999761581421, + "epoch": 2.46, + "learning_rate": 7.783729977634302e-07, + "logps/chosen": -141.91236877441406, + "logps/margins": -21.697874069213867, + "logps/rejected": -120.2144775390625, + "loss": 0.6897, + "rewards/chosen": 1.6470162868499756, + "rewards/margins": 0.40481749176979065, + "rewards/rejected": 1.2421987056732178, + "step": 9850 + }, + { + "accuracy": 0.6625000238418579, + "epoch": 2.46, + "learning_rate": 7.713734064184525e-07, + "logps/chosen": -175.11485290527344, + "logps/margins": -7.495996952056885, + "logps/rejected": -167.6188507080078, + "loss": 0.7152, + "rewards/chosen": 1.8027808666229248, + "rewards/margins": 0.2860889434814453, + "rewards/rejected": 1.5166919231414795, + "step": 9860 + }, + { + "accuracy": 0.625, + "epoch": 2.47, + "learning_rate": 7.644027980897179e-07, + "logps/chosen": -156.45816040039062, + "logps/margins": 29.7894229888916, + "logps/rejected": -186.24758911132812, + "loss": 0.7423, + "rewards/chosen": 1.6115562915802002, + "rewards/margins": 0.4135339856147766, + "rewards/rejected": 1.1980221271514893, + "step": 9870 + }, + { + "accuracy": 0.512499988079071, + "epoch": 2.47, + "learning_rate": 7.574612205537929e-07, + "logps/chosen": -140.2349395751953, + "logps/margins": -14.217233657836914, + "logps/rejected": -126.0177001953125, + "loss": 0.7364, + "rewards/chosen": 1.5086239576339722, + "rewards/margins": 0.19660314917564392, + "rewards/rejected": 1.3120208978652954, + "step": 9880 + }, + { + "accuracy": 0.637499988079071, + "epoch": 2.47, + "learning_rate": 7.50548721388264e-07, + "logps/chosen": -164.3436737060547, + "logps/margins": -20.509065628051758, + "logps/rejected": -143.8345947265625, + "loss": 0.7749, + "rewards/chosen": 1.6397240161895752, + "rewards/margins": 0.3839845061302185, + "rewards/rejected": 1.255739450454712, + "step": 9890 + }, + { + "accuracy": 0.6000000238418579, + "epoch": 2.48, + "learning_rate": 7.436653479714167e-07, + "logps/chosen": -173.26365661621094, + "logps/margins": -6.868396759033203, + "logps/rejected": -166.395263671875, + "loss": 0.7388, + "rewards/chosen": 1.9731528759002686, + "rewards/margins": 0.2896750569343567, + "rewards/rejected": 1.6834779977798462, + "step": 9900 + }, + { + "accuracy": 0.574999988079071, + "epoch": 2.48, + "learning_rate": 7.368111474819078e-07, + "logps/chosen": -164.185302734375, + "logps/margins": -22.300792694091797, + "logps/rejected": -141.884521484375, + "loss": 0.6432, + "rewards/chosen": 1.5310838222503662, + "rewards/margins": 0.0994194820523262, + "rewards/rejected": 1.4316645860671997, + "step": 9910 + }, + { + "accuracy": 0.6499999761581421, + "epoch": 2.48, + "learning_rate": 7.299861668984437e-07, + "logps/chosen": -166.52099609375, + "logps/margins": -10.982331275939941, + "logps/rejected": -155.53866577148438, + "loss": 0.6601, + "rewards/chosen": 1.8998931646347046, + "rewards/margins": 0.5171636939048767, + "rewards/rejected": 1.3827292919158936, + "step": 9920 + }, + { + "accuracy": 0.550000011920929, + "epoch": 2.48, + "learning_rate": 7.23190452999456e-07, + "logps/chosen": -190.1132354736328, + "logps/margins": -39.64553451538086, + "logps/rejected": -150.46768188476562, + "loss": 0.6794, + "rewards/chosen": 1.7581523656845093, + "rewards/margins": 0.3194006085395813, + "rewards/rejected": 1.4387519359588623, + "step": 9930 + }, + { + "accuracy": 0.6499999761581421, + "epoch": 2.48, + "learning_rate": 7.164240523627835e-07, + "logps/chosen": -163.2890167236328, + "logps/margins": -11.928853988647461, + "logps/rejected": -151.3601531982422, + "loss": 0.7114, + "rewards/chosen": 1.723435640335083, + "rewards/margins": 0.5160144567489624, + "rewards/rejected": 1.2074211835861206, + "step": 9940 + }, + { + "accuracy": 0.5249999761581421, + "epoch": 2.49, + "learning_rate": 7.096870113653526e-07, + "logps/chosen": -174.7071533203125, + "logps/margins": -6.532422065734863, + "logps/rejected": -168.17474365234375, + "loss": 0.7308, + "rewards/chosen": 1.5038098096847534, + "rewards/margins": 0.25628599524497986, + "rewards/rejected": 1.2475237846374512, + "step": 9950 + }, + { + "accuracy": 0.612500011920929, + "epoch": 2.49, + "learning_rate": 7.029793761828557e-07, + "logps/chosen": -159.38587951660156, + "logps/margins": -10.556696891784668, + "logps/rejected": -148.82919311523438, + "loss": 0.7467, + "rewards/chosen": 1.6162688732147217, + "rewards/margins": 0.2932799458503723, + "rewards/rejected": 1.3229888677597046, + "step": 9960 + }, + { + "accuracy": 0.612500011920929, + "epoch": 2.49, + "learning_rate": 6.963011927894398e-07, + "logps/chosen": -120.05732727050781, + "logps/margins": 14.96923828125, + "logps/rejected": -135.02655029296875, + "loss": 0.6792, + "rewards/chosen": 1.5176719427108765, + "rewards/margins": 0.36645784974098206, + "rewards/rejected": 1.1512140035629272, + "step": 9970 + }, + { + "accuracy": 0.5625, + "epoch": 2.5, + "learning_rate": 6.896525069573895e-07, + "logps/chosen": -142.44766235351562, + "logps/margins": 43.97100830078125, + "logps/rejected": -186.41867065429688, + "loss": 0.7334, + "rewards/chosen": 1.8960453271865845, + "rewards/margins": 0.14816515147686005, + "rewards/rejected": 1.747879981994629, + "step": 9980 + }, + { + "accuracy": 0.6000000238418579, + "epoch": 2.5, + "learning_rate": 6.830333642568138e-07, + "logps/chosen": -162.27317810058594, + "logps/margins": 13.349047660827637, + "logps/rejected": -175.6222381591797, + "loss": 0.6908, + "rewards/chosen": 1.8994255065917969, + "rewards/margins": 0.5740529894828796, + "rewards/rejected": 1.325372576713562, + "step": 9990 + }, + { + "accuracy": 0.512499988079071, + "epoch": 2.5, + "learning_rate": 6.764438100553289e-07, + "logps/chosen": -159.21548461914062, + "logps/margins": 7.8038153648376465, + "logps/rejected": -167.01930236816406, + "loss": 0.7214, + "rewards/chosen": 1.5763514041900635, + "rewards/margins": 0.010388913564383984, + "rewards/rejected": 1.565962553024292, + "step": 10000 + }, + { + "accuracy": 0.637499988079071, + "epoch": 2.5, + "learning_rate": 6.698838895177556e-07, + "logps/chosen": -172.28408813476562, + "logps/margins": -16.814733505249023, + "logps/rejected": -155.4693603515625, + "loss": 0.6996, + "rewards/chosen": 1.7195981740951538, + "rewards/margins": 0.41504478454589844, + "rewards/rejected": 1.3045533895492554, + "step": 10010 + }, + { + "accuracy": 0.550000011920929, + "epoch": 2.5, + "learning_rate": 6.63353647605805e-07, + "logps/chosen": -205.3917694091797, + "logps/margins": -42.575809478759766, + "logps/rejected": -162.81594848632812, + "loss": 0.7448, + "rewards/chosen": 1.6821248531341553, + "rewards/margins": 0.31331807374954224, + "rewards/rejected": 1.3688069581985474, + "step": 10020 + }, + { + "accuracy": 0.5625, + "epoch": 2.51, + "learning_rate": 6.568531290777686e-07, + "logps/chosen": -136.85354614257812, + "logps/margins": -3.8076682090759277, + "logps/rejected": -133.04588317871094, + "loss": 0.6853, + "rewards/chosen": 1.5016696453094482, + "rewards/margins": 0.28090569376945496, + "rewards/rejected": 1.2207638025283813, + "step": 10030 + }, + { + "accuracy": 0.512499988079071, + "epoch": 2.51, + "learning_rate": 6.503823784882157e-07, + "logps/chosen": -158.67800903320312, + "logps/margins": -3.447350263595581, + "logps/rejected": -155.23062133789062, + "loss": 0.7045, + "rewards/chosen": 1.5721737146377563, + "rewards/margins": 0.24155478179454803, + "rewards/rejected": 1.3306188583374023, + "step": 10040 + }, + { + "accuracy": 0.6000000238418579, + "epoch": 2.51, + "learning_rate": 6.43941440187687e-07, + "logps/chosen": -148.8336944580078, + "logps/margins": -6.549199104309082, + "logps/rejected": -142.28445434570312, + "loss": 0.6918, + "rewards/chosen": 1.765259027481079, + "rewards/margins": 0.4852274954319, + "rewards/rejected": 1.280031442642212, + "step": 10050 + }, + { + "accuracy": 0.6625000238418579, + "epoch": 2.52, + "learning_rate": 6.375303583223852e-07, + "logps/chosen": -139.75965881347656, + "logps/margins": 42.60646057128906, + "logps/rejected": -182.36611938476562, + "loss": 0.7973, + "rewards/chosen": 1.8161888122558594, + "rewards/margins": 0.49565666913986206, + "rewards/rejected": 1.3205320835113525, + "step": 10060 + }, + { + "accuracy": 0.6000000238418579, + "epoch": 2.52, + "learning_rate": 6.311491768338812e-07, + "logps/chosen": -166.62728881835938, + "logps/margins": -18.201078414916992, + "logps/rejected": -148.42620849609375, + "loss": 0.7254, + "rewards/chosen": 1.515053629875183, + "rewards/margins": 0.22957149147987366, + "rewards/rejected": 1.2854821681976318, + "step": 10070 + }, + { + "accuracy": 0.5625, + "epoch": 2.52, + "learning_rate": 6.247979394588078e-07, + "logps/chosen": -154.6636199951172, + "logps/margins": 21.685977935791016, + "logps/rejected": -176.349609375, + "loss": 0.7358, + "rewards/chosen": 1.659314513206482, + "rewards/margins": 0.2850584089756012, + "rewards/rejected": 1.3742562532424927, + "step": 10080 + }, + { + "accuracy": 0.574999988079071, + "epoch": 2.52, + "learning_rate": 6.184766897285615e-07, + "logps/chosen": -158.09425354003906, + "logps/margins": -16.5406551361084, + "logps/rejected": -141.5535888671875, + "loss": 0.7264, + "rewards/chosen": 1.4330400228500366, + "rewards/margins": 0.2220078408718109, + "rewards/rejected": 1.2110321521759033, + "step": 10090 + }, + { + "accuracy": 0.6000000238418579, + "epoch": 2.52, + "learning_rate": 6.121854709689995e-07, + "logps/chosen": -158.7074737548828, + "logps/margins": 2.478447675704956, + "logps/rejected": -161.18592834472656, + "loss": 0.682, + "rewards/chosen": 1.5075863599777222, + "rewards/margins": 0.4088488519191742, + "rewards/rejected": 1.0987374782562256, + "step": 10100 + }, + { + "accuracy": 0.5874999761581421, + "epoch": 2.53, + "learning_rate": 6.059243263001502e-07, + "logps/chosen": -160.43089294433594, + "logps/margins": -11.308477401733398, + "logps/rejected": -149.12242126464844, + "loss": 0.7339, + "rewards/chosen": 1.4142742156982422, + "rewards/margins": 0.3587925136089325, + "rewards/rejected": 1.0554816722869873, + "step": 10110 + }, + { + "accuracy": 0.5, + "epoch": 2.53, + "learning_rate": 5.996932986359144e-07, + "logps/chosen": -163.14320373535156, + "logps/margins": -28.927875518798828, + "logps/rejected": -134.21531677246094, + "loss": 0.7272, + "rewards/chosen": 1.5396592617034912, + "rewards/margins": 0.16019289195537567, + "rewards/rejected": 1.3794664144515991, + "step": 10120 + }, + { + "accuracy": 0.75, + "epoch": 2.53, + "learning_rate": 5.934924306837698e-07, + "logps/chosen": -160.2681884765625, + "logps/margins": -5.355586528778076, + "logps/rejected": -154.91259765625, + "loss": 0.639, + "rewards/chosen": 1.8887990713119507, + "rewards/margins": 0.6285035014152527, + "rewards/rejected": 1.2602955102920532, + "step": 10130 + }, + { + "accuracy": 0.5874999761581421, + "epoch": 2.54, + "learning_rate": 5.873217649444779e-07, + "logps/chosen": -169.8264923095703, + "logps/margins": 20.929908752441406, + "logps/rejected": -190.7563934326172, + "loss": 0.7345, + "rewards/chosen": 1.5167617797851562, + "rewards/margins": 0.09298063814640045, + "rewards/rejected": 1.423780918121338, + "step": 10140 + }, + { + "accuracy": 0.512499988079071, + "epoch": 2.54, + "learning_rate": 5.811813437117975e-07, + "logps/chosen": -162.54232788085938, + "logps/margins": 5.259152412414551, + "logps/rejected": -167.801513671875, + "loss": 0.7362, + "rewards/chosen": 1.428236722946167, + "rewards/margins": 0.18189296126365662, + "rewards/rejected": 1.2463438510894775, + "step": 10150 + }, + { + "accuracy": 0.574999988079071, + "epoch": 2.54, + "learning_rate": 5.750712090721894e-07, + "logps/chosen": -154.10671997070312, + "logps/margins": -20.24647331237793, + "logps/rejected": -133.86026000976562, + "loss": 0.6717, + "rewards/chosen": 1.5687921047210693, + "rewards/margins": 0.18637429177761078, + "rewards/rejected": 1.3824176788330078, + "step": 10160 + }, + { + "accuracy": 0.625, + "epoch": 2.54, + "learning_rate": 5.689914029045285e-07, + "logps/chosen": -165.53726196289062, + "logps/margins": -11.109697341918945, + "logps/rejected": -154.4275360107422, + "loss": 0.6791, + "rewards/chosen": 1.5965814590454102, + "rewards/margins": 0.3485226631164551, + "rewards/rejected": 1.2480590343475342, + "step": 10170 + }, + { + "accuracy": 0.612500011920929, + "epoch": 2.54, + "learning_rate": 5.629419668798214e-07, + "logps/chosen": -153.14306640625, + "logps/margins": -7.417462348937988, + "logps/rejected": -145.7255859375, + "loss": 0.6844, + "rewards/chosen": 1.5229694843292236, + "rewards/margins": 0.41280597448349, + "rewards/rejected": 1.1101634502410889, + "step": 10180 + }, + { + "accuracy": 0.512499988079071, + "epoch": 2.55, + "learning_rate": 5.569229424609157e-07, + "logps/chosen": -134.8220672607422, + "logps/margins": 16.329435348510742, + "logps/rejected": -151.15151977539062, + "loss": 0.6957, + "rewards/chosen": 1.4444681406021118, + "rewards/margins": 0.28991633653640747, + "rewards/rejected": 1.1545517444610596, + "step": 10190 + }, + { + "accuracy": 0.699999988079071, + "epoch": 2.55, + "learning_rate": 5.509343709022203e-07, + "logps/chosen": -129.17367553710938, + "logps/margins": 6.479616641998291, + "logps/rejected": -135.65328979492188, + "loss": 0.6648, + "rewards/chosen": 1.8376258611679077, + "rewards/margins": 0.4804604649543762, + "rewards/rejected": 1.3571654558181763, + "step": 10200 + }, + { + "accuracy": 0.550000011920929, + "epoch": 2.55, + "learning_rate": 5.449762932494152e-07, + "logps/chosen": -152.7789764404297, + "logps/margins": -0.8572551608085632, + "logps/rejected": -151.92172241210938, + "loss": 0.7191, + "rewards/chosen": 1.7641277313232422, + "rewards/margins": 0.3272823095321655, + "rewards/rejected": 1.4368455410003662, + "step": 10210 + }, + { + "accuracy": 0.6499999761581421, + "epoch": 2.56, + "learning_rate": 5.390487503391795e-07, + "logps/chosen": -136.16244506835938, + "logps/margins": 3.2090206146240234, + "logps/rejected": -139.3714599609375, + "loss": 0.7508, + "rewards/chosen": 1.7097707986831665, + "rewards/margins": 0.39835232496261597, + "rewards/rejected": 1.3114182949066162, + "step": 10220 + }, + { + "accuracy": 0.6000000238418579, + "epoch": 2.56, + "learning_rate": 5.331517827989057e-07, + "logps/chosen": -148.9392547607422, + "logps/margins": 4.415112495422363, + "logps/rejected": -153.35438537597656, + "loss": 0.669, + "rewards/chosen": 1.4533170461654663, + "rewards/margins": 0.3614344596862793, + "rewards/rejected": 1.091882586479187, + "step": 10230 + }, + { + "accuracy": 0.6875, + "epoch": 2.56, + "learning_rate": 5.272854310464231e-07, + "logps/chosen": -144.89292907714844, + "logps/margins": -17.377492904663086, + "logps/rejected": -127.51542663574219, + "loss": 0.6865, + "rewards/chosen": 1.6477216482162476, + "rewards/margins": 0.4931011199951172, + "rewards/rejected": 1.1546205282211304, + "step": 10240 + }, + { + "accuracy": 0.6000000238418579, + "epoch": 2.56, + "learning_rate": 5.214497352897197e-07, + "logps/chosen": -179.67184448242188, + "logps/margins": -3.607759952545166, + "logps/rejected": -176.0640869140625, + "loss": 0.7243, + "rewards/chosen": 1.8750633001327515, + "rewards/margins": 0.384525865316391, + "rewards/rejected": 1.4905375242233276, + "step": 10250 + }, + { + "accuracy": 0.6499999761581421, + "epoch": 2.56, + "learning_rate": 5.156447355266681e-07, + "logps/chosen": -146.90066528320312, + "logps/margins": -6.4042558670043945, + "logps/rejected": -140.49639892578125, + "loss": 0.6776, + "rewards/chosen": 1.4684303998947144, + "rewards/margins": 0.2899070084095001, + "rewards/rejected": 1.1785234212875366, + "step": 10260 + }, + { + "accuracy": 0.6000000238418579, + "epoch": 2.57, + "learning_rate": 5.098704715447478e-07, + "logps/chosen": -136.5681915283203, + "logps/margins": 4.121154308319092, + "logps/rejected": -140.68934631347656, + "loss": 0.7277, + "rewards/chosen": 1.8801416158676147, + "rewards/margins": 0.2951890528202057, + "rewards/rejected": 1.584952473640442, + "step": 10270 + }, + { + "accuracy": 0.675000011920929, + "epoch": 2.57, + "learning_rate": 5.041269829207784e-07, + "logps/chosen": -164.70407104492188, + "logps/margins": -7.782859802246094, + "logps/rejected": -156.92120361328125, + "loss": 0.6548, + "rewards/chosen": 1.945514440536499, + "rewards/margins": 0.5381917953491211, + "rewards/rejected": 1.407322645187378, + "step": 10280 + }, + { + "accuracy": 0.574999988079071, + "epoch": 2.57, + "learning_rate": 4.984143090206445e-07, + "logps/chosen": -146.0883026123047, + "logps/margins": 18.044635772705078, + "logps/rejected": -164.13294982910156, + "loss": 0.7707, + "rewards/chosen": 1.5849359035491943, + "rewards/margins": 0.33173900842666626, + "rewards/rejected": 1.2531967163085938, + "step": 10290 + }, + { + "accuracy": 0.6000000238418579, + "epoch": 2.58, + "learning_rate": 4.927324889990248e-07, + "logps/chosen": -140.98744201660156, + "logps/margins": 3.6417973041534424, + "logps/rejected": -144.62925720214844, + "loss": 0.6644, + "rewards/chosen": 1.6773532629013062, + "rewards/margins": 0.4975704252719879, + "rewards/rejected": 1.1797829866409302, + "step": 10300 + }, + { + "accuracy": 0.6499999761581421, + "epoch": 2.58, + "learning_rate": 4.870815617991281e-07, + "logps/chosen": -147.18875122070312, + "logps/margins": -21.48464584350586, + "logps/rejected": -125.70408630371094, + "loss": 0.6925, + "rewards/chosen": 1.5730440616607666, + "rewards/margins": 0.3138018846511841, + "rewards/rejected": 1.259242296218872, + "step": 10310 + }, + { + "accuracy": 0.6499999761581421, + "epoch": 2.58, + "learning_rate": 4.814615661524208e-07, + "logps/chosen": -154.61264038085938, + "logps/margins": 13.212823867797852, + "logps/rejected": -167.8254852294922, + "loss": 0.689, + "rewards/chosen": 1.380718469619751, + "rewards/margins": 0.5258862376213074, + "rewards/rejected": 0.8548324704170227, + "step": 10320 + }, + { + "accuracy": 0.574999988079071, + "epoch": 2.58, + "learning_rate": 4.7587254057836594e-07, + "logps/chosen": -175.52635192871094, + "logps/margins": -26.888683319091797, + "logps/rejected": -148.63766479492188, + "loss": 0.6863, + "rewards/chosen": 1.7164392471313477, + "rewards/margins": 0.1589020937681198, + "rewards/rejected": 1.5575369596481323, + "step": 10330 + }, + { + "accuracy": 0.637499988079071, + "epoch": 2.58, + "learning_rate": 4.7031452338415774e-07, + "logps/chosen": -136.0001678466797, + "logps/margins": 16.956220626831055, + "logps/rejected": -152.95639038085938, + "loss": 0.7447, + "rewards/chosen": 1.3328689336776733, + "rewards/margins": 0.27154800295829773, + "rewards/rejected": 1.0613210201263428, + "step": 10340 + }, + { + "accuracy": 0.6000000238418579, + "epoch": 2.59, + "learning_rate": 4.647875526644585e-07, + "logps/chosen": -165.89590454101562, + "logps/margins": -16.506202697753906, + "logps/rejected": -149.38967895507812, + "loss": 0.7385, + "rewards/chosen": 1.541682243347168, + "rewards/margins": 0.4795795977115631, + "rewards/rejected": 1.0621026754379272, + "step": 10350 + }, + { + "accuracy": 0.574999988079071, + "epoch": 2.59, + "learning_rate": 4.5929166630113873e-07, + "logps/chosen": -163.45611572265625, + "logps/margins": -20.461566925048828, + "logps/rejected": -142.9945526123047, + "loss": 0.7294, + "rewards/chosen": 1.4781315326690674, + "rewards/margins": 0.08739657700061798, + "rewards/rejected": 1.390735149383545, + "step": 10360 + }, + { + "accuracy": 0.6000000238418579, + "epoch": 2.59, + "learning_rate": 4.538269019630159e-07, + "logps/chosen": -154.6902618408203, + "logps/margins": -8.726030349731445, + "logps/rejected": -145.9642333984375, + "loss": 0.6716, + "rewards/chosen": 1.7533833980560303, + "rewards/margins": 0.4463822841644287, + "rewards/rejected": 1.3070011138916016, + "step": 10370 + }, + { + "accuracy": 0.5874999761581421, + "epoch": 2.59, + "learning_rate": 4.48393297105596e-07, + "logps/chosen": -142.0727081298828, + "logps/margins": 1.5529677867889404, + "logps/rejected": -143.62567138671875, + "loss": 0.6951, + "rewards/chosen": 1.6950922012329102, + "rewards/margins": 0.1875830888748169, + "rewards/rejected": 1.5075089931488037, + "step": 10380 + }, + { + "accuracy": 0.48750001192092896, + "epoch": 2.6, + "learning_rate": 4.429908889708195e-07, + "logps/chosen": -144.35433959960938, + "logps/margins": -0.6098182797431946, + "logps/rejected": -143.74453735351562, + "loss": 0.7541, + "rewards/chosen": 1.6737911701202393, + "rewards/margins": 0.13235017657279968, + "rewards/rejected": 1.5414409637451172, + "step": 10390 + }, + { + "accuracy": 0.6875, + "epoch": 2.6, + "learning_rate": 4.376197145868044e-07, + "logps/chosen": -143.8758544921875, + "logps/margins": 3.195131778717041, + "logps/rejected": -147.07098388671875, + "loss": 0.7209, + "rewards/chosen": 1.6185165643692017, + "rewards/margins": 0.7061958312988281, + "rewards/rejected": 0.9123207926750183, + "step": 10400 + }, + { + "accuracy": 0.625, + "epoch": 2.6, + "learning_rate": 4.322798107675924e-07, + "logps/chosen": -127.64068603515625, + "logps/margins": 39.77457809448242, + "logps/rejected": -167.41526794433594, + "loss": 0.6732, + "rewards/chosen": 1.3446811437606812, + "rewards/margins": 0.20344407856464386, + "rewards/rejected": 1.1412371397018433, + "step": 10410 + }, + { + "accuracy": 0.612500011920929, + "epoch": 2.6, + "learning_rate": 4.2697121411289623e-07, + "logps/chosen": -140.3050537109375, + "logps/margins": 10.931300163269043, + "logps/rejected": -151.23635864257812, + "loss": 0.6904, + "rewards/chosen": 1.5837719440460205, + "rewards/margins": 0.41889768838882446, + "rewards/rejected": 1.1648743152618408, + "step": 10420 + }, + { + "accuracy": 0.6000000238418579, + "epoch": 2.61, + "learning_rate": 4.21693961007848e-07, + "logps/chosen": -144.75013732910156, + "logps/margins": -17.15608787536621, + "logps/rejected": -127.59403228759766, + "loss": 0.6794, + "rewards/chosen": 1.6476796865463257, + "rewards/margins": 0.49424368143081665, + "rewards/rejected": 1.1534361839294434, + "step": 10430 + }, + { + "accuracy": 0.6875, + "epoch": 2.61, + "learning_rate": 4.164480876227539e-07, + "logps/chosen": -153.25234985351562, + "logps/margins": 18.287174224853516, + "logps/rejected": -171.53952026367188, + "loss": 0.6938, + "rewards/chosen": 1.525827169418335, + "rewards/margins": 0.23063834011554718, + "rewards/rejected": 1.2951889038085938, + "step": 10440 + }, + { + "accuracy": 0.625, + "epoch": 2.61, + "learning_rate": 4.112336299128417e-07, + "logps/chosen": -148.50718688964844, + "logps/margins": 16.718585968017578, + "logps/rejected": -165.2257843017578, + "loss": 0.695, + "rewards/chosen": 1.6053005456924438, + "rewards/margins": 0.5407900810241699, + "rewards/rejected": 1.0645105838775635, + "step": 10450 + }, + { + "accuracy": 0.637499988079071, + "epoch": 2.62, + "learning_rate": 4.060506236180156e-07, + "logps/chosen": -145.3214569091797, + "logps/margins": 8.820276260375977, + "logps/rejected": -154.14173889160156, + "loss": 0.6887, + "rewards/chosen": 1.6494741439819336, + "rewards/margins": 0.2769840955734253, + "rewards/rejected": 1.3724901676177979, + "step": 10460 + }, + { + "accuracy": 0.574999988079071, + "epoch": 2.62, + "learning_rate": 4.008991042626131e-07, + "logps/chosen": -141.070556640625, + "logps/margins": 7.879616737365723, + "logps/rejected": -148.95016479492188, + "loss": 0.792, + "rewards/chosen": 1.5413919687271118, + "rewards/margins": 0.33226412534713745, + "rewards/rejected": 1.2091281414031982, + "step": 10470 + }, + { + "accuracy": 0.6000000238418579, + "epoch": 2.62, + "learning_rate": 3.957791071551609e-07, + "logps/chosen": -171.77371215820312, + "logps/margins": -24.239776611328125, + "logps/rejected": -147.533935546875, + "loss": 0.6591, + "rewards/chosen": 1.7820875644683838, + "rewards/margins": 0.3989606201648712, + "rewards/rejected": 1.3831268548965454, + "step": 10480 + }, + { + "accuracy": 0.5625, + "epoch": 2.62, + "learning_rate": 3.9069066738812775e-07, + "logps/chosen": -146.9730224609375, + "logps/margins": -7.8162126541137695, + "logps/rejected": -139.1568145751953, + "loss": 0.7259, + "rewards/chosen": 1.5560572147369385, + "rewards/margins": 0.29367756843566895, + "rewards/rejected": 1.26237952709198, + "step": 10490 + }, + { + "accuracy": 0.6499999761581421, + "epoch": 2.62, + "learning_rate": 3.8563381983769354e-07, + "logps/chosen": -145.4248046875, + "logps/margins": -8.54357624053955, + "logps/rejected": -136.88124084472656, + "loss": 0.7085, + "rewards/chosen": 1.7293905019760132, + "rewards/margins": 0.2882283627986908, + "rewards/rejected": 1.4411622285842896, + "step": 10500 + }, + { + "accuracy": 0.5375000238418579, + "epoch": 2.63, + "learning_rate": 3.8060859916350226e-07, + "logps/chosen": -201.6217498779297, + "logps/margins": -28.402873992919922, + "logps/rejected": -173.21888732910156, + "loss": 0.7056, + "rewards/chosen": 2.021745443344116, + "rewards/margins": 0.443168580532074, + "rewards/rejected": 1.5785770416259766, + "step": 10510 + }, + { + "accuracy": 0.5874999761581421, + "epoch": 2.63, + "learning_rate": 3.756150398084274e-07, + "logps/chosen": -143.6564178466797, + "logps/margins": -12.014060974121094, + "logps/rejected": -131.64236450195312, + "loss": 0.6434, + "rewards/chosen": 1.578778624534607, + "rewards/margins": 0.2785910367965698, + "rewards/rejected": 1.300187587738037, + "step": 10520 + }, + { + "accuracy": 0.574999988079071, + "epoch": 2.63, + "learning_rate": 3.7065317599833725e-07, + "logps/chosen": -130.0078582763672, + "logps/margins": 22.231075286865234, + "logps/rejected": -152.23895263671875, + "loss": 0.7608, + "rewards/chosen": 1.4009171724319458, + "rewards/margins": 0.34369316697120667, + "rewards/rejected": 1.0572240352630615, + "step": 10530 + }, + { + "accuracy": 0.6000000238418579, + "epoch": 2.63, + "learning_rate": 3.657230417418561e-07, + "logps/chosen": -182.8376007080078, + "logps/margins": -18.702651977539062, + "logps/rejected": -164.1349334716797, + "loss": 0.7568, + "rewards/chosen": 1.6314647197723389, + "rewards/margins": 0.379741370677948, + "rewards/rejected": 1.2517234086990356, + "step": 10540 + }, + { + "accuracy": 0.625, + "epoch": 2.64, + "learning_rate": 3.6082467083013604e-07, + "logps/chosen": -166.7527618408203, + "logps/margins": 8.057609558105469, + "logps/rejected": -174.81036376953125, + "loss": 0.6697, + "rewards/chosen": 1.456066370010376, + "rewards/margins": 0.3404538333415985, + "rewards/rejected": 1.115612506866455, + "step": 10550 + }, + { + "accuracy": 0.6625000238418579, + "epoch": 2.64, + "learning_rate": 3.5595809683662217e-07, + "logps/chosen": -148.78012084960938, + "logps/margins": 4.150847911834717, + "logps/rejected": -152.9309844970703, + "loss": 0.6985, + "rewards/chosen": 1.7091699838638306, + "rewards/margins": 0.5028413534164429, + "rewards/rejected": 1.2063285112380981, + "step": 10560 + }, + { + "accuracy": 0.612500011920929, + "epoch": 2.64, + "learning_rate": 3.5112335311682397e-07, + "logps/chosen": -170.67459106445312, + "logps/margins": -11.61083984375, + "logps/rejected": -159.06375122070312, + "loss": 0.6588, + "rewards/chosen": 1.5618044137954712, + "rewards/margins": 0.344957172870636, + "rewards/rejected": 1.2168471813201904, + "step": 10570 + }, + { + "accuracy": 0.637499988079071, + "epoch": 2.65, + "learning_rate": 3.4632047280808654e-07, + "logps/chosen": -157.56039428710938, + "logps/margins": -9.039320945739746, + "logps/rejected": -148.52105712890625, + "loss": 0.6863, + "rewards/chosen": 1.581024408340454, + "rewards/margins": 0.41993817687034607, + "rewards/rejected": 1.1610862016677856, + "step": 10580 + }, + { + "accuracy": 0.675000011920929, + "epoch": 2.65, + "learning_rate": 3.415494888293602e-07, + "logps/chosen": -161.7837371826172, + "logps/margins": 4.870204448699951, + "logps/rejected": -166.65394592285156, + "loss": 0.7105, + "rewards/chosen": 1.4997773170471191, + "rewards/margins": 0.5541407465934753, + "rewards/rejected": 0.9456365704536438, + "step": 10590 + }, + { + "accuracy": 0.5874999761581421, + "epoch": 2.65, + "learning_rate": 3.368104338809819e-07, + "logps/chosen": -157.84609985351562, + "logps/margins": 13.422027587890625, + "logps/rejected": -171.26812744140625, + "loss": 0.6592, + "rewards/chosen": 1.7351264953613281, + "rewards/margins": 0.36960747838020325, + "rewards/rejected": 1.3655191659927368, + "step": 10600 + }, + { + "accuracy": 0.550000011920929, + "epoch": 2.65, + "learning_rate": 3.32103340444444e-07, + "logps/chosen": -134.43722534179688, + "logps/margins": 10.036497116088867, + "logps/rejected": -144.47372436523438, + "loss": 0.6917, + "rewards/chosen": 1.4535917043685913, + "rewards/margins": 0.16945095360279083, + "rewards/rejected": 1.2841408252716064, + "step": 10610 + }, + { + "accuracy": 0.574999988079071, + "epoch": 2.66, + "learning_rate": 3.2742824078217604e-07, + "logps/chosen": -165.32785034179688, + "logps/margins": 18.22348403930664, + "logps/rejected": -183.55133056640625, + "loss": 0.7234, + "rewards/chosen": 1.4988008737564087, + "rewards/margins": 0.4024287164211273, + "rewards/rejected": 1.096372127532959, + "step": 10620 + }, + { + "accuracy": 0.612500011920929, + "epoch": 2.66, + "learning_rate": 3.2278516693732166e-07, + "logps/chosen": -173.69908142089844, + "logps/margins": 12.5607271194458, + "logps/rejected": -186.2598114013672, + "loss": 0.7249, + "rewards/chosen": 1.9409434795379639, + "rewards/margins": 0.47186437249183655, + "rewards/rejected": 1.4690793752670288, + "step": 10630 + }, + { + "accuracy": 0.5375000238418579, + "epoch": 2.66, + "learning_rate": 3.181741507335201e-07, + "logps/chosen": -132.84254455566406, + "logps/margins": 18.383285522460938, + "logps/rejected": -151.225830078125, + "loss": 0.6932, + "rewards/chosen": 1.5318937301635742, + "rewards/margins": 0.23338551819324493, + "rewards/rejected": 1.2985084056854248, + "step": 10640 + }, + { + "accuracy": 0.625, + "epoch": 2.66, + "learning_rate": 3.135952237746853e-07, + "logps/chosen": -165.6799774169922, + "logps/margins": -4.473043441772461, + "logps/rejected": -161.2069549560547, + "loss": 0.7395, + "rewards/chosen": 1.7145675420761108, + "rewards/margins": 0.4156590402126312, + "rewards/rejected": 1.2989085912704468, + "step": 10650 + }, + { + "accuracy": 0.512499988079071, + "epoch": 2.67, + "learning_rate": 3.0904841744479384e-07, + "logps/chosen": -141.64495849609375, + "logps/margins": -0.8441829681396484, + "logps/rejected": -140.80076599121094, + "loss": 0.782, + "rewards/chosen": 1.492417335510254, + "rewards/margins": 0.019361266866326332, + "rewards/rejected": 1.4730560779571533, + "step": 10660 + }, + { + "accuracy": 0.5625, + "epoch": 2.67, + "learning_rate": 3.0453376290766667e-07, + "logps/chosen": -168.78512573242188, + "logps/margins": -0.7318155169487, + "logps/rejected": -168.0532989501953, + "loss": 0.7216, + "rewards/chosen": 1.6531559228897095, + "rewards/margins": 0.21298010647296906, + "rewards/rejected": 1.4401757717132568, + "step": 10670 + }, + { + "accuracy": 0.6875, + "epoch": 2.67, + "learning_rate": 3.00051291106756e-07, + "logps/chosen": -144.38949584960938, + "logps/margins": 27.488525390625, + "logps/rejected": -171.87803649902344, + "loss": 0.6269, + "rewards/chosen": 1.6364219188690186, + "rewards/margins": 0.4856742024421692, + "rewards/rejected": 1.1507476568222046, + "step": 10680 + }, + { + "accuracy": 0.550000011920929, + "epoch": 2.67, + "learning_rate": 2.9560103276493503e-07, + "logps/chosen": -143.61288452148438, + "logps/margins": -10.060467720031738, + "logps/rejected": -133.55239868164062, + "loss": 0.7448, + "rewards/chosen": 1.3371295928955078, + "rewards/margins": 0.14515912532806396, + "rewards/rejected": 1.1919705867767334, + "step": 10690 + }, + { + "accuracy": 0.6000000238418579, + "epoch": 2.67, + "learning_rate": 2.911830183842818e-07, + "logps/chosen": -166.45095825195312, + "logps/margins": -6.1256513595581055, + "logps/rejected": -160.32528686523438, + "loss": 0.7858, + "rewards/chosen": 1.6316633224487305, + "rewards/margins": 0.43664416670799255, + "rewards/rejected": 1.1950193643569946, + "step": 10700 + }, + { + "accuracy": 0.637499988079071, + "epoch": 2.68, + "learning_rate": 2.8679727824587887e-07, + "logps/chosen": -140.07427978515625, + "logps/margins": 1.3530391454696655, + "logps/rejected": -141.4273223876953, + "loss": 0.6689, + "rewards/chosen": 1.6437946557998657, + "rewards/margins": 0.503858745098114, + "rewards/rejected": 1.139935851097107, + "step": 10710 + }, + { + "accuracy": 0.6000000238418579, + "epoch": 2.68, + "learning_rate": 2.8244384240959876e-07, + "logps/chosen": -156.7366180419922, + "logps/margins": 14.175226211547852, + "logps/rejected": -170.91183471679688, + "loss": 0.7254, + "rewards/chosen": 1.4197204113006592, + "rewards/margins": 0.299009770154953, + "rewards/rejected": 1.1207106113433838, + "step": 10720 + }, + { + "accuracy": 0.5375000238418579, + "epoch": 2.68, + "learning_rate": 2.781227407139003e-07, + "logps/chosen": -162.68284606933594, + "logps/margins": -7.633828639984131, + "logps/rejected": -155.04901123046875, + "loss": 0.7308, + "rewards/chosen": 1.7630033493041992, + "rewards/margins": 0.19181713461875916, + "rewards/rejected": 1.5711861848831177, + "step": 10730 + }, + { + "accuracy": 0.675000011920929, + "epoch": 2.69, + "learning_rate": 2.7383400277562423e-07, + "logps/chosen": -197.96755981445312, + "logps/margins": -37.84418487548828, + "logps/rejected": -160.1233673095703, + "loss": 0.719, + "rewards/chosen": 1.8301318883895874, + "rewards/margins": 0.515482485294342, + "rewards/rejected": 1.3146494626998901, + "step": 10740 + }, + { + "accuracy": 0.6499999761581421, + "epoch": 2.69, + "learning_rate": 2.695776579897913e-07, + "logps/chosen": -154.4912109375, + "logps/margins": -10.452499389648438, + "logps/rejected": -144.03872680664062, + "loss": 0.7121, + "rewards/chosen": 1.4876806735992432, + "rewards/margins": 0.27876460552215576, + "rewards/rejected": 1.2089159488677979, + "step": 10750 + }, + { + "accuracy": 0.6625000238418579, + "epoch": 2.69, + "learning_rate": 2.6535373552939615e-07, + "logps/chosen": -164.10330200195312, + "logps/margins": 7.039018154144287, + "logps/rejected": -171.14230346679688, + "loss": 0.7179, + "rewards/chosen": 2.165771722793579, + "rewards/margins": 0.6352127194404602, + "rewards/rejected": 1.5305588245391846, + "step": 10760 + }, + { + "accuracy": 0.625, + "epoch": 2.69, + "learning_rate": 2.6116226434521383e-07, + "logps/chosen": -177.56527709960938, + "logps/margins": -5.483725070953369, + "logps/rejected": -172.08154296875, + "loss": 0.6706, + "rewards/chosen": 1.937949538230896, + "rewards/margins": 0.5156494975090027, + "rewards/rejected": 1.4222999811172485, + "step": 10770 + }, + { + "accuracy": 0.5249999761581421, + "epoch": 2.69, + "learning_rate": 2.57003273165598e-07, + "logps/chosen": -176.0704803466797, + "logps/margins": -7.556451320648193, + "logps/rejected": -168.5140380859375, + "loss": 0.7425, + "rewards/chosen": 1.900979995727539, + "rewards/margins": 0.21253938972949982, + "rewards/rejected": 1.6884405612945557, + "step": 10780 + }, + { + "accuracy": 0.48750001192092896, + "epoch": 2.7, + "learning_rate": 2.5287679049628367e-07, + "logps/chosen": -146.0129852294922, + "logps/margins": 7.128104209899902, + "logps/rejected": -153.14109802246094, + "loss": 0.7379, + "rewards/chosen": 1.5207948684692383, + "rewards/margins": 0.18472692370414734, + "rewards/rejected": 1.3360679149627686, + "step": 10790 + }, + { + "accuracy": 0.612500011920929, + "epoch": 2.7, + "learning_rate": 2.4878284462019067e-07, + "logps/chosen": -152.4406280517578, + "logps/margins": -10.900254249572754, + "logps/rejected": -141.5403594970703, + "loss": 0.6926, + "rewards/chosen": 1.7750060558319092, + "rewards/margins": 0.32715070247650146, + "rewards/rejected": 1.4478554725646973, + "step": 10800 + }, + { + "accuracy": 0.6000000238418579, + "epoch": 2.7, + "learning_rate": 2.4472146359723504e-07, + "logps/chosen": -168.95565795898438, + "logps/margins": 8.44402027130127, + "logps/rejected": -177.3997039794922, + "loss": 0.7073, + "rewards/chosen": 1.714660406112671, + "rewards/margins": 0.40569210052490234, + "rewards/rejected": 1.3089683055877686, + "step": 10810 + }, + { + "accuracy": 0.6625000238418579, + "epoch": 2.71, + "learning_rate": 2.4069267526413085e-07, + "logps/chosen": -164.18008422851562, + "logps/margins": 5.244939804077148, + "logps/rejected": -169.42501831054688, + "loss": 0.7182, + "rewards/chosen": 1.536342740058899, + "rewards/margins": 0.3071072995662689, + "rewards/rejected": 1.2292354106903076, + "step": 10820 + }, + { + "accuracy": 0.6875, + "epoch": 2.71, + "learning_rate": 2.3669650723420202e-07, + "logps/chosen": -167.36923217773438, + "logps/margins": -5.502712726593018, + "logps/rejected": -161.8665313720703, + "loss": 0.6907, + "rewards/chosen": 1.5276893377304077, + "rewards/margins": 0.3861038088798523, + "rewards/rejected": 1.1415855884552002, + "step": 10830 + }, + { + "accuracy": 0.6499999761581421, + "epoch": 2.71, + "learning_rate": 2.3273298689719392e-07, + "logps/chosen": -157.96096801757812, + "logps/margins": -16.843746185302734, + "logps/rejected": -141.11721801757812, + "loss": 0.6477, + "rewards/chosen": 1.6701799631118774, + "rewards/margins": 0.4108037054538727, + "rewards/rejected": 1.2593762874603271, + "step": 10840 + }, + { + "accuracy": 0.625, + "epoch": 2.71, + "learning_rate": 2.2880214141908387e-07, + "logps/chosen": -138.9268798828125, + "logps/margins": -11.28620433807373, + "logps/rejected": -127.64070129394531, + "loss": 0.7146, + "rewards/chosen": 1.7406654357910156, + "rewards/margins": 0.5745649337768555, + "rewards/rejected": 1.1661005020141602, + "step": 10850 + }, + { + "accuracy": 0.625, + "epoch": 2.71, + "learning_rate": 2.2490399774189652e-07, + "logps/chosen": -158.0842742919922, + "logps/margins": 13.697056770324707, + "logps/rejected": -171.78134155273438, + "loss": 0.732, + "rewards/chosen": 1.8421891927719116, + "rewards/margins": 0.42154446244239807, + "rewards/rejected": 1.420644760131836, + "step": 10860 + }, + { + "accuracy": 0.6499999761581421, + "epoch": 2.72, + "learning_rate": 2.210385825835154e-07, + "logps/chosen": -165.48143005371094, + "logps/margins": 12.895746231079102, + "logps/rejected": -178.37716674804688, + "loss": 0.6685, + "rewards/chosen": 1.8146826028823853, + "rewards/margins": 0.3660617768764496, + "rewards/rejected": 1.4486209154129028, + "step": 10870 + }, + { + "accuracy": 0.7124999761581421, + "epoch": 2.72, + "learning_rate": 2.1720592243750615e-07, + "logps/chosen": -164.51657104492188, + "logps/margins": -7.758332252502441, + "logps/rejected": -156.7582550048828, + "loss": 0.7068, + "rewards/chosen": 2.071967124938965, + "rewards/margins": 0.8661059141159058, + "rewards/rejected": 1.2058608531951904, + "step": 10880 + }, + { + "accuracy": 0.7124999761581421, + "epoch": 2.72, + "learning_rate": 2.134060435729296e-07, + "logps/chosen": -173.61770629882812, + "logps/margins": -22.356082916259766, + "logps/rejected": -151.2616424560547, + "loss": 0.6662, + "rewards/chosen": 1.9124956130981445, + "rewards/margins": 0.5881538987159729, + "rewards/rejected": 1.3243415355682373, + "step": 10890 + }, + { + "accuracy": 0.6000000238418579, + "epoch": 2.73, + "learning_rate": 2.0963897203416517e-07, + "logps/chosen": -182.8125762939453, + "logps/margins": -26.7191162109375, + "logps/rejected": -156.0934600830078, + "loss": 0.6464, + "rewards/chosen": 1.8550952672958374, + "rewards/margins": 0.5680166482925415, + "rewards/rejected": 1.287078619003296, + "step": 10900 + }, + { + "accuracy": 0.637499988079071, + "epoch": 2.73, + "learning_rate": 2.0590473364072772e-07, + "logps/chosen": -138.01852416992188, + "logps/margins": 24.44509506225586, + "logps/rejected": -162.46360778808594, + "loss": 0.6972, + "rewards/chosen": 1.578711748123169, + "rewards/margins": 0.45565444231033325, + "rewards/rejected": 1.1230573654174805, + "step": 10910 + }, + { + "accuracy": 0.6499999761581421, + "epoch": 2.73, + "learning_rate": 2.022033539870971e-07, + "logps/chosen": -135.84890747070312, + "logps/margins": 38.05684280395508, + "logps/rejected": -173.90576171875, + "loss": 0.7295, + "rewards/chosen": 1.5879027843475342, + "rewards/margins": 0.3355152904987335, + "rewards/rejected": 1.252387523651123, + "step": 10920 + }, + { + "accuracy": 0.637499988079071, + "epoch": 2.73, + "learning_rate": 1.9853485844253727e-07, + "logps/chosen": -156.9248504638672, + "logps/margins": -0.1229104995727539, + "logps/rejected": -156.8019256591797, + "loss": 0.712, + "rewards/chosen": 1.825823187828064, + "rewards/margins": 0.16458019614219666, + "rewards/rejected": 1.6612430810928345, + "step": 10930 + }, + { + "accuracy": 0.5874999761581421, + "epoch": 2.73, + "learning_rate": 1.9489927215092574e-07, + "logps/chosen": -155.16860961914062, + "logps/margins": -9.452402114868164, + "logps/rejected": -145.71620178222656, + "loss": 0.7125, + "rewards/chosen": 1.4233394861221313, + "rewards/margins": 0.3584577143192291, + "rewards/rejected": 1.064881682395935, + "step": 10940 + }, + { + "accuracy": 0.5874999761581421, + "epoch": 2.74, + "learning_rate": 1.9129662003057825e-07, + "logps/chosen": -185.6385955810547, + "logps/margins": -45.60307693481445, + "logps/rejected": -140.03553771972656, + "loss": 0.7235, + "rewards/chosen": 1.8071057796478271, + "rewards/margins": 0.3690805435180664, + "rewards/rejected": 1.4380252361297607, + "step": 10950 + }, + { + "accuracy": 0.6875, + "epoch": 2.74, + "learning_rate": 1.8772692677408112e-07, + "logps/chosen": -149.22068786621094, + "logps/margins": -7.471609592437744, + "logps/rejected": -141.74908447265625, + "loss": 0.6516, + "rewards/chosen": 1.6673290729522705, + "rewards/margins": 0.42395859956741333, + "rewards/rejected": 1.243370771408081, + "step": 10960 + }, + { + "accuracy": 0.5375000238418579, + "epoch": 2.74, + "learning_rate": 1.8419021684812022e-07, + "logps/chosen": -158.24844360351562, + "logps/margins": 11.330198287963867, + "logps/rejected": -169.57861328125, + "loss": 0.747, + "rewards/chosen": 1.7867847681045532, + "rewards/margins": 0.16895949840545654, + "rewards/rejected": 1.6178252696990967, + "step": 10970 + }, + { + "accuracy": 0.574999988079071, + "epoch": 2.75, + "learning_rate": 1.8068651449331233e-07, + "logps/chosen": -142.6320037841797, + "logps/margins": 18.9156494140625, + "logps/rejected": -161.54766845703125, + "loss": 0.7139, + "rewards/chosen": 1.4414952993392944, + "rewards/margins": 0.16476209461688995, + "rewards/rejected": 1.276733160018921, + "step": 10980 + }, + { + "accuracy": 0.5625, + "epoch": 2.75, + "learning_rate": 1.7721584372404234e-07, + "logps/chosen": -143.33416748046875, + "logps/margins": 2.0921378135681152, + "logps/rejected": -145.4263153076172, + "loss": 0.7248, + "rewards/chosen": 1.5147507190704346, + "rewards/margins": 0.3626391887664795, + "rewards/rejected": 1.1521114110946655, + "step": 10990 + }, + { + "accuracy": 0.550000011920929, + "epoch": 2.75, + "learning_rate": 1.7377822832829516e-07, + "logps/chosen": -139.17840576171875, + "logps/margins": 6.1546101570129395, + "logps/rejected": -145.33302307128906, + "loss": 0.729, + "rewards/chosen": 1.3594213724136353, + "rewards/margins": 0.37812286615371704, + "rewards/rejected": 0.9812984466552734, + "step": 11000 + }, + { + "accuracy": 0.625, + "epoch": 2.75, + "learning_rate": 1.703736918674953e-07, + "logps/chosen": -152.59622192382812, + "logps/margins": -3.816408634185791, + "logps/rejected": -148.77981567382812, + "loss": 0.7427, + "rewards/chosen": 1.524777889251709, + "rewards/margins": 0.2605270743370056, + "rewards/rejected": 1.2642507553100586, + "step": 11010 + }, + { + "accuracy": 0.612500011920929, + "epoch": 2.75, + "learning_rate": 1.6700225767634192e-07, + "logps/chosen": -128.08567810058594, + "logps/margins": 3.3214144706726074, + "logps/rejected": -131.4071044921875, + "loss": 0.7608, + "rewards/chosen": 1.5971324443817139, + "rewards/margins": 0.35593709349632263, + "rewards/rejected": 1.2411954402923584, + "step": 11020 + }, + { + "accuracy": 0.574999988079071, + "epoch": 2.76, + "learning_rate": 1.6366394886265348e-07, + "logps/chosen": -139.92404174804688, + "logps/margins": -4.101496696472168, + "logps/rejected": -135.82254028320312, + "loss": 0.7421, + "rewards/chosen": 1.2594581842422485, + "rewards/margins": 0.17724508047103882, + "rewards/rejected": 1.082213044166565, + "step": 11030 + }, + { + "accuracy": 0.6625000238418579, + "epoch": 2.76, + "learning_rate": 1.6035878830720608e-07, + "logps/chosen": -153.57931518554688, + "logps/margins": 0.20193548500537872, + "logps/rejected": -153.78128051757812, + "loss": 0.7349, + "rewards/chosen": 1.7891979217529297, + "rewards/margins": 0.5086416006088257, + "rewards/rejected": 1.280556321144104, + "step": 11040 + }, + { + "accuracy": 0.550000011920929, + "epoch": 2.76, + "learning_rate": 1.5708679866357712e-07, + "logps/chosen": -184.7306365966797, + "logps/margins": -36.678001403808594, + "logps/rejected": -148.05264282226562, + "loss": 0.6865, + "rewards/chosen": 1.7966684103012085, + "rewards/margins": 0.5260520577430725, + "rewards/rejected": 1.2706161737442017, + "step": 11050 + }, + { + "accuracy": 0.5249999761581421, + "epoch": 2.77, + "learning_rate": 1.538480023579919e-07, + "logps/chosen": -175.147216796875, + "logps/margins": -30.69257164001465, + "logps/rejected": -144.4546356201172, + "loss": 0.7779, + "rewards/chosen": 1.5998752117156982, + "rewards/margins": 0.26564091444015503, + "rewards/rejected": 1.3342342376708984, + "step": 11060 + }, + { + "accuracy": 0.5874999761581421, + "epoch": 2.77, + "learning_rate": 1.5064242158916719e-07, + "logps/chosen": -167.35305786132812, + "logps/margins": -20.84743881225586, + "logps/rejected": -146.50559997558594, + "loss": 0.7425, + "rewards/chosen": 1.4591774940490723, + "rewards/margins": 0.46168002486228943, + "rewards/rejected": 0.9974973797798157, + "step": 11070 + }, + { + "accuracy": 0.625, + "epoch": 2.77, + "learning_rate": 1.474700783281613e-07, + "logps/chosen": -151.85214233398438, + "logps/margins": 12.843159675598145, + "logps/rejected": -164.69532775878906, + "loss": 0.5985, + "rewards/chosen": 1.470223307609558, + "rewards/margins": 0.5629884600639343, + "rewards/rejected": 0.907234787940979, + "step": 11080 + }, + { + "accuracy": 0.637499988079071, + "epoch": 2.77, + "learning_rate": 1.4433099431822083e-07, + "logps/chosen": -137.92367553710938, + "logps/margins": -1.7516624927520752, + "logps/rejected": -136.1719970703125, + "loss": 0.664, + "rewards/chosen": 1.48823082447052, + "rewards/margins": 0.43382516503334045, + "rewards/rejected": 1.0544055700302124, + "step": 11090 + }, + { + "accuracy": 0.6625000238418579, + "epoch": 2.77, + "learning_rate": 1.4122519107463594e-07, + "logps/chosen": -154.58505249023438, + "logps/margins": -18.09127426147461, + "logps/rejected": -136.4937744140625, + "loss": 0.7393, + "rewards/chosen": 1.5577672719955444, + "rewards/margins": 0.3653559684753418, + "rewards/rejected": 1.192411184310913, + "step": 11100 + }, + { + "accuracy": 0.6625000238418579, + "epoch": 2.78, + "learning_rate": 1.3815268988458863e-07, + "logps/chosen": -155.58987426757812, + "logps/margins": -6.7224225997924805, + "logps/rejected": -148.86746215820312, + "loss": 0.71, + "rewards/chosen": 1.6583473682403564, + "rewards/margins": 0.523197591304779, + "rewards/rejected": 1.1351497173309326, + "step": 11110 + }, + { + "accuracy": 0.6000000238418579, + "epoch": 2.78, + "learning_rate": 1.3511351180700905e-07, + "logps/chosen": -142.68112182617188, + "logps/margins": 4.823821067810059, + "logps/rejected": -147.5049285888672, + "loss": 0.6787, + "rewards/chosen": 1.5983814001083374, + "rewards/margins": 0.2833515703678131, + "rewards/rejected": 1.3150298595428467, + "step": 11120 + }, + { + "accuracy": 0.612500011920929, + "epoch": 2.78, + "learning_rate": 1.321076776724306e-07, + "logps/chosen": -147.7661895751953, + "logps/margins": 2.999340057373047, + "logps/rejected": -150.76551818847656, + "loss": 0.6653, + "rewards/chosen": 1.389024019241333, + "rewards/margins": 0.29787057638168335, + "rewards/rejected": 1.0911533832550049, + "step": 11130 + }, + { + "accuracy": 0.612500011920929, + "epoch": 2.79, + "learning_rate": 1.291352080828473e-07, + "logps/chosen": -173.09169006347656, + "logps/margins": -17.616104125976562, + "logps/rejected": -155.47557067871094, + "loss": 0.735, + "rewards/chosen": 1.629541039466858, + "rewards/margins": 0.2287517786026001, + "rewards/rejected": 1.400789499282837, + "step": 11140 + }, + { + "accuracy": 0.5625, + "epoch": 2.79, + "learning_rate": 1.2619612341157217e-07, + "logps/chosen": -187.9266357421875, + "logps/margins": -58.33623504638672, + "logps/rejected": -129.59039306640625, + "loss": 0.7554, + "rewards/chosen": 1.7209575176239014, + "rewards/margins": 0.3441689610481262, + "rewards/rejected": 1.3767887353897095, + "step": 11150 + }, + { + "accuracy": 0.5, + "epoch": 2.79, + "learning_rate": 1.2329044380309852e-07, + "logps/chosen": -142.24566650390625, + "logps/margins": 10.193199157714844, + "logps/rejected": -152.43887329101562, + "loss": 0.674, + "rewards/chosen": 1.5469690561294556, + "rewards/margins": 0.23745055496692657, + "rewards/rejected": 1.309518575668335, + "step": 11160 + }, + { + "accuracy": 0.75, + "epoch": 2.79, + "learning_rate": 1.2041818917296057e-07, + "logps/chosen": -182.3783416748047, + "logps/margins": -11.2783203125, + "logps/rejected": -171.1000518798828, + "loss": 0.6506, + "rewards/chosen": 1.8015865087509155, + "rewards/margins": 0.7242036461830139, + "rewards/rejected": 1.077383041381836, + "step": 11170 + }, + { + "accuracy": 0.512499988079071, + "epoch": 2.79, + "learning_rate": 1.175793792075991e-07, + "logps/chosen": -144.62515258789062, + "logps/margins": -9.925000190734863, + "logps/rejected": -134.7001190185547, + "loss": 0.7338, + "rewards/chosen": 1.4733350276947021, + "rewards/margins": 0.11428195238113403, + "rewards/rejected": 1.3590528964996338, + "step": 11180 + }, + { + "accuracy": 0.5874999761581421, + "epoch": 2.8, + "learning_rate": 1.1477403336422333e-07, + "logps/chosen": -161.16506958007812, + "logps/margins": 14.0189208984375, + "logps/rejected": -175.18397521972656, + "loss": 0.8173, + "rewards/chosen": 1.5556910037994385, + "rewards/margins": 0.2830396592617035, + "rewards/rejected": 1.2726513147354126, + "step": 11190 + }, + { + "accuracy": 0.550000011920929, + "epoch": 2.8, + "learning_rate": 1.1200217087067922e-07, + "logps/chosen": -164.09320068359375, + "logps/margins": -18.81949806213379, + "logps/rejected": -145.27369689941406, + "loss": 0.7179, + "rewards/chosen": 1.3783905506134033, + "rewards/margins": 0.14728017151355743, + "rewards/rejected": 1.2311104536056519, + "step": 11200 + }, + { + "accuracy": 0.5625, + "epoch": 2.8, + "learning_rate": 1.0926381072532022e-07, + "logps/chosen": -180.49819946289062, + "logps/margins": -36.66614532470703, + "logps/rejected": -143.83206176757812, + "loss": 0.7633, + "rewards/chosen": 1.599501371383667, + "rewards/margins": 0.386269748210907, + "rewards/rejected": 1.2132318019866943, + "step": 11210 + }, + { + "accuracy": 0.5, + "epoch": 2.81, + "learning_rate": 1.0655897169687179e-07, + "logps/chosen": -170.09561157226562, + "logps/margins": -35.01601028442383, + "logps/rejected": -135.07962036132812, + "loss": 0.7063, + "rewards/chosen": 1.3766586780548096, + "rewards/margins": 0.13393130898475647, + "rewards/rejected": 1.242727518081665, + "step": 11220 + }, + { + "accuracy": 0.550000011920929, + "epoch": 2.81, + "learning_rate": 1.0388767232430708e-07, + "logps/chosen": -150.19448852539062, + "logps/margins": 16.94122314453125, + "logps/rejected": -167.1356964111328, + "loss": 0.745, + "rewards/chosen": 1.4577341079711914, + "rewards/margins": 0.14234304428100586, + "rewards/rejected": 1.3153913021087646, + "step": 11230 + }, + { + "accuracy": 0.625, + "epoch": 2.81, + "learning_rate": 1.0151219443199523e-07, + "logps/chosen": -166.55221557617188, + "logps/margins": -16.685935974121094, + "logps/rejected": -149.86627197265625, + "loss": 0.6837, + "rewards/chosen": 1.5495282411575317, + "rewards/margins": 0.36587703227996826, + "rewards/rejected": 1.1836512088775635, + "step": 11240 + }, + { + "accuracy": 0.6499999761581421, + "epoch": 2.81, + "learning_rate": 9.890467065706011e-08, + "logps/chosen": -154.1913604736328, + "logps/margins": -7.0385918617248535, + "logps/rejected": -147.15274047851562, + "loss": 0.6777, + "rewards/chosen": 1.542001724243164, + "rewards/margins": 0.44431352615356445, + "rewards/rejected": 1.0976883172988892, + "step": 11250 + }, + { + "accuracy": 0.5375000238418579, + "epoch": 2.81, + "learning_rate": 9.633073900060519e-08, + "logps/chosen": -154.37692260742188, + "logps/margins": -11.257157325744629, + "logps/rejected": -143.11978149414062, + "loss": 0.6613, + "rewards/chosen": 1.5631589889526367, + "rewards/margins": 0.19104748964309692, + "rewards/rejected": 1.372111439704895, + "step": 11260 + }, + { + "accuracy": 0.48750001192092896, + "epoch": 2.82, + "learning_rate": 9.379041710436465e-08, + "logps/chosen": -157.76321411132812, + "logps/margins": -26.868179321289062, + "logps/rejected": -130.89503479003906, + "loss": 0.7502, + "rewards/chosen": 1.6427370309829712, + "rewards/margins": 0.31062236428260803, + "rewards/rejected": 1.332114815711975, + "step": 11270 + }, + { + "accuracy": 0.550000011920929, + "epoch": 2.82, + "learning_rate": 9.128372237970917e-08, + "logps/chosen": -168.6049346923828, + "logps/margins": -20.591876983642578, + "logps/rejected": -148.01304626464844, + "loss": 0.6873, + "rewards/chosen": 1.6151784658432007, + "rewards/margins": 0.2944037616252899, + "rewards/rejected": 1.320774793624878, + "step": 11280 + }, + { + "accuracy": 0.5375000238418579, + "epoch": 2.82, + "learning_rate": 8.881067200753102e-08, + "logps/chosen": -155.73092651367188, + "logps/margins": 16.567554473876953, + "logps/rejected": -172.29849243164062, + "loss": 0.7136, + "rewards/chosen": 1.7404823303222656, + "rewards/margins": 0.3331090807914734, + "rewards/rejected": 1.4073731899261475, + "step": 11290 + }, + { + "accuracy": 0.5874999761581421, + "epoch": 2.83, + "learning_rate": 8.637128293812246e-08, + "logps/chosen": -138.0655975341797, + "logps/margins": 16.163190841674805, + "logps/rejected": -154.22877502441406, + "loss": 0.7187, + "rewards/chosen": 1.786555528640747, + "rewards/margins": 0.40820708870887756, + "rewards/rejected": 1.378348469734192, + "step": 11300 + }, + { + "accuracy": 0.6875, + "epoch": 2.83, + "learning_rate": 8.396557189106203e-08, + "logps/chosen": -143.49044799804688, + "logps/margins": -19.632183074951172, + "logps/rejected": -123.85826110839844, + "loss": 0.6902, + "rewards/chosen": 1.7040958404541016, + "rewards/margins": 0.6383683085441589, + "rewards/rejected": 1.0657275915145874, + "step": 11310 + }, + { + "accuracy": 0.574999988079071, + "epoch": 2.83, + "learning_rate": 8.159355535509895e-08, + "logps/chosen": -153.81671142578125, + "logps/margins": 2.67878794670105, + "logps/rejected": -156.49549865722656, + "loss": 0.694, + "rewards/chosen": 1.8822914361953735, + "rewards/margins": 0.30913814902305603, + "rewards/rejected": 1.5731532573699951, + "step": 11320 + }, + { + "accuracy": 0.550000011920929, + "epoch": 2.83, + "learning_rate": 7.925524958803998e-08, + "logps/chosen": -138.56051635742188, + "logps/margins": 15.390783309936523, + "logps/rejected": -153.95132446289062, + "loss": 0.6687, + "rewards/chosen": 1.5033657550811768, + "rewards/margins": 0.15062126517295837, + "rewards/rejected": 1.3527443408966064, + "step": 11330 + }, + { + "accuracy": 0.675000011920929, + "epoch": 2.83, + "learning_rate": 7.69506706166373e-08, + "logps/chosen": -149.4626007080078, + "logps/margins": 3.585559129714966, + "logps/rejected": -153.04815673828125, + "loss": 0.6789, + "rewards/chosen": 1.7568851709365845, + "rewards/margins": 0.4971792697906494, + "rewards/rejected": 1.2597057819366455, + "step": 11340 + }, + { + "accuracy": 0.5375000238418579, + "epoch": 2.84, + "learning_rate": 7.467983423648129e-08, + "logps/chosen": -166.32223510742188, + "logps/margins": -0.3509170413017273, + "logps/rejected": -165.9713134765625, + "loss": 0.7815, + "rewards/chosen": 1.7449209690093994, + "rewards/margins": 0.3645118474960327, + "rewards/rejected": 1.3804090023040771, + "step": 11350 + }, + { + "accuracy": 0.44999998807907104, + "epoch": 2.84, + "learning_rate": 7.244275601188955e-08, + "logps/chosen": -151.33596801757812, + "logps/margins": 6.247866630554199, + "logps/rejected": -157.58383178710938, + "loss": 0.7574, + "rewards/chosen": 1.3681541681289673, + "rewards/margins": -0.04510964825749397, + "rewards/rejected": 1.4132637977600098, + "step": 11360 + }, + { + "accuracy": 0.612500011920929, + "epoch": 2.84, + "learning_rate": 7.023945127580034e-08, + "logps/chosen": -164.22190856933594, + "logps/margins": 21.83420181274414, + "logps/rejected": -186.05612182617188, + "loss": 0.7739, + "rewards/chosen": 1.5980379581451416, + "rewards/margins": 0.25258588790893555, + "rewards/rejected": 1.345452070236206, + "step": 11370 + }, + { + "accuracy": 0.675000011920929, + "epoch": 2.84, + "learning_rate": 6.806993512967097e-08, + "logps/chosen": -161.92803955078125, + "logps/margins": -27.416162490844727, + "logps/rejected": -134.51187133789062, + "loss": 0.6719, + "rewards/chosen": 1.6502301692962646, + "rewards/margins": 0.4730203151702881, + "rewards/rejected": 1.1772098541259766, + "step": 11380 + }, + { + "accuracy": 0.5625, + "epoch": 2.85, + "learning_rate": 6.593422244336845e-08, + "logps/chosen": -178.5666961669922, + "logps/margins": -14.481927871704102, + "logps/rejected": -164.0847930908203, + "loss": 0.7558, + "rewards/chosen": 1.6670430898666382, + "rewards/margins": 0.10642099380493164, + "rewards/rejected": 1.5606218576431274, + "step": 11390 + }, + { + "accuracy": 0.5249999761581421, + "epoch": 2.85, + "learning_rate": 6.383232785507287e-08, + "logps/chosen": -137.44729614257812, + "logps/margins": 0.025350570678710938, + "logps/rejected": -137.47265625, + "loss": 0.7185, + "rewards/chosen": 1.37907874584198, + "rewards/margins": 0.00710330018773675, + "rewards/rejected": 1.3719755411148071, + "step": 11400 + }, + { + "accuracy": 0.612500011920929, + "epoch": 2.85, + "learning_rate": 6.176426577117479e-08, + "logps/chosen": -142.7912139892578, + "logps/margins": -0.2945447862148285, + "logps/rejected": -142.49667358398438, + "loss": 0.7395, + "rewards/chosen": 1.586849331855774, + "rewards/margins": 0.10084810107946396, + "rewards/rejected": 1.4860012531280518, + "step": 11410 + }, + { + "accuracy": 0.6000000238418579, + "epoch": 2.85, + "learning_rate": 5.973005036617575e-08, + "logps/chosen": -136.79830932617188, + "logps/margins": 19.08477783203125, + "logps/rejected": -155.8831024169922, + "loss": 0.7541, + "rewards/chosen": 1.58281672000885, + "rewards/margins": 0.25617140531539917, + "rewards/rejected": 1.3266451358795166, + "step": 11420 + }, + { + "accuracy": 0.574999988079071, + "epoch": 2.86, + "learning_rate": 5.772969558259345e-08, + "logps/chosen": -169.52952575683594, + "logps/margins": -34.450870513916016, + "logps/rejected": -135.07864379882812, + "loss": 0.6997, + "rewards/chosen": 1.8916950225830078, + "rewards/margins": 0.4664227068424225, + "rewards/rejected": 1.4252722263336182, + "step": 11430 + }, + { + "accuracy": 0.5625, + "epoch": 2.86, + "learning_rate": 5.576321513086402e-08, + "logps/chosen": -149.6195526123047, + "logps/margins": -26.042837142944336, + "logps/rejected": -123.57672119140625, + "loss": 0.7043, + "rewards/chosen": 1.3855243921279907, + "rewards/margins": 0.07952861487865448, + "rewards/rejected": 1.3059957027435303, + "step": 11440 + }, + { + "accuracy": 0.625, + "epoch": 2.86, + "learning_rate": 5.383062248924875e-08, + "logps/chosen": -158.70742797851562, + "logps/margins": 6.9450483322143555, + "logps/rejected": -165.65248107910156, + "loss": 0.7669, + "rewards/chosen": 1.796149492263794, + "rewards/margins": 0.22773614525794983, + "rewards/rejected": 1.5684131383895874, + "step": 11450 + }, + { + "accuracy": 0.574999988079071, + "epoch": 2.87, + "learning_rate": 5.193193090374193e-08, + "logps/chosen": -149.7436065673828, + "logps/margins": -6.487473487854004, + "logps/rejected": -143.25613403320312, + "loss": 0.6843, + "rewards/chosen": 1.5230000019073486, + "rewards/margins": 0.2523488402366638, + "rewards/rejected": 1.27065110206604, + "step": 11460 + }, + { + "accuracy": 0.625, + "epoch": 2.87, + "learning_rate": 5.0067153387980404e-08, + "logps/chosen": -152.24130249023438, + "logps/margins": 8.717564582824707, + "logps/rejected": -160.9588623046875, + "loss": 0.6831, + "rewards/chosen": 1.5380882024765015, + "rewards/margins": 0.3861217200756073, + "rewards/rejected": 1.1519664525985718, + "step": 11470 + }, + { + "accuracy": 0.6499999761581421, + "epoch": 2.87, + "learning_rate": 4.823630272315305e-08, + "logps/chosen": -155.7375946044922, + "logps/margins": -0.012667846865952015, + "logps/rejected": -155.7249298095703, + "loss": 0.6334, + "rewards/chosen": 1.554757833480835, + "rewards/margins": 0.5306918025016785, + "rewards/rejected": 1.0240660905838013, + "step": 11480 + }, + { + "accuracy": 0.550000011920929, + "epoch": 2.87, + "learning_rate": 4.643939145791421e-08, + "logps/chosen": -166.73074340820312, + "logps/margins": -10.60809326171875, + "logps/rejected": -156.12265014648438, + "loss": 0.7377, + "rewards/chosen": 1.7289854288101196, + "rewards/margins": 0.29952967166900635, + "rewards/rejected": 1.4294557571411133, + "step": 11490 + }, + { + "accuracy": 0.574999988079071, + "epoch": 2.88, + "learning_rate": 4.4676431908298176e-08, + "logps/chosen": -179.9239959716797, + "logps/margins": -29.510412216186523, + "logps/rejected": -150.41355895996094, + "loss": 0.7529, + "rewards/chosen": 1.361128807067871, + "rewards/margins": 0.3105676770210266, + "rewards/rejected": 1.0505611896514893, + "step": 11500 + }, + { + "accuracy": 0.5625, + "epoch": 2.88, + "learning_rate": 4.2947436157633196e-08, + "logps/chosen": -139.3575897216797, + "logps/margins": -0.09679107367992401, + "logps/rejected": -139.26080322265625, + "loss": 0.6831, + "rewards/chosen": 1.406343698501587, + "rewards/margins": 0.2929428219795227, + "rewards/rejected": 1.1134008169174194, + "step": 11510 + }, + { + "accuracy": 0.5874999761581421, + "epoch": 2.88, + "learning_rate": 4.125241605646091e-08, + "logps/chosen": -162.6851348876953, + "logps/margins": -10.55716323852539, + "logps/rejected": -152.12794494628906, + "loss": 0.7146, + "rewards/chosen": 1.6262986660003662, + "rewards/margins": 0.38315635919570923, + "rewards/rejected": 1.2431422472000122, + "step": 11520 + }, + { + "accuracy": 0.550000011920929, + "epoch": 2.88, + "learning_rate": 3.95913832224526e-08, + "logps/chosen": -161.24261474609375, + "logps/margins": -3.4222519397735596, + "logps/rejected": -157.8203582763672, + "loss": 0.7361, + "rewards/chosen": 1.5215985774993896, + "rewards/margins": 0.24218225479125977, + "rewards/rejected": 1.2794163227081299, + "step": 11530 + }, + { + "accuracy": 0.574999988079071, + "epoch": 2.88, + "learning_rate": 3.796434904033086e-08, + "logps/chosen": -147.24012756347656, + "logps/margins": 4.421202182769775, + "logps/rejected": -151.6613311767578, + "loss": 0.6659, + "rewards/chosen": 1.5415115356445312, + "rewards/margins": 0.42680519819259644, + "rewards/rejected": 1.11470627784729, + "step": 11540 + }, + { + "accuracy": 0.574999988079071, + "epoch": 2.89, + "learning_rate": 3.637132466179194e-08, + "logps/chosen": -158.4945526123047, + "logps/margins": -13.046290397644043, + "logps/rejected": -145.44825744628906, + "loss": 0.6442, + "rewards/chosen": 1.4034132957458496, + "rewards/margins": 0.34212860465049744, + "rewards/rejected": 1.0612847805023193, + "step": 11550 + }, + { + "accuracy": 0.5375000238418579, + "epoch": 2.89, + "learning_rate": 3.481232100542797e-08, + "logps/chosen": -162.7205047607422, + "logps/margins": 5.538206577301025, + "logps/rejected": -168.25872802734375, + "loss": 0.7483, + "rewards/chosen": 1.5656076669692993, + "rewards/margins": 0.29567545652389526, + "rewards/rejected": 1.2699320316314697, + "step": 11560 + }, + { + "accuracy": 0.574999988079071, + "epoch": 2.89, + "learning_rate": 3.328734875665429e-08, + "logps/chosen": -170.35565185546875, + "logps/margins": 3.424835205078125, + "logps/rejected": -173.78048706054688, + "loss": 0.6905, + "rewards/chosen": 1.5593115091323853, + "rewards/margins": 0.34766140580177307, + "rewards/rejected": 1.2116501331329346, + "step": 11570 + }, + { + "accuracy": 0.5874999761581421, + "epoch": 2.9, + "learning_rate": 3.1796418367633366e-08, + "logps/chosen": -162.00790405273438, + "logps/margins": -0.07475139945745468, + "logps/rejected": -161.9331512451172, + "loss": 0.7382, + "rewards/chosen": 1.778586983680725, + "rewards/margins": 0.27168551087379456, + "rewards/rejected": 1.506901502609253, + "step": 11580 + }, + { + "accuracy": 0.550000011920929, + "epoch": 2.9, + "learning_rate": 3.033954005720652e-08, + "logps/chosen": -138.6369171142578, + "logps/margins": 2.0734264850616455, + "logps/rejected": -140.71035766601562, + "loss": 0.7376, + "rewards/chosen": 1.4892466068267822, + "rewards/margins": 0.15492983162403107, + "rewards/rejected": 1.334316611289978, + "step": 11590 + }, + { + "accuracy": 0.6499999761581421, + "epoch": 2.9, + "learning_rate": 2.8916723810820113e-08, + "logps/chosen": -163.9577178955078, + "logps/margins": 3.078484296798706, + "logps/rejected": -167.0362091064453, + "loss": 0.6911, + "rewards/chosen": 1.7150964736938477, + "rewards/margins": 0.46825605630874634, + "rewards/rejected": 1.246840476989746, + "step": 11600 + }, + { + "accuracy": 0.637499988079071, + "epoch": 2.9, + "learning_rate": 2.7527979380460035e-08, + "logps/chosen": -196.16207885742188, + "logps/margins": -24.925968170166016, + "logps/rejected": -171.23611450195312, + "loss": 0.6953, + "rewards/chosen": 1.6171023845672607, + "rewards/margins": 0.24374112486839294, + "rewards/rejected": 1.3733612298965454, + "step": 11610 + }, + { + "accuracy": 0.6499999761581421, + "epoch": 2.91, + "learning_rate": 2.6173316284583417e-08, + "logps/chosen": -174.43447875976562, + "logps/margins": 10.989045143127441, + "logps/rejected": -185.42352294921875, + "loss": 0.6619, + "rewards/chosen": 1.7970306873321533, + "rewards/margins": 0.4700183868408203, + "rewards/rejected": 1.3270121812820435, + "step": 11620 + }, + { + "accuracy": 0.5, + "epoch": 2.91, + "learning_rate": 2.4852743808055358e-08, + "logps/chosen": -141.4775390625, + "logps/margins": 31.09872817993164, + "logps/rejected": -172.57626342773438, + "loss": 0.7814, + "rewards/chosen": 1.389943242073059, + "rewards/margins": 0.10575101524591446, + "rewards/rejected": 1.2841922044754028, + "step": 11630 + }, + { + "accuracy": 0.550000011920929, + "epoch": 2.91, + "learning_rate": 2.3566271002081753e-08, + "logps/chosen": -174.27285766601562, + "logps/margins": -17.093669891357422, + "logps/rejected": -157.17916870117188, + "loss": 0.6835, + "rewards/chosen": 1.6831748485565186, + "rewards/margins": 0.2387402057647705, + "rewards/rejected": 1.4444347620010376, + "step": 11640 + }, + { + "accuracy": 0.6625000238418579, + "epoch": 2.91, + "learning_rate": 2.2313906684149898e-08, + "logps/chosen": -137.17764282226562, + "logps/margins": 2.9303112030029297, + "logps/rejected": -140.10794067382812, + "loss": 0.6391, + "rewards/chosen": 1.595644235610962, + "rewards/margins": 0.42631950974464417, + "rewards/rejected": 1.1693246364593506, + "step": 11650 + }, + { + "accuracy": 0.699999988079071, + "epoch": 2.92, + "learning_rate": 2.1095659437967987e-08, + "logps/chosen": -179.24227905273438, + "logps/margins": -9.806901931762695, + "logps/rejected": -169.43536376953125, + "loss": 0.6751, + "rewards/chosen": 1.7254348993301392, + "rewards/margins": 0.440643846988678, + "rewards/rejected": 1.2847912311553955, + "step": 11660 + }, + { + "accuracy": 0.6625000238418579, + "epoch": 2.92, + "learning_rate": 1.9911537613405697e-08, + "logps/chosen": -164.1608428955078, + "logps/margins": -9.968618392944336, + "logps/rejected": -154.19223022460938, + "loss": 0.6518, + "rewards/chosen": 1.8670356273651123, + "rewards/margins": 0.5709142684936523, + "rewards/rejected": 1.29612135887146, + "step": 11670 + }, + { + "accuracy": 0.6875, + "epoch": 2.92, + "learning_rate": 1.876154932643648e-08, + "logps/chosen": -136.9475555419922, + "logps/margins": -0.3851589262485504, + "logps/rejected": -136.56239318847656, + "loss": 0.7066, + "rewards/chosen": 1.8026936054229736, + "rewards/margins": 0.38109090924263, + "rewards/rejected": 1.4216026067733765, + "step": 11680 + }, + { + "accuracy": 0.5625, + "epoch": 2.92, + "learning_rate": 1.7645702459082592e-08, + "logps/chosen": -170.3186798095703, + "logps/margins": -16.116680145263672, + "logps/rejected": -154.20199584960938, + "loss": 0.6607, + "rewards/chosen": 1.8988637924194336, + "rewards/margins": 0.32062238454818726, + "rewards/rejected": 1.5782413482666016, + "step": 11690 + }, + { + "accuracy": 0.5, + "epoch": 2.92, + "learning_rate": 1.6564004659361254e-08, + "logps/chosen": -129.0775909423828, + "logps/margins": -13.316055297851562, + "logps/rejected": -115.76153564453125, + "loss": 0.7782, + "rewards/chosen": 1.3931394815444946, + "rewards/margins": 0.16251839697360992, + "rewards/rejected": 1.2306208610534668, + "step": 11700 + }, + { + "accuracy": 0.625, + "epoch": 2.93, + "learning_rate": 1.5516463341230803e-08, + "logps/chosen": -154.0155029296875, + "logps/margins": 0.011992263607680798, + "logps/rejected": -154.0275115966797, + "loss": 0.74, + "rewards/chosen": 1.7636473178863525, + "rewards/margins": 0.31502288579940796, + "rewards/rejected": 1.4486243724822998, + "step": 11710 + }, + { + "accuracy": 0.5625, + "epoch": 2.93, + "learning_rate": 1.450308568454295e-08, + "logps/chosen": -170.80027770996094, + "logps/margins": 20.752635955810547, + "logps/rejected": -191.5529022216797, + "loss": 0.7466, + "rewards/chosen": 1.7657569646835327, + "rewards/margins": 0.0858931690454483, + "rewards/rejected": 1.679863691329956, + "step": 11720 + }, + { + "accuracy": 0.574999988079071, + "epoch": 2.93, + "learning_rate": 1.3523878634989495e-08, + "logps/chosen": -179.47283935546875, + "logps/margins": -5.985913276672363, + "logps/rejected": -173.4869384765625, + "loss": 0.7066, + "rewards/chosen": 1.4171624183654785, + "rewards/margins": 0.08701962232589722, + "rewards/rejected": 1.3301429748535156, + "step": 11730 + }, + { + "accuracy": 0.6499999761581421, + "epoch": 2.94, + "learning_rate": 1.2578848904057916e-08, + "logps/chosen": -154.65774536132812, + "logps/margins": 18.893810272216797, + "logps/rejected": -173.55154418945312, + "loss": 0.6439, + "rewards/chosen": 1.8629754781723022, + "rewards/margins": 0.47096118330955505, + "rewards/rejected": 1.3920143842697144, + "step": 11740 + }, + { + "accuracy": 0.625, + "epoch": 2.94, + "learning_rate": 1.1668002968984737e-08, + "logps/chosen": -170.75564575195312, + "logps/margins": -26.661701202392578, + "logps/rejected": -144.0939178466797, + "loss": 0.6998, + "rewards/chosen": 1.5653568506240845, + "rewards/margins": 0.5123574733734131, + "rewards/rejected": 1.0529993772506714, + "step": 11750 + }, + { + "accuracy": 0.637499988079071, + "epoch": 2.94, + "learning_rate": 1.0791347072710012e-08, + "logps/chosen": -134.86563110351562, + "logps/margins": -0.4014686644077301, + "logps/rejected": -134.4641571044922, + "loss": 0.7577, + "rewards/chosen": 1.4747288227081299, + "rewards/margins": 0.3959568440914154, + "rewards/rejected": 1.0787720680236816, + "step": 11760 + }, + { + "accuracy": 0.612500011920929, + "epoch": 2.94, + "learning_rate": 9.948887223834026e-09, + "logps/chosen": -140.5831298828125, + "logps/margins": 25.226476669311523, + "logps/rejected": -165.80960083007812, + "loss": 0.6536, + "rewards/chosen": 1.7601194381713867, + "rewards/margins": 0.32764285802841187, + "rewards/rejected": 1.4324766397476196, + "step": 11770 + }, + { + "accuracy": 0.5249999761581421, + "epoch": 2.94, + "learning_rate": 9.140629196580098e-09, + "logps/chosen": -172.01473999023438, + "logps/margins": 7.114737510681152, + "logps/rejected": -179.1294708251953, + "loss": 0.7332, + "rewards/chosen": 1.765822410583496, + "rewards/margins": 0.2777295708656311, + "rewards/rejected": 1.4880928993225098, + "step": 11780 + }, + { + "accuracy": 0.550000011920929, + "epoch": 2.95, + "learning_rate": 8.366578530749625e-09, + "logps/chosen": -152.0011749267578, + "logps/margins": -16.068004608154297, + "logps/rejected": -135.9331817626953, + "loss": 0.7256, + "rewards/chosen": 1.3085957765579224, + "rewards/margins": 0.2416520118713379, + "rewards/rejected": 1.066943645477295, + "step": 11790 + }, + { + "accuracy": 0.5874999761581421, + "epoch": 2.95, + "learning_rate": 7.626740531688215e-09, + "logps/chosen": -160.2397003173828, + "logps/margins": -15.914340019226074, + "logps/rejected": -144.32534790039062, + "loss": 0.6971, + "rewards/chosen": 1.752166509628296, + "rewards/margins": 0.33867818117141724, + "rewards/rejected": 1.4134883880615234, + "step": 11800 + }, + { + "accuracy": 0.574999988079071, + "epoch": 2.95, + "learning_rate": 6.9211202702468286e-09, + "logps/chosen": -158.93490600585938, + "logps/margins": -4.569110870361328, + "logps/rejected": -154.36578369140625, + "loss": 0.7538, + "rewards/chosen": 1.785861611366272, + "rewards/margins": 0.5427820682525635, + "rewards/rejected": 1.243079423904419, + "step": 11810 + }, + { + "accuracy": 0.5874999761581421, + "epoch": 2.96, + "learning_rate": 6.249722582747919e-09, + "logps/chosen": -122.33293151855469, + "logps/margins": 9.133320808410645, + "logps/rejected": -131.4662628173828, + "loss": 0.7187, + "rewards/chosen": 1.1818997859954834, + "rewards/margins": 0.17978112399578094, + "rewards/rejected": 1.0021185874938965, + "step": 11820 + }, + { + "accuracy": 0.612500011920929, + "epoch": 2.96, + "learning_rate": 5.612552070953237e-09, + "logps/chosen": -165.9116668701172, + "logps/margins": -23.724407196044922, + "logps/rejected": -142.187255859375, + "loss": 0.8016, + "rewards/chosen": 1.6438732147216797, + "rewards/margins": 0.14016087353229523, + "rewards/rejected": 1.5037124156951904, + "step": 11830 + }, + { + "accuracy": 0.6000000238418579, + "epoch": 2.96, + "learning_rate": 5.009613102031075e-09, + "logps/chosen": -163.58680725097656, + "logps/margins": -19.323673248291016, + "logps/rejected": -144.2631378173828, + "loss": 0.637, + "rewards/chosen": 1.5684988498687744, + "rewards/margins": 0.37676405906677246, + "rewards/rejected": 1.1917349100112915, + "step": 11840 + }, + { + "accuracy": 0.512499988079071, + "epoch": 2.96, + "learning_rate": 4.44090980852574e-09, + "logps/chosen": -160.85830688476562, + "logps/margins": -16.70841407775879, + "logps/rejected": -144.14987182617188, + "loss": 0.705, + "rewards/chosen": 1.5704591274261475, + "rewards/margins": 0.3505284786224365, + "rewards/rejected": 1.2199307680130005, + "step": 11850 + }, + { + "accuracy": 0.6000000238418579, + "epoch": 2.96, + "learning_rate": 3.906446088332016e-09, + "logps/chosen": -162.44886779785156, + "logps/margins": 15.261457443237305, + "logps/rejected": -177.71031188964844, + "loss": 0.67, + "rewards/chosen": 1.5779454708099365, + "rewards/margins": 0.19921907782554626, + "rewards/rejected": 1.3787264823913574, + "step": 11860 + }, + { + "accuracy": 0.512499988079071, + "epoch": 2.97, + "learning_rate": 3.406225604664637e-09, + "logps/chosen": -137.77020263671875, + "logps/margins": 25.638286590576172, + "logps/rejected": -163.40847778320312, + "loss": 0.7526, + "rewards/chosen": 1.5540413856506348, + "rewards/margins": 0.15402910113334656, + "rewards/rejected": 1.400012493133545, + "step": 11870 + }, + { + "accuracy": 0.675000011920929, + "epoch": 2.97, + "learning_rate": 2.9402517860360748e-09, + "logps/chosen": -158.18560791015625, + "logps/margins": -10.477030754089355, + "logps/rejected": -147.70858764648438, + "loss": 0.7007, + "rewards/chosen": 1.6465203762054443, + "rewards/margins": 0.4676668047904968, + "rewards/rejected": 1.1788537502288818, + "step": 11880 + }, + { + "accuracy": 0.6499999761581421, + "epoch": 2.97, + "learning_rate": 2.508527826232121e-09, + "logps/chosen": -163.64138793945312, + "logps/margins": 21.039915084838867, + "logps/rejected": -184.68130493164062, + "loss": 0.716, + "rewards/chosen": 1.7880483865737915, + "rewards/margins": 0.4099927544593811, + "rewards/rejected": 1.3780555725097656, + "step": 11890 + }, + { + "accuracy": 0.637499988079071, + "epoch": 2.98, + "learning_rate": 2.1110566842896805e-09, + "logps/chosen": -160.7971649169922, + "logps/margins": -8.187764167785645, + "logps/rejected": -152.60940551757812, + "loss": 0.7304, + "rewards/chosen": 1.7195698022842407, + "rewards/margins": 0.17522543668746948, + "rewards/rejected": 1.544344186782837, + "step": 11900 + }, + { + "accuracy": 0.6625000238418579, + "epoch": 2.98, + "learning_rate": 1.7478410844767868e-09, + "logps/chosen": -164.2900848388672, + "logps/margins": -4.483556270599365, + "logps/rejected": -159.8065185546875, + "loss": 0.6928, + "rewards/chosen": 1.620644211769104, + "rewards/margins": 0.3815360367298126, + "rewards/rejected": 1.2391082048416138, + "step": 11910 + }, + { + "accuracy": 0.5874999761581421, + "epoch": 2.98, + "learning_rate": 1.4188835162742831e-09, + "logps/chosen": -162.53131103515625, + "logps/margins": -12.429844856262207, + "logps/rejected": -150.10147094726562, + "loss": 0.7196, + "rewards/chosen": 1.66029953956604, + "rewards/margins": 0.4895680844783783, + "rewards/rejected": 1.1707313060760498, + "step": 11920 + }, + { + "accuracy": 0.637499988079071, + "epoch": 2.98, + "learning_rate": 1.124186234357505e-09, + "logps/chosen": -153.46444702148438, + "logps/margins": -3.6480422019958496, + "logps/rejected": -149.81640625, + "loss": 0.7511, + "rewards/chosen": 1.4499682188034058, + "rewards/margins": 0.3460947275161743, + "rewards/rejected": 1.1038734912872314, + "step": 11930 + }, + { + "accuracy": 0.6499999761581421, + "epoch": 2.98, + "learning_rate": 8.637512585824015e-10, + "logps/chosen": -146.49917602539062, + "logps/margins": 12.519338607788086, + "logps/rejected": -159.0185089111328, + "loss": 0.6839, + "rewards/chosen": 1.8145946264266968, + "rewards/margins": 0.6911009550094604, + "rewards/rejected": 1.1234937906265259, + "step": 11940 + }, + { + "accuracy": 0.550000011920929, + "epoch": 2.99, + "learning_rate": 6.37580373971658e-10, + "logps/chosen": -150.8441162109375, + "logps/margins": -9.69525146484375, + "logps/rejected": -141.1488494873047, + "loss": 0.6885, + "rewards/chosen": 1.8498260974884033, + "rewards/margins": 0.3790434002876282, + "rewards/rejected": 1.4707826375961304, + "step": 11950 + }, + { + "accuracy": 0.625, + "epoch": 2.99, + "learning_rate": 4.456751307002627e-10, + "logps/chosen": -140.88059997558594, + "logps/margins": 24.35348129272461, + "logps/rejected": -165.23410034179688, + "loss": 0.7191, + "rewards/chosen": 1.5284738540649414, + "rewards/margins": 0.32110127806663513, + "rewards/rejected": 1.2073724269866943, + "step": 11960 + }, + { + "accuracy": 0.5874999761581421, + "epoch": 2.99, + "learning_rate": 2.880368440871806e-10, + "logps/chosen": -127.67085266113281, + "logps/margins": 21.16813850402832, + "logps/rejected": -148.8389892578125, + "loss": 0.6953, + "rewards/chosen": 1.3602811098098755, + "rewards/margins": 0.3886519968509674, + "rewards/rejected": 0.9716290235519409, + "step": 11970 + }, + { + "accuracy": 0.699999988079071, + "epoch": 3.0, + "learning_rate": 1.6466659458591604e-10, + "logps/chosen": -166.7295684814453, + "logps/margins": -5.359309196472168, + "logps/rejected": -161.37026977539062, + "loss": 0.73, + "rewards/chosen": 1.6260411739349365, + "rewards/margins": 0.4541957378387451, + "rewards/rejected": 1.1718454360961914, + "step": 11980 + }, + { + "accuracy": 0.699999988079071, + "epoch": 3.0, + "learning_rate": 7.556522777674158e-11, + "logps/chosen": -147.84793090820312, + "logps/margins": -18.4981632232666, + "logps/rejected": -129.3497772216797, + "loss": 0.7081, + "rewards/chosen": 1.7136790752410889, + "rewards/margins": 0.5192862153053284, + "rewards/rejected": 1.1943929195404053, + "step": 11990 + }, + { + "accuracy": 0.6000000238418579, + "epoch": 3.0, + "learning_rate": 2.0733354360036316e-11, + "logps/chosen": -155.54161071777344, + "logps/margins": -17.127153396606445, + "logps/rejected": -138.4144744873047, + "loss": 0.7041, + "rewards/chosen": 1.8409420251846313, + "rewards/margins": 0.18160924315452576, + "rewards/rejected": 1.6593328714370728, + "step": 12000 + }, + { + "epoch": 3.0, + "eval_accuracy": 0.5954193093727977, + "eval_logps/chosen": -157.67654418945312, + "eval_logps/margins": -5.2187299728393555, + "eval_logps/rejected": -152.4578094482422, + "eval_loss": 0.7306671142578125, + "eval_rewards/chosen": 1.567785620689392, + "eval_rewards/margins": 0.31370338797569275, + "eval_rewards/rejected": 1.2540823221206665, + "eval_runtime": 1292.0492, + "eval_samples_per_second": 10.983, + "eval_steps_per_second": 1.373, + "step": 12000 + } + ], + "logging_steps": 10, + "max_steps": 12000, + "num_train_epochs": 3, + "save_steps": 3000, + "total_flos": 8.977833049899139e+18, + "trial_name": null, + "trial_params": null +}