| { |
| "best_global_step": null, |
| "best_metric": null, |
| "best_model_checkpoint": null, |
| "epoch": 0.7979688066739209, |
| "eval_steps": 500, |
| "global_step": 2200, |
| "is_hyper_param_search": false, |
| "is_local_process_zero": true, |
| "is_world_process_zero": true, |
| "log_history": [ |
| { |
| "epoch": 0.0003627130939426913, |
| "grad_norm": 0.64821457862854, |
| "learning_rate": 0.0, |
| "loss": 0.4482, |
| "step": 1 |
| }, |
| { |
| "epoch": 0.0007254261878853826, |
| "grad_norm": 0.6469861268997192, |
| "learning_rate": 2e-05, |
| "loss": 0.4874, |
| "step": 2 |
| }, |
| { |
| "epoch": 0.001088139281828074, |
| "grad_norm": 0.45289790630340576, |
| "learning_rate": 4e-05, |
| "loss": 0.4732, |
| "step": 3 |
| }, |
| { |
| "epoch": 0.0014508523757707653, |
| "grad_norm": 0.38072678446769714, |
| "learning_rate": 6e-05, |
| "loss": 0.4503, |
| "step": 4 |
| }, |
| { |
| "epoch": 0.0018135654697134566, |
| "grad_norm": 0.4032226800918579, |
| "learning_rate": 8e-05, |
| "loss": 0.4312, |
| "step": 5 |
| }, |
| { |
| "epoch": 0.002176278563656148, |
| "grad_norm": 0.3684772551059723, |
| "learning_rate": 0.0001, |
| "loss": 0.4055, |
| "step": 6 |
| }, |
| { |
| "epoch": 0.0025389916575988395, |
| "grad_norm": 0.3409311771392822, |
| "learning_rate": 0.00012, |
| "loss": 0.4201, |
| "step": 7 |
| }, |
| { |
| "epoch": 0.0029017047515415306, |
| "grad_norm": 0.3772580921649933, |
| "learning_rate": 0.00014, |
| "loss": 0.4086, |
| "step": 8 |
| }, |
| { |
| "epoch": 0.003264417845484222, |
| "grad_norm": 0.30869755148887634, |
| "learning_rate": 0.00016, |
| "loss": 0.3954, |
| "step": 9 |
| }, |
| { |
| "epoch": 0.003627130939426913, |
| "grad_norm": 0.23723824322223663, |
| "learning_rate": 0.00018, |
| "loss": 0.3992, |
| "step": 10 |
| }, |
| { |
| "epoch": 0.003989844033369605, |
| "grad_norm": 0.18091322481632233, |
| "learning_rate": 0.0002, |
| "loss": 0.368, |
| "step": 11 |
| }, |
| { |
| "epoch": 0.004352557127312296, |
| "grad_norm": 0.20436523854732513, |
| "learning_rate": 0.00019999993460381957, |
| "loss": 0.3711, |
| "step": 12 |
| }, |
| { |
| "epoch": 0.004715270221254987, |
| "grad_norm": 0.19207683205604553, |
| "learning_rate": 0.00019999973841536373, |
| "loss": 0.3788, |
| "step": 13 |
| }, |
| { |
| "epoch": 0.005077983315197679, |
| "grad_norm": 0.1436341255903244, |
| "learning_rate": 0.00019999941143488914, |
| "loss": 0.3936, |
| "step": 14 |
| }, |
| { |
| "epoch": 0.00544069640914037, |
| "grad_norm": 0.13892005383968353, |
| "learning_rate": 0.0001999989536628234, |
| "loss": 0.4062, |
| "step": 15 |
| }, |
| { |
| "epoch": 0.005803409503083061, |
| "grad_norm": 0.12910069525241852, |
| "learning_rate": 0.00019999836509976534, |
| "loss": 0.3863, |
| "step": 16 |
| }, |
| { |
| "epoch": 0.006166122597025753, |
| "grad_norm": 0.10763731598854065, |
| "learning_rate": 0.00019999764574648465, |
| "loss": 0.3757, |
| "step": 17 |
| }, |
| { |
| "epoch": 0.006528835690968444, |
| "grad_norm": 0.1078948974609375, |
| "learning_rate": 0.00019999679560392226, |
| "loss": 0.3342, |
| "step": 18 |
| }, |
| { |
| "epoch": 0.006891548784911135, |
| "grad_norm": 0.10403122007846832, |
| "learning_rate": 0.00019999581467319006, |
| "loss": 0.3371, |
| "step": 19 |
| }, |
| { |
| "epoch": 0.007254261878853826, |
| "grad_norm": 0.09776491671800613, |
| "learning_rate": 0.00019999470295557105, |
| "loss": 0.3263, |
| "step": 20 |
| }, |
| { |
| "epoch": 0.007616974972796518, |
| "grad_norm": 0.10792049765586853, |
| "learning_rate": 0.00019999346045251925, |
| "loss": 0.3447, |
| "step": 21 |
| }, |
| { |
| "epoch": 0.00797968806673921, |
| "grad_norm": 0.1174544170498848, |
| "learning_rate": 0.00019999208716565977, |
| "loss": 0.336, |
| "step": 22 |
| }, |
| { |
| "epoch": 0.008342401160681901, |
| "grad_norm": 0.09458769857883453, |
| "learning_rate": 0.0001999905830967888, |
| "loss": 0.3262, |
| "step": 23 |
| }, |
| { |
| "epoch": 0.008705114254624592, |
| "grad_norm": 0.09644383937120438, |
| "learning_rate": 0.0001999889482478735, |
| "loss": 0.3361, |
| "step": 24 |
| }, |
| { |
| "epoch": 0.009067827348567283, |
| "grad_norm": 0.09843447804450989, |
| "learning_rate": 0.0001999871826210521, |
| "loss": 0.3485, |
| "step": 25 |
| }, |
| { |
| "epoch": 0.009430540442509974, |
| "grad_norm": 0.10075519979000092, |
| "learning_rate": 0.00019998528621863396, |
| "loss": 0.3461, |
| "step": 26 |
| }, |
| { |
| "epoch": 0.009793253536452665, |
| "grad_norm": 0.09084542095661163, |
| "learning_rate": 0.00019998325904309946, |
| "loss": 0.3267, |
| "step": 27 |
| }, |
| { |
| "epoch": 0.010155966630395358, |
| "grad_norm": 0.10560671985149384, |
| "learning_rate": 0.00019998110109709988, |
| "loss": 0.3532, |
| "step": 28 |
| }, |
| { |
| "epoch": 0.01051867972433805, |
| "grad_norm": 0.08736245334148407, |
| "learning_rate": 0.00019997881238345775, |
| "loss": 0.37, |
| "step": 29 |
| }, |
| { |
| "epoch": 0.01088139281828074, |
| "grad_norm": 0.103543221950531, |
| "learning_rate": 0.0001999763929051665, |
| "loss": 0.3093, |
| "step": 30 |
| }, |
| { |
| "epoch": 0.011244105912223431, |
| "grad_norm": 0.09106361120939255, |
| "learning_rate": 0.0001999738426653906, |
| "loss": 0.3231, |
| "step": 31 |
| }, |
| { |
| "epoch": 0.011606819006166122, |
| "grad_norm": 0.09385113418102264, |
| "learning_rate": 0.00019997116166746562, |
| "loss": 0.3162, |
| "step": 32 |
| }, |
| { |
| "epoch": 0.011969532100108813, |
| "grad_norm": 0.10086266696453094, |
| "learning_rate": 0.00019996834991489805, |
| "loss": 0.3105, |
| "step": 33 |
| }, |
| { |
| "epoch": 0.012332245194051506, |
| "grad_norm": 0.08959592878818512, |
| "learning_rate": 0.00019996540741136544, |
| "loss": 0.3241, |
| "step": 34 |
| }, |
| { |
| "epoch": 0.012694958287994197, |
| "grad_norm": 0.10446605086326599, |
| "learning_rate": 0.00019996233416071644, |
| "loss": 0.338, |
| "step": 35 |
| }, |
| { |
| "epoch": 0.013057671381936888, |
| "grad_norm": 0.08997010439634323, |
| "learning_rate": 0.00019995913016697053, |
| "loss": 0.3089, |
| "step": 36 |
| }, |
| { |
| "epoch": 0.01342038447587958, |
| "grad_norm": 0.09340513497591019, |
| "learning_rate": 0.00019995579543431835, |
| "loss": 0.3167, |
| "step": 37 |
| }, |
| { |
| "epoch": 0.01378309756982227, |
| "grad_norm": 0.0928172841668129, |
| "learning_rate": 0.00019995232996712146, |
| "loss": 0.3236, |
| "step": 38 |
| }, |
| { |
| "epoch": 0.014145810663764961, |
| "grad_norm": 0.10568640381097794, |
| "learning_rate": 0.0001999487337699124, |
| "loss": 0.3213, |
| "step": 39 |
| }, |
| { |
| "epoch": 0.014508523757707652, |
| "grad_norm": 0.09213658422231674, |
| "learning_rate": 0.0001999450068473948, |
| "loss": 0.3308, |
| "step": 40 |
| }, |
| { |
| "epoch": 0.014871236851650345, |
| "grad_norm": 0.09331916272640228, |
| "learning_rate": 0.0001999411492044431, |
| "loss": 0.3265, |
| "step": 41 |
| }, |
| { |
| "epoch": 0.015233949945593036, |
| "grad_norm": 0.0938337966799736, |
| "learning_rate": 0.00019993716084610284, |
| "loss": 0.3084, |
| "step": 42 |
| }, |
| { |
| "epoch": 0.015596663039535727, |
| "grad_norm": 0.1006985679268837, |
| "learning_rate": 0.00019993304177759046, |
| "loss": 0.3932, |
| "step": 43 |
| }, |
| { |
| "epoch": 0.01595937613347842, |
| "grad_norm": 0.09978915005922318, |
| "learning_rate": 0.00019992879200429346, |
| "loss": 0.3147, |
| "step": 44 |
| }, |
| { |
| "epoch": 0.01632208922742111, |
| "grad_norm": 0.095309779047966, |
| "learning_rate": 0.00019992441153177015, |
| "loss": 0.3271, |
| "step": 45 |
| }, |
| { |
| "epoch": 0.016684802321363802, |
| "grad_norm": 0.09498284012079239, |
| "learning_rate": 0.00019991990036574987, |
| "loss": 0.3138, |
| "step": 46 |
| }, |
| { |
| "epoch": 0.017047515415306493, |
| "grad_norm": 0.09961807727813721, |
| "learning_rate": 0.0001999152585121329, |
| "loss": 0.3447, |
| "step": 47 |
| }, |
| { |
| "epoch": 0.017410228509249184, |
| "grad_norm": 0.11686038225889206, |
| "learning_rate": 0.0001999104859769904, |
| "loss": 0.3059, |
| "step": 48 |
| }, |
| { |
| "epoch": 0.017772941603191875, |
| "grad_norm": 0.09790387004613876, |
| "learning_rate": 0.0001999055827665645, |
| "loss": 0.3241, |
| "step": 49 |
| }, |
| { |
| "epoch": 0.018135654697134566, |
| "grad_norm": 0.0987682044506073, |
| "learning_rate": 0.00019990054888726824, |
| "loss": 0.3159, |
| "step": 50 |
| }, |
| { |
| "epoch": 0.018498367791077257, |
| "grad_norm": 0.09558644145727158, |
| "learning_rate": 0.0001998953843456855, |
| "loss": 0.3528, |
| "step": 51 |
| }, |
| { |
| "epoch": 0.01886108088501995, |
| "grad_norm": 0.1173083484172821, |
| "learning_rate": 0.00019989008914857116, |
| "loss": 0.3138, |
| "step": 52 |
| }, |
| { |
| "epoch": 0.01922379397896264, |
| "grad_norm": 0.09404181689023972, |
| "learning_rate": 0.0001998846633028509, |
| "loss": 0.3262, |
| "step": 53 |
| }, |
| { |
| "epoch": 0.01958650707290533, |
| "grad_norm": 0.09235358238220215, |
| "learning_rate": 0.00019987910681562132, |
| "loss": 0.3271, |
| "step": 54 |
| }, |
| { |
| "epoch": 0.01994922016684802, |
| "grad_norm": 0.10229232162237167, |
| "learning_rate": 0.0001998734196941499, |
| "loss": 0.3098, |
| "step": 55 |
| }, |
| { |
| "epoch": 0.020311933260790716, |
| "grad_norm": 0.08622050285339355, |
| "learning_rate": 0.0001998676019458749, |
| "loss": 0.2878, |
| "step": 56 |
| }, |
| { |
| "epoch": 0.020674646354733407, |
| "grad_norm": 0.10718828439712524, |
| "learning_rate": 0.00019986165357840558, |
| "loss": 0.319, |
| "step": 57 |
| }, |
| { |
| "epoch": 0.0210373594486761, |
| "grad_norm": 0.09529942274093628, |
| "learning_rate": 0.00019985557459952188, |
| "loss": 0.2974, |
| "step": 58 |
| }, |
| { |
| "epoch": 0.02140007254261879, |
| "grad_norm": 0.09532184153795242, |
| "learning_rate": 0.00019984936501717468, |
| "loss": 0.3016, |
| "step": 59 |
| }, |
| { |
| "epoch": 0.02176278563656148, |
| "grad_norm": 0.098875492811203, |
| "learning_rate": 0.00019984302483948562, |
| "loss": 0.3006, |
| "step": 60 |
| }, |
| { |
| "epoch": 0.02212549873050417, |
| "grad_norm": 0.1071372851729393, |
| "learning_rate": 0.00019983655407474719, |
| "loss": 0.2796, |
| "step": 61 |
| }, |
| { |
| "epoch": 0.022488211824446862, |
| "grad_norm": 0.11236250400543213, |
| "learning_rate": 0.0001998299527314226, |
| "loss": 0.3067, |
| "step": 62 |
| }, |
| { |
| "epoch": 0.022850924918389554, |
| "grad_norm": 0.10537782311439514, |
| "learning_rate": 0.00019982322081814596, |
| "loss": 0.3415, |
| "step": 63 |
| }, |
| { |
| "epoch": 0.023213638012332245, |
| "grad_norm": 0.09595459699630737, |
| "learning_rate": 0.00019981635834372209, |
| "loss": 0.3076, |
| "step": 64 |
| }, |
| { |
| "epoch": 0.023576351106274936, |
| "grad_norm": 0.09259078651666641, |
| "learning_rate": 0.00019980936531712652, |
| "loss": 0.2913, |
| "step": 65 |
| }, |
| { |
| "epoch": 0.023939064200217627, |
| "grad_norm": 0.20734301209449768, |
| "learning_rate": 0.00019980224174750568, |
| "loss": 0.3102, |
| "step": 66 |
| }, |
| { |
| "epoch": 0.024301777294160318, |
| "grad_norm": 0.10769975185394287, |
| "learning_rate": 0.0001997949876441766, |
| "loss": 0.3336, |
| "step": 67 |
| }, |
| { |
| "epoch": 0.024664490388103012, |
| "grad_norm": 0.1010124459862709, |
| "learning_rate": 0.00019978760301662715, |
| "loss": 0.3305, |
| "step": 68 |
| }, |
| { |
| "epoch": 0.025027203482045703, |
| "grad_norm": 0.09571480005979538, |
| "learning_rate": 0.0001997800878745158, |
| "loss": 0.3181, |
| "step": 69 |
| }, |
| { |
| "epoch": 0.025389916575988394, |
| "grad_norm": 0.10125493258237839, |
| "learning_rate": 0.00019977244222767182, |
| "loss": 0.2873, |
| "step": 70 |
| }, |
| { |
| "epoch": 0.025752629669931085, |
| "grad_norm": 0.11057893931865692, |
| "learning_rate": 0.0001997646660860951, |
| "loss": 0.3125, |
| "step": 71 |
| }, |
| { |
| "epoch": 0.026115342763873776, |
| "grad_norm": 0.1009269654750824, |
| "learning_rate": 0.00019975675945995626, |
| "loss": 0.3038, |
| "step": 72 |
| }, |
| { |
| "epoch": 0.026478055857816468, |
| "grad_norm": 0.09274876117706299, |
| "learning_rate": 0.00019974872235959654, |
| "loss": 0.292, |
| "step": 73 |
| }, |
| { |
| "epoch": 0.02684076895175916, |
| "grad_norm": 0.09206151217222214, |
| "learning_rate": 0.00019974055479552791, |
| "loss": 0.3064, |
| "step": 74 |
| }, |
| { |
| "epoch": 0.02720348204570185, |
| "grad_norm": 0.09455125778913498, |
| "learning_rate": 0.00019973225677843284, |
| "loss": 0.3031, |
| "step": 75 |
| }, |
| { |
| "epoch": 0.02756619513964454, |
| "grad_norm": 0.10313421487808228, |
| "learning_rate": 0.00019972382831916457, |
| "loss": 0.2975, |
| "step": 76 |
| }, |
| { |
| "epoch": 0.02792890823358723, |
| "grad_norm": 0.08839363604784012, |
| "learning_rate": 0.00019971526942874686, |
| "loss": 0.2926, |
| "step": 77 |
| }, |
| { |
| "epoch": 0.028291621327529923, |
| "grad_norm": 0.0924365371465683, |
| "learning_rate": 0.00019970658011837404, |
| "loss": 0.3071, |
| "step": 78 |
| }, |
| { |
| "epoch": 0.028654334421472614, |
| "grad_norm": 0.09888923168182373, |
| "learning_rate": 0.00019969776039941114, |
| "loss": 0.3004, |
| "step": 79 |
| }, |
| { |
| "epoch": 0.029017047515415305, |
| "grad_norm": 0.09569084644317627, |
| "learning_rate": 0.00019968881028339363, |
| "loss": 0.2923, |
| "step": 80 |
| }, |
| { |
| "epoch": 0.029379760609358, |
| "grad_norm": 0.11503931879997253, |
| "learning_rate": 0.0001996797297820276, |
| "loss": 0.3117, |
| "step": 81 |
| }, |
| { |
| "epoch": 0.02974247370330069, |
| "grad_norm": 0.09839354455471039, |
| "learning_rate": 0.00019967051890718964, |
| "loss": 0.2802, |
| "step": 82 |
| }, |
| { |
| "epoch": 0.03010518679724338, |
| "grad_norm": 0.09043775498867035, |
| "learning_rate": 0.00019966117767092686, |
| "loss": 0.2877, |
| "step": 83 |
| }, |
| { |
| "epoch": 0.030467899891186073, |
| "grad_norm": 0.09896934777498245, |
| "learning_rate": 0.00019965170608545688, |
| "loss": 0.3094, |
| "step": 84 |
| }, |
| { |
| "epoch": 0.030830612985128764, |
| "grad_norm": 0.09892911463975906, |
| "learning_rate": 0.00019964210416316787, |
| "loss": 0.302, |
| "step": 85 |
| }, |
| { |
| "epoch": 0.031193326079071455, |
| "grad_norm": 0.0898653194308281, |
| "learning_rate": 0.00019963237191661834, |
| "loss": 0.2982, |
| "step": 86 |
| }, |
| { |
| "epoch": 0.031556039173014146, |
| "grad_norm": 0.10663247853517532, |
| "learning_rate": 0.00019962250935853736, |
| "loss": 0.2943, |
| "step": 87 |
| }, |
| { |
| "epoch": 0.03191875226695684, |
| "grad_norm": 0.09792915731668472, |
| "learning_rate": 0.0001996125165018244, |
| "loss": 0.2826, |
| "step": 88 |
| }, |
| { |
| "epoch": 0.03228146536089953, |
| "grad_norm": 0.09535045176744461, |
| "learning_rate": 0.00019960239335954936, |
| "loss": 0.3026, |
| "step": 89 |
| }, |
| { |
| "epoch": 0.03264417845484222, |
| "grad_norm": 0.08838774263858795, |
| "learning_rate": 0.0001995921399449525, |
| "loss": 0.277, |
| "step": 90 |
| }, |
| { |
| "epoch": 0.03300689154878491, |
| "grad_norm": 0.09616609662771225, |
| "learning_rate": 0.00019958175627144453, |
| "loss": 0.3015, |
| "step": 91 |
| }, |
| { |
| "epoch": 0.033369604642727604, |
| "grad_norm": 0.0945005938410759, |
| "learning_rate": 0.00019957124235260652, |
| "loss": 0.288, |
| "step": 92 |
| }, |
| { |
| "epoch": 0.03373231773667029, |
| "grad_norm": 0.10378480702638626, |
| "learning_rate": 0.00019956059820218982, |
| "loss": 0.3361, |
| "step": 93 |
| }, |
| { |
| "epoch": 0.034095030830612987, |
| "grad_norm": 0.09242385625839233, |
| "learning_rate": 0.0001995498238341162, |
| "loss": 0.2903, |
| "step": 94 |
| }, |
| { |
| "epoch": 0.034457743924555674, |
| "grad_norm": 0.0919501855969429, |
| "learning_rate": 0.00019953891926247774, |
| "loss": 0.3025, |
| "step": 95 |
| }, |
| { |
| "epoch": 0.03482045701849837, |
| "grad_norm": 0.09978862851858139, |
| "learning_rate": 0.00019952788450153675, |
| "loss": 0.3335, |
| "step": 96 |
| }, |
| { |
| "epoch": 0.035183170112441056, |
| "grad_norm": 0.10097439587116241, |
| "learning_rate": 0.00019951671956572583, |
| "loss": 0.3137, |
| "step": 97 |
| }, |
| { |
| "epoch": 0.03554588320638375, |
| "grad_norm": 0.1043080985546112, |
| "learning_rate": 0.00019950542446964793, |
| "loss": 0.2896, |
| "step": 98 |
| }, |
| { |
| "epoch": 0.035908596300326445, |
| "grad_norm": 0.09220679104328156, |
| "learning_rate": 0.00019949399922807612, |
| "loss": 0.3031, |
| "step": 99 |
| }, |
| { |
| "epoch": 0.03627130939426913, |
| "grad_norm": 0.10692602396011353, |
| "learning_rate": 0.00019948244385595374, |
| "loss": 0.3057, |
| "step": 100 |
| }, |
| { |
| "epoch": 0.03663402248821183, |
| "grad_norm": 0.10609027743339539, |
| "learning_rate": 0.00019947075836839438, |
| "loss": 0.3082, |
| "step": 101 |
| }, |
| { |
| "epoch": 0.036996735582154515, |
| "grad_norm": 0.16867391765117645, |
| "learning_rate": 0.00019945894278068172, |
| "loss": 0.302, |
| "step": 102 |
| }, |
| { |
| "epoch": 0.03735944867609721, |
| "grad_norm": 0.09805990755558014, |
| "learning_rate": 0.00019944699710826966, |
| "loss": 0.3218, |
| "step": 103 |
| }, |
| { |
| "epoch": 0.0377221617700399, |
| "grad_norm": 0.09552697837352753, |
| "learning_rate": 0.00019943492136678223, |
| "loss": 0.2576, |
| "step": 104 |
| }, |
| { |
| "epoch": 0.03808487486398259, |
| "grad_norm": 0.09718494862318039, |
| "learning_rate": 0.0001994227155720136, |
| "loss": 0.2882, |
| "step": 105 |
| }, |
| { |
| "epoch": 0.03844758795792528, |
| "grad_norm": 0.0933772400021553, |
| "learning_rate": 0.000199410379739928, |
| "loss": 0.3069, |
| "step": 106 |
| }, |
| { |
| "epoch": 0.038810301051867974, |
| "grad_norm": 0.09682098776102066, |
| "learning_rate": 0.00019939791388665974, |
| "loss": 0.3013, |
| "step": 107 |
| }, |
| { |
| "epoch": 0.03917301414581066, |
| "grad_norm": 0.1064608246088028, |
| "learning_rate": 0.0001993853180285132, |
| "loss": 0.3307, |
| "step": 108 |
| }, |
| { |
| "epoch": 0.039535727239753356, |
| "grad_norm": 0.09508496522903442, |
| "learning_rate": 0.00019937259218196282, |
| "loss": 0.2968, |
| "step": 109 |
| }, |
| { |
| "epoch": 0.03989844033369604, |
| "grad_norm": 0.10839469730854034, |
| "learning_rate": 0.00019935973636365305, |
| "loss": 0.3017, |
| "step": 110 |
| }, |
| { |
| "epoch": 0.04026115342763874, |
| "grad_norm": 0.10720638930797577, |
| "learning_rate": 0.00019934675059039828, |
| "loss": 0.2817, |
| "step": 111 |
| }, |
| { |
| "epoch": 0.04062386652158143, |
| "grad_norm": 0.10672647505998611, |
| "learning_rate": 0.00019933363487918294, |
| "loss": 0.2876, |
| "step": 112 |
| }, |
| { |
| "epoch": 0.04098657961552412, |
| "grad_norm": 0.10290908813476562, |
| "learning_rate": 0.00019932038924716134, |
| "loss": 0.2906, |
| "step": 113 |
| }, |
| { |
| "epoch": 0.041349292709466814, |
| "grad_norm": 0.11226241290569305, |
| "learning_rate": 0.0001993070137116578, |
| "loss": 0.2816, |
| "step": 114 |
| }, |
| { |
| "epoch": 0.0417120058034095, |
| "grad_norm": 0.09558378159999847, |
| "learning_rate": 0.00019929350829016648, |
| "loss": 0.3115, |
| "step": 115 |
| }, |
| { |
| "epoch": 0.0420747188973522, |
| "grad_norm": 0.10267224162817001, |
| "learning_rate": 0.00019927987300035147, |
| "loss": 0.3035, |
| "step": 116 |
| }, |
| { |
| "epoch": 0.042437431991294884, |
| "grad_norm": 0.09401127696037292, |
| "learning_rate": 0.00019926610786004663, |
| "loss": 0.2995, |
| "step": 117 |
| }, |
| { |
| "epoch": 0.04280014508523758, |
| "grad_norm": 0.10615453869104385, |
| "learning_rate": 0.00019925221288725573, |
| "loss": 0.3062, |
| "step": 118 |
| }, |
| { |
| "epoch": 0.043162858179180266, |
| "grad_norm": 0.11928743124008179, |
| "learning_rate": 0.00019923818810015236, |
| "loss": 0.317, |
| "step": 119 |
| }, |
| { |
| "epoch": 0.04352557127312296, |
| "grad_norm": 0.10731657594442368, |
| "learning_rate": 0.00019922403351707983, |
| "loss": 0.3261, |
| "step": 120 |
| }, |
| { |
| "epoch": 0.04388828436706565, |
| "grad_norm": 0.10545065253973007, |
| "learning_rate": 0.0001992097491565513, |
| "loss": 0.3125, |
| "step": 121 |
| }, |
| { |
| "epoch": 0.04425099746100834, |
| "grad_norm": 0.1098426803946495, |
| "learning_rate": 0.0001991953350372496, |
| "loss": 0.2928, |
| "step": 122 |
| }, |
| { |
| "epoch": 0.04461371055495103, |
| "grad_norm": 0.09736689925193787, |
| "learning_rate": 0.00019918079117802725, |
| "loss": 0.2736, |
| "step": 123 |
| }, |
| { |
| "epoch": 0.044976423648893725, |
| "grad_norm": 0.11810169368982315, |
| "learning_rate": 0.0001991661175979066, |
| "loss": 0.2806, |
| "step": 124 |
| }, |
| { |
| "epoch": 0.04533913674283642, |
| "grad_norm": 0.11560354381799698, |
| "learning_rate": 0.00019915131431607952, |
| "loss": 0.317, |
| "step": 125 |
| }, |
| { |
| "epoch": 0.04570184983677911, |
| "grad_norm": 0.11197232455015182, |
| "learning_rate": 0.00019913638135190756, |
| "loss": 0.3382, |
| "step": 126 |
| }, |
| { |
| "epoch": 0.0460645629307218, |
| "grad_norm": 0.1027117446064949, |
| "learning_rate": 0.0001991213187249219, |
| "loss": 0.2684, |
| "step": 127 |
| }, |
| { |
| "epoch": 0.04642727602466449, |
| "grad_norm": 0.10549558699131012, |
| "learning_rate": 0.00019910612645482334, |
| "loss": 0.2939, |
| "step": 128 |
| }, |
| { |
| "epoch": 0.046789989118607184, |
| "grad_norm": 0.09976191818714142, |
| "learning_rate": 0.00019909080456148218, |
| "loss": 0.2878, |
| "step": 129 |
| }, |
| { |
| "epoch": 0.04715270221254987, |
| "grad_norm": 0.10141481459140778, |
| "learning_rate": 0.0001990753530649383, |
| "loss": 0.2959, |
| "step": 130 |
| }, |
| { |
| "epoch": 0.047515415306492566, |
| "grad_norm": 0.10536810010671616, |
| "learning_rate": 0.00019905977198540105, |
| "loss": 0.283, |
| "step": 131 |
| }, |
| { |
| "epoch": 0.04787812840043525, |
| "grad_norm": 0.1081426814198494, |
| "learning_rate": 0.00019904406134324933, |
| "loss": 0.2982, |
| "step": 132 |
| }, |
| { |
| "epoch": 0.04824084149437795, |
| "grad_norm": 0.10106177628040314, |
| "learning_rate": 0.00019902822115903143, |
| "loss": 0.3301, |
| "step": 133 |
| }, |
| { |
| "epoch": 0.048603554588320635, |
| "grad_norm": 0.09809243679046631, |
| "learning_rate": 0.0001990122514534651, |
| "loss": 0.2868, |
| "step": 134 |
| }, |
| { |
| "epoch": 0.04896626768226333, |
| "grad_norm": 0.10104624181985855, |
| "learning_rate": 0.00019899615224743753, |
| "loss": 0.3035, |
| "step": 135 |
| }, |
| { |
| "epoch": 0.049328980776206025, |
| "grad_norm": 0.09421058744192123, |
| "learning_rate": 0.0001989799235620052, |
| "loss": 0.2982, |
| "step": 136 |
| }, |
| { |
| "epoch": 0.04969169387014871, |
| "grad_norm": 0.09937946498394012, |
| "learning_rate": 0.00019896356541839404, |
| "loss": 0.2988, |
| "step": 137 |
| }, |
| { |
| "epoch": 0.05005440696409141, |
| "grad_norm": 0.10086655616760254, |
| "learning_rate": 0.00019894707783799925, |
| "loss": 0.2849, |
| "step": 138 |
| }, |
| { |
| "epoch": 0.050417120058034094, |
| "grad_norm": 0.09309150278568268, |
| "learning_rate": 0.0001989304608423853, |
| "loss": 0.2792, |
| "step": 139 |
| }, |
| { |
| "epoch": 0.05077983315197679, |
| "grad_norm": 0.15080593526363373, |
| "learning_rate": 0.00019891371445328592, |
| "loss": 0.2993, |
| "step": 140 |
| }, |
| { |
| "epoch": 0.051142546245919476, |
| "grad_norm": 0.09852839261293411, |
| "learning_rate": 0.0001988968386926042, |
| "loss": 0.2887, |
| "step": 141 |
| }, |
| { |
| "epoch": 0.05150525933986217, |
| "grad_norm": 0.13169077038764954, |
| "learning_rate": 0.00019887983358241225, |
| "loss": 0.2889, |
| "step": 142 |
| }, |
| { |
| "epoch": 0.05186797243380486, |
| "grad_norm": 0.203284353017807, |
| "learning_rate": 0.0001988626991449515, |
| "loss": 0.2762, |
| "step": 143 |
| }, |
| { |
| "epoch": 0.05223068552774755, |
| "grad_norm": 0.09370779246091843, |
| "learning_rate": 0.00019884543540263247, |
| "loss": 0.2717, |
| "step": 144 |
| }, |
| { |
| "epoch": 0.05259339862169024, |
| "grad_norm": 0.10462846606969833, |
| "learning_rate": 0.00019882804237803488, |
| "loss": 0.2923, |
| "step": 145 |
| }, |
| { |
| "epoch": 0.052956111715632935, |
| "grad_norm": 0.11297117918729782, |
| "learning_rate": 0.00019881052009390737, |
| "loss": 0.3037, |
| "step": 146 |
| }, |
| { |
| "epoch": 0.05331882480957562, |
| "grad_norm": 0.11037133634090424, |
| "learning_rate": 0.00019879286857316783, |
| "loss": 0.2883, |
| "step": 147 |
| }, |
| { |
| "epoch": 0.05368153790351832, |
| "grad_norm": 0.10279864072799683, |
| "learning_rate": 0.00019877508783890306, |
| "loss": 0.2847, |
| "step": 148 |
| }, |
| { |
| "epoch": 0.05404425099746101, |
| "grad_norm": 0.09439583867788315, |
| "learning_rate": 0.00019875717791436896, |
| "loss": 0.2779, |
| "step": 149 |
| }, |
| { |
| "epoch": 0.0544069640914037, |
| "grad_norm": 0.10622645914554596, |
| "learning_rate": 0.00019873913882299026, |
| "loss": 0.3099, |
| "step": 150 |
| }, |
| { |
| "epoch": 0.054769677185346394, |
| "grad_norm": 0.10882750153541565, |
| "learning_rate": 0.00019872097058836076, |
| "loss": 0.2659, |
| "step": 151 |
| }, |
| { |
| "epoch": 0.05513239027928908, |
| "grad_norm": 0.09320899844169617, |
| "learning_rate": 0.00019870267323424313, |
| "loss": 0.268, |
| "step": 152 |
| }, |
| { |
| "epoch": 0.055495103373231776, |
| "grad_norm": 0.09685231000185013, |
| "learning_rate": 0.00019868424678456888, |
| "loss": 0.2745, |
| "step": 153 |
| }, |
| { |
| "epoch": 0.05585781646717446, |
| "grad_norm": 0.10234569013118744, |
| "learning_rate": 0.00019866569126343844, |
| "loss": 0.2948, |
| "step": 154 |
| }, |
| { |
| "epoch": 0.05622052956111716, |
| "grad_norm": 0.09876774251461029, |
| "learning_rate": 0.00019864700669512098, |
| "loss": 0.2808, |
| "step": 155 |
| }, |
| { |
| "epoch": 0.056583242655059846, |
| "grad_norm": 0.10879123955965042, |
| "learning_rate": 0.00019862819310405449, |
| "loss": 0.2745, |
| "step": 156 |
| }, |
| { |
| "epoch": 0.05694595574900254, |
| "grad_norm": 0.10035258531570435, |
| "learning_rate": 0.00019860925051484572, |
| "loss": 0.3027, |
| "step": 157 |
| }, |
| { |
| "epoch": 0.05730866884294523, |
| "grad_norm": 0.098017618060112, |
| "learning_rate": 0.00019859017895227014, |
| "loss": 0.2844, |
| "step": 158 |
| }, |
| { |
| "epoch": 0.05767138193688792, |
| "grad_norm": 0.09496638178825378, |
| "learning_rate": 0.00019857097844127187, |
| "loss": 0.2852, |
| "step": 159 |
| }, |
| { |
| "epoch": 0.05803409503083061, |
| "grad_norm": 0.10773288458585739, |
| "learning_rate": 0.00019855164900696375, |
| "loss": 0.3112, |
| "step": 160 |
| }, |
| { |
| "epoch": 0.058396808124773304, |
| "grad_norm": 0.09997101873159409, |
| "learning_rate": 0.00019853219067462717, |
| "loss": 0.2913, |
| "step": 161 |
| }, |
| { |
| "epoch": 0.058759521218716, |
| "grad_norm": 0.09856441617012024, |
| "learning_rate": 0.00019851260346971214, |
| "loss": 0.2753, |
| "step": 162 |
| }, |
| { |
| "epoch": 0.059122234312658686, |
| "grad_norm": 0.10671742260456085, |
| "learning_rate": 0.00019849288741783728, |
| "loss": 0.2958, |
| "step": 163 |
| }, |
| { |
| "epoch": 0.05948494740660138, |
| "grad_norm": 0.10415424406528473, |
| "learning_rate": 0.0001984730425447896, |
| "loss": 0.284, |
| "step": 164 |
| }, |
| { |
| "epoch": 0.05984766050054407, |
| "grad_norm": 0.10045934468507767, |
| "learning_rate": 0.00019845306887652476, |
| "loss": 0.281, |
| "step": 165 |
| }, |
| { |
| "epoch": 0.06021037359448676, |
| "grad_norm": 0.10365572571754456, |
| "learning_rate": 0.0001984329664391667, |
| "loss": 0.3186, |
| "step": 166 |
| }, |
| { |
| "epoch": 0.06057308668842945, |
| "grad_norm": 0.10675114393234253, |
| "learning_rate": 0.00019841273525900794, |
| "loss": 0.2774, |
| "step": 167 |
| }, |
| { |
| "epoch": 0.060935799782372145, |
| "grad_norm": 0.100840725004673, |
| "learning_rate": 0.0001983923753625093, |
| "loss": 0.2723, |
| "step": 168 |
| }, |
| { |
| "epoch": 0.06129851287631483, |
| "grad_norm": 0.09524688124656677, |
| "learning_rate": 0.0001983718867763, |
| "loss": 0.2679, |
| "step": 169 |
| }, |
| { |
| "epoch": 0.06166122597025753, |
| "grad_norm": 0.10454592853784561, |
| "learning_rate": 0.0001983512695271775, |
| "loss": 0.2779, |
| "step": 170 |
| }, |
| { |
| "epoch": 0.062023939064200215, |
| "grad_norm": 0.11385498940944672, |
| "learning_rate": 0.00019833052364210757, |
| "loss": 0.2892, |
| "step": 171 |
| }, |
| { |
| "epoch": 0.06238665215814291, |
| "grad_norm": 0.10297231376171112, |
| "learning_rate": 0.00019830964914822433, |
| "loss": 0.2885, |
| "step": 172 |
| }, |
| { |
| "epoch": 0.0627493652520856, |
| "grad_norm": 0.10694777965545654, |
| "learning_rate": 0.00019828864607282994, |
| "loss": 0.2951, |
| "step": 173 |
| }, |
| { |
| "epoch": 0.06311207834602829, |
| "grad_norm": 0.10187729448080063, |
| "learning_rate": 0.00019826751444339483, |
| "loss": 0.267, |
| "step": 174 |
| }, |
| { |
| "epoch": 0.06347479143997098, |
| "grad_norm": 0.10256768018007278, |
| "learning_rate": 0.0001982462542875576, |
| "loss": 0.2812, |
| "step": 175 |
| }, |
| { |
| "epoch": 0.06383750453391368, |
| "grad_norm": 0.106157086789608, |
| "learning_rate": 0.0001982248656331249, |
| "loss": 0.2617, |
| "step": 176 |
| }, |
| { |
| "epoch": 0.06420021762785637, |
| "grad_norm": 0.10591990500688553, |
| "learning_rate": 0.00019820334850807143, |
| "loss": 0.2792, |
| "step": 177 |
| }, |
| { |
| "epoch": 0.06456293072179906, |
| "grad_norm": 0.10539959371089935, |
| "learning_rate": 0.00019818170294053994, |
| "loss": 0.2817, |
| "step": 178 |
| }, |
| { |
| "epoch": 0.06492564381574174, |
| "grad_norm": 0.10033068805932999, |
| "learning_rate": 0.00019815992895884122, |
| "loss": 0.2917, |
| "step": 179 |
| }, |
| { |
| "epoch": 0.06528835690968444, |
| "grad_norm": 0.11100872606039047, |
| "learning_rate": 0.00019813802659145394, |
| "loss": 0.276, |
| "step": 180 |
| }, |
| { |
| "epoch": 0.06565107000362713, |
| "grad_norm": 0.10445630550384521, |
| "learning_rate": 0.0001981159958670247, |
| "loss": 0.3308, |
| "step": 181 |
| }, |
| { |
| "epoch": 0.06601378309756982, |
| "grad_norm": 0.09888961911201477, |
| "learning_rate": 0.00019809383681436809, |
| "loss": 0.2651, |
| "step": 182 |
| }, |
| { |
| "epoch": 0.06637649619151251, |
| "grad_norm": 0.10630346089601517, |
| "learning_rate": 0.00019807154946246635, |
| "loss": 0.2674, |
| "step": 183 |
| }, |
| { |
| "epoch": 0.06673920928545521, |
| "grad_norm": 0.09556199610233307, |
| "learning_rate": 0.00019804913384046974, |
| "loss": 0.2988, |
| "step": 184 |
| }, |
| { |
| "epoch": 0.0671019223793979, |
| "grad_norm": 0.10325701534748077, |
| "learning_rate": 0.0001980265899776961, |
| "loss": 0.2821, |
| "step": 185 |
| }, |
| { |
| "epoch": 0.06746463547334058, |
| "grad_norm": 0.09466871619224548, |
| "learning_rate": 0.00019800391790363112, |
| "loss": 0.2632, |
| "step": 186 |
| }, |
| { |
| "epoch": 0.06782734856728329, |
| "grad_norm": 0.09646070003509521, |
| "learning_rate": 0.00019798111764792814, |
| "loss": 0.2888, |
| "step": 187 |
| }, |
| { |
| "epoch": 0.06819006166122597, |
| "grad_norm": 0.09636171907186508, |
| "learning_rate": 0.00019795818924040815, |
| "loss": 0.2766, |
| "step": 188 |
| }, |
| { |
| "epoch": 0.06855277475516866, |
| "grad_norm": 0.10880020260810852, |
| "learning_rate": 0.00019793513271105975, |
| "loss": 0.3053, |
| "step": 189 |
| }, |
| { |
| "epoch": 0.06891548784911135, |
| "grad_norm": 0.11933793127536774, |
| "learning_rate": 0.0001979119480900391, |
| "loss": 0.2903, |
| "step": 190 |
| }, |
| { |
| "epoch": 0.06927820094305405, |
| "grad_norm": 0.1342136114835739, |
| "learning_rate": 0.00019788863540766996, |
| "loss": 0.2912, |
| "step": 191 |
| }, |
| { |
| "epoch": 0.06964091403699674, |
| "grad_norm": 0.1037123054265976, |
| "learning_rate": 0.0001978651946944435, |
| "loss": 0.3044, |
| "step": 192 |
| }, |
| { |
| "epoch": 0.07000362713093942, |
| "grad_norm": 0.11920095235109329, |
| "learning_rate": 0.00019784162598101838, |
| "loss": 0.2859, |
| "step": 193 |
| }, |
| { |
| "epoch": 0.07036634022488211, |
| "grad_norm": 0.11973892152309418, |
| "learning_rate": 0.00019781792929822068, |
| "loss": 0.2959, |
| "step": 194 |
| }, |
| { |
| "epoch": 0.07072905331882481, |
| "grad_norm": 0.11078456044197083, |
| "learning_rate": 0.00019779410467704389, |
| "loss": 0.2769, |
| "step": 195 |
| }, |
| { |
| "epoch": 0.0710917664127675, |
| "grad_norm": 0.11091899126768112, |
| "learning_rate": 0.00019777015214864877, |
| "loss": 0.2832, |
| "step": 196 |
| }, |
| { |
| "epoch": 0.07145447950671019, |
| "grad_norm": 0.09678234905004501, |
| "learning_rate": 0.00019774607174436338, |
| "loss": 0.2455, |
| "step": 197 |
| }, |
| { |
| "epoch": 0.07181719260065289, |
| "grad_norm": 0.11300257593393326, |
| "learning_rate": 0.00019772186349568304, |
| "loss": 0.3242, |
| "step": 198 |
| }, |
| { |
| "epoch": 0.07217990569459558, |
| "grad_norm": 0.1536862999200821, |
| "learning_rate": 0.00019769752743427032, |
| "loss": 0.2901, |
| "step": 199 |
| }, |
| { |
| "epoch": 0.07254261878853827, |
| "grad_norm": 0.10081265866756439, |
| "learning_rate": 0.00019767306359195493, |
| "loss": 0.3059, |
| "step": 200 |
| }, |
| { |
| "epoch": 0.07290533188248095, |
| "grad_norm": 0.10079798847436905, |
| "learning_rate": 0.0001976484720007337, |
| "loss": 0.2871, |
| "step": 201 |
| }, |
| { |
| "epoch": 0.07326804497642365, |
| "grad_norm": 0.09981225430965424, |
| "learning_rate": 0.00019762375269277054, |
| "loss": 0.2713, |
| "step": 202 |
| }, |
| { |
| "epoch": 0.07363075807036634, |
| "grad_norm": 0.10104259103536606, |
| "learning_rate": 0.00019759890570039644, |
| "loss": 0.3178, |
| "step": 203 |
| }, |
| { |
| "epoch": 0.07399347116430903, |
| "grad_norm": 0.10694817453622818, |
| "learning_rate": 0.00019757393105610934, |
| "loss": 0.2725, |
| "step": 204 |
| }, |
| { |
| "epoch": 0.07435618425825172, |
| "grad_norm": 0.10432042181491852, |
| "learning_rate": 0.0001975488287925742, |
| "loss": 0.2798, |
| "step": 205 |
| }, |
| { |
| "epoch": 0.07471889735219442, |
| "grad_norm": 0.11903175711631775, |
| "learning_rate": 0.00019752359894262283, |
| "loss": 0.3138, |
| "step": 206 |
| }, |
| { |
| "epoch": 0.0750816104461371, |
| "grad_norm": 0.10495443642139435, |
| "learning_rate": 0.00019749824153925396, |
| "loss": 0.2764, |
| "step": 207 |
| }, |
| { |
| "epoch": 0.0754443235400798, |
| "grad_norm": 0.10551683604717255, |
| "learning_rate": 0.00019747275661563312, |
| "loss": 0.2884, |
| "step": 208 |
| }, |
| { |
| "epoch": 0.07580703663402248, |
| "grad_norm": 0.12931138277053833, |
| "learning_rate": 0.00019744714420509273, |
| "loss": 0.2843, |
| "step": 209 |
| }, |
| { |
| "epoch": 0.07616974972796518, |
| "grad_norm": 0.10500820726156235, |
| "learning_rate": 0.0001974214043411317, |
| "loss": 0.298, |
| "step": 210 |
| }, |
| { |
| "epoch": 0.07653246282190787, |
| "grad_norm": 0.10469575226306915, |
| "learning_rate": 0.000197395537057416, |
| "loss": 0.2775, |
| "step": 211 |
| }, |
| { |
| "epoch": 0.07689517591585056, |
| "grad_norm": 0.11616349220275879, |
| "learning_rate": 0.00019736954238777792, |
| "loss": 0.2868, |
| "step": 212 |
| }, |
| { |
| "epoch": 0.07725788900979326, |
| "grad_norm": 0.10852184146642685, |
| "learning_rate": 0.00019734342036621652, |
| "loss": 0.2634, |
| "step": 213 |
| }, |
| { |
| "epoch": 0.07762060210373595, |
| "grad_norm": 0.11353151500225067, |
| "learning_rate": 0.00019731717102689747, |
| "loss": 0.2988, |
| "step": 214 |
| }, |
| { |
| "epoch": 0.07798331519767863, |
| "grad_norm": 0.10728183388710022, |
| "learning_rate": 0.00019729079440415287, |
| "loss": 0.273, |
| "step": 215 |
| }, |
| { |
| "epoch": 0.07834602829162132, |
| "grad_norm": 0.11151303350925446, |
| "learning_rate": 0.0001972642905324813, |
| "loss": 0.282, |
| "step": 216 |
| }, |
| { |
| "epoch": 0.07870874138556402, |
| "grad_norm": 0.1237482950091362, |
| "learning_rate": 0.00019723765944654783, |
| "loss": 0.2744, |
| "step": 217 |
| }, |
| { |
| "epoch": 0.07907145447950671, |
| "grad_norm": 0.10815929621458054, |
| "learning_rate": 0.0001972109011811839, |
| "loss": 0.2893, |
| "step": 218 |
| }, |
| { |
| "epoch": 0.0794341675734494, |
| "grad_norm": 0.1144891083240509, |
| "learning_rate": 0.00019718401577138725, |
| "loss": 0.3018, |
| "step": 219 |
| }, |
| { |
| "epoch": 0.07979688066739209, |
| "grad_norm": 0.1146797463297844, |
| "learning_rate": 0.00019715700325232194, |
| "loss": 0.2759, |
| "step": 220 |
| }, |
| { |
| "epoch": 0.08015959376133479, |
| "grad_norm": 0.1100744977593422, |
| "learning_rate": 0.00019712986365931826, |
| "loss": 0.2824, |
| "step": 221 |
| }, |
| { |
| "epoch": 0.08052230685527748, |
| "grad_norm": 0.12042435258626938, |
| "learning_rate": 0.0001971025970278728, |
| "loss": 0.2683, |
| "step": 222 |
| }, |
| { |
| "epoch": 0.08088501994922016, |
| "grad_norm": 0.11394108831882477, |
| "learning_rate": 0.00019707520339364818, |
| "loss": 0.312, |
| "step": 223 |
| }, |
| { |
| "epoch": 0.08124773304316286, |
| "grad_norm": 0.10353437066078186, |
| "learning_rate": 0.00019704768279247317, |
| "loss": 0.2673, |
| "step": 224 |
| }, |
| { |
| "epoch": 0.08161044613710555, |
| "grad_norm": 0.0966782197356224, |
| "learning_rate": 0.00019702003526034264, |
| "loss": 0.2995, |
| "step": 225 |
| }, |
| { |
| "epoch": 0.08197315923104824, |
| "grad_norm": 0.11248703300952911, |
| "learning_rate": 0.00019699226083341742, |
| "loss": 0.2588, |
| "step": 226 |
| }, |
| { |
| "epoch": 0.08233587232499093, |
| "grad_norm": 0.10794703662395477, |
| "learning_rate": 0.00019696435954802438, |
| "loss": 0.2594, |
| "step": 227 |
| }, |
| { |
| "epoch": 0.08269858541893363, |
| "grad_norm": 0.1097991019487381, |
| "learning_rate": 0.0001969363314406562, |
| "loss": 0.2691, |
| "step": 228 |
| }, |
| { |
| "epoch": 0.08306129851287632, |
| "grad_norm": 0.10738769918680191, |
| "learning_rate": 0.00019690817654797161, |
| "loss": 0.2811, |
| "step": 229 |
| }, |
| { |
| "epoch": 0.083424011606819, |
| "grad_norm": 0.10677637159824371, |
| "learning_rate": 0.00019687989490679503, |
| "loss": 0.2864, |
| "step": 230 |
| }, |
| { |
| "epoch": 0.08378672470076169, |
| "grad_norm": 0.11440913379192352, |
| "learning_rate": 0.00019685148655411658, |
| "loss": 0.2961, |
| "step": 231 |
| }, |
| { |
| "epoch": 0.0841494377947044, |
| "grad_norm": 0.10899066925048828, |
| "learning_rate": 0.00019682295152709234, |
| "loss": 0.2852, |
| "step": 232 |
| }, |
| { |
| "epoch": 0.08451215088864708, |
| "grad_norm": 0.10460548102855682, |
| "learning_rate": 0.00019679428986304386, |
| "loss": 0.2954, |
| "step": 233 |
| }, |
| { |
| "epoch": 0.08487486398258977, |
| "grad_norm": 0.12301474809646606, |
| "learning_rate": 0.00019676550159945845, |
| "loss": 0.263, |
| "step": 234 |
| }, |
| { |
| "epoch": 0.08523757707653247, |
| "grad_norm": 0.11282453685998917, |
| "learning_rate": 0.000196736586773989, |
| "loss": 0.3135, |
| "step": 235 |
| }, |
| { |
| "epoch": 0.08560029017047516, |
| "grad_norm": 0.11679442226886749, |
| "learning_rate": 0.0001967075454244538, |
| "loss": 0.287, |
| "step": 236 |
| }, |
| { |
| "epoch": 0.08596300326441784, |
| "grad_norm": 0.11096673458814621, |
| "learning_rate": 0.0001966783775888368, |
| "loss": 0.295, |
| "step": 237 |
| }, |
| { |
| "epoch": 0.08632571635836053, |
| "grad_norm": 0.1101219430565834, |
| "learning_rate": 0.00019664908330528725, |
| "loss": 0.2694, |
| "step": 238 |
| }, |
| { |
| "epoch": 0.08668842945230323, |
| "grad_norm": 0.10985169559717178, |
| "learning_rate": 0.00019661966261211983, |
| "loss": 0.2734, |
| "step": 239 |
| }, |
| { |
| "epoch": 0.08705114254624592, |
| "grad_norm": 0.11106691509485245, |
| "learning_rate": 0.0001965901155478146, |
| "loss": 0.2781, |
| "step": 240 |
| }, |
| { |
| "epoch": 0.08741385564018861, |
| "grad_norm": 0.1100887879729271, |
| "learning_rate": 0.00019656044215101684, |
| "loss": 0.3105, |
| "step": 241 |
| }, |
| { |
| "epoch": 0.0877765687341313, |
| "grad_norm": 0.11487387865781784, |
| "learning_rate": 0.00019653064246053707, |
| "loss": 0.2824, |
| "step": 242 |
| }, |
| { |
| "epoch": 0.088139281828074, |
| "grad_norm": 0.10977080464363098, |
| "learning_rate": 0.00019650071651535104, |
| "loss": 0.3309, |
| "step": 243 |
| }, |
| { |
| "epoch": 0.08850199492201669, |
| "grad_norm": 0.11280547827482224, |
| "learning_rate": 0.0001964706643545996, |
| "loss": 0.2698, |
| "step": 244 |
| }, |
| { |
| "epoch": 0.08886470801595937, |
| "grad_norm": 0.10025591403245926, |
| "learning_rate": 0.00019644048601758865, |
| "loss": 0.2623, |
| "step": 245 |
| }, |
| { |
| "epoch": 0.08922742110990206, |
| "grad_norm": 0.10023844242095947, |
| "learning_rate": 0.0001964101815437892, |
| "loss": 0.2711, |
| "step": 246 |
| }, |
| { |
| "epoch": 0.08959013420384476, |
| "grad_norm": 0.1235634833574295, |
| "learning_rate": 0.0001963797509728371, |
| "loss": 0.2884, |
| "step": 247 |
| }, |
| { |
| "epoch": 0.08995284729778745, |
| "grad_norm": 0.10354435443878174, |
| "learning_rate": 0.0001963491943445333, |
| "loss": 0.2601, |
| "step": 248 |
| }, |
| { |
| "epoch": 0.09031556039173014, |
| "grad_norm": 0.10399331152439117, |
| "learning_rate": 0.00019631851169884352, |
| "loss": 0.2817, |
| "step": 249 |
| }, |
| { |
| "epoch": 0.09067827348567284, |
| "grad_norm": 0.11649379879236221, |
| "learning_rate": 0.00019628770307589827, |
| "loss": 0.3344, |
| "step": 250 |
| }, |
| { |
| "epoch": 0.09104098657961553, |
| "grad_norm": 0.1313096284866333, |
| "learning_rate": 0.00019625676851599288, |
| "loss": 0.326, |
| "step": 251 |
| }, |
| { |
| "epoch": 0.09140369967355821, |
| "grad_norm": 0.11555227637290955, |
| "learning_rate": 0.00019622570805958746, |
| "loss": 0.2687, |
| "step": 252 |
| }, |
| { |
| "epoch": 0.0917664127675009, |
| "grad_norm": 0.1436738669872284, |
| "learning_rate": 0.00019619452174730667, |
| "loss": 0.2748, |
| "step": 253 |
| }, |
| { |
| "epoch": 0.0921291258614436, |
| "grad_norm": 0.11013220995664597, |
| "learning_rate": 0.0001961632096199398, |
| "loss": 0.2556, |
| "step": 254 |
| }, |
| { |
| "epoch": 0.09249183895538629, |
| "grad_norm": 0.11054322123527527, |
| "learning_rate": 0.00019613177171844075, |
| "loss": 0.2813, |
| "step": 255 |
| }, |
| { |
| "epoch": 0.09285455204932898, |
| "grad_norm": 0.10872920602560043, |
| "learning_rate": 0.00019610020808392788, |
| "loss": 0.3022, |
| "step": 256 |
| }, |
| { |
| "epoch": 0.09321726514327167, |
| "grad_norm": 0.12032327055931091, |
| "learning_rate": 0.000196068518757684, |
| "loss": 0.2836, |
| "step": 257 |
| }, |
| { |
| "epoch": 0.09357997823721437, |
| "grad_norm": 0.10551446676254272, |
| "learning_rate": 0.0001960367037811564, |
| "loss": 0.281, |
| "step": 258 |
| }, |
| { |
| "epoch": 0.09394269133115705, |
| "grad_norm": 0.11461377888917923, |
| "learning_rate": 0.00019600476319595658, |
| "loss": 0.2841, |
| "step": 259 |
| }, |
| { |
| "epoch": 0.09430540442509974, |
| "grad_norm": 0.11937367916107178, |
| "learning_rate": 0.00019597269704386036, |
| "loss": 0.2695, |
| "step": 260 |
| }, |
| { |
| "epoch": 0.09466811751904244, |
| "grad_norm": 0.109502412378788, |
| "learning_rate": 0.0001959405053668079, |
| "loss": 0.2796, |
| "step": 261 |
| }, |
| { |
| "epoch": 0.09503083061298513, |
| "grad_norm": 0.12356701493263245, |
| "learning_rate": 0.00019590818820690336, |
| "loss": 0.2963, |
| "step": 262 |
| }, |
| { |
| "epoch": 0.09539354370692782, |
| "grad_norm": 0.1127593144774437, |
| "learning_rate": 0.00019587574560641518, |
| "loss": 0.2646, |
| "step": 263 |
| }, |
| { |
| "epoch": 0.0957562568008705, |
| "grad_norm": 0.13234767317771912, |
| "learning_rate": 0.00019584317760777578, |
| "loss": 0.2816, |
| "step": 264 |
| }, |
| { |
| "epoch": 0.09611896989481321, |
| "grad_norm": 0.10984192788600922, |
| "learning_rate": 0.00019581048425358158, |
| "loss": 0.3069, |
| "step": 265 |
| }, |
| { |
| "epoch": 0.0964816829887559, |
| "grad_norm": 0.1149398684501648, |
| "learning_rate": 0.00019577766558659306, |
| "loss": 0.2574, |
| "step": 266 |
| }, |
| { |
| "epoch": 0.09684439608269858, |
| "grad_norm": 0.10994721949100494, |
| "learning_rate": 0.00019574472164973452, |
| "loss": 0.2705, |
| "step": 267 |
| }, |
| { |
| "epoch": 0.09720710917664127, |
| "grad_norm": 0.10396052896976471, |
| "learning_rate": 0.00019571165248609407, |
| "loss": 0.2343, |
| "step": 268 |
| }, |
| { |
| "epoch": 0.09756982227058397, |
| "grad_norm": 0.1382754147052765, |
| "learning_rate": 0.00019567845813892368, |
| "loss": 0.2586, |
| "step": 269 |
| }, |
| { |
| "epoch": 0.09793253536452666, |
| "grad_norm": 0.10811847448348999, |
| "learning_rate": 0.000195645138651639, |
| "loss": 0.2599, |
| "step": 270 |
| }, |
| { |
| "epoch": 0.09829524845846935, |
| "grad_norm": 0.12254346907138824, |
| "learning_rate": 0.00019561169406781938, |
| "loss": 0.2543, |
| "step": 271 |
| }, |
| { |
| "epoch": 0.09865796155241205, |
| "grad_norm": 0.10719288885593414, |
| "learning_rate": 0.00019557812443120779, |
| "loss": 0.2788, |
| "step": 272 |
| }, |
| { |
| "epoch": 0.09902067464635474, |
| "grad_norm": 0.11490897834300995, |
| "learning_rate": 0.00019554442978571076, |
| "loss": 0.3076, |
| "step": 273 |
| }, |
| { |
| "epoch": 0.09938338774029742, |
| "grad_norm": 0.11272160708904266, |
| "learning_rate": 0.00019551061017539828, |
| "loss": 0.2719, |
| "step": 274 |
| }, |
| { |
| "epoch": 0.09974610083424011, |
| "grad_norm": 0.11950589716434479, |
| "learning_rate": 0.00019547666564450383, |
| "loss": 0.2424, |
| "step": 275 |
| }, |
| { |
| "epoch": 0.10010881392818281, |
| "grad_norm": 0.10737808048725128, |
| "learning_rate": 0.00019544259623742428, |
| "loss": 0.2628, |
| "step": 276 |
| }, |
| { |
| "epoch": 0.1004715270221255, |
| "grad_norm": 0.10422177612781525, |
| "learning_rate": 0.00019540840199871982, |
| "loss": 0.2515, |
| "step": 277 |
| }, |
| { |
| "epoch": 0.10083424011606819, |
| "grad_norm": 0.12654827535152435, |
| "learning_rate": 0.00019537408297311384, |
| "loss": 0.3258, |
| "step": 278 |
| }, |
| { |
| "epoch": 0.10119695321001088, |
| "grad_norm": 0.10753121972084045, |
| "learning_rate": 0.00019533963920549306, |
| "loss": 0.2633, |
| "step": 279 |
| }, |
| { |
| "epoch": 0.10155966630395358, |
| "grad_norm": 0.1134246215224266, |
| "learning_rate": 0.0001953050707409073, |
| "loss": 0.2777, |
| "step": 280 |
| }, |
| { |
| "epoch": 0.10192237939789626, |
| "grad_norm": 0.11118260025978088, |
| "learning_rate": 0.00019527037762456944, |
| "loss": 0.2684, |
| "step": 281 |
| }, |
| { |
| "epoch": 0.10228509249183895, |
| "grad_norm": 0.12425535172224045, |
| "learning_rate": 0.0001952355599018554, |
| "loss": 0.28, |
| "step": 282 |
| }, |
| { |
| "epoch": 0.10264780558578164, |
| "grad_norm": 0.12097672373056412, |
| "learning_rate": 0.00019520061761830424, |
| "loss": 0.2589, |
| "step": 283 |
| }, |
| { |
| "epoch": 0.10301051867972434, |
| "grad_norm": 0.11388805508613586, |
| "learning_rate": 0.00019516555081961764, |
| "loss": 0.2864, |
| "step": 284 |
| }, |
| { |
| "epoch": 0.10337323177366703, |
| "grad_norm": 0.10794699192047119, |
| "learning_rate": 0.00019513035955166035, |
| "loss": 0.2754, |
| "step": 285 |
| }, |
| { |
| "epoch": 0.10373594486760972, |
| "grad_norm": 0.10783129185438156, |
| "learning_rate": 0.00019509504386045986, |
| "loss": 0.252, |
| "step": 286 |
| }, |
| { |
| "epoch": 0.10409865796155242, |
| "grad_norm": 0.12570741772651672, |
| "learning_rate": 0.0001950596037922064, |
| "loss": 0.2563, |
| "step": 287 |
| }, |
| { |
| "epoch": 0.1044613710554951, |
| "grad_norm": 0.12100599706172943, |
| "learning_rate": 0.0001950240393932529, |
| "loss": 0.2811, |
| "step": 288 |
| }, |
| { |
| "epoch": 0.1048240841494378, |
| "grad_norm": 0.09901045262813568, |
| "learning_rate": 0.0001949883507101148, |
| "loss": 0.2724, |
| "step": 289 |
| }, |
| { |
| "epoch": 0.10518679724338048, |
| "grad_norm": 0.10405360162258148, |
| "learning_rate": 0.00019495253778947026, |
| "loss": 0.274, |
| "step": 290 |
| }, |
| { |
| "epoch": 0.10554951033732318, |
| "grad_norm": 0.11303572356700897, |
| "learning_rate": 0.0001949166006781598, |
| "loss": 0.2669, |
| "step": 291 |
| }, |
| { |
| "epoch": 0.10591222343126587, |
| "grad_norm": 0.1083337813615799, |
| "learning_rate": 0.0001948805394231864, |
| "loss": 0.2865, |
| "step": 292 |
| }, |
| { |
| "epoch": 0.10627493652520856, |
| "grad_norm": 0.10910173505544662, |
| "learning_rate": 0.00019484435407171545, |
| "loss": 0.2651, |
| "step": 293 |
| }, |
| { |
| "epoch": 0.10663764961915125, |
| "grad_norm": 0.10337372124195099, |
| "learning_rate": 0.00019480804467107463, |
| "loss": 0.2509, |
| "step": 294 |
| }, |
| { |
| "epoch": 0.10700036271309395, |
| "grad_norm": 0.1112721636891365, |
| "learning_rate": 0.00019477161126875387, |
| "loss": 0.2666, |
| "step": 295 |
| }, |
| { |
| "epoch": 0.10736307580703663, |
| "grad_norm": 0.11390243470668793, |
| "learning_rate": 0.00019473505391240522, |
| "loss": 0.278, |
| "step": 296 |
| }, |
| { |
| "epoch": 0.10772578890097932, |
| "grad_norm": 0.11081282794475555, |
| "learning_rate": 0.000194698372649843, |
| "loss": 0.2725, |
| "step": 297 |
| }, |
| { |
| "epoch": 0.10808850199492202, |
| "grad_norm": 0.12400209158658981, |
| "learning_rate": 0.00019466156752904343, |
| "loss": 0.2812, |
| "step": 298 |
| }, |
| { |
| "epoch": 0.10845121508886471, |
| "grad_norm": 0.11567061394453049, |
| "learning_rate": 0.0001946246385981448, |
| "loss": 0.2907, |
| "step": 299 |
| }, |
| { |
| "epoch": 0.1088139281828074, |
| "grad_norm": 0.11256127059459686, |
| "learning_rate": 0.0001945875859054474, |
| "loss": 0.2537, |
| "step": 300 |
| }, |
| { |
| "epoch": 0.10917664127675009, |
| "grad_norm": 0.12261880189180374, |
| "learning_rate": 0.0001945504094994132, |
| "loss": 0.2726, |
| "step": 301 |
| }, |
| { |
| "epoch": 0.10953935437069279, |
| "grad_norm": 0.10978831350803375, |
| "learning_rate": 0.00019451310942866621, |
| "loss": 0.2578, |
| "step": 302 |
| }, |
| { |
| "epoch": 0.10990206746463548, |
| "grad_norm": 0.12203028053045273, |
| "learning_rate": 0.00019447568574199202, |
| "loss": 0.2685, |
| "step": 303 |
| }, |
| { |
| "epoch": 0.11026478055857816, |
| "grad_norm": 0.11995328217744827, |
| "learning_rate": 0.000194438138488338, |
| "loss": 0.2914, |
| "step": 304 |
| }, |
| { |
| "epoch": 0.11062749365252085, |
| "grad_norm": 0.1177087351679802, |
| "learning_rate": 0.000194400467716813, |
| "loss": 0.2576, |
| "step": 305 |
| }, |
| { |
| "epoch": 0.11099020674646355, |
| "grad_norm": 0.11549436300992966, |
| "learning_rate": 0.00019436267347668757, |
| "loss": 0.2789, |
| "step": 306 |
| }, |
| { |
| "epoch": 0.11135291984040624, |
| "grad_norm": 0.12319694459438324, |
| "learning_rate": 0.0001943247558173937, |
| "loss": 0.2676, |
| "step": 307 |
| }, |
| { |
| "epoch": 0.11171563293434893, |
| "grad_norm": 0.13126415014266968, |
| "learning_rate": 0.00019428671478852479, |
| "loss": 0.2612, |
| "step": 308 |
| }, |
| { |
| "epoch": 0.11207834602829161, |
| "grad_norm": 0.11185677349567413, |
| "learning_rate": 0.00019424855043983556, |
| "loss": 0.2607, |
| "step": 309 |
| }, |
| { |
| "epoch": 0.11244105912223432, |
| "grad_norm": 0.1092672273516655, |
| "learning_rate": 0.00019421026282124212, |
| "loss": 0.2521, |
| "step": 310 |
| }, |
| { |
| "epoch": 0.112803772216177, |
| "grad_norm": 0.12753579020500183, |
| "learning_rate": 0.00019417185198282168, |
| "loss": 0.2876, |
| "step": 311 |
| }, |
| { |
| "epoch": 0.11316648531011969, |
| "grad_norm": 0.11622543632984161, |
| "learning_rate": 0.00019413331797481277, |
| "loss": 0.2656, |
| "step": 312 |
| }, |
| { |
| "epoch": 0.11352919840406239, |
| "grad_norm": 0.11567405611276627, |
| "learning_rate": 0.00019409466084761485, |
| "loss": 0.2836, |
| "step": 313 |
| }, |
| { |
| "epoch": 0.11389191149800508, |
| "grad_norm": 0.11441784352064133, |
| "learning_rate": 0.00019405588065178852, |
| "loss": 0.2523, |
| "step": 314 |
| }, |
| { |
| "epoch": 0.11425462459194777, |
| "grad_norm": 0.11300231516361237, |
| "learning_rate": 0.0001940169774380553, |
| "loss": 0.2804, |
| "step": 315 |
| }, |
| { |
| "epoch": 0.11461733768589046, |
| "grad_norm": 0.12194045633077621, |
| "learning_rate": 0.00019397795125729767, |
| "loss": 0.2867, |
| "step": 316 |
| }, |
| { |
| "epoch": 0.11498005077983316, |
| "grad_norm": 0.12124588340520859, |
| "learning_rate": 0.00019393880216055887, |
| "loss": 0.2859, |
| "step": 317 |
| }, |
| { |
| "epoch": 0.11534276387377584, |
| "grad_norm": 0.11623072624206543, |
| "learning_rate": 0.00019389953019904285, |
| "loss": 0.288, |
| "step": 318 |
| }, |
| { |
| "epoch": 0.11570547696771853, |
| "grad_norm": 0.11297620832920074, |
| "learning_rate": 0.00019386013542411449, |
| "loss": 0.2896, |
| "step": 319 |
| }, |
| { |
| "epoch": 0.11606819006166122, |
| "grad_norm": 0.11987963318824768, |
| "learning_rate": 0.00019382061788729898, |
| "loss": 0.3479, |
| "step": 320 |
| }, |
| { |
| "epoch": 0.11643090315560392, |
| "grad_norm": 0.14857983589172363, |
| "learning_rate": 0.00019378097764028235, |
| "loss": 0.2519, |
| "step": 321 |
| }, |
| { |
| "epoch": 0.11679361624954661, |
| "grad_norm": 0.10684715956449509, |
| "learning_rate": 0.00019374121473491096, |
| "loss": 0.3014, |
| "step": 322 |
| }, |
| { |
| "epoch": 0.1171563293434893, |
| "grad_norm": 0.11060940474271774, |
| "learning_rate": 0.0001937013292231917, |
| "loss": 0.2522, |
| "step": 323 |
| }, |
| { |
| "epoch": 0.117519042437432, |
| "grad_norm": 0.10806398838758469, |
| "learning_rate": 0.00019366132115729173, |
| "loss": 0.2695, |
| "step": 324 |
| }, |
| { |
| "epoch": 0.11788175553137469, |
| "grad_norm": 0.11272536218166351, |
| "learning_rate": 0.0001936211905895386, |
| "loss": 0.2666, |
| "step": 325 |
| }, |
| { |
| "epoch": 0.11824446862531737, |
| "grad_norm": 0.11766637116670609, |
| "learning_rate": 0.00019358093757241996, |
| "loss": 0.3007, |
| "step": 326 |
| }, |
| { |
| "epoch": 0.11860718171926006, |
| "grad_norm": 0.1170196607708931, |
| "learning_rate": 0.0001935405621585837, |
| "loss": 0.2678, |
| "step": 327 |
| }, |
| { |
| "epoch": 0.11896989481320276, |
| "grad_norm": 0.12220901250839233, |
| "learning_rate": 0.0001935000644008378, |
| "loss": 0.2519, |
| "step": 328 |
| }, |
| { |
| "epoch": 0.11933260790714545, |
| "grad_norm": 0.1201847493648529, |
| "learning_rate": 0.00019345944435215023, |
| "loss": 0.267, |
| "step": 329 |
| }, |
| { |
| "epoch": 0.11969532100108814, |
| "grad_norm": 0.11570829898118973, |
| "learning_rate": 0.00019341870206564886, |
| "loss": 0.2515, |
| "step": 330 |
| }, |
| { |
| "epoch": 0.12005803409503082, |
| "grad_norm": 0.12002036720514297, |
| "learning_rate": 0.0001933778375946216, |
| "loss": 0.2767, |
| "step": 331 |
| }, |
| { |
| "epoch": 0.12042074718897353, |
| "grad_norm": 0.12402871996164322, |
| "learning_rate": 0.00019333685099251594, |
| "loss": 0.2508, |
| "step": 332 |
| }, |
| { |
| "epoch": 0.12078346028291621, |
| "grad_norm": 0.11982254683971405, |
| "learning_rate": 0.00019329574231293926, |
| "loss": 0.2802, |
| "step": 333 |
| }, |
| { |
| "epoch": 0.1211461733768589, |
| "grad_norm": 0.11482241749763489, |
| "learning_rate": 0.0001932545116096586, |
| "loss": 0.2774, |
| "step": 334 |
| }, |
| { |
| "epoch": 0.1215088864708016, |
| "grad_norm": 0.1279384046792984, |
| "learning_rate": 0.00019321315893660056, |
| "loss": 0.2718, |
| "step": 335 |
| }, |
| { |
| "epoch": 0.12187159956474429, |
| "grad_norm": 0.11594551056623459, |
| "learning_rate": 0.00019317168434785127, |
| "loss": 0.2771, |
| "step": 336 |
| }, |
| { |
| "epoch": 0.12223431265868698, |
| "grad_norm": 0.1129961609840393, |
| "learning_rate": 0.0001931300878976563, |
| "loss": 0.2602, |
| "step": 337 |
| }, |
| { |
| "epoch": 0.12259702575262967, |
| "grad_norm": 0.11392521858215332, |
| "learning_rate": 0.0001930883696404207, |
| "loss": 0.2595, |
| "step": 338 |
| }, |
| { |
| "epoch": 0.12295973884657237, |
| "grad_norm": 0.10742796212434769, |
| "learning_rate": 0.0001930465296307087, |
| "loss": 0.2473, |
| "step": 339 |
| }, |
| { |
| "epoch": 0.12332245194051505, |
| "grad_norm": 0.11807534843683243, |
| "learning_rate": 0.00019300456792324382, |
| "loss": 0.2374, |
| "step": 340 |
| }, |
| { |
| "epoch": 0.12368516503445774, |
| "grad_norm": 0.13207505643367767, |
| "learning_rate": 0.00019296248457290882, |
| "loss": 0.2732, |
| "step": 341 |
| }, |
| { |
| "epoch": 0.12404787812840043, |
| "grad_norm": 0.13366468250751495, |
| "learning_rate": 0.00019292027963474547, |
| "loss": 0.2702, |
| "step": 342 |
| }, |
| { |
| "epoch": 0.12441059122234313, |
| "grad_norm": 0.1288871318101883, |
| "learning_rate": 0.00019287795316395468, |
| "loss": 0.2667, |
| "step": 343 |
| }, |
| { |
| "epoch": 0.12477330431628582, |
| "grad_norm": 0.11883368343114853, |
| "learning_rate": 0.00019283550521589614, |
| "loss": 0.2666, |
| "step": 344 |
| }, |
| { |
| "epoch": 0.1251360174102285, |
| "grad_norm": 0.1264144480228424, |
| "learning_rate": 0.00019279293584608856, |
| "loss": 0.2795, |
| "step": 345 |
| }, |
| { |
| "epoch": 0.1254987305041712, |
| "grad_norm": 0.12721741199493408, |
| "learning_rate": 0.0001927502451102095, |
| "loss": 0.2516, |
| "step": 346 |
| }, |
| { |
| "epoch": 0.12586144359811388, |
| "grad_norm": 0.1189354807138443, |
| "learning_rate": 0.00019270743306409505, |
| "loss": 0.2489, |
| "step": 347 |
| }, |
| { |
| "epoch": 0.12622415669205658, |
| "grad_norm": 0.12466361373662949, |
| "learning_rate": 0.00019266449976374018, |
| "loss": 0.2856, |
| "step": 348 |
| }, |
| { |
| "epoch": 0.12658686978599928, |
| "grad_norm": 0.13144852221012115, |
| "learning_rate": 0.00019262144526529832, |
| "loss": 0.2612, |
| "step": 349 |
| }, |
| { |
| "epoch": 0.12694958287994196, |
| "grad_norm": 0.10754833370447159, |
| "learning_rate": 0.0001925782696250815, |
| "loss": 0.2523, |
| "step": 350 |
| }, |
| { |
| "epoch": 0.12731229597388466, |
| "grad_norm": 0.1237715408205986, |
| "learning_rate": 0.0001925349728995602, |
| "loss": 0.2526, |
| "step": 351 |
| }, |
| { |
| "epoch": 0.12767500906782736, |
| "grad_norm": 0.1193939596414566, |
| "learning_rate": 0.00019249155514536312, |
| "loss": 0.2819, |
| "step": 352 |
| }, |
| { |
| "epoch": 0.12803772216177003, |
| "grad_norm": 0.12648704648017883, |
| "learning_rate": 0.00019244801641927746, |
| "loss": 0.2709, |
| "step": 353 |
| }, |
| { |
| "epoch": 0.12840043525571274, |
| "grad_norm": 0.11707579344511032, |
| "learning_rate": 0.0001924043567782485, |
| "loss": 0.2853, |
| "step": 354 |
| }, |
| { |
| "epoch": 0.1287631483496554, |
| "grad_norm": 0.12175849080085754, |
| "learning_rate": 0.00019236057627937975, |
| "loss": 0.2702, |
| "step": 355 |
| }, |
| { |
| "epoch": 0.1291258614435981, |
| "grad_norm": 0.1120310127735138, |
| "learning_rate": 0.0001923166749799327, |
| "loss": 0.2596, |
| "step": 356 |
| }, |
| { |
| "epoch": 0.1294885745375408, |
| "grad_norm": 0.12282121926546097, |
| "learning_rate": 0.00019227265293732693, |
| "loss": 0.2581, |
| "step": 357 |
| }, |
| { |
| "epoch": 0.1298512876314835, |
| "grad_norm": 0.13752269744873047, |
| "learning_rate": 0.00019222851020913995, |
| "loss": 0.2641, |
| "step": 358 |
| }, |
| { |
| "epoch": 0.1302140007254262, |
| "grad_norm": 0.11744178086519241, |
| "learning_rate": 0.00019218424685310702, |
| "loss": 0.2462, |
| "step": 359 |
| }, |
| { |
| "epoch": 0.1305767138193689, |
| "grad_norm": 0.11440069228410721, |
| "learning_rate": 0.00019213986292712125, |
| "loss": 0.2495, |
| "step": 360 |
| }, |
| { |
| "epoch": 0.13093942691331156, |
| "grad_norm": 0.11646847426891327, |
| "learning_rate": 0.00019209535848923343, |
| "loss": 0.3054, |
| "step": 361 |
| }, |
| { |
| "epoch": 0.13130214000725426, |
| "grad_norm": 0.11386696994304657, |
| "learning_rate": 0.00019205073359765192, |
| "loss": 0.2503, |
| "step": 362 |
| }, |
| { |
| "epoch": 0.13166485310119697, |
| "grad_norm": 0.12510043382644653, |
| "learning_rate": 0.00019200598831074274, |
| "loss": 0.275, |
| "step": 363 |
| }, |
| { |
| "epoch": 0.13202756619513964, |
| "grad_norm": 0.12363200634717941, |
| "learning_rate": 0.00019196112268702925, |
| "loss": 0.2746, |
| "step": 364 |
| }, |
| { |
| "epoch": 0.13239027928908234, |
| "grad_norm": 0.11029732972383499, |
| "learning_rate": 0.0001919161367851923, |
| "loss": 0.3095, |
| "step": 365 |
| }, |
| { |
| "epoch": 0.13275299238302501, |
| "grad_norm": 0.12199590355157852, |
| "learning_rate": 0.00019187103066406998, |
| "loss": 0.2641, |
| "step": 366 |
| }, |
| { |
| "epoch": 0.13311570547696772, |
| "grad_norm": 0.11692757904529572, |
| "learning_rate": 0.00019182580438265764, |
| "loss": 0.2646, |
| "step": 367 |
| }, |
| { |
| "epoch": 0.13347841857091042, |
| "grad_norm": 0.11142277717590332, |
| "learning_rate": 0.00019178045800010787, |
| "loss": 0.2495, |
| "step": 368 |
| }, |
| { |
| "epoch": 0.1338411316648531, |
| "grad_norm": 0.11492447555065155, |
| "learning_rate": 0.00019173499157573023, |
| "loss": 0.2647, |
| "step": 369 |
| }, |
| { |
| "epoch": 0.1342038447587958, |
| "grad_norm": 0.114183709025383, |
| "learning_rate": 0.0001916894051689913, |
| "loss": 0.2499, |
| "step": 370 |
| }, |
| { |
| "epoch": 0.1345665578527385, |
| "grad_norm": 0.11262322962284088, |
| "learning_rate": 0.00019164369883951468, |
| "loss": 0.2749, |
| "step": 371 |
| }, |
| { |
| "epoch": 0.13492927094668117, |
| "grad_norm": 0.11667259782552719, |
| "learning_rate": 0.0001915978726470807, |
| "loss": 0.269, |
| "step": 372 |
| }, |
| { |
| "epoch": 0.13529198404062387, |
| "grad_norm": 0.1220724880695343, |
| "learning_rate": 0.00019155192665162656, |
| "loss": 0.2652, |
| "step": 373 |
| }, |
| { |
| "epoch": 0.13565469713456657, |
| "grad_norm": 0.12185841798782349, |
| "learning_rate": 0.0001915058609132461, |
| "loss": 0.2754, |
| "step": 374 |
| }, |
| { |
| "epoch": 0.13601741022850924, |
| "grad_norm": 0.11733336001634598, |
| "learning_rate": 0.00019145967549218974, |
| "loss": 0.2685, |
| "step": 375 |
| }, |
| { |
| "epoch": 0.13638012332245195, |
| "grad_norm": 0.12325771152973175, |
| "learning_rate": 0.00019141337044886457, |
| "loss": 0.2548, |
| "step": 376 |
| }, |
| { |
| "epoch": 0.13674283641639462, |
| "grad_norm": 0.11737928539514542, |
| "learning_rate": 0.000191366945843834, |
| "loss": 0.2875, |
| "step": 377 |
| }, |
| { |
| "epoch": 0.13710554951033732, |
| "grad_norm": 0.11719442158937454, |
| "learning_rate": 0.00019132040173781788, |
| "loss": 0.244, |
| "step": 378 |
| }, |
| { |
| "epoch": 0.13746826260428002, |
| "grad_norm": 0.1146400049328804, |
| "learning_rate": 0.0001912737381916923, |
| "loss": 0.2595, |
| "step": 379 |
| }, |
| { |
| "epoch": 0.1378309756982227, |
| "grad_norm": 0.11577652394771576, |
| "learning_rate": 0.00019122695526648968, |
| "loss": 0.276, |
| "step": 380 |
| }, |
| { |
| "epoch": 0.1381936887921654, |
| "grad_norm": 0.10648276656866074, |
| "learning_rate": 0.00019118005302339847, |
| "loss": 0.2444, |
| "step": 381 |
| }, |
| { |
| "epoch": 0.1385564018861081, |
| "grad_norm": 0.10874751210212708, |
| "learning_rate": 0.00019113303152376324, |
| "loss": 0.2502, |
| "step": 382 |
| }, |
| { |
| "epoch": 0.13891911498005077, |
| "grad_norm": 0.1190841868519783, |
| "learning_rate": 0.00019108589082908453, |
| "loss": 0.2477, |
| "step": 383 |
| }, |
| { |
| "epoch": 0.13928182807399347, |
| "grad_norm": 0.11433839052915573, |
| "learning_rate": 0.00019103863100101873, |
| "loss": 0.2651, |
| "step": 384 |
| }, |
| { |
| "epoch": 0.13964454116793618, |
| "grad_norm": 0.1088482066988945, |
| "learning_rate": 0.00019099125210137813, |
| "loss": 0.2452, |
| "step": 385 |
| }, |
| { |
| "epoch": 0.14000725426187885, |
| "grad_norm": 0.115386962890625, |
| "learning_rate": 0.00019094375419213065, |
| "loss": 0.2579, |
| "step": 386 |
| }, |
| { |
| "epoch": 0.14036996735582155, |
| "grad_norm": 0.1259610801935196, |
| "learning_rate": 0.0001908961373354, |
| "loss": 0.2712, |
| "step": 387 |
| }, |
| { |
| "epoch": 0.14073268044976422, |
| "grad_norm": 4882568.5, |
| "learning_rate": 0.00019084840159346532, |
| "loss": 0.2385, |
| "step": 388 |
| }, |
| { |
| "epoch": 0.14109539354370693, |
| "grad_norm": 0.12656670808792114, |
| "learning_rate": 0.0001908005470287614, |
| "loss": 0.2406, |
| "step": 389 |
| }, |
| { |
| "epoch": 0.14145810663764963, |
| "grad_norm": 0.13908933103084564, |
| "learning_rate": 0.00019075257370387827, |
| "loss": 0.2433, |
| "step": 390 |
| }, |
| { |
| "epoch": 0.1418208197315923, |
| "grad_norm": 0.14672155678272247, |
| "learning_rate": 0.0001907044816815614, |
| "loss": 0.2544, |
| "step": 391 |
| }, |
| { |
| "epoch": 0.142183532825535, |
| "grad_norm": 0.15031826496124268, |
| "learning_rate": 0.0001906562710247115, |
| "loss": 0.2652, |
| "step": 392 |
| }, |
| { |
| "epoch": 0.1425462459194777, |
| "grad_norm": 0.13194704055786133, |
| "learning_rate": 0.00019060794179638445, |
| "loss": 0.2603, |
| "step": 393 |
| }, |
| { |
| "epoch": 0.14290895901342038, |
| "grad_norm": 0.13189998269081116, |
| "learning_rate": 0.0001905594940597911, |
| "loss": 0.2419, |
| "step": 394 |
| }, |
| { |
| "epoch": 0.14327167210736308, |
| "grad_norm": 0.1245296448469162, |
| "learning_rate": 0.00019051092787829746, |
| "loss": 0.2816, |
| "step": 395 |
| }, |
| { |
| "epoch": 0.14363438520130578, |
| "grad_norm": 0.14372986555099487, |
| "learning_rate": 0.0001904622433154244, |
| "loss": 0.261, |
| "step": 396 |
| }, |
| { |
| "epoch": 0.14399709829524845, |
| "grad_norm": 0.13385535776615143, |
| "learning_rate": 0.00019041344043484754, |
| "loss": 0.2702, |
| "step": 397 |
| }, |
| { |
| "epoch": 0.14435981138919116, |
| "grad_norm": 0.13935022056102753, |
| "learning_rate": 0.00019036451930039738, |
| "loss": 0.2907, |
| "step": 398 |
| }, |
| { |
| "epoch": 0.14472252448313383, |
| "grad_norm": 0.11567000299692154, |
| "learning_rate": 0.00019031547997605902, |
| "loss": 0.2618, |
| "step": 399 |
| }, |
| { |
| "epoch": 0.14508523757707653, |
| "grad_norm": 0.1412486582994461, |
| "learning_rate": 0.0001902663225259721, |
| "loss": 0.3055, |
| "step": 400 |
| }, |
| { |
| "epoch": 0.14544795067101923, |
| "grad_norm": 0.13404829800128937, |
| "learning_rate": 0.00019021704701443083, |
| "loss": 0.2565, |
| "step": 401 |
| }, |
| { |
| "epoch": 0.1458106637649619, |
| "grad_norm": 0.15074236690998077, |
| "learning_rate": 0.00019016765350588389, |
| "loss": 0.2737, |
| "step": 402 |
| }, |
| { |
| "epoch": 0.1461733768589046, |
| "grad_norm": 0.11905822902917862, |
| "learning_rate": 0.00019011814206493411, |
| "loss": 0.2462, |
| "step": 403 |
| }, |
| { |
| "epoch": 0.1465360899528473, |
| "grad_norm": 0.13609488308429718, |
| "learning_rate": 0.00019006851275633871, |
| "loss": 0.3008, |
| "step": 404 |
| }, |
| { |
| "epoch": 0.14689880304678998, |
| "grad_norm": 0.13262596726417542, |
| "learning_rate": 0.00019001876564500909, |
| "loss": 0.2682, |
| "step": 405 |
| }, |
| { |
| "epoch": 0.14726151614073268, |
| "grad_norm": 0.12421231716871262, |
| "learning_rate": 0.00018996890079601059, |
| "loss": 0.2553, |
| "step": 406 |
| }, |
| { |
| "epoch": 0.14762422923467536, |
| "grad_norm": 0.14463739097118378, |
| "learning_rate": 0.00018991891827456266, |
| "loss": 0.2483, |
| "step": 407 |
| }, |
| { |
| "epoch": 0.14798694232861806, |
| "grad_norm": 0.12037564069032669, |
| "learning_rate": 0.00018986881814603862, |
| "loss": 0.2807, |
| "step": 408 |
| }, |
| { |
| "epoch": 0.14834965542256076, |
| "grad_norm": 0.1340160369873047, |
| "learning_rate": 0.0001898186004759656, |
| "loss": 0.248, |
| "step": 409 |
| }, |
| { |
| "epoch": 0.14871236851650343, |
| "grad_norm": 0.13164542615413666, |
| "learning_rate": 0.0001897682653300245, |
| "loss": 0.2617, |
| "step": 410 |
| }, |
| { |
| "epoch": 0.14907508161044614, |
| "grad_norm": 0.12125716358423233, |
| "learning_rate": 0.0001897178127740498, |
| "loss": 0.249, |
| "step": 411 |
| }, |
| { |
| "epoch": 0.14943779470438884, |
| "grad_norm": 0.13088323175907135, |
| "learning_rate": 0.00018966724287402964, |
| "loss": 0.2855, |
| "step": 412 |
| }, |
| { |
| "epoch": 0.1498005077983315, |
| "grad_norm": 0.13843600451946259, |
| "learning_rate": 0.00018961655569610557, |
| "loss": 0.2613, |
| "step": 413 |
| }, |
| { |
| "epoch": 0.1501632208922742, |
| "grad_norm": 0.12319327145814896, |
| "learning_rate": 0.00018956575130657256, |
| "loss": 0.2675, |
| "step": 414 |
| }, |
| { |
| "epoch": 0.15052593398621691, |
| "grad_norm": 0.12738944590091705, |
| "learning_rate": 0.0001895148297718788, |
| "loss": 0.2492, |
| "step": 415 |
| }, |
| { |
| "epoch": 0.1508886470801596, |
| "grad_norm": 0.1370190680027008, |
| "learning_rate": 0.00018946379115862585, |
| "loss": 0.2565, |
| "step": 416 |
| }, |
| { |
| "epoch": 0.1512513601741023, |
| "grad_norm": 0.12752386927604675, |
| "learning_rate": 0.00018941263553356829, |
| "loss": 0.2752, |
| "step": 417 |
| }, |
| { |
| "epoch": 0.15161407326804496, |
| "grad_norm": 0.12467992305755615, |
| "learning_rate": 0.00018936136296361373, |
| "loss": 0.261, |
| "step": 418 |
| }, |
| { |
| "epoch": 0.15197678636198766, |
| "grad_norm": 0.12830005586147308, |
| "learning_rate": 0.00018930997351582286, |
| "loss": 0.2579, |
| "step": 419 |
| }, |
| { |
| "epoch": 0.15233949945593037, |
| "grad_norm": 0.1329096108675003, |
| "learning_rate": 0.00018925846725740907, |
| "loss": 0.2736, |
| "step": 420 |
| }, |
| { |
| "epoch": 0.15270221254987304, |
| "grad_norm": 0.12870270013809204, |
| "learning_rate": 0.00018920684425573865, |
| "loss": 0.2519, |
| "step": 421 |
| }, |
| { |
| "epoch": 0.15306492564381574, |
| "grad_norm": 0.1223597452044487, |
| "learning_rate": 0.00018915510457833055, |
| "loss": 0.2462, |
| "step": 422 |
| }, |
| { |
| "epoch": 0.15342763873775844, |
| "grad_norm": 0.13859276473522186, |
| "learning_rate": 0.0001891032482928563, |
| "loss": 0.2546, |
| "step": 423 |
| }, |
| { |
| "epoch": 0.15379035183170112, |
| "grad_norm": 0.12266798317432404, |
| "learning_rate": 0.00018905127546713996, |
| "loss": 0.2426, |
| "step": 424 |
| }, |
| { |
| "epoch": 0.15415306492564382, |
| "grad_norm": 0.1270112842321396, |
| "learning_rate": 0.00018899918616915802, |
| "loss": 0.2719, |
| "step": 425 |
| }, |
| { |
| "epoch": 0.15451577801958652, |
| "grad_norm": 0.12060489505529404, |
| "learning_rate": 0.0001889469804670393, |
| "loss": 0.2617, |
| "step": 426 |
| }, |
| { |
| "epoch": 0.1548784911135292, |
| "grad_norm": 0.1132146492600441, |
| "learning_rate": 0.00018889465842906488, |
| "loss": 0.2464, |
| "step": 427 |
| }, |
| { |
| "epoch": 0.1552412042074719, |
| "grad_norm": 0.12224707752466202, |
| "learning_rate": 0.00018884222012366796, |
| "loss": 0.2963, |
| "step": 428 |
| }, |
| { |
| "epoch": 0.15560391730141457, |
| "grad_norm": 0.11490823328495026, |
| "learning_rate": 0.00018878966561943386, |
| "loss": 0.2686, |
| "step": 429 |
| }, |
| { |
| "epoch": 0.15596663039535727, |
| "grad_norm": 0.16463352739810944, |
| "learning_rate": 0.00018873699498509988, |
| "loss": 0.2986, |
| "step": 430 |
| }, |
| { |
| "epoch": 0.15632934348929997, |
| "grad_norm": 0.12075062096118927, |
| "learning_rate": 0.00018868420828955514, |
| "loss": 0.2968, |
| "step": 431 |
| }, |
| { |
| "epoch": 0.15669205658324264, |
| "grad_norm": 0.1205056831240654, |
| "learning_rate": 0.00018863130560184063, |
| "loss": 0.2565, |
| "step": 432 |
| }, |
| { |
| "epoch": 0.15705476967718535, |
| "grad_norm": 0.1396438032388687, |
| "learning_rate": 0.00018857828699114904, |
| "loss": 0.2686, |
| "step": 433 |
| }, |
| { |
| "epoch": 0.15741748277112805, |
| "grad_norm": 0.11857564747333527, |
| "learning_rate": 0.0001885251525268246, |
| "loss": 0.2453, |
| "step": 434 |
| }, |
| { |
| "epoch": 0.15778019586507072, |
| "grad_norm": 0.12120261788368225, |
| "learning_rate": 0.0001884719022783632, |
| "loss": 0.2363, |
| "step": 435 |
| }, |
| { |
| "epoch": 0.15814290895901342, |
| "grad_norm": 0.1222701370716095, |
| "learning_rate": 0.00018841853631541207, |
| "loss": 0.2641, |
| "step": 436 |
| }, |
| { |
| "epoch": 0.15850562205295612, |
| "grad_norm": 0.12121476233005524, |
| "learning_rate": 0.00018836505470776983, |
| "loss": 0.2542, |
| "step": 437 |
| }, |
| { |
| "epoch": 0.1588683351468988, |
| "grad_norm": 0.12737686932086945, |
| "learning_rate": 0.0001883114575253863, |
| "loss": 0.2502, |
| "step": 438 |
| }, |
| { |
| "epoch": 0.1592310482408415, |
| "grad_norm": 0.12551474571228027, |
| "learning_rate": 0.00018825774483836248, |
| "loss": 0.2676, |
| "step": 439 |
| }, |
| { |
| "epoch": 0.15959376133478417, |
| "grad_norm": 0.12225164473056793, |
| "learning_rate": 0.00018820391671695057, |
| "loss": 0.2695, |
| "step": 440 |
| }, |
| { |
| "epoch": 0.15995647442872687, |
| "grad_norm": 0.12774313986301422, |
| "learning_rate": 0.00018814997323155357, |
| "loss": 0.2454, |
| "step": 441 |
| }, |
| { |
| "epoch": 0.16031918752266958, |
| "grad_norm": 0.12761445343494415, |
| "learning_rate": 0.0001880959144527254, |
| "loss": 0.2539, |
| "step": 442 |
| }, |
| { |
| "epoch": 0.16068190061661225, |
| "grad_norm": 0.11978595703840256, |
| "learning_rate": 0.00018804174045117087, |
| "loss": 0.2301, |
| "step": 443 |
| }, |
| { |
| "epoch": 0.16104461371055495, |
| "grad_norm": 0.12763962149620056, |
| "learning_rate": 0.00018798745129774543, |
| "loss": 0.2376, |
| "step": 444 |
| }, |
| { |
| "epoch": 0.16140732680449765, |
| "grad_norm": 0.13063186407089233, |
| "learning_rate": 0.00018793304706345515, |
| "loss": 0.2768, |
| "step": 445 |
| }, |
| { |
| "epoch": 0.16177003989844033, |
| "grad_norm": 0.11672946810722351, |
| "learning_rate": 0.00018787852781945656, |
| "loss": 0.246, |
| "step": 446 |
| }, |
| { |
| "epoch": 0.16213275299238303, |
| "grad_norm": 0.12725545465946198, |
| "learning_rate": 0.00018782389363705674, |
| "loss": 0.262, |
| "step": 447 |
| }, |
| { |
| "epoch": 0.16249546608632573, |
| "grad_norm": 0.1206207126379013, |
| "learning_rate": 0.00018776914458771296, |
| "loss": 0.2385, |
| "step": 448 |
| }, |
| { |
| "epoch": 0.1628581791802684, |
| "grad_norm": 0.11878547072410583, |
| "learning_rate": 0.00018771428074303286, |
| "loss": 0.2666, |
| "step": 449 |
| }, |
| { |
| "epoch": 0.1632208922742111, |
| "grad_norm": 0.12689107656478882, |
| "learning_rate": 0.0001876593021747741, |
| "loss": 0.2828, |
| "step": 450 |
| }, |
| { |
| "epoch": 0.16358360536815378, |
| "grad_norm": 0.11968659609556198, |
| "learning_rate": 0.00018760420895484446, |
| "loss": 0.2428, |
| "step": 451 |
| }, |
| { |
| "epoch": 0.16394631846209648, |
| "grad_norm": 0.13296844065189362, |
| "learning_rate": 0.0001875490011553017, |
| "loss": 0.2689, |
| "step": 452 |
| }, |
| { |
| "epoch": 0.16430903155603918, |
| "grad_norm": 0.13149085640907288, |
| "learning_rate": 0.00018749367884835337, |
| "loss": 0.259, |
| "step": 453 |
| }, |
| { |
| "epoch": 0.16467174464998185, |
| "grad_norm": 0.13679270446300507, |
| "learning_rate": 0.00018743824210635683, |
| "loss": 0.2604, |
| "step": 454 |
| }, |
| { |
| "epoch": 0.16503445774392456, |
| "grad_norm": 0.12205653637647629, |
| "learning_rate": 0.0001873826910018191, |
| "loss": 0.2557, |
| "step": 455 |
| }, |
| { |
| "epoch": 0.16539717083786726, |
| "grad_norm": 0.11403360217809677, |
| "learning_rate": 0.00018732702560739678, |
| "loss": 0.2596, |
| "step": 456 |
| }, |
| { |
| "epoch": 0.16575988393180993, |
| "grad_norm": 0.15047647058963776, |
| "learning_rate": 0.000187271245995896, |
| "loss": 0.2571, |
| "step": 457 |
| }, |
| { |
| "epoch": 0.16612259702575263, |
| "grad_norm": 0.12830372154712677, |
| "learning_rate": 0.00018721535224027212, |
| "loss": 0.256, |
| "step": 458 |
| }, |
| { |
| "epoch": 0.16648531011969533, |
| "grad_norm": 0.12144992500543594, |
| "learning_rate": 0.00018715934441363002, |
| "loss": 0.2488, |
| "step": 459 |
| }, |
| { |
| "epoch": 0.166848023213638, |
| "grad_norm": 0.128736212849617, |
| "learning_rate": 0.00018710322258922357, |
| "loss": 0.2541, |
| "step": 460 |
| }, |
| { |
| "epoch": 0.1672107363075807, |
| "grad_norm": 0.1277531534433365, |
| "learning_rate": 0.0001870469868404559, |
| "loss": 0.2609, |
| "step": 461 |
| }, |
| { |
| "epoch": 0.16757344940152338, |
| "grad_norm": 0.12313154339790344, |
| "learning_rate": 0.00018699063724087904, |
| "loss": 0.2547, |
| "step": 462 |
| }, |
| { |
| "epoch": 0.16793616249546608, |
| "grad_norm": 0.12278270721435547, |
| "learning_rate": 0.00018693417386419397, |
| "loss": 0.2509, |
| "step": 463 |
| }, |
| { |
| "epoch": 0.1682988755894088, |
| "grad_norm": 0.12022969871759415, |
| "learning_rate": 0.00018687759678425044, |
| "loss": 0.2384, |
| "step": 464 |
| }, |
| { |
| "epoch": 0.16866158868335146, |
| "grad_norm": 0.12230958789587021, |
| "learning_rate": 0.000186820906075047, |
| "loss": 0.2535, |
| "step": 465 |
| }, |
| { |
| "epoch": 0.16902430177729416, |
| "grad_norm": 0.13055519759655, |
| "learning_rate": 0.00018676410181073073, |
| "loss": 0.244, |
| "step": 466 |
| }, |
| { |
| "epoch": 0.16938701487123686, |
| "grad_norm": 0.12790988385677338, |
| "learning_rate": 0.0001867071840655973, |
| "loss": 0.2479, |
| "step": 467 |
| }, |
| { |
| "epoch": 0.16974972796517954, |
| "grad_norm": 0.13046807050704956, |
| "learning_rate": 0.00018665015291409077, |
| "loss": 0.2493, |
| "step": 468 |
| }, |
| { |
| "epoch": 0.17011244105912224, |
| "grad_norm": 0.1160719096660614, |
| "learning_rate": 0.00018659300843080348, |
| "loss": 0.2274, |
| "step": 469 |
| }, |
| { |
| "epoch": 0.17047515415306494, |
| "grad_norm": 0.1292848438024521, |
| "learning_rate": 0.00018653575069047608, |
| "loss": 0.258, |
| "step": 470 |
| }, |
| { |
| "epoch": 0.1708378672470076, |
| "grad_norm": 0.1197739690542221, |
| "learning_rate": 0.00018647837976799734, |
| "loss": 0.2276, |
| "step": 471 |
| }, |
| { |
| "epoch": 0.17120058034095031, |
| "grad_norm": 0.11929846554994583, |
| "learning_rate": 0.00018642089573840402, |
| "loss": 0.2617, |
| "step": 472 |
| }, |
| { |
| "epoch": 0.171563293434893, |
| "grad_norm": 0.12611514329910278, |
| "learning_rate": 0.00018636329867688085, |
| "loss": 0.2525, |
| "step": 473 |
| }, |
| { |
| "epoch": 0.1719260065288357, |
| "grad_norm": 0.1322082132101059, |
| "learning_rate": 0.0001863055886587604, |
| "loss": 0.2564, |
| "step": 474 |
| }, |
| { |
| "epoch": 0.1722887196227784, |
| "grad_norm": 0.1298658400774002, |
| "learning_rate": 0.0001862477657595229, |
| "loss": 0.2451, |
| "step": 475 |
| }, |
| { |
| "epoch": 0.17265143271672106, |
| "grad_norm": 0.1305808424949646, |
| "learning_rate": 0.00018618983005479637, |
| "loss": 0.2546, |
| "step": 476 |
| }, |
| { |
| "epoch": 0.17301414581066377, |
| "grad_norm": 0.1403343826532364, |
| "learning_rate": 0.00018613178162035624, |
| "loss": 0.2566, |
| "step": 477 |
| }, |
| { |
| "epoch": 0.17337685890460647, |
| "grad_norm": 0.12340683490037918, |
| "learning_rate": 0.00018607362053212545, |
| "loss": 0.2402, |
| "step": 478 |
| }, |
| { |
| "epoch": 0.17373957199854914, |
| "grad_norm": 0.12032376229763031, |
| "learning_rate": 0.00018601534686617423, |
| "loss": 0.2524, |
| "step": 479 |
| }, |
| { |
| "epoch": 0.17410228509249184, |
| "grad_norm": 0.14251156151294708, |
| "learning_rate": 0.00018595696069872013, |
| "loss": 0.2386, |
| "step": 480 |
| }, |
| { |
| "epoch": 0.17446499818643452, |
| "grad_norm": 0.12001265585422516, |
| "learning_rate": 0.00018589846210612776, |
| "loss": 0.2311, |
| "step": 481 |
| }, |
| { |
| "epoch": 0.17482771128037722, |
| "grad_norm": 0.127760112285614, |
| "learning_rate": 0.00018583985116490877, |
| "loss": 0.2528, |
| "step": 482 |
| }, |
| { |
| "epoch": 0.17519042437431992, |
| "grad_norm": 0.1348508894443512, |
| "learning_rate": 0.0001857811279517219, |
| "loss": 0.2861, |
| "step": 483 |
| }, |
| { |
| "epoch": 0.1755531374682626, |
| "grad_norm": 0.1362610161304474, |
| "learning_rate": 0.00018572229254337254, |
| "loss": 0.2606, |
| "step": 484 |
| }, |
| { |
| "epoch": 0.1759158505622053, |
| "grad_norm": 0.12335646897554398, |
| "learning_rate": 0.00018566334501681294, |
| "loss": 0.2735, |
| "step": 485 |
| }, |
| { |
| "epoch": 0.176278563656148, |
| "grad_norm": 0.2398405522108078, |
| "learning_rate": 0.000185604285449142, |
| "loss": 0.2686, |
| "step": 486 |
| }, |
| { |
| "epoch": 0.17664127675009067, |
| "grad_norm": 0.12291895598173141, |
| "learning_rate": 0.00018554511391760502, |
| "loss": 0.251, |
| "step": 487 |
| }, |
| { |
| "epoch": 0.17700398984403337, |
| "grad_norm": 0.1420765072107315, |
| "learning_rate": 0.00018548583049959394, |
| "loss": 0.3053, |
| "step": 488 |
| }, |
| { |
| "epoch": 0.17736670293797607, |
| "grad_norm": 0.13731782138347626, |
| "learning_rate": 0.0001854264352726469, |
| "loss": 0.2508, |
| "step": 489 |
| }, |
| { |
| "epoch": 0.17772941603191875, |
| "grad_norm": 0.12329670786857605, |
| "learning_rate": 0.00018536692831444836, |
| "loss": 0.2544, |
| "step": 490 |
| }, |
| { |
| "epoch": 0.17809212912586145, |
| "grad_norm": 0.13219058513641357, |
| "learning_rate": 0.0001853073097028288, |
| "loss": 0.2933, |
| "step": 491 |
| }, |
| { |
| "epoch": 0.17845484221980412, |
| "grad_norm": 0.13322101533412933, |
| "learning_rate": 0.00018524757951576487, |
| "loss": 0.2546, |
| "step": 492 |
| }, |
| { |
| "epoch": 0.17881755531374682, |
| "grad_norm": 0.13400037586688995, |
| "learning_rate": 0.00018518773783137907, |
| "loss": 0.2538, |
| "step": 493 |
| }, |
| { |
| "epoch": 0.17918026840768952, |
| "grad_norm": 0.1361285001039505, |
| "learning_rate": 0.0001851277847279398, |
| "loss": 0.2522, |
| "step": 494 |
| }, |
| { |
| "epoch": 0.1795429815016322, |
| "grad_norm": 0.1310225874185562, |
| "learning_rate": 0.00018506772028386106, |
| "loss": 0.2667, |
| "step": 495 |
| }, |
| { |
| "epoch": 0.1799056945955749, |
| "grad_norm": 0.12234266102313995, |
| "learning_rate": 0.00018500754457770257, |
| "loss": 0.2392, |
| "step": 496 |
| }, |
| { |
| "epoch": 0.1802684076895176, |
| "grad_norm": 0.1298176795244217, |
| "learning_rate": 0.00018494725768816958, |
| "loss": 0.2573, |
| "step": 497 |
| }, |
| { |
| "epoch": 0.18063112078346028, |
| "grad_norm": 0.1306108981370926, |
| "learning_rate": 0.00018488685969411276, |
| "loss": 0.2524, |
| "step": 498 |
| }, |
| { |
| "epoch": 0.18099383387740298, |
| "grad_norm": 0.13212443888187408, |
| "learning_rate": 0.00018482635067452804, |
| "loss": 0.2577, |
| "step": 499 |
| }, |
| { |
| "epoch": 0.18135654697134568, |
| "grad_norm": 0.12641021609306335, |
| "learning_rate": 0.0001847657307085566, |
| "loss": 0.2585, |
| "step": 500 |
| }, |
| { |
| "epoch": 0.18171926006528835, |
| "grad_norm": 0.13970649242401123, |
| "learning_rate": 0.00018470499987548473, |
| "loss": 0.2652, |
| "step": 501 |
| }, |
| { |
| "epoch": 0.18208197315923105, |
| "grad_norm": 0.12708009779453278, |
| "learning_rate": 0.0001846441582547437, |
| "loss": 0.2675, |
| "step": 502 |
| }, |
| { |
| "epoch": 0.18244468625317373, |
| "grad_norm": 0.1252969652414322, |
| "learning_rate": 0.00018458320592590975, |
| "loss": 0.2622, |
| "step": 503 |
| }, |
| { |
| "epoch": 0.18280739934711643, |
| "grad_norm": 0.13454315066337585, |
| "learning_rate": 0.0001845221429687038, |
| "loss": 0.2848, |
| "step": 504 |
| }, |
| { |
| "epoch": 0.18317011244105913, |
| "grad_norm": 0.11531683802604675, |
| "learning_rate": 0.0001844609694629916, |
| "loss": 0.2335, |
| "step": 505 |
| }, |
| { |
| "epoch": 0.1835328255350018, |
| "grad_norm": 0.12405534833669662, |
| "learning_rate": 0.00018439968548878338, |
| "loss": 0.2494, |
| "step": 506 |
| }, |
| { |
| "epoch": 0.1838955386289445, |
| "grad_norm": 0.12868863344192505, |
| "learning_rate": 0.00018433829112623394, |
| "loss": 0.2551, |
| "step": 507 |
| }, |
| { |
| "epoch": 0.1842582517228872, |
| "grad_norm": 0.12778586149215698, |
| "learning_rate": 0.00018427678645564235, |
| "loss": 0.2519, |
| "step": 508 |
| }, |
| { |
| "epoch": 0.18462096481682988, |
| "grad_norm": 0.12378937751054764, |
| "learning_rate": 0.00018421517155745208, |
| "loss": 0.2463, |
| "step": 509 |
| }, |
| { |
| "epoch": 0.18498367791077258, |
| "grad_norm": 0.12006038427352905, |
| "learning_rate": 0.00018415344651225067, |
| "loss": 0.2434, |
| "step": 510 |
| }, |
| { |
| "epoch": 0.18534639100471528, |
| "grad_norm": 0.12323882430791855, |
| "learning_rate": 0.0001840916114007698, |
| "loss": 0.2495, |
| "step": 511 |
| }, |
| { |
| "epoch": 0.18570910409865796, |
| "grad_norm": 0.12510351836681366, |
| "learning_rate": 0.00018402966630388505, |
| "loss": 0.2421, |
| "step": 512 |
| }, |
| { |
| "epoch": 0.18607181719260066, |
| "grad_norm": 0.16430193185806274, |
| "learning_rate": 0.00018396761130261586, |
| "loss": 0.261, |
| "step": 513 |
| }, |
| { |
| "epoch": 0.18643453028654333, |
| "grad_norm": 0.13129295408725739, |
| "learning_rate": 0.0001839054464781255, |
| "loss": 0.2552, |
| "step": 514 |
| }, |
| { |
| "epoch": 0.18679724338048603, |
| "grad_norm": 0.12675730884075165, |
| "learning_rate": 0.00018384317191172072, |
| "loss": 0.2443, |
| "step": 515 |
| }, |
| { |
| "epoch": 0.18715995647442873, |
| "grad_norm": 0.1283879280090332, |
| "learning_rate": 0.00018378078768485192, |
| "loss": 0.2453, |
| "step": 516 |
| }, |
| { |
| "epoch": 0.1875226695683714, |
| "grad_norm": 0.12647312879562378, |
| "learning_rate": 0.00018371829387911292, |
| "loss": 0.2434, |
| "step": 517 |
| }, |
| { |
| "epoch": 0.1878853826623141, |
| "grad_norm": 0.12233056873083115, |
| "learning_rate": 0.0001836556905762409, |
| "loss": 0.283, |
| "step": 518 |
| }, |
| { |
| "epoch": 0.1882480957562568, |
| "grad_norm": 0.13304516673088074, |
| "learning_rate": 0.00018359297785811612, |
| "loss": 0.2545, |
| "step": 519 |
| }, |
| { |
| "epoch": 0.18861080885019949, |
| "grad_norm": 0.13864544034004211, |
| "learning_rate": 0.000183530155806762, |
| "loss": 0.2571, |
| "step": 520 |
| }, |
| { |
| "epoch": 0.1889735219441422, |
| "grad_norm": 0.1448136270046234, |
| "learning_rate": 0.00018346722450434508, |
| "loss": 0.2576, |
| "step": 521 |
| }, |
| { |
| "epoch": 0.1893362350380849, |
| "grad_norm": 0.14094996452331543, |
| "learning_rate": 0.00018340418403317463, |
| "loss": 0.2568, |
| "step": 522 |
| }, |
| { |
| "epoch": 0.18969894813202756, |
| "grad_norm": 0.13471728563308716, |
| "learning_rate": 0.00018334103447570282, |
| "loss": 0.2271, |
| "step": 523 |
| }, |
| { |
| "epoch": 0.19006166122597026, |
| "grad_norm": 0.12976421415805817, |
| "learning_rate": 0.00018327777591452436, |
| "loss": 0.2386, |
| "step": 524 |
| }, |
| { |
| "epoch": 0.19042437431991294, |
| "grad_norm": 0.15379559993743896, |
| "learning_rate": 0.00018321440843237672, |
| "loss": 0.2681, |
| "step": 525 |
| }, |
| { |
| "epoch": 0.19078708741385564, |
| "grad_norm": 0.16950151324272156, |
| "learning_rate": 0.00018315093211213962, |
| "loss": 0.2526, |
| "step": 526 |
| }, |
| { |
| "epoch": 0.19114980050779834, |
| "grad_norm": 0.13350321352481842, |
| "learning_rate": 0.00018308734703683535, |
| "loss": 0.2495, |
| "step": 527 |
| }, |
| { |
| "epoch": 0.191512513601741, |
| "grad_norm": 0.14698749780654907, |
| "learning_rate": 0.00018302365328962824, |
| "loss": 0.2381, |
| "step": 528 |
| }, |
| { |
| "epoch": 0.19187522669568371, |
| "grad_norm": 0.12897023558616638, |
| "learning_rate": 0.0001829598509538249, |
| "loss": 0.256, |
| "step": 529 |
| }, |
| { |
| "epoch": 0.19223793978962642, |
| "grad_norm": 0.14562232792377472, |
| "learning_rate": 0.0001828959401128739, |
| "loss": 0.2607, |
| "step": 530 |
| }, |
| { |
| "epoch": 0.1926006528835691, |
| "grad_norm": 0.13689380884170532, |
| "learning_rate": 0.0001828319208503657, |
| "loss": 0.2451, |
| "step": 531 |
| }, |
| { |
| "epoch": 0.1929633659775118, |
| "grad_norm": 0.130660280585289, |
| "learning_rate": 0.00018276779325003268, |
| "loss": 0.2554, |
| "step": 532 |
| }, |
| { |
| "epoch": 0.1933260790714545, |
| "grad_norm": 0.12638305127620697, |
| "learning_rate": 0.00018270355739574877, |
| "loss": 0.2496, |
| "step": 533 |
| }, |
| { |
| "epoch": 0.19368879216539717, |
| "grad_norm": 0.14226087927818298, |
| "learning_rate": 0.00018263921337152955, |
| "loss": 0.2423, |
| "step": 534 |
| }, |
| { |
| "epoch": 0.19405150525933987, |
| "grad_norm": 0.1410246342420578, |
| "learning_rate": 0.00018257476126153218, |
| "loss": 0.2721, |
| "step": 535 |
| }, |
| { |
| "epoch": 0.19441421835328254, |
| "grad_norm": 0.1288328468799591, |
| "learning_rate": 0.00018251020115005504, |
| "loss": 0.2321, |
| "step": 536 |
| }, |
| { |
| "epoch": 0.19477693144722524, |
| "grad_norm": 0.14098510146141052, |
| "learning_rate": 0.0001824455331215378, |
| "loss": 0.2467, |
| "step": 537 |
| }, |
| { |
| "epoch": 0.19513964454116794, |
| "grad_norm": 0.13489827513694763, |
| "learning_rate": 0.00018238075726056136, |
| "loss": 0.2491, |
| "step": 538 |
| }, |
| { |
| "epoch": 0.19550235763511062, |
| "grad_norm": 0.13195975124835968, |
| "learning_rate": 0.00018231587365184754, |
| "loss": 0.2443, |
| "step": 539 |
| }, |
| { |
| "epoch": 0.19586507072905332, |
| "grad_norm": 0.1283298283815384, |
| "learning_rate": 0.00018225088238025915, |
| "loss": 0.2465, |
| "step": 540 |
| }, |
| { |
| "epoch": 0.19622778382299602, |
| "grad_norm": 0.11871767789125443, |
| "learning_rate": 0.00018218578353079988, |
| "loss": 0.227, |
| "step": 541 |
| }, |
| { |
| "epoch": 0.1965904969169387, |
| "grad_norm": 0.14271649718284607, |
| "learning_rate": 0.00018212057718861396, |
| "loss": 0.2734, |
| "step": 542 |
| }, |
| { |
| "epoch": 0.1969532100108814, |
| "grad_norm": 0.14445483684539795, |
| "learning_rate": 0.00018205526343898637, |
| "loss": 0.2417, |
| "step": 543 |
| }, |
| { |
| "epoch": 0.1973159231048241, |
| "grad_norm": 0.13704335689544678, |
| "learning_rate": 0.00018198984236734246, |
| "loss": 0.287, |
| "step": 544 |
| }, |
| { |
| "epoch": 0.19767863619876677, |
| "grad_norm": 0.12846963107585907, |
| "learning_rate": 0.00018192431405924804, |
| "loss": 0.2448, |
| "step": 545 |
| }, |
| { |
| "epoch": 0.19804134929270947, |
| "grad_norm": 0.14025187492370605, |
| "learning_rate": 0.00018185867860040907, |
| "loss": 0.2277, |
| "step": 546 |
| }, |
| { |
| "epoch": 0.19840406238665215, |
| "grad_norm": 0.12117055058479309, |
| "learning_rate": 0.00018179293607667178, |
| "loss": 0.2434, |
| "step": 547 |
| }, |
| { |
| "epoch": 0.19876677548059485, |
| "grad_norm": 0.1310604214668274, |
| "learning_rate": 0.00018172708657402233, |
| "loss": 0.2414, |
| "step": 548 |
| }, |
| { |
| "epoch": 0.19912948857453755, |
| "grad_norm": 0.15536460280418396, |
| "learning_rate": 0.00018166113017858683, |
| "loss": 0.2608, |
| "step": 549 |
| }, |
| { |
| "epoch": 0.19949220166848022, |
| "grad_norm": 0.1420615315437317, |
| "learning_rate": 0.00018159506697663127, |
| "loss": 0.269, |
| "step": 550 |
| }, |
| { |
| "epoch": 0.19985491476242292, |
| "grad_norm": 0.13386112451553345, |
| "learning_rate": 0.00018152889705456117, |
| "loss": 0.2728, |
| "step": 551 |
| }, |
| { |
| "epoch": 0.20021762785636563, |
| "grad_norm": 0.12435004115104675, |
| "learning_rate": 0.00018146262049892185, |
| "loss": 0.251, |
| "step": 552 |
| }, |
| { |
| "epoch": 0.2005803409503083, |
| "grad_norm": 0.13267625868320465, |
| "learning_rate": 0.00018139623739639788, |
| "loss": 0.2844, |
| "step": 553 |
| }, |
| { |
| "epoch": 0.200943054044251, |
| "grad_norm": 0.13061115145683289, |
| "learning_rate": 0.00018132974783381336, |
| "loss": 0.2287, |
| "step": 554 |
| }, |
| { |
| "epoch": 0.20130576713819368, |
| "grad_norm": 0.13054601848125458, |
| "learning_rate": 0.0001812631518981315, |
| "loss": 0.237, |
| "step": 555 |
| }, |
| { |
| "epoch": 0.20166848023213638, |
| "grad_norm": 0.1794627159833908, |
| "learning_rate": 0.00018119644967645474, |
| "loss": 0.2752, |
| "step": 556 |
| }, |
| { |
| "epoch": 0.20203119332607908, |
| "grad_norm": 0.13099108636379242, |
| "learning_rate": 0.00018112964125602447, |
| "loss": 0.2514, |
| "step": 557 |
| }, |
| { |
| "epoch": 0.20239390642002175, |
| "grad_norm": 0.13102415204048157, |
| "learning_rate": 0.000181062726724221, |
| "loss": 0.2428, |
| "step": 558 |
| }, |
| { |
| "epoch": 0.20275661951396445, |
| "grad_norm": 0.13251091539859772, |
| "learning_rate": 0.00018099570616856344, |
| "loss": 0.2452, |
| "step": 559 |
| }, |
| { |
| "epoch": 0.20311933260790715, |
| "grad_norm": 0.12863093614578247, |
| "learning_rate": 0.00018092857967670956, |
| "loss": 0.256, |
| "step": 560 |
| }, |
| { |
| "epoch": 0.20348204570184983, |
| "grad_norm": 0.13334688544273376, |
| "learning_rate": 0.00018086134733645565, |
| "loss": 0.2608, |
| "step": 561 |
| }, |
| { |
| "epoch": 0.20384475879579253, |
| "grad_norm": 0.15378229320049286, |
| "learning_rate": 0.00018079400923573652, |
| "loss": 0.2416, |
| "step": 562 |
| }, |
| { |
| "epoch": 0.20420747188973523, |
| "grad_norm": 0.1594190150499344, |
| "learning_rate": 0.00018072656546262524, |
| "loss": 0.2526, |
| "step": 563 |
| }, |
| { |
| "epoch": 0.2045701849836779, |
| "grad_norm": 0.13872471451759338, |
| "learning_rate": 0.00018065901610533306, |
| "loss": 0.2379, |
| "step": 564 |
| }, |
| { |
| "epoch": 0.2049328980776206, |
| "grad_norm": 0.1253708302974701, |
| "learning_rate": 0.0001805913612522095, |
| "loss": 0.2352, |
| "step": 565 |
| }, |
| { |
| "epoch": 0.20529561117156328, |
| "grad_norm": 0.13366468250751495, |
| "learning_rate": 0.00018052360099174184, |
| "loss": 0.2448, |
| "step": 566 |
| }, |
| { |
| "epoch": 0.20565832426550598, |
| "grad_norm": 605528.9375, |
| "learning_rate": 0.00018045573541255534, |
| "loss": 0.2251, |
| "step": 567 |
| }, |
| { |
| "epoch": 0.20602103735944868, |
| "grad_norm": 0.18479633331298828, |
| "learning_rate": 0.00018038776460341303, |
| "loss": 0.254, |
| "step": 568 |
| }, |
| { |
| "epoch": 0.20638375045339136, |
| "grad_norm": 0.20463520288467407, |
| "learning_rate": 0.0001803196886532155, |
| "loss": 0.2328, |
| "step": 569 |
| }, |
| { |
| "epoch": 0.20674646354733406, |
| "grad_norm": 0.1946071833372116, |
| "learning_rate": 0.00018025150765100094, |
| "loss": 0.2622, |
| "step": 570 |
| }, |
| { |
| "epoch": 0.20710917664127676, |
| "grad_norm": 0.16838648915290833, |
| "learning_rate": 0.00018018322168594485, |
| "loss": 0.2712, |
| "step": 571 |
| }, |
| { |
| "epoch": 0.20747188973521943, |
| "grad_norm": 0.20080481469631195, |
| "learning_rate": 0.00018011483084736006, |
| "loss": 0.2465, |
| "step": 572 |
| }, |
| { |
| "epoch": 0.20783460282916214, |
| "grad_norm": 0.19547294080257416, |
| "learning_rate": 0.00018004633522469656, |
| "loss": 0.2829, |
| "step": 573 |
| }, |
| { |
| "epoch": 0.20819731592310484, |
| "grad_norm": 0.14593558013439178, |
| "learning_rate": 0.00017997773490754137, |
| "loss": 0.2532, |
| "step": 574 |
| }, |
| { |
| "epoch": 0.2085600290170475, |
| "grad_norm": 0.1449822634458542, |
| "learning_rate": 0.00017990902998561855, |
| "loss": 0.2528, |
| "step": 575 |
| }, |
| { |
| "epoch": 0.2089227421109902, |
| "grad_norm": 0.14969614148139954, |
| "learning_rate": 0.0001798402205487888, |
| "loss": 0.2389, |
| "step": 576 |
| }, |
| { |
| "epoch": 0.20928545520493289, |
| "grad_norm": 0.14283829927444458, |
| "learning_rate": 0.00017977130668704965, |
| "loss": 0.2337, |
| "step": 577 |
| }, |
| { |
| "epoch": 0.2096481682988756, |
| "grad_norm": 0.1496269404888153, |
| "learning_rate": 0.00017970228849053515, |
| "loss": 0.259, |
| "step": 578 |
| }, |
| { |
| "epoch": 0.2100108813928183, |
| "grad_norm": 0.13835981488227844, |
| "learning_rate": 0.00017963316604951586, |
| "loss": 0.2628, |
| "step": 579 |
| }, |
| { |
| "epoch": 0.21037359448676096, |
| "grad_norm": 0.14784668385982513, |
| "learning_rate": 0.0001795639394543986, |
| "loss": 0.2488, |
| "step": 580 |
| }, |
| { |
| "epoch": 0.21073630758070366, |
| "grad_norm": 0.13575692474842072, |
| "learning_rate": 0.00017949460879572652, |
| "loss": 0.2403, |
| "step": 581 |
| }, |
| { |
| "epoch": 0.21109902067464636, |
| "grad_norm": 0.14234420657157898, |
| "learning_rate": 0.00017942517416417878, |
| "loss": 0.2649, |
| "step": 582 |
| }, |
| { |
| "epoch": 0.21146173376858904, |
| "grad_norm": 0.13922925293445587, |
| "learning_rate": 0.0001793556356505706, |
| "loss": 0.2466, |
| "step": 583 |
| }, |
| { |
| "epoch": 0.21182444686253174, |
| "grad_norm": 0.1288311779499054, |
| "learning_rate": 0.00017928599334585306, |
| "loss": 0.2314, |
| "step": 584 |
| }, |
| { |
| "epoch": 0.21218715995647444, |
| "grad_norm": 0.12375061213970184, |
| "learning_rate": 0.00017921624734111292, |
| "loss": 0.2401, |
| "step": 585 |
| }, |
| { |
| "epoch": 0.21254987305041712, |
| "grad_norm": 0.12890039384365082, |
| "learning_rate": 0.0001791463977275727, |
| "loss": 0.2416, |
| "step": 586 |
| }, |
| { |
| "epoch": 0.21291258614435982, |
| "grad_norm": 0.13691289722919464, |
| "learning_rate": 0.00017907644459659033, |
| "loss": 0.2473, |
| "step": 587 |
| }, |
| { |
| "epoch": 0.2132752992383025, |
| "grad_norm": 0.3051564693450928, |
| "learning_rate": 0.0001790063880396591, |
| "loss": 0.2464, |
| "step": 588 |
| }, |
| { |
| "epoch": 0.2136380123322452, |
| "grad_norm": 0.13991987705230713, |
| "learning_rate": 0.00017893622814840773, |
| "loss": 0.2526, |
| "step": 589 |
| }, |
| { |
| "epoch": 0.2140007254261879, |
| "grad_norm": 0.12774237990379333, |
| "learning_rate": 0.00017886596501459992, |
| "loss": 0.2375, |
| "step": 590 |
| }, |
| { |
| "epoch": 0.21436343852013057, |
| "grad_norm": 0.13759708404541016, |
| "learning_rate": 0.00017879559873013452, |
| "loss": 0.2248, |
| "step": 591 |
| }, |
| { |
| "epoch": 0.21472615161407327, |
| "grad_norm": 0.13571417331695557, |
| "learning_rate": 0.00017872512938704523, |
| "loss": 0.2612, |
| "step": 592 |
| }, |
| { |
| "epoch": 0.21508886470801597, |
| "grad_norm": 0.1446496546268463, |
| "learning_rate": 0.00017865455707750063, |
| "loss": 0.2466, |
| "step": 593 |
| }, |
| { |
| "epoch": 0.21545157780195864, |
| "grad_norm": 0.12743471562862396, |
| "learning_rate": 0.00017858388189380387, |
| "loss": 0.2681, |
| "step": 594 |
| }, |
| { |
| "epoch": 0.21581429089590135, |
| "grad_norm": 0.1251528263092041, |
| "learning_rate": 0.00017851310392839266, |
| "loss": 0.246, |
| "step": 595 |
| }, |
| { |
| "epoch": 0.21617700398984405, |
| "grad_norm": 0.12966857850551605, |
| "learning_rate": 0.0001784422232738392, |
| "loss": 0.2293, |
| "step": 596 |
| }, |
| { |
| "epoch": 0.21653971708378672, |
| "grad_norm": 0.14909860491752625, |
| "learning_rate": 0.00017837124002285, |
| "loss": 0.2577, |
| "step": 597 |
| }, |
| { |
| "epoch": 0.21690243017772942, |
| "grad_norm": 0.136635884642601, |
| "learning_rate": 0.00017830015426826567, |
| "loss": 0.262, |
| "step": 598 |
| }, |
| { |
| "epoch": 0.2172651432716721, |
| "grad_norm": 0.13285911083221436, |
| "learning_rate": 0.000178228966103061, |
| "loss": 0.2598, |
| "step": 599 |
| }, |
| { |
| "epoch": 0.2176278563656148, |
| "grad_norm": 0.13522981107234955, |
| "learning_rate": 0.00017815767562034463, |
| "loss": 0.2469, |
| "step": 600 |
| }, |
| { |
| "epoch": 0.2179905694595575, |
| "grad_norm": 0.13613048195838928, |
| "learning_rate": 0.00017808628291335912, |
| "loss": 0.2519, |
| "step": 601 |
| }, |
| { |
| "epoch": 0.21835328255350017, |
| "grad_norm": 0.14597558975219727, |
| "learning_rate": 0.00017801478807548063, |
| "loss": 0.2651, |
| "step": 602 |
| }, |
| { |
| "epoch": 0.21871599564744287, |
| "grad_norm": 0.13757093250751495, |
| "learning_rate": 0.00017794319120021895, |
| "loss": 0.2593, |
| "step": 603 |
| }, |
| { |
| "epoch": 0.21907870874138557, |
| "grad_norm": 0.13094554841518402, |
| "learning_rate": 0.00017787149238121733, |
| "loss": 0.2546, |
| "step": 604 |
| }, |
| { |
| "epoch": 0.21944142183532825, |
| "grad_norm": 125.78084564208984, |
| "learning_rate": 0.00017779969171225236, |
| "loss": 0.2456, |
| "step": 605 |
| }, |
| { |
| "epoch": 0.21980413492927095, |
| "grad_norm": 0.15768976509571075, |
| "learning_rate": 0.00017772778928723383, |
| "loss": 0.2412, |
| "step": 606 |
| }, |
| { |
| "epoch": 0.22016684802321365, |
| "grad_norm": 0.19074760377407074, |
| "learning_rate": 0.00017765578520020459, |
| "loss": 0.2699, |
| "step": 607 |
| }, |
| { |
| "epoch": 0.22052956111715633, |
| "grad_norm": 0.1577846109867096, |
| "learning_rate": 0.0001775836795453405, |
| "loss": 0.2737, |
| "step": 608 |
| }, |
| { |
| "epoch": 0.22089227421109903, |
| "grad_norm": 0.153973788022995, |
| "learning_rate": 0.00017751147241695025, |
| "loss": 0.2336, |
| "step": 609 |
| }, |
| { |
| "epoch": 0.2212549873050417, |
| "grad_norm": 0.16625823080539703, |
| "learning_rate": 0.0001774391639094753, |
| "loss": 0.248, |
| "step": 610 |
| }, |
| { |
| "epoch": 0.2216177003989844, |
| "grad_norm": 0.17917267978191376, |
| "learning_rate": 0.00017736675411748955, |
| "loss": 0.2559, |
| "step": 611 |
| }, |
| { |
| "epoch": 0.2219804134929271, |
| "grad_norm": 0.15878534317016602, |
| "learning_rate": 0.00017729424313569955, |
| "loss": 0.249, |
| "step": 612 |
| }, |
| { |
| "epoch": 0.22234312658686978, |
| "grad_norm": 0.1509985774755478, |
| "learning_rate": 0.00017722163105894412, |
| "loss": 0.2607, |
| "step": 613 |
| }, |
| { |
| "epoch": 0.22270583968081248, |
| "grad_norm": 0.13934160768985748, |
| "learning_rate": 0.0001771489179821943, |
| "loss": 0.2377, |
| "step": 614 |
| }, |
| { |
| "epoch": 0.22306855277475518, |
| "grad_norm": 0.1717095524072647, |
| "learning_rate": 0.00017707610400055323, |
| "loss": 0.2554, |
| "step": 615 |
| }, |
| { |
| "epoch": 0.22343126586869785, |
| "grad_norm": 0.13818614184856415, |
| "learning_rate": 0.00017700318920925605, |
| "loss": 0.2748, |
| "step": 616 |
| }, |
| { |
| "epoch": 0.22379397896264056, |
| "grad_norm": 0.12828463315963745, |
| "learning_rate": 0.00017693017370366972, |
| "loss": 0.2398, |
| "step": 617 |
| }, |
| { |
| "epoch": 0.22415669205658323, |
| "grad_norm": 0.13687558472156525, |
| "learning_rate": 0.00017685705757929294, |
| "loss": 0.2735, |
| "step": 618 |
| }, |
| { |
| "epoch": 0.22451940515052593, |
| "grad_norm": 0.1353394091129303, |
| "learning_rate": 0.00017678384093175605, |
| "loss": 0.2428, |
| "step": 619 |
| }, |
| { |
| "epoch": 0.22488211824446863, |
| "grad_norm": 0.1443159580230713, |
| "learning_rate": 0.00017671052385682078, |
| "loss": 0.2566, |
| "step": 620 |
| }, |
| { |
| "epoch": 0.2252448313384113, |
| "grad_norm": 0.14144475758075714, |
| "learning_rate": 0.00017663710645038035, |
| "loss": 0.2482, |
| "step": 621 |
| }, |
| { |
| "epoch": 0.225607544432354, |
| "grad_norm": 0.14739158749580383, |
| "learning_rate": 0.000176563588808459, |
| "loss": 0.253, |
| "step": 622 |
| }, |
| { |
| "epoch": 0.2259702575262967, |
| "grad_norm": 0.14374294877052307, |
| "learning_rate": 0.0001764899710272123, |
| "loss": 0.2394, |
| "step": 623 |
| }, |
| { |
| "epoch": 0.22633297062023938, |
| "grad_norm": 0.14988651871681213, |
| "learning_rate": 0.00017641625320292663, |
| "loss": 0.2953, |
| "step": 624 |
| }, |
| { |
| "epoch": 0.22669568371418208, |
| "grad_norm": 0.1295817494392395, |
| "learning_rate": 0.00017634243543201926, |
| "loss": 0.2177, |
| "step": 625 |
| }, |
| { |
| "epoch": 0.22705839680812478, |
| "grad_norm": 0.13908831775188446, |
| "learning_rate": 0.0001762685178110382, |
| "loss": 0.2348, |
| "step": 626 |
| }, |
| { |
| "epoch": 0.22742110990206746, |
| "grad_norm": 0.12676572799682617, |
| "learning_rate": 0.0001761945004366621, |
| "loss": 0.2347, |
| "step": 627 |
| }, |
| { |
| "epoch": 0.22778382299601016, |
| "grad_norm": 0.12473898380994797, |
| "learning_rate": 0.00017612038340569997, |
| "loss": 0.2161, |
| "step": 628 |
| }, |
| { |
| "epoch": 0.22814653608995283, |
| "grad_norm": 0.12910184264183044, |
| "learning_rate": 0.00017604616681509127, |
| "loss": 0.2476, |
| "step": 629 |
| }, |
| { |
| "epoch": 0.22850924918389554, |
| "grad_norm": 0.1438639611005783, |
| "learning_rate": 0.0001759718507619056, |
| "loss": 0.2464, |
| "step": 630 |
| }, |
| { |
| "epoch": 0.22887196227783824, |
| "grad_norm": 0.1412367820739746, |
| "learning_rate": 0.00017589743534334273, |
| "loss": 0.2475, |
| "step": 631 |
| }, |
| { |
| "epoch": 0.2292346753717809, |
| "grad_norm": 0.13323849439620972, |
| "learning_rate": 0.00017582292065673226, |
| "loss": 0.2352, |
| "step": 632 |
| }, |
| { |
| "epoch": 0.2295973884657236, |
| "grad_norm": 0.13439258933067322, |
| "learning_rate": 0.0001757483067995338, |
| "loss": 0.3278, |
| "step": 633 |
| }, |
| { |
| "epoch": 0.2299601015596663, |
| "grad_norm": 0.1343153417110443, |
| "learning_rate": 0.0001756735938693365, |
| "loss": 0.2419, |
| "step": 634 |
| }, |
| { |
| "epoch": 0.230322814653609, |
| "grad_norm": 0.12620678544044495, |
| "learning_rate": 0.0001755987819638592, |
| "loss": 0.2428, |
| "step": 635 |
| }, |
| { |
| "epoch": 0.2306855277475517, |
| "grad_norm": 0.1367313414812088, |
| "learning_rate": 0.00017552387118095015, |
| "loss": 0.2501, |
| "step": 636 |
| }, |
| { |
| "epoch": 0.2310482408414944, |
| "grad_norm": 0.14542607963085175, |
| "learning_rate": 0.00017544886161858695, |
| "loss": 0.2838, |
| "step": 637 |
| }, |
| { |
| "epoch": 0.23141095393543706, |
| "grad_norm": 0.13652457296848297, |
| "learning_rate": 0.0001753737533748763, |
| "loss": 0.2328, |
| "step": 638 |
| }, |
| { |
| "epoch": 0.23177366702937977, |
| "grad_norm": 0.13839620351791382, |
| "learning_rate": 0.00017529854654805416, |
| "loss": 0.2479, |
| "step": 639 |
| }, |
| { |
| "epoch": 0.23213638012332244, |
| "grad_norm": 0.1453743427991867, |
| "learning_rate": 0.00017522324123648525, |
| "loss": 0.2267, |
| "step": 640 |
| }, |
| { |
| "epoch": 0.23249909321726514, |
| "grad_norm": 0.1310967206954956, |
| "learning_rate": 0.0001751478375386632, |
| "loss": 0.2194, |
| "step": 641 |
| }, |
| { |
| "epoch": 0.23286180631120784, |
| "grad_norm": 0.13854770362377167, |
| "learning_rate": 0.00017507233555321024, |
| "loss": 0.2447, |
| "step": 642 |
| }, |
| { |
| "epoch": 0.23322451940515052, |
| "grad_norm": 0.12980610132217407, |
| "learning_rate": 0.00017499673537887722, |
| "loss": 0.2391, |
| "step": 643 |
| }, |
| { |
| "epoch": 0.23358723249909322, |
| "grad_norm": 0.12693443894386292, |
| "learning_rate": 0.0001749210371145434, |
| "loss": 0.2267, |
| "step": 644 |
| }, |
| { |
| "epoch": 0.23394994559303592, |
| "grad_norm": 0.13409999012947083, |
| "learning_rate": 0.00017484524085921633, |
| "loss": 0.2464, |
| "step": 645 |
| }, |
| { |
| "epoch": 0.2343126586869786, |
| "grad_norm": 0.1421654224395752, |
| "learning_rate": 0.0001747693467120317, |
| "loss": 0.2544, |
| "step": 646 |
| }, |
| { |
| "epoch": 0.2346753717809213, |
| "grad_norm": 0.13795344531536102, |
| "learning_rate": 0.00017469335477225326, |
| "loss": 0.2368, |
| "step": 647 |
| }, |
| { |
| "epoch": 0.235038084874864, |
| "grad_norm": 0.14090494811534882, |
| "learning_rate": 0.0001746172651392727, |
| "loss": 0.2414, |
| "step": 648 |
| }, |
| { |
| "epoch": 0.23540079796880667, |
| "grad_norm": 0.13511234521865845, |
| "learning_rate": 0.0001745410779126094, |
| "loss": 0.2548, |
| "step": 649 |
| }, |
| { |
| "epoch": 0.23576351106274937, |
| "grad_norm": 0.12285248935222626, |
| "learning_rate": 0.00017446479319191047, |
| "loss": 0.2211, |
| "step": 650 |
| }, |
| { |
| "epoch": 0.23612622415669204, |
| "grad_norm": 0.13343022763729095, |
| "learning_rate": 0.00017438841107695046, |
| "loss": 0.2848, |
| "step": 651 |
| }, |
| { |
| "epoch": 0.23648893725063475, |
| "grad_norm": 0.1315213143825531, |
| "learning_rate": 0.00017431193166763138, |
| "loss": 0.2493, |
| "step": 652 |
| }, |
| { |
| "epoch": 0.23685165034457745, |
| "grad_norm": 0.13958190381526947, |
| "learning_rate": 0.0001742353550639824, |
| "loss": 0.3001, |
| "step": 653 |
| }, |
| { |
| "epoch": 0.23721436343852012, |
| "grad_norm": 0.13711069524288177, |
| "learning_rate": 0.00017415868136615994, |
| "loss": 0.249, |
| "step": 654 |
| }, |
| { |
| "epoch": 0.23757707653246282, |
| "grad_norm": 0.13686099648475647, |
| "learning_rate": 0.0001740819106744473, |
| "loss": 0.2493, |
| "step": 655 |
| }, |
| { |
| "epoch": 0.23793978962640552, |
| "grad_norm": 0.14648962020874023, |
| "learning_rate": 0.00017400504308925468, |
| "loss": 0.2368, |
| "step": 656 |
| }, |
| { |
| "epoch": 0.2383025027203482, |
| "grad_norm": 0.13652493059635162, |
| "learning_rate": 0.000173928078711119, |
| "loss": 0.2198, |
| "step": 657 |
| }, |
| { |
| "epoch": 0.2386652158142909, |
| "grad_norm": 0.13376450538635254, |
| "learning_rate": 0.00017385101764070383, |
| "loss": 0.2388, |
| "step": 658 |
| }, |
| { |
| "epoch": 0.2390279289082336, |
| "grad_norm": 0.13941293954849243, |
| "learning_rate": 0.00017377385997879911, |
| "loss": 0.2465, |
| "step": 659 |
| }, |
| { |
| "epoch": 0.23939064200217627, |
| "grad_norm": 0.13455533981323242, |
| "learning_rate": 0.0001736966058263212, |
| "loss": 0.2366, |
| "step": 660 |
| }, |
| { |
| "epoch": 0.23975335509611898, |
| "grad_norm": 0.1292707622051239, |
| "learning_rate": 0.00017361925528431262, |
| "loss": 0.2234, |
| "step": 661 |
| }, |
| { |
| "epoch": 0.24011606819006165, |
| "grad_norm": 0.14742062985897064, |
| "learning_rate": 0.00017354180845394196, |
| "loss": 0.2498, |
| "step": 662 |
| }, |
| { |
| "epoch": 0.24047878128400435, |
| "grad_norm": 0.14243729412555695, |
| "learning_rate": 0.00017346426543650377, |
| "loss": 0.249, |
| "step": 663 |
| }, |
| { |
| "epoch": 0.24084149437794705, |
| "grad_norm": 0.12824714183807373, |
| "learning_rate": 0.00017338662633341844, |
| "loss": 0.2407, |
| "step": 664 |
| }, |
| { |
| "epoch": 0.24120420747188973, |
| "grad_norm": 0.13394343852996826, |
| "learning_rate": 0.00017330889124623187, |
| "loss": 0.2375, |
| "step": 665 |
| }, |
| { |
| "epoch": 0.24156692056583243, |
| "grad_norm": 0.13167209923267365, |
| "learning_rate": 0.0001732310602766157, |
| "loss": 0.2201, |
| "step": 666 |
| }, |
| { |
| "epoch": 0.24192963365977513, |
| "grad_norm": 0.14167827367782593, |
| "learning_rate": 0.0001731531335263669, |
| "loss": 0.2351, |
| "step": 667 |
| }, |
| { |
| "epoch": 0.2422923467537178, |
| "grad_norm": 0.13489162921905518, |
| "learning_rate": 0.0001730751110974077, |
| "loss": 0.2298, |
| "step": 668 |
| }, |
| { |
| "epoch": 0.2426550598476605, |
| "grad_norm": 0.1397753804922104, |
| "learning_rate": 0.0001729969930917854, |
| "loss": 0.2408, |
| "step": 669 |
| }, |
| { |
| "epoch": 0.2430177729416032, |
| "grad_norm": 0.1405513882637024, |
| "learning_rate": 0.00017291877961167251, |
| "loss": 0.2098, |
| "step": 670 |
| }, |
| { |
| "epoch": 0.24338048603554588, |
| "grad_norm": 0.17330865561962128, |
| "learning_rate": 0.00017284047075936617, |
| "loss": 0.2655, |
| "step": 671 |
| }, |
| { |
| "epoch": 0.24374319912948858, |
| "grad_norm": 0.1363557130098343, |
| "learning_rate": 0.00017276206663728846, |
| "loss": 0.2611, |
| "step": 672 |
| }, |
| { |
| "epoch": 0.24410591222343125, |
| "grad_norm": 0.1307671070098877, |
| "learning_rate": 0.00017268356734798595, |
| "loss": 0.2198, |
| "step": 673 |
| }, |
| { |
| "epoch": 0.24446862531737396, |
| "grad_norm": 0.1409989595413208, |
| "learning_rate": 0.0001726049729941297, |
| "loss": 0.2404, |
| "step": 674 |
| }, |
| { |
| "epoch": 0.24483133841131666, |
| "grad_norm": 0.136042058467865, |
| "learning_rate": 0.00017252628367851513, |
| "loss": 0.2537, |
| "step": 675 |
| }, |
| { |
| "epoch": 0.24519405150525933, |
| "grad_norm": 0.1308341771364212, |
| "learning_rate": 0.00017244749950406186, |
| "loss": 0.2296, |
| "step": 676 |
| }, |
| { |
| "epoch": 0.24555676459920203, |
| "grad_norm": 0.14312215149402618, |
| "learning_rate": 0.00017236862057381358, |
| "loss": 0.2414, |
| "step": 677 |
| }, |
| { |
| "epoch": 0.24591947769314473, |
| "grad_norm": 0.14419759809970856, |
| "learning_rate": 0.0001722896469909379, |
| "loss": 0.2353, |
| "step": 678 |
| }, |
| { |
| "epoch": 0.2462821907870874, |
| "grad_norm": 0.13765071332454681, |
| "learning_rate": 0.0001722105788587262, |
| "loss": 0.2317, |
| "step": 679 |
| }, |
| { |
| "epoch": 0.2466449038810301, |
| "grad_norm": 0.1362527757883072, |
| "learning_rate": 0.0001721314162805936, |
| "loss": 0.2201, |
| "step": 680 |
| }, |
| { |
| "epoch": 0.2470076169749728, |
| "grad_norm": 0.13269595801830292, |
| "learning_rate": 0.0001720521593600787, |
| "loss": 0.2625, |
| "step": 681 |
| }, |
| { |
| "epoch": 0.24737033006891548, |
| "grad_norm": 0.12634457647800446, |
| "learning_rate": 0.0001719728082008435, |
| "loss": 0.223, |
| "step": 682 |
| }, |
| { |
| "epoch": 0.24773304316285819, |
| "grad_norm": 0.1394185721874237, |
| "learning_rate": 0.00017189336290667325, |
| "loss": 0.2418, |
| "step": 683 |
| }, |
| { |
| "epoch": 0.24809575625680086, |
| "grad_norm": 0.14138251543045044, |
| "learning_rate": 0.00017181382358147625, |
| "loss": 0.2377, |
| "step": 684 |
| }, |
| { |
| "epoch": 0.24845846935074356, |
| "grad_norm": 0.14079631865024567, |
| "learning_rate": 0.00017173419032928398, |
| "loss": 0.2207, |
| "step": 685 |
| }, |
| { |
| "epoch": 0.24882118244468626, |
| "grad_norm": 0.1409912407398224, |
| "learning_rate": 0.00017165446325425064, |
| "loss": 0.2234, |
| "step": 686 |
| }, |
| { |
| "epoch": 0.24918389553862894, |
| "grad_norm": 0.16069121658802032, |
| "learning_rate": 0.00017157464246065306, |
| "loss": 0.2661, |
| "step": 687 |
| }, |
| { |
| "epoch": 0.24954660863257164, |
| "grad_norm": 0.14292632043361664, |
| "learning_rate": 0.0001714947280528908, |
| "loss": 0.2316, |
| "step": 688 |
| }, |
| { |
| "epoch": 0.24990932172651434, |
| "grad_norm": 0.13920721411705017, |
| "learning_rate": 0.0001714147201354858, |
| "loss": 0.2432, |
| "step": 689 |
| }, |
| { |
| "epoch": 0.250272034820457, |
| "grad_norm": 0.13971884548664093, |
| "learning_rate": 0.0001713346188130823, |
| "loss": 0.2281, |
| "step": 690 |
| }, |
| { |
| "epoch": 0.2506347479143997, |
| "grad_norm": 0.15373115241527557, |
| "learning_rate": 0.0001712544241904467, |
| "loss": 0.2264, |
| "step": 691 |
| }, |
| { |
| "epoch": 0.2509974610083424, |
| "grad_norm": 0.13534583151340485, |
| "learning_rate": 0.00017117413637246748, |
| "loss": 0.2263, |
| "step": 692 |
| }, |
| { |
| "epoch": 0.2513601741022851, |
| "grad_norm": 0.14140291512012482, |
| "learning_rate": 0.00017109375546415495, |
| "loss": 0.24, |
| "step": 693 |
| }, |
| { |
| "epoch": 0.25172288719622776, |
| "grad_norm": 0.1363680064678192, |
| "learning_rate": 0.00017101328157064115, |
| "loss": 0.2212, |
| "step": 694 |
| }, |
| { |
| "epoch": 0.25208560029017046, |
| "grad_norm": 0.13761445879936218, |
| "learning_rate": 0.00017093271479717986, |
| "loss": 0.2368, |
| "step": 695 |
| }, |
| { |
| "epoch": 0.25244831338411317, |
| "grad_norm": 0.13729073107242584, |
| "learning_rate": 0.0001708520552491462, |
| "loss": 0.2403, |
| "step": 696 |
| }, |
| { |
| "epoch": 0.25281102647805587, |
| "grad_norm": 0.13290317356586456, |
| "learning_rate": 0.00017077130303203673, |
| "loss": 0.2234, |
| "step": 697 |
| }, |
| { |
| "epoch": 0.25317373957199857, |
| "grad_norm": 0.14121422171592712, |
| "learning_rate": 0.0001706904582514692, |
| "loss": 0.2289, |
| "step": 698 |
| }, |
| { |
| "epoch": 0.2535364526659412, |
| "grad_norm": 0.1334342509508133, |
| "learning_rate": 0.0001706095210131824, |
| "loss": 0.2333, |
| "step": 699 |
| }, |
| { |
| "epoch": 0.2538991657598839, |
| "grad_norm": 0.13697004318237305, |
| "learning_rate": 0.00017052849142303603, |
| "loss": 0.2244, |
| "step": 700 |
| }, |
| { |
| "epoch": 0.2542618788538266, |
| "grad_norm": 0.14427930116653442, |
| "learning_rate": 0.00017044736958701058, |
| "loss": 0.2731, |
| "step": 701 |
| }, |
| { |
| "epoch": 0.2546245919477693, |
| "grad_norm": 0.14478136599063873, |
| "learning_rate": 0.00017036615561120727, |
| "loss": 0.2432, |
| "step": 702 |
| }, |
| { |
| "epoch": 0.254987305041712, |
| "grad_norm": 0.1374034285545349, |
| "learning_rate": 0.0001702848496018478, |
| "loss": 0.217, |
| "step": 703 |
| }, |
| { |
| "epoch": 0.2553500181356547, |
| "grad_norm": 0.14599081873893738, |
| "learning_rate": 0.00017020345166527412, |
| "loss": 0.241, |
| "step": 704 |
| }, |
| { |
| "epoch": 0.25571273122959737, |
| "grad_norm": 0.13574494421482086, |
| "learning_rate": 0.00017012196190794858, |
| "loss": 0.2329, |
| "step": 705 |
| }, |
| { |
| "epoch": 0.25607544432354007, |
| "grad_norm": 0.1376832127571106, |
| "learning_rate": 0.00017004038043645357, |
| "loss": 0.252, |
| "step": 706 |
| }, |
| { |
| "epoch": 0.25643815741748277, |
| "grad_norm": 0.13819095492362976, |
| "learning_rate": 0.00016995870735749138, |
| "loss": 0.2547, |
| "step": 707 |
| }, |
| { |
| "epoch": 0.25680087051142547, |
| "grad_norm": 0.12175976485013962, |
| "learning_rate": 0.00016987694277788417, |
| "loss": 0.2058, |
| "step": 708 |
| }, |
| { |
| "epoch": 0.2571635836053682, |
| "grad_norm": 0.13914383947849274, |
| "learning_rate": 0.0001697950868045738, |
| "loss": 0.2311, |
| "step": 709 |
| }, |
| { |
| "epoch": 0.2575262966993108, |
| "grad_norm": 0.1349351704120636, |
| "learning_rate": 0.00016971313954462156, |
| "loss": 0.2203, |
| "step": 710 |
| }, |
| { |
| "epoch": 0.2578890097932535, |
| "grad_norm": 0.1311430037021637, |
| "learning_rate": 0.00016963110110520827, |
| "loss": 0.242, |
| "step": 711 |
| }, |
| { |
| "epoch": 0.2582517228871962, |
| "grad_norm": 0.13092203438282013, |
| "learning_rate": 0.0001695489715936339, |
| "loss": 0.25, |
| "step": 712 |
| }, |
| { |
| "epoch": 0.2586144359811389, |
| "grad_norm": 0.13544927537441254, |
| "learning_rate": 0.00016946675111731766, |
| "loss": 0.2263, |
| "step": 713 |
| }, |
| { |
| "epoch": 0.2589771490750816, |
| "grad_norm": 0.13862383365631104, |
| "learning_rate": 0.00016938443978379753, |
| "loss": 0.2404, |
| "step": 714 |
| }, |
| { |
| "epoch": 0.2593398621690243, |
| "grad_norm": 0.14725641906261444, |
| "learning_rate": 0.00016930203770073053, |
| "loss": 0.2482, |
| "step": 715 |
| }, |
| { |
| "epoch": 0.259702575262967, |
| "grad_norm": 0.13641703128814697, |
| "learning_rate": 0.00016921954497589226, |
| "loss": 0.2431, |
| "step": 716 |
| }, |
| { |
| "epoch": 0.2600652883569097, |
| "grad_norm": 0.1381891518831253, |
| "learning_rate": 0.00016913696171717688, |
| "loss": 0.2321, |
| "step": 717 |
| }, |
| { |
| "epoch": 0.2604280014508524, |
| "grad_norm": 0.14194577932357788, |
| "learning_rate": 0.000169054288032597, |
| "loss": 0.2907, |
| "step": 718 |
| }, |
| { |
| "epoch": 0.2607907145447951, |
| "grad_norm": 0.14137552678585052, |
| "learning_rate": 0.00016897152403028357, |
| "loss": 0.2205, |
| "step": 719 |
| }, |
| { |
| "epoch": 0.2611534276387378, |
| "grad_norm": 0.12619373202323914, |
| "learning_rate": 0.00016888866981848544, |
| "loss": 0.2097, |
| "step": 720 |
| }, |
| { |
| "epoch": 0.2615161407326804, |
| "grad_norm": 0.15918751060962677, |
| "learning_rate": 0.0001688057255055697, |
| "loss": 0.2578, |
| "step": 721 |
| }, |
| { |
| "epoch": 0.2618788538266231, |
| "grad_norm": 0.13455507159233093, |
| "learning_rate": 0.00016872269120002108, |
| "loss": 0.2676, |
| "step": 722 |
| }, |
| { |
| "epoch": 0.2622415669205658, |
| "grad_norm": 0.14259149134159088, |
| "learning_rate": 0.0001686395670104422, |
| "loss": 0.2176, |
| "step": 723 |
| }, |
| { |
| "epoch": 0.26260428001450853, |
| "grad_norm": 0.13362933695316315, |
| "learning_rate": 0.0001685563530455531, |
| "loss": 0.2167, |
| "step": 724 |
| }, |
| { |
| "epoch": 0.26296699310845123, |
| "grad_norm": 0.13542160391807556, |
| "learning_rate": 0.00016847304941419128, |
| "loss": 0.2288, |
| "step": 725 |
| }, |
| { |
| "epoch": 0.26332970620239393, |
| "grad_norm": 0.15378214418888092, |
| "learning_rate": 0.00016838965622531157, |
| "loss": 0.287, |
| "step": 726 |
| }, |
| { |
| "epoch": 0.2636924192963366, |
| "grad_norm": 0.1565556526184082, |
| "learning_rate": 0.00016830617358798587, |
| "loss": 0.2692, |
| "step": 727 |
| }, |
| { |
| "epoch": 0.2640551323902793, |
| "grad_norm": 0.14884917438030243, |
| "learning_rate": 0.0001682226016114031, |
| "loss": 0.2368, |
| "step": 728 |
| }, |
| { |
| "epoch": 0.264417845484222, |
| "grad_norm": 0.13870306313037872, |
| "learning_rate": 0.000168138940404869, |
| "loss": 0.2356, |
| "step": 729 |
| }, |
| { |
| "epoch": 0.2647805585781647, |
| "grad_norm": 0.15050628781318665, |
| "learning_rate": 0.00016805519007780602, |
| "loss": 0.2524, |
| "step": 730 |
| }, |
| { |
| "epoch": 0.2651432716721074, |
| "grad_norm": 0.1477731466293335, |
| "learning_rate": 0.00016797135073975326, |
| "loss": 0.2184, |
| "step": 731 |
| }, |
| { |
| "epoch": 0.26550598476605003, |
| "grad_norm": 0.1533484160900116, |
| "learning_rate": 0.0001678874225003661, |
| "loss": 0.2301, |
| "step": 732 |
| }, |
| { |
| "epoch": 0.26586869785999273, |
| "grad_norm": 0.14348532259464264, |
| "learning_rate": 0.0001678034054694163, |
| "loss": 0.2397, |
| "step": 733 |
| }, |
| { |
| "epoch": 0.26623141095393543, |
| "grad_norm": 0.14960677921772003, |
| "learning_rate": 0.0001677192997567917, |
| "loss": 0.2244, |
| "step": 734 |
| }, |
| { |
| "epoch": 0.26659412404787813, |
| "grad_norm": 0.15019361674785614, |
| "learning_rate": 0.00016763510547249615, |
| "loss": 0.2466, |
| "step": 735 |
| }, |
| { |
| "epoch": 0.26695683714182084, |
| "grad_norm": 0.14875197410583496, |
| "learning_rate": 0.00016755082272664937, |
| "loss": 0.2106, |
| "step": 736 |
| }, |
| { |
| "epoch": 0.26731955023576354, |
| "grad_norm": 0.14142164587974548, |
| "learning_rate": 0.00016746645162948672, |
| "loss": 0.2387, |
| "step": 737 |
| }, |
| { |
| "epoch": 0.2676822633297062, |
| "grad_norm": 0.16096633672714233, |
| "learning_rate": 0.0001673819922913592, |
| "loss": 0.2346, |
| "step": 738 |
| }, |
| { |
| "epoch": 0.2680449764236489, |
| "grad_norm": 0.15639543533325195, |
| "learning_rate": 0.0001672974448227331, |
| "loss": 0.2839, |
| "step": 739 |
| }, |
| { |
| "epoch": 0.2684076895175916, |
| "grad_norm": 0.1443796008825302, |
| "learning_rate": 0.0001672128093341901, |
| "loss": 0.2314, |
| "step": 740 |
| }, |
| { |
| "epoch": 0.2687704026115343, |
| "grad_norm": 0.15442712604999542, |
| "learning_rate": 0.00016712808593642695, |
| "loss": 0.2299, |
| "step": 741 |
| }, |
| { |
| "epoch": 0.269133115705477, |
| "grad_norm": 0.14457674324512482, |
| "learning_rate": 0.00016704327474025533, |
| "loss": 0.2526, |
| "step": 742 |
| }, |
| { |
| "epoch": 0.26949582879941963, |
| "grad_norm": 0.14981432259082794, |
| "learning_rate": 0.00016695837585660187, |
| "loss": 0.2288, |
| "step": 743 |
| }, |
| { |
| "epoch": 0.26985854189336234, |
| "grad_norm": 0.1518179178237915, |
| "learning_rate": 0.00016687338939650782, |
| "loss": 0.2264, |
| "step": 744 |
| }, |
| { |
| "epoch": 0.27022125498730504, |
| "grad_norm": 0.16115126013755798, |
| "learning_rate": 0.00016678831547112895, |
| "loss": 0.2533, |
| "step": 745 |
| }, |
| { |
| "epoch": 0.27058396808124774, |
| "grad_norm": 0.1538068801164627, |
| "learning_rate": 0.00016670315419173548, |
| "loss": 0.2429, |
| "step": 746 |
| }, |
| { |
| "epoch": 0.27094668117519044, |
| "grad_norm": 0.1365380436182022, |
| "learning_rate": 0.00016661790566971181, |
| "loss": 0.2222, |
| "step": 747 |
| }, |
| { |
| "epoch": 0.27130939426913314, |
| "grad_norm": 0.14484576880931854, |
| "learning_rate": 0.00016653257001655652, |
| "loss": 0.2197, |
| "step": 748 |
| }, |
| { |
| "epoch": 0.2716721073630758, |
| "grad_norm": 0.16303595900535583, |
| "learning_rate": 0.00016644714734388217, |
| "loss": 0.253, |
| "step": 749 |
| }, |
| { |
| "epoch": 0.2720348204570185, |
| "grad_norm": 0.14876610040664673, |
| "learning_rate": 0.00016636163776341504, |
| "loss": 0.2205, |
| "step": 750 |
| }, |
| { |
| "epoch": 0.2723975335509612, |
| "grad_norm": 0.13568569719791412, |
| "learning_rate": 0.00016627604138699515, |
| "loss": 0.2251, |
| "step": 751 |
| }, |
| { |
| "epoch": 0.2727602466449039, |
| "grad_norm": 0.14528821408748627, |
| "learning_rate": 0.00016619035832657602, |
| "loss": 0.2346, |
| "step": 752 |
| }, |
| { |
| "epoch": 0.2731229597388466, |
| "grad_norm": 0.13951005041599274, |
| "learning_rate": 0.0001661045886942245, |
| "loss": 0.2311, |
| "step": 753 |
| }, |
| { |
| "epoch": 0.27348567283278924, |
| "grad_norm": 0.1355544924736023, |
| "learning_rate": 0.0001660187326021208, |
| "loss": 0.2235, |
| "step": 754 |
| }, |
| { |
| "epoch": 0.27384838592673194, |
| "grad_norm": 0.14282123744487762, |
| "learning_rate": 0.00016593279016255806, |
| "loss": 0.211, |
| "step": 755 |
| }, |
| { |
| "epoch": 0.27421109902067464, |
| "grad_norm": 0.1680796593427658, |
| "learning_rate": 0.0001658467614879425, |
| "loss": 0.2518, |
| "step": 756 |
| }, |
| { |
| "epoch": 0.27457381211461734, |
| "grad_norm": 0.15991435945034027, |
| "learning_rate": 0.00016576064669079297, |
| "loss": 0.2419, |
| "step": 757 |
| }, |
| { |
| "epoch": 0.27493652520856005, |
| "grad_norm": 0.1730770766735077, |
| "learning_rate": 0.0001656744458837411, |
| "loss": 0.257, |
| "step": 758 |
| }, |
| { |
| "epoch": 0.27529923830250275, |
| "grad_norm": 0.1453644037246704, |
| "learning_rate": 0.00016558815917953095, |
| "loss": 0.2532, |
| "step": 759 |
| }, |
| { |
| "epoch": 0.2756619513964454, |
| "grad_norm": 0.1334659457206726, |
| "learning_rate": 0.00016550178669101891, |
| "loss": 0.2098, |
| "step": 760 |
| }, |
| { |
| "epoch": 0.2760246644903881, |
| "grad_norm": 0.13118910789489746, |
| "learning_rate": 0.00016541532853117365, |
| "loss": 0.214, |
| "step": 761 |
| }, |
| { |
| "epoch": 0.2763873775843308, |
| "grad_norm": 0.14156754314899445, |
| "learning_rate": 0.0001653287848130758, |
| "loss": 0.2434, |
| "step": 762 |
| }, |
| { |
| "epoch": 0.2767500906782735, |
| "grad_norm": 0.16743269562721252, |
| "learning_rate": 0.0001652421556499179, |
| "loss": 0.2692, |
| "step": 763 |
| }, |
| { |
| "epoch": 0.2771128037722162, |
| "grad_norm": 0.16182062029838562, |
| "learning_rate": 0.0001651554411550044, |
| "loss": 0.2194, |
| "step": 764 |
| }, |
| { |
| "epoch": 0.27747551686615884, |
| "grad_norm": 0.14829173684120178, |
| "learning_rate": 0.0001650686414417511, |
| "loss": 0.2444, |
| "step": 765 |
| }, |
| { |
| "epoch": 0.27783822996010155, |
| "grad_norm": 0.14184747636318207, |
| "learning_rate": 0.00016498175662368544, |
| "loss": 0.2275, |
| "step": 766 |
| }, |
| { |
| "epoch": 0.27820094305404425, |
| "grad_norm": 0.14175622165203094, |
| "learning_rate": 0.00016489478681444615, |
| "loss": 0.2368, |
| "step": 767 |
| }, |
| { |
| "epoch": 0.27856365614798695, |
| "grad_norm": 0.14495515823364258, |
| "learning_rate": 0.0001648077321277831, |
| "loss": 0.2087, |
| "step": 768 |
| }, |
| { |
| "epoch": 0.27892636924192965, |
| "grad_norm": 0.14581428468227386, |
| "learning_rate": 0.0001647205926775571, |
| "loss": 0.2339, |
| "step": 769 |
| }, |
| { |
| "epoch": 0.27928908233587235, |
| "grad_norm": 0.16971313953399658, |
| "learning_rate": 0.00016463336857773996, |
| "loss": 0.2564, |
| "step": 770 |
| }, |
| { |
| "epoch": 0.279651795429815, |
| "grad_norm": 0.16059347987174988, |
| "learning_rate": 0.00016454605994241413, |
| "loss": 0.2495, |
| "step": 771 |
| }, |
| { |
| "epoch": 0.2800145085237577, |
| "grad_norm": 0.13135506212711334, |
| "learning_rate": 0.00016445866688577268, |
| "loss": 0.221, |
| "step": 772 |
| }, |
| { |
| "epoch": 0.2803772216177004, |
| "grad_norm": 0.14712165296077728, |
| "learning_rate": 0.00016437118952211893, |
| "loss": 0.232, |
| "step": 773 |
| }, |
| { |
| "epoch": 0.2807399347116431, |
| "grad_norm": 0.1340080052614212, |
| "learning_rate": 0.00016428362796586668, |
| "loss": 0.2134, |
| "step": 774 |
| }, |
| { |
| "epoch": 0.2811026478055858, |
| "grad_norm": 0.1442837119102478, |
| "learning_rate": 0.00016419598233153977, |
| "loss": 0.2507, |
| "step": 775 |
| }, |
| { |
| "epoch": 0.28146536089952845, |
| "grad_norm": 0.1472170352935791, |
| "learning_rate": 0.00016410825273377192, |
| "loss": 0.2053, |
| "step": 776 |
| }, |
| { |
| "epoch": 0.28182807399347115, |
| "grad_norm": 0.16951750218868256, |
| "learning_rate": 0.0001640204392873068, |
| "loss": 0.2226, |
| "step": 777 |
| }, |
| { |
| "epoch": 0.28219078708741385, |
| "grad_norm": 0.1475476771593094, |
| "learning_rate": 0.00016393254210699765, |
| "loss": 0.2255, |
| "step": 778 |
| }, |
| { |
| "epoch": 0.28255350018135655, |
| "grad_norm": 0.1399717628955841, |
| "learning_rate": 0.00016384456130780732, |
| "loss": 0.2296, |
| "step": 779 |
| }, |
| { |
| "epoch": 0.28291621327529926, |
| "grad_norm": 0.15422862768173218, |
| "learning_rate": 0.00016375649700480792, |
| "loss": 0.2549, |
| "step": 780 |
| }, |
| { |
| "epoch": 0.28327892636924196, |
| "grad_norm": 0.14808495342731476, |
| "learning_rate": 0.0001636683493131809, |
| "loss": 0.2125, |
| "step": 781 |
| }, |
| { |
| "epoch": 0.2836416394631846, |
| "grad_norm": 0.13389019668102264, |
| "learning_rate": 0.00016358011834821662, |
| "loss": 0.2216, |
| "step": 782 |
| }, |
| { |
| "epoch": 0.2840043525571273, |
| "grad_norm": 0.14201773703098297, |
| "learning_rate": 0.0001634918042253145, |
| "loss": 0.2257, |
| "step": 783 |
| }, |
| { |
| "epoch": 0.28436706565107, |
| "grad_norm": 0.16533806920051575, |
| "learning_rate": 0.00016340340705998265, |
| "loss": 0.2245, |
| "step": 784 |
| }, |
| { |
| "epoch": 0.2847297787450127, |
| "grad_norm": 0.15893639624118805, |
| "learning_rate": 0.0001633149269678378, |
| "loss": 0.2175, |
| "step": 785 |
| }, |
| { |
| "epoch": 0.2850924918389554, |
| "grad_norm": 0.1425047069787979, |
| "learning_rate": 0.0001632263640646052, |
| "loss": 0.252, |
| "step": 786 |
| }, |
| { |
| "epoch": 0.28545520493289805, |
| "grad_norm": 0.15391702950000763, |
| "learning_rate": 0.00016313771846611827, |
| "loss": 0.2222, |
| "step": 787 |
| }, |
| { |
| "epoch": 0.28581791802684076, |
| "grad_norm": 132164.875, |
| "learning_rate": 0.00016304899028831874, |
| "loss": 0.2179, |
| "step": 788 |
| }, |
| { |
| "epoch": 0.28618063112078346, |
| "grad_norm": 0.1637081801891327, |
| "learning_rate": 0.00016296017964725632, |
| "loss": 0.2205, |
| "step": 789 |
| }, |
| { |
| "epoch": 0.28654334421472616, |
| "grad_norm": 0.20489241182804108, |
| "learning_rate": 0.0001628712866590885, |
| "loss": 0.2479, |
| "step": 790 |
| }, |
| { |
| "epoch": 0.28690605730866886, |
| "grad_norm": 0.17106997966766357, |
| "learning_rate": 0.00016278231144008053, |
| "loss": 0.227, |
| "step": 791 |
| }, |
| { |
| "epoch": 0.28726877040261156, |
| "grad_norm": 0.16591399908065796, |
| "learning_rate": 0.00016269325410660517, |
| "loss": 0.2001, |
| "step": 792 |
| }, |
| { |
| "epoch": 0.2876314834965542, |
| "grad_norm": 0.17908765375614166, |
| "learning_rate": 0.00016260411477514265, |
| "loss": 0.2311, |
| "step": 793 |
| }, |
| { |
| "epoch": 0.2879941965904969, |
| "grad_norm": 0.2103756070137024, |
| "learning_rate": 0.00016251489356228037, |
| "loss": 0.251, |
| "step": 794 |
| }, |
| { |
| "epoch": 0.2883569096844396, |
| "grad_norm": 0.1727806031703949, |
| "learning_rate": 0.00016242559058471292, |
| "loss": 0.2193, |
| "step": 795 |
| }, |
| { |
| "epoch": 0.2887196227783823, |
| "grad_norm": 0.15671540796756744, |
| "learning_rate": 0.0001623362059592417, |
| "loss": 0.2462, |
| "step": 796 |
| }, |
| { |
| "epoch": 0.289082335872325, |
| "grad_norm": 0.14824596047401428, |
| "learning_rate": 0.00016224673980277503, |
| "loss": 0.2235, |
| "step": 797 |
| }, |
| { |
| "epoch": 0.28944504896626766, |
| "grad_norm": 0.15403501689434052, |
| "learning_rate": 0.00016215719223232778, |
| "loss": 0.2644, |
| "step": 798 |
| }, |
| { |
| "epoch": 0.28980776206021036, |
| "grad_norm": 0.15009653568267822, |
| "learning_rate": 0.0001620675633650213, |
| "loss": 0.243, |
| "step": 799 |
| }, |
| { |
| "epoch": 0.29017047515415306, |
| "grad_norm": 0.16066166758537292, |
| "learning_rate": 0.0001619778533180834, |
| "loss": 0.2171, |
| "step": 800 |
| }, |
| { |
| "epoch": 0.29053318824809576, |
| "grad_norm": 0.15927597880363464, |
| "learning_rate": 0.00016188806220884786, |
| "loss": 0.217, |
| "step": 801 |
| }, |
| { |
| "epoch": 0.29089590134203847, |
| "grad_norm": 0.14611735939979553, |
| "learning_rate": 0.00016179819015475465, |
| "loss": 0.2204, |
| "step": 802 |
| }, |
| { |
| "epoch": 0.2912586144359811, |
| "grad_norm": 0.14521051943302155, |
| "learning_rate": 0.00016170823727334956, |
| "loss": 0.1962, |
| "step": 803 |
| }, |
| { |
| "epoch": 0.2916213275299238, |
| "grad_norm": 0.1608162224292755, |
| "learning_rate": 0.00016161820368228402, |
| "loss": 0.2263, |
| "step": 804 |
| }, |
| { |
| "epoch": 0.2919840406238665, |
| "grad_norm": 0.1577100157737732, |
| "learning_rate": 0.00016152808949931516, |
| "loss": 0.2208, |
| "step": 805 |
| }, |
| { |
| "epoch": 0.2923467537178092, |
| "grad_norm": 0.15033476054668427, |
| "learning_rate": 0.00016143789484230543, |
| "loss": 0.215, |
| "step": 806 |
| }, |
| { |
| "epoch": 0.2927094668117519, |
| "grad_norm": 0.14740067720413208, |
| "learning_rate": 0.00016134761982922253, |
| "loss": 0.2042, |
| "step": 807 |
| }, |
| { |
| "epoch": 0.2930721799056946, |
| "grad_norm": 0.15068073570728302, |
| "learning_rate": 0.0001612572645781393, |
| "loss": 0.2221, |
| "step": 808 |
| }, |
| { |
| "epoch": 0.29343489299963726, |
| "grad_norm": 0.17142775654792786, |
| "learning_rate": 0.00016116682920723352, |
| "loss": 0.2142, |
| "step": 809 |
| }, |
| { |
| "epoch": 0.29379760609357997, |
| "grad_norm": 0.15067829191684723, |
| "learning_rate": 0.0001610763138347877, |
| "loss": 0.2225, |
| "step": 810 |
| }, |
| { |
| "epoch": 0.29416031918752267, |
| "grad_norm": 0.1574852466583252, |
| "learning_rate": 0.0001609857185791891, |
| "loss": 0.2106, |
| "step": 811 |
| }, |
| { |
| "epoch": 0.29452303228146537, |
| "grad_norm": 0.17060889303684235, |
| "learning_rate": 0.00016089504355892931, |
| "loss": 0.233, |
| "step": 812 |
| }, |
| { |
| "epoch": 0.29488574537540807, |
| "grad_norm": 0.14020898938179016, |
| "learning_rate": 0.0001608042888926044, |
| "loss": 0.2162, |
| "step": 813 |
| }, |
| { |
| "epoch": 0.2952484584693507, |
| "grad_norm": 0.1367609053850174, |
| "learning_rate": 0.0001607134546989145, |
| "loss": 0.2224, |
| "step": 814 |
| }, |
| { |
| "epoch": 0.2956111715632934, |
| "grad_norm": 0.14028465747833252, |
| "learning_rate": 0.0001606225410966638, |
| "loss": 0.2237, |
| "step": 815 |
| }, |
| { |
| "epoch": 0.2959738846572361, |
| "grad_norm": 0.13773570954799652, |
| "learning_rate": 0.00016053154820476037, |
| "loss": 0.224, |
| "step": 816 |
| }, |
| { |
| "epoch": 0.2963365977511788, |
| "grad_norm": 0.14603252708911896, |
| "learning_rate": 0.000160440476142216, |
| "loss": 0.217, |
| "step": 817 |
| }, |
| { |
| "epoch": 0.2966993108451215, |
| "grad_norm": 0.15531830489635468, |
| "learning_rate": 0.00016034932502814587, |
| "loss": 0.2137, |
| "step": 818 |
| }, |
| { |
| "epoch": 0.2970620239390642, |
| "grad_norm": 0.15454085171222687, |
| "learning_rate": 0.00016025809498176874, |
| "loss": 0.2244, |
| "step": 819 |
| }, |
| { |
| "epoch": 0.29742473703300687, |
| "grad_norm": 0.1548180729150772, |
| "learning_rate": 0.0001601667861224066, |
| "loss": 0.2517, |
| "step": 820 |
| }, |
| { |
| "epoch": 0.29778745012694957, |
| "grad_norm": 0.1498357206583023, |
| "learning_rate": 0.00016007539856948436, |
| "loss": 0.2512, |
| "step": 821 |
| }, |
| { |
| "epoch": 0.2981501632208923, |
| "grad_norm": 0.1419772207736969, |
| "learning_rate": 0.00015998393244253002, |
| "loss": 0.2067, |
| "step": 822 |
| }, |
| { |
| "epoch": 0.298512876314835, |
| "grad_norm": 0.14814653992652893, |
| "learning_rate": 0.0001598923878611743, |
| "loss": 0.2293, |
| "step": 823 |
| }, |
| { |
| "epoch": 0.2988755894087777, |
| "grad_norm": 0.15222403407096863, |
| "learning_rate": 0.00015980076494515047, |
| "loss": 0.2247, |
| "step": 824 |
| }, |
| { |
| "epoch": 0.2992383025027203, |
| "grad_norm": 0.1679450124502182, |
| "learning_rate": 0.0001597090638142943, |
| "loss": 0.2631, |
| "step": 825 |
| }, |
| { |
| "epoch": 0.299601015596663, |
| "grad_norm": 0.14880560338497162, |
| "learning_rate": 0.00015961728458854397, |
| "loss": 0.2069, |
| "step": 826 |
| }, |
| { |
| "epoch": 0.2999637286906057, |
| "grad_norm": 0.14599819481372833, |
| "learning_rate": 0.00015952542738793956, |
| "loss": 0.226, |
| "step": 827 |
| }, |
| { |
| "epoch": 0.3003264417845484, |
| "grad_norm": 0.14673501253128052, |
| "learning_rate": 0.00015943349233262332, |
| "loss": 0.2131, |
| "step": 828 |
| }, |
| { |
| "epoch": 0.3006891548784911, |
| "grad_norm": 0.1625213623046875, |
| "learning_rate": 0.00015934147954283932, |
| "loss": 0.2289, |
| "step": 829 |
| }, |
| { |
| "epoch": 0.30105186797243383, |
| "grad_norm": 0.15041042864322662, |
| "learning_rate": 0.00015924938913893324, |
| "loss": 0.2217, |
| "step": 830 |
| }, |
| { |
| "epoch": 0.3014145810663765, |
| "grad_norm": 0.14617730677127838, |
| "learning_rate": 0.00015915722124135227, |
| "loss": 0.2396, |
| "step": 831 |
| }, |
| { |
| "epoch": 0.3017772941603192, |
| "grad_norm": 0.15437570214271545, |
| "learning_rate": 0.00015906497597064495, |
| "loss": 0.2434, |
| "step": 832 |
| }, |
| { |
| "epoch": 0.3021400072542619, |
| "grad_norm": 0.146324023604393, |
| "learning_rate": 0.00015897265344746113, |
| "loss": 0.2621, |
| "step": 833 |
| }, |
| { |
| "epoch": 0.3025027203482046, |
| "grad_norm": 0.15348979830741882, |
| "learning_rate": 0.00015888025379255156, |
| "loss": 0.2198, |
| "step": 834 |
| }, |
| { |
| "epoch": 0.3028654334421473, |
| "grad_norm": 0.14553911983966827, |
| "learning_rate": 0.00015878777712676796, |
| "loss": 0.2168, |
| "step": 835 |
| }, |
| { |
| "epoch": 0.3032281465360899, |
| "grad_norm": 0.15064238011837006, |
| "learning_rate": 0.00015869522357106272, |
| "loss": 0.2381, |
| "step": 836 |
| }, |
| { |
| "epoch": 0.30359085963003263, |
| "grad_norm": 0.1429353505373001, |
| "learning_rate": 0.00015860259324648886, |
| "loss": 0.2444, |
| "step": 837 |
| }, |
| { |
| "epoch": 0.30395357272397533, |
| "grad_norm": 0.14742977917194366, |
| "learning_rate": 0.00015850988627419968, |
| "loss": 0.2112, |
| "step": 838 |
| }, |
| { |
| "epoch": 0.30431628581791803, |
| "grad_norm": 0.14249765872955322, |
| "learning_rate": 0.00015841710277544896, |
| "loss": 0.2287, |
| "step": 839 |
| }, |
| { |
| "epoch": 0.30467899891186073, |
| "grad_norm": 0.14514710009098053, |
| "learning_rate": 0.00015832424287159027, |
| "loss": 0.2229, |
| "step": 840 |
| }, |
| { |
| "epoch": 0.30504171200580343, |
| "grad_norm": 0.15762075781822205, |
| "learning_rate": 0.00015823130668407738, |
| "loss": 0.212, |
| "step": 841 |
| }, |
| { |
| "epoch": 0.3054044250997461, |
| "grad_norm": 0.16756275296211243, |
| "learning_rate": 0.00015813829433446367, |
| "loss": 0.2431, |
| "step": 842 |
| }, |
| { |
| "epoch": 0.3057671381936888, |
| "grad_norm": 0.2156544029712677, |
| "learning_rate": 0.00015804520594440223, |
| "loss": 0.2045, |
| "step": 843 |
| }, |
| { |
| "epoch": 0.3061298512876315, |
| "grad_norm": 0.18604739010334015, |
| "learning_rate": 0.00015795204163564556, |
| "loss": 0.2644, |
| "step": 844 |
| }, |
| { |
| "epoch": 0.3064925643815742, |
| "grad_norm": 0.14301113784313202, |
| "learning_rate": 0.0001578588015300454, |
| "loss": 0.2114, |
| "step": 845 |
| }, |
| { |
| "epoch": 0.3068552774755169, |
| "grad_norm": 0.14301526546478271, |
| "learning_rate": 0.00015776548574955275, |
| "loss": 0.2127, |
| "step": 846 |
| }, |
| { |
| "epoch": 0.30721799056945953, |
| "grad_norm": 0.15024398267269135, |
| "learning_rate": 0.0001576720944162175, |
| "loss": 0.207, |
| "step": 847 |
| }, |
| { |
| "epoch": 0.30758070366340223, |
| "grad_norm": 0.14672665297985077, |
| "learning_rate": 0.00015757862765218838, |
| "loss": 0.2112, |
| "step": 848 |
| }, |
| { |
| "epoch": 0.30794341675734493, |
| "grad_norm": 0.177405446767807, |
| "learning_rate": 0.00015748508557971276, |
| "loss": 0.2248, |
| "step": 849 |
| }, |
| { |
| "epoch": 0.30830612985128764, |
| "grad_norm": 0.16310465335845947, |
| "learning_rate": 0.00015739146832113656, |
| "loss": 0.2389, |
| "step": 850 |
| }, |
| { |
| "epoch": 0.30866884294523034, |
| "grad_norm": 0.14648981392383575, |
| "learning_rate": 0.00015729777599890395, |
| "loss": 0.2159, |
| "step": 851 |
| }, |
| { |
| "epoch": 0.30903155603917304, |
| "grad_norm": 0.1470453441143036, |
| "learning_rate": 0.0001572040087355574, |
| "loss": 0.2216, |
| "step": 852 |
| }, |
| { |
| "epoch": 0.3093942691331157, |
| "grad_norm": 0.15409401059150696, |
| "learning_rate": 0.00015711016665373727, |
| "loss": 0.2497, |
| "step": 853 |
| }, |
| { |
| "epoch": 0.3097569822270584, |
| "grad_norm": 0.16030748188495636, |
| "learning_rate": 0.0001570162498761819, |
| "loss": 0.2108, |
| "step": 854 |
| }, |
| { |
| "epoch": 0.3101196953210011, |
| "grad_norm": 0.16415894031524658, |
| "learning_rate": 0.00015692225852572715, |
| "loss": 0.2297, |
| "step": 855 |
| }, |
| { |
| "epoch": 0.3104824084149438, |
| "grad_norm": 0.1503467857837677, |
| "learning_rate": 0.00015682819272530663, |
| "loss": 0.1972, |
| "step": 856 |
| }, |
| { |
| "epoch": 0.3108451215088865, |
| "grad_norm": 0.15261000394821167, |
| "learning_rate": 0.00015673405259795118, |
| "loss": 0.2296, |
| "step": 857 |
| }, |
| { |
| "epoch": 0.31120783460282914, |
| "grad_norm": 0.15605837106704712, |
| "learning_rate": 0.00015663983826678888, |
| "loss": 0.2135, |
| "step": 858 |
| }, |
| { |
| "epoch": 0.31157054769677184, |
| "grad_norm": 0.13954474031925201, |
| "learning_rate": 0.0001565455498550449, |
| "loss": 0.2064, |
| "step": 859 |
| }, |
| { |
| "epoch": 0.31193326079071454, |
| "grad_norm": 0.14538753032684326, |
| "learning_rate": 0.0001564511874860413, |
| "loss": 0.2279, |
| "step": 860 |
| }, |
| { |
| "epoch": 0.31229597388465724, |
| "grad_norm": 0.1461893618106842, |
| "learning_rate": 0.00015635675128319683, |
| "loss": 0.2203, |
| "step": 861 |
| }, |
| { |
| "epoch": 0.31265868697859994, |
| "grad_norm": 0.14321376383304596, |
| "learning_rate": 0.0001562622413700268, |
| "loss": 0.2112, |
| "step": 862 |
| }, |
| { |
| "epoch": 0.31302140007254264, |
| "grad_norm": 0.14480461180210114, |
| "learning_rate": 0.00015616765787014302, |
| "loss": 0.2182, |
| "step": 863 |
| }, |
| { |
| "epoch": 0.3133841131664853, |
| "grad_norm": 0.16734722256660461, |
| "learning_rate": 0.00015607300090725342, |
| "loss": 0.2222, |
| "step": 864 |
| }, |
| { |
| "epoch": 0.313746826260428, |
| "grad_norm": 0.14616838097572327, |
| "learning_rate": 0.00015597827060516211, |
| "loss": 0.2075, |
| "step": 865 |
| }, |
| { |
| "epoch": 0.3141095393543707, |
| "grad_norm": 0.16457431018352509, |
| "learning_rate": 0.00015588346708776904, |
| "loss": 0.2271, |
| "step": 866 |
| }, |
| { |
| "epoch": 0.3144722524483134, |
| "grad_norm": 0.16780099272727966, |
| "learning_rate": 0.00015578859047907004, |
| "loss": 0.2196, |
| "step": 867 |
| }, |
| { |
| "epoch": 0.3148349655422561, |
| "grad_norm": 0.14990176260471344, |
| "learning_rate": 0.00015569364090315646, |
| "loss": 0.2162, |
| "step": 868 |
| }, |
| { |
| "epoch": 0.31519767863619874, |
| "grad_norm": 0.1400328129529953, |
| "learning_rate": 0.00015559861848421505, |
| "loss": 0.2114, |
| "step": 869 |
| }, |
| { |
| "epoch": 0.31556039173014144, |
| "grad_norm": 0.15837667882442474, |
| "learning_rate": 0.00015550352334652788, |
| "loss": 0.2755, |
| "step": 870 |
| }, |
| { |
| "epoch": 0.31592310482408414, |
| "grad_norm": 0.14617806673049927, |
| "learning_rate": 0.00015540835561447214, |
| "loss": 0.2029, |
| "step": 871 |
| }, |
| { |
| "epoch": 0.31628581791802685, |
| "grad_norm": 0.1634027361869812, |
| "learning_rate": 0.00015531311541251995, |
| "loss": 0.2451, |
| "step": 872 |
| }, |
| { |
| "epoch": 0.31664853101196955, |
| "grad_norm": 0.17340759932994843, |
| "learning_rate": 0.00015521780286523824, |
| "loss": 0.2267, |
| "step": 873 |
| }, |
| { |
| "epoch": 0.31701124410591225, |
| "grad_norm": 0.15501338243484497, |
| "learning_rate": 0.0001551224180972885, |
| "loss": 0.1988, |
| "step": 874 |
| }, |
| { |
| "epoch": 0.3173739571998549, |
| "grad_norm": 0.15017758309841156, |
| "learning_rate": 0.00015502696123342676, |
| "loss": 0.211, |
| "step": 875 |
| }, |
| { |
| "epoch": 0.3177366702937976, |
| "grad_norm": 0.15657378733158112, |
| "learning_rate": 0.00015493143239850329, |
| "loss": 0.2092, |
| "step": 876 |
| }, |
| { |
| "epoch": 0.3180993833877403, |
| "grad_norm": 0.15220540761947632, |
| "learning_rate": 0.00015483583171746248, |
| "loss": 0.2413, |
| "step": 877 |
| }, |
| { |
| "epoch": 0.318462096481683, |
| "grad_norm": 0.15332242846488953, |
| "learning_rate": 0.00015474015931534276, |
| "loss": 0.2333, |
| "step": 878 |
| }, |
| { |
| "epoch": 0.3188248095756257, |
| "grad_norm": 0.14318165183067322, |
| "learning_rate": 0.00015464441531727632, |
| "loss": 0.2282, |
| "step": 879 |
| }, |
| { |
| "epoch": 0.31918752266956835, |
| "grad_norm": 0.15234385430812836, |
| "learning_rate": 0.00015454859984848895, |
| "loss": 0.2092, |
| "step": 880 |
| }, |
| { |
| "epoch": 0.31955023576351105, |
| "grad_norm": 0.15263251960277557, |
| "learning_rate": 0.0001544527130343, |
| "loss": 0.2142, |
| "step": 881 |
| }, |
| { |
| "epoch": 0.31991294885745375, |
| "grad_norm": 0.1610080748796463, |
| "learning_rate": 0.00015435675500012212, |
| "loss": 0.2305, |
| "step": 882 |
| }, |
| { |
| "epoch": 0.32027566195139645, |
| "grad_norm": 0.15507538616657257, |
| "learning_rate": 0.00015426072587146106, |
| "loss": 0.2316, |
| "step": 883 |
| }, |
| { |
| "epoch": 0.32063837504533915, |
| "grad_norm": 0.16231822967529297, |
| "learning_rate": 0.00015416462577391558, |
| "loss": 0.2953, |
| "step": 884 |
| }, |
| { |
| "epoch": 0.32100108813928185, |
| "grad_norm": 0.14619815349578857, |
| "learning_rate": 0.00015406845483317727, |
| "loss": 0.2335, |
| "step": 885 |
| }, |
| { |
| "epoch": 0.3213638012332245, |
| "grad_norm": 0.15803977847099304, |
| "learning_rate": 0.00015397221317503039, |
| "loss": 0.212, |
| "step": 886 |
| }, |
| { |
| "epoch": 0.3217265143271672, |
| "grad_norm": 0.148417666554451, |
| "learning_rate": 0.00015387590092535164, |
| "loss": 0.2063, |
| "step": 887 |
| }, |
| { |
| "epoch": 0.3220892274211099, |
| "grad_norm": 0.1504986435174942, |
| "learning_rate": 0.00015377951821011015, |
| "loss": 0.2156, |
| "step": 888 |
| }, |
| { |
| "epoch": 0.3224519405150526, |
| "grad_norm": 0.1552225649356842, |
| "learning_rate": 0.00015368306515536708, |
| "loss": 0.209, |
| "step": 889 |
| }, |
| { |
| "epoch": 0.3228146536089953, |
| "grad_norm": 0.1671207845211029, |
| "learning_rate": 0.00015358654188727568, |
| "loss": 0.218, |
| "step": 890 |
| }, |
| { |
| "epoch": 0.32317736670293795, |
| "grad_norm": 0.15497446060180664, |
| "learning_rate": 0.00015348994853208104, |
| "loss": 0.2239, |
| "step": 891 |
| }, |
| { |
| "epoch": 0.32354007979688065, |
| "grad_norm": 0.16032548248767853, |
| "learning_rate": 0.00015339328521611983, |
| "loss": 0.2069, |
| "step": 892 |
| }, |
| { |
| "epoch": 0.32390279289082335, |
| "grad_norm": 0.15629202127456665, |
| "learning_rate": 0.00015329655206582036, |
| "loss": 0.2262, |
| "step": 893 |
| }, |
| { |
| "epoch": 0.32426550598476606, |
| "grad_norm": 0.15609470009803772, |
| "learning_rate": 0.00015319974920770214, |
| "loss": 0.2444, |
| "step": 894 |
| }, |
| { |
| "epoch": 0.32462821907870876, |
| "grad_norm": 0.16244526207447052, |
| "learning_rate": 0.00015310287676837593, |
| "loss": 0.211, |
| "step": 895 |
| }, |
| { |
| "epoch": 0.32499093217265146, |
| "grad_norm": 0.1519642472267151, |
| "learning_rate": 0.00015300593487454348, |
| "loss": 0.2091, |
| "step": 896 |
| }, |
| { |
| "epoch": 0.3253536452665941, |
| "grad_norm": 0.1546807587146759, |
| "learning_rate": 0.0001529089236529974, |
| "loss": 0.2226, |
| "step": 897 |
| }, |
| { |
| "epoch": 0.3257163583605368, |
| "grad_norm": 0.14414747059345245, |
| "learning_rate": 0.00015281184323062097, |
| "loss": 0.2259, |
| "step": 898 |
| }, |
| { |
| "epoch": 0.3260790714544795, |
| "grad_norm": 0.1484064757823944, |
| "learning_rate": 0.00015271469373438792, |
| "loss": 0.2353, |
| "step": 899 |
| }, |
| { |
| "epoch": 0.3264417845484222, |
| "grad_norm": 0.15261922776699066, |
| "learning_rate": 0.00015261747529136236, |
| "loss": 0.2094, |
| "step": 900 |
| }, |
| { |
| "epoch": 0.3268044976423649, |
| "grad_norm": 0.16096492111682892, |
| "learning_rate": 0.00015252018802869866, |
| "loss": 0.2102, |
| "step": 901 |
| }, |
| { |
| "epoch": 0.32716721073630756, |
| "grad_norm": 0.14988648891448975, |
| "learning_rate": 0.00015242283207364107, |
| "loss": 0.1933, |
| "step": 902 |
| }, |
| { |
| "epoch": 0.32752992383025026, |
| "grad_norm": 0.16668923199176788, |
| "learning_rate": 0.00015232540755352373, |
| "loss": 0.2132, |
| "step": 903 |
| }, |
| { |
| "epoch": 0.32789263692419296, |
| "grad_norm": 0.1562613993883133, |
| "learning_rate": 0.00015222791459577051, |
| "loss": 0.2174, |
| "step": 904 |
| }, |
| { |
| "epoch": 0.32825535001813566, |
| "grad_norm": 0.15152856707572937, |
| "learning_rate": 0.00015213035332789477, |
| "loss": 0.2223, |
| "step": 905 |
| }, |
| { |
| "epoch": 0.32861806311207836, |
| "grad_norm": 0.15007184445858002, |
| "learning_rate": 0.00015203272387749915, |
| "loss": 0.2184, |
| "step": 906 |
| }, |
| { |
| "epoch": 0.32898077620602106, |
| "grad_norm": 0.1500440090894699, |
| "learning_rate": 0.0001519350263722755, |
| "loss": 0.2493, |
| "step": 907 |
| }, |
| { |
| "epoch": 0.3293434892999637, |
| "grad_norm": 0.15756063163280487, |
| "learning_rate": 0.00015183726094000476, |
| "loss": 0.2112, |
| "step": 908 |
| }, |
| { |
| "epoch": 0.3297062023939064, |
| "grad_norm": 0.15649868547916412, |
| "learning_rate": 0.00015173942770855655, |
| "loss": 0.2105, |
| "step": 909 |
| }, |
| { |
| "epoch": 0.3300689154878491, |
| "grad_norm": 0.17396046221256256, |
| "learning_rate": 0.00015164152680588938, |
| "loss": 0.2092, |
| "step": 910 |
| }, |
| { |
| "epoch": 0.3304316285817918, |
| "grad_norm": 0.15336064994335175, |
| "learning_rate": 0.00015154355836005006, |
| "loss": 0.2168, |
| "step": 911 |
| }, |
| { |
| "epoch": 0.3307943416757345, |
| "grad_norm": 0.1463136523962021, |
| "learning_rate": 0.00015144552249917386, |
| "loss": 0.2175, |
| "step": 912 |
| }, |
| { |
| "epoch": 0.33115705476967716, |
| "grad_norm": 0.14064238965511322, |
| "learning_rate": 0.0001513474193514842, |
| "loss": 0.2342, |
| "step": 913 |
| }, |
| { |
| "epoch": 0.33151976786361986, |
| "grad_norm": 0.15353120863437653, |
| "learning_rate": 0.00015124924904529253, |
| "loss": 0.2269, |
| "step": 914 |
| }, |
| { |
| "epoch": 0.33188248095756256, |
| "grad_norm": 0.1634497493505478, |
| "learning_rate": 0.00015115101170899806, |
| "loss": 0.2303, |
| "step": 915 |
| }, |
| { |
| "epoch": 0.33224519405150527, |
| "grad_norm": 0.15802593529224396, |
| "learning_rate": 0.00015105270747108778, |
| "loss": 0.2181, |
| "step": 916 |
| }, |
| { |
| "epoch": 0.33260790714544797, |
| "grad_norm": 0.16792048513889313, |
| "learning_rate": 0.00015095433646013606, |
| "loss": 0.2042, |
| "step": 917 |
| }, |
| { |
| "epoch": 0.33297062023939067, |
| "grad_norm": 0.14907622337341309, |
| "learning_rate": 0.0001508558988048047, |
| "loss": 0.198, |
| "step": 918 |
| }, |
| { |
| "epoch": 0.3333333333333333, |
| "grad_norm": 0.15107260644435883, |
| "learning_rate": 0.00015075739463384267, |
| "loss": 0.2103, |
| "step": 919 |
| }, |
| { |
| "epoch": 0.333696046427276, |
| "grad_norm": 0.16222083568572998, |
| "learning_rate": 0.00015065882407608582, |
| "loss": 0.2267, |
| "step": 920 |
| }, |
| { |
| "epoch": 0.3340587595212187, |
| "grad_norm": 0.14550422132015228, |
| "learning_rate": 0.00015056018726045697, |
| "loss": 0.2197, |
| "step": 921 |
| }, |
| { |
| "epoch": 0.3344214726151614, |
| "grad_norm": 0.14713485538959503, |
| "learning_rate": 0.00015046148431596554, |
| "loss": 0.2261, |
| "step": 922 |
| }, |
| { |
| "epoch": 0.3347841857091041, |
| "grad_norm": 0.15137678384780884, |
| "learning_rate": 0.0001503627153717074, |
| "loss": 0.2196, |
| "step": 923 |
| }, |
| { |
| "epoch": 0.33514689880304677, |
| "grad_norm": 0.15455511212348938, |
| "learning_rate": 0.00015026388055686485, |
| "loss": 0.2111, |
| "step": 924 |
| }, |
| { |
| "epoch": 0.33550961189698947, |
| "grad_norm": 0.15710324048995972, |
| "learning_rate": 0.00015016498000070618, |
| "loss": 0.2138, |
| "step": 925 |
| }, |
| { |
| "epoch": 0.33587232499093217, |
| "grad_norm": 0.19984076917171478, |
| "learning_rate": 0.00015006601383258584, |
| "loss": 0.2264, |
| "step": 926 |
| }, |
| { |
| "epoch": 0.33623503808487487, |
| "grad_norm": 0.15135234594345093, |
| "learning_rate": 0.000149966982181944, |
| "loss": 0.2121, |
| "step": 927 |
| }, |
| { |
| "epoch": 0.3365977511788176, |
| "grad_norm": 0.14553037285804749, |
| "learning_rate": 0.0001498678851783065, |
| "loss": 0.2095, |
| "step": 928 |
| }, |
| { |
| "epoch": 0.3369604642727603, |
| "grad_norm": 0.1508447229862213, |
| "learning_rate": 0.00014976872295128463, |
| "loss": 0.2377, |
| "step": 929 |
| }, |
| { |
| "epoch": 0.3373231773667029, |
| "grad_norm": 0.15726783871650696, |
| "learning_rate": 0.000149669495630575, |
| "loss": 0.2453, |
| "step": 930 |
| }, |
| { |
| "epoch": 0.3376858904606456, |
| "grad_norm": 0.1487269550561905, |
| "learning_rate": 0.0001495702033459594, |
| "loss": 0.1958, |
| "step": 931 |
| }, |
| { |
| "epoch": 0.3380486035545883, |
| "grad_norm": 0.1618356555700302, |
| "learning_rate": 0.00014947084622730453, |
| "loss": 0.2061, |
| "step": 932 |
| }, |
| { |
| "epoch": 0.338411316648531, |
| "grad_norm": 0.178387850522995, |
| "learning_rate": 0.00014937142440456195, |
| "loss": 0.2358, |
| "step": 933 |
| }, |
| { |
| "epoch": 0.3387740297424737, |
| "grad_norm": 0.15690761804580688, |
| "learning_rate": 0.00014927193800776776, |
| "loss": 0.2077, |
| "step": 934 |
| }, |
| { |
| "epoch": 0.33913674283641637, |
| "grad_norm": 0.15255998075008392, |
| "learning_rate": 0.00014917238716704258, |
| "loss": 0.214, |
| "step": 935 |
| }, |
| { |
| "epoch": 0.3394994559303591, |
| "grad_norm": 0.15309607982635498, |
| "learning_rate": 0.00014907277201259132, |
| "loss": 0.2326, |
| "step": 936 |
| }, |
| { |
| "epoch": 0.3398621690243018, |
| "grad_norm": 0.15764005482196808, |
| "learning_rate": 0.00014897309267470295, |
| "loss": 0.2096, |
| "step": 937 |
| }, |
| { |
| "epoch": 0.3402248821182445, |
| "grad_norm": 0.15512487292289734, |
| "learning_rate": 0.0001488733492837505, |
| "loss": 0.2322, |
| "step": 938 |
| }, |
| { |
| "epoch": 0.3405875952121872, |
| "grad_norm": 0.17276284098625183, |
| "learning_rate": 0.00014877354197019064, |
| "loss": 0.2217, |
| "step": 939 |
| }, |
| { |
| "epoch": 0.3409503083061299, |
| "grad_norm": 0.16367502510547638, |
| "learning_rate": 0.00014867367086456373, |
| "loss": 0.2187, |
| "step": 940 |
| }, |
| { |
| "epoch": 0.3413130214000725, |
| "grad_norm": 0.1784859150648117, |
| "learning_rate": 0.0001485737360974936, |
| "loss": 0.2339, |
| "step": 941 |
| }, |
| { |
| "epoch": 0.3416757344940152, |
| "grad_norm": 0.15108786523342133, |
| "learning_rate": 0.00014847373779968724, |
| "loss": 0.207, |
| "step": 942 |
| }, |
| { |
| "epoch": 0.34203844758795793, |
| "grad_norm": 0.15686751902103424, |
| "learning_rate": 0.00014837367610193476, |
| "loss": 0.2155, |
| "step": 943 |
| }, |
| { |
| "epoch": 0.34240116068190063, |
| "grad_norm": 0.17520059645175934, |
| "learning_rate": 0.00014827355113510927, |
| "loss": 0.2185, |
| "step": 944 |
| }, |
| { |
| "epoch": 0.34276387377584333, |
| "grad_norm": 0.14490067958831787, |
| "learning_rate": 0.0001481733630301666, |
| "loss": 0.2049, |
| "step": 945 |
| }, |
| { |
| "epoch": 0.343126586869786, |
| "grad_norm": 0.15382413566112518, |
| "learning_rate": 0.000148073111918145, |
| "loss": 0.2061, |
| "step": 946 |
| }, |
| { |
| "epoch": 0.3434892999637287, |
| "grad_norm": 0.15271534025669098, |
| "learning_rate": 0.0001479727979301654, |
| "loss": 0.2208, |
| "step": 947 |
| }, |
| { |
| "epoch": 0.3438520130576714, |
| "grad_norm": 0.1692724972963333, |
| "learning_rate": 0.0001478724211974308, |
| "loss": 0.24, |
| "step": 948 |
| }, |
| { |
| "epoch": 0.3442147261516141, |
| "grad_norm": 0.18430864810943604, |
| "learning_rate": 0.0001477719818512263, |
| "loss": 0.2347, |
| "step": 949 |
| }, |
| { |
| "epoch": 0.3445774392455568, |
| "grad_norm": 0.16035676002502441, |
| "learning_rate": 0.00014767148002291886, |
| "loss": 0.229, |
| "step": 950 |
| }, |
| { |
| "epoch": 0.34494015233949943, |
| "grad_norm": 0.14710398018360138, |
| "learning_rate": 0.00014757091584395726, |
| "loss": 0.2184, |
| "step": 951 |
| }, |
| { |
| "epoch": 0.34530286543344213, |
| "grad_norm": 0.1524522453546524, |
| "learning_rate": 0.00014747028944587167, |
| "loss": 0.2067, |
| "step": 952 |
| }, |
| { |
| "epoch": 0.34566557852738483, |
| "grad_norm": 0.1544627547264099, |
| "learning_rate": 0.00014736960096027385, |
| "loss": 0.1903, |
| "step": 953 |
| }, |
| { |
| "epoch": 0.34602829162132753, |
| "grad_norm": 0.15999731421470642, |
| "learning_rate": 0.00014726885051885653, |
| "loss": 0.1956, |
| "step": 954 |
| }, |
| { |
| "epoch": 0.34639100471527023, |
| "grad_norm": 0.16488391160964966, |
| "learning_rate": 0.00014716803825339368, |
| "loss": 0.227, |
| "step": 955 |
| }, |
| { |
| "epoch": 0.34675371780921294, |
| "grad_norm": 0.1626642644405365, |
| "learning_rate": 0.00014706716429573996, |
| "loss": 0.2302, |
| "step": 956 |
| }, |
| { |
| "epoch": 0.3471164309031556, |
| "grad_norm": 0.1589454710483551, |
| "learning_rate": 0.00014696622877783088, |
| "loss": 0.1998, |
| "step": 957 |
| }, |
| { |
| "epoch": 0.3474791439970983, |
| "grad_norm": 0.17863640189170837, |
| "learning_rate": 0.00014686523183168236, |
| "loss": 0.2244, |
| "step": 958 |
| }, |
| { |
| "epoch": 0.347841857091041, |
| "grad_norm": 0.15809310972690582, |
| "learning_rate": 0.00014676417358939063, |
| "loss": 0.2156, |
| "step": 959 |
| }, |
| { |
| "epoch": 0.3482045701849837, |
| "grad_norm": 0.14684627950191498, |
| "learning_rate": 0.00014666305418313224, |
| "loss": 0.2037, |
| "step": 960 |
| }, |
| { |
| "epoch": 0.3485672832789264, |
| "grad_norm": 0.14479795098304749, |
| "learning_rate": 0.00014656187374516365, |
| "loss": 0.1991, |
| "step": 961 |
| }, |
| { |
| "epoch": 0.34892999637286903, |
| "grad_norm": 0.17033007740974426, |
| "learning_rate": 0.00014646063240782105, |
| "loss": 0.1991, |
| "step": 962 |
| }, |
| { |
| "epoch": 0.34929270946681173, |
| "grad_norm": 0.1695454865694046, |
| "learning_rate": 0.00014635933030352044, |
| "loss": 0.2039, |
| "step": 963 |
| }, |
| { |
| "epoch": 0.34965542256075444, |
| "grad_norm": 0.16838496923446655, |
| "learning_rate": 0.00014625796756475724, |
| "loss": 0.2111, |
| "step": 964 |
| }, |
| { |
| "epoch": 0.35001813565469714, |
| "grad_norm": 0.16217052936553955, |
| "learning_rate": 0.00014615654432410612, |
| "loss": 0.2091, |
| "step": 965 |
| }, |
| { |
| "epoch": 0.35038084874863984, |
| "grad_norm": 0.15333756804466248, |
| "learning_rate": 0.00014605506071422103, |
| "loss": 0.2225, |
| "step": 966 |
| }, |
| { |
| "epoch": 0.35074356184258254, |
| "grad_norm": 0.15081751346588135, |
| "learning_rate": 0.00014595351686783465, |
| "loss": 0.2138, |
| "step": 967 |
| }, |
| { |
| "epoch": 0.3511062749365252, |
| "grad_norm": 0.16661369800567627, |
| "learning_rate": 0.00014585191291775868, |
| "loss": 0.211, |
| "step": 968 |
| }, |
| { |
| "epoch": 0.3514689880304679, |
| "grad_norm": 0.15592342615127563, |
| "learning_rate": 0.00014575024899688324, |
| "loss": 0.2069, |
| "step": 969 |
| }, |
| { |
| "epoch": 0.3518317011244106, |
| "grad_norm": 0.15869508683681488, |
| "learning_rate": 0.00014564852523817705, |
| "loss": 0.1961, |
| "step": 970 |
| }, |
| { |
| "epoch": 0.3521944142183533, |
| "grad_norm": 0.18337900936603546, |
| "learning_rate": 0.00014554674177468695, |
| "loss": 0.2039, |
| "step": 971 |
| }, |
| { |
| "epoch": 0.352557127312296, |
| "grad_norm": 0.20202304422855377, |
| "learning_rate": 0.00014544489873953803, |
| "loss": 0.2344, |
| "step": 972 |
| }, |
| { |
| "epoch": 0.35291984040623864, |
| "grad_norm": 0.1616135686635971, |
| "learning_rate": 0.0001453429962659331, |
| "loss": 0.2117, |
| "step": 973 |
| }, |
| { |
| "epoch": 0.35328255350018134, |
| "grad_norm": 0.15346059203147888, |
| "learning_rate": 0.00014524103448715283, |
| "loss": 0.2235, |
| "step": 974 |
| }, |
| { |
| "epoch": 0.35364526659412404, |
| "grad_norm": 0.148000568151474, |
| "learning_rate": 0.00014513901353655547, |
| "loss": 0.1944, |
| "step": 975 |
| }, |
| { |
| "epoch": 0.35400797968806674, |
| "grad_norm": 0.15789712965488434, |
| "learning_rate": 0.00014503693354757667, |
| "loss": 0.2139, |
| "step": 976 |
| }, |
| { |
| "epoch": 0.35437069278200944, |
| "grad_norm": 0.16983194649219513, |
| "learning_rate": 0.00014493479465372912, |
| "loss": 0.2122, |
| "step": 977 |
| }, |
| { |
| "epoch": 0.35473340587595215, |
| "grad_norm": 0.19161252677440643, |
| "learning_rate": 0.0001448325969886028, |
| "loss": 0.2799, |
| "step": 978 |
| }, |
| { |
| "epoch": 0.3550961189698948, |
| "grad_norm": 0.16653449833393097, |
| "learning_rate": 0.00014473034068586445, |
| "loss": 0.2166, |
| "step": 979 |
| }, |
| { |
| "epoch": 0.3554588320638375, |
| "grad_norm": 0.1566229611635208, |
| "learning_rate": 0.00014462802587925742, |
| "loss": 0.2104, |
| "step": 980 |
| }, |
| { |
| "epoch": 0.3558215451577802, |
| "grad_norm": 0.15640553832054138, |
| "learning_rate": 0.00014452565270260177, |
| "loss": 0.1979, |
| "step": 981 |
| }, |
| { |
| "epoch": 0.3561842582517229, |
| "grad_norm": 0.15835930407047272, |
| "learning_rate": 0.00014442322128979372, |
| "loss": 0.2412, |
| "step": 982 |
| }, |
| { |
| "epoch": 0.3565469713456656, |
| "grad_norm": 0.172097310423851, |
| "learning_rate": 0.00014432073177480576, |
| "loss": 0.2146, |
| "step": 983 |
| }, |
| { |
| "epoch": 0.35690968443960824, |
| "grad_norm": 0.1693415641784668, |
| "learning_rate": 0.00014421818429168634, |
| "loss": 0.2408, |
| "step": 984 |
| }, |
| { |
| "epoch": 0.35727239753355094, |
| "grad_norm": 0.15985938906669617, |
| "learning_rate": 0.00014411557897455973, |
| "loss": 0.2167, |
| "step": 985 |
| }, |
| { |
| "epoch": 0.35763511062749365, |
| "grad_norm": 0.16702041029930115, |
| "learning_rate": 0.00014401291595762586, |
| "loss": 0.2062, |
| "step": 986 |
| }, |
| { |
| "epoch": 0.35799782372143635, |
| "grad_norm": 0.16588671505451202, |
| "learning_rate": 0.00014391019537516006, |
| "loss": 0.2023, |
| "step": 987 |
| }, |
| { |
| "epoch": 0.35836053681537905, |
| "grad_norm": 0.15971873700618744, |
| "learning_rate": 0.0001438074173615131, |
| "loss": 0.2162, |
| "step": 988 |
| }, |
| { |
| "epoch": 0.35872324990932175, |
| "grad_norm": 0.1973976194858551, |
| "learning_rate": 0.0001437045820511107, |
| "loss": 0.2135, |
| "step": 989 |
| }, |
| { |
| "epoch": 0.3590859630032644, |
| "grad_norm": 0.1852118968963623, |
| "learning_rate": 0.00014360168957845362, |
| "loss": 0.2161, |
| "step": 990 |
| }, |
| { |
| "epoch": 0.3594486760972071, |
| "grad_norm": 0.15597601234912872, |
| "learning_rate": 0.00014349874007811735, |
| "loss": 0.2053, |
| "step": 991 |
| }, |
| { |
| "epoch": 0.3598113891911498, |
| "grad_norm": 0.16251103579998016, |
| "learning_rate": 0.00014339573368475197, |
| "loss": 0.2122, |
| "step": 992 |
| }, |
| { |
| "epoch": 0.3601741022850925, |
| "grad_norm": 0.1582382768392563, |
| "learning_rate": 0.00014329267053308194, |
| "loss": 0.2175, |
| "step": 993 |
| }, |
| { |
| "epoch": 0.3605368153790352, |
| "grad_norm": 0.15138986706733704, |
| "learning_rate": 0.00014318955075790605, |
| "loss": 0.201, |
| "step": 994 |
| }, |
| { |
| "epoch": 0.36089952847297785, |
| "grad_norm": 0.16074247658252716, |
| "learning_rate": 0.00014308637449409706, |
| "loss": 0.2281, |
| "step": 995 |
| }, |
| { |
| "epoch": 0.36126224156692055, |
| "grad_norm": 0.153158500790596, |
| "learning_rate": 0.00014298314187660162, |
| "loss": 0.1925, |
| "step": 996 |
| }, |
| { |
| "epoch": 0.36162495466086325, |
| "grad_norm": 0.17264969646930695, |
| "learning_rate": 0.00014287985304044015, |
| "loss": 0.2069, |
| "step": 997 |
| }, |
| { |
| "epoch": 0.36198766775480595, |
| "grad_norm": 0.18429549038410187, |
| "learning_rate": 0.0001427765081207065, |
| "loss": 0.2185, |
| "step": 998 |
| }, |
| { |
| "epoch": 0.36235038084874865, |
| "grad_norm": 0.1758868545293808, |
| "learning_rate": 0.000142673107252568, |
| "loss": 0.2432, |
| "step": 999 |
| }, |
| { |
| "epoch": 0.36271309394269136, |
| "grad_norm": 0.15705294907093048, |
| "learning_rate": 0.00014256965057126504, |
| "loss": 0.1986, |
| "step": 1000 |
| }, |
| { |
| "epoch": 0.363075807036634, |
| "grad_norm": 0.1507769376039505, |
| "learning_rate": 0.00014246613821211108, |
| "loss": 0.1876, |
| "step": 1001 |
| }, |
| { |
| "epoch": 0.3634385201305767, |
| "grad_norm": 0.17133677005767822, |
| "learning_rate": 0.00014236257031049232, |
| "loss": 0.209, |
| "step": 1002 |
| }, |
| { |
| "epoch": 0.3638012332245194, |
| "grad_norm": 0.15936224162578583, |
| "learning_rate": 0.00014225894700186774, |
| "loss": 0.1974, |
| "step": 1003 |
| }, |
| { |
| "epoch": 0.3641639463184621, |
| "grad_norm": 0.19848595559597015, |
| "learning_rate": 0.00014215526842176868, |
| "loss": 0.2218, |
| "step": 1004 |
| }, |
| { |
| "epoch": 0.3645266594124048, |
| "grad_norm": 0.17126554250717163, |
| "learning_rate": 0.00014205153470579882, |
| "loss": 0.2229, |
| "step": 1005 |
| }, |
| { |
| "epoch": 0.36488937250634745, |
| "grad_norm": 0.15903635323047638, |
| "learning_rate": 0.0001419477459896339, |
| "loss": 0.2127, |
| "step": 1006 |
| }, |
| { |
| "epoch": 0.36525208560029016, |
| "grad_norm": 0.16994720697402954, |
| "learning_rate": 0.00014184390240902167, |
| "loss": 0.2289, |
| "step": 1007 |
| }, |
| { |
| "epoch": 0.36561479869423286, |
| "grad_norm": 0.17226669192314148, |
| "learning_rate": 0.00014174000409978156, |
| "loss": 0.2147, |
| "step": 1008 |
| }, |
| { |
| "epoch": 0.36597751178817556, |
| "grad_norm": 0.1492406278848648, |
| "learning_rate": 0.00014163605119780467, |
| "loss": 0.2087, |
| "step": 1009 |
| }, |
| { |
| "epoch": 0.36634022488211826, |
| "grad_norm": 0.16116073727607727, |
| "learning_rate": 0.00014153204383905344, |
| "loss": 0.2176, |
| "step": 1010 |
| }, |
| { |
| "epoch": 0.36670293797606096, |
| "grad_norm": 0.16366463899612427, |
| "learning_rate": 0.00014142798215956148, |
| "loss": 0.1925, |
| "step": 1011 |
| }, |
| { |
| "epoch": 0.3670656510700036, |
| "grad_norm": 0.15476755797863007, |
| "learning_rate": 0.00014132386629543364, |
| "loss": 0.1994, |
| "step": 1012 |
| }, |
| { |
| "epoch": 0.3674283641639463, |
| "grad_norm": 0.16290143132209778, |
| "learning_rate": 0.00014121969638284542, |
| "loss": 0.2131, |
| "step": 1013 |
| }, |
| { |
| "epoch": 0.367791077257889, |
| "grad_norm": 0.15869063138961792, |
| "learning_rate": 0.00014111547255804316, |
| "loss": 0.1889, |
| "step": 1014 |
| }, |
| { |
| "epoch": 0.3681537903518317, |
| "grad_norm": 0.1735077053308487, |
| "learning_rate": 0.00014101119495734364, |
| "loss": 0.2261, |
| "step": 1015 |
| }, |
| { |
| "epoch": 0.3685165034457744, |
| "grad_norm": 0.16333554685115814, |
| "learning_rate": 0.00014090686371713402, |
| "loss": 0.2247, |
| "step": 1016 |
| }, |
| { |
| "epoch": 0.36887921653971706, |
| "grad_norm": 0.18004798889160156, |
| "learning_rate": 0.00014080247897387156, |
| "loss": 0.2334, |
| "step": 1017 |
| }, |
| { |
| "epoch": 0.36924192963365976, |
| "grad_norm": 0.16508519649505615, |
| "learning_rate": 0.0001406980408640835, |
| "loss": 0.1995, |
| "step": 1018 |
| }, |
| { |
| "epoch": 0.36960464272760246, |
| "grad_norm": 0.1622190773487091, |
| "learning_rate": 0.00014059354952436698, |
| "loss": 0.2003, |
| "step": 1019 |
| }, |
| { |
| "epoch": 0.36996735582154516, |
| "grad_norm": 0.16706664860248566, |
| "learning_rate": 0.00014048900509138867, |
| "loss": 0.219, |
| "step": 1020 |
| }, |
| { |
| "epoch": 0.37033006891548786, |
| "grad_norm": 0.1640990823507309, |
| "learning_rate": 0.00014038440770188467, |
| "loss": 0.2018, |
| "step": 1021 |
| }, |
| { |
| "epoch": 0.37069278200943057, |
| "grad_norm": 0.17155148088932037, |
| "learning_rate": 0.0001402797574926604, |
| "loss": 0.2234, |
| "step": 1022 |
| }, |
| { |
| "epoch": 0.3710554951033732, |
| "grad_norm": 0.1780928671360016, |
| "learning_rate": 0.00014017505460059036, |
| "loss": 0.2346, |
| "step": 1023 |
| }, |
| { |
| "epoch": 0.3714182081973159, |
| "grad_norm": 0.1557503193616867, |
| "learning_rate": 0.0001400702991626179, |
| "loss": 0.1969, |
| "step": 1024 |
| }, |
| { |
| "epoch": 0.3717809212912586, |
| "grad_norm": 0.14212948083877563, |
| "learning_rate": 0.00013996549131575515, |
| "loss": 0.1883, |
| "step": 1025 |
| }, |
| { |
| "epoch": 0.3721436343852013, |
| "grad_norm": 0.16952791810035706, |
| "learning_rate": 0.00013986063119708275, |
| "loss": 0.2157, |
| "step": 1026 |
| }, |
| { |
| "epoch": 0.372506347479144, |
| "grad_norm": 0.16988742351531982, |
| "learning_rate": 0.00013975571894374973, |
| "loss": 0.2103, |
| "step": 1027 |
| }, |
| { |
| "epoch": 0.37286906057308666, |
| "grad_norm": 0.16801071166992188, |
| "learning_rate": 0.00013965075469297332, |
| "loss": 0.2094, |
| "step": 1028 |
| }, |
| { |
| "epoch": 0.37323177366702937, |
| "grad_norm": 0.19034814834594727, |
| "learning_rate": 0.00013954573858203874, |
| "loss": 0.2444, |
| "step": 1029 |
| }, |
| { |
| "epoch": 0.37359448676097207, |
| "grad_norm": 0.15771815180778503, |
| "learning_rate": 0.000139440670748299, |
| "loss": 0.1987, |
| "step": 1030 |
| }, |
| { |
| "epoch": 0.37395719985491477, |
| "grad_norm": 0.1528027504682541, |
| "learning_rate": 0.00013933555132917487, |
| "loss": 0.2138, |
| "step": 1031 |
| }, |
| { |
| "epoch": 0.37431991294885747, |
| "grad_norm": 0.16030389070510864, |
| "learning_rate": 0.00013923038046215446, |
| "loss": 0.2057, |
| "step": 1032 |
| }, |
| { |
| "epoch": 0.37468262604280017, |
| "grad_norm": 0.1645725667476654, |
| "learning_rate": 0.0001391251582847932, |
| "loss": 0.1957, |
| "step": 1033 |
| }, |
| { |
| "epoch": 0.3750453391367428, |
| "grad_norm": 0.17184780538082123, |
| "learning_rate": 0.0001390198849347138, |
| "loss": 0.2244, |
| "step": 1034 |
| }, |
| { |
| "epoch": 0.3754080522306855, |
| "grad_norm": 0.16507604718208313, |
| "learning_rate": 0.00013891456054960564, |
| "loss": 0.2126, |
| "step": 1035 |
| }, |
| { |
| "epoch": 0.3757707653246282, |
| "grad_norm": 0.15355214476585388, |
| "learning_rate": 0.00013880918526722497, |
| "loss": 0.1853, |
| "step": 1036 |
| }, |
| { |
| "epoch": 0.3761334784185709, |
| "grad_norm": 0.1596059501171112, |
| "learning_rate": 0.00013870375922539466, |
| "loss": 0.229, |
| "step": 1037 |
| }, |
| { |
| "epoch": 0.3764961915125136, |
| "grad_norm": 0.16307580471038818, |
| "learning_rate": 0.00013859828256200394, |
| "loss": 0.2149, |
| "step": 1038 |
| }, |
| { |
| "epoch": 0.37685890460645627, |
| "grad_norm": 0.15789788961410522, |
| "learning_rate": 0.00013849275541500812, |
| "loss": 0.2351, |
| "step": 1039 |
| }, |
| { |
| "epoch": 0.37722161770039897, |
| "grad_norm": 0.1589316725730896, |
| "learning_rate": 0.00013838717792242876, |
| "loss": 0.2164, |
| "step": 1040 |
| }, |
| { |
| "epoch": 0.37758433079434167, |
| "grad_norm": 0.15134315192699432, |
| "learning_rate": 0.00013828155022235308, |
| "loss": 0.1925, |
| "step": 1041 |
| }, |
| { |
| "epoch": 0.3779470438882844, |
| "grad_norm": 0.14640171825885773, |
| "learning_rate": 0.00013817587245293407, |
| "loss": 0.2138, |
| "step": 1042 |
| }, |
| { |
| "epoch": 0.3783097569822271, |
| "grad_norm": 0.1695149838924408, |
| "learning_rate": 0.0001380701447523902, |
| "loss": 0.2139, |
| "step": 1043 |
| }, |
| { |
| "epoch": 0.3786724700761698, |
| "grad_norm": 0.1683790236711502, |
| "learning_rate": 0.0001379643672590052, |
| "loss": 0.1954, |
| "step": 1044 |
| }, |
| { |
| "epoch": 0.3790351831701124, |
| "grad_norm": 0.17694401741027832, |
| "learning_rate": 0.00013785854011112798, |
| "loss": 0.2022, |
| "step": 1045 |
| }, |
| { |
| "epoch": 0.3793978962640551, |
| "grad_norm": 0.17428404092788696, |
| "learning_rate": 0.00013775266344717233, |
| "loss": 0.1832, |
| "step": 1046 |
| }, |
| { |
| "epoch": 0.3797606093579978, |
| "grad_norm": 0.1612454652786255, |
| "learning_rate": 0.00013764673740561685, |
| "loss": 0.1917, |
| "step": 1047 |
| }, |
| { |
| "epoch": 0.3801233224519405, |
| "grad_norm": 0.16686902940273285, |
| "learning_rate": 0.0001375407621250047, |
| "loss": 0.1989, |
| "step": 1048 |
| }, |
| { |
| "epoch": 0.3804860355458832, |
| "grad_norm": 0.14911605417728424, |
| "learning_rate": 0.00013743473774394346, |
| "loss": 0.2004, |
| "step": 1049 |
| }, |
| { |
| "epoch": 0.3808487486398259, |
| "grad_norm": 0.15896974503993988, |
| "learning_rate": 0.00013732866440110497, |
| "loss": 0.2466, |
| "step": 1050 |
| }, |
| { |
| "epoch": 0.3812114617337686, |
| "grad_norm": 0.16059251129627228, |
| "learning_rate": 0.000137222542235225, |
| "loss": 0.2042, |
| "step": 1051 |
| }, |
| { |
| "epoch": 0.3815741748277113, |
| "grad_norm": 0.16174575686454773, |
| "learning_rate": 0.0001371163713851032, |
| "loss": 0.1979, |
| "step": 1052 |
| }, |
| { |
| "epoch": 0.381936887921654, |
| "grad_norm": 0.1577538102865219, |
| "learning_rate": 0.00013701015198960302, |
| "loss": 0.213, |
| "step": 1053 |
| }, |
| { |
| "epoch": 0.3822996010155967, |
| "grad_norm": 0.1710449755191803, |
| "learning_rate": 0.0001369038841876513, |
| "loss": 0.223, |
| "step": 1054 |
| }, |
| { |
| "epoch": 0.3826623141095394, |
| "grad_norm": 0.17627973854541779, |
| "learning_rate": 0.00013679756811823813, |
| "loss": 0.2397, |
| "step": 1055 |
| }, |
| { |
| "epoch": 0.383025027203482, |
| "grad_norm": 0.15820728242397308, |
| "learning_rate": 0.0001366912039204169, |
| "loss": 0.1959, |
| "step": 1056 |
| }, |
| { |
| "epoch": 0.38338774029742473, |
| "grad_norm": 0.15889425575733185, |
| "learning_rate": 0.00013658479173330384, |
| "loss": 0.1805, |
| "step": 1057 |
| }, |
| { |
| "epoch": 0.38375045339136743, |
| "grad_norm": 0.18348795175552368, |
| "learning_rate": 0.00013647833169607788, |
| "loss": 0.2061, |
| "step": 1058 |
| }, |
| { |
| "epoch": 0.38411316648531013, |
| "grad_norm": 0.16327665746212006, |
| "learning_rate": 0.0001363718239479807, |
| "loss": 0.1899, |
| "step": 1059 |
| }, |
| { |
| "epoch": 0.38447587957925283, |
| "grad_norm": 0.15636590123176575, |
| "learning_rate": 0.00013626526862831628, |
| "loss": 0.2161, |
| "step": 1060 |
| }, |
| { |
| "epoch": 0.3848385926731955, |
| "grad_norm": 0.158644899725914, |
| "learning_rate": 0.00013615866587645084, |
| "loss": 0.1991, |
| "step": 1061 |
| }, |
| { |
| "epoch": 0.3852013057671382, |
| "grad_norm": 0.16064795851707458, |
| "learning_rate": 0.0001360520158318126, |
| "loss": 0.2009, |
| "step": 1062 |
| }, |
| { |
| "epoch": 0.3855640188610809, |
| "grad_norm": 0.18209217488765717, |
| "learning_rate": 0.00013594531863389173, |
| "loss": 0.2538, |
| "step": 1063 |
| }, |
| { |
| "epoch": 0.3859267319550236, |
| "grad_norm": 0.16186301410198212, |
| "learning_rate": 0.00013583857442223994, |
| "loss": 0.2249, |
| "step": 1064 |
| }, |
| { |
| "epoch": 0.3862894450489663, |
| "grad_norm": 0.16660407185554504, |
| "learning_rate": 0.00013573178333647058, |
| "loss": 0.2116, |
| "step": 1065 |
| }, |
| { |
| "epoch": 0.386652158142909, |
| "grad_norm": 0.16199025511741638, |
| "learning_rate": 0.0001356249455162582, |
| "loss": 0.2156, |
| "step": 1066 |
| }, |
| { |
| "epoch": 0.38701487123685163, |
| "grad_norm": 0.1578529328107834, |
| "learning_rate": 0.0001355180611013385, |
| "loss": 0.2066, |
| "step": 1067 |
| }, |
| { |
| "epoch": 0.38737758433079433, |
| "grad_norm": 0.17841364443302155, |
| "learning_rate": 0.00013541113023150816, |
| "loss": 0.205, |
| "step": 1068 |
| }, |
| { |
| "epoch": 0.38774029742473703, |
| "grad_norm": 0.1555965095758438, |
| "learning_rate": 0.00013530415304662457, |
| "loss": 0.2027, |
| "step": 1069 |
| }, |
| { |
| "epoch": 0.38810301051867974, |
| "grad_norm": 0.15105211734771729, |
| "learning_rate": 0.00013519712968660568, |
| "loss": 0.1963, |
| "step": 1070 |
| }, |
| { |
| "epoch": 0.38846572361262244, |
| "grad_norm": 0.16452065110206604, |
| "learning_rate": 0.0001350900602914299, |
| "loss": 0.2129, |
| "step": 1071 |
| }, |
| { |
| "epoch": 0.3888284367065651, |
| "grad_norm": 0.16760526597499847, |
| "learning_rate": 0.00013498294500113585, |
| "loss": 0.2418, |
| "step": 1072 |
| }, |
| { |
| "epoch": 0.3891911498005078, |
| "grad_norm": 0.16931942105293274, |
| "learning_rate": 0.00013487578395582206, |
| "loss": 0.1914, |
| "step": 1073 |
| }, |
| { |
| "epoch": 0.3895538628944505, |
| "grad_norm": 0.1739332228899002, |
| "learning_rate": 0.0001347685772956471, |
| "loss": 0.2107, |
| "step": 1074 |
| }, |
| { |
| "epoch": 0.3899165759883932, |
| "grad_norm": 0.1568581908941269, |
| "learning_rate": 0.00013466132516082907, |
| "loss": 0.1835, |
| "step": 1075 |
| }, |
| { |
| "epoch": 0.3902792890823359, |
| "grad_norm": 0.16916148364543915, |
| "learning_rate": 0.0001345540276916455, |
| "loss": 0.2041, |
| "step": 1076 |
| }, |
| { |
| "epoch": 0.3906420021762786, |
| "grad_norm": 0.16345995664596558, |
| "learning_rate": 0.0001344466850284333, |
| "loss": 0.1789, |
| "step": 1077 |
| }, |
| { |
| "epoch": 0.39100471527022124, |
| "grad_norm": 0.16848930716514587, |
| "learning_rate": 0.00013433929731158852, |
| "loss": 0.1961, |
| "step": 1078 |
| }, |
| { |
| "epoch": 0.39136742836416394, |
| "grad_norm": 0.1991538405418396, |
| "learning_rate": 0.00013423186468156608, |
| "loss": 0.2544, |
| "step": 1079 |
| }, |
| { |
| "epoch": 0.39173014145810664, |
| "grad_norm": 0.17732208967208862, |
| "learning_rate": 0.0001341243872788796, |
| "loss": 0.258, |
| "step": 1080 |
| }, |
| { |
| "epoch": 0.39209285455204934, |
| "grad_norm": 0.16117359697818756, |
| "learning_rate": 0.0001340168652441014, |
| "loss": 0.2389, |
| "step": 1081 |
| }, |
| { |
| "epoch": 0.39245556764599204, |
| "grad_norm": 0.1693982034921646, |
| "learning_rate": 0.00013390929871786203, |
| "loss": 0.2022, |
| "step": 1082 |
| }, |
| { |
| "epoch": 0.3928182807399347, |
| "grad_norm": 0.1722104698419571, |
| "learning_rate": 0.00013380168784085027, |
| "loss": 0.1977, |
| "step": 1083 |
| }, |
| { |
| "epoch": 0.3931809938338774, |
| "grad_norm": 0.1871337741613388, |
| "learning_rate": 0.000133694032753813, |
| "loss": 0.2249, |
| "step": 1084 |
| }, |
| { |
| "epoch": 0.3935437069278201, |
| "grad_norm": 0.17777620255947113, |
| "learning_rate": 0.0001335863335975548, |
| "loss": 0.1949, |
| "step": 1085 |
| }, |
| { |
| "epoch": 0.3939064200217628, |
| "grad_norm": 0.18331852555274963, |
| "learning_rate": 0.00013347859051293792, |
| "loss": 0.1969, |
| "step": 1086 |
| }, |
| { |
| "epoch": 0.3942691331157055, |
| "grad_norm": 0.158721461892128, |
| "learning_rate": 0.0001333708036408821, |
| "loss": 0.1919, |
| "step": 1087 |
| }, |
| { |
| "epoch": 0.3946318462096482, |
| "grad_norm": 0.16589364409446716, |
| "learning_rate": 0.00013326297312236439, |
| "loss": 0.2044, |
| "step": 1088 |
| }, |
| { |
| "epoch": 0.39499455930359084, |
| "grad_norm": 0.15952499210834503, |
| "learning_rate": 0.0001331550990984188, |
| "loss": 0.2005, |
| "step": 1089 |
| }, |
| { |
| "epoch": 0.39535727239753354, |
| "grad_norm": 0.15588688850402832, |
| "learning_rate": 0.00013304718171013632, |
| "loss": 0.2234, |
| "step": 1090 |
| }, |
| { |
| "epoch": 0.39571998549147624, |
| "grad_norm": 0.17283542454242706, |
| "learning_rate": 0.0001329392210986647, |
| "loss": 0.2001, |
| "step": 1091 |
| }, |
| { |
| "epoch": 0.39608269858541895, |
| "grad_norm": 0.15617555379867554, |
| "learning_rate": 0.00013283121740520812, |
| "loss": 0.1982, |
| "step": 1092 |
| }, |
| { |
| "epoch": 0.39644541167936165, |
| "grad_norm": 0.18503715097904205, |
| "learning_rate": 0.0001327231707710272, |
| "loss": 0.2315, |
| "step": 1093 |
| }, |
| { |
| "epoch": 0.3968081247733043, |
| "grad_norm": 0.16704030334949493, |
| "learning_rate": 0.00013261508133743865, |
| "loss": 0.2, |
| "step": 1094 |
| }, |
| { |
| "epoch": 0.397170837867247, |
| "grad_norm": 0.17188745737075806, |
| "learning_rate": 0.0001325069492458152, |
| "loss": 0.258, |
| "step": 1095 |
| }, |
| { |
| "epoch": 0.3975335509611897, |
| "grad_norm": 0.1544748693704605, |
| "learning_rate": 0.00013239877463758537, |
| "loss": 0.19, |
| "step": 1096 |
| }, |
| { |
| "epoch": 0.3978962640551324, |
| "grad_norm": 0.1821664571762085, |
| "learning_rate": 0.0001322905576542333, |
| "loss": 0.2071, |
| "step": 1097 |
| }, |
| { |
| "epoch": 0.3982589771490751, |
| "grad_norm": 0.15686167776584625, |
| "learning_rate": 0.00013218229843729856, |
| "loss": 0.1807, |
| "step": 1098 |
| }, |
| { |
| "epoch": 0.39862169024301775, |
| "grad_norm": 0.1645747721195221, |
| "learning_rate": 0.00013207399712837582, |
| "loss": 0.1941, |
| "step": 1099 |
| }, |
| { |
| "epoch": 0.39898440333696045, |
| "grad_norm": 0.15510335564613342, |
| "learning_rate": 0.00013196565386911505, |
| "loss": 0.1982, |
| "step": 1100 |
| }, |
| { |
| "epoch": 0.39934711643090315, |
| "grad_norm": 0.17434607446193695, |
| "learning_rate": 0.0001318572688012209, |
| "loss": 0.2012, |
| "step": 1101 |
| }, |
| { |
| "epoch": 0.39970982952484585, |
| "grad_norm": 0.1454346626996994, |
| "learning_rate": 0.00013174884206645278, |
| "loss": 0.1887, |
| "step": 1102 |
| }, |
| { |
| "epoch": 0.40007254261878855, |
| "grad_norm": 0.16709522902965546, |
| "learning_rate": 0.00013164037380662452, |
| "loss": 0.1914, |
| "step": 1103 |
| }, |
| { |
| "epoch": 0.40043525571273125, |
| "grad_norm": 0.17922160029411316, |
| "learning_rate": 0.0001315318641636044, |
| "loss": 0.2002, |
| "step": 1104 |
| }, |
| { |
| "epoch": 0.4007979688066739, |
| "grad_norm": 0.1769881397485733, |
| "learning_rate": 0.00013142331327931469, |
| "loss": 0.1993, |
| "step": 1105 |
| }, |
| { |
| "epoch": 0.4011606819006166, |
| "grad_norm": 0.1627112329006195, |
| "learning_rate": 0.00013131472129573166, |
| "loss": 0.2096, |
| "step": 1106 |
| }, |
| { |
| "epoch": 0.4015233949945593, |
| "grad_norm": 0.1649940013885498, |
| "learning_rate": 0.00013120608835488532, |
| "loss": 0.2032, |
| "step": 1107 |
| }, |
| { |
| "epoch": 0.401886108088502, |
| "grad_norm": 0.18944235146045685, |
| "learning_rate": 0.00013109741459885928, |
| "loss": 0.2163, |
| "step": 1108 |
| }, |
| { |
| "epoch": 0.4022488211824447, |
| "grad_norm": 0.16329450905323029, |
| "learning_rate": 0.00013098870016979051, |
| "loss": 0.1833, |
| "step": 1109 |
| }, |
| { |
| "epoch": 0.40261153427638735, |
| "grad_norm": 0.20053814351558685, |
| "learning_rate": 0.00013087994520986923, |
| "loss": 0.2166, |
| "step": 1110 |
| }, |
| { |
| "epoch": 0.40297424737033005, |
| "grad_norm": 0.19225598871707916, |
| "learning_rate": 0.00013077114986133847, |
| "loss": 0.2544, |
| "step": 1111 |
| }, |
| { |
| "epoch": 0.40333696046427275, |
| "grad_norm": 0.17340917885303497, |
| "learning_rate": 0.00013066231426649437, |
| "loss": 0.2005, |
| "step": 1112 |
| }, |
| { |
| "epoch": 0.40369967355821545, |
| "grad_norm": 0.1653253436088562, |
| "learning_rate": 0.00013055343856768555, |
| "loss": 0.2119, |
| "step": 1113 |
| }, |
| { |
| "epoch": 0.40406238665215816, |
| "grad_norm": 0.16865472495555878, |
| "learning_rate": 0.00013044452290731306, |
| "loss": 0.1748, |
| "step": 1114 |
| }, |
| { |
| "epoch": 0.40442509974610086, |
| "grad_norm": 0.17820391058921814, |
| "learning_rate": 0.0001303355674278303, |
| "loss": 0.2094, |
| "step": 1115 |
| }, |
| { |
| "epoch": 0.4047878128400435, |
| "grad_norm": 0.17825227975845337, |
| "learning_rate": 0.0001302265722717427, |
| "loss": 0.2174, |
| "step": 1116 |
| }, |
| { |
| "epoch": 0.4051505259339862, |
| "grad_norm": 0.6229606866836548, |
| "learning_rate": 0.0001301175375816076, |
| "loss": 0.2072, |
| "step": 1117 |
| }, |
| { |
| "epoch": 0.4055132390279289, |
| "grad_norm": 0.21105241775512695, |
| "learning_rate": 0.0001300084635000341, |
| "loss": 0.2041, |
| "step": 1118 |
| }, |
| { |
| "epoch": 0.4058759521218716, |
| "grad_norm": 0.20768609642982483, |
| "learning_rate": 0.00012989935016968266, |
| "loss": 0.2091, |
| "step": 1119 |
| }, |
| { |
| "epoch": 0.4062386652158143, |
| "grad_norm": 0.1655317097902298, |
| "learning_rate": 0.00012979019773326524, |
| "loss": 0.2095, |
| "step": 1120 |
| }, |
| { |
| "epoch": 0.40660137830975696, |
| "grad_norm": 0.1594689041376114, |
| "learning_rate": 0.00012968100633354492, |
| "loss": 0.1922, |
| "step": 1121 |
| }, |
| { |
| "epoch": 0.40696409140369966, |
| "grad_norm": 0.1779058277606964, |
| "learning_rate": 0.00012957177611333566, |
| "loss": 0.1948, |
| "step": 1122 |
| }, |
| { |
| "epoch": 0.40732680449764236, |
| "grad_norm": 0.18424735963344574, |
| "learning_rate": 0.00012946250721550224, |
| "loss": 0.2174, |
| "step": 1123 |
| }, |
| { |
| "epoch": 0.40768951759158506, |
| "grad_norm": 0.19321289658546448, |
| "learning_rate": 0.00012935319978296008, |
| "loss": 0.2032, |
| "step": 1124 |
| }, |
| { |
| "epoch": 0.40805223068552776, |
| "grad_norm": 0.1741238832473755, |
| "learning_rate": 0.00012924385395867493, |
| "loss": 0.1928, |
| "step": 1125 |
| }, |
| { |
| "epoch": 0.40841494377947046, |
| "grad_norm": 0.16779236495494843, |
| "learning_rate": 0.00012913446988566273, |
| "loss": 0.2021, |
| "step": 1126 |
| }, |
| { |
| "epoch": 0.4087776568734131, |
| "grad_norm": 0.16747735440731049, |
| "learning_rate": 0.00012902504770698954, |
| "loss": 0.1993, |
| "step": 1127 |
| }, |
| { |
| "epoch": 0.4091403699673558, |
| "grad_norm": 0.18401382863521576, |
| "learning_rate": 0.00012891558756577122, |
| "loss": 0.2151, |
| "step": 1128 |
| }, |
| { |
| "epoch": 0.4095030830612985, |
| "grad_norm": 0.15898433327674866, |
| "learning_rate": 0.00012880608960517322, |
| "loss": 0.187, |
| "step": 1129 |
| }, |
| { |
| "epoch": 0.4098657961552412, |
| "grad_norm": 0.1666088104248047, |
| "learning_rate": 0.0001286965539684106, |
| "loss": 0.1849, |
| "step": 1130 |
| }, |
| { |
| "epoch": 0.4102285092491839, |
| "grad_norm": 0.17613482475280762, |
| "learning_rate": 0.00012858698079874748, |
| "loss": 0.1993, |
| "step": 1131 |
| }, |
| { |
| "epoch": 0.41059122234312656, |
| "grad_norm": 0.17263801395893097, |
| "learning_rate": 0.0001284773702394973, |
| "loss": 0.1947, |
| "step": 1132 |
| }, |
| { |
| "epoch": 0.41095393543706926, |
| "grad_norm": 0.1618073433637619, |
| "learning_rate": 0.00012836772243402224, |
| "loss": 0.1869, |
| "step": 1133 |
| }, |
| { |
| "epoch": 0.41131664853101196, |
| "grad_norm": 0.1828174889087677, |
| "learning_rate": 0.00012825803752573327, |
| "loss": 0.2207, |
| "step": 1134 |
| }, |
| { |
| "epoch": 0.41167936162495467, |
| "grad_norm": 0.17469796538352966, |
| "learning_rate": 0.00012814831565808986, |
| "loss": 0.2008, |
| "step": 1135 |
| }, |
| { |
| "epoch": 0.41204207471889737, |
| "grad_norm": 0.17154814302921295, |
| "learning_rate": 0.00012803855697459987, |
| "loss": 0.2098, |
| "step": 1136 |
| }, |
| { |
| "epoch": 0.41240478781284007, |
| "grad_norm": 0.1646650731563568, |
| "learning_rate": 0.00012792876161881925, |
| "loss": 0.2103, |
| "step": 1137 |
| }, |
| { |
| "epoch": 0.4127675009067827, |
| "grad_norm": 0.17539532482624054, |
| "learning_rate": 0.00012781892973435195, |
| "loss": 0.1966, |
| "step": 1138 |
| }, |
| { |
| "epoch": 0.4131302140007254, |
| "grad_norm": 0.17781807482242584, |
| "learning_rate": 0.00012770906146484964, |
| "loss": 0.206, |
| "step": 1139 |
| }, |
| { |
| "epoch": 0.4134929270946681, |
| "grad_norm": 0.1847347617149353, |
| "learning_rate": 0.0001275991569540117, |
| "loss": 0.2026, |
| "step": 1140 |
| }, |
| { |
| "epoch": 0.4138556401886108, |
| "grad_norm": 0.17020414769649506, |
| "learning_rate": 0.00012748921634558473, |
| "loss": 0.1958, |
| "step": 1141 |
| }, |
| { |
| "epoch": 0.4142183532825535, |
| "grad_norm": 0.18093371391296387, |
| "learning_rate": 0.00012737923978336274, |
| "loss": 0.2062, |
| "step": 1142 |
| }, |
| { |
| "epoch": 0.41458106637649617, |
| "grad_norm": 0.1588636189699173, |
| "learning_rate": 0.00012726922741118662, |
| "loss": 0.1892, |
| "step": 1143 |
| }, |
| { |
| "epoch": 0.41494377947043887, |
| "grad_norm": 0.19953924417495728, |
| "learning_rate": 0.00012715917937294418, |
| "loss": 0.2188, |
| "step": 1144 |
| }, |
| { |
| "epoch": 0.41530649256438157, |
| "grad_norm": 0.16585423052310944, |
| "learning_rate": 0.00012704909581256986, |
| "loss": 0.2231, |
| "step": 1145 |
| }, |
| { |
| "epoch": 0.41566920565832427, |
| "grad_norm": 0.17226840555667877, |
| "learning_rate": 0.0001269389768740445, |
| "loss": 0.1895, |
| "step": 1146 |
| }, |
| { |
| "epoch": 0.41603191875226697, |
| "grad_norm": 0.2125304490327835, |
| "learning_rate": 0.00012682882270139526, |
| "loss": 0.2122, |
| "step": 1147 |
| }, |
| { |
| "epoch": 0.4163946318462097, |
| "grad_norm": 0.19522660970687866, |
| "learning_rate": 0.00012671863343869543, |
| "loss": 0.2055, |
| "step": 1148 |
| }, |
| { |
| "epoch": 0.4167573449401523, |
| "grad_norm": 0.19831117987632751, |
| "learning_rate": 0.00012660840923006412, |
| "loss": 0.189, |
| "step": 1149 |
| }, |
| { |
| "epoch": 0.417120058034095, |
| "grad_norm": 0.16252368688583374, |
| "learning_rate": 0.0001264981502196662, |
| "loss": 0.2051, |
| "step": 1150 |
| }, |
| { |
| "epoch": 0.4174827711280377, |
| "grad_norm": 0.17360906302928925, |
| "learning_rate": 0.00012638785655171196, |
| "loss": 0.1957, |
| "step": 1151 |
| }, |
| { |
| "epoch": 0.4178454842219804, |
| "grad_norm": 0.1837020069360733, |
| "learning_rate": 0.0001262775283704572, |
| "loss": 0.2131, |
| "step": 1152 |
| }, |
| { |
| "epoch": 0.4182081973159231, |
| "grad_norm": 0.1726016104221344, |
| "learning_rate": 0.00012616716582020265, |
| "loss": 0.1897, |
| "step": 1153 |
| }, |
| { |
| "epoch": 0.41857091040986577, |
| "grad_norm": 0.16881223022937775, |
| "learning_rate": 0.00012605676904529415, |
| "loss": 0.1905, |
| "step": 1154 |
| }, |
| { |
| "epoch": 0.41893362350380847, |
| "grad_norm": 0.2182941734790802, |
| "learning_rate": 0.00012594633819012225, |
| "loss": 0.2176, |
| "step": 1155 |
| }, |
| { |
| "epoch": 0.4192963365977512, |
| "grad_norm": 0.1766914576292038, |
| "learning_rate": 0.00012583587339912207, |
| "loss": 0.2067, |
| "step": 1156 |
| }, |
| { |
| "epoch": 0.4196590496916939, |
| "grad_norm": 0.16632500290870667, |
| "learning_rate": 0.00012572537481677308, |
| "loss": 0.1902, |
| "step": 1157 |
| }, |
| { |
| "epoch": 0.4200217627856366, |
| "grad_norm": 0.16559042036533356, |
| "learning_rate": 0.00012561484258759905, |
| "loss": 0.1848, |
| "step": 1158 |
| }, |
| { |
| "epoch": 0.4203844758795793, |
| "grad_norm": 0.16212663054466248, |
| "learning_rate": 0.00012550427685616765, |
| "loss": 0.2009, |
| "step": 1159 |
| }, |
| { |
| "epoch": 0.4207471889735219, |
| "grad_norm": 0.16951881349086761, |
| "learning_rate": 0.0001253936777670904, |
| "loss": 0.1896, |
| "step": 1160 |
| }, |
| { |
| "epoch": 0.4211099020674646, |
| "grad_norm": 0.19102217257022858, |
| "learning_rate": 0.0001252830454650225, |
| "loss": 0.2012, |
| "step": 1161 |
| }, |
| { |
| "epoch": 0.4214726151614073, |
| "grad_norm": 0.1638030707836151, |
| "learning_rate": 0.00012517238009466253, |
| "loss": 0.1731, |
| "step": 1162 |
| }, |
| { |
| "epoch": 0.42183532825535003, |
| "grad_norm": 0.1885092556476593, |
| "learning_rate": 0.00012506168180075232, |
| "loss": 0.212, |
| "step": 1163 |
| }, |
| { |
| "epoch": 0.42219804134929273, |
| "grad_norm": 0.19661776721477509, |
| "learning_rate": 0.00012495095072807678, |
| "loss": 0.1969, |
| "step": 1164 |
| }, |
| { |
| "epoch": 0.4225607544432354, |
| "grad_norm": 0.1665484458208084, |
| "learning_rate": 0.00012484018702146375, |
| "loss": 0.1886, |
| "step": 1165 |
| }, |
| { |
| "epoch": 0.4229234675371781, |
| "grad_norm": 0.16225306689739227, |
| "learning_rate": 0.00012472939082578365, |
| "loss": 0.1869, |
| "step": 1166 |
| }, |
| { |
| "epoch": 0.4232861806311208, |
| "grad_norm": 0.16616645455360413, |
| "learning_rate": 0.00012461856228594947, |
| "loss": 0.1778, |
| "step": 1167 |
| }, |
| { |
| "epoch": 0.4236488937250635, |
| "grad_norm": 0.15914376080036163, |
| "learning_rate": 0.00012450770154691642, |
| "loss": 0.1809, |
| "step": 1168 |
| }, |
| { |
| "epoch": 0.4240116068190062, |
| "grad_norm": 0.18165045976638794, |
| "learning_rate": 0.00012439680875368192, |
| "loss": 0.1981, |
| "step": 1169 |
| }, |
| { |
| "epoch": 0.4243743199129489, |
| "grad_norm": 0.17815563082695007, |
| "learning_rate": 0.00012428588405128527, |
| "loss": 0.2462, |
| "step": 1170 |
| }, |
| { |
| "epoch": 0.42473703300689153, |
| "grad_norm": 0.1577123999595642, |
| "learning_rate": 0.0001241749275848075, |
| "loss": 0.1848, |
| "step": 1171 |
| }, |
| { |
| "epoch": 0.42509974610083423, |
| "grad_norm": 0.16714733839035034, |
| "learning_rate": 0.0001240639394993712, |
| "loss": 0.1878, |
| "step": 1172 |
| }, |
| { |
| "epoch": 0.42546245919477693, |
| "grad_norm": 0.18040674924850464, |
| "learning_rate": 0.0001239529199401403, |
| "loss": 0.2087, |
| "step": 1173 |
| }, |
| { |
| "epoch": 0.42582517228871963, |
| "grad_norm": 0.17369875311851501, |
| "learning_rate": 0.0001238418690523199, |
| "loss": 0.2198, |
| "step": 1174 |
| }, |
| { |
| "epoch": 0.42618788538266233, |
| "grad_norm": 0.17522990703582764, |
| "learning_rate": 0.0001237307869811561, |
| "loss": 0.1898, |
| "step": 1175 |
| }, |
| { |
| "epoch": 0.426550598476605, |
| "grad_norm": 0.1890110820531845, |
| "learning_rate": 0.0001236196738719357, |
| "loss": 0.1946, |
| "step": 1176 |
| }, |
| { |
| "epoch": 0.4269133115705477, |
| "grad_norm": 0.19072000682353973, |
| "learning_rate": 0.00012350852986998628, |
| "loss": 0.1782, |
| "step": 1177 |
| }, |
| { |
| "epoch": 0.4272760246644904, |
| "grad_norm": 0.16412675380706787, |
| "learning_rate": 0.00012339735512067557, |
| "loss": 0.1957, |
| "step": 1178 |
| }, |
| { |
| "epoch": 0.4276387377584331, |
| "grad_norm": 0.16497628390789032, |
| "learning_rate": 0.0001232861497694117, |
| "loss": 0.1914, |
| "step": 1179 |
| }, |
| { |
| "epoch": 0.4280014508523758, |
| "grad_norm": 0.1696443408727646, |
| "learning_rate": 0.00012317491396164281, |
| "loss": 0.2205, |
| "step": 1180 |
| }, |
| { |
| "epoch": 0.4283641639463185, |
| "grad_norm": 0.1990218162536621, |
| "learning_rate": 0.00012306364784285683, |
| "loss": 0.221, |
| "step": 1181 |
| }, |
| { |
| "epoch": 0.42872687704026113, |
| "grad_norm": 0.15306927263736725, |
| "learning_rate": 0.00012295235155858128, |
| "loss": 0.1894, |
| "step": 1182 |
| }, |
| { |
| "epoch": 0.42908959013420384, |
| "grad_norm": 0.16716569662094116, |
| "learning_rate": 0.00012284102525438327, |
| "loss": 0.2124, |
| "step": 1183 |
| }, |
| { |
| "epoch": 0.42945230322814654, |
| "grad_norm": 0.16371683776378632, |
| "learning_rate": 0.00012272966907586906, |
| "loss": 0.1952, |
| "step": 1184 |
| }, |
| { |
| "epoch": 0.42981501632208924, |
| "grad_norm": 0.19524066150188446, |
| "learning_rate": 0.00012261828316868404, |
| "loss": 0.1967, |
| "step": 1185 |
| }, |
| { |
| "epoch": 0.43017772941603194, |
| "grad_norm": 0.1753699630498886, |
| "learning_rate": 0.0001225068676785125, |
| "loss": 0.2057, |
| "step": 1186 |
| }, |
| { |
| "epoch": 0.4305404425099746, |
| "grad_norm": 0.15853376686573029, |
| "learning_rate": 0.00012239542275107733, |
| "loss": 0.1852, |
| "step": 1187 |
| }, |
| { |
| "epoch": 0.4309031556039173, |
| "grad_norm": 0.1545594483613968, |
| "learning_rate": 0.00012228394853214, |
| "loss": 0.1827, |
| "step": 1188 |
| }, |
| { |
| "epoch": 0.43126586869786, |
| "grad_norm": 0.1596081703901291, |
| "learning_rate": 0.0001221724451675003, |
| "loss": 0.2032, |
| "step": 1189 |
| }, |
| { |
| "epoch": 0.4316285817918027, |
| "grad_norm": 0.17133690416812897, |
| "learning_rate": 0.00012206091280299608, |
| "loss": 0.201, |
| "step": 1190 |
| }, |
| { |
| "epoch": 0.4319912948857454, |
| "grad_norm": 0.18594324588775635, |
| "learning_rate": 0.00012194935158450318, |
| "loss": 0.1999, |
| "step": 1191 |
| }, |
| { |
| "epoch": 0.4323540079796881, |
| "grad_norm": 0.1757342368364334, |
| "learning_rate": 0.0001218377616579351, |
| "loss": 0.2048, |
| "step": 1192 |
| }, |
| { |
| "epoch": 0.43271672107363074, |
| "grad_norm": 0.15969473123550415, |
| "learning_rate": 0.00012172614316924303, |
| "loss": 0.1896, |
| "step": 1193 |
| }, |
| { |
| "epoch": 0.43307943416757344, |
| "grad_norm": 0.1708168387413025, |
| "learning_rate": 0.00012161449626441535, |
| "loss": 0.1871, |
| "step": 1194 |
| }, |
| { |
| "epoch": 0.43344214726151614, |
| "grad_norm": 0.16224978864192963, |
| "learning_rate": 0.0001215028210894777, |
| "loss": 0.1995, |
| "step": 1195 |
| }, |
| { |
| "epoch": 0.43380486035545884, |
| "grad_norm": 0.17344152927398682, |
| "learning_rate": 0.00012139111779049272, |
| "loss": 0.2102, |
| "step": 1196 |
| }, |
| { |
| "epoch": 0.43416757344940154, |
| "grad_norm": 0.1607237160205841, |
| "learning_rate": 0.00012127938651355973, |
| "loss": 0.198, |
| "step": 1197 |
| }, |
| { |
| "epoch": 0.4345302865433442, |
| "grad_norm": 0.19598302245140076, |
| "learning_rate": 0.00012116762740481473, |
| "loss": 0.2048, |
| "step": 1198 |
| }, |
| { |
| "epoch": 0.4348929996372869, |
| "grad_norm": 0.17380495369434357, |
| "learning_rate": 0.00012105584061043011, |
| "loss": 0.1998, |
| "step": 1199 |
| }, |
| { |
| "epoch": 0.4352557127312296, |
| "grad_norm": 0.16845153272151947, |
| "learning_rate": 0.00012094402627661447, |
| "loss": 0.1944, |
| "step": 1200 |
| }, |
| { |
| "epoch": 0.4356184258251723, |
| "grad_norm": 0.17525669932365417, |
| "learning_rate": 0.00012083218454961237, |
| "loss": 0.2262, |
| "step": 1201 |
| }, |
| { |
| "epoch": 0.435981138919115, |
| "grad_norm": 0.182146355509758, |
| "learning_rate": 0.00012072031557570425, |
| "loss": 0.1899, |
| "step": 1202 |
| }, |
| { |
| "epoch": 0.4363438520130577, |
| "grad_norm": 0.1767880618572235, |
| "learning_rate": 0.00012060841950120623, |
| "loss": 0.1853, |
| "step": 1203 |
| }, |
| { |
| "epoch": 0.43670656510700034, |
| "grad_norm": 0.1868688315153122, |
| "learning_rate": 0.00012049649647246976, |
| "loss": 0.1884, |
| "step": 1204 |
| }, |
| { |
| "epoch": 0.43706927820094305, |
| "grad_norm": 0.16299636662006378, |
| "learning_rate": 0.0001203845466358817, |
| "loss": 0.1903, |
| "step": 1205 |
| }, |
| { |
| "epoch": 0.43743199129488575, |
| "grad_norm": 0.1743989884853363, |
| "learning_rate": 0.00012027257013786382, |
| "loss": 0.1741, |
| "step": 1206 |
| }, |
| { |
| "epoch": 0.43779470438882845, |
| "grad_norm": 0.16983556747436523, |
| "learning_rate": 0.00012016056712487281, |
| "loss": 0.1756, |
| "step": 1207 |
| }, |
| { |
| "epoch": 0.43815741748277115, |
| "grad_norm": 0.16869889199733734, |
| "learning_rate": 0.0001200485377434001, |
| "loss": 0.2091, |
| "step": 1208 |
| }, |
| { |
| "epoch": 0.4385201305767138, |
| "grad_norm": 0.18009315431118011, |
| "learning_rate": 0.00011993648213997155, |
| "loss": 0.1876, |
| "step": 1209 |
| }, |
| { |
| "epoch": 0.4388828436706565, |
| "grad_norm": 0.17261937260627747, |
| "learning_rate": 0.00011982440046114734, |
| "loss": 0.1888, |
| "step": 1210 |
| }, |
| { |
| "epoch": 0.4392455567645992, |
| "grad_norm": 0.1700652837753296, |
| "learning_rate": 0.00011971229285352173, |
| "loss": 0.1929, |
| "step": 1211 |
| }, |
| { |
| "epoch": 0.4396082698585419, |
| "grad_norm": 0.1701359897851944, |
| "learning_rate": 0.0001196001594637229, |
| "loss": 0.196, |
| "step": 1212 |
| }, |
| { |
| "epoch": 0.4399709829524846, |
| "grad_norm": 0.17813630402088165, |
| "learning_rate": 0.00011948800043841275, |
| "loss": 0.2116, |
| "step": 1213 |
| }, |
| { |
| "epoch": 0.4403336960464273, |
| "grad_norm": 0.1756308227777481, |
| "learning_rate": 0.00011937581592428677, |
| "loss": 0.2036, |
| "step": 1214 |
| }, |
| { |
| "epoch": 0.44069640914036995, |
| "grad_norm": 0.17653414607048035, |
| "learning_rate": 0.00011926360606807367, |
| "loss": 0.186, |
| "step": 1215 |
| }, |
| { |
| "epoch": 0.44105912223431265, |
| "grad_norm": 0.16713349521160126, |
| "learning_rate": 0.00011915137101653539, |
| "loss": 0.2161, |
| "step": 1216 |
| }, |
| { |
| "epoch": 0.44142183532825535, |
| "grad_norm": 0.17466074228286743, |
| "learning_rate": 0.00011903911091646684, |
| "loss": 0.2025, |
| "step": 1217 |
| }, |
| { |
| "epoch": 0.44178454842219805, |
| "grad_norm": 0.17018508911132812, |
| "learning_rate": 0.00011892682591469562, |
| "loss": 0.1901, |
| "step": 1218 |
| }, |
| { |
| "epoch": 0.44214726151614075, |
| "grad_norm": 0.18613681197166443, |
| "learning_rate": 0.00011881451615808192, |
| "loss": 0.1994, |
| "step": 1219 |
| }, |
| { |
| "epoch": 0.4425099746100834, |
| "grad_norm": 0.17624922096729279, |
| "learning_rate": 0.00011870218179351838, |
| "loss": 0.1909, |
| "step": 1220 |
| }, |
| { |
| "epoch": 0.4428726877040261, |
| "grad_norm": 0.16530555486679077, |
| "learning_rate": 0.00011858982296792971, |
| "loss": 0.1925, |
| "step": 1221 |
| }, |
| { |
| "epoch": 0.4432354007979688, |
| "grad_norm": 0.17213410139083862, |
| "learning_rate": 0.00011847743982827269, |
| "loss": 0.188, |
| "step": 1222 |
| }, |
| { |
| "epoch": 0.4435981138919115, |
| "grad_norm": 0.17941850423812866, |
| "learning_rate": 0.00011836503252153588, |
| "loss": 0.1836, |
| "step": 1223 |
| }, |
| { |
| "epoch": 0.4439608269858542, |
| "grad_norm": 0.211356058716774, |
| "learning_rate": 0.00011825260119473946, |
| "loss": 0.1958, |
| "step": 1224 |
| }, |
| { |
| "epoch": 0.4443235400797969, |
| "grad_norm": 0.1753711849451065, |
| "learning_rate": 0.00011814014599493502, |
| "loss": 0.1784, |
| "step": 1225 |
| }, |
| { |
| "epoch": 0.44468625317373955, |
| "grad_norm": 0.17775994539260864, |
| "learning_rate": 0.00011802766706920533, |
| "loss": 0.1984, |
| "step": 1226 |
| }, |
| { |
| "epoch": 0.44504896626768226, |
| "grad_norm": 0.15988726913928986, |
| "learning_rate": 0.00011791516456466429, |
| "loss": 0.196, |
| "step": 1227 |
| }, |
| { |
| "epoch": 0.44541167936162496, |
| "grad_norm": 0.17853982746601105, |
| "learning_rate": 0.00011780263862845655, |
| "loss": 0.193, |
| "step": 1228 |
| }, |
| { |
| "epoch": 0.44577439245556766, |
| "grad_norm": 0.1804809272289276, |
| "learning_rate": 0.00011769008940775744, |
| "loss": 0.1995, |
| "step": 1229 |
| }, |
| { |
| "epoch": 0.44613710554951036, |
| "grad_norm": 0.18296337127685547, |
| "learning_rate": 0.00011757751704977275, |
| "loss": 0.1907, |
| "step": 1230 |
| }, |
| { |
| "epoch": 0.446499818643453, |
| "grad_norm": 0.15713930130004883, |
| "learning_rate": 0.00011746492170173853, |
| "loss": 0.1945, |
| "step": 1231 |
| }, |
| { |
| "epoch": 0.4468625317373957, |
| "grad_norm": 0.18204668164253235, |
| "learning_rate": 0.00011735230351092087, |
| "loss": 0.2187, |
| "step": 1232 |
| }, |
| { |
| "epoch": 0.4472252448313384, |
| "grad_norm": 0.16009126603603363, |
| "learning_rate": 0.00011723966262461579, |
| "loss": 0.1786, |
| "step": 1233 |
| }, |
| { |
| "epoch": 0.4475879579252811, |
| "grad_norm": 0.20128843188285828, |
| "learning_rate": 0.00011712699919014896, |
| "loss": 0.1941, |
| "step": 1234 |
| }, |
| { |
| "epoch": 0.4479506710192238, |
| "grad_norm": 0.17296966910362244, |
| "learning_rate": 0.0001170143133548755, |
| "loss": 0.1843, |
| "step": 1235 |
| }, |
| { |
| "epoch": 0.44831338411316646, |
| "grad_norm": 0.18363478779792786, |
| "learning_rate": 0.00011690160526617995, |
| "loss": 0.197, |
| "step": 1236 |
| }, |
| { |
| "epoch": 0.44867609720710916, |
| "grad_norm": 0.17751774191856384, |
| "learning_rate": 0.00011678887507147582, |
| "loss": 0.1756, |
| "step": 1237 |
| }, |
| { |
| "epoch": 0.44903881030105186, |
| "grad_norm": 0.1821131557226181, |
| "learning_rate": 0.00011667612291820562, |
| "loss": 0.1911, |
| "step": 1238 |
| }, |
| { |
| "epoch": 0.44940152339499456, |
| "grad_norm": 0.16961705684661865, |
| "learning_rate": 0.00011656334895384053, |
| "loss": 0.1782, |
| "step": 1239 |
| }, |
| { |
| "epoch": 0.44976423648893726, |
| "grad_norm": 0.1650359183549881, |
| "learning_rate": 0.00011645055332588032, |
| "loss": 0.1849, |
| "step": 1240 |
| }, |
| { |
| "epoch": 0.45012694958287996, |
| "grad_norm": 0.1794784963130951, |
| "learning_rate": 0.00011633773618185302, |
| "loss": 0.2059, |
| "step": 1241 |
| }, |
| { |
| "epoch": 0.4504896626768226, |
| "grad_norm": 0.17137840390205383, |
| "learning_rate": 0.00011622489766931488, |
| "loss": 0.206, |
| "step": 1242 |
| }, |
| { |
| "epoch": 0.4508523757707653, |
| "grad_norm": 0.1728799045085907, |
| "learning_rate": 0.00011611203793584999, |
| "loss": 0.1812, |
| "step": 1243 |
| }, |
| { |
| "epoch": 0.451215088864708, |
| "grad_norm": 0.17596741020679474, |
| "learning_rate": 0.0001159991571290703, |
| "loss": 0.1935, |
| "step": 1244 |
| }, |
| { |
| "epoch": 0.4515778019586507, |
| "grad_norm": 0.18633347749710083, |
| "learning_rate": 0.00011588625539661528, |
| "loss": 0.1908, |
| "step": 1245 |
| }, |
| { |
| "epoch": 0.4519405150525934, |
| "grad_norm": 0.15337157249450684, |
| "learning_rate": 0.00011577333288615175, |
| "loss": 0.1779, |
| "step": 1246 |
| }, |
| { |
| "epoch": 0.45230322814653606, |
| "grad_norm": 0.18902058899402618, |
| "learning_rate": 0.00011566038974537374, |
| "loss": 0.2063, |
| "step": 1247 |
| }, |
| { |
| "epoch": 0.45266594124047876, |
| "grad_norm": 0.17245811223983765, |
| "learning_rate": 0.00011554742612200229, |
| "loss": 0.1827, |
| "step": 1248 |
| }, |
| { |
| "epoch": 0.45302865433442147, |
| "grad_norm": 0.17236045002937317, |
| "learning_rate": 0.00011543444216378517, |
| "loss": 0.1944, |
| "step": 1249 |
| }, |
| { |
| "epoch": 0.45339136742836417, |
| "grad_norm": 0.1754477322101593, |
| "learning_rate": 0.00011532143801849668, |
| "loss": 0.1933, |
| "step": 1250 |
| }, |
| { |
| "epoch": 0.45375408052230687, |
| "grad_norm": 0.16361160576343536, |
| "learning_rate": 0.00011520841383393774, |
| "loss": 0.193, |
| "step": 1251 |
| }, |
| { |
| "epoch": 0.45411679361624957, |
| "grad_norm": 0.17561082541942596, |
| "learning_rate": 0.00011509536975793527, |
| "loss": 0.2062, |
| "step": 1252 |
| }, |
| { |
| "epoch": 0.4544795067101922, |
| "grad_norm": 0.1636163592338562, |
| "learning_rate": 0.00011498230593834229, |
| "loss": 0.1839, |
| "step": 1253 |
| }, |
| { |
| "epoch": 0.4548422198041349, |
| "grad_norm": 0.16940078139305115, |
| "learning_rate": 0.00011486922252303769, |
| "loss": 0.18, |
| "step": 1254 |
| }, |
| { |
| "epoch": 0.4552049328980776, |
| "grad_norm": 0.1866592913866043, |
| "learning_rate": 0.0001147561196599259, |
| "loss": 0.1789, |
| "step": 1255 |
| }, |
| { |
| "epoch": 0.4555676459920203, |
| "grad_norm": 0.1689455509185791, |
| "learning_rate": 0.00011464299749693679, |
| "loss": 0.1775, |
| "step": 1256 |
| }, |
| { |
| "epoch": 0.455930359085963, |
| "grad_norm": 0.17223703861236572, |
| "learning_rate": 0.00011452985618202559, |
| "loss": 0.1813, |
| "step": 1257 |
| }, |
| { |
| "epoch": 0.45629307217990567, |
| "grad_norm": 0.16031506657600403, |
| "learning_rate": 0.00011441669586317243, |
| "loss": 0.1867, |
| "step": 1258 |
| }, |
| { |
| "epoch": 0.45665578527384837, |
| "grad_norm": 0.17869757115840912, |
| "learning_rate": 0.00011430351668838237, |
| "loss": 0.1678, |
| "step": 1259 |
| }, |
| { |
| "epoch": 0.45701849836779107, |
| "grad_norm": 0.18296487629413605, |
| "learning_rate": 0.00011419031880568518, |
| "loss": 0.1848, |
| "step": 1260 |
| }, |
| { |
| "epoch": 0.45738121146173377, |
| "grad_norm": 0.19954228401184082, |
| "learning_rate": 0.00011407710236313498, |
| "loss": 0.1961, |
| "step": 1261 |
| }, |
| { |
| "epoch": 0.4577439245556765, |
| "grad_norm": 0.16006030142307281, |
| "learning_rate": 0.00011396386750881025, |
| "loss": 0.1738, |
| "step": 1262 |
| }, |
| { |
| "epoch": 0.4581066376496192, |
| "grad_norm": 0.17467838525772095, |
| "learning_rate": 0.00011385061439081355, |
| "loss": 0.2, |
| "step": 1263 |
| }, |
| { |
| "epoch": 0.4584693507435618, |
| "grad_norm": 0.1634225696325302, |
| "learning_rate": 0.00011373734315727125, |
| "loss": 0.1593, |
| "step": 1264 |
| }, |
| { |
| "epoch": 0.4588320638375045, |
| "grad_norm": 0.1675540953874588, |
| "learning_rate": 0.00011362405395633355, |
| "loss": 0.1761, |
| "step": 1265 |
| }, |
| { |
| "epoch": 0.4591947769314472, |
| "grad_norm": 0.2374797910451889, |
| "learning_rate": 0.00011351074693617398, |
| "loss": 0.2401, |
| "step": 1266 |
| }, |
| { |
| "epoch": 0.4595574900253899, |
| "grad_norm": 0.16424275934696198, |
| "learning_rate": 0.00011339742224498957, |
| "loss": 0.1822, |
| "step": 1267 |
| }, |
| { |
| "epoch": 0.4599202031193326, |
| "grad_norm": 0.1777309626340866, |
| "learning_rate": 0.00011328408003100031, |
| "loss": 0.199, |
| "step": 1268 |
| }, |
| { |
| "epoch": 0.4602829162132753, |
| "grad_norm": 0.17055995762348175, |
| "learning_rate": 0.0001131707204424491, |
| "loss": 0.1743, |
| "step": 1269 |
| }, |
| { |
| "epoch": 0.460645629307218, |
| "grad_norm": 0.17005477845668793, |
| "learning_rate": 0.0001130573436276017, |
| "loss": 0.1767, |
| "step": 1270 |
| }, |
| { |
| "epoch": 0.4610083424011607, |
| "grad_norm": 0.18844565749168396, |
| "learning_rate": 0.00011294394973474631, |
| "loss": 0.1836, |
| "step": 1271 |
| }, |
| { |
| "epoch": 0.4613710554951034, |
| "grad_norm": 0.17676351964473724, |
| "learning_rate": 0.00011283053891219344, |
| "loss": 0.1806, |
| "step": 1272 |
| }, |
| { |
| "epoch": 0.4617337685890461, |
| "grad_norm": 0.1949535459280014, |
| "learning_rate": 0.00011271711130827584, |
| "loss": 0.2162, |
| "step": 1273 |
| }, |
| { |
| "epoch": 0.4620964816829888, |
| "grad_norm": 0.16555753350257874, |
| "learning_rate": 0.0001126036670713481, |
| "loss": 0.2051, |
| "step": 1274 |
| }, |
| { |
| "epoch": 0.4624591947769314, |
| "grad_norm": 0.16618479788303375, |
| "learning_rate": 0.00011249020634978664, |
| "loss": 0.1686, |
| "step": 1275 |
| }, |
| { |
| "epoch": 0.4628219078708741, |
| "grad_norm": 0.16579975187778473, |
| "learning_rate": 0.00011237672929198944, |
| "loss": 0.1887, |
| "step": 1276 |
| }, |
| { |
| "epoch": 0.46318462096481683, |
| "grad_norm": 0.1672372817993164, |
| "learning_rate": 0.00011226323604637577, |
| "loss": 0.1801, |
| "step": 1277 |
| }, |
| { |
| "epoch": 0.46354733405875953, |
| "grad_norm": 0.18061618506908417, |
| "learning_rate": 0.00011214972676138612, |
| "loss": 0.2006, |
| "step": 1278 |
| }, |
| { |
| "epoch": 0.46391004715270223, |
| "grad_norm": 0.19238020479679108, |
| "learning_rate": 0.00011203620158548205, |
| "loss": 0.1693, |
| "step": 1279 |
| }, |
| { |
| "epoch": 0.4642727602466449, |
| "grad_norm": 0.18483294546604156, |
| "learning_rate": 0.00011192266066714576, |
| "loss": 0.181, |
| "step": 1280 |
| }, |
| { |
| "epoch": 0.4646354733405876, |
| "grad_norm": 0.1617163121700287, |
| "learning_rate": 0.00011180910415488006, |
| "loss": 0.1812, |
| "step": 1281 |
| }, |
| { |
| "epoch": 0.4649981864345303, |
| "grad_norm": 0.18640659749507904, |
| "learning_rate": 0.00011169553219720828, |
| "loss": 0.1877, |
| "step": 1282 |
| }, |
| { |
| "epoch": 0.465360899528473, |
| "grad_norm": 0.1695108264684677, |
| "learning_rate": 0.00011158194494267375, |
| "loss": 0.1848, |
| "step": 1283 |
| }, |
| { |
| "epoch": 0.4657236126224157, |
| "grad_norm": 0.1813160479068756, |
| "learning_rate": 0.00011146834253984006, |
| "loss": 0.1897, |
| "step": 1284 |
| }, |
| { |
| "epoch": 0.4660863257163584, |
| "grad_norm": 0.19932959973812103, |
| "learning_rate": 0.00011135472513729037, |
| "loss": 0.1924, |
| "step": 1285 |
| }, |
| { |
| "epoch": 0.46644903881030103, |
| "grad_norm": 0.18082661926746368, |
| "learning_rate": 0.0001112410928836276, |
| "loss": 0.1856, |
| "step": 1286 |
| }, |
| { |
| "epoch": 0.46681175190424373, |
| "grad_norm": 0.18553735315799713, |
| "learning_rate": 0.00011112744592747406, |
| "loss": 0.215, |
| "step": 1287 |
| }, |
| { |
| "epoch": 0.46717446499818643, |
| "grad_norm": 0.1664389669895172, |
| "learning_rate": 0.0001110137844174713, |
| "loss": 0.181, |
| "step": 1288 |
| }, |
| { |
| "epoch": 0.46753717809212914, |
| "grad_norm": 0.16226251423358917, |
| "learning_rate": 0.00011090010850227987, |
| "loss": 0.1818, |
| "step": 1289 |
| }, |
| { |
| "epoch": 0.46789989118607184, |
| "grad_norm": 0.17768961191177368, |
| "learning_rate": 0.00011078641833057917, |
| "loss": 0.2087, |
| "step": 1290 |
| }, |
| { |
| "epoch": 0.4682626042800145, |
| "grad_norm": 0.16539828479290009, |
| "learning_rate": 0.0001106727140510673, |
| "loss": 0.1882, |
| "step": 1291 |
| }, |
| { |
| "epoch": 0.4686253173739572, |
| "grad_norm": 0.17121171951293945, |
| "learning_rate": 0.00011055899581246074, |
| "loss": 0.1664, |
| "step": 1292 |
| }, |
| { |
| "epoch": 0.4689880304678999, |
| "grad_norm": 0.19726701080799103, |
| "learning_rate": 0.00011044526376349427, |
| "loss": 0.1924, |
| "step": 1293 |
| }, |
| { |
| "epoch": 0.4693507435618426, |
| "grad_norm": 0.16600336134433746, |
| "learning_rate": 0.0001103315180529207, |
| "loss": 0.1775, |
| "step": 1294 |
| }, |
| { |
| "epoch": 0.4697134566557853, |
| "grad_norm": 0.1898517608642578, |
| "learning_rate": 0.00011021775882951078, |
| "loss": 0.248, |
| "step": 1295 |
| }, |
| { |
| "epoch": 0.470076169749728, |
| "grad_norm": 0.16445770859718323, |
| "learning_rate": 0.00011010398624205285, |
| "loss": 0.1828, |
| "step": 1296 |
| }, |
| { |
| "epoch": 0.47043888284367064, |
| "grad_norm": 0.17627963423728943, |
| "learning_rate": 0.00010999020043935275, |
| "loss": 0.1736, |
| "step": 1297 |
| }, |
| { |
| "epoch": 0.47080159593761334, |
| "grad_norm": 0.19058868288993835, |
| "learning_rate": 0.00010987640157023367, |
| "loss": 0.2618, |
| "step": 1298 |
| }, |
| { |
| "epoch": 0.47116430903155604, |
| "grad_norm": 0.1651872992515564, |
| "learning_rate": 0.00010976258978353577, |
| "loss": 0.1975, |
| "step": 1299 |
| }, |
| { |
| "epoch": 0.47152702212549874, |
| "grad_norm": 0.20072801411151886, |
| "learning_rate": 0.00010964876522811623, |
| "loss": 0.1723, |
| "step": 1300 |
| }, |
| { |
| "epoch": 0.47188973521944144, |
| "grad_norm": 0.19362793862819672, |
| "learning_rate": 0.00010953492805284882, |
| "loss": 0.1682, |
| "step": 1301 |
| }, |
| { |
| "epoch": 0.4722524483133841, |
| "grad_norm": 0.16751596331596375, |
| "learning_rate": 0.0001094210784066239, |
| "loss": 0.1792, |
| "step": 1302 |
| }, |
| { |
| "epoch": 0.4726151614073268, |
| "grad_norm": 0.16240975260734558, |
| "learning_rate": 0.00010930721643834811, |
| "loss": 0.1805, |
| "step": 1303 |
| }, |
| { |
| "epoch": 0.4729778745012695, |
| "grad_norm": 0.1741744726896286, |
| "learning_rate": 0.00010919334229694424, |
| "loss": 0.1823, |
| "step": 1304 |
| }, |
| { |
| "epoch": 0.4733405875952122, |
| "grad_norm": 0.17905928194522858, |
| "learning_rate": 0.00010907945613135093, |
| "loss": 0.1873, |
| "step": 1305 |
| }, |
| { |
| "epoch": 0.4737033006891549, |
| "grad_norm": 0.16759923100471497, |
| "learning_rate": 0.00010896555809052255, |
| "loss": 0.1805, |
| "step": 1306 |
| }, |
| { |
| "epoch": 0.4740660137830976, |
| "grad_norm": 0.1604134738445282, |
| "learning_rate": 0.00010885164832342911, |
| "loss": 0.1817, |
| "step": 1307 |
| }, |
| { |
| "epoch": 0.47442872687704024, |
| "grad_norm": 0.22676977515220642, |
| "learning_rate": 0.00010873772697905584, |
| "loss": 0.1901, |
| "step": 1308 |
| }, |
| { |
| "epoch": 0.47479143997098294, |
| "grad_norm": 0.21346516907215118, |
| "learning_rate": 0.00010862379420640316, |
| "loss": 0.2146, |
| "step": 1309 |
| }, |
| { |
| "epoch": 0.47515415306492564, |
| "grad_norm": 0.18681135773658752, |
| "learning_rate": 0.00010850985015448644, |
| "loss": 0.1992, |
| "step": 1310 |
| }, |
| { |
| "epoch": 0.47551686615886835, |
| "grad_norm": 0.18223214149475098, |
| "learning_rate": 0.00010839589497233579, |
| "loss": 0.1937, |
| "step": 1311 |
| }, |
| { |
| "epoch": 0.47587957925281105, |
| "grad_norm": 0.16445523500442505, |
| "learning_rate": 0.00010828192880899588, |
| "loss": 0.201, |
| "step": 1312 |
| }, |
| { |
| "epoch": 0.4762422923467537, |
| "grad_norm": 0.20072023570537567, |
| "learning_rate": 0.00010816795181352576, |
| "loss": 0.221, |
| "step": 1313 |
| }, |
| { |
| "epoch": 0.4766050054406964, |
| "grad_norm": 0.1709073781967163, |
| "learning_rate": 0.00010805396413499865, |
| "loss": 0.1726, |
| "step": 1314 |
| }, |
| { |
| "epoch": 0.4769677185346391, |
| "grad_norm": 0.20039378106594086, |
| "learning_rate": 0.00010793996592250166, |
| "loss": 0.2096, |
| "step": 1315 |
| }, |
| { |
| "epoch": 0.4773304316285818, |
| "grad_norm": 0.17664781212806702, |
| "learning_rate": 0.00010782595732513581, |
| "loss": 0.1763, |
| "step": 1316 |
| }, |
| { |
| "epoch": 0.4776931447225245, |
| "grad_norm": 0.19013923406600952, |
| "learning_rate": 0.00010771193849201561, |
| "loss": 0.1977, |
| "step": 1317 |
| }, |
| { |
| "epoch": 0.4780558578164672, |
| "grad_norm": 0.18075336515903473, |
| "learning_rate": 0.00010759790957226896, |
| "loss": 0.2101, |
| "step": 1318 |
| }, |
| { |
| "epoch": 0.47841857091040985, |
| "grad_norm": 0.16578859090805054, |
| "learning_rate": 0.00010748387071503703, |
| "loss": 0.1724, |
| "step": 1319 |
| }, |
| { |
| "epoch": 0.47878128400435255, |
| "grad_norm": 0.1706560105085373, |
| "learning_rate": 0.00010736982206947388, |
| "loss": 0.1826, |
| "step": 1320 |
| }, |
| { |
| "epoch": 0.47914399709829525, |
| "grad_norm": 0.1749362051486969, |
| "learning_rate": 0.00010725576378474647, |
| "loss": 0.1957, |
| "step": 1321 |
| }, |
| { |
| "epoch": 0.47950671019223795, |
| "grad_norm": 0.16315925121307373, |
| "learning_rate": 0.0001071416960100343, |
| "loss": 0.1675, |
| "step": 1322 |
| }, |
| { |
| "epoch": 0.47986942328618065, |
| "grad_norm": 0.18400579690933228, |
| "learning_rate": 0.0001070276188945293, |
| "loss": 0.1896, |
| "step": 1323 |
| }, |
| { |
| "epoch": 0.4802321363801233, |
| "grad_norm": 0.15948963165283203, |
| "learning_rate": 0.00010691353258743566, |
| "loss": 0.1816, |
| "step": 1324 |
| }, |
| { |
| "epoch": 0.480594849474066, |
| "grad_norm": 0.17198865115642548, |
| "learning_rate": 0.00010679943723796948, |
| "loss": 0.176, |
| "step": 1325 |
| }, |
| { |
| "epoch": 0.4809575625680087, |
| "grad_norm": 0.15731912851333618, |
| "learning_rate": 0.00010668533299535885, |
| "loss": 0.1717, |
| "step": 1326 |
| }, |
| { |
| "epoch": 0.4813202756619514, |
| "grad_norm": 0.19525468349456787, |
| "learning_rate": 0.00010657122000884334, |
| "loss": 0.2036, |
| "step": 1327 |
| }, |
| { |
| "epoch": 0.4816829887558941, |
| "grad_norm": 0.18892909586429596, |
| "learning_rate": 0.00010645709842767404, |
| "loss": 0.1886, |
| "step": 1328 |
| }, |
| { |
| "epoch": 0.4820457018498368, |
| "grad_norm": 0.1732751876115799, |
| "learning_rate": 0.00010634296840111328, |
| "loss": 0.1654, |
| "step": 1329 |
| }, |
| { |
| "epoch": 0.48240841494377945, |
| "grad_norm": 0.17465728521347046, |
| "learning_rate": 0.00010622883007843439, |
| "loss": 0.2119, |
| "step": 1330 |
| }, |
| { |
| "epoch": 0.48277112803772215, |
| "grad_norm": 0.17398551106452942, |
| "learning_rate": 0.00010611468360892157, |
| "loss": 0.1833, |
| "step": 1331 |
| }, |
| { |
| "epoch": 0.48313384113166485, |
| "grad_norm": 0.16920240223407745, |
| "learning_rate": 0.00010600052914186971, |
| "loss": 0.182, |
| "step": 1332 |
| }, |
| { |
| "epoch": 0.48349655422560756, |
| "grad_norm": 0.1846507489681244, |
| "learning_rate": 0.0001058863668265841, |
| "loss": 0.2106, |
| "step": 1333 |
| }, |
| { |
| "epoch": 0.48385926731955026, |
| "grad_norm": 0.2055717557668686, |
| "learning_rate": 0.00010577219681238035, |
| "loss": 0.1992, |
| "step": 1334 |
| }, |
| { |
| "epoch": 0.4842219804134929, |
| "grad_norm": 0.18416710197925568, |
| "learning_rate": 0.00010565801924858411, |
| "loss": 0.1997, |
| "step": 1335 |
| }, |
| { |
| "epoch": 0.4845846935074356, |
| "grad_norm": 0.1609608680009842, |
| "learning_rate": 0.00010554383428453093, |
| "loss": 0.2147, |
| "step": 1336 |
| }, |
| { |
| "epoch": 0.4849474066013783, |
| "grad_norm": 0.1770334094762802, |
| "learning_rate": 0.00010542964206956601, |
| "loss": 0.1796, |
| "step": 1337 |
| }, |
| { |
| "epoch": 0.485310119695321, |
| "grad_norm": 0.24041593074798584, |
| "learning_rate": 0.00010531544275304403, |
| "loss": 0.1828, |
| "step": 1338 |
| }, |
| { |
| "epoch": 0.4856728327892637, |
| "grad_norm": 0.18589763343334198, |
| "learning_rate": 0.00010520123648432896, |
| "loss": 0.1894, |
| "step": 1339 |
| }, |
| { |
| "epoch": 0.4860355458832064, |
| "grad_norm": 0.19816087186336517, |
| "learning_rate": 0.00010508702341279391, |
| "loss": 0.1849, |
| "step": 1340 |
| }, |
| { |
| "epoch": 0.48639825897714906, |
| "grad_norm": 0.20071928203105927, |
| "learning_rate": 0.00010497280368782083, |
| "loss": 0.1871, |
| "step": 1341 |
| }, |
| { |
| "epoch": 0.48676097207109176, |
| "grad_norm": 0.19222816824913025, |
| "learning_rate": 0.00010485857745880043, |
| "loss": 0.2114, |
| "step": 1342 |
| }, |
| { |
| "epoch": 0.48712368516503446, |
| "grad_norm": 0.17220762372016907, |
| "learning_rate": 0.00010474434487513183, |
| "loss": 0.186, |
| "step": 1343 |
| }, |
| { |
| "epoch": 0.48748639825897716, |
| "grad_norm": 0.1726873815059662, |
| "learning_rate": 0.00010463010608622259, |
| "loss": 0.1945, |
| "step": 1344 |
| }, |
| { |
| "epoch": 0.48784911135291986, |
| "grad_norm": 0.1876380741596222, |
| "learning_rate": 0.0001045158612414883, |
| "loss": 0.214, |
| "step": 1345 |
| }, |
| { |
| "epoch": 0.4882118244468625, |
| "grad_norm": 0.16988040506839752, |
| "learning_rate": 0.00010440161049035242, |
| "loss": 0.1901, |
| "step": 1346 |
| }, |
| { |
| "epoch": 0.4885745375408052, |
| "grad_norm": 0.15666206181049347, |
| "learning_rate": 0.00010428735398224629, |
| "loss": 0.1788, |
| "step": 1347 |
| }, |
| { |
| "epoch": 0.4889372506347479, |
| "grad_norm": 0.16927142441272736, |
| "learning_rate": 0.00010417309186660869, |
| "loss": 0.1847, |
| "step": 1348 |
| }, |
| { |
| "epoch": 0.4892999637286906, |
| "grad_norm": 0.17525021731853485, |
| "learning_rate": 0.00010405882429288567, |
| "loss": 0.1826, |
| "step": 1349 |
| }, |
| { |
| "epoch": 0.4896626768226333, |
| "grad_norm": 0.19699347019195557, |
| "learning_rate": 0.00010394455141053056, |
| "loss": 0.2124, |
| "step": 1350 |
| }, |
| { |
| "epoch": 0.490025389916576, |
| "grad_norm": 0.17007745802402496, |
| "learning_rate": 0.00010383027336900355, |
| "loss": 0.1936, |
| "step": 1351 |
| }, |
| { |
| "epoch": 0.49038810301051866, |
| "grad_norm": 0.19556905329227448, |
| "learning_rate": 0.00010371599031777155, |
| "loss": 0.197, |
| "step": 1352 |
| }, |
| { |
| "epoch": 0.49075081610446136, |
| "grad_norm": 0.16477836668491364, |
| "learning_rate": 0.00010360170240630808, |
| "loss": 0.1625, |
| "step": 1353 |
| }, |
| { |
| "epoch": 0.49111352919840406, |
| "grad_norm": 0.1732366383075714, |
| "learning_rate": 0.00010348740978409302, |
| "loss": 0.1971, |
| "step": 1354 |
| }, |
| { |
| "epoch": 0.49147624229234677, |
| "grad_norm": 0.16834014654159546, |
| "learning_rate": 0.00010337311260061233, |
| "loss": 0.1766, |
| "step": 1355 |
| }, |
| { |
| "epoch": 0.49183895538628947, |
| "grad_norm": 0.16868503391742706, |
| "learning_rate": 0.00010325881100535806, |
| "loss": 0.1708, |
| "step": 1356 |
| }, |
| { |
| "epoch": 0.4922016684802321, |
| "grad_norm": 0.1709543913602829, |
| "learning_rate": 0.00010314450514782792, |
| "loss": 0.1639, |
| "step": 1357 |
| }, |
| { |
| "epoch": 0.4925643815741748, |
| "grad_norm": 0.19506582617759705, |
| "learning_rate": 0.0001030301951775253, |
| "loss": 0.2164, |
| "step": 1358 |
| }, |
| { |
| "epoch": 0.4929270946681175, |
| "grad_norm": 0.1822308748960495, |
| "learning_rate": 0.00010291588124395881, |
| "loss": 0.1743, |
| "step": 1359 |
| }, |
| { |
| "epoch": 0.4932898077620602, |
| "grad_norm": 0.16552822291851044, |
| "learning_rate": 0.00010280156349664245, |
| "loss": 0.1826, |
| "step": 1360 |
| }, |
| { |
| "epoch": 0.4936525208560029, |
| "grad_norm": 0.1888803243637085, |
| "learning_rate": 0.00010268724208509504, |
| "loss": 0.2016, |
| "step": 1361 |
| }, |
| { |
| "epoch": 0.4940152339499456, |
| "grad_norm": 0.15892508625984192, |
| "learning_rate": 0.00010257291715884023, |
| "loss": 0.1666, |
| "step": 1362 |
| }, |
| { |
| "epoch": 0.49437794704388827, |
| "grad_norm": 0.18861308693885803, |
| "learning_rate": 0.00010245858886740636, |
| "loss": 0.1969, |
| "step": 1363 |
| }, |
| { |
| "epoch": 0.49474066013783097, |
| "grad_norm": 0.1808152198791504, |
| "learning_rate": 0.00010234425736032607, |
| "loss": 0.181, |
| "step": 1364 |
| }, |
| { |
| "epoch": 0.49510337323177367, |
| "grad_norm": 0.18545283377170563, |
| "learning_rate": 0.00010222992278713619, |
| "loss": 0.1757, |
| "step": 1365 |
| }, |
| { |
| "epoch": 0.49546608632571637, |
| "grad_norm": 0.16214706003665924, |
| "learning_rate": 0.00010211558529737768, |
| "loss": 0.1809, |
| "step": 1366 |
| }, |
| { |
| "epoch": 0.49582879941965907, |
| "grad_norm": 0.16413475573062897, |
| "learning_rate": 0.00010200124504059522, |
| "loss": 0.1765, |
| "step": 1367 |
| }, |
| { |
| "epoch": 0.4961915125136017, |
| "grad_norm": 0.17465685307979584, |
| "learning_rate": 0.0001018869021663371, |
| "loss": 0.1786, |
| "step": 1368 |
| }, |
| { |
| "epoch": 0.4965542256075444, |
| "grad_norm": 0.16205236315727234, |
| "learning_rate": 0.00010177255682415512, |
| "loss": 0.1778, |
| "step": 1369 |
| }, |
| { |
| "epoch": 0.4969169387014871, |
| "grad_norm": 0.17154665291309357, |
| "learning_rate": 0.0001016582091636042, |
| "loss": 0.1848, |
| "step": 1370 |
| }, |
| { |
| "epoch": 0.4972796517954298, |
| "grad_norm": 0.19808538258075714, |
| "learning_rate": 0.00010154385933424236, |
| "loss": 0.1872, |
| "step": 1371 |
| }, |
| { |
| "epoch": 0.4976423648893725, |
| "grad_norm": 0.17381629347801208, |
| "learning_rate": 0.00010142950748563047, |
| "loss": 0.1706, |
| "step": 1372 |
| }, |
| { |
| "epoch": 0.4980050779833152, |
| "grad_norm": 0.18413105607032776, |
| "learning_rate": 0.00010131515376733199, |
| "loss": 0.2041, |
| "step": 1373 |
| }, |
| { |
| "epoch": 0.49836779107725787, |
| "grad_norm": 0.16707438230514526, |
| "learning_rate": 0.0001012007983289128, |
| "loss": 0.1824, |
| "step": 1374 |
| }, |
| { |
| "epoch": 0.4987305041712006, |
| "grad_norm": 0.18369126319885254, |
| "learning_rate": 0.00010108644131994118, |
| "loss": 0.1838, |
| "step": 1375 |
| }, |
| { |
| "epoch": 0.4990932172651433, |
| "grad_norm": 0.17866884171962738, |
| "learning_rate": 0.00010097208288998727, |
| "loss": 0.18, |
| "step": 1376 |
| }, |
| { |
| "epoch": 0.499455930359086, |
| "grad_norm": 0.17458714544773102, |
| "learning_rate": 0.0001008577231886232, |
| "loss": 0.1863, |
| "step": 1377 |
| }, |
| { |
| "epoch": 0.4998186434530287, |
| "grad_norm": 0.16435278952121735, |
| "learning_rate": 0.00010074336236542275, |
| "loss": 0.1691, |
| "step": 1378 |
| }, |
| { |
| "epoch": 0.5001813565469714, |
| "grad_norm": 0.18374355137348175, |
| "learning_rate": 0.00010062900056996111, |
| "loss": 0.2016, |
| "step": 1379 |
| }, |
| { |
| "epoch": 0.500544069640914, |
| "grad_norm": 0.1715199500322342, |
| "learning_rate": 0.00010051463795181475, |
| "loss": 0.1775, |
| "step": 1380 |
| }, |
| { |
| "epoch": 0.5009067827348568, |
| "grad_norm": 0.17471933364868164, |
| "learning_rate": 0.00010040027466056128, |
| "loss": 0.1784, |
| "step": 1381 |
| }, |
| { |
| "epoch": 0.5012694958287994, |
| "grad_norm": 0.182729572057724, |
| "learning_rate": 0.00010028591084577914, |
| "loss": 0.1848, |
| "step": 1382 |
| }, |
| { |
| "epoch": 0.5016322089227421, |
| "grad_norm": 0.1831514835357666, |
| "learning_rate": 0.00010017154665704742, |
| "loss": 0.1782, |
| "step": 1383 |
| }, |
| { |
| "epoch": 0.5019949220166848, |
| "grad_norm": 0.18920493125915527, |
| "learning_rate": 0.00010005718224394583, |
| "loss": 0.1983, |
| "step": 1384 |
| }, |
| { |
| "epoch": 0.5023576351106275, |
| "grad_norm": 0.18116223812103271, |
| "learning_rate": 9.994281775605417e-05, |
| "loss": 0.1703, |
| "step": 1385 |
| }, |
| { |
| "epoch": 0.5027203482045702, |
| "grad_norm": 0.16635280847549438, |
| "learning_rate": 9.982845334295257e-05, |
| "loss": 0.1826, |
| "step": 1386 |
| }, |
| { |
| "epoch": 0.5030830612985129, |
| "grad_norm": 0.1902262419462204, |
| "learning_rate": 9.971408915422089e-05, |
| "loss": 0.1821, |
| "step": 1387 |
| }, |
| { |
| "epoch": 0.5034457743924555, |
| "grad_norm": 0.1716509759426117, |
| "learning_rate": 9.959972533943873e-05, |
| "loss": 0.1774, |
| "step": 1388 |
| }, |
| { |
| "epoch": 0.5038084874863983, |
| "grad_norm": 0.1831229031085968, |
| "learning_rate": 9.948536204818527e-05, |
| "loss": 0.1859, |
| "step": 1389 |
| }, |
| { |
| "epoch": 0.5041712005803409, |
| "grad_norm": 0.17858019471168518, |
| "learning_rate": 9.937099943003894e-05, |
| "loss": 0.1763, |
| "step": 1390 |
| }, |
| { |
| "epoch": 0.5045339136742837, |
| "grad_norm": 0.20118439197540283, |
| "learning_rate": 9.925663763457726e-05, |
| "loss": 0.2055, |
| "step": 1391 |
| }, |
| { |
| "epoch": 0.5048966267682263, |
| "grad_norm": 0.1790417581796646, |
| "learning_rate": 9.91422768113768e-05, |
| "loss": 0.1886, |
| "step": 1392 |
| }, |
| { |
| "epoch": 0.505259339862169, |
| "grad_norm": 0.18328474462032318, |
| "learning_rate": 9.902791711001274e-05, |
| "loss": 0.2024, |
| "step": 1393 |
| }, |
| { |
| "epoch": 0.5056220529561117, |
| "grad_norm": 0.17188413441181183, |
| "learning_rate": 9.891355868005885e-05, |
| "loss": 0.1822, |
| "step": 1394 |
| }, |
| { |
| "epoch": 0.5059847660500544, |
| "grad_norm": 0.20239926874637604, |
| "learning_rate": 9.879920167108722e-05, |
| "loss": 0.1811, |
| "step": 1395 |
| }, |
| { |
| "epoch": 0.5063474791439971, |
| "grad_norm": 0.1758367419242859, |
| "learning_rate": 9.868484623266807e-05, |
| "loss": 0.2074, |
| "step": 1396 |
| }, |
| { |
| "epoch": 0.5067101922379398, |
| "grad_norm": 0.16482442617416382, |
| "learning_rate": 9.857049251436957e-05, |
| "loss": 0.1748, |
| "step": 1397 |
| }, |
| { |
| "epoch": 0.5070729053318824, |
| "grad_norm": 0.19277919828891754, |
| "learning_rate": 9.845614066575764e-05, |
| "loss": 0.2831, |
| "step": 1398 |
| }, |
| { |
| "epoch": 0.5074356184258252, |
| "grad_norm": 0.19243241846561432, |
| "learning_rate": 9.834179083639581e-05, |
| "loss": 0.1817, |
| "step": 1399 |
| }, |
| { |
| "epoch": 0.5077983315197678, |
| "grad_norm": 0.19496306777000427, |
| "learning_rate": 9.822744317584492e-05, |
| "loss": 0.1614, |
| "step": 1400 |
| }, |
| { |
| "epoch": 0.5081610446137106, |
| "grad_norm": 0.1891697645187378, |
| "learning_rate": 9.811309783366291e-05, |
| "loss": 0.1952, |
| "step": 1401 |
| }, |
| { |
| "epoch": 0.5085237577076532, |
| "grad_norm": 0.19444064795970917, |
| "learning_rate": 9.799875495940481e-05, |
| "loss": 0.1942, |
| "step": 1402 |
| }, |
| { |
| "epoch": 0.508886470801596, |
| "grad_norm": 0.18112586438655853, |
| "learning_rate": 9.788441470262235e-05, |
| "loss": 0.1963, |
| "step": 1403 |
| }, |
| { |
| "epoch": 0.5092491838955386, |
| "grad_norm": 0.17287184298038483, |
| "learning_rate": 9.777007721286381e-05, |
| "loss": 0.1733, |
| "step": 1404 |
| }, |
| { |
| "epoch": 0.5096118969894813, |
| "grad_norm": 0.18775591254234314, |
| "learning_rate": 9.765574263967396e-05, |
| "loss": 0.1965, |
| "step": 1405 |
| }, |
| { |
| "epoch": 0.509974610083424, |
| "grad_norm": 0.17914709448814392, |
| "learning_rate": 9.754141113259366e-05, |
| "loss": 0.1748, |
| "step": 1406 |
| }, |
| { |
| "epoch": 0.5103373231773667, |
| "grad_norm": 0.19423453509807587, |
| "learning_rate": 9.74270828411598e-05, |
| "loss": 0.1833, |
| "step": 1407 |
| }, |
| { |
| "epoch": 0.5107000362713094, |
| "grad_norm": 0.18104608356952667, |
| "learning_rate": 9.731275791490501e-05, |
| "loss": 0.177, |
| "step": 1408 |
| }, |
| { |
| "epoch": 0.5110627493652521, |
| "grad_norm": 0.17595738172531128, |
| "learning_rate": 9.719843650335758e-05, |
| "loss": 0.1839, |
| "step": 1409 |
| }, |
| { |
| "epoch": 0.5114254624591947, |
| "grad_norm": 0.18560685217380524, |
| "learning_rate": 9.708411875604118e-05, |
| "loss": 0.1995, |
| "step": 1410 |
| }, |
| { |
| "epoch": 0.5117881755531375, |
| "grad_norm": 0.18210434913635254, |
| "learning_rate": 9.696980482247474e-05, |
| "loss": 0.1963, |
| "step": 1411 |
| }, |
| { |
| "epoch": 0.5121508886470801, |
| "grad_norm": 0.16906267404556274, |
| "learning_rate": 9.685549485217209e-05, |
| "loss": 0.1636, |
| "step": 1412 |
| }, |
| { |
| "epoch": 0.5125136017410229, |
| "grad_norm": 0.19701135158538818, |
| "learning_rate": 9.674118899464195e-05, |
| "loss": 0.2184, |
| "step": 1413 |
| }, |
| { |
| "epoch": 0.5128763148349655, |
| "grad_norm": 0.18875081837177277, |
| "learning_rate": 9.662688739938769e-05, |
| "loss": 0.2142, |
| "step": 1414 |
| }, |
| { |
| "epoch": 0.5132390279289082, |
| "grad_norm": 0.20290465652942657, |
| "learning_rate": 9.651259021590703e-05, |
| "loss": 0.2041, |
| "step": 1415 |
| }, |
| { |
| "epoch": 0.5136017410228509, |
| "grad_norm": 0.1915699690580368, |
| "learning_rate": 9.639829759369191e-05, |
| "loss": 0.1741, |
| "step": 1416 |
| }, |
| { |
| "epoch": 0.5139644541167936, |
| "grad_norm": 0.1645934134721756, |
| "learning_rate": 9.628400968222846e-05, |
| "loss": 0.179, |
| "step": 1417 |
| }, |
| { |
| "epoch": 0.5143271672107363, |
| "grad_norm": 0.18472225964069366, |
| "learning_rate": 9.616972663099647e-05, |
| "loss": 0.1876, |
| "step": 1418 |
| }, |
| { |
| "epoch": 0.514689880304679, |
| "grad_norm": 0.17435920238494873, |
| "learning_rate": 9.605544858946945e-05, |
| "loss": 0.175, |
| "step": 1419 |
| }, |
| { |
| "epoch": 0.5150525933986216, |
| "grad_norm": 0.1865229606628418, |
| "learning_rate": 9.594117570711434e-05, |
| "loss": 0.2141, |
| "step": 1420 |
| }, |
| { |
| "epoch": 0.5154153064925644, |
| "grad_norm": 0.18339309096336365, |
| "learning_rate": 9.582690813339136e-05, |
| "loss": 0.1794, |
| "step": 1421 |
| }, |
| { |
| "epoch": 0.515778019586507, |
| "grad_norm": 0.1994606852531433, |
| "learning_rate": 9.571264601775369e-05, |
| "loss": 0.1835, |
| "step": 1422 |
| }, |
| { |
| "epoch": 0.5161407326804498, |
| "grad_norm": 0.16973696649074554, |
| "learning_rate": 9.559838950964757e-05, |
| "loss": 0.1587, |
| "step": 1423 |
| }, |
| { |
| "epoch": 0.5165034457743924, |
| "grad_norm": 0.17294169962406158, |
| "learning_rate": 9.548413875851174e-05, |
| "loss": 0.1748, |
| "step": 1424 |
| }, |
| { |
| "epoch": 0.5168661588683352, |
| "grad_norm": 0.19328264892101288, |
| "learning_rate": 9.536989391377743e-05, |
| "loss": 0.2012, |
| "step": 1425 |
| }, |
| { |
| "epoch": 0.5172288719622778, |
| "grad_norm": 0.18262383341789246, |
| "learning_rate": 9.52556551248682e-05, |
| "loss": 0.1806, |
| "step": 1426 |
| }, |
| { |
| "epoch": 0.5175915850562205, |
| "grad_norm": 0.16941824555397034, |
| "learning_rate": 9.514142254119962e-05, |
| "loss": 0.1739, |
| "step": 1427 |
| }, |
| { |
| "epoch": 0.5179542981501633, |
| "grad_norm": 0.1808822602033615, |
| "learning_rate": 9.502719631217917e-05, |
| "loss": 0.1685, |
| "step": 1428 |
| }, |
| { |
| "epoch": 0.5183170112441059, |
| "grad_norm": 0.213886559009552, |
| "learning_rate": 9.49129765872061e-05, |
| "loss": 0.1851, |
| "step": 1429 |
| }, |
| { |
| "epoch": 0.5186797243380487, |
| "grad_norm": 0.1952863335609436, |
| "learning_rate": 9.479876351567107e-05, |
| "loss": 0.1691, |
| "step": 1430 |
| }, |
| { |
| "epoch": 0.5190424374319913, |
| "grad_norm": 0.1745711863040924, |
| "learning_rate": 9.4684557246956e-05, |
| "loss": 0.1883, |
| "step": 1431 |
| }, |
| { |
| "epoch": 0.519405150525934, |
| "grad_norm": 0.19590620696544647, |
| "learning_rate": 9.457035793043401e-05, |
| "loss": 0.1822, |
| "step": 1432 |
| }, |
| { |
| "epoch": 0.5197678636198767, |
| "grad_norm": 0.17998209595680237, |
| "learning_rate": 9.445616571546909e-05, |
| "loss": 0.172, |
| "step": 1433 |
| }, |
| { |
| "epoch": 0.5201305767138193, |
| "grad_norm": 0.1765129566192627, |
| "learning_rate": 9.434198075141591e-05, |
| "loss": 0.1748, |
| "step": 1434 |
| }, |
| { |
| "epoch": 0.5204932898077621, |
| "grad_norm": 0.19922930002212524, |
| "learning_rate": 9.422780318761965e-05, |
| "loss": 0.1941, |
| "step": 1435 |
| }, |
| { |
| "epoch": 0.5208560029017048, |
| "grad_norm": 0.1994534283876419, |
| "learning_rate": 9.411363317341592e-05, |
| "loss": 0.1838, |
| "step": 1436 |
| }, |
| { |
| "epoch": 0.5212187159956474, |
| "grad_norm": 0.19850608706474304, |
| "learning_rate": 9.399947085813032e-05, |
| "loss": 0.1768, |
| "step": 1437 |
| }, |
| { |
| "epoch": 0.5215814290895902, |
| "grad_norm": 0.16051959991455078, |
| "learning_rate": 9.388531639107846e-05, |
| "loss": 0.1781, |
| "step": 1438 |
| }, |
| { |
| "epoch": 0.5219441421835328, |
| "grad_norm": 0.18641552329063416, |
| "learning_rate": 9.377116992156566e-05, |
| "loss": 0.1884, |
| "step": 1439 |
| }, |
| { |
| "epoch": 0.5223068552774756, |
| "grad_norm": 0.16958610713481903, |
| "learning_rate": 9.365703159888677e-05, |
| "loss": 0.1768, |
| "step": 1440 |
| }, |
| { |
| "epoch": 0.5226695683714182, |
| "grad_norm": 0.16557306051254272, |
| "learning_rate": 9.354290157232596e-05, |
| "loss": 0.1648, |
| "step": 1441 |
| }, |
| { |
| "epoch": 0.5230322814653608, |
| "grad_norm": 0.18799157440662384, |
| "learning_rate": 9.342877999115667e-05, |
| "loss": 0.1711, |
| "step": 1442 |
| }, |
| { |
| "epoch": 0.5233949945593036, |
| "grad_norm": 0.19848479330539703, |
| "learning_rate": 9.331466700464117e-05, |
| "loss": 0.1807, |
| "step": 1443 |
| }, |
| { |
| "epoch": 0.5237577076532463, |
| "grad_norm": 0.17750594019889832, |
| "learning_rate": 9.320056276203054e-05, |
| "loss": 0.1907, |
| "step": 1444 |
| }, |
| { |
| "epoch": 0.524120420747189, |
| "grad_norm": 0.16206082701683044, |
| "learning_rate": 9.308646741256439e-05, |
| "loss": 0.1808, |
| "step": 1445 |
| }, |
| { |
| "epoch": 0.5244831338411317, |
| "grad_norm": 0.1657271534204483, |
| "learning_rate": 9.297238110547074e-05, |
| "loss": 0.177, |
| "step": 1446 |
| }, |
| { |
| "epoch": 0.5248458469350744, |
| "grad_norm": 0.19123826920986176, |
| "learning_rate": 9.285830398996571e-05, |
| "loss": 0.1817, |
| "step": 1447 |
| }, |
| { |
| "epoch": 0.5252085600290171, |
| "grad_norm": 0.16904449462890625, |
| "learning_rate": 9.274423621525354e-05, |
| "loss": 0.1837, |
| "step": 1448 |
| }, |
| { |
| "epoch": 0.5255712731229597, |
| "grad_norm": 0.19816622138023376, |
| "learning_rate": 9.263017793052615e-05, |
| "loss": 0.1954, |
| "step": 1449 |
| }, |
| { |
| "epoch": 0.5259339862169025, |
| "grad_norm": 0.18440890312194824, |
| "learning_rate": 9.251612928496298e-05, |
| "loss": 0.1708, |
| "step": 1450 |
| }, |
| { |
| "epoch": 0.5262966993108451, |
| "grad_norm": 0.18821316957473755, |
| "learning_rate": 9.240209042773105e-05, |
| "loss": 0.1929, |
| "step": 1451 |
| }, |
| { |
| "epoch": 0.5266594124047879, |
| "grad_norm": 0.18499478697776794, |
| "learning_rate": 9.228806150798442e-05, |
| "loss": 0.1774, |
| "step": 1452 |
| }, |
| { |
| "epoch": 0.5270221254987305, |
| "grad_norm": 0.21519748866558075, |
| "learning_rate": 9.21740426748642e-05, |
| "loss": 0.1915, |
| "step": 1453 |
| }, |
| { |
| "epoch": 0.5273848385926732, |
| "grad_norm": 0.18411661684513092, |
| "learning_rate": 9.206003407749833e-05, |
| "loss": 0.2101, |
| "step": 1454 |
| }, |
| { |
| "epoch": 0.5277475516866159, |
| "grad_norm": 0.17182524502277374, |
| "learning_rate": 9.194603586500136e-05, |
| "loss": 0.1672, |
| "step": 1455 |
| }, |
| { |
| "epoch": 0.5281102647805586, |
| "grad_norm": 0.18551282584667206, |
| "learning_rate": 9.183204818647424e-05, |
| "loss": 0.1924, |
| "step": 1456 |
| }, |
| { |
| "epoch": 0.5284729778745013, |
| "grad_norm": 0.18289272487163544, |
| "learning_rate": 9.171807119100413e-05, |
| "loss": 0.1781, |
| "step": 1457 |
| }, |
| { |
| "epoch": 0.528835690968444, |
| "grad_norm": 0.169638991355896, |
| "learning_rate": 9.160410502766424e-05, |
| "loss": 0.1704, |
| "step": 1458 |
| }, |
| { |
| "epoch": 0.5291984040623866, |
| "grad_norm": 0.17855599522590637, |
| "learning_rate": 9.149014984551357e-05, |
| "loss": 0.1761, |
| "step": 1459 |
| }, |
| { |
| "epoch": 0.5295611171563294, |
| "grad_norm": 0.21452195942401886, |
| "learning_rate": 9.137620579359685e-05, |
| "loss": 0.1778, |
| "step": 1460 |
| }, |
| { |
| "epoch": 0.529923830250272, |
| "grad_norm": 0.20922896265983582, |
| "learning_rate": 9.126227302094417e-05, |
| "loss": 0.2186, |
| "step": 1461 |
| }, |
| { |
| "epoch": 0.5302865433442148, |
| "grad_norm": 0.15859532356262207, |
| "learning_rate": 9.114835167657091e-05, |
| "loss": 0.1829, |
| "step": 1462 |
| }, |
| { |
| "epoch": 0.5306492564381574, |
| "grad_norm": 0.17610323429107666, |
| "learning_rate": 9.103444190947746e-05, |
| "loss": 0.2027, |
| "step": 1463 |
| }, |
| { |
| "epoch": 0.5310119695321001, |
| "grad_norm": 0.17557282745838165, |
| "learning_rate": 9.092054386864912e-05, |
| "loss": 0.1747, |
| "step": 1464 |
| }, |
| { |
| "epoch": 0.5313746826260428, |
| "grad_norm": 0.19372673332691193, |
| "learning_rate": 9.080665770305578e-05, |
| "loss": 0.1644, |
| "step": 1465 |
| }, |
| { |
| "epoch": 0.5317373957199855, |
| "grad_norm": 0.20970730483531952, |
| "learning_rate": 9.069278356165187e-05, |
| "loss": 0.2032, |
| "step": 1466 |
| }, |
| { |
| "epoch": 0.5321001088139282, |
| "grad_norm": 0.2470318228006363, |
| "learning_rate": 9.057892159337612e-05, |
| "loss": 0.2121, |
| "step": 1467 |
| }, |
| { |
| "epoch": 0.5324628219078709, |
| "grad_norm": 0.1663379967212677, |
| "learning_rate": 9.046507194715121e-05, |
| "loss": 0.1741, |
| "step": 1468 |
| }, |
| { |
| "epoch": 0.5328255350018135, |
| "grad_norm": 0.1842135637998581, |
| "learning_rate": 9.035123477188381e-05, |
| "loss": 0.1793, |
| "step": 1469 |
| }, |
| { |
| "epoch": 0.5331882480957563, |
| "grad_norm": 0.19390299916267395, |
| "learning_rate": 9.023741021646427e-05, |
| "loss": 0.2071, |
| "step": 1470 |
| }, |
| { |
| "epoch": 0.5335509611896989, |
| "grad_norm": 0.17016194760799408, |
| "learning_rate": 9.012359842976638e-05, |
| "loss": 0.1718, |
| "step": 1471 |
| }, |
| { |
| "epoch": 0.5339136742836417, |
| "grad_norm": 0.19337502121925354, |
| "learning_rate": 9.000979956064725e-05, |
| "loss": 0.2095, |
| "step": 1472 |
| }, |
| { |
| "epoch": 0.5342763873775843, |
| "grad_norm": 0.21092645823955536, |
| "learning_rate": 8.989601375794717e-05, |
| "loss": 0.1854, |
| "step": 1473 |
| }, |
| { |
| "epoch": 0.5346391004715271, |
| "grad_norm": 0.18103566765785217, |
| "learning_rate": 8.978224117048925e-05, |
| "loss": 0.1829, |
| "step": 1474 |
| }, |
| { |
| "epoch": 0.5350018135654697, |
| "grad_norm": 0.17190292477607727, |
| "learning_rate": 8.966848194707931e-05, |
| "loss": 0.1733, |
| "step": 1475 |
| }, |
| { |
| "epoch": 0.5353645266594124, |
| "grad_norm": 0.18108366429805756, |
| "learning_rate": 8.955473623650578e-05, |
| "loss": 0.2058, |
| "step": 1476 |
| }, |
| { |
| "epoch": 0.5357272397533551, |
| "grad_norm": 0.16649720072746277, |
| "learning_rate": 8.944100418753931e-05, |
| "loss": 0.1744, |
| "step": 1477 |
| }, |
| { |
| "epoch": 0.5360899528472978, |
| "grad_norm": 0.15770559012889862, |
| "learning_rate": 8.932728594893271e-05, |
| "loss": 0.1775, |
| "step": 1478 |
| }, |
| { |
| "epoch": 0.5364526659412405, |
| "grad_norm": 0.1907668113708496, |
| "learning_rate": 8.921358166942084e-05, |
| "loss": 0.1766, |
| "step": 1479 |
| }, |
| { |
| "epoch": 0.5368153790351832, |
| "grad_norm": 0.18284808099269867, |
| "learning_rate": 8.909989149772015e-05, |
| "loss": 0.1708, |
| "step": 1480 |
| }, |
| { |
| "epoch": 0.5371780921291258, |
| "grad_norm": 0.20297999680042267, |
| "learning_rate": 8.898621558252874e-05, |
| "loss": 0.165, |
| "step": 1481 |
| }, |
| { |
| "epoch": 0.5375408052230686, |
| "grad_norm": 0.22023969888687134, |
| "learning_rate": 8.887255407252596e-05, |
| "loss": 0.1668, |
| "step": 1482 |
| }, |
| { |
| "epoch": 0.5379035183170112, |
| "grad_norm": 0.17669132351875305, |
| "learning_rate": 8.875890711637243e-05, |
| "loss": 0.2046, |
| "step": 1483 |
| }, |
| { |
| "epoch": 0.538266231410954, |
| "grad_norm": 0.17783772945404053, |
| "learning_rate": 8.864527486270964e-05, |
| "loss": 0.1648, |
| "step": 1484 |
| }, |
| { |
| "epoch": 0.5386289445048966, |
| "grad_norm": 0.171718031167984, |
| "learning_rate": 8.853165746015997e-05, |
| "loss": 0.1897, |
| "step": 1485 |
| }, |
| { |
| "epoch": 0.5389916575988393, |
| "grad_norm": 0.16997992992401123, |
| "learning_rate": 8.841805505732626e-05, |
| "loss": 0.167, |
| "step": 1486 |
| }, |
| { |
| "epoch": 0.539354370692782, |
| "grad_norm": 0.1764468550682068, |
| "learning_rate": 8.830446780279176e-05, |
| "loss": 0.1659, |
| "step": 1487 |
| }, |
| { |
| "epoch": 0.5397170837867247, |
| "grad_norm": 0.18435722589492798, |
| "learning_rate": 8.819089584511996e-05, |
| "loss": 0.1754, |
| "step": 1488 |
| }, |
| { |
| "epoch": 0.5400797968806674, |
| "grad_norm": 0.19305875897407532, |
| "learning_rate": 8.807733933285429e-05, |
| "loss": 0.1918, |
| "step": 1489 |
| }, |
| { |
| "epoch": 0.5404425099746101, |
| "grad_norm": 0.1882489174604416, |
| "learning_rate": 8.796379841451796e-05, |
| "loss": 0.1906, |
| "step": 1490 |
| }, |
| { |
| "epoch": 0.5408052230685527, |
| "grad_norm": 0.14983880519866943, |
| "learning_rate": 8.785027323861386e-05, |
| "loss": 0.1552, |
| "step": 1491 |
| }, |
| { |
| "epoch": 0.5411679361624955, |
| "grad_norm": 0.16522106528282166, |
| "learning_rate": 8.773676395362425e-05, |
| "loss": 0.1761, |
| "step": 1492 |
| }, |
| { |
| "epoch": 0.5415306492564381, |
| "grad_norm": 0.17727860808372498, |
| "learning_rate": 8.76232707080106e-05, |
| "loss": 0.1631, |
| "step": 1493 |
| }, |
| { |
| "epoch": 0.5418933623503809, |
| "grad_norm": 0.1912899911403656, |
| "learning_rate": 8.750979365021338e-05, |
| "loss": 0.1804, |
| "step": 1494 |
| }, |
| { |
| "epoch": 0.5422560754443235, |
| "grad_norm": 0.185381218791008, |
| "learning_rate": 8.739633292865192e-05, |
| "loss": 0.1831, |
| "step": 1495 |
| }, |
| { |
| "epoch": 0.5426187885382663, |
| "grad_norm": 0.18887324631214142, |
| "learning_rate": 8.728288869172421e-05, |
| "loss": 0.178, |
| "step": 1496 |
| }, |
| { |
| "epoch": 0.5429815016322089, |
| "grad_norm": 0.1737644374370575, |
| "learning_rate": 8.716946108780655e-05, |
| "loss": 0.1769, |
| "step": 1497 |
| }, |
| { |
| "epoch": 0.5433442147261516, |
| "grad_norm": 0.18002916872501373, |
| "learning_rate": 8.705605026525371e-05, |
| "loss": 0.1599, |
| "step": 1498 |
| }, |
| { |
| "epoch": 0.5437069278200943, |
| "grad_norm": 0.18868666887283325, |
| "learning_rate": 8.694265637239831e-05, |
| "loss": 0.1661, |
| "step": 1499 |
| }, |
| { |
| "epoch": 0.544069640914037, |
| "grad_norm": 0.20771367847919464, |
| "learning_rate": 8.682927955755093e-05, |
| "loss": 0.1839, |
| "step": 1500 |
| }, |
| { |
| "epoch": 0.5444323540079797, |
| "grad_norm": 0.1799492985010147, |
| "learning_rate": 8.671591996899974e-05, |
| "loss": 0.1782, |
| "step": 1501 |
| }, |
| { |
| "epoch": 0.5447950671019224, |
| "grad_norm": 0.17485234141349792, |
| "learning_rate": 8.660257775501045e-05, |
| "loss": 0.1698, |
| "step": 1502 |
| }, |
| { |
| "epoch": 0.545157780195865, |
| "grad_norm": 0.17470629513263702, |
| "learning_rate": 8.6489253063826e-05, |
| "loss": 0.1695, |
| "step": 1503 |
| }, |
| { |
| "epoch": 0.5455204932898078, |
| "grad_norm": 0.17630697786808014, |
| "learning_rate": 8.637594604366647e-05, |
| "loss": 0.175, |
| "step": 1504 |
| }, |
| { |
| "epoch": 0.5458832063837504, |
| "grad_norm": 0.19793953001499176, |
| "learning_rate": 8.626265684272876e-05, |
| "loss": 0.1798, |
| "step": 1505 |
| }, |
| { |
| "epoch": 0.5462459194776932, |
| "grad_norm": 0.19965516030788422, |
| "learning_rate": 8.614938560918649e-05, |
| "loss": 0.2011, |
| "step": 1506 |
| }, |
| { |
| "epoch": 0.5466086325716358, |
| "grad_norm": 0.18119129538536072, |
| "learning_rate": 8.603613249118977e-05, |
| "loss": 0.1624, |
| "step": 1507 |
| }, |
| { |
| "epoch": 0.5469713456655785, |
| "grad_norm": 0.19433656334877014, |
| "learning_rate": 8.592289763686505e-05, |
| "loss": 0.1842, |
| "step": 1508 |
| }, |
| { |
| "epoch": 0.5473340587595212, |
| "grad_norm": 0.17872895300388336, |
| "learning_rate": 8.580968119431483e-05, |
| "loss": 0.1628, |
| "step": 1509 |
| }, |
| { |
| "epoch": 0.5476967718534639, |
| "grad_norm": 0.18134737014770508, |
| "learning_rate": 8.569648331161762e-05, |
| "loss": 0.1649, |
| "step": 1510 |
| }, |
| { |
| "epoch": 0.5480594849474066, |
| "grad_norm": 0.19080941379070282, |
| "learning_rate": 8.558330413682759e-05, |
| "loss": 0.1856, |
| "step": 1511 |
| }, |
| { |
| "epoch": 0.5484221980413493, |
| "grad_norm": 0.20772339403629303, |
| "learning_rate": 8.547014381797445e-05, |
| "loss": 0.1904, |
| "step": 1512 |
| }, |
| { |
| "epoch": 0.5487849111352919, |
| "grad_norm": 0.1807977259159088, |
| "learning_rate": 8.535700250306322e-05, |
| "loss": 0.1719, |
| "step": 1513 |
| }, |
| { |
| "epoch": 0.5491476242292347, |
| "grad_norm": 0.18353581428527832, |
| "learning_rate": 8.524388034007415e-05, |
| "loss": 0.1758, |
| "step": 1514 |
| }, |
| { |
| "epoch": 0.5495103373231773, |
| "grad_norm": 0.22524112462997437, |
| "learning_rate": 8.51307774769623e-05, |
| "loss": 0.1821, |
| "step": 1515 |
| }, |
| { |
| "epoch": 0.5498730504171201, |
| "grad_norm": 0.17495766282081604, |
| "learning_rate": 8.501769406165769e-05, |
| "loss": 0.2193, |
| "step": 1516 |
| }, |
| { |
| "epoch": 0.5502357635110627, |
| "grad_norm": 0.17903603613376617, |
| "learning_rate": 8.490463024206474e-05, |
| "loss": 0.1687, |
| "step": 1517 |
| }, |
| { |
| "epoch": 0.5505984766050055, |
| "grad_norm": 0.1783863753080368, |
| "learning_rate": 8.479158616606228e-05, |
| "loss": 0.1699, |
| "step": 1518 |
| }, |
| { |
| "epoch": 0.5509611896989481, |
| "grad_norm": 0.17774266004562378, |
| "learning_rate": 8.467856198150333e-05, |
| "loss": 0.1946, |
| "step": 1519 |
| }, |
| { |
| "epoch": 0.5513239027928908, |
| "grad_norm": 0.20432449877262115, |
| "learning_rate": 8.45655578362149e-05, |
| "loss": 0.193, |
| "step": 1520 |
| }, |
| { |
| "epoch": 0.5516866158868335, |
| "grad_norm": 0.1733636111021042, |
| "learning_rate": 8.44525738779977e-05, |
| "loss": 0.1712, |
| "step": 1521 |
| }, |
| { |
| "epoch": 0.5520493289807762, |
| "grad_norm": 0.19748555123806, |
| "learning_rate": 8.433961025462624e-05, |
| "loss": 0.1969, |
| "step": 1522 |
| }, |
| { |
| "epoch": 0.552412042074719, |
| "grad_norm": 0.18513956665992737, |
| "learning_rate": 8.422666711384827e-05, |
| "loss": 0.1735, |
| "step": 1523 |
| }, |
| { |
| "epoch": 0.5527747551686616, |
| "grad_norm": 0.22357869148254395, |
| "learning_rate": 8.411374460338474e-05, |
| "loss": 0.1725, |
| "step": 1524 |
| }, |
| { |
| "epoch": 0.5531374682626042, |
| "grad_norm": 0.18229088187217712, |
| "learning_rate": 8.400084287092973e-05, |
| "loss": 0.1724, |
| "step": 1525 |
| }, |
| { |
| "epoch": 0.553500181356547, |
| "grad_norm": 0.15753042697906494, |
| "learning_rate": 8.388796206415004e-05, |
| "loss": 0.1762, |
| "step": 1526 |
| }, |
| { |
| "epoch": 0.5538628944504896, |
| "grad_norm": 0.18276041746139526, |
| "learning_rate": 8.377510233068518e-05, |
| "loss": 0.1862, |
| "step": 1527 |
| }, |
| { |
| "epoch": 0.5542256075444324, |
| "grad_norm": 0.2091018706560135, |
| "learning_rate": 8.366226381814697e-05, |
| "loss": 0.1722, |
| "step": 1528 |
| }, |
| { |
| "epoch": 0.554588320638375, |
| "grad_norm": 0.1851229965686798, |
| "learning_rate": 8.354944667411968e-05, |
| "loss": 0.174, |
| "step": 1529 |
| }, |
| { |
| "epoch": 0.5549510337323177, |
| "grad_norm": 0.18812698125839233, |
| "learning_rate": 8.343665104615948e-05, |
| "loss": 0.192, |
| "step": 1530 |
| }, |
| { |
| "epoch": 0.5553137468262604, |
| "grad_norm": 0.18323373794555664, |
| "learning_rate": 8.332387708179441e-05, |
| "loss": 0.185, |
| "step": 1531 |
| }, |
| { |
| "epoch": 0.5556764599202031, |
| "grad_norm": 0.187171071767807, |
| "learning_rate": 8.321112492852422e-05, |
| "loss": 0.18, |
| "step": 1532 |
| }, |
| { |
| "epoch": 0.5560391730141458, |
| "grad_norm": 0.18064919114112854, |
| "learning_rate": 8.30983947338201e-05, |
| "loss": 0.1739, |
| "step": 1533 |
| }, |
| { |
| "epoch": 0.5564018861080885, |
| "grad_norm": 0.1815587282180786, |
| "learning_rate": 8.29856866451245e-05, |
| "loss": 0.1818, |
| "step": 1534 |
| }, |
| { |
| "epoch": 0.5567645992020311, |
| "grad_norm": 0.19945740699768066, |
| "learning_rate": 8.287300080985106e-05, |
| "loss": 0.2014, |
| "step": 1535 |
| }, |
| { |
| "epoch": 0.5571273122959739, |
| "grad_norm": 0.1874108761548996, |
| "learning_rate": 8.276033737538424e-05, |
| "loss": 0.1719, |
| "step": 1536 |
| }, |
| { |
| "epoch": 0.5574900253899165, |
| "grad_norm": 0.173946350812912, |
| "learning_rate": 8.264769648907915e-05, |
| "loss": 0.1616, |
| "step": 1537 |
| }, |
| { |
| "epoch": 0.5578527384838593, |
| "grad_norm": 0.20264151692390442, |
| "learning_rate": 8.25350782982615e-05, |
| "loss": 0.1815, |
| "step": 1538 |
| }, |
| { |
| "epoch": 0.558215451577802, |
| "grad_norm": 0.17723354697227478, |
| "learning_rate": 8.242248295022727e-05, |
| "loss": 0.1869, |
| "step": 1539 |
| }, |
| { |
| "epoch": 0.5585781646717447, |
| "grad_norm": 0.16882532835006714, |
| "learning_rate": 8.230991059224257e-05, |
| "loss": 0.1593, |
| "step": 1540 |
| }, |
| { |
| "epoch": 0.5589408777656873, |
| "grad_norm": 0.17361445724964142, |
| "learning_rate": 8.219736137154347e-05, |
| "loss": 0.1696, |
| "step": 1541 |
| }, |
| { |
| "epoch": 0.55930359085963, |
| "grad_norm": 0.1865490823984146, |
| "learning_rate": 8.208483543533573e-05, |
| "loss": 0.2033, |
| "step": 1542 |
| }, |
| { |
| "epoch": 0.5596663039535728, |
| "grad_norm": 0.17689920961856842, |
| "learning_rate": 8.197233293079468e-05, |
| "loss": 0.1679, |
| "step": 1543 |
| }, |
| { |
| "epoch": 0.5600290170475154, |
| "grad_norm": 0.18286365270614624, |
| "learning_rate": 8.185985400506502e-05, |
| "loss": 0.1654, |
| "step": 1544 |
| }, |
| { |
| "epoch": 0.5603917301414582, |
| "grad_norm": 0.18033449351787567, |
| "learning_rate": 8.174739880526057e-05, |
| "loss": 0.1814, |
| "step": 1545 |
| }, |
| { |
| "epoch": 0.5607544432354008, |
| "grad_norm": 0.17507143318653107, |
| "learning_rate": 8.163496747846411e-05, |
| "loss": 0.1669, |
| "step": 1546 |
| }, |
| { |
| "epoch": 0.5611171563293434, |
| "grad_norm": 0.16485197842121124, |
| "learning_rate": 8.152256017172732e-05, |
| "loss": 0.1666, |
| "step": 1547 |
| }, |
| { |
| "epoch": 0.5614798694232862, |
| "grad_norm": 0.18058069050312042, |
| "learning_rate": 8.14101770320703e-05, |
| "loss": 0.1626, |
| "step": 1548 |
| }, |
| { |
| "epoch": 0.5618425825172289, |
| "grad_norm": 0.17364412546157837, |
| "learning_rate": 8.129781820648164e-05, |
| "loss": 0.1913, |
| "step": 1549 |
| }, |
| { |
| "epoch": 0.5622052956111716, |
| "grad_norm": 0.18617358803749084, |
| "learning_rate": 8.118548384191809e-05, |
| "loss": 0.1844, |
| "step": 1550 |
| }, |
| { |
| "epoch": 0.5625680087051143, |
| "grad_norm": 0.17379792034626007, |
| "learning_rate": 8.107317408530441e-05, |
| "loss": 0.1657, |
| "step": 1551 |
| }, |
| { |
| "epoch": 0.5629307217990569, |
| "grad_norm": 0.1696668565273285, |
| "learning_rate": 8.096088908353315e-05, |
| "loss": 0.1663, |
| "step": 1552 |
| }, |
| { |
| "epoch": 0.5632934348929997, |
| "grad_norm": 0.16332849860191345, |
| "learning_rate": 8.084862898346459e-05, |
| "loss": 0.1707, |
| "step": 1553 |
| }, |
| { |
| "epoch": 0.5636561479869423, |
| "grad_norm": 0.20836418867111206, |
| "learning_rate": 8.073639393192634e-05, |
| "loss": 0.1849, |
| "step": 1554 |
| }, |
| { |
| "epoch": 0.5640188610808851, |
| "grad_norm": 0.1766640543937683, |
| "learning_rate": 8.062418407571326e-05, |
| "loss": 0.1593, |
| "step": 1555 |
| }, |
| { |
| "epoch": 0.5643815741748277, |
| "grad_norm": 0.1723148226737976, |
| "learning_rate": 8.051199956158727e-05, |
| "loss": 0.1753, |
| "step": 1556 |
| }, |
| { |
| "epoch": 0.5647442872687704, |
| "grad_norm": 0.17197547852993011, |
| "learning_rate": 8.039984053627714e-05, |
| "loss": 0.1664, |
| "step": 1557 |
| }, |
| { |
| "epoch": 0.5651070003627131, |
| "grad_norm": 0.17370520532131195, |
| "learning_rate": 8.02877071464783e-05, |
| "loss": 0.1767, |
| "step": 1558 |
| }, |
| { |
| "epoch": 0.5654697134566558, |
| "grad_norm": 0.18708960711956024, |
| "learning_rate": 8.017559953885267e-05, |
| "loss": 0.1951, |
| "step": 1559 |
| }, |
| { |
| "epoch": 0.5658324265505985, |
| "grad_norm": 0.21225912868976593, |
| "learning_rate": 8.006351786002846e-05, |
| "loss": 0.1752, |
| "step": 1560 |
| }, |
| { |
| "epoch": 0.5661951396445412, |
| "grad_norm": 0.17883837223052979, |
| "learning_rate": 7.995146225659994e-05, |
| "loss": 0.1665, |
| "step": 1561 |
| }, |
| { |
| "epoch": 0.5665578527384839, |
| "grad_norm": 0.16992917656898499, |
| "learning_rate": 7.98394328751272e-05, |
| "loss": 0.1691, |
| "step": 1562 |
| }, |
| { |
| "epoch": 0.5669205658324266, |
| "grad_norm": 0.18541240692138672, |
| "learning_rate": 7.972742986213623e-05, |
| "loss": 0.1818, |
| "step": 1563 |
| }, |
| { |
| "epoch": 0.5672832789263692, |
| "grad_norm": 0.17470984160900116, |
| "learning_rate": 7.961545336411836e-05, |
| "loss": 0.1715, |
| "step": 1564 |
| }, |
| { |
| "epoch": 0.567645992020312, |
| "grad_norm": 0.21040913462638855, |
| "learning_rate": 7.950350352753023e-05, |
| "loss": 0.1873, |
| "step": 1565 |
| }, |
| { |
| "epoch": 0.5680087051142546, |
| "grad_norm": 0.17107225954532623, |
| "learning_rate": 7.93915804987938e-05, |
| "loss": 0.1559, |
| "step": 1566 |
| }, |
| { |
| "epoch": 0.5683714182081974, |
| "grad_norm": 0.16713112592697144, |
| "learning_rate": 7.927968442429576e-05, |
| "loss": 0.1734, |
| "step": 1567 |
| }, |
| { |
| "epoch": 0.56873413130214, |
| "grad_norm": 0.18837302923202515, |
| "learning_rate": 7.916781545038767e-05, |
| "loss": 0.167, |
| "step": 1568 |
| }, |
| { |
| "epoch": 0.5690968443960827, |
| "grad_norm": 0.17015686631202698, |
| "learning_rate": 7.905597372338558e-05, |
| "loss": 0.1703, |
| "step": 1569 |
| }, |
| { |
| "epoch": 0.5694595574900254, |
| "grad_norm": 0.17552775144577026, |
| "learning_rate": 7.894415938956991e-05, |
| "loss": 0.1623, |
| "step": 1570 |
| }, |
| { |
| "epoch": 0.5698222705839681, |
| "grad_norm": 0.1910295933485031, |
| "learning_rate": 7.883237259518526e-05, |
| "loss": 0.1642, |
| "step": 1571 |
| }, |
| { |
| "epoch": 0.5701849836779108, |
| "grad_norm": 0.19286568462848663, |
| "learning_rate": 7.872061348644028e-05, |
| "loss": 0.1776, |
| "step": 1572 |
| }, |
| { |
| "epoch": 0.5705476967718535, |
| "grad_norm": 0.17776118218898773, |
| "learning_rate": 7.86088822095073e-05, |
| "loss": 0.167, |
| "step": 1573 |
| }, |
| { |
| "epoch": 0.5709104098657961, |
| "grad_norm": 0.1805812269449234, |
| "learning_rate": 7.84971789105223e-05, |
| "loss": 0.1666, |
| "step": 1574 |
| }, |
| { |
| "epoch": 0.5712731229597389, |
| "grad_norm": 0.3048454523086548, |
| "learning_rate": 7.838550373558469e-05, |
| "loss": 0.2252, |
| "step": 1575 |
| }, |
| { |
| "epoch": 0.5716358360536815, |
| "grad_norm": 0.18575210869312286, |
| "learning_rate": 7.827385683075701e-05, |
| "loss": 0.1673, |
| "step": 1576 |
| }, |
| { |
| "epoch": 0.5719985491476243, |
| "grad_norm": 0.19140534102916718, |
| "learning_rate": 7.816223834206489e-05, |
| "loss": 0.1651, |
| "step": 1577 |
| }, |
| { |
| "epoch": 0.5723612622415669, |
| "grad_norm": 0.15774936974048615, |
| "learning_rate": 7.805064841549685e-05, |
| "loss": 0.1579, |
| "step": 1578 |
| }, |
| { |
| "epoch": 0.5727239753355096, |
| "grad_norm": 0.16118699312210083, |
| "learning_rate": 7.793908719700396e-05, |
| "loss": 0.1656, |
| "step": 1579 |
| }, |
| { |
| "epoch": 0.5730866884294523, |
| "grad_norm": 0.19020985066890717, |
| "learning_rate": 7.782755483249973e-05, |
| "loss": 0.1775, |
| "step": 1580 |
| }, |
| { |
| "epoch": 0.573449401523395, |
| "grad_norm": 0.1851213425397873, |
| "learning_rate": 7.771605146786003e-05, |
| "loss": 0.1876, |
| "step": 1581 |
| }, |
| { |
| "epoch": 0.5738121146173377, |
| "grad_norm": 0.17101642489433289, |
| "learning_rate": 7.760457724892272e-05, |
| "loss": 0.1714, |
| "step": 1582 |
| }, |
| { |
| "epoch": 0.5741748277112804, |
| "grad_norm": 0.17683084309101105, |
| "learning_rate": 7.749313232148753e-05, |
| "loss": 0.166, |
| "step": 1583 |
| }, |
| { |
| "epoch": 0.5745375408052231, |
| "grad_norm": 0.1966182291507721, |
| "learning_rate": 7.738171683131594e-05, |
| "loss": 0.1763, |
| "step": 1584 |
| }, |
| { |
| "epoch": 0.5749002538991658, |
| "grad_norm": 0.1787012666463852, |
| "learning_rate": 7.727033092413094e-05, |
| "loss": 0.1621, |
| "step": 1585 |
| }, |
| { |
| "epoch": 0.5752629669931084, |
| "grad_norm": 0.18337036669254303, |
| "learning_rate": 7.715897474561675e-05, |
| "loss": 0.1568, |
| "step": 1586 |
| }, |
| { |
| "epoch": 0.5756256800870512, |
| "grad_norm": 0.18342240154743195, |
| "learning_rate": 7.704764844141873e-05, |
| "loss": 0.1722, |
| "step": 1587 |
| }, |
| { |
| "epoch": 0.5759883931809938, |
| "grad_norm": 0.17828598618507385, |
| "learning_rate": 7.693635215714322e-05, |
| "loss": 0.1562, |
| "step": 1588 |
| }, |
| { |
| "epoch": 0.5763511062749366, |
| "grad_norm": 0.19080400466918945, |
| "learning_rate": 7.682508603835722e-05, |
| "loss": 0.1783, |
| "step": 1589 |
| }, |
| { |
| "epoch": 0.5767138193688792, |
| "grad_norm": 0.16964450478553772, |
| "learning_rate": 7.67138502305883e-05, |
| "loss": 0.1726, |
| "step": 1590 |
| }, |
| { |
| "epoch": 0.5770765324628219, |
| "grad_norm": 0.19029711186885834, |
| "learning_rate": 7.660264487932444e-05, |
| "loss": 0.1574, |
| "step": 1591 |
| }, |
| { |
| "epoch": 0.5774392455567646, |
| "grad_norm": 0.21546104550361633, |
| "learning_rate": 7.649147013001376e-05, |
| "loss": 0.1691, |
| "step": 1592 |
| }, |
| { |
| "epoch": 0.5778019586507073, |
| "grad_norm": 0.17420600354671478, |
| "learning_rate": 7.63803261280643e-05, |
| "loss": 0.1612, |
| "step": 1593 |
| }, |
| { |
| "epoch": 0.57816467174465, |
| "grad_norm": 0.18015912175178528, |
| "learning_rate": 7.626921301884395e-05, |
| "loss": 0.1622, |
| "step": 1594 |
| }, |
| { |
| "epoch": 0.5785273848385927, |
| "grad_norm": 0.16851022839546204, |
| "learning_rate": 7.615813094768012e-05, |
| "loss": 0.1642, |
| "step": 1595 |
| }, |
| { |
| "epoch": 0.5788900979325353, |
| "grad_norm": 0.1783701479434967, |
| "learning_rate": 7.604708005985971e-05, |
| "loss": 0.1726, |
| "step": 1596 |
| }, |
| { |
| "epoch": 0.5792528110264781, |
| "grad_norm": 0.16931217908859253, |
| "learning_rate": 7.593606050062881e-05, |
| "loss": 0.1608, |
| "step": 1597 |
| }, |
| { |
| "epoch": 0.5796155241204207, |
| "grad_norm": 0.16568873822689056, |
| "learning_rate": 7.582507241519252e-05, |
| "loss": 0.163, |
| "step": 1598 |
| }, |
| { |
| "epoch": 0.5799782372143635, |
| "grad_norm": 0.16731184720993042, |
| "learning_rate": 7.571411594871474e-05, |
| "loss": 0.2004, |
| "step": 1599 |
| }, |
| { |
| "epoch": 0.5803409503083061, |
| "grad_norm": 0.2044878900051117, |
| "learning_rate": 7.56031912463181e-05, |
| "loss": 0.1608, |
| "step": 1600 |
| }, |
| { |
| "epoch": 0.5807036634022488, |
| "grad_norm": 0.2527421712875366, |
| "learning_rate": 7.549229845308362e-05, |
| "loss": 0.1948, |
| "step": 1601 |
| }, |
| { |
| "epoch": 0.5810663764961915, |
| "grad_norm": 0.16458679735660553, |
| "learning_rate": 7.538143771405055e-05, |
| "loss": 0.1495, |
| "step": 1602 |
| }, |
| { |
| "epoch": 0.5814290895901342, |
| "grad_norm": 0.16658927500247955, |
| "learning_rate": 7.527060917421635e-05, |
| "loss": 0.1555, |
| "step": 1603 |
| }, |
| { |
| "epoch": 0.5817918026840769, |
| "grad_norm": 0.17401687800884247, |
| "learning_rate": 7.515981297853626e-05, |
| "loss": 0.1702, |
| "step": 1604 |
| }, |
| { |
| "epoch": 0.5821545157780196, |
| "grad_norm": 0.17915883660316467, |
| "learning_rate": 7.504904927192322e-05, |
| "loss": 0.175, |
| "step": 1605 |
| }, |
| { |
| "epoch": 0.5825172288719622, |
| "grad_norm": 0.18019749224185944, |
| "learning_rate": 7.493831819924772e-05, |
| "loss": 0.1703, |
| "step": 1606 |
| }, |
| { |
| "epoch": 0.582879941965905, |
| "grad_norm": 0.18500368297100067, |
| "learning_rate": 7.482761990533752e-05, |
| "loss": 0.1741, |
| "step": 1607 |
| }, |
| { |
| "epoch": 0.5832426550598476, |
| "grad_norm": 0.18486149609088898, |
| "learning_rate": 7.47169545349775e-05, |
| "loss": 0.1508, |
| "step": 1608 |
| }, |
| { |
| "epoch": 0.5836053681537904, |
| "grad_norm": 0.2042957842350006, |
| "learning_rate": 7.46063222329096e-05, |
| "loss": 0.173, |
| "step": 1609 |
| }, |
| { |
| "epoch": 0.583968081247733, |
| "grad_norm": 0.19605065882205963, |
| "learning_rate": 7.449572314383237e-05, |
| "loss": 0.1865, |
| "step": 1610 |
| }, |
| { |
| "epoch": 0.5843307943416758, |
| "grad_norm": 0.18277035653591156, |
| "learning_rate": 7.438515741240097e-05, |
| "loss": 0.195, |
| "step": 1611 |
| }, |
| { |
| "epoch": 0.5846935074356184, |
| "grad_norm": 0.18347297608852386, |
| "learning_rate": 7.427462518322693e-05, |
| "loss": 0.1579, |
| "step": 1612 |
| }, |
| { |
| "epoch": 0.5850562205295611, |
| "grad_norm": 0.1746947020292282, |
| "learning_rate": 7.416412660087796e-05, |
| "loss": 0.1951, |
| "step": 1613 |
| }, |
| { |
| "epoch": 0.5854189336235038, |
| "grad_norm": 0.1751972883939743, |
| "learning_rate": 7.405366180987775e-05, |
| "loss": 0.1633, |
| "step": 1614 |
| }, |
| { |
| "epoch": 0.5857816467174465, |
| "grad_norm": 0.17814141511917114, |
| "learning_rate": 7.394323095470586e-05, |
| "loss": 0.1845, |
| "step": 1615 |
| }, |
| { |
| "epoch": 0.5861443598113892, |
| "grad_norm": 0.1747366487979889, |
| "learning_rate": 7.383283417979739e-05, |
| "loss": 0.1777, |
| "step": 1616 |
| }, |
| { |
| "epoch": 0.5865070729053319, |
| "grad_norm": 0.177615687251091, |
| "learning_rate": 7.372247162954282e-05, |
| "loss": 0.1691, |
| "step": 1617 |
| }, |
| { |
| "epoch": 0.5868697859992745, |
| "grad_norm": 0.1927955448627472, |
| "learning_rate": 7.361214344828805e-05, |
| "loss": 0.164, |
| "step": 1618 |
| }, |
| { |
| "epoch": 0.5872324990932173, |
| "grad_norm": 0.17188555002212524, |
| "learning_rate": 7.350184978033386e-05, |
| "loss": 0.1704, |
| "step": 1619 |
| }, |
| { |
| "epoch": 0.5875952121871599, |
| "grad_norm": 0.25001007318496704, |
| "learning_rate": 7.339159076993592e-05, |
| "loss": 0.2025, |
| "step": 1620 |
| }, |
| { |
| "epoch": 0.5879579252811027, |
| "grad_norm": 0.18958470225334167, |
| "learning_rate": 7.328136656130458e-05, |
| "loss": 0.1793, |
| "step": 1621 |
| }, |
| { |
| "epoch": 0.5883206383750453, |
| "grad_norm": 0.18085351586341858, |
| "learning_rate": 7.317117729860475e-05, |
| "loss": 0.1669, |
| "step": 1622 |
| }, |
| { |
| "epoch": 0.588683351468988, |
| "grad_norm": 0.18232987821102142, |
| "learning_rate": 7.306102312595553e-05, |
| "loss": 0.1649, |
| "step": 1623 |
| }, |
| { |
| "epoch": 0.5890460645629307, |
| "grad_norm": 0.17970141768455505, |
| "learning_rate": 7.295090418743018e-05, |
| "loss": 0.1757, |
| "step": 1624 |
| }, |
| { |
| "epoch": 0.5894087776568734, |
| "grad_norm": 0.1799871325492859, |
| "learning_rate": 7.284082062705584e-05, |
| "loss": 0.1716, |
| "step": 1625 |
| }, |
| { |
| "epoch": 0.5897714907508161, |
| "grad_norm": 0.1792754977941513, |
| "learning_rate": 7.273077258881342e-05, |
| "loss": 0.1825, |
| "step": 1626 |
| }, |
| { |
| "epoch": 0.5901342038447588, |
| "grad_norm": 0.17742280662059784, |
| "learning_rate": 7.262076021663727e-05, |
| "loss": 0.159, |
| "step": 1627 |
| }, |
| { |
| "epoch": 0.5904969169387014, |
| "grad_norm": 0.20353969931602478, |
| "learning_rate": 7.251078365441528e-05, |
| "loss": 0.1597, |
| "step": 1628 |
| }, |
| { |
| "epoch": 0.5908596300326442, |
| "grad_norm": 0.18415038287639618, |
| "learning_rate": 7.240084304598835e-05, |
| "loss": 0.1774, |
| "step": 1629 |
| }, |
| { |
| "epoch": 0.5912223431265868, |
| "grad_norm": 0.18927162885665894, |
| "learning_rate": 7.229093853515038e-05, |
| "loss": 0.1628, |
| "step": 1630 |
| }, |
| { |
| "epoch": 0.5915850562205296, |
| "grad_norm": 0.1826174110174179, |
| "learning_rate": 7.21810702656481e-05, |
| "loss": 0.1604, |
| "step": 1631 |
| }, |
| { |
| "epoch": 0.5919477693144722, |
| "grad_norm": 0.17375624179840088, |
| "learning_rate": 7.207123838118077e-05, |
| "loss": 0.1647, |
| "step": 1632 |
| }, |
| { |
| "epoch": 0.592310482408415, |
| "grad_norm": 0.1889926791191101, |
| "learning_rate": 7.196144302540014e-05, |
| "loss": 0.1882, |
| "step": 1633 |
| }, |
| { |
| "epoch": 0.5926731955023576, |
| "grad_norm": 0.17155472934246063, |
| "learning_rate": 7.185168434191014e-05, |
| "loss": 0.1552, |
| "step": 1634 |
| }, |
| { |
| "epoch": 0.5930359085963003, |
| "grad_norm": 0.18929725885391235, |
| "learning_rate": 7.174196247426677e-05, |
| "loss": 0.163, |
| "step": 1635 |
| }, |
| { |
| "epoch": 0.593398621690243, |
| "grad_norm": 0.18491095304489136, |
| "learning_rate": 7.163227756597779e-05, |
| "loss": 0.172, |
| "step": 1636 |
| }, |
| { |
| "epoch": 0.5937613347841857, |
| "grad_norm": 0.19160285592079163, |
| "learning_rate": 7.152262976050275e-05, |
| "loss": 0.1642, |
| "step": 1637 |
| }, |
| { |
| "epoch": 0.5941240478781284, |
| "grad_norm": 0.18393130600452423, |
| "learning_rate": 7.141301920125256e-05, |
| "loss": 0.1504, |
| "step": 1638 |
| }, |
| { |
| "epoch": 0.5944867609720711, |
| "grad_norm": 0.1797264665365219, |
| "learning_rate": 7.130344603158942e-05, |
| "loss": 0.1607, |
| "step": 1639 |
| }, |
| { |
| "epoch": 0.5948494740660137, |
| "grad_norm": 0.16639918088912964, |
| "learning_rate": 7.119391039482677e-05, |
| "loss": 0.1637, |
| "step": 1640 |
| }, |
| { |
| "epoch": 0.5952121871599565, |
| "grad_norm": 0.17723850905895233, |
| "learning_rate": 7.10844124342288e-05, |
| "loss": 0.1695, |
| "step": 1641 |
| }, |
| { |
| "epoch": 0.5955749002538991, |
| "grad_norm": 0.1672993302345276, |
| "learning_rate": 7.097495229301048e-05, |
| "loss": 0.1596, |
| "step": 1642 |
| }, |
| { |
| "epoch": 0.5959376133478419, |
| "grad_norm": 0.18969713151454926, |
| "learning_rate": 7.08655301143373e-05, |
| "loss": 0.1658, |
| "step": 1643 |
| }, |
| { |
| "epoch": 0.5963003264417845, |
| "grad_norm": 0.18681742250919342, |
| "learning_rate": 7.075614604132512e-05, |
| "loss": 0.1822, |
| "step": 1644 |
| }, |
| { |
| "epoch": 0.5966630395357272, |
| "grad_norm": 0.22509360313415527, |
| "learning_rate": 7.064680021703992e-05, |
| "loss": 0.1951, |
| "step": 1645 |
| }, |
| { |
| "epoch": 0.59702575262967, |
| "grad_norm": 0.1588478535413742, |
| "learning_rate": 7.053749278449774e-05, |
| "loss": 0.1643, |
| "step": 1646 |
| }, |
| { |
| "epoch": 0.5973884657236126, |
| "grad_norm": 0.1908983290195465, |
| "learning_rate": 7.042822388666436e-05, |
| "loss": 0.1674, |
| "step": 1647 |
| }, |
| { |
| "epoch": 0.5977511788175554, |
| "grad_norm": 0.19821012020111084, |
| "learning_rate": 7.031899366645511e-05, |
| "loss": 0.1817, |
| "step": 1648 |
| }, |
| { |
| "epoch": 0.598113891911498, |
| "grad_norm": 0.18674594163894653, |
| "learning_rate": 7.020980226673477e-05, |
| "loss": 0.1547, |
| "step": 1649 |
| }, |
| { |
| "epoch": 0.5984766050054406, |
| "grad_norm": 0.2012438029050827, |
| "learning_rate": 7.010064983031737e-05, |
| "loss": 0.1793, |
| "step": 1650 |
| }, |
| { |
| "epoch": 0.5988393180993834, |
| "grad_norm": 0.18832942843437195, |
| "learning_rate": 6.999153649996595e-05, |
| "loss": 0.1797, |
| "step": 1651 |
| }, |
| { |
| "epoch": 0.599202031193326, |
| "grad_norm": 0.20757931470870972, |
| "learning_rate": 6.98824624183924e-05, |
| "loss": 0.174, |
| "step": 1652 |
| }, |
| { |
| "epoch": 0.5995647442872688, |
| "grad_norm": 0.1787773221731186, |
| "learning_rate": 6.977342772825732e-05, |
| "loss": 0.1577, |
| "step": 1653 |
| }, |
| { |
| "epoch": 0.5999274573812114, |
| "grad_norm": 0.18228726089000702, |
| "learning_rate": 6.966443257216971e-05, |
| "loss": 0.1834, |
| "step": 1654 |
| }, |
| { |
| "epoch": 0.6002901704751542, |
| "grad_norm": 10869.5341796875, |
| "learning_rate": 6.955547709268697e-05, |
| "loss": 0.1647, |
| "step": 1655 |
| }, |
| { |
| "epoch": 0.6006528835690969, |
| "grad_norm": 0.18677209317684174, |
| "learning_rate": 6.94465614323145e-05, |
| "loss": 0.1921, |
| "step": 1656 |
| }, |
| { |
| "epoch": 0.6010155966630395, |
| "grad_norm": 0.21163515746593475, |
| "learning_rate": 6.933768573350567e-05, |
| "loss": 0.171, |
| "step": 1657 |
| }, |
| { |
| "epoch": 0.6013783097569823, |
| "grad_norm": 0.1897449642419815, |
| "learning_rate": 6.922885013866153e-05, |
| "loss": 0.1877, |
| "step": 1658 |
| }, |
| { |
| "epoch": 0.6017410228509249, |
| "grad_norm": 0.20126648247241974, |
| "learning_rate": 6.912005479013082e-05, |
| "loss": 0.2154, |
| "step": 1659 |
| }, |
| { |
| "epoch": 0.6021037359448677, |
| "grad_norm": 0.21092937886714935, |
| "learning_rate": 6.901129983020948e-05, |
| "loss": 0.1868, |
| "step": 1660 |
| }, |
| { |
| "epoch": 0.6024664490388103, |
| "grad_norm": 0.23496972024440765, |
| "learning_rate": 6.890258540114074e-05, |
| "loss": 0.1784, |
| "step": 1661 |
| }, |
| { |
| "epoch": 0.602829162132753, |
| "grad_norm": 0.21016502380371094, |
| "learning_rate": 6.879391164511471e-05, |
| "loss": 0.1728, |
| "step": 1662 |
| }, |
| { |
| "epoch": 0.6031918752266957, |
| "grad_norm": 0.2230292558670044, |
| "learning_rate": 6.86852787042684e-05, |
| "loss": 0.1849, |
| "step": 1663 |
| }, |
| { |
| "epoch": 0.6035545883206384, |
| "grad_norm": 0.19853949546813965, |
| "learning_rate": 6.857668672068534e-05, |
| "loss": 0.1782, |
| "step": 1664 |
| }, |
| { |
| "epoch": 0.6039173014145811, |
| "grad_norm": 0.1775451898574829, |
| "learning_rate": 6.846813583639562e-05, |
| "loss": 0.1497, |
| "step": 1665 |
| }, |
| { |
| "epoch": 0.6042800145085238, |
| "grad_norm": 0.1857757419347763, |
| "learning_rate": 6.835962619337549e-05, |
| "loss": 0.1836, |
| "step": 1666 |
| }, |
| { |
| "epoch": 0.6046427276024664, |
| "grad_norm": 0.1867503970861435, |
| "learning_rate": 6.825115793354726e-05, |
| "loss": 0.1556, |
| "step": 1667 |
| }, |
| { |
| "epoch": 0.6050054406964092, |
| "grad_norm": 0.18607592582702637, |
| "learning_rate": 6.814273119877912e-05, |
| "loss": 0.2011, |
| "step": 1668 |
| }, |
| { |
| "epoch": 0.6053681537903518, |
| "grad_norm": 0.18926583230495453, |
| "learning_rate": 6.803434613088497e-05, |
| "loss": 0.1661, |
| "step": 1669 |
| }, |
| { |
| "epoch": 0.6057308668842946, |
| "grad_norm": 0.18735969066619873, |
| "learning_rate": 6.792600287162416e-05, |
| "loss": 0.1591, |
| "step": 1670 |
| }, |
| { |
| "epoch": 0.6060935799782372, |
| "grad_norm": 0.23324711620807648, |
| "learning_rate": 6.781770156270149e-05, |
| "loss": 0.1656, |
| "step": 1671 |
| }, |
| { |
| "epoch": 0.6064562930721799, |
| "grad_norm": 0.1974279284477234, |
| "learning_rate": 6.77094423457667e-05, |
| "loss": 0.1585, |
| "step": 1672 |
| }, |
| { |
| "epoch": 0.6068190061661226, |
| "grad_norm": 0.20500749349594116, |
| "learning_rate": 6.760122536241462e-05, |
| "loss": 0.164, |
| "step": 1673 |
| }, |
| { |
| "epoch": 0.6071817192600653, |
| "grad_norm": 0.16157761216163635, |
| "learning_rate": 6.749305075418482e-05, |
| "loss": 0.171, |
| "step": 1674 |
| }, |
| { |
| "epoch": 0.607544432354008, |
| "grad_norm": 0.19271859526634216, |
| "learning_rate": 6.738491866256138e-05, |
| "loss": 0.1777, |
| "step": 1675 |
| }, |
| { |
| "epoch": 0.6079071454479507, |
| "grad_norm": 0.18441638350486755, |
| "learning_rate": 6.727682922897282e-05, |
| "loss": 0.1683, |
| "step": 1676 |
| }, |
| { |
| "epoch": 0.6082698585418934, |
| "grad_norm": 0.17519617080688477, |
| "learning_rate": 6.716878259479189e-05, |
| "loss": 0.1739, |
| "step": 1677 |
| }, |
| { |
| "epoch": 0.6086325716358361, |
| "grad_norm": 0.18938271701335907, |
| "learning_rate": 6.706077890133531e-05, |
| "loss": 0.1606, |
| "step": 1678 |
| }, |
| { |
| "epoch": 0.6089952847297787, |
| "grad_norm": 0.20264668762683868, |
| "learning_rate": 6.695281828986369e-05, |
| "loss": 0.174, |
| "step": 1679 |
| }, |
| { |
| "epoch": 0.6093579978237215, |
| "grad_norm": 0.22438956797122955, |
| "learning_rate": 6.684490090158124e-05, |
| "loss": 0.1594, |
| "step": 1680 |
| }, |
| { |
| "epoch": 0.6097207109176641, |
| "grad_norm": 0.19163423776626587, |
| "learning_rate": 6.673702687763565e-05, |
| "loss": 0.1594, |
| "step": 1681 |
| }, |
| { |
| "epoch": 0.6100834240116069, |
| "grad_norm": 0.1845075786113739, |
| "learning_rate": 6.662919635911793e-05, |
| "loss": 0.173, |
| "step": 1682 |
| }, |
| { |
| "epoch": 0.6104461371055495, |
| "grad_norm": 0.18868669867515564, |
| "learning_rate": 6.652140948706209e-05, |
| "loss": 0.1786, |
| "step": 1683 |
| }, |
| { |
| "epoch": 0.6108088501994922, |
| "grad_norm": 0.22319957613945007, |
| "learning_rate": 6.641366640244525e-05, |
| "loss": 0.2068, |
| "step": 1684 |
| }, |
| { |
| "epoch": 0.6111715632934349, |
| "grad_norm": 0.18685069680213928, |
| "learning_rate": 6.630596724618703e-05, |
| "loss": 0.1751, |
| "step": 1685 |
| }, |
| { |
| "epoch": 0.6115342763873776, |
| "grad_norm": 0.18427863717079163, |
| "learning_rate": 6.619831215914974e-05, |
| "loss": 0.1707, |
| "step": 1686 |
| }, |
| { |
| "epoch": 0.6118969894813203, |
| "grad_norm": 0.19461330771446228, |
| "learning_rate": 6.609070128213802e-05, |
| "loss": 0.178, |
| "step": 1687 |
| }, |
| { |
| "epoch": 0.612259702575263, |
| "grad_norm": 0.21272696554660797, |
| "learning_rate": 6.598313475589863e-05, |
| "loss": 0.1789, |
| "step": 1688 |
| }, |
| { |
| "epoch": 0.6126224156692056, |
| "grad_norm": 0.20163173973560333, |
| "learning_rate": 6.58756127211204e-05, |
| "loss": 0.2014, |
| "step": 1689 |
| }, |
| { |
| "epoch": 0.6129851287631484, |
| "grad_norm": 0.1940133273601532, |
| "learning_rate": 6.576813531843396e-05, |
| "loss": 0.1703, |
| "step": 1690 |
| }, |
| { |
| "epoch": 0.613347841857091, |
| "grad_norm": 0.17384611070156097, |
| "learning_rate": 6.566070268841152e-05, |
| "loss": 0.1556, |
| "step": 1691 |
| }, |
| { |
| "epoch": 0.6137105549510338, |
| "grad_norm": 0.1869945228099823, |
| "learning_rate": 6.555331497156672e-05, |
| "loss": 0.1548, |
| "step": 1692 |
| }, |
| { |
| "epoch": 0.6140732680449764, |
| "grad_norm": 0.18520064651966095, |
| "learning_rate": 6.544597230835454e-05, |
| "loss": 0.1807, |
| "step": 1693 |
| }, |
| { |
| "epoch": 0.6144359811389191, |
| "grad_norm": 0.17966820299625397, |
| "learning_rate": 6.533867483917098e-05, |
| "loss": 0.1516, |
| "step": 1694 |
| }, |
| { |
| "epoch": 0.6147986942328618, |
| "grad_norm": 0.1705074906349182, |
| "learning_rate": 6.523142270435288e-05, |
| "loss": 0.1518, |
| "step": 1695 |
| }, |
| { |
| "epoch": 0.6151614073268045, |
| "grad_norm": 0.24414807558059692, |
| "learning_rate": 6.512421604417792e-05, |
| "loss": 0.2026, |
| "step": 1696 |
| }, |
| { |
| "epoch": 0.6155241204207472, |
| "grad_norm": 0.16796554625034332, |
| "learning_rate": 6.501705499886418e-05, |
| "loss": 0.1554, |
| "step": 1697 |
| }, |
| { |
| "epoch": 0.6158868335146899, |
| "grad_norm": 0.19749103486537933, |
| "learning_rate": 6.490993970857011e-05, |
| "loss": 0.1807, |
| "step": 1698 |
| }, |
| { |
| "epoch": 0.6162495466086326, |
| "grad_norm": 0.16789931058883667, |
| "learning_rate": 6.480287031339436e-05, |
| "loss": 0.1617, |
| "step": 1699 |
| }, |
| { |
| "epoch": 0.6166122597025753, |
| "grad_norm": 0.1916869580745697, |
| "learning_rate": 6.469584695337548e-05, |
| "loss": 0.188, |
| "step": 1700 |
| }, |
| { |
| "epoch": 0.6169749727965179, |
| "grad_norm": 0.19540345668792725, |
| "learning_rate": 6.458886976849183e-05, |
| "loss": 0.1743, |
| "step": 1701 |
| }, |
| { |
| "epoch": 0.6173376858904607, |
| "grad_norm": 0.17193295061588287, |
| "learning_rate": 6.448193889866149e-05, |
| "loss": 0.1763, |
| "step": 1702 |
| }, |
| { |
| "epoch": 0.6177003989844033, |
| "grad_norm": 0.17156308889389038, |
| "learning_rate": 6.43750544837418e-05, |
| "loss": 0.158, |
| "step": 1703 |
| }, |
| { |
| "epoch": 0.6180631120783461, |
| "grad_norm": 0.1796158254146576, |
| "learning_rate": 6.426821666352942e-05, |
| "loss": 0.1656, |
| "step": 1704 |
| }, |
| { |
| "epoch": 0.6184258251722887, |
| "grad_norm": 0.18700680136680603, |
| "learning_rate": 6.416142557776006e-05, |
| "loss": 0.174, |
| "step": 1705 |
| }, |
| { |
| "epoch": 0.6187885382662314, |
| "grad_norm": 0.16723744571208954, |
| "learning_rate": 6.405468136610832e-05, |
| "loss": 0.1619, |
| "step": 1706 |
| }, |
| { |
| "epoch": 0.6191512513601741, |
| "grad_norm": 0.17422862350940704, |
| "learning_rate": 6.394798416818739e-05, |
| "loss": 0.1609, |
| "step": 1707 |
| }, |
| { |
| "epoch": 0.6195139644541168, |
| "grad_norm": 0.20079629123210907, |
| "learning_rate": 6.384133412354918e-05, |
| "loss": 0.1652, |
| "step": 1708 |
| }, |
| { |
| "epoch": 0.6198766775480595, |
| "grad_norm": 0.2474866658449173, |
| "learning_rate": 6.373473137168373e-05, |
| "loss": 0.1663, |
| "step": 1709 |
| }, |
| { |
| "epoch": 0.6202393906420022, |
| "grad_norm": 0.1707204282283783, |
| "learning_rate": 6.36281760520193e-05, |
| "loss": 0.1592, |
| "step": 1710 |
| }, |
| { |
| "epoch": 0.6206021037359448, |
| "grad_norm": 0.17606933414936066, |
| "learning_rate": 6.352166830392213e-05, |
| "loss": 0.1662, |
| "step": 1711 |
| }, |
| { |
| "epoch": 0.6209648168298876, |
| "grad_norm": 0.17025688290596008, |
| "learning_rate": 6.341520826669621e-05, |
| "loss": 0.1592, |
| "step": 1712 |
| }, |
| { |
| "epoch": 0.6213275299238302, |
| "grad_norm": 0.18838566541671753, |
| "learning_rate": 6.330879607958314e-05, |
| "loss": 0.1816, |
| "step": 1713 |
| }, |
| { |
| "epoch": 0.621690243017773, |
| "grad_norm": 0.2592281103134155, |
| "learning_rate": 6.320243188176185e-05, |
| "loss": 0.2014, |
| "step": 1714 |
| }, |
| { |
| "epoch": 0.6220529561117156, |
| "grad_norm": 0.16398011147975922, |
| "learning_rate": 6.309611581234872e-05, |
| "loss": 0.1585, |
| "step": 1715 |
| }, |
| { |
| "epoch": 0.6224156692056583, |
| "grad_norm": 0.1793876439332962, |
| "learning_rate": 6.298984801039697e-05, |
| "loss": 0.1532, |
| "step": 1716 |
| }, |
| { |
| "epoch": 0.622778382299601, |
| "grad_norm": 0.1910189986228943, |
| "learning_rate": 6.28836286148968e-05, |
| "loss": 0.1666, |
| "step": 1717 |
| }, |
| { |
| "epoch": 0.6231410953935437, |
| "grad_norm": 0.20349231362342834, |
| "learning_rate": 6.277745776477506e-05, |
| "loss": 0.2075, |
| "step": 1718 |
| }, |
| { |
| "epoch": 0.6235038084874864, |
| "grad_norm": 0.19140169024467468, |
| "learning_rate": 6.267133559889509e-05, |
| "loss": 0.1574, |
| "step": 1719 |
| }, |
| { |
| "epoch": 0.6238665215814291, |
| "grad_norm": 0.18104875087738037, |
| "learning_rate": 6.256526225605652e-05, |
| "loss": 0.1594, |
| "step": 1720 |
| }, |
| { |
| "epoch": 0.6242292346753718, |
| "grad_norm": 0.18763144314289093, |
| "learning_rate": 6.245923787499532e-05, |
| "loss": 0.1613, |
| "step": 1721 |
| }, |
| { |
| "epoch": 0.6245919477693145, |
| "grad_norm": 0.16338056325912476, |
| "learning_rate": 6.235326259438317e-05, |
| "loss": 0.1823, |
| "step": 1722 |
| }, |
| { |
| "epoch": 0.6249546608632571, |
| "grad_norm": 0.1663455367088318, |
| "learning_rate": 6.224733655282771e-05, |
| "loss": 0.167, |
| "step": 1723 |
| }, |
| { |
| "epoch": 0.6253173739571999, |
| "grad_norm": 0.17179372906684875, |
| "learning_rate": 6.214145988887206e-05, |
| "loss": 0.1645, |
| "step": 1724 |
| }, |
| { |
| "epoch": 0.6256800870511425, |
| "grad_norm": 0.16161875426769257, |
| "learning_rate": 6.203563274099481e-05, |
| "loss": 0.1402, |
| "step": 1725 |
| }, |
| { |
| "epoch": 0.6260428001450853, |
| "grad_norm": 0.2017858475446701, |
| "learning_rate": 6.19298552476098e-05, |
| "loss": 0.1667, |
| "step": 1726 |
| }, |
| { |
| "epoch": 0.6264055132390279, |
| "grad_norm": 0.22198174893856049, |
| "learning_rate": 6.182412754706594e-05, |
| "loss": 0.1902, |
| "step": 1727 |
| }, |
| { |
| "epoch": 0.6267682263329706, |
| "grad_norm": 0.1705772578716278, |
| "learning_rate": 6.171844977764695e-05, |
| "loss": 0.1588, |
| "step": 1728 |
| }, |
| { |
| "epoch": 0.6271309394269133, |
| "grad_norm": 0.17019295692443848, |
| "learning_rate": 6.161282207757126e-05, |
| "loss": 0.1609, |
| "step": 1729 |
| }, |
| { |
| "epoch": 0.627493652520856, |
| "grad_norm": 0.1743742674589157, |
| "learning_rate": 6.15072445849919e-05, |
| "loss": 0.179, |
| "step": 1730 |
| }, |
| { |
| "epoch": 0.6278563656147987, |
| "grad_norm": 0.16775129735469818, |
| "learning_rate": 6.140171743799611e-05, |
| "loss": 0.1807, |
| "step": 1731 |
| }, |
| { |
| "epoch": 0.6282190787087414, |
| "grad_norm": 0.18963152170181274, |
| "learning_rate": 6.129624077460532e-05, |
| "loss": 0.2007, |
| "step": 1732 |
| }, |
| { |
| "epoch": 0.628581791802684, |
| "grad_norm": 0.182524174451828, |
| "learning_rate": 6.119081473277501e-05, |
| "loss": 0.1738, |
| "step": 1733 |
| }, |
| { |
| "epoch": 0.6289445048966268, |
| "grad_norm": 0.18262414634227753, |
| "learning_rate": 6.108543945039438e-05, |
| "loss": 0.1897, |
| "step": 1734 |
| }, |
| { |
| "epoch": 0.6293072179905694, |
| "grad_norm": 0.1729535162448883, |
| "learning_rate": 6.098011506528623e-05, |
| "loss": 0.1586, |
| "step": 1735 |
| }, |
| { |
| "epoch": 0.6296699310845122, |
| "grad_norm": 0.1677355319261551, |
| "learning_rate": 6.0874841715206785e-05, |
| "loss": 0.1871, |
| "step": 1736 |
| }, |
| { |
| "epoch": 0.6300326441784548, |
| "grad_norm": 0.17900875210762024, |
| "learning_rate": 6.076961953784559e-05, |
| "loss": 0.1595, |
| "step": 1737 |
| }, |
| { |
| "epoch": 0.6303953572723975, |
| "grad_norm": 0.18250757455825806, |
| "learning_rate": 6.066444867082515e-05, |
| "loss": 0.1842, |
| "step": 1738 |
| }, |
| { |
| "epoch": 0.6307580703663402, |
| "grad_norm": 0.17696964740753174, |
| "learning_rate": 6.0559329251701005e-05, |
| "loss": 0.1709, |
| "step": 1739 |
| }, |
| { |
| "epoch": 0.6311207834602829, |
| "grad_norm": 0.1764724850654602, |
| "learning_rate": 6.045426141796128e-05, |
| "loss": 0.161, |
| "step": 1740 |
| }, |
| { |
| "epoch": 0.6314834965542256, |
| "grad_norm": 0.17228443920612335, |
| "learning_rate": 6.03492453070267e-05, |
| "loss": 0.1579, |
| "step": 1741 |
| }, |
| { |
| "epoch": 0.6318462096481683, |
| "grad_norm": 0.17399545013904572, |
| "learning_rate": 6.024428105625028e-05, |
| "loss": 0.1555, |
| "step": 1742 |
| }, |
| { |
| "epoch": 0.6322089227421109, |
| "grad_norm": 0.1953967958688736, |
| "learning_rate": 6.0139368802917284e-05, |
| "loss": 0.2569, |
| "step": 1743 |
| }, |
| { |
| "epoch": 0.6325716358360537, |
| "grad_norm": 0.17359597980976105, |
| "learning_rate": 6.0034508684244875e-05, |
| "loss": 0.1783, |
| "step": 1744 |
| }, |
| { |
| "epoch": 0.6329343489299963, |
| "grad_norm": 0.1505521535873413, |
| "learning_rate": 5.992970083738212e-05, |
| "loss": 0.1567, |
| "step": 1745 |
| }, |
| { |
| "epoch": 0.6332970620239391, |
| "grad_norm": 0.18801428377628326, |
| "learning_rate": 5.982494539940966e-05, |
| "loss": 0.2076, |
| "step": 1746 |
| }, |
| { |
| "epoch": 0.6336597751178817, |
| "grad_norm": 0.16666316986083984, |
| "learning_rate": 5.97202425073396e-05, |
| "loss": 0.1617, |
| "step": 1747 |
| }, |
| { |
| "epoch": 0.6340224882118245, |
| "grad_norm": 0.174256831407547, |
| "learning_rate": 5.961559229811535e-05, |
| "loss": 0.167, |
| "step": 1748 |
| }, |
| { |
| "epoch": 0.6343852013057671, |
| "grad_norm": 0.16997861862182617, |
| "learning_rate": 5.951099490861136e-05, |
| "loss": 0.191, |
| "step": 1749 |
| }, |
| { |
| "epoch": 0.6347479143997098, |
| "grad_norm": 0.18059667944908142, |
| "learning_rate": 5.940645047563306e-05, |
| "loss": 0.1769, |
| "step": 1750 |
| }, |
| { |
| "epoch": 0.6351106274936525, |
| "grad_norm": 0.17815832793712616, |
| "learning_rate": 5.9301959135916496e-05, |
| "loss": 0.1406, |
| "step": 1751 |
| }, |
| { |
| "epoch": 0.6354733405875952, |
| "grad_norm": 0.1702101081609726, |
| "learning_rate": 5.919752102612848e-05, |
| "loss": 0.1471, |
| "step": 1752 |
| }, |
| { |
| "epoch": 0.635836053681538, |
| "grad_norm": 0.1625283807516098, |
| "learning_rate": 5.909313628286601e-05, |
| "loss": 0.1446, |
| "step": 1753 |
| }, |
| { |
| "epoch": 0.6361987667754806, |
| "grad_norm": 0.16857244074344635, |
| "learning_rate": 5.898880504265638e-05, |
| "loss": 0.1561, |
| "step": 1754 |
| }, |
| { |
| "epoch": 0.6365614798694232, |
| "grad_norm": 0.18340398371219635, |
| "learning_rate": 5.888452744195687e-05, |
| "loss": 0.1862, |
| "step": 1755 |
| }, |
| { |
| "epoch": 0.636924192963366, |
| "grad_norm": 0.20158030092716217, |
| "learning_rate": 5.878030361715461e-05, |
| "loss": 0.1571, |
| "step": 1756 |
| }, |
| { |
| "epoch": 0.6372869060573086, |
| "grad_norm": 0.17433685064315796, |
| "learning_rate": 5.867613370456636e-05, |
| "loss": 0.1629, |
| "step": 1757 |
| }, |
| { |
| "epoch": 0.6376496191512514, |
| "grad_norm": 0.16959048807621002, |
| "learning_rate": 5.857201784043851e-05, |
| "loss": 0.1742, |
| "step": 1758 |
| }, |
| { |
| "epoch": 0.638012332245194, |
| "grad_norm": 0.17399851977825165, |
| "learning_rate": 5.8467956160946604e-05, |
| "loss": 0.1605, |
| "step": 1759 |
| }, |
| { |
| "epoch": 0.6383750453391367, |
| "grad_norm": 0.1925593912601471, |
| "learning_rate": 5.8363948802195356e-05, |
| "loss": 0.2142, |
| "step": 1760 |
| }, |
| { |
| "epoch": 0.6387377584330794, |
| "grad_norm": 0.1870613396167755, |
| "learning_rate": 5.8259995900218465e-05, |
| "loss": 0.1619, |
| "step": 1761 |
| }, |
| { |
| "epoch": 0.6391004715270221, |
| "grad_norm": 0.18008996546268463, |
| "learning_rate": 5.815609759097837e-05, |
| "loss": 0.1594, |
| "step": 1762 |
| }, |
| { |
| "epoch": 0.6394631846209649, |
| "grad_norm": 0.1749439388513565, |
| "learning_rate": 5.8052254010366105e-05, |
| "loss": 0.1543, |
| "step": 1763 |
| }, |
| { |
| "epoch": 0.6398258977149075, |
| "grad_norm": 0.17792417109012604, |
| "learning_rate": 5.7948465294201194e-05, |
| "loss": 0.1679, |
| "step": 1764 |
| }, |
| { |
| "epoch": 0.6401886108088501, |
| "grad_norm": 0.18781551718711853, |
| "learning_rate": 5.7844731578231334e-05, |
| "loss": 0.1634, |
| "step": 1765 |
| }, |
| { |
| "epoch": 0.6405513239027929, |
| "grad_norm": 0.17064349353313446, |
| "learning_rate": 5.7741052998132285e-05, |
| "loss": 0.1547, |
| "step": 1766 |
| }, |
| { |
| "epoch": 0.6409140369967355, |
| "grad_norm": 0.15985310077667236, |
| "learning_rate": 5.7637429689507713e-05, |
| "loss": 0.1446, |
| "step": 1767 |
| }, |
| { |
| "epoch": 0.6412767500906783, |
| "grad_norm": 0.18584533035755157, |
| "learning_rate": 5.7533861787888995e-05, |
| "loss": 0.1692, |
| "step": 1768 |
| }, |
| { |
| "epoch": 0.641639463184621, |
| "grad_norm": 0.18340182304382324, |
| "learning_rate": 5.7430349428734995e-05, |
| "loss": 0.1698, |
| "step": 1769 |
| }, |
| { |
| "epoch": 0.6420021762785637, |
| "grad_norm": 0.15710604190826416, |
| "learning_rate": 5.732689274743204e-05, |
| "loss": 0.1465, |
| "step": 1770 |
| }, |
| { |
| "epoch": 0.6423648893725064, |
| "grad_norm": 0.17073456943035126, |
| "learning_rate": 5.7223491879293526e-05, |
| "loss": 0.1531, |
| "step": 1771 |
| }, |
| { |
| "epoch": 0.642727602466449, |
| "grad_norm": 0.17552490532398224, |
| "learning_rate": 5.712014695955991e-05, |
| "loss": 0.1519, |
| "step": 1772 |
| }, |
| { |
| "epoch": 0.6430903155603918, |
| "grad_norm": 0.20075669884681702, |
| "learning_rate": 5.7016858123398434e-05, |
| "loss": 0.167, |
| "step": 1773 |
| }, |
| { |
| "epoch": 0.6434530286543344, |
| "grad_norm": 0.20733250677585602, |
| "learning_rate": 5.691362550590297e-05, |
| "loss": 0.1745, |
| "step": 1774 |
| }, |
| { |
| "epoch": 0.6438157417482772, |
| "grad_norm": 0.16159029304981232, |
| "learning_rate": 5.681044924209398e-05, |
| "loss": 0.15, |
| "step": 1775 |
| }, |
| { |
| "epoch": 0.6441784548422198, |
| "grad_norm": 0.184630885720253, |
| "learning_rate": 5.670732946691808e-05, |
| "loss": 0.1756, |
| "step": 1776 |
| }, |
| { |
| "epoch": 0.6445411679361625, |
| "grad_norm": 0.16852855682373047, |
| "learning_rate": 5.6604266315248034e-05, |
| "loss": 0.1642, |
| "step": 1777 |
| }, |
| { |
| "epoch": 0.6449038810301052, |
| "grad_norm": 0.16728003323078156, |
| "learning_rate": 5.6501259921882655e-05, |
| "loss": 0.1612, |
| "step": 1778 |
| }, |
| { |
| "epoch": 0.6452665941240479, |
| "grad_norm": 0.17908404767513275, |
| "learning_rate": 5.6398310421546376e-05, |
| "loss": 0.1759, |
| "step": 1779 |
| }, |
| { |
| "epoch": 0.6456293072179906, |
| "grad_norm": 0.16568151116371155, |
| "learning_rate": 5.6295417948889306e-05, |
| "loss": 0.1514, |
| "step": 1780 |
| }, |
| { |
| "epoch": 0.6459920203119333, |
| "grad_norm": 0.2028510570526123, |
| "learning_rate": 5.619258263848692e-05, |
| "loss": 0.1626, |
| "step": 1781 |
| }, |
| { |
| "epoch": 0.6463547334058759, |
| "grad_norm": 0.19075465202331543, |
| "learning_rate": 5.608980462483991e-05, |
| "loss": 0.1809, |
| "step": 1782 |
| }, |
| { |
| "epoch": 0.6467174464998187, |
| "grad_norm": 0.18601737916469574, |
| "learning_rate": 5.598708404237416e-05, |
| "loss": 0.1606, |
| "step": 1783 |
| }, |
| { |
| "epoch": 0.6470801595937613, |
| "grad_norm": 0.18421201407909393, |
| "learning_rate": 5.588442102544029e-05, |
| "loss": 0.1527, |
| "step": 1784 |
| }, |
| { |
| "epoch": 0.6474428726877041, |
| "grad_norm": 0.20656828582286835, |
| "learning_rate": 5.578181570831369e-05, |
| "loss": 0.1726, |
| "step": 1785 |
| }, |
| { |
| "epoch": 0.6478055857816467, |
| "grad_norm": 0.1901615560054779, |
| "learning_rate": 5.567926822519427e-05, |
| "loss": 0.1865, |
| "step": 1786 |
| }, |
| { |
| "epoch": 0.6481682988755894, |
| "grad_norm": 0.17387042939662933, |
| "learning_rate": 5.55767787102063e-05, |
| "loss": 0.1643, |
| "step": 1787 |
| }, |
| { |
| "epoch": 0.6485310119695321, |
| "grad_norm": 0.16012033820152283, |
| "learning_rate": 5.547434729739822e-05, |
| "loss": 0.162, |
| "step": 1788 |
| }, |
| { |
| "epoch": 0.6488937250634748, |
| "grad_norm": 0.17737270891666412, |
| "learning_rate": 5.537197412074257e-05, |
| "loss": 0.1563, |
| "step": 1789 |
| }, |
| { |
| "epoch": 0.6492564381574175, |
| "grad_norm": 0.17308826744556427, |
| "learning_rate": 5.526965931413557e-05, |
| "loss": 0.1596, |
| "step": 1790 |
| }, |
| { |
| "epoch": 0.6496191512513602, |
| "grad_norm": 0.20024463534355164, |
| "learning_rate": 5.516740301139721e-05, |
| "loss": 0.1763, |
| "step": 1791 |
| }, |
| { |
| "epoch": 0.6499818643453029, |
| "grad_norm": 0.17333653569221497, |
| "learning_rate": 5.506520534627091e-05, |
| "loss": 0.1666, |
| "step": 1792 |
| }, |
| { |
| "epoch": 0.6503445774392456, |
| "grad_norm": 0.17827224731445312, |
| "learning_rate": 5.496306645242339e-05, |
| "loss": 0.1718, |
| "step": 1793 |
| }, |
| { |
| "epoch": 0.6507072905331882, |
| "grad_norm": 0.19950279593467712, |
| "learning_rate": 5.4860986463444506e-05, |
| "loss": 0.2117, |
| "step": 1794 |
| }, |
| { |
| "epoch": 0.651070003627131, |
| "grad_norm": 0.17631955444812775, |
| "learning_rate": 5.475896551284716e-05, |
| "loss": 0.1784, |
| "step": 1795 |
| }, |
| { |
| "epoch": 0.6514327167210736, |
| "grad_norm": 0.18082845211029053, |
| "learning_rate": 5.4657003734066925e-05, |
| "loss": 0.2068, |
| "step": 1796 |
| }, |
| { |
| "epoch": 0.6517954298150164, |
| "grad_norm": 0.17366324365139008, |
| "learning_rate": 5.455510126046199e-05, |
| "loss": 0.1443, |
| "step": 1797 |
| }, |
| { |
| "epoch": 0.652158142908959, |
| "grad_norm": 0.17154483497142792, |
| "learning_rate": 5.445325822531304e-05, |
| "loss": 0.17, |
| "step": 1798 |
| }, |
| { |
| "epoch": 0.6525208560029017, |
| "grad_norm": 0.18583987653255463, |
| "learning_rate": 5.435147476182298e-05, |
| "loss": 0.1609, |
| "step": 1799 |
| }, |
| { |
| "epoch": 0.6528835690968444, |
| "grad_norm": 0.16991505026817322, |
| "learning_rate": 5.424975100311676e-05, |
| "loss": 0.1537, |
| "step": 1800 |
| }, |
| { |
| "epoch": 0.6532462821907871, |
| "grad_norm": 0.1840389221906662, |
| "learning_rate": 5.414808708224135e-05, |
| "loss": 0.1628, |
| "step": 1801 |
| }, |
| { |
| "epoch": 0.6536089952847298, |
| "grad_norm": 0.197292760014534, |
| "learning_rate": 5.404648313216538e-05, |
| "loss": 0.1722, |
| "step": 1802 |
| }, |
| { |
| "epoch": 0.6539717083786725, |
| "grad_norm": 0.1785934418439865, |
| "learning_rate": 5.394493928577903e-05, |
| "loss": 0.1629, |
| "step": 1803 |
| }, |
| { |
| "epoch": 0.6543344214726151, |
| "grad_norm": 0.17052417993545532, |
| "learning_rate": 5.384345567589391e-05, |
| "loss": 0.1639, |
| "step": 1804 |
| }, |
| { |
| "epoch": 0.6546971345665579, |
| "grad_norm": 0.1716339886188507, |
| "learning_rate": 5.374203243524283e-05, |
| "loss": 0.1628, |
| "step": 1805 |
| }, |
| { |
| "epoch": 0.6550598476605005, |
| "grad_norm": 0.16768915951251984, |
| "learning_rate": 5.364066969647963e-05, |
| "loss": 0.1426, |
| "step": 1806 |
| }, |
| { |
| "epoch": 0.6554225607544433, |
| "grad_norm": 0.1639591008424759, |
| "learning_rate": 5.353936759217899e-05, |
| "loss": 0.1604, |
| "step": 1807 |
| }, |
| { |
| "epoch": 0.6557852738483859, |
| "grad_norm": 0.1945423036813736, |
| "learning_rate": 5.343812625483642e-05, |
| "loss": 0.1562, |
| "step": 1808 |
| }, |
| { |
| "epoch": 0.6561479869423286, |
| "grad_norm": 0.1996852457523346, |
| "learning_rate": 5.333694581686779e-05, |
| "loss": 0.1712, |
| "step": 1809 |
| }, |
| { |
| "epoch": 0.6565107000362713, |
| "grad_norm": 0.18032366037368774, |
| "learning_rate": 5.32358264106094e-05, |
| "loss": 0.196, |
| "step": 1810 |
| }, |
| { |
| "epoch": 0.656873413130214, |
| "grad_norm": 0.16884812712669373, |
| "learning_rate": 5.313476816831768e-05, |
| "loss": 0.1558, |
| "step": 1811 |
| }, |
| { |
| "epoch": 0.6572361262241567, |
| "grad_norm": 0.1865408569574356, |
| "learning_rate": 5.303377122216915e-05, |
| "loss": 0.184, |
| "step": 1812 |
| }, |
| { |
| "epoch": 0.6575988393180994, |
| "grad_norm": 0.18371020257472992, |
| "learning_rate": 5.293283570426007e-05, |
| "loss": 0.1672, |
| "step": 1813 |
| }, |
| { |
| "epoch": 0.6579615524120421, |
| "grad_norm": 0.1799343377351761, |
| "learning_rate": 5.283196174660633e-05, |
| "loss": 0.1544, |
| "step": 1814 |
| }, |
| { |
| "epoch": 0.6583242655059848, |
| "grad_norm": 0.17262513935565948, |
| "learning_rate": 5.273114948114346e-05, |
| "loss": 0.1582, |
| "step": 1815 |
| }, |
| { |
| "epoch": 0.6586869785999274, |
| "grad_norm": 0.19773328304290771, |
| "learning_rate": 5.263039903972618e-05, |
| "loss": 0.1649, |
| "step": 1816 |
| }, |
| { |
| "epoch": 0.6590496916938702, |
| "grad_norm": 0.18928907811641693, |
| "learning_rate": 5.252971055412832e-05, |
| "loss": 0.1853, |
| "step": 1817 |
| }, |
| { |
| "epoch": 0.6594124047878128, |
| "grad_norm": 0.17779038846492767, |
| "learning_rate": 5.242908415604277e-05, |
| "loss": 0.1643, |
| "step": 1818 |
| }, |
| { |
| "epoch": 0.6597751178817556, |
| "grad_norm": 0.2303963601589203, |
| "learning_rate": 5.2328519977081105e-05, |
| "loss": 0.1926, |
| "step": 1819 |
| }, |
| { |
| "epoch": 0.6601378309756982, |
| "grad_norm": 0.16455812752246857, |
| "learning_rate": 5.222801814877369e-05, |
| "loss": 0.1582, |
| "step": 1820 |
| }, |
| { |
| "epoch": 0.6605005440696409, |
| "grad_norm": 0.16079877316951752, |
| "learning_rate": 5.21275788025692e-05, |
| "loss": 0.149, |
| "step": 1821 |
| }, |
| { |
| "epoch": 0.6608632571635836, |
| "grad_norm": 0.1705598533153534, |
| "learning_rate": 5.20272020698346e-05, |
| "loss": 0.1624, |
| "step": 1822 |
| }, |
| { |
| "epoch": 0.6612259702575263, |
| "grad_norm": 0.16610048711299896, |
| "learning_rate": 5.192688808185502e-05, |
| "loss": 0.1527, |
| "step": 1823 |
| }, |
| { |
| "epoch": 0.661588683351469, |
| "grad_norm": 0.19774171710014343, |
| "learning_rate": 5.1826636969833475e-05, |
| "loss": 0.1631, |
| "step": 1824 |
| }, |
| { |
| "epoch": 0.6619513964454117, |
| "grad_norm": 0.17446525394916534, |
| "learning_rate": 5.172644886489073e-05, |
| "loss": 0.1621, |
| "step": 1825 |
| }, |
| { |
| "epoch": 0.6623141095393543, |
| "grad_norm": 0.20300233364105225, |
| "learning_rate": 5.162632389806523e-05, |
| "loss": 0.1907, |
| "step": 1826 |
| }, |
| { |
| "epoch": 0.6626768226332971, |
| "grad_norm": 0.204659104347229, |
| "learning_rate": 5.152626220031278e-05, |
| "loss": 0.1596, |
| "step": 1827 |
| }, |
| { |
| "epoch": 0.6630395357272397, |
| "grad_norm": 0.1757912039756775, |
| "learning_rate": 5.1426263902506414e-05, |
| "loss": 0.1535, |
| "step": 1828 |
| }, |
| { |
| "epoch": 0.6634022488211825, |
| "grad_norm": 0.19932380318641663, |
| "learning_rate": 5.132632913543627e-05, |
| "loss": 0.1705, |
| "step": 1829 |
| }, |
| { |
| "epoch": 0.6637649619151251, |
| "grad_norm": 0.18215243518352509, |
| "learning_rate": 5.1226458029809387e-05, |
| "loss": 0.1636, |
| "step": 1830 |
| }, |
| { |
| "epoch": 0.6641276750090678, |
| "grad_norm": 0.1725538820028305, |
| "learning_rate": 5.112665071624951e-05, |
| "loss": 0.1397, |
| "step": 1831 |
| }, |
| { |
| "epoch": 0.6644903881030105, |
| "grad_norm": 0.18406741321086884, |
| "learning_rate": 5.1026907325297044e-05, |
| "loss": 0.1639, |
| "step": 1832 |
| }, |
| { |
| "epoch": 0.6648531011969532, |
| "grad_norm": 0.17330917716026306, |
| "learning_rate": 5.092722798740871e-05, |
| "loss": 0.1588, |
| "step": 1833 |
| }, |
| { |
| "epoch": 0.6652158142908959, |
| "grad_norm": 0.16775713860988617, |
| "learning_rate": 5.082761283295745e-05, |
| "loss": 0.1407, |
| "step": 1834 |
| }, |
| { |
| "epoch": 0.6655785273848386, |
| "grad_norm": 0.17397847771644592, |
| "learning_rate": 5.072806199223228e-05, |
| "loss": 0.1767, |
| "step": 1835 |
| }, |
| { |
| "epoch": 0.6659412404787813, |
| "grad_norm": 0.17217876017093658, |
| "learning_rate": 5.062857559543809e-05, |
| "loss": 0.1644, |
| "step": 1836 |
| }, |
| { |
| "epoch": 0.666303953572724, |
| "grad_norm": 0.1916993409395218, |
| "learning_rate": 5.0529153772695495e-05, |
| "loss": 0.1631, |
| "step": 1837 |
| }, |
| { |
| "epoch": 0.6666666666666666, |
| "grad_norm": 0.19008702039718628, |
| "learning_rate": 5.0429796654040595e-05, |
| "loss": 0.1677, |
| "step": 1838 |
| }, |
| { |
| "epoch": 0.6670293797606094, |
| "grad_norm": 0.18073846399784088, |
| "learning_rate": 5.033050436942501e-05, |
| "loss": 0.1644, |
| "step": 1839 |
| }, |
| { |
| "epoch": 0.667392092854552, |
| "grad_norm": 0.1769622266292572, |
| "learning_rate": 5.023127704871541e-05, |
| "loss": 0.1764, |
| "step": 1840 |
| }, |
| { |
| "epoch": 0.6677548059484948, |
| "grad_norm": 0.17394478619098663, |
| "learning_rate": 5.013211482169354e-05, |
| "loss": 0.1652, |
| "step": 1841 |
| }, |
| { |
| "epoch": 0.6681175190424374, |
| "grad_norm": 0.18357783555984497, |
| "learning_rate": 5.003301781805604e-05, |
| "loss": 0.1799, |
| "step": 1842 |
| }, |
| { |
| "epoch": 0.6684802321363801, |
| "grad_norm": 0.18445712327957153, |
| "learning_rate": 4.993398616741421e-05, |
| "loss": 0.1731, |
| "step": 1843 |
| }, |
| { |
| "epoch": 0.6688429452303228, |
| "grad_norm": 0.17181545495986938, |
| "learning_rate": 4.983501999929384e-05, |
| "loss": 0.1647, |
| "step": 1844 |
| }, |
| { |
| "epoch": 0.6692056583242655, |
| "grad_norm": 0.1643923968076706, |
| "learning_rate": 4.97361194431352e-05, |
| "loss": 0.1522, |
| "step": 1845 |
| }, |
| { |
| "epoch": 0.6695683714182082, |
| "grad_norm": 0.178927481174469, |
| "learning_rate": 4.963728462829262e-05, |
| "loss": 0.1693, |
| "step": 1846 |
| }, |
| { |
| "epoch": 0.6699310845121509, |
| "grad_norm": 0.16977953910827637, |
| "learning_rate": 4.95385156840345e-05, |
| "loss": 0.1634, |
| "step": 1847 |
| }, |
| { |
| "epoch": 0.6702937976060935, |
| "grad_norm": 0.19453585147857666, |
| "learning_rate": 4.943981273954302e-05, |
| "loss": 0.161, |
| "step": 1848 |
| }, |
| { |
| "epoch": 0.6706565107000363, |
| "grad_norm": 0.15591104328632355, |
| "learning_rate": 4.9341175923914184e-05, |
| "loss": 0.1336, |
| "step": 1849 |
| }, |
| { |
| "epoch": 0.6710192237939789, |
| "grad_norm": 0.19056206941604614, |
| "learning_rate": 4.9242605366157356e-05, |
| "loss": 0.1647, |
| "step": 1850 |
| }, |
| { |
| "epoch": 0.6713819368879217, |
| "grad_norm": 0.18081988394260406, |
| "learning_rate": 4.914410119519528e-05, |
| "loss": 0.1679, |
| "step": 1851 |
| }, |
| { |
| "epoch": 0.6717446499818643, |
| "grad_norm": 0.1665160208940506, |
| "learning_rate": 4.904566353986394e-05, |
| "loss": 0.1585, |
| "step": 1852 |
| }, |
| { |
| "epoch": 0.672107363075807, |
| "grad_norm": 0.18015241622924805, |
| "learning_rate": 4.894729252891224e-05, |
| "loss": 0.1687, |
| "step": 1853 |
| }, |
| { |
| "epoch": 0.6724700761697497, |
| "grad_norm": 0.16529425978660583, |
| "learning_rate": 4.884898829100194e-05, |
| "loss": 0.1569, |
| "step": 1854 |
| }, |
| { |
| "epoch": 0.6728327892636924, |
| "grad_norm": 0.17505323886871338, |
| "learning_rate": 4.87507509547075e-05, |
| "loss": 0.1651, |
| "step": 1855 |
| }, |
| { |
| "epoch": 0.6731955023576351, |
| "grad_norm": 0.18190018832683563, |
| "learning_rate": 4.865258064851579e-05, |
| "loss": 0.1706, |
| "step": 1856 |
| }, |
| { |
| "epoch": 0.6735582154515778, |
| "grad_norm": 0.1668224334716797, |
| "learning_rate": 4.855447750082615e-05, |
| "loss": 0.1639, |
| "step": 1857 |
| }, |
| { |
| "epoch": 0.6739209285455205, |
| "grad_norm": 0.18514534831047058, |
| "learning_rate": 4.845644163994996e-05, |
| "loss": 0.1912, |
| "step": 1858 |
| }, |
| { |
| "epoch": 0.6742836416394632, |
| "grad_norm": 0.19206570088863373, |
| "learning_rate": 4.835847319411065e-05, |
| "loss": 0.1595, |
| "step": 1859 |
| }, |
| { |
| "epoch": 0.6746463547334058, |
| "grad_norm": 0.19193512201309204, |
| "learning_rate": 4.8260572291443465e-05, |
| "loss": 0.1586, |
| "step": 1860 |
| }, |
| { |
| "epoch": 0.6750090678273486, |
| "grad_norm": 0.1866559088230133, |
| "learning_rate": 4.816273905999529e-05, |
| "loss": 0.1841, |
| "step": 1861 |
| }, |
| { |
| "epoch": 0.6753717809212912, |
| "grad_norm": 0.1677185595035553, |
| "learning_rate": 4.80649736277245e-05, |
| "loss": 0.1672, |
| "step": 1862 |
| }, |
| { |
| "epoch": 0.675734494015234, |
| "grad_norm": 0.16328024864196777, |
| "learning_rate": 4.796727612250087e-05, |
| "loss": 0.1556, |
| "step": 1863 |
| }, |
| { |
| "epoch": 0.6760972071091766, |
| "grad_norm": 0.1733468621969223, |
| "learning_rate": 4.7869646672105254e-05, |
| "loss": 0.1572, |
| "step": 1864 |
| }, |
| { |
| "epoch": 0.6764599202031193, |
| "grad_norm": 0.19276085495948792, |
| "learning_rate": 4.7772085404229495e-05, |
| "loss": 0.1681, |
| "step": 1865 |
| }, |
| { |
| "epoch": 0.676822633297062, |
| "grad_norm": 0.2415236532688141, |
| "learning_rate": 4.767459244647629e-05, |
| "loss": 0.2347, |
| "step": 1866 |
| }, |
| { |
| "epoch": 0.6771853463910047, |
| "grad_norm": 0.21599356830120087, |
| "learning_rate": 4.757716792635898e-05, |
| "loss": 0.1985, |
| "step": 1867 |
| }, |
| { |
| "epoch": 0.6775480594849475, |
| "grad_norm": 0.17399145662784576, |
| "learning_rate": 4.747981197130139e-05, |
| "loss": 0.1662, |
| "step": 1868 |
| }, |
| { |
| "epoch": 0.6779107725788901, |
| "grad_norm": 0.1672641634941101, |
| "learning_rate": 4.738252470863763e-05, |
| "loss": 0.1482, |
| "step": 1869 |
| }, |
| { |
| "epoch": 0.6782734856728327, |
| "grad_norm": 0.17770545184612274, |
| "learning_rate": 4.7285306265612106e-05, |
| "loss": 0.157, |
| "step": 1870 |
| }, |
| { |
| "epoch": 0.6786361987667755, |
| "grad_norm": 0.18158309161663055, |
| "learning_rate": 4.7188156769379063e-05, |
| "loss": 0.1569, |
| "step": 1871 |
| }, |
| { |
| "epoch": 0.6789989118607181, |
| "grad_norm": 0.2067386507987976, |
| "learning_rate": 4.7091076347002613e-05, |
| "loss": 0.1686, |
| "step": 1872 |
| }, |
| { |
| "epoch": 0.6793616249546609, |
| "grad_norm": 0.16841058433055878, |
| "learning_rate": 4.6994065125456546e-05, |
| "loss": 0.1564, |
| "step": 1873 |
| }, |
| { |
| "epoch": 0.6797243380486035, |
| "grad_norm": 0.19121627509593964, |
| "learning_rate": 4.6897123231624105e-05, |
| "loss": 0.1794, |
| "step": 1874 |
| }, |
| { |
| "epoch": 0.6800870511425462, |
| "grad_norm": 0.16333697736263275, |
| "learning_rate": 4.6800250792297885e-05, |
| "loss": 0.1497, |
| "step": 1875 |
| }, |
| { |
| "epoch": 0.680449764236489, |
| "grad_norm": 0.16970248520374298, |
| "learning_rate": 4.670344793417967e-05, |
| "loss": 0.1672, |
| "step": 1876 |
| }, |
| { |
| "epoch": 0.6808124773304316, |
| "grad_norm": 0.1738625019788742, |
| "learning_rate": 4.660671478388019e-05, |
| "loss": 0.1689, |
| "step": 1877 |
| }, |
| { |
| "epoch": 0.6811751904243744, |
| "grad_norm": 0.167289599776268, |
| "learning_rate": 4.651005146791901e-05, |
| "loss": 0.1446, |
| "step": 1878 |
| }, |
| { |
| "epoch": 0.681537903518317, |
| "grad_norm": 0.1755392998456955, |
| "learning_rate": 4.641345811272436e-05, |
| "loss": 0.1509, |
| "step": 1879 |
| }, |
| { |
| "epoch": 0.6819006166122598, |
| "grad_norm": 0.1769733875989914, |
| "learning_rate": 4.631693484463299e-05, |
| "loss": 0.1688, |
| "step": 1880 |
| }, |
| { |
| "epoch": 0.6822633297062024, |
| "grad_norm": 0.17857052385807037, |
| "learning_rate": 4.622048178988989e-05, |
| "loss": 0.1799, |
| "step": 1881 |
| }, |
| { |
| "epoch": 0.682626042800145, |
| "grad_norm": 0.17262940108776093, |
| "learning_rate": 4.6124099074648375e-05, |
| "loss": 0.1613, |
| "step": 1882 |
| }, |
| { |
| "epoch": 0.6829887558940878, |
| "grad_norm": 0.17255136370658875, |
| "learning_rate": 4.602778682496965e-05, |
| "loss": 0.1647, |
| "step": 1883 |
| }, |
| { |
| "epoch": 0.6833514689880305, |
| "grad_norm": 0.19975058734416962, |
| "learning_rate": 4.593154516682276e-05, |
| "loss": 0.1705, |
| "step": 1884 |
| }, |
| { |
| "epoch": 0.6837141820819732, |
| "grad_norm": 0.19348910450935364, |
| "learning_rate": 4.5835374226084424e-05, |
| "loss": 0.1635, |
| "step": 1885 |
| }, |
| { |
| "epoch": 0.6840768951759159, |
| "grad_norm": 0.17311705648899078, |
| "learning_rate": 4.573927412853896e-05, |
| "loss": 0.1764, |
| "step": 1886 |
| }, |
| { |
| "epoch": 0.6844396082698585, |
| "grad_norm": 0.17351648211479187, |
| "learning_rate": 4.56432449998779e-05, |
| "loss": 0.1466, |
| "step": 1887 |
| }, |
| { |
| "epoch": 0.6848023213638013, |
| "grad_norm": 0.16917894780635834, |
| "learning_rate": 4.554728696570001e-05, |
| "loss": 0.1565, |
| "step": 1888 |
| }, |
| { |
| "epoch": 0.6851650344577439, |
| "grad_norm": 0.17121654748916626, |
| "learning_rate": 4.545140015151105e-05, |
| "loss": 0.1638, |
| "step": 1889 |
| }, |
| { |
| "epoch": 0.6855277475516867, |
| "grad_norm": 0.18969422578811646, |
| "learning_rate": 4.535558468272371e-05, |
| "loss": 0.1533, |
| "step": 1890 |
| }, |
| { |
| "epoch": 0.6858904606456293, |
| "grad_norm": 0.17447051405906677, |
| "learning_rate": 4.525984068465725e-05, |
| "loss": 0.1624, |
| "step": 1891 |
| }, |
| { |
| "epoch": 0.686253173739572, |
| "grad_norm": 0.1624990999698639, |
| "learning_rate": 4.5164168282537546e-05, |
| "loss": 0.1482, |
| "step": 1892 |
| }, |
| { |
| "epoch": 0.6866158868335147, |
| "grad_norm": 0.17492160201072693, |
| "learning_rate": 4.506856760149671e-05, |
| "loss": 0.1733, |
| "step": 1893 |
| }, |
| { |
| "epoch": 0.6869785999274574, |
| "grad_norm": 0.16198083758354187, |
| "learning_rate": 4.497303876657324e-05, |
| "loss": 0.1433, |
| "step": 1894 |
| }, |
| { |
| "epoch": 0.6873413130214001, |
| "grad_norm": 0.1759859323501587, |
| "learning_rate": 4.48775819027115e-05, |
| "loss": 0.146, |
| "step": 1895 |
| }, |
| { |
| "epoch": 0.6877040261153428, |
| "grad_norm": 0.16649121046066284, |
| "learning_rate": 4.478219713476178e-05, |
| "loss": 0.1652, |
| "step": 1896 |
| }, |
| { |
| "epoch": 0.6880667392092854, |
| "grad_norm": 0.1907196342945099, |
| "learning_rate": 4.468688458748006e-05, |
| "loss": 0.1731, |
| "step": 1897 |
| }, |
| { |
| "epoch": 0.6884294523032282, |
| "grad_norm": 0.1932022124528885, |
| "learning_rate": 4.459164438552789e-05, |
| "loss": 0.1693, |
| "step": 1898 |
| }, |
| { |
| "epoch": 0.6887921653971708, |
| "grad_norm": 0.1829594522714615, |
| "learning_rate": 4.449647665347216e-05, |
| "loss": 0.1957, |
| "step": 1899 |
| }, |
| { |
| "epoch": 0.6891548784911136, |
| "grad_norm": 0.17210708558559418, |
| "learning_rate": 4.4401381515784965e-05, |
| "loss": 0.1596, |
| "step": 1900 |
| }, |
| { |
| "epoch": 0.6895175915850562, |
| "grad_norm": 0.16382241249084473, |
| "learning_rate": 4.430635909684356e-05, |
| "loss": 0.1417, |
| "step": 1901 |
| }, |
| { |
| "epoch": 0.6898803046789989, |
| "grad_norm": 0.16617849469184875, |
| "learning_rate": 4.421140952092997e-05, |
| "loss": 0.1459, |
| "step": 1902 |
| }, |
| { |
| "epoch": 0.6902430177729416, |
| "grad_norm": 0.16519035398960114, |
| "learning_rate": 4.411653291223097e-05, |
| "loss": 0.1616, |
| "step": 1903 |
| }, |
| { |
| "epoch": 0.6906057308668843, |
| "grad_norm": 0.17537926137447357, |
| "learning_rate": 4.402172939483794e-05, |
| "loss": 0.1637, |
| "step": 1904 |
| }, |
| { |
| "epoch": 0.690968443960827, |
| "grad_norm": 0.18427397310733795, |
| "learning_rate": 4.392699909274664e-05, |
| "loss": 0.1876, |
| "step": 1905 |
| }, |
| { |
| "epoch": 0.6913311570547697, |
| "grad_norm": 0.1629849672317505, |
| "learning_rate": 4.383234212985701e-05, |
| "loss": 0.1436, |
| "step": 1906 |
| }, |
| { |
| "epoch": 0.6916938701487124, |
| "grad_norm": 0.1907191276550293, |
| "learning_rate": 4.3737758629973204e-05, |
| "loss": 0.1723, |
| "step": 1907 |
| }, |
| { |
| "epoch": 0.6920565832426551, |
| "grad_norm": 0.18214593827724457, |
| "learning_rate": 4.3643248716803184e-05, |
| "loss": 0.1683, |
| "step": 1908 |
| }, |
| { |
| "epoch": 0.6924192963365977, |
| "grad_norm": 0.18101546168327332, |
| "learning_rate": 4.354881251395871e-05, |
| "loss": 0.1554, |
| "step": 1909 |
| }, |
| { |
| "epoch": 0.6927820094305405, |
| "grad_norm": 0.18527980148792267, |
| "learning_rate": 4.3454450144955105e-05, |
| "loss": 0.1832, |
| "step": 1910 |
| }, |
| { |
| "epoch": 0.6931447225244831, |
| "grad_norm": 0.16371949017047882, |
| "learning_rate": 4.3360161733211145e-05, |
| "loss": 0.1528, |
| "step": 1911 |
| }, |
| { |
| "epoch": 0.6935074356184259, |
| "grad_norm": 0.172775536775589, |
| "learning_rate": 4.3265947402048834e-05, |
| "loss": 0.1564, |
| "step": 1912 |
| }, |
| { |
| "epoch": 0.6938701487123685, |
| "grad_norm": 0.17069590091705322, |
| "learning_rate": 4.3171807274693386e-05, |
| "loss": 0.1555, |
| "step": 1913 |
| }, |
| { |
| "epoch": 0.6942328618063112, |
| "grad_norm": 0.1884002387523651, |
| "learning_rate": 4.307774147427287e-05, |
| "loss": 0.1611, |
| "step": 1914 |
| }, |
| { |
| "epoch": 0.6945955749002539, |
| "grad_norm": 0.17518699169158936, |
| "learning_rate": 4.2983750123818155e-05, |
| "loss": 0.1651, |
| "step": 1915 |
| }, |
| { |
| "epoch": 0.6949582879941966, |
| "grad_norm": 0.17112936079502106, |
| "learning_rate": 4.288983334626275e-05, |
| "loss": 0.1472, |
| "step": 1916 |
| }, |
| { |
| "epoch": 0.6953210010881393, |
| "grad_norm": 0.1765616238117218, |
| "learning_rate": 4.279599126444264e-05, |
| "loss": 0.1552, |
| "step": 1917 |
| }, |
| { |
| "epoch": 0.695683714182082, |
| "grad_norm": 0.18281279504299164, |
| "learning_rate": 4.2702224001096045e-05, |
| "loss": 0.1758, |
| "step": 1918 |
| }, |
| { |
| "epoch": 0.6960464272760246, |
| "grad_norm": 0.1792001724243164, |
| "learning_rate": 4.2608531678863475e-05, |
| "loss": 0.1643, |
| "step": 1919 |
| }, |
| { |
| "epoch": 0.6964091403699674, |
| "grad_norm": 0.1666647344827652, |
| "learning_rate": 4.2514914420287266e-05, |
| "loss": 0.146, |
| "step": 1920 |
| }, |
| { |
| "epoch": 0.69677185346391, |
| "grad_norm": 0.2033475637435913, |
| "learning_rate": 4.242137234781166e-05, |
| "loss": 0.1841, |
| "step": 1921 |
| }, |
| { |
| "epoch": 0.6971345665578528, |
| "grad_norm": 0.17656663060188293, |
| "learning_rate": 4.23279055837825e-05, |
| "loss": 0.1614, |
| "step": 1922 |
| }, |
| { |
| "epoch": 0.6974972796517954, |
| "grad_norm": 0.1725003868341446, |
| "learning_rate": 4.2234514250447255e-05, |
| "loss": 0.155, |
| "step": 1923 |
| }, |
| { |
| "epoch": 0.6978599927457381, |
| "grad_norm": 0.17976543307304382, |
| "learning_rate": 4.214119846995461e-05, |
| "loss": 0.1646, |
| "step": 1924 |
| }, |
| { |
| "epoch": 0.6982227058396808, |
| "grad_norm": 0.16774506866931915, |
| "learning_rate": 4.204795836435448e-05, |
| "loss": 0.1672, |
| "step": 1925 |
| }, |
| { |
| "epoch": 0.6985854189336235, |
| "grad_norm": 0.18107999861240387, |
| "learning_rate": 4.1954794055597756e-05, |
| "loss": 0.1769, |
| "step": 1926 |
| }, |
| { |
| "epoch": 0.6989481320275662, |
| "grad_norm": 0.19499120116233826, |
| "learning_rate": 4.1861705665536324e-05, |
| "loss": 0.1737, |
| "step": 1927 |
| }, |
| { |
| "epoch": 0.6993108451215089, |
| "grad_norm": 0.18403582274913788, |
| "learning_rate": 4.1768693315922635e-05, |
| "loss": 0.1671, |
| "step": 1928 |
| }, |
| { |
| "epoch": 0.6996735582154516, |
| "grad_norm": 0.18355792760849, |
| "learning_rate": 4.167575712840974e-05, |
| "loss": 0.1587, |
| "step": 1929 |
| }, |
| { |
| "epoch": 0.7000362713093943, |
| "grad_norm": 0.20113395154476166, |
| "learning_rate": 4.15828972245511e-05, |
| "loss": 0.1667, |
| "step": 1930 |
| }, |
| { |
| "epoch": 0.7003989844033369, |
| "grad_norm": 0.1907624453306198, |
| "learning_rate": 4.149011372580029e-05, |
| "loss": 0.1708, |
| "step": 1931 |
| }, |
| { |
| "epoch": 0.7007616974972797, |
| "grad_norm": 0.16733594238758087, |
| "learning_rate": 4.139740675351116e-05, |
| "loss": 0.1629, |
| "step": 1932 |
| }, |
| { |
| "epoch": 0.7011244105912223, |
| "grad_norm": 0.15931111574172974, |
| "learning_rate": 4.130477642893729e-05, |
| "loss": 0.1534, |
| "step": 1933 |
| }, |
| { |
| "epoch": 0.7014871236851651, |
| "grad_norm": 0.19512903690338135, |
| "learning_rate": 4.1212222873232054e-05, |
| "loss": 0.1814, |
| "step": 1934 |
| }, |
| { |
| "epoch": 0.7018498367791077, |
| "grad_norm": 0.18595078587532043, |
| "learning_rate": 4.111974620744845e-05, |
| "loss": 0.1632, |
| "step": 1935 |
| }, |
| { |
| "epoch": 0.7022125498730504, |
| "grad_norm": 0.17419064044952393, |
| "learning_rate": 4.10273465525389e-05, |
| "loss": 0.1448, |
| "step": 1936 |
| }, |
| { |
| "epoch": 0.7025752629669931, |
| "grad_norm": 0.178279310464859, |
| "learning_rate": 4.093502402935504e-05, |
| "loss": 0.1578, |
| "step": 1937 |
| }, |
| { |
| "epoch": 0.7029379760609358, |
| "grad_norm": 0.18063177168369293, |
| "learning_rate": 4.084277875864776e-05, |
| "loss": 0.1502, |
| "step": 1938 |
| }, |
| { |
| "epoch": 0.7033006891548785, |
| "grad_norm": 0.20529168844223022, |
| "learning_rate": 4.075061086106678e-05, |
| "loss": 0.1748, |
| "step": 1939 |
| }, |
| { |
| "epoch": 0.7036634022488212, |
| "grad_norm": 0.1844182014465332, |
| "learning_rate": 4.065852045716069e-05, |
| "loss": 0.1543, |
| "step": 1940 |
| }, |
| { |
| "epoch": 0.7040261153427638, |
| "grad_norm": 0.1840999871492386, |
| "learning_rate": 4.056650766737669e-05, |
| "loss": 0.189, |
| "step": 1941 |
| }, |
| { |
| "epoch": 0.7043888284367066, |
| "grad_norm": 0.1571437418460846, |
| "learning_rate": 4.047457261206047e-05, |
| "loss": 0.1546, |
| "step": 1942 |
| }, |
| { |
| "epoch": 0.7047515415306492, |
| "grad_norm": 0.17258736491203308, |
| "learning_rate": 4.038271541145604e-05, |
| "loss": 0.1531, |
| "step": 1943 |
| }, |
| { |
| "epoch": 0.705114254624592, |
| "grad_norm": 0.16301092505455017, |
| "learning_rate": 4.0290936185705674e-05, |
| "loss": 0.1554, |
| "step": 1944 |
| }, |
| { |
| "epoch": 0.7054769677185346, |
| "grad_norm": 0.1766006052494049, |
| "learning_rate": 4.0199235054849546e-05, |
| "loss": 0.1484, |
| "step": 1945 |
| }, |
| { |
| "epoch": 0.7058396808124773, |
| "grad_norm": 0.18022476136684418, |
| "learning_rate": 4.010761213882572e-05, |
| "loss": 0.1519, |
| "step": 1946 |
| }, |
| { |
| "epoch": 0.70620239390642, |
| "grad_norm": 0.16101764142513275, |
| "learning_rate": 4.001606755746999e-05, |
| "loss": 0.1564, |
| "step": 1947 |
| }, |
| { |
| "epoch": 0.7065651070003627, |
| "grad_norm": 0.18494002521038055, |
| "learning_rate": 3.992460143051566e-05, |
| "loss": 0.1549, |
| "step": 1948 |
| }, |
| { |
| "epoch": 0.7069278200943054, |
| "grad_norm": 0.18700887262821198, |
| "learning_rate": 3.983321387759342e-05, |
| "loss": 0.1656, |
| "step": 1949 |
| }, |
| { |
| "epoch": 0.7072905331882481, |
| "grad_norm": 0.18422120809555054, |
| "learning_rate": 3.974190501823126e-05, |
| "loss": 0.1646, |
| "step": 1950 |
| }, |
| { |
| "epoch": 0.7076532462821908, |
| "grad_norm": 0.17262974381446838, |
| "learning_rate": 3.965067497185416e-05, |
| "loss": 0.1553, |
| "step": 1951 |
| }, |
| { |
| "epoch": 0.7080159593761335, |
| "grad_norm": 0.16152386367321014, |
| "learning_rate": 3.955952385778406e-05, |
| "loss": 0.1341, |
| "step": 1952 |
| }, |
| { |
| "epoch": 0.7083786724700761, |
| "grad_norm": 0.16990354657173157, |
| "learning_rate": 3.946845179523965e-05, |
| "loss": 0.1727, |
| "step": 1953 |
| }, |
| { |
| "epoch": 0.7087413855640189, |
| "grad_norm": 0.1854991912841797, |
| "learning_rate": 3.937745890333623e-05, |
| "loss": 0.1548, |
| "step": 1954 |
| }, |
| { |
| "epoch": 0.7091040986579615, |
| "grad_norm": 0.1773202270269394, |
| "learning_rate": 3.928654530108552e-05, |
| "loss": 0.1723, |
| "step": 1955 |
| }, |
| { |
| "epoch": 0.7094668117519043, |
| "grad_norm": 0.18670934438705444, |
| "learning_rate": 3.9195711107395624e-05, |
| "loss": 0.1688, |
| "step": 1956 |
| }, |
| { |
| "epoch": 0.7098295248458469, |
| "grad_norm": 0.17176151275634766, |
| "learning_rate": 3.9104956441070715e-05, |
| "loss": 0.1524, |
| "step": 1957 |
| }, |
| { |
| "epoch": 0.7101922379397896, |
| "grad_norm": 0.17264217138290405, |
| "learning_rate": 3.901428142081095e-05, |
| "loss": 0.1568, |
| "step": 1958 |
| }, |
| { |
| "epoch": 0.7105549510337323, |
| "grad_norm": 0.16863767802715302, |
| "learning_rate": 3.892368616521229e-05, |
| "loss": 0.1514, |
| "step": 1959 |
| }, |
| { |
| "epoch": 0.710917664127675, |
| "grad_norm": 0.1810598075389862, |
| "learning_rate": 3.883317079276649e-05, |
| "loss": 0.1494, |
| "step": 1960 |
| }, |
| { |
| "epoch": 0.7112803772216177, |
| "grad_norm": 0.18499146401882172, |
| "learning_rate": 3.87427354218607e-05, |
| "loss": 0.155, |
| "step": 1961 |
| }, |
| { |
| "epoch": 0.7116430903155604, |
| "grad_norm": 0.16301509737968445, |
| "learning_rate": 3.865238017077748e-05, |
| "loss": 0.1505, |
| "step": 1962 |
| }, |
| { |
| "epoch": 0.712005803409503, |
| "grad_norm": 0.18313588201999664, |
| "learning_rate": 3.856210515769456e-05, |
| "loss": 0.1696, |
| "step": 1963 |
| }, |
| { |
| "epoch": 0.7123685165034458, |
| "grad_norm": 0.18576788902282715, |
| "learning_rate": 3.847191050068483e-05, |
| "loss": 0.1584, |
| "step": 1964 |
| }, |
| { |
| "epoch": 0.7127312295973884, |
| "grad_norm": 0.16800563037395477, |
| "learning_rate": 3.838179631771598e-05, |
| "loss": 0.1409, |
| "step": 1965 |
| }, |
| { |
| "epoch": 0.7130939426913312, |
| "grad_norm": 0.15716706216335297, |
| "learning_rate": 3.829176272665047e-05, |
| "loss": 0.1647, |
| "step": 1966 |
| }, |
| { |
| "epoch": 0.7134566557852738, |
| "grad_norm": 0.19974446296691895, |
| "learning_rate": 3.8201809845245364e-05, |
| "loss": 0.2084, |
| "step": 1967 |
| }, |
| { |
| "epoch": 0.7138193688792165, |
| "grad_norm": 0.18544046580791473, |
| "learning_rate": 3.811193779115213e-05, |
| "loss": 0.1579, |
| "step": 1968 |
| }, |
| { |
| "epoch": 0.7141820819731592, |
| "grad_norm": 0.17015773057937622, |
| "learning_rate": 3.80221466819166e-05, |
| "loss": 0.1663, |
| "step": 1969 |
| }, |
| { |
| "epoch": 0.7145447950671019, |
| "grad_norm": 0.1646818220615387, |
| "learning_rate": 3.7932436634978684e-05, |
| "loss": 0.1582, |
| "step": 1970 |
| }, |
| { |
| "epoch": 0.7149075081610446, |
| "grad_norm": 0.16714130342006683, |
| "learning_rate": 3.784280776767224e-05, |
| "loss": 0.137, |
| "step": 1971 |
| }, |
| { |
| "epoch": 0.7152702212549873, |
| "grad_norm": 0.17864611744880676, |
| "learning_rate": 3.7753260197224995e-05, |
| "loss": 0.1496, |
| "step": 1972 |
| }, |
| { |
| "epoch": 0.71563293434893, |
| "grad_norm": 0.18264222145080566, |
| "learning_rate": 3.766379404075832e-05, |
| "loss": 0.1583, |
| "step": 1973 |
| }, |
| { |
| "epoch": 0.7159956474428727, |
| "grad_norm": 0.1730545610189438, |
| "learning_rate": 3.757440941528708e-05, |
| "loss": 0.1616, |
| "step": 1974 |
| }, |
| { |
| "epoch": 0.7163583605368153, |
| "grad_norm": 0.1775929182767868, |
| "learning_rate": 3.748510643771962e-05, |
| "loss": 0.1514, |
| "step": 1975 |
| }, |
| { |
| "epoch": 0.7167210736307581, |
| "grad_norm": 0.1856832504272461, |
| "learning_rate": 3.739588522485736e-05, |
| "loss": 0.1558, |
| "step": 1976 |
| }, |
| { |
| "epoch": 0.7170837867247007, |
| "grad_norm": 0.19256243109703064, |
| "learning_rate": 3.7306745893394845e-05, |
| "loss": 0.1966, |
| "step": 1977 |
| }, |
| { |
| "epoch": 0.7174464998186435, |
| "grad_norm": 0.15902438759803772, |
| "learning_rate": 3.72176885599195e-05, |
| "loss": 0.1493, |
| "step": 1978 |
| }, |
| { |
| "epoch": 0.7178092129125861, |
| "grad_norm": 0.16954579949378967, |
| "learning_rate": 3.7128713340911535e-05, |
| "loss": 0.1692, |
| "step": 1979 |
| }, |
| { |
| "epoch": 0.7181719260065288, |
| "grad_norm": 0.17363213002681732, |
| "learning_rate": 3.7039820352743685e-05, |
| "loss": 0.1491, |
| "step": 1980 |
| }, |
| { |
| "epoch": 0.7185346391004716, |
| "grad_norm": 0.18617630004882812, |
| "learning_rate": 3.6951009711681253e-05, |
| "loss": 0.1762, |
| "step": 1981 |
| }, |
| { |
| "epoch": 0.7188973521944142, |
| "grad_norm": 0.15999780595302582, |
| "learning_rate": 3.6862281533881745e-05, |
| "loss": 0.1488, |
| "step": 1982 |
| }, |
| { |
| "epoch": 0.719260065288357, |
| "grad_norm": 0.16866905987262726, |
| "learning_rate": 3.677363593539485e-05, |
| "loss": 0.1467, |
| "step": 1983 |
| }, |
| { |
| "epoch": 0.7196227783822996, |
| "grad_norm": 0.1777690201997757, |
| "learning_rate": 3.668507303216223e-05, |
| "loss": 0.1525, |
| "step": 1984 |
| }, |
| { |
| "epoch": 0.7199854914762422, |
| "grad_norm": 0.19426722824573517, |
| "learning_rate": 3.659659294001739e-05, |
| "loss": 0.2006, |
| "step": 1985 |
| }, |
| { |
| "epoch": 0.720348204570185, |
| "grad_norm": 0.17638282477855682, |
| "learning_rate": 3.6508195774685515e-05, |
| "loss": 0.1548, |
| "step": 1986 |
| }, |
| { |
| "epoch": 0.7207109176641276, |
| "grad_norm": 0.16942881047725677, |
| "learning_rate": 3.641988165178339e-05, |
| "loss": 0.1646, |
| "step": 1987 |
| }, |
| { |
| "epoch": 0.7210736307580704, |
| "grad_norm": 0.17678217589855194, |
| "learning_rate": 3.633165068681914e-05, |
| "loss": 0.1342, |
| "step": 1988 |
| }, |
| { |
| "epoch": 0.721436343852013, |
| "grad_norm": 0.15457268059253693, |
| "learning_rate": 3.624350299519209e-05, |
| "loss": 0.1489, |
| "step": 1989 |
| }, |
| { |
| "epoch": 0.7217990569459557, |
| "grad_norm": 0.17524264752864838, |
| "learning_rate": 3.615543869219271e-05, |
| "loss": 0.1565, |
| "step": 1990 |
| }, |
| { |
| "epoch": 0.7221617700398985, |
| "grad_norm": 0.16811302304267883, |
| "learning_rate": 3.6067457893002376e-05, |
| "loss": 0.1518, |
| "step": 1991 |
| }, |
| { |
| "epoch": 0.7225244831338411, |
| "grad_norm": 0.18975135684013367, |
| "learning_rate": 3.597956071269326e-05, |
| "loss": 0.1605, |
| "step": 1992 |
| }, |
| { |
| "epoch": 0.7228871962277839, |
| "grad_norm": 0.17413167655467987, |
| "learning_rate": 3.58917472662281e-05, |
| "loss": 0.1782, |
| "step": 1993 |
| }, |
| { |
| "epoch": 0.7232499093217265, |
| "grad_norm": 0.17248669266700745, |
| "learning_rate": 3.580401766846028e-05, |
| "loss": 0.1499, |
| "step": 1994 |
| }, |
| { |
| "epoch": 0.7236126224156693, |
| "grad_norm": 0.16712360084056854, |
| "learning_rate": 3.571637203413334e-05, |
| "loss": 0.1561, |
| "step": 1995 |
| }, |
| { |
| "epoch": 0.7239753355096119, |
| "grad_norm": 0.17022311687469482, |
| "learning_rate": 3.56288104778811e-05, |
| "loss": 0.152, |
| "step": 1996 |
| }, |
| { |
| "epoch": 0.7243380486035546, |
| "grad_norm": 0.17325520515441895, |
| "learning_rate": 3.554133311422735e-05, |
| "loss": 0.1554, |
| "step": 1997 |
| }, |
| { |
| "epoch": 0.7247007616974973, |
| "grad_norm": 0.17560617625713348, |
| "learning_rate": 3.5453940057585866e-05, |
| "loss": 0.1869, |
| "step": 1998 |
| }, |
| { |
| "epoch": 0.72506347479144, |
| "grad_norm": 0.19136746227741241, |
| "learning_rate": 3.5366631422260045e-05, |
| "loss": 0.1761, |
| "step": 1999 |
| }, |
| { |
| "epoch": 0.7254261878853827, |
| "grad_norm": 0.1808745115995407, |
| "learning_rate": 3.527940732244289e-05, |
| "loss": 0.1558, |
| "step": 2000 |
| }, |
| { |
| "epoch": 0.7257889009793254, |
| "grad_norm": 0.16616669297218323, |
| "learning_rate": 3.519226787221692e-05, |
| "loss": 0.1465, |
| "step": 2001 |
| }, |
| { |
| "epoch": 0.726151614073268, |
| "grad_norm": 0.1782522052526474, |
| "learning_rate": 3.5105213185553856e-05, |
| "loss": 0.1546, |
| "step": 2002 |
| }, |
| { |
| "epoch": 0.7265143271672108, |
| "grad_norm": 0.1684170663356781, |
| "learning_rate": 3.5018243376314574e-05, |
| "loss": 0.1625, |
| "step": 2003 |
| }, |
| { |
| "epoch": 0.7268770402611534, |
| "grad_norm": 0.16710427403450012, |
| "learning_rate": 3.493135855824894e-05, |
| "loss": 0.155, |
| "step": 2004 |
| }, |
| { |
| "epoch": 0.7272397533550962, |
| "grad_norm": 4411.4638671875, |
| "learning_rate": 3.484455884499561e-05, |
| "loss": 0.1437, |
| "step": 2005 |
| }, |
| { |
| "epoch": 0.7276024664490388, |
| "grad_norm": 0.1757262647151947, |
| "learning_rate": 3.475784435008208e-05, |
| "loss": 0.1531, |
| "step": 2006 |
| }, |
| { |
| "epoch": 0.7279651795429815, |
| "grad_norm": 0.1928826868534088, |
| "learning_rate": 3.467121518692422e-05, |
| "loss": 0.1655, |
| "step": 2007 |
| }, |
| { |
| "epoch": 0.7283278926369242, |
| "grad_norm": 0.19880840182304382, |
| "learning_rate": 3.458467146882637e-05, |
| "loss": 0.1579, |
| "step": 2008 |
| }, |
| { |
| "epoch": 0.7286906057308669, |
| "grad_norm": 0.23102417588233948, |
| "learning_rate": 3.4498213308981095e-05, |
| "loss": 0.1581, |
| "step": 2009 |
| }, |
| { |
| "epoch": 0.7290533188248096, |
| "grad_norm": 0.1807643175125122, |
| "learning_rate": 3.441184082046908e-05, |
| "loss": 0.1462, |
| "step": 2010 |
| }, |
| { |
| "epoch": 0.7294160319187523, |
| "grad_norm": 0.18923969566822052, |
| "learning_rate": 3.4325554116258894e-05, |
| "loss": 0.1507, |
| "step": 2011 |
| }, |
| { |
| "epoch": 0.7297787450126949, |
| "grad_norm": 0.22489802539348602, |
| "learning_rate": 3.423935330920702e-05, |
| "loss": 0.1803, |
| "step": 2012 |
| }, |
| { |
| "epoch": 0.7301414581066377, |
| "grad_norm": 0.23475851118564606, |
| "learning_rate": 3.415323851205752e-05, |
| "loss": 0.1649, |
| "step": 2013 |
| }, |
| { |
| "epoch": 0.7305041712005803, |
| "grad_norm": 0.2082839459180832, |
| "learning_rate": 3.406720983744193e-05, |
| "loss": 0.182, |
| "step": 2014 |
| }, |
| { |
| "epoch": 0.7308668842945231, |
| "grad_norm": 0.19769790768623352, |
| "learning_rate": 3.3981267397879215e-05, |
| "loss": 0.1543, |
| "step": 2015 |
| }, |
| { |
| "epoch": 0.7312295973884657, |
| "grad_norm": 0.1755545437335968, |
| "learning_rate": 3.38954113057755e-05, |
| "loss": 0.1469, |
| "step": 2016 |
| }, |
| { |
| "epoch": 0.7315923104824085, |
| "grad_norm": 0.18786299228668213, |
| "learning_rate": 3.3809641673423985e-05, |
| "loss": 0.1778, |
| "step": 2017 |
| }, |
| { |
| "epoch": 0.7319550235763511, |
| "grad_norm": 0.17806515097618103, |
| "learning_rate": 3.3723958613004855e-05, |
| "loss": 0.1567, |
| "step": 2018 |
| }, |
| { |
| "epoch": 0.7323177366702938, |
| "grad_norm": 0.17538048326969147, |
| "learning_rate": 3.3638362236584965e-05, |
| "loss": 0.1573, |
| "step": 2019 |
| }, |
| { |
| "epoch": 0.7326804497642365, |
| "grad_norm": 0.17543213069438934, |
| "learning_rate": 3.355285265611784e-05, |
| "loss": 0.1651, |
| "step": 2020 |
| }, |
| { |
| "epoch": 0.7330431628581792, |
| "grad_norm": 0.1797361820936203, |
| "learning_rate": 3.346742998344348e-05, |
| "loss": 0.1696, |
| "step": 2021 |
| }, |
| { |
| "epoch": 0.7334058759521219, |
| "grad_norm": 0.20315411686897278, |
| "learning_rate": 3.3382094330288216e-05, |
| "loss": 0.1682, |
| "step": 2022 |
| }, |
| { |
| "epoch": 0.7337685890460646, |
| "grad_norm": 0.17584829032421112, |
| "learning_rate": 3.3296845808264574e-05, |
| "loss": 0.1734, |
| "step": 2023 |
| }, |
| { |
| "epoch": 0.7341313021400072, |
| "grad_norm": 0.192337304353714, |
| "learning_rate": 3.321168452887106e-05, |
| "loss": 0.185, |
| "step": 2024 |
| }, |
| { |
| "epoch": 0.73449401523395, |
| "grad_norm": 0.1659361571073532, |
| "learning_rate": 3.3126610603492194e-05, |
| "loss": 0.1556, |
| "step": 2025 |
| }, |
| { |
| "epoch": 0.7348567283278926, |
| "grad_norm": 0.16753138601779938, |
| "learning_rate": 3.304162414339814e-05, |
| "loss": 0.1467, |
| "step": 2026 |
| }, |
| { |
| "epoch": 0.7352194414218354, |
| "grad_norm": 0.18743427097797394, |
| "learning_rate": 3.295672525974469e-05, |
| "loss": 0.1653, |
| "step": 2027 |
| }, |
| { |
| "epoch": 0.735582154515778, |
| "grad_norm": 0.16860130429267883, |
| "learning_rate": 3.287191406357311e-05, |
| "loss": 0.1563, |
| "step": 2028 |
| }, |
| { |
| "epoch": 0.7359448676097207, |
| "grad_norm": 0.16440363228321075, |
| "learning_rate": 3.278719066580995e-05, |
| "loss": 0.1493, |
| "step": 2029 |
| }, |
| { |
| "epoch": 0.7363075807036634, |
| "grad_norm": 0.1813763827085495, |
| "learning_rate": 3.270255517726691e-05, |
| "loss": 0.1621, |
| "step": 2030 |
| }, |
| { |
| "epoch": 0.7366702937976061, |
| "grad_norm": 0.16494570672512054, |
| "learning_rate": 3.261800770864083e-05, |
| "loss": 0.1381, |
| "step": 2031 |
| }, |
| { |
| "epoch": 0.7370330068915488, |
| "grad_norm": 0.1700211763381958, |
| "learning_rate": 3.2533548370513286e-05, |
| "loss": 0.1508, |
| "step": 2032 |
| }, |
| { |
| "epoch": 0.7373957199854915, |
| "grad_norm": 0.19019465148448944, |
| "learning_rate": 3.244917727335066e-05, |
| "loss": 0.1596, |
| "step": 2033 |
| }, |
| { |
| "epoch": 0.7377584330794341, |
| "grad_norm": 0.1853635013103485, |
| "learning_rate": 3.236489452750385e-05, |
| "loss": 0.1433, |
| "step": 2034 |
| }, |
| { |
| "epoch": 0.7381211461733769, |
| "grad_norm": 0.19163811206817627, |
| "learning_rate": 3.228070024320833e-05, |
| "loss": 0.1605, |
| "step": 2035 |
| }, |
| { |
| "epoch": 0.7384838592673195, |
| "grad_norm": 0.2122446596622467, |
| "learning_rate": 3.2196594530583735e-05, |
| "loss": 0.1792, |
| "step": 2036 |
| }, |
| { |
| "epoch": 0.7388465723612623, |
| "grad_norm": 0.18100525438785553, |
| "learning_rate": 3.211257749963391e-05, |
| "loss": 0.1703, |
| "step": 2037 |
| }, |
| { |
| "epoch": 0.7392092854552049, |
| "grad_norm": 0.15972734987735748, |
| "learning_rate": 3.2028649260246754e-05, |
| "loss": 0.1691, |
| "step": 2038 |
| }, |
| { |
| "epoch": 0.7395719985491476, |
| "grad_norm": 0.17128963768482208, |
| "learning_rate": 3.1944809922193986e-05, |
| "loss": 0.1611, |
| "step": 2039 |
| }, |
| { |
| "epoch": 0.7399347116430903, |
| "grad_norm": 0.18161478638648987, |
| "learning_rate": 3.186105959513103e-05, |
| "loss": 0.1457, |
| "step": 2040 |
| }, |
| { |
| "epoch": 0.740297424737033, |
| "grad_norm": 0.1911374032497406, |
| "learning_rate": 3.177739838859694e-05, |
| "loss": 0.1655, |
| "step": 2041 |
| }, |
| { |
| "epoch": 0.7406601378309757, |
| "grad_norm": 0.16643930971622467, |
| "learning_rate": 3.1693826412014114e-05, |
| "loss": 0.1744, |
| "step": 2042 |
| }, |
| { |
| "epoch": 0.7410228509249184, |
| "grad_norm": 0.17060095071792603, |
| "learning_rate": 3.1610343774688414e-05, |
| "loss": 0.1469, |
| "step": 2043 |
| }, |
| { |
| "epoch": 0.7413855640188611, |
| "grad_norm": 0.1795426309108734, |
| "learning_rate": 3.152695058580871e-05, |
| "loss": 0.1487, |
| "step": 2044 |
| }, |
| { |
| "epoch": 0.7417482771128038, |
| "grad_norm": 0.1854647696018219, |
| "learning_rate": 3.1443646954446914e-05, |
| "loss": 0.17, |
| "step": 2045 |
| }, |
| { |
| "epoch": 0.7421109902067464, |
| "grad_norm": 0.1683138608932495, |
| "learning_rate": 3.136043298955782e-05, |
| "loss": 0.1584, |
| "step": 2046 |
| }, |
| { |
| "epoch": 0.7424737033006892, |
| "grad_norm": 0.18557599186897278, |
| "learning_rate": 3.127730879997895e-05, |
| "loss": 0.1507, |
| "step": 2047 |
| }, |
| { |
| "epoch": 0.7428364163946318, |
| "grad_norm": 0.17158469557762146, |
| "learning_rate": 3.119427449443032e-05, |
| "loss": 0.1512, |
| "step": 2048 |
| }, |
| { |
| "epoch": 0.7431991294885746, |
| "grad_norm": 0.1670829951763153, |
| "learning_rate": 3.111133018151456e-05, |
| "loss": 0.167, |
| "step": 2049 |
| }, |
| { |
| "epoch": 0.7435618425825172, |
| "grad_norm": 0.1642339676618576, |
| "learning_rate": 3.102847596971646e-05, |
| "loss": 0.144, |
| "step": 2050 |
| }, |
| { |
| "epoch": 0.7439245556764599, |
| "grad_norm": 0.16173475980758667, |
| "learning_rate": 3.094571196740299e-05, |
| "loss": 0.1412, |
| "step": 2051 |
| }, |
| { |
| "epoch": 0.7442872687704026, |
| "grad_norm": 0.16731561720371246, |
| "learning_rate": 3.086303828282315e-05, |
| "loss": 0.1586, |
| "step": 2052 |
| }, |
| { |
| "epoch": 0.7446499818643453, |
| "grad_norm": 0.19204100966453552, |
| "learning_rate": 3.078045502410779e-05, |
| "loss": 0.2226, |
| "step": 2053 |
| }, |
| { |
| "epoch": 0.745012694958288, |
| "grad_norm": 0.17547018826007843, |
| "learning_rate": 3.069796229926952e-05, |
| "loss": 0.1509, |
| "step": 2054 |
| }, |
| { |
| "epoch": 0.7453754080522307, |
| "grad_norm": 0.1662409007549286, |
| "learning_rate": 3.0615560216202486e-05, |
| "loss": 0.1554, |
| "step": 2055 |
| }, |
| { |
| "epoch": 0.7457381211461733, |
| "grad_norm": 0.18224076926708221, |
| "learning_rate": 3.0533248882682374e-05, |
| "loss": 0.1608, |
| "step": 2056 |
| }, |
| { |
| "epoch": 0.7461008342401161, |
| "grad_norm": 0.2161344736814499, |
| "learning_rate": 3.045102840636609e-05, |
| "loss": 0.1661, |
| "step": 2057 |
| }, |
| { |
| "epoch": 0.7464635473340587, |
| "grad_norm": 0.16624325513839722, |
| "learning_rate": 3.0368898894791753e-05, |
| "loss": 0.1558, |
| "step": 2058 |
| }, |
| { |
| "epoch": 0.7468262604280015, |
| "grad_norm": 0.15912269055843353, |
| "learning_rate": 3.0286860455378462e-05, |
| "loss": 0.1536, |
| "step": 2059 |
| }, |
| { |
| "epoch": 0.7471889735219441, |
| "grad_norm": 0.1618340164422989, |
| "learning_rate": 3.0204913195426254e-05, |
| "loss": 0.1436, |
| "step": 2060 |
| }, |
| { |
| "epoch": 0.7475516866158868, |
| "grad_norm": 0.16747722029685974, |
| "learning_rate": 3.0123057222115836e-05, |
| "loss": 0.149, |
| "step": 2061 |
| }, |
| { |
| "epoch": 0.7479143997098295, |
| "grad_norm": 0.1707213968038559, |
| "learning_rate": 3.0041292642508644e-05, |
| "loss": 0.1522, |
| "step": 2062 |
| }, |
| { |
| "epoch": 0.7482771128037722, |
| "grad_norm": 0.17695897817611694, |
| "learning_rate": 2.995961956354646e-05, |
| "loss": 0.1573, |
| "step": 2063 |
| }, |
| { |
| "epoch": 0.7486398258977149, |
| "grad_norm": 0.18760527670383453, |
| "learning_rate": 2.9878038092051443e-05, |
| "loss": 0.1551, |
| "step": 2064 |
| }, |
| { |
| "epoch": 0.7490025389916576, |
| "grad_norm": 0.1940336525440216, |
| "learning_rate": 2.9796548334725916e-05, |
| "loss": 0.1531, |
| "step": 2065 |
| }, |
| { |
| "epoch": 0.7493652520856003, |
| "grad_norm": 0.16656464338302612, |
| "learning_rate": 2.9715150398152268e-05, |
| "loss": 0.1474, |
| "step": 2066 |
| }, |
| { |
| "epoch": 0.749727965179543, |
| "grad_norm": 0.16804639995098114, |
| "learning_rate": 2.9633844388792732e-05, |
| "loss": 0.1651, |
| "step": 2067 |
| }, |
| { |
| "epoch": 0.7500906782734856, |
| "grad_norm": 0.16543330252170563, |
| "learning_rate": 2.9552630412989434e-05, |
| "loss": 0.1433, |
| "step": 2068 |
| }, |
| { |
| "epoch": 0.7504533913674284, |
| "grad_norm": 0.17684879899024963, |
| "learning_rate": 2.9471508576964023e-05, |
| "loss": 0.1533, |
| "step": 2069 |
| }, |
| { |
| "epoch": 0.750816104461371, |
| "grad_norm": 0.16878783702850342, |
| "learning_rate": 2.939047898681765e-05, |
| "loss": 0.1509, |
| "step": 2070 |
| }, |
| { |
| "epoch": 0.7511788175553138, |
| "grad_norm": 0.16449496150016785, |
| "learning_rate": 2.93095417485308e-05, |
| "loss": 0.1628, |
| "step": 2071 |
| }, |
| { |
| "epoch": 0.7515415306492564, |
| "grad_norm": 0.20348592102527618, |
| "learning_rate": 2.9228696967963275e-05, |
| "loss": 0.1695, |
| "step": 2072 |
| }, |
| { |
| "epoch": 0.7519042437431991, |
| "grad_norm": 0.1528720259666443, |
| "learning_rate": 2.9147944750853816e-05, |
| "loss": 0.1396, |
| "step": 2073 |
| }, |
| { |
| "epoch": 0.7522669568371418, |
| "grad_norm": 0.17836391925811768, |
| "learning_rate": 2.906728520282015e-05, |
| "loss": 0.1538, |
| "step": 2074 |
| }, |
| { |
| "epoch": 0.7526296699310845, |
| "grad_norm": 0.16207584738731384, |
| "learning_rate": 2.898671842935885e-05, |
| "loss": 0.1457, |
| "step": 2075 |
| }, |
| { |
| "epoch": 0.7529923830250272, |
| "grad_norm": 0.17391245067119598, |
| "learning_rate": 2.8906244535845072e-05, |
| "loss": 0.1813, |
| "step": 2076 |
| }, |
| { |
| "epoch": 0.7533550961189699, |
| "grad_norm": 0.1827738881111145, |
| "learning_rate": 2.8825863627532524e-05, |
| "loss": 0.1712, |
| "step": 2077 |
| }, |
| { |
| "epoch": 0.7537178092129125, |
| "grad_norm": 0.16939976811408997, |
| "learning_rate": 2.8745575809553294e-05, |
| "loss": 0.1599, |
| "step": 2078 |
| }, |
| { |
| "epoch": 0.7540805223068553, |
| "grad_norm": 0.15600422024726868, |
| "learning_rate": 2.8665381186917718e-05, |
| "loss": 0.1469, |
| "step": 2079 |
| }, |
| { |
| "epoch": 0.7544432354007979, |
| "grad_norm": 0.2160848081111908, |
| "learning_rate": 2.858527986451419e-05, |
| "loss": 0.1748, |
| "step": 2080 |
| }, |
| { |
| "epoch": 0.7548059484947407, |
| "grad_norm": 0.16352678835391998, |
| "learning_rate": 2.8505271947109203e-05, |
| "loss": 0.1486, |
| "step": 2081 |
| }, |
| { |
| "epoch": 0.7551686615886833, |
| "grad_norm": 0.16789479553699493, |
| "learning_rate": 2.842535753934695e-05, |
| "loss": 0.1765, |
| "step": 2082 |
| }, |
| { |
| "epoch": 0.755531374682626, |
| "grad_norm": 0.16260650753974915, |
| "learning_rate": 2.8345536745749403e-05, |
| "loss": 0.1374, |
| "step": 2083 |
| }, |
| { |
| "epoch": 0.7558940877765687, |
| "grad_norm": 0.16362746059894562, |
| "learning_rate": 2.8265809670716027e-05, |
| "loss": 0.1528, |
| "step": 2084 |
| }, |
| { |
| "epoch": 0.7562568008705114, |
| "grad_norm": 0.1730203479528427, |
| "learning_rate": 2.818617641852376e-05, |
| "loss": 0.16, |
| "step": 2085 |
| }, |
| { |
| "epoch": 0.7566195139644541, |
| "grad_norm": 0.1941351443529129, |
| "learning_rate": 2.8106637093326782e-05, |
| "loss": 0.1578, |
| "step": 2086 |
| }, |
| { |
| "epoch": 0.7569822270583968, |
| "grad_norm": 0.17957964539527893, |
| "learning_rate": 2.8027191799156514e-05, |
| "loss": 0.1497, |
| "step": 2087 |
| }, |
| { |
| "epoch": 0.7573449401523396, |
| "grad_norm": 0.1569589227437973, |
| "learning_rate": 2.794784063992131e-05, |
| "loss": 0.1377, |
| "step": 2088 |
| }, |
| { |
| "epoch": 0.7577076532462822, |
| "grad_norm": 0.16305673122406006, |
| "learning_rate": 2.7868583719406403e-05, |
| "loss": 0.1471, |
| "step": 2089 |
| }, |
| { |
| "epoch": 0.7580703663402248, |
| "grad_norm": 0.171325221657753, |
| "learning_rate": 2.778942114127382e-05, |
| "loss": 0.1501, |
| "step": 2090 |
| }, |
| { |
| "epoch": 0.7584330794341676, |
| "grad_norm": 0.1620980203151703, |
| "learning_rate": 2.771035300906215e-05, |
| "loss": 0.1461, |
| "step": 2091 |
| }, |
| { |
| "epoch": 0.7587957925281102, |
| "grad_norm": 0.16900931298732758, |
| "learning_rate": 2.7631379426186434e-05, |
| "loss": 0.143, |
| "step": 2092 |
| }, |
| { |
| "epoch": 0.759158505622053, |
| "grad_norm": 0.1761879175901413, |
| "learning_rate": 2.755250049593816e-05, |
| "loss": 0.1541, |
| "step": 2093 |
| }, |
| { |
| "epoch": 0.7595212187159956, |
| "grad_norm": 0.18240278959274292, |
| "learning_rate": 2.74737163214849e-05, |
| "loss": 0.1931, |
| "step": 2094 |
| }, |
| { |
| "epoch": 0.7598839318099383, |
| "grad_norm": 0.15427257120609283, |
| "learning_rate": 2.7395027005870343e-05, |
| "loss": 0.1453, |
| "step": 2095 |
| }, |
| { |
| "epoch": 0.760246644903881, |
| "grad_norm": 0.18148113787174225, |
| "learning_rate": 2.73164326520141e-05, |
| "loss": 0.1733, |
| "step": 2096 |
| }, |
| { |
| "epoch": 0.7606093579978237, |
| "grad_norm": 0.1736038774251938, |
| "learning_rate": 2.7237933362711576e-05, |
| "loss": 0.1532, |
| "step": 2097 |
| }, |
| { |
| "epoch": 0.7609720710917665, |
| "grad_norm": 0.18636751174926758, |
| "learning_rate": 2.715952924063383e-05, |
| "loss": 0.1627, |
| "step": 2098 |
| }, |
| { |
| "epoch": 0.7613347841857091, |
| "grad_norm": 0.18383683264255524, |
| "learning_rate": 2.7081220388327522e-05, |
| "loss": 0.1625, |
| "step": 2099 |
| }, |
| { |
| "epoch": 0.7616974972796517, |
| "grad_norm": 0.16700130701065063, |
| "learning_rate": 2.70030069082146e-05, |
| "loss": 0.1536, |
| "step": 2100 |
| }, |
| { |
| "epoch": 0.7620602103735945, |
| "grad_norm": 0.178177148103714, |
| "learning_rate": 2.692488890259235e-05, |
| "loss": 0.1593, |
| "step": 2101 |
| }, |
| { |
| "epoch": 0.7624229234675372, |
| "grad_norm": 0.16141119599342346, |
| "learning_rate": 2.6846866473633125e-05, |
| "loss": 0.1476, |
| "step": 2102 |
| }, |
| { |
| "epoch": 0.7627856365614799, |
| "grad_norm": 0.16690880060195923, |
| "learning_rate": 2.676893972338432e-05, |
| "loss": 0.1606, |
| "step": 2103 |
| }, |
| { |
| "epoch": 0.7631483496554226, |
| "grad_norm": 0.18088023364543915, |
| "learning_rate": 2.6691108753768146e-05, |
| "loss": 0.1799, |
| "step": 2104 |
| }, |
| { |
| "epoch": 0.7635110627493652, |
| "grad_norm": 0.16774174571037292, |
| "learning_rate": 2.661337366658161e-05, |
| "loss": 0.1534, |
| "step": 2105 |
| }, |
| { |
| "epoch": 0.763873775843308, |
| "grad_norm": 0.1739625185728073, |
| "learning_rate": 2.653573456349624e-05, |
| "loss": 0.1752, |
| "step": 2106 |
| }, |
| { |
| "epoch": 0.7642364889372506, |
| "grad_norm": 0.1661982536315918, |
| "learning_rate": 2.6458191546058064e-05, |
| "loss": 0.1554, |
| "step": 2107 |
| }, |
| { |
| "epoch": 0.7645992020311934, |
| "grad_norm": 0.15863363444805145, |
| "learning_rate": 2.638074471568739e-05, |
| "loss": 0.1563, |
| "step": 2108 |
| }, |
| { |
| "epoch": 0.764961915125136, |
| "grad_norm": 0.1664765626192093, |
| "learning_rate": 2.630339417367882e-05, |
| "loss": 0.1613, |
| "step": 2109 |
| }, |
| { |
| "epoch": 0.7653246282190788, |
| "grad_norm": 0.17983406782150269, |
| "learning_rate": 2.622614002120091e-05, |
| "loss": 0.1354, |
| "step": 2110 |
| }, |
| { |
| "epoch": 0.7656873413130214, |
| "grad_norm": 0.18512356281280518, |
| "learning_rate": 2.6148982359296205e-05, |
| "loss": 0.1548, |
| "step": 2111 |
| }, |
| { |
| "epoch": 0.766050054406964, |
| "grad_norm": 0.16237185895442963, |
| "learning_rate": 2.6071921288880984e-05, |
| "loss": 0.151, |
| "step": 2112 |
| }, |
| { |
| "epoch": 0.7664127675009068, |
| "grad_norm": 0.16601556539535522, |
| "learning_rate": 2.5994956910745326e-05, |
| "loss": 0.1616, |
| "step": 2113 |
| }, |
| { |
| "epoch": 0.7667754805948495, |
| "grad_norm": 0.163995161652565, |
| "learning_rate": 2.5918089325552707e-05, |
| "loss": 0.1485, |
| "step": 2114 |
| }, |
| { |
| "epoch": 0.7671381936887922, |
| "grad_norm": 0.18575289845466614, |
| "learning_rate": 2.5841318633840072e-05, |
| "loss": 0.1577, |
| "step": 2115 |
| }, |
| { |
| "epoch": 0.7675009067827349, |
| "grad_norm": 0.19277150928974152, |
| "learning_rate": 2.576464493601761e-05, |
| "loss": 0.155, |
| "step": 2116 |
| }, |
| { |
| "epoch": 0.7678636198766775, |
| "grad_norm": 0.1656551957130432, |
| "learning_rate": 2.5688068332368632e-05, |
| "loss": 0.1486, |
| "step": 2117 |
| }, |
| { |
| "epoch": 0.7682263329706203, |
| "grad_norm": 0.15799161791801453, |
| "learning_rate": 2.5611588923049544e-05, |
| "loss": 0.1369, |
| "step": 2118 |
| }, |
| { |
| "epoch": 0.7685890460645629, |
| "grad_norm": 0.17702096700668335, |
| "learning_rate": 2.5535206808089553e-05, |
| "loss": 0.1789, |
| "step": 2119 |
| }, |
| { |
| "epoch": 0.7689517591585057, |
| "grad_norm": 2096.28515625, |
| "learning_rate": 2.5458922087390613e-05, |
| "loss": 0.1436, |
| "step": 2120 |
| }, |
| { |
| "epoch": 0.7693144722524483, |
| "grad_norm": 0.17093558609485626, |
| "learning_rate": 2.5382734860727332e-05, |
| "loss": 0.1518, |
| "step": 2121 |
| }, |
| { |
| "epoch": 0.769677185346391, |
| "grad_norm": 0.1638222485780716, |
| "learning_rate": 2.5306645227746762e-05, |
| "loss": 0.1473, |
| "step": 2122 |
| }, |
| { |
| "epoch": 0.7700398984403337, |
| "grad_norm": 0.1996994912624359, |
| "learning_rate": 2.523065328796831e-05, |
| "loss": 0.1809, |
| "step": 2123 |
| }, |
| { |
| "epoch": 0.7704026115342764, |
| "grad_norm": 0.1753552258014679, |
| "learning_rate": 2.515475914078369e-05, |
| "loss": 0.1811, |
| "step": 2124 |
| }, |
| { |
| "epoch": 0.7707653246282191, |
| "grad_norm": 0.19755405187606812, |
| "learning_rate": 2.5078962885456612e-05, |
| "loss": 0.1783, |
| "step": 2125 |
| }, |
| { |
| "epoch": 0.7711280377221618, |
| "grad_norm": 0.18720857799053192, |
| "learning_rate": 2.5003264621122802e-05, |
| "loss": 0.1519, |
| "step": 2126 |
| }, |
| { |
| "epoch": 0.7714907508161044, |
| "grad_norm": 0.1806974709033966, |
| "learning_rate": 2.4927664446789788e-05, |
| "loss": 0.1594, |
| "step": 2127 |
| }, |
| { |
| "epoch": 0.7718534639100472, |
| "grad_norm": 0.18246807157993317, |
| "learning_rate": 2.4852162461336835e-05, |
| "loss": 0.1395, |
| "step": 2128 |
| }, |
| { |
| "epoch": 0.7722161770039898, |
| "grad_norm": 0.18061847984790802, |
| "learning_rate": 2.477675876351475e-05, |
| "loss": 0.1709, |
| "step": 2129 |
| }, |
| { |
| "epoch": 0.7725788900979326, |
| "grad_norm": 0.1823715716600418, |
| "learning_rate": 2.4701453451945846e-05, |
| "loss": 0.1488, |
| "step": 2130 |
| }, |
| { |
| "epoch": 0.7729416031918752, |
| "grad_norm": 0.16946843266487122, |
| "learning_rate": 2.4626246625123706e-05, |
| "loss": 0.1498, |
| "step": 2131 |
| }, |
| { |
| "epoch": 0.773304316285818, |
| "grad_norm": 0.17811253666877747, |
| "learning_rate": 2.455113838141311e-05, |
| "loss": 0.1649, |
| "step": 2132 |
| }, |
| { |
| "epoch": 0.7736670293797606, |
| "grad_norm": 0.16584321856498718, |
| "learning_rate": 2.4476128819049893e-05, |
| "loss": 0.1814, |
| "step": 2133 |
| }, |
| { |
| "epoch": 0.7740297424737033, |
| "grad_norm": 0.15835148096084595, |
| "learning_rate": 2.4401218036140848e-05, |
| "loss": 0.1453, |
| "step": 2134 |
| }, |
| { |
| "epoch": 0.774392455567646, |
| "grad_norm": 0.17442336678504944, |
| "learning_rate": 2.4326406130663527e-05, |
| "loss": 0.1457, |
| "step": 2135 |
| }, |
| { |
| "epoch": 0.7747551686615887, |
| "grad_norm": 0.18500109016895294, |
| "learning_rate": 2.4251693200466242e-05, |
| "loss": 0.1673, |
| "step": 2136 |
| }, |
| { |
| "epoch": 0.7751178817555314, |
| "grad_norm": 0.17963416874408722, |
| "learning_rate": 2.417707934326775e-05, |
| "loss": 0.1522, |
| "step": 2137 |
| }, |
| { |
| "epoch": 0.7754805948494741, |
| "grad_norm": 0.17526273429393768, |
| "learning_rate": 2.4102564656657312e-05, |
| "loss": 0.1485, |
| "step": 2138 |
| }, |
| { |
| "epoch": 0.7758433079434167, |
| "grad_norm": 0.15860708057880402, |
| "learning_rate": 2.402814923809442e-05, |
| "loss": 0.1446, |
| "step": 2139 |
| }, |
| { |
| "epoch": 0.7762060210373595, |
| "grad_norm": 0.1740608960390091, |
| "learning_rate": 2.3953833184908757e-05, |
| "loss": 0.1521, |
| "step": 2140 |
| }, |
| { |
| "epoch": 0.7765687341313021, |
| "grad_norm": 0.1701829582452774, |
| "learning_rate": 2.387961659430007e-05, |
| "loss": 0.1386, |
| "step": 2141 |
| }, |
| { |
| "epoch": 0.7769314472252449, |
| "grad_norm": 0.17111440002918243, |
| "learning_rate": 2.380549956333793e-05, |
| "loss": 0.1452, |
| "step": 2142 |
| }, |
| { |
| "epoch": 0.7772941603191875, |
| "grad_norm": 0.17982304096221924, |
| "learning_rate": 2.3731482188961818e-05, |
| "loss": 0.163, |
| "step": 2143 |
| }, |
| { |
| "epoch": 0.7776568734131302, |
| "grad_norm": 0.1801091730594635, |
| "learning_rate": 2.3657564567980782e-05, |
| "loss": 0.1423, |
| "step": 2144 |
| }, |
| { |
| "epoch": 0.7780195865070729, |
| "grad_norm": 0.15309491753578186, |
| "learning_rate": 2.358374679707339e-05, |
| "loss": 0.1393, |
| "step": 2145 |
| }, |
| { |
| "epoch": 0.7783822996010156, |
| "grad_norm": 0.15650945901870728, |
| "learning_rate": 2.351002897278771e-05, |
| "loss": 0.1894, |
| "step": 2146 |
| }, |
| { |
| "epoch": 0.7787450126949583, |
| "grad_norm": 0.17866793274879456, |
| "learning_rate": 2.343641119154101e-05, |
| "loss": 0.1549, |
| "step": 2147 |
| }, |
| { |
| "epoch": 0.779107725788901, |
| "grad_norm": 0.17232728004455566, |
| "learning_rate": 2.336289354961969e-05, |
| "loss": 0.1802, |
| "step": 2148 |
| }, |
| { |
| "epoch": 0.7794704388828436, |
| "grad_norm": 0.18021385371685028, |
| "learning_rate": 2.3289476143179202e-05, |
| "loss": 0.143, |
| "step": 2149 |
| }, |
| { |
| "epoch": 0.7798331519767864, |
| "grad_norm": 0.18300630152225494, |
| "learning_rate": 2.3216159068243958e-05, |
| "loss": 0.1739, |
| "step": 2150 |
| }, |
| { |
| "epoch": 0.780195865070729, |
| "grad_norm": 0.18222151696681976, |
| "learning_rate": 2.314294242070706e-05, |
| "loss": 0.1653, |
| "step": 2151 |
| }, |
| { |
| "epoch": 0.7805585781646718, |
| "grad_norm": 0.16753800213336945, |
| "learning_rate": 2.30698262963303e-05, |
| "loss": 0.1766, |
| "step": 2152 |
| }, |
| { |
| "epoch": 0.7809212912586144, |
| "grad_norm": 0.16288548707962036, |
| "learning_rate": 2.2996810790743983e-05, |
| "loss": 0.1417, |
| "step": 2153 |
| }, |
| { |
| "epoch": 0.7812840043525572, |
| "grad_norm": 0.14791814982891083, |
| "learning_rate": 2.2923895999446764e-05, |
| "loss": 0.1452, |
| "step": 2154 |
| }, |
| { |
| "epoch": 0.7816467174464998, |
| "grad_norm": 0.17105069756507874, |
| "learning_rate": 2.2851082017805703e-05, |
| "loss": 0.1641, |
| "step": 2155 |
| }, |
| { |
| "epoch": 0.7820094305404425, |
| "grad_norm": 0.17432281374931335, |
| "learning_rate": 2.2778368941055882e-05, |
| "loss": 0.1774, |
| "step": 2156 |
| }, |
| { |
| "epoch": 0.7823721436343852, |
| "grad_norm": 0.19430530071258545, |
| "learning_rate": 2.2705756864300454e-05, |
| "loss": 0.167, |
| "step": 2157 |
| }, |
| { |
| "epoch": 0.7827348567283279, |
| "grad_norm": 0.16627925634384155, |
| "learning_rate": 2.2633245882510457e-05, |
| "loss": 0.1328, |
| "step": 2158 |
| }, |
| { |
| "epoch": 0.7830975698222706, |
| "grad_norm": 0.1691751331090927, |
| "learning_rate": 2.256083609052474e-05, |
| "loss": 0.1504, |
| "step": 2159 |
| }, |
| { |
| "epoch": 0.7834602829162133, |
| "grad_norm": 0.17866089940071106, |
| "learning_rate": 2.2488527583049736e-05, |
| "loss": 0.1503, |
| "step": 2160 |
| }, |
| { |
| "epoch": 0.7838229960101559, |
| "grad_norm": 0.19467145204544067, |
| "learning_rate": 2.2416320454659512e-05, |
| "loss": 0.1611, |
| "step": 2161 |
| }, |
| { |
| "epoch": 0.7841857091040987, |
| "grad_norm": 0.17603172361850739, |
| "learning_rate": 2.2344214799795438e-05, |
| "loss": 0.1519, |
| "step": 2162 |
| }, |
| { |
| "epoch": 0.7845484221980413, |
| "grad_norm": 0.18451876938343048, |
| "learning_rate": 2.2272210712766205e-05, |
| "loss": 0.1675, |
| "step": 2163 |
| }, |
| { |
| "epoch": 0.7849111352919841, |
| "grad_norm": 0.17610016465187073, |
| "learning_rate": 2.2200308287747673e-05, |
| "loss": 0.1597, |
| "step": 2164 |
| }, |
| { |
| "epoch": 0.7852738483859267, |
| "grad_norm": 0.1533452421426773, |
| "learning_rate": 2.21285076187827e-05, |
| "loss": 0.1381, |
| "step": 2165 |
| }, |
| { |
| "epoch": 0.7856365614798694, |
| "grad_norm": 0.16271378099918365, |
| "learning_rate": 2.205680879978107e-05, |
| "loss": 0.1435, |
| "step": 2166 |
| }, |
| { |
| "epoch": 0.7859992745738121, |
| "grad_norm": 0.15660040080547333, |
| "learning_rate": 2.19852119245194e-05, |
| "loss": 0.1441, |
| "step": 2167 |
| }, |
| { |
| "epoch": 0.7863619876677548, |
| "grad_norm": 0.16608907282352448, |
| "learning_rate": 2.1913717086640906e-05, |
| "loss": 0.1603, |
| "step": 2168 |
| }, |
| { |
| "epoch": 0.7867247007616975, |
| "grad_norm": 0.19811011850833893, |
| "learning_rate": 2.1842324379655378e-05, |
| "loss": 0.1729, |
| "step": 2169 |
| }, |
| { |
| "epoch": 0.7870874138556402, |
| "grad_norm": 0.16923308372497559, |
| "learning_rate": 2.177103389693903e-05, |
| "loss": 0.1572, |
| "step": 2170 |
| }, |
| { |
| "epoch": 0.7874501269495828, |
| "grad_norm": 0.16869297623634338, |
| "learning_rate": 2.169984573173436e-05, |
| "loss": 0.1523, |
| "step": 2171 |
| }, |
| { |
| "epoch": 0.7878128400435256, |
| "grad_norm": 0.16741646826267242, |
| "learning_rate": 2.162875997715005e-05, |
| "loss": 0.1336, |
| "step": 2172 |
| }, |
| { |
| "epoch": 0.7881755531374682, |
| "grad_norm": 0.17434288561344147, |
| "learning_rate": 2.1557776726160807e-05, |
| "loss": 0.1615, |
| "step": 2173 |
| }, |
| { |
| "epoch": 0.788538266231411, |
| "grad_norm": 0.19176846742630005, |
| "learning_rate": 2.1486896071607364e-05, |
| "loss": 0.158, |
| "step": 2174 |
| }, |
| { |
| "epoch": 0.7889009793253536, |
| "grad_norm": 0.19300417602062225, |
| "learning_rate": 2.141611810619617e-05, |
| "loss": 0.1618, |
| "step": 2175 |
| }, |
| { |
| "epoch": 0.7892636924192964, |
| "grad_norm": 0.18857765197753906, |
| "learning_rate": 2.1345442922499394e-05, |
| "loss": 0.1552, |
| "step": 2176 |
| }, |
| { |
| "epoch": 0.789626405513239, |
| "grad_norm": 0.16958756744861603, |
| "learning_rate": 2.127487061295478e-05, |
| "loss": 0.1498, |
| "step": 2177 |
| }, |
| { |
| "epoch": 0.7899891186071817, |
| "grad_norm": 0.1617862582206726, |
| "learning_rate": 2.1204401269865526e-05, |
| "loss": 0.1468, |
| "step": 2178 |
| }, |
| { |
| "epoch": 0.7903518317011244, |
| "grad_norm": 0.17696796357631683, |
| "learning_rate": 2.113403498540011e-05, |
| "loss": 0.158, |
| "step": 2179 |
| }, |
| { |
| "epoch": 0.7907145447950671, |
| "grad_norm": 0.18679635226726532, |
| "learning_rate": 2.1063771851592316e-05, |
| "loss": 0.1725, |
| "step": 2180 |
| }, |
| { |
| "epoch": 0.7910772578890098, |
| "grad_norm": 0.16767951846122742, |
| "learning_rate": 2.099361196034093e-05, |
| "loss": 0.1541, |
| "step": 2181 |
| }, |
| { |
| "epoch": 0.7914399709829525, |
| "grad_norm": 0.17078953981399536, |
| "learning_rate": 2.09235554034097e-05, |
| "loss": 0.1517, |
| "step": 2182 |
| }, |
| { |
| "epoch": 0.7918026840768951, |
| "grad_norm": 0.18054896593093872, |
| "learning_rate": 2.085360227242731e-05, |
| "loss": 0.1668, |
| "step": 2183 |
| }, |
| { |
| "epoch": 0.7921653971708379, |
| "grad_norm": 0.17167535424232483, |
| "learning_rate": 2.0783752658887066e-05, |
| "loss": 0.1486, |
| "step": 2184 |
| }, |
| { |
| "epoch": 0.7925281102647805, |
| "grad_norm": 0.18194803595542908, |
| "learning_rate": 2.0714006654146955e-05, |
| "loss": 0.1705, |
| "step": 2185 |
| }, |
| { |
| "epoch": 0.7928908233587233, |
| "grad_norm": 0.15957947075366974, |
| "learning_rate": 2.0644364349429378e-05, |
| "loss": 0.1393, |
| "step": 2186 |
| }, |
| { |
| "epoch": 0.7932535364526659, |
| "grad_norm": 0.17193473875522614, |
| "learning_rate": 2.057482583582122e-05, |
| "loss": 0.1549, |
| "step": 2187 |
| }, |
| { |
| "epoch": 0.7936162495466086, |
| "grad_norm": 0.16619963943958282, |
| "learning_rate": 2.0505391204273495e-05, |
| "loss": 0.1526, |
| "step": 2188 |
| }, |
| { |
| "epoch": 0.7939789626405513, |
| "grad_norm": 0.15132339298725128, |
| "learning_rate": 2.043606054560141e-05, |
| "loss": 0.1602, |
| "step": 2189 |
| }, |
| { |
| "epoch": 0.794341675734494, |
| "grad_norm": 0.17620229721069336, |
| "learning_rate": 2.0366833950484164e-05, |
| "loss": 0.1505, |
| "step": 2190 |
| }, |
| { |
| "epoch": 0.7947043888284367, |
| "grad_norm": 0.16328759491443634, |
| "learning_rate": 2.0297711509464833e-05, |
| "loss": 0.1407, |
| "step": 2191 |
| }, |
| { |
| "epoch": 0.7950671019223794, |
| "grad_norm": 0.16912280023097992, |
| "learning_rate": 2.0228693312950352e-05, |
| "loss": 0.1571, |
| "step": 2192 |
| }, |
| { |
| "epoch": 0.795429815016322, |
| "grad_norm": 0.16919687390327454, |
| "learning_rate": 2.0159779451211204e-05, |
| "loss": 0.1484, |
| "step": 2193 |
| }, |
| { |
| "epoch": 0.7957925281102648, |
| "grad_norm": 0.17652738094329834, |
| "learning_rate": 2.009097001438147e-05, |
| "loss": 0.1388, |
| "step": 2194 |
| }, |
| { |
| "epoch": 0.7961552412042074, |
| "grad_norm": 0.17439448833465576, |
| "learning_rate": 2.0022265092458638e-05, |
| "loss": 0.162, |
| "step": 2195 |
| }, |
| { |
| "epoch": 0.7965179542981502, |
| "grad_norm": 0.16315314173698425, |
| "learning_rate": 1.9953664775303483e-05, |
| "loss": 0.1463, |
| "step": 2196 |
| }, |
| { |
| "epoch": 0.7968806673920928, |
| "grad_norm": 0.15268266201019287, |
| "learning_rate": 1.988516915263996e-05, |
| "loss": 0.1421, |
| "step": 2197 |
| }, |
| { |
| "epoch": 0.7972433804860355, |
| "grad_norm": 0.16543833911418915, |
| "learning_rate": 1.981677831405516e-05, |
| "loss": 0.1495, |
| "step": 2198 |
| }, |
| { |
| "epoch": 0.7976060935799782, |
| "grad_norm": 0.1608053743839264, |
| "learning_rate": 1.974849234899907e-05, |
| "loss": 0.1383, |
| "step": 2199 |
| }, |
| { |
| "epoch": 0.7979688066739209, |
| "grad_norm": 0.1577446609735489, |
| "learning_rate": 1.9680311346784496e-05, |
| "loss": 0.1418, |
| "step": 2200 |
| } |
| ], |
| "logging_steps": 1, |
| "max_steps": 2757, |
| "num_input_tokens_seen": 0, |
| "num_train_epochs": 1, |
| "save_steps": 200, |
| "stateful_callbacks": { |
| "TrainerControl": { |
| "args": { |
| "should_epoch_stop": false, |
| "should_evaluate": false, |
| "should_log": false, |
| "should_save": true, |
| "should_training_stop": false |
| }, |
| "attributes": {} |
| } |
| }, |
| "total_flos": 3.5288134512966107e+19, |
| "train_batch_size": 8, |
| "trial_name": null, |
| "trial_params": null |
| } |
|
|