{
  "best_metric": null,
  "best_model_checkpoint": null,
  "epoch": 9.235294117647058,
  "eval_steps": 500,
  "global_step": 120,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.0784313725490196,
      "grad_norm": 5.935066953271119,
      "learning_rate": 4.1666666666666667e-07,
      "loss": 1.1559,
      "step": 1
    },
    {
      "epoch": 0.1568627450980392,
      "grad_norm": 5.931820940431562,
      "learning_rate": 8.333333333333333e-07,
      "loss": 1.1107,
      "step": 2
    },
    {
      "epoch": 0.23529411764705882,
      "grad_norm": 6.143905320771078,
      "learning_rate": 1.25e-06,
      "loss": 1.1384,
      "step": 3
    },
    {
      "epoch": 0.3137254901960784,
      "grad_norm": 6.065274861977263,
      "learning_rate": 1.6666666666666667e-06,
      "loss": 1.1421,
      "step": 4
    },
    {
      "epoch": 0.39215686274509803,
      "grad_norm": 5.8625107951536535,
      "learning_rate": 2.0833333333333334e-06,
      "loss": 1.1439,
      "step": 5
    },
    {
      "epoch": 0.47058823529411764,
      "grad_norm": 5.067374092992189,
      "learning_rate": 2.5e-06,
      "loss": 1.0618,
      "step": 6
    },
    {
      "epoch": 0.5490196078431373,
      "grad_norm": 4.755801552471063,
      "learning_rate": 2.916666666666667e-06,
      "loss": 1.0401,
      "step": 7
    },
    {
      "epoch": 0.6274509803921569,
      "grad_norm": 3.3129565953149323,
      "learning_rate": 3.3333333333333333e-06,
      "loss": 0.9898,
      "step": 8
    },
    {
      "epoch": 0.7058823529411765,
      "grad_norm": 3.3912326629630547,
      "learning_rate": 3.7500000000000005e-06,
      "loss": 1.011,
      "step": 9
    },
    {
      "epoch": 0.7843137254901961,
      "grad_norm": 3.2048784731280806,
      "learning_rate": 4.166666666666667e-06,
      "loss": 0.9935,
      "step": 10
    },
    {
      "epoch": 0.8627450980392157,
      "grad_norm": 3.752806588180809,
      "learning_rate": 4.583333333333333e-06,
      "loss": 0.9037,
      "step": 11
    },
    {
      "epoch": 0.9411764705882353,
      "grad_norm": 3.7034973959501185,
      "learning_rate": 5e-06,
      "loss": 0.9095,
      "step": 12
    },
    {
      "epoch": 1.0,
      "grad_norm": 3.7034973959501185,
      "learning_rate": 4.998942375205502e-06,
      "loss": 0.6715,
      "step": 13
    },
    {
      "epoch": 1.0784313725490196,
      "grad_norm": 3.3436999577667104,
      "learning_rate": 4.995770395678171e-06,
      "loss": 0.8186,
      "step": 14
    },
    {
      "epoch": 1.156862745098039,
      "grad_norm": 2.662791286273683,
      "learning_rate": 4.990486745229364e-06,
      "loss": 0.7876,
      "step": 15
    },
    {
      "epoch": 1.2352941176470589,
      "grad_norm": 3.1462660546154915,
      "learning_rate": 4.983095894354858e-06,
      "loss": 0.7498,
      "step": 16
    },
    {
      "epoch": 1.3137254901960784,
      "grad_norm": 2.806483473961631,
      "learning_rate": 4.973604096452361e-06,
      "loss": 0.7377,
      "step": 17
    },
    {
      "epoch": 1.392156862745098,
      "grad_norm": 2.4168449532239933,
      "learning_rate": 4.962019382530521e-06,
      "loss": 0.6722,
      "step": 18
    },
    {
      "epoch": 1.4705882352941178,
      "grad_norm": 2.1659663165693823,
      "learning_rate": 4.948351554413879e-06,
      "loss": 0.6785,
      "step": 19
    },
    {
      "epoch": 1.5490196078431373,
      "grad_norm": 1.9715057945884606,
      "learning_rate": 4.93261217644956e-06,
      "loss": 0.6873,
      "step": 20
    },
    {
      "epoch": 1.6274509803921569,
      "grad_norm": 2.03020256390926,
      "learning_rate": 4.914814565722671e-06,
      "loss": 0.659,
      "step": 21
    },
    {
      "epoch": 1.7058823529411766,
      "grad_norm": 1.9753159977423889,
      "learning_rate": 4.894973780788722e-06,
      "loss": 0.6438,
      "step": 22
    },
    {
      "epoch": 1.784313725490196,
      "grad_norm": 1.7231552446064127,
      "learning_rate": 4.873106608932585e-06,
      "loss": 0.6467,
      "step": 23
    },
    {
      "epoch": 1.8627450980392157,
      "grad_norm": 1.7271946867600179,
      "learning_rate": 4.849231551964771e-06,
      "loss": 0.6004,
      "step": 24
    },
    {
      "epoch": 1.9411764705882353,
      "grad_norm": 1.908074354983798,
      "learning_rate": 4.823368810567056e-06,
      "loss": 0.6608,
      "step": 25
    },
    {
      "epoch": 2.0,
      "grad_norm": 1.8986968468078707,
      "learning_rate": 4.7955402672006855e-06,
      "loss": 0.4767,
      "step": 26
    },
    {
      "epoch": 2.0784313725490198,
      "grad_norm": 1.7821876091389497,
      "learning_rate": 4.765769467591626e-06,
      "loss": 0.5677,
      "step": 27
    },
    {
      "epoch": 2.156862745098039,
      "grad_norm": 1.5729961003705275,
      "learning_rate": 4.734081600808531e-06,
      "loss": 0.5875,
      "step": 28
    },
    {
      "epoch": 2.235294117647059,
      "grad_norm": 1.6161908564695435,
      "learning_rate": 4.700503477950278e-06,
      "loss": 0.5528,
      "step": 29
    },
    {
      "epoch": 2.313725490196078,
      "grad_norm": 1.453253235686431,
      "learning_rate": 4.665063509461098e-06,
      "loss": 0.5471,
      "step": 30
    },
    {
      "epoch": 2.392156862745098,
      "grad_norm": 1.5747903710062445,
      "learning_rate": 4.627791681092499e-06,
      "loss": 0.5058,
      "step": 31
    },
    {
      "epoch": 2.4705882352941178,
      "grad_norm": 1.4435044859613202,
      "learning_rate": 4.588719528532342e-06,
      "loss": 0.5226,
      "step": 32
    },
    {
      "epoch": 2.549019607843137,
      "grad_norm": 1.39605748394361,
      "learning_rate": 4.54788011072248e-06,
      "loss": 0.5071,
      "step": 33
    },
    {
      "epoch": 2.627450980392157,
      "grad_norm": 1.5254258052298646,
      "learning_rate": 4.50530798188761e-06,
      "loss": 0.4998,
      "step": 34
    },
    {
      "epoch": 2.7058823529411766,
      "grad_norm": 1.4261873477979348,
      "learning_rate": 4.46103916229894e-06,
      "loss": 0.5009,
      "step": 35
    },
    {
      "epoch": 2.784313725490196,
      "grad_norm": 1.4137129832097197,
      "learning_rate": 4.415111107797445e-06,
      "loss": 0.479,
      "step": 36
    },
    {
      "epoch": 2.8627450980392157,
      "grad_norm": 1.520647591268587,
      "learning_rate": 4.367562678102491e-06,
      "loss": 0.5122,
      "step": 37
    },
    {
      "epoch": 2.9411764705882355,
      "grad_norm": 1.437130314805203,
      "learning_rate": 4.318434103932622e-06,
      "loss": 0.4938,
      "step": 38
    },
    {
      "epoch": 3.0,
      "grad_norm": 1.381198514753958,
      "learning_rate": 4.267766952966369e-06,
      "loss": 0.3629,
      "step": 39
    },
    {
      "epoch": 3.0784313725490198,
      "grad_norm": 1.4414609712821322,
      "learning_rate": 4.215604094671835e-06,
      "loss": 0.4262,
      "step": 40
    },
    {
      "epoch": 3.156862745098039,
      "grad_norm": 1.3346547206899726,
      "learning_rate": 4.161989664034844e-06,
      "loss": 0.4259,
      "step": 41
    },
    {
      "epoch": 3.235294117647059,
      "grad_norm": 1.2955870784243064,
      "learning_rate": 4.106969024216348e-06,
      "loss": 0.4009,
      "step": 42
    },
    {
      "epoch": 3.313725490196078,
      "grad_norm": 1.381727916852796,
      "learning_rate": 4.0505887281706505e-06,
      "loss": 0.4082,
      "step": 43
    },
    {
      "epoch": 3.392156862745098,
      "grad_norm": 1.2215807897629705,
      "learning_rate": 3.992896479256966e-06,
      "loss": 0.4012,
      "step": 44
    },
    {
      "epoch": 3.4705882352941178,
      "grad_norm": 1.341754935028127,
      "learning_rate": 3.933941090877615e-06,
      "loss": 0.4003,
      "step": 45
    },
    {
      "epoch": 3.549019607843137,
      "grad_norm": 1.3219873249674265,
      "learning_rate": 3.8737724451770155e-06,
      "loss": 0.3906,
      "step": 46
    },
    {
      "epoch": 3.627450980392157,
      "grad_norm": 1.2735261344863207,
      "learning_rate": 3.8124414508364005e-06,
      "loss": 0.3956,
      "step": 47
    },
    {
      "epoch": 3.7058823529411766,
      "grad_norm": 1.408667555717726,
      "learning_rate": 3.7500000000000005e-06,
      "loss": 0.3841,
      "step": 48
    },
    {
      "epoch": 3.784313725490196,
      "grad_norm": 1.3920186495457132,
      "learning_rate": 3.6865009243691015e-06,
      "loss": 0.3998,
      "step": 49
    },
    {
      "epoch": 3.8627450980392157,
      "grad_norm": 1.3668192562682828,
      "learning_rate": 3.621997950501156e-06,
      "loss": 0.3947,
      "step": 50
    },
    {
      "epoch": 3.9411764705882355,
      "grad_norm": 1.3220345352979812,
      "learning_rate": 3.556545654351749e-06,
      "loss": 0.3898,
      "step": 51
    },
    {
      "epoch": 4.0,
      "grad_norm": 1.273119605790472,
      "learning_rate": 3.4901994150978926e-06,
      "loss": 0.2635,
      "step": 52
    },
    {
      "epoch": 4.078431372549019,
      "grad_norm": 1.5507480548662818,
      "learning_rate": 3.4230153682817112e-06,
      "loss": 0.3232,
      "step": 53
    },
    {
      "epoch": 4.1568627450980395,
      "grad_norm": 1.2464576840916806,
      "learning_rate": 3.3550503583141726e-06,
      "loss": 0.2997,
      "step": 54
    },
    {
      "epoch": 4.235294117647059,
      "grad_norm": 1.2140787104099877,
      "learning_rate": 3.2863618903790346e-06,
      "loss": 0.3061,
      "step": 55
    },
    {
      "epoch": 4.313725490196078,
      "grad_norm": 1.2569518947420275,
      "learning_rate": 3.217008081777726e-06,
      "loss": 0.2995,
      "step": 56
    },
    {
      "epoch": 4.392156862745098,
      "grad_norm": 1.2248006221848717,
      "learning_rate": 3.147047612756302e-06,
      "loss": 0.2973,
      "step": 57
    },
    {
      "epoch": 4.470588235294118,
      "grad_norm": 1.2928146585389553,
      "learning_rate": 3.0765396768561005e-06,
      "loss": 0.2987,
      "step": 58
    },
    {
      "epoch": 4.549019607843137,
      "grad_norm": 1.3574488686929125,
      "learning_rate": 3.0055439308300954e-06,
      "loss": 0.3132,
      "step": 59
    },
    {
      "epoch": 4.627450980392156,
      "grad_norm": 1.2894955154605787,
      "learning_rate": 2.9341204441673267e-06,
      "loss": 0.2929,
      "step": 60
    },
    {
      "epoch": 4.705882352941177,
      "grad_norm": 1.2326192638645608,
      "learning_rate": 2.862329648268117e-06,
      "loss": 0.2794,
      "step": 61
    },
    {
      "epoch": 4.784313725490196,
      "grad_norm": 1.3599625008219607,
      "learning_rate": 2.7902322853130758e-06,
      "loss": 0.2911,
      "step": 62
    },
    {
      "epoch": 4.862745098039216,
      "grad_norm": 1.2507485894142631,
      "learning_rate": 2.717889356869146e-06,
      "loss": 0.2945,
      "step": 63
    },
    {
      "epoch": 4.9411764705882355,
      "grad_norm": 1.3120759092640928,
      "learning_rate": 2.6453620722761897e-06,
      "loss": 0.3093,
      "step": 64
    },
    {
      "epoch": 5.0,
      "grad_norm": 1.3120759092640928,
      "learning_rate": 2.572711796857779e-06,
      "loss": 0.2038,
      "step": 65
    },
    {
      "epoch": 5.078431372549019,
      "grad_norm": 1.2133928459250458,
      "learning_rate": 2.5e-06,
      "loss": 0.2457,
      "step": 66
    },
    {
      "epoch": 5.1568627450980395,
      "grad_norm": 1.1164096271756436,
      "learning_rate": 2.4272882031422216e-06,
      "loss": 0.2378,
      "step": 67
    },
    {
      "epoch": 5.235294117647059,
      "grad_norm": 1.1865964714246466,
      "learning_rate": 2.3546379277238107e-06,
      "loss": 0.2365,
      "step": 68
    },
    {
      "epoch": 5.313725490196078,
      "grad_norm": 1.1133005502471431,
      "learning_rate": 2.2821106431308546e-06,
      "loss": 0.2273,
      "step": 69
    },
    {
      "epoch": 5.392156862745098,
      "grad_norm": 1.0726780522048756,
      "learning_rate": 2.2097677146869242e-06,
      "loss": 0.2251,
      "step": 70
    },
    {
      "epoch": 5.470588235294118,
      "grad_norm": 1.1507923857528541,
      "learning_rate": 2.1376703517318835e-06,
      "loss": 0.2199,
      "step": 71
    },
    {
      "epoch": 5.549019607843137,
      "grad_norm": 1.1965895392459576,
      "learning_rate": 2.0658795558326745e-06,
      "loss": 0.2275,
      "step": 72
    },
    {
      "epoch": 5.627450980392156,
      "grad_norm": 1.2081935303041835,
      "learning_rate": 1.994456069169906e-06,
      "loss": 0.2219,
      "step": 73
    },
    {
      "epoch": 5.705882352941177,
      "grad_norm": 1.2037511840449118,
      "learning_rate": 1.9234603231439e-06,
      "loss": 0.2204,
      "step": 74
    },
    {
      "epoch": 5.784313725490196,
      "grad_norm": 1.1530325179032028,
      "learning_rate": 1.852952387243698e-06,
      "loss": 0.22,
      "step": 75
    },
    {
      "epoch": 5.862745098039216,
      "grad_norm": 1.1780261182347391,
      "learning_rate": 1.7829919182222752e-06,
      "loss": 0.2199,
      "step": 76
    },
    {
      "epoch": 5.9411764705882355,
      "grad_norm": 1.1895169708488194,
      "learning_rate": 1.7136381096209665e-06,
      "loss": 0.2037,
      "step": 77
    },
    {
      "epoch": 6.0,
      "grad_norm": 1.0854093120483197,
      "learning_rate": 1.6449496416858285e-06,
      "loss": 0.1583,
      "step": 78
    },
    {
      "epoch": 6.078431372549019,
      "grad_norm": 1.0801354684612592,
      "learning_rate": 1.5769846317182894e-06,
      "loss": 0.1934,
      "step": 79
    },
    {
      "epoch": 6.1568627450980395,
      "grad_norm": 1.0439246440370717,
      "learning_rate": 1.509800584902108e-06,
      "loss": 0.1705,
      "step": 80
    },
    {
      "epoch": 6.235294117647059,
      "grad_norm": 1.0284457474816655,
      "learning_rate": 1.443454345648252e-06,
      "loss": 0.1723,
      "step": 81
    },
    {
      "epoch": 6.313725490196078,
      "grad_norm": 0.9926653408766625,
      "learning_rate": 1.3780020494988447e-06,
      "loss": 0.1792,
      "step": 82
    },
    {
      "epoch": 6.392156862745098,
      "grad_norm": 1.0138502371712474,
      "learning_rate": 1.313499075630899e-06,
      "loss": 0.1679,
      "step": 83
    },
    {
      "epoch": 6.470588235294118,
      "grad_norm": 1.0083965688320564,
      "learning_rate": 1.2500000000000007e-06,
      "loss": 0.169,
      "step": 84
    },
    {
      "epoch": 6.549019607843137,
      "grad_norm": 1.0514389443911982,
      "learning_rate": 1.1875585491636e-06,
      "loss": 0.1807,
      "step": 85
    },
    {
      "epoch": 6.627450980392156,
      "grad_norm": 1.0545495890932062,
      "learning_rate": 1.1262275548229852e-06,
      "loss": 0.1735,
      "step": 86
    },
    {
      "epoch": 6.705882352941177,
      "grad_norm": 1.1020211072090924,
      "learning_rate": 1.0660589091223854e-06,
      "loss": 0.1728,
      "step": 87
    },
    {
      "epoch": 6.784313725490196,
      "grad_norm": 1.0983738677411488,
      "learning_rate": 1.0071035207430352e-06,
      "loss": 0.171,
      "step": 88
    },
    {
      "epoch": 6.862745098039216,
      "grad_norm": 1.0848006154577043,
      "learning_rate": 9.494112718293503e-07,
      "loss": 0.1761,
      "step": 89
    },
    {
      "epoch": 6.9411764705882355,
      "grad_norm": 1.0504620709430657,
      "learning_rate": 8.930309757836517e-07,
      "loss": 0.1689,
      "step": 90
    },
    {
      "epoch": 7.0,
      "grad_norm": 1.0335128775784548,
      "learning_rate": 8.380103359651554e-07,
      "loss": 0.1193,
      "step": 91
    },
    {
      "epoch": 7.078431372549019,
      "grad_norm": 0.9668349947212325,
      "learning_rate": 7.843959053281663e-07,
      "loss": 0.1531,
      "step": 92
    },
    {
      "epoch": 7.1568627450980395,
      "grad_norm": 0.9049193018844318,
      "learning_rate": 7.322330470336314e-07,
      "loss": 0.1449,
      "step": 93
    },
    {
      "epoch": 7.235294117647059,
      "grad_norm": 0.8855250299857752,
      "learning_rate": 6.815658960673782e-07,
      "loss": 0.1398,
      "step": 94
    },
    {
      "epoch": 7.313725490196078,
      "grad_norm": 0.9608687848192914,
      "learning_rate": 6.324373218975105e-07,
      "loss": 0.1487,
      "step": 95
    },
    {
      "epoch": 7.392156862745098,
      "grad_norm": 0.9692880229899594,
      "learning_rate": 5.848888922025553e-07,
      "loss": 0.1403,
      "step": 96
    },
    {
      "epoch": 7.470588235294118,
      "grad_norm": 0.9035708627250062,
      "learning_rate": 5.389608377010608e-07,
      "loss": 0.1449,
      "step": 97
    },
    {
      "epoch": 7.549019607843137,
      "grad_norm": 0.8748928881891229,
      "learning_rate": 4.946920181123904e-07,
      "loss": 0.1451,
      "step": 98
    },
    {
      "epoch": 7.627450980392156,
      "grad_norm": 0.8912371091414796,
      "learning_rate": 4.5211988927752026e-07,
      "loss": 0.134,
      "step": 99
    },
    {
      "epoch": 7.705882352941177,
      "grad_norm": 0.9454342145405766,
      "learning_rate": 4.1128047146765936e-07,
      "loss": 0.1497,
      "step": 100
    },
    {
      "epoch": 7.784313725490196,
      "grad_norm": 0.9466608405460601,
      "learning_rate": 3.722083189075007e-07,
      "loss": 0.1523,
      "step": 101
    },
    {
      "epoch": 7.862745098039216,
      "grad_norm": 0.9716836299571869,
      "learning_rate": 3.3493649053890325e-07,
      "loss": 0.1406,
      "step": 102
    },
    {
      "epoch": 7.9411764705882355,
      "grad_norm": 0.9517618408879835,
      "learning_rate": 2.9949652204972257e-07,
      "loss": 0.1414,
      "step": 103
    },
    {
      "epoch": 8.0,
      "grad_norm": 0.8961669761492526,
      "learning_rate": 2.6591839919146963e-07,
      "loss": 0.0988,
      "step": 104
    },
    {
      "epoch": 8.07843137254902,
      "grad_norm": 0.8800809597012162,
      "learning_rate": 2.3423053240837518e-07,
      "loss": 0.1326,
      "step": 105
    },
    {
      "epoch": 8.156862745098039,
      "grad_norm": 0.8678268581817502,
      "learning_rate": 2.044597327993153e-07,
      "loss": 0.1382,
      "step": 106
    },
    {
      "epoch": 8.235294117647058,
      "grad_norm": 0.8749341283858125,
      "learning_rate": 1.7663118943294367e-07,
      "loss": 0.1394,
      "step": 107
    },
    {
      "epoch": 8.313725490196079,
      "grad_norm": 0.8593112391578863,
      "learning_rate": 1.507684480352292e-07,
      "loss": 0.1278,
      "step": 108
    },
    {
      "epoch": 8.392156862745098,
      "grad_norm": 0.9129847126820709,
      "learning_rate": 1.2689339106741529e-07,
      "loss": 0.1391,
      "step": 109
    },
    {
      "epoch": 8.470588235294118,
      "grad_norm": 0.8656126791419273,
      "learning_rate": 1.0502621921127776e-07,
      "loss": 0.1311,
      "step": 110
    },
    {
      "epoch": 8.549019607843137,
      "grad_norm": 0.8593751089585534,
      "learning_rate": 8.518543427732951e-08,
      "loss": 0.1296,
      "step": 111
    },
    {
      "epoch": 8.627450980392156,
      "grad_norm": 0.8879560191886899,
      "learning_rate": 6.738782355044048e-08,
      "loss": 0.1263,
      "step": 112
    },
    {
      "epoch": 8.705882352941176,
      "grad_norm": 0.9206743688905853,
      "learning_rate": 5.164844558612131e-08,
      "loss": 0.1381,
      "step": 113
    },
    {
      "epoch": 8.784313725490197,
      "grad_norm": 0.8795400657114122,
      "learning_rate": 3.798061746947995e-08,
      "loss": 0.1282,
      "step": 114
    },
    {
      "epoch": 8.862745098039216,
      "grad_norm": 0.8568365367984947,
      "learning_rate": 2.6395903547638825e-08,
      "loss": 0.1331,
      "step": 115
    },
    {
      "epoch": 8.941176470588236,
      "grad_norm": 0.847262314471311,
      "learning_rate": 1.6904105645142443e-08,
      "loss": 0.1264,
      "step": 116
    },
    {
      "epoch": 9.0,
      "grad_norm": 0.847262314471311,
      "learning_rate": 9.513254770636138e-09,
      "loss": 0.0991,
      "step": 117
    },
    {
      "epoch": 9.07843137254902,
      "grad_norm": 0.8727190548675097,
      "learning_rate": 4.229604321829561e-09,
      "loss": 0.1293,
      "step": 118
    },
    {
      "epoch": 9.156862745098039,
      "grad_norm": 0.816661788373,
      "learning_rate": 1.0576247944985018e-09,
      "loss": 0.1269,
      "step": 119
    },
    {
      "epoch": 9.235294117647058,
      "grad_norm": 0.885147564294731,
      "learning_rate": 0.0,
      "loss": 0.1341,
      "step": 120
    },
    {
      "epoch": 9.235294117647058,
      "step": 120,
      "total_flos": 27074593947648.0,
      "train_loss": 0.388430199213326,
      "train_runtime": 2487.6221,
      "train_samples_per_second": 1.624,
      "train_steps_per_second": 0.048
    }
  ],
  "logging_steps": 1.0,
  "max_steps": 120,
  "num_input_tokens_seen": 0,
  "num_train_epochs": 10,
  "save_steps": 1000,
  "stateful_callbacks": {
    "TrainerControl": {
      "args": {
        "should_epoch_stop": false,
        "should_evaluate": false,
        "should_log": false,
        "should_save": true,
        "should_training_stop": true
      },
      "attributes": {}
    }
  },
  "total_flos": 27074593947648.0,
  "train_batch_size": 2,
  "trial_name": null,
  "trial_params": null
}