{
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 9.235294117647058,
"eval_steps": 500,
"global_step": 120,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.0784313725490196,
"grad_norm": 5.935066953271119,
"learning_rate": 4.1666666666666667e-07,
"loss": 1.1559,
"step": 1
},
{
"epoch": 0.1568627450980392,
"grad_norm": 5.931820940431562,
"learning_rate": 8.333333333333333e-07,
"loss": 1.1107,
"step": 2
},
{
"epoch": 0.23529411764705882,
"grad_norm": 6.143905320771078,
"learning_rate": 1.25e-06,
"loss": 1.1384,
"step": 3
},
{
"epoch": 0.3137254901960784,
"grad_norm": 6.065274861977263,
"learning_rate": 1.6666666666666667e-06,
"loss": 1.1421,
"step": 4
},
{
"epoch": 0.39215686274509803,
"grad_norm": 5.8625107951536535,
"learning_rate": 2.0833333333333334e-06,
"loss": 1.1439,
"step": 5
},
{
"epoch": 0.47058823529411764,
"grad_norm": 5.067374092992189,
"learning_rate": 2.5e-06,
"loss": 1.0618,
"step": 6
},
{
"epoch": 0.5490196078431373,
"grad_norm": 4.755801552471063,
"learning_rate": 2.916666666666667e-06,
"loss": 1.0401,
"step": 7
},
{
"epoch": 0.6274509803921569,
"grad_norm": 3.3129565953149323,
"learning_rate": 3.3333333333333333e-06,
"loss": 0.9898,
"step": 8
},
{
"epoch": 0.7058823529411765,
"grad_norm": 3.3912326629630547,
"learning_rate": 3.7500000000000005e-06,
"loss": 1.011,
"step": 9
},
{
"epoch": 0.7843137254901961,
"grad_norm": 3.2048784731280806,
"learning_rate": 4.166666666666667e-06,
"loss": 0.9935,
"step": 10
},
{
"epoch": 0.8627450980392157,
"grad_norm": 3.752806588180809,
"learning_rate": 4.583333333333333e-06,
"loss": 0.9037,
"step": 11
},
{
"epoch": 0.9411764705882353,
"grad_norm": 3.7034973959501185,
"learning_rate": 5e-06,
"loss": 0.9095,
"step": 12
},
{
"epoch": 1.0,
"grad_norm": 3.7034973959501185,
"learning_rate": 4.998942375205502e-06,
"loss": 0.6715,
"step": 13
},
{
"epoch": 1.0784313725490196,
"grad_norm": 3.3436999577667104,
"learning_rate": 4.995770395678171e-06,
"loss": 0.8186,
"step": 14
},
{
"epoch": 1.156862745098039,
"grad_norm": 2.662791286273683,
"learning_rate": 4.990486745229364e-06,
"loss": 0.7876,
"step": 15
},
{
"epoch": 1.2352941176470589,
"grad_norm": 3.1462660546154915,
"learning_rate": 4.983095894354858e-06,
"loss": 0.7498,
"step": 16
},
{
"epoch": 1.3137254901960784,
"grad_norm": 2.806483473961631,
"learning_rate": 4.973604096452361e-06,
"loss": 0.7377,
"step": 17
},
{
"epoch": 1.392156862745098,
"grad_norm": 2.4168449532239933,
"learning_rate": 4.962019382530521e-06,
"loss": 0.6722,
"step": 18
},
{
"epoch": 1.4705882352941178,
"grad_norm": 2.1659663165693823,
"learning_rate": 4.948351554413879e-06,
"loss": 0.6785,
"step": 19
},
{
"epoch": 1.5490196078431373,
"grad_norm": 1.9715057945884606,
"learning_rate": 4.93261217644956e-06,
"loss": 0.6873,
"step": 20
},
{
"epoch": 1.6274509803921569,
"grad_norm": 2.03020256390926,
"learning_rate": 4.914814565722671e-06,
"loss": 0.659,
"step": 21
},
{
"epoch": 1.7058823529411766,
"grad_norm": 1.9753159977423889,
"learning_rate": 4.894973780788722e-06,
"loss": 0.6438,
"step": 22
},
{
"epoch": 1.784313725490196,
"grad_norm": 1.7231552446064127,
"learning_rate": 4.873106608932585e-06,
"loss": 0.6467,
"step": 23
},
{
"epoch": 1.8627450980392157,
"grad_norm": 1.7271946867600179,
"learning_rate": 4.849231551964771e-06,
"loss": 0.6004,
"step": 24
},
{
"epoch": 1.9411764705882353,
"grad_norm": 1.908074354983798,
"learning_rate": 4.823368810567056e-06,
"loss": 0.6608,
"step": 25
},
{
"epoch": 2.0,
"grad_norm": 1.8986968468078707,
"learning_rate": 4.7955402672006855e-06,
"loss": 0.4767,
"step": 26
},
{
"epoch": 2.0784313725490198,
"grad_norm": 1.7821876091389497,
"learning_rate": 4.765769467591626e-06,
"loss": 0.5677,
"step": 27
},
{
"epoch": 2.156862745098039,
"grad_norm": 1.5729961003705275,
"learning_rate": 4.734081600808531e-06,
"loss": 0.5875,
"step": 28
},
{
"epoch": 2.235294117647059,
"grad_norm": 1.6161908564695435,
"learning_rate": 4.700503477950278e-06,
"loss": 0.5528,
"step": 29
},
{
"epoch": 2.313725490196078,
"grad_norm": 1.453253235686431,
"learning_rate": 4.665063509461098e-06,
"loss": 0.5471,
"step": 30
},
{
"epoch": 2.392156862745098,
"grad_norm": 1.5747903710062445,
"learning_rate": 4.627791681092499e-06,
"loss": 0.5058,
"step": 31
},
{
"epoch": 2.4705882352941178,
"grad_norm": 1.4435044859613202,
"learning_rate": 4.588719528532342e-06,
"loss": 0.5226,
"step": 32
},
{
"epoch": 2.549019607843137,
"grad_norm": 1.39605748394361,
"learning_rate": 4.54788011072248e-06,
"loss": 0.5071,
"step": 33
},
{
"epoch": 2.627450980392157,
"grad_norm": 1.5254258052298646,
"learning_rate": 4.50530798188761e-06,
"loss": 0.4998,
"step": 34
},
{
"epoch": 2.7058823529411766,
"grad_norm": 1.4261873477979348,
"learning_rate": 4.46103916229894e-06,
"loss": 0.5009,
"step": 35
},
{
"epoch": 2.784313725490196,
"grad_norm": 1.4137129832097197,
"learning_rate": 4.415111107797445e-06,
"loss": 0.479,
"step": 36
},
{
"epoch": 2.8627450980392157,
"grad_norm": 1.520647591268587,
"learning_rate": 4.367562678102491e-06,
"loss": 0.5122,
"step": 37
},
{
"epoch": 2.9411764705882355,
"grad_norm": 1.437130314805203,
"learning_rate": 4.318434103932622e-06,
"loss": 0.4938,
"step": 38
},
{
"epoch": 3.0,
"grad_norm": 1.381198514753958,
"learning_rate": 4.267766952966369e-06,
"loss": 0.3629,
"step": 39
},
{
"epoch": 3.0784313725490198,
"grad_norm": 1.4414609712821322,
"learning_rate": 4.215604094671835e-06,
"loss": 0.4262,
"step": 40
},
{
"epoch": 3.156862745098039,
"grad_norm": 1.3346547206899726,
"learning_rate": 4.161989664034844e-06,
"loss": 0.4259,
"step": 41
},
{
"epoch": 3.235294117647059,
"grad_norm": 1.2955870784243064,
"learning_rate": 4.106969024216348e-06,
"loss": 0.4009,
"step": 42
},
{
"epoch": 3.313725490196078,
"grad_norm": 1.381727916852796,
"learning_rate": 4.0505887281706505e-06,
"loss": 0.4082,
"step": 43
},
{
"epoch": 3.392156862745098,
"grad_norm": 1.2215807897629705,
"learning_rate": 3.992896479256966e-06,
"loss": 0.4012,
"step": 44
},
{
"epoch": 3.4705882352941178,
"grad_norm": 1.341754935028127,
"learning_rate": 3.933941090877615e-06,
"loss": 0.4003,
"step": 45
},
{
"epoch": 3.549019607843137,
"grad_norm": 1.3219873249674265,
"learning_rate": 3.8737724451770155e-06,
"loss": 0.3906,
"step": 46
},
{
"epoch": 3.627450980392157,
"grad_norm": 1.2735261344863207,
"learning_rate": 3.8124414508364005e-06,
"loss": 0.3956,
"step": 47
},
{
"epoch": 3.7058823529411766,
"grad_norm": 1.408667555717726,
"learning_rate": 3.7500000000000005e-06,
"loss": 0.3841,
"step": 48
},
{
"epoch": 3.784313725490196,
"grad_norm": 1.3920186495457132,
"learning_rate": 3.6865009243691015e-06,
"loss": 0.3998,
"step": 49
},
{
"epoch": 3.8627450980392157,
"grad_norm": 1.3668192562682828,
"learning_rate": 3.621997950501156e-06,
"loss": 0.3947,
"step": 50
},
{
"epoch": 3.9411764705882355,
"grad_norm": 1.3220345352979812,
"learning_rate": 3.556545654351749e-06,
"loss": 0.3898,
"step": 51
},
{
"epoch": 4.0,
"grad_norm": 1.273119605790472,
"learning_rate": 3.4901994150978926e-06,
"loss": 0.2635,
"step": 52
},
{
"epoch": 4.078431372549019,
"grad_norm": 1.5507480548662818,
"learning_rate": 3.4230153682817112e-06,
"loss": 0.3232,
"step": 53
},
{
"epoch": 4.1568627450980395,
"grad_norm": 1.2464576840916806,
"learning_rate": 3.3550503583141726e-06,
"loss": 0.2997,
"step": 54
},
{
"epoch": 4.235294117647059,
"grad_norm": 1.2140787104099877,
"learning_rate": 3.2863618903790346e-06,
"loss": 0.3061,
"step": 55
},
{
"epoch": 4.313725490196078,
"grad_norm": 1.2569518947420275,
"learning_rate": 3.217008081777726e-06,
"loss": 0.2995,
"step": 56
},
{
"epoch": 4.392156862745098,
"grad_norm": 1.2248006221848717,
"learning_rate": 3.147047612756302e-06,
"loss": 0.2973,
"step": 57
},
{
"epoch": 4.470588235294118,
"grad_norm": 1.2928146585389553,
"learning_rate": 3.0765396768561005e-06,
"loss": 0.2987,
"step": 58
},
{
"epoch": 4.549019607843137,
"grad_norm": 1.3574488686929125,
"learning_rate": 3.0055439308300954e-06,
"loss": 0.3132,
"step": 59
},
{
"epoch": 4.627450980392156,
"grad_norm": 1.2894955154605787,
"learning_rate": 2.9341204441673267e-06,
"loss": 0.2929,
"step": 60
},
{
"epoch": 4.705882352941177,
"grad_norm": 1.2326192638645608,
"learning_rate": 2.862329648268117e-06,
"loss": 0.2794,
"step": 61
},
{
"epoch": 4.784313725490196,
"grad_norm": 1.3599625008219607,
"learning_rate": 2.7902322853130758e-06,
"loss": 0.2911,
"step": 62
},
{
"epoch": 4.862745098039216,
"grad_norm": 1.2507485894142631,
"learning_rate": 2.717889356869146e-06,
"loss": 0.2945,
"step": 63
},
{
"epoch": 4.9411764705882355,
"grad_norm": 1.3120759092640928,
"learning_rate": 2.6453620722761897e-06,
"loss": 0.3093,
"step": 64
},
{
"epoch": 5.0,
"grad_norm": 1.3120759092640928,
"learning_rate": 2.572711796857779e-06,
"loss": 0.2038,
"step": 65
},
{
"epoch": 5.078431372549019,
"grad_norm": 1.2133928459250458,
"learning_rate": 2.5e-06,
"loss": 0.2457,
"step": 66
},
{
"epoch": 5.1568627450980395,
"grad_norm": 1.1164096271756436,
"learning_rate": 2.4272882031422216e-06,
"loss": 0.2378,
"step": 67
},
{
"epoch": 5.235294117647059,
"grad_norm": 1.1865964714246466,
"learning_rate": 2.3546379277238107e-06,
"loss": 0.2365,
"step": 68
},
{
"epoch": 5.313725490196078,
"grad_norm": 1.1133005502471431,
"learning_rate": 2.2821106431308546e-06,
"loss": 0.2273,
"step": 69
},
{
"epoch": 5.392156862745098,
"grad_norm": 1.0726780522048756,
"learning_rate": 2.2097677146869242e-06,
"loss": 0.2251,
"step": 70
},
{
"epoch": 5.470588235294118,
"grad_norm": 1.1507923857528541,
"learning_rate": 2.1376703517318835e-06,
"loss": 0.2199,
"step": 71
},
{
"epoch": 5.549019607843137,
"grad_norm": 1.1965895392459576,
"learning_rate": 2.0658795558326745e-06,
"loss": 0.2275,
"step": 72
},
{
"epoch": 5.627450980392156,
"grad_norm": 1.2081935303041835,
"learning_rate": 1.994456069169906e-06,
"loss": 0.2219,
"step": 73
},
{
"epoch": 5.705882352941177,
"grad_norm": 1.2037511840449118,
"learning_rate": 1.9234603231439e-06,
"loss": 0.2204,
"step": 74
},
{
"epoch": 5.784313725490196,
"grad_norm": 1.1530325179032028,
"learning_rate": 1.852952387243698e-06,
"loss": 0.22,
"step": 75
},
{
"epoch": 5.862745098039216,
"grad_norm": 1.1780261182347391,
"learning_rate": 1.7829919182222752e-06,
"loss": 0.2199,
"step": 76
},
{
"epoch": 5.9411764705882355,
"grad_norm": 1.1895169708488194,
"learning_rate": 1.7136381096209665e-06,
"loss": 0.2037,
"step": 77
},
{
"epoch": 6.0,
"grad_norm": 1.0854093120483197,
"learning_rate": 1.6449496416858285e-06,
"loss": 0.1583,
"step": 78
},
{
"epoch": 6.078431372549019,
"grad_norm": 1.0801354684612592,
"learning_rate": 1.5769846317182894e-06,
"loss": 0.1934,
"step": 79
},
{
"epoch": 6.1568627450980395,
"grad_norm": 1.0439246440370717,
"learning_rate": 1.509800584902108e-06,
"loss": 0.1705,
"step": 80
},
{
"epoch": 6.235294117647059,
"grad_norm": 1.0284457474816655,
"learning_rate": 1.443454345648252e-06,
"loss": 0.1723,
"step": 81
},
{
"epoch": 6.313725490196078,
"grad_norm": 0.9926653408766625,
"learning_rate": 1.3780020494988447e-06,
"loss": 0.1792,
"step": 82
},
{
"epoch": 6.392156862745098,
"grad_norm": 1.0138502371712474,
"learning_rate": 1.313499075630899e-06,
"loss": 0.1679,
"step": 83
},
{
"epoch": 6.470588235294118,
"grad_norm": 1.0083965688320564,
"learning_rate": 1.2500000000000007e-06,
"loss": 0.169,
"step": 84
},
{
"epoch": 6.549019607843137,
"grad_norm": 1.0514389443911982,
"learning_rate": 1.1875585491636e-06,
"loss": 0.1807,
"step": 85
},
{
"epoch": 6.627450980392156,
"grad_norm": 1.0545495890932062,
"learning_rate": 1.1262275548229852e-06,
"loss": 0.1735,
"step": 86
},
{
"epoch": 6.705882352941177,
"grad_norm": 1.1020211072090924,
"learning_rate": 1.0660589091223854e-06,
"loss": 0.1728,
"step": 87
},
{
"epoch": 6.784313725490196,
"grad_norm": 1.0983738677411488,
"learning_rate": 1.0071035207430352e-06,
"loss": 0.171,
"step": 88
},
{
"epoch": 6.862745098039216,
"grad_norm": 1.0848006154577043,
"learning_rate": 9.494112718293503e-07,
"loss": 0.1761,
"step": 89
},
{
"epoch": 6.9411764705882355,
"grad_norm": 1.0504620709430657,
"learning_rate": 8.930309757836517e-07,
"loss": 0.1689,
"step": 90
},
{
"epoch": 7.0,
"grad_norm": 1.0335128775784548,
"learning_rate": 8.380103359651554e-07,
"loss": 0.1193,
"step": 91
},
{
"epoch": 7.078431372549019,
"grad_norm": 0.9668349947212325,
"learning_rate": 7.843959053281663e-07,
"loss": 0.1531,
"step": 92
},
{
"epoch": 7.1568627450980395,
"grad_norm": 0.9049193018844318,
"learning_rate": 7.322330470336314e-07,
"loss": 0.1449,
"step": 93
},
{
"epoch": 7.235294117647059,
"grad_norm": 0.8855250299857752,
"learning_rate": 6.815658960673782e-07,
"loss": 0.1398,
"step": 94
},
{
"epoch": 7.313725490196078,
"grad_norm": 0.9608687848192914,
"learning_rate": 6.324373218975105e-07,
"loss": 0.1487,
"step": 95
},
{
"epoch": 7.392156862745098,
"grad_norm": 0.9692880229899594,
"learning_rate": 5.848888922025553e-07,
"loss": 0.1403,
"step": 96
},
{
"epoch": 7.470588235294118,
"grad_norm": 0.9035708627250062,
"learning_rate": 5.389608377010608e-07,
"loss": 0.1449,
"step": 97
},
{
"epoch": 7.549019607843137,
"grad_norm": 0.8748928881891229,
"learning_rate": 4.946920181123904e-07,
"loss": 0.1451,
"step": 98
},
{
"epoch": 7.627450980392156,
"grad_norm": 0.8912371091414796,
"learning_rate": 4.5211988927752026e-07,
"loss": 0.134,
"step": 99
},
{
"epoch": 7.705882352941177,
"grad_norm": 0.9454342145405766,
"learning_rate": 4.1128047146765936e-07,
"loss": 0.1497,
"step": 100
},
{
"epoch": 7.784313725490196,
"grad_norm": 0.9466608405460601,
"learning_rate": 3.722083189075007e-07,
"loss": 0.1523,
"step": 101
},
{
"epoch": 7.862745098039216,
"grad_norm": 0.9716836299571869,
"learning_rate": 3.3493649053890325e-07,
"loss": 0.1406,
"step": 102
},
{
"epoch": 7.9411764705882355,
"grad_norm": 0.9517618408879835,
"learning_rate": 2.9949652204972257e-07,
"loss": 0.1414,
"step": 103
},
{
"epoch": 8.0,
"grad_norm": 0.8961669761492526,
"learning_rate": 2.6591839919146963e-07,
"loss": 0.0988,
"step": 104
},
{
"epoch": 8.07843137254902,
"grad_norm": 0.8800809597012162,
"learning_rate": 2.3423053240837518e-07,
"loss": 0.1326,
"step": 105
},
{
"epoch": 8.156862745098039,
"grad_norm": 0.8678268581817502,
"learning_rate": 2.044597327993153e-07,
"loss": 0.1382,
"step": 106
},
{
"epoch": 8.235294117647058,
"grad_norm": 0.8749341283858125,
"learning_rate": 1.7663118943294367e-07,
"loss": 0.1394,
"step": 107
},
{
"epoch": 8.313725490196079,
"grad_norm": 0.8593112391578863,
"learning_rate": 1.507684480352292e-07,
"loss": 0.1278,
"step": 108
},
{
"epoch": 8.392156862745098,
"grad_norm": 0.9129847126820709,
"learning_rate": 1.2689339106741529e-07,
"loss": 0.1391,
"step": 109
},
{
"epoch": 8.470588235294118,
"grad_norm": 0.8656126791419273,
"learning_rate": 1.0502621921127776e-07,
"loss": 0.1311,
"step": 110
},
{
"epoch": 8.549019607843137,
"grad_norm": 0.8593751089585534,
"learning_rate": 8.518543427732951e-08,
"loss": 0.1296,
"step": 111
},
{
"epoch": 8.627450980392156,
"grad_norm": 0.8879560191886899,
"learning_rate": 6.738782355044048e-08,
"loss": 0.1263,
"step": 112
},
{
"epoch": 8.705882352941176,
"grad_norm": 0.9206743688905853,
"learning_rate": 5.164844558612131e-08,
"loss": 0.1381,
"step": 113
},
{
"epoch": 8.784313725490197,
"grad_norm": 0.8795400657114122,
"learning_rate": 3.798061746947995e-08,
"loss": 0.1282,
"step": 114
},
{
"epoch": 8.862745098039216,
"grad_norm": 0.8568365367984947,
"learning_rate": 2.6395903547638825e-08,
"loss": 0.1331,
"step": 115
},
{
"epoch": 8.941176470588236,
"grad_norm": 0.847262314471311,
"learning_rate": 1.6904105645142443e-08,
"loss": 0.1264,
"step": 116
},
{
"epoch": 9.0,
"grad_norm": 0.847262314471311,
"learning_rate": 9.513254770636138e-09,
"loss": 0.0991,
"step": 117
},
{
"epoch": 9.07843137254902,
"grad_norm": 0.8727190548675097,
"learning_rate": 4.229604321829561e-09,
"loss": 0.1293,
"step": 118
},
{
"epoch": 9.156862745098039,
"grad_norm": 0.816661788373,
"learning_rate": 1.0576247944985018e-09,
"loss": 0.1269,
"step": 119
},
{
"epoch": 9.235294117647058,
"grad_norm": 0.885147564294731,
"learning_rate": 0.0,
"loss": 0.1341,
"step": 120
},
{
"epoch": 9.235294117647058,
"step": 120,
"total_flos": 27074593947648.0,
"train_loss": 0.388430199213326,
"train_runtime": 2487.6221,
"train_samples_per_second": 1.624,
"train_steps_per_second": 0.048
}
],
"logging_steps": 1.0,
"max_steps": 120,
"num_input_tokens_seen": 0,
"num_train_epochs": 10,
"save_steps": 1000,
"stateful_callbacks": {
"TrainerControl": {
"args": {
"should_epoch_stop": false,
"should_evaluate": false,
"should_log": false,
"should_save": true,
"should_training_stop": true
},
"attributes": {}
}
},
"total_flos": 27074593947648.0,
"train_batch_size": 2,
"trial_name": null,
"trial_params": null
}