Guen committed on
Commit
6058c6f
·
verified ·
1 Parent(s): b617f98

sync 2026-04-24T07:32:24+00:00

Browse files
sft/checkpoint-200/adapter_config.json CHANGED
@@ -30,8 +30,8 @@
30
  "rank_pattern": {},
31
  "revision": null,
32
  "target_modules": [
33
- "q_proj",
34
  "k_proj",
 
35
  "v_proj",
36
  "o_proj"
37
  ],
 
30
  "rank_pattern": {},
31
  "revision": null,
32
  "target_modules": [
 
33
  "k_proj",
34
+ "q_proj",
35
  "v_proj",
36
  "o_proj"
37
  ],
sft/checkpoint-200/adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:3969884eca31fa76f9aef7175b4cebfdc6d5b239094e713ec1d2b4d6ec6105f6
3
  size 61380432
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:23a9dd8fabae6de649147b472c2f3b3c0299587b7d29dc4690bd2203887ee683
3
  size 61380432
sft/checkpoint-200/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:88be034ce74fb909976c5f0af1eaca344b672bdf5766af980e753ff07ab28123
3
  size 122930379
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:fb695f7a88aa180a7dbc0f57a0f1e1e8bbedcef2aeb5c7875dc3038aa2f699af
3
  size 122930379
sft/checkpoint-200/trainer_state.json CHANGED
@@ -11,142 +11,142 @@
11
  "log_history": [
12
  {
13
  "epoch": 0.04094165813715456,
14
- "grad_norm": 6.233852386474609,
15
  "learning_rate": 9.000000000000001e-07,
16
- "loss": 6.206051254272461,
17
  "step": 10
18
  },
19
  {
20
  "epoch": 0.08188331627430911,
21
- "grad_norm": 5.675178527832031,
22
  "learning_rate": 1.9000000000000002e-06,
23
- "loss": 6.079831314086914,
24
  "step": 20
25
  },
26
  {
27
  "epoch": 0.12282497441146366,
28
- "grad_norm": 5.132958889007568,
29
  "learning_rate": 2.9e-06,
30
- "loss": 5.749290466308594,
31
  "step": 30
32
  },
33
  {
34
  "epoch": 0.16376663254861823,
35
- "grad_norm": 5.81948184967041,
36
  "learning_rate": 3.900000000000001e-06,
37
- "loss": 6.033080673217773,
38
  "step": 40
39
  },
40
  {
41
  "epoch": 0.2047082906857728,
42
- "grad_norm": 6.442906379699707,
43
  "learning_rate": 4.9000000000000005e-06,
44
- "loss": 5.972381973266602,
45
  "step": 50
46
  },
47
  {
48
  "epoch": 0.24564994882292732,
49
- "grad_norm": 5.304522514343262,
50
  "learning_rate": 5.9e-06,
51
- "loss": 5.472136306762695,
52
  "step": 60
53
  },
54
  {
55
  "epoch": 0.2865916069600819,
56
- "grad_norm": 5.261083126068115,
57
  "learning_rate": 6.9e-06,
58
- "loss": 5.041688537597656,
59
  "step": 70
60
  },
61
  {
62
  "epoch": 0.32753326509723646,
63
- "grad_norm": 6.387202739715576,
64
  "learning_rate": 7.9e-06,
65
- "loss": 4.916830062866211,
66
  "step": 80
67
  },
68
  {
69
  "epoch": 0.368474923234391,
70
- "grad_norm": 7.068334102630615,
71
  "learning_rate": 8.900000000000001e-06,
72
- "loss": 3.531498336791992,
73
  "step": 90
74
  },
75
  {
76
  "epoch": 0.4094165813715456,
77
- "grad_norm": 4.787615776062012,
78
  "learning_rate": 9.9e-06,
79
- "loss": 2.2226877212524414,
80
  "step": 100
81
  },
82
  {
83
  "epoch": 0.4503582395087001,
84
- "grad_norm": 3.4490268230438232,
85
  "learning_rate": 9.76923076923077e-06,
86
- "loss": 1.955281639099121,
87
  "step": 110
88
  },
89
  {
90
  "epoch": 0.49129989764585463,
91
- "grad_norm": 2.435624361038208,
92
  "learning_rate": 9.512820512820514e-06,
93
- "loss": 1.5814408302307128,
94
  "step": 120
95
  },
96
  {
97
  "epoch": 0.5322415557830092,
98
- "grad_norm": 2.0870230197906494,
99
  "learning_rate": 9.256410256410257e-06,
100
- "loss": 1.4773900985717774,
101
  "step": 130
102
  },
103
  {
104
  "epoch": 0.5731832139201638,
105
- "grad_norm": 2.1049163341522217,
106
  "learning_rate": 9e-06,
107
- "loss": 1.508018970489502,
108
  "step": 140
109
  },
110
  {
111
  "epoch": 0.6141248720573184,
112
- "grad_norm": 2.7535016536712646,
113
  "learning_rate": 8.743589743589743e-06,
114
- "loss": 1.6693695068359375,
115
  "step": 150
116
  },
117
  {
118
  "epoch": 0.6550665301944729,
119
- "grad_norm": 2.2710325717926025,
120
  "learning_rate": 8.487179487179488e-06,
121
- "loss": 1.590962028503418,
122
  "step": 160
123
  },
124
  {
125
  "epoch": 0.6960081883316275,
126
- "grad_norm": 2.007903575897217,
127
  "learning_rate": 8.230769230769232e-06,
128
- "loss": 1.6514751434326171,
129
  "step": 170
130
  },
131
  {
132
  "epoch": 0.736949846468782,
133
- "grad_norm": 2.2933390140533447,
134
  "learning_rate": 7.974358974358975e-06,
135
- "loss": 1.2684261322021484,
136
  "step": 180
137
  },
138
  {
139
  "epoch": 0.7778915046059366,
140
- "grad_norm": 2.4877562522888184,
141
  "learning_rate": 7.717948717948718e-06,
142
- "loss": 1.4245257377624512,
143
  "step": 190
144
  },
145
  {
146
  "epoch": 0.8188331627430911,
147
- "grad_norm": 2.4900834560394287,
148
  "learning_rate": 7.461538461538462e-06,
149
- "loss": 1.4417343139648438,
150
  "step": 200
151
  }
152
  ],
 
11
  "log_history": [
12
  {
13
  "epoch": 0.04094165813715456,
14
+ "grad_norm": 7.529000759124756,
15
  "learning_rate": 9.000000000000001e-07,
16
+ "loss": 6.639524841308594,
17
  "step": 10
18
  },
19
  {
20
  "epoch": 0.08188331627430911,
21
+ "grad_norm": 7.1427388191223145,
22
  "learning_rate": 1.9000000000000002e-06,
23
+ "loss": 6.582770538330078,
24
  "step": 20
25
  },
26
  {
27
  "epoch": 0.12282497441146366,
28
+ "grad_norm": 6.251121997833252,
29
  "learning_rate": 2.9e-06,
30
+ "loss": 6.114261245727539,
31
  "step": 30
32
  },
33
  {
34
  "epoch": 0.16376663254861823,
35
+ "grad_norm": 7.014133453369141,
36
  "learning_rate": 3.900000000000001e-06,
37
+ "loss": 6.333962631225586,
38
  "step": 40
39
  },
40
  {
41
  "epoch": 0.2047082906857728,
42
+ "grad_norm": 7.072969913482666,
43
  "learning_rate": 4.9000000000000005e-06,
44
+ "loss": 6.248543930053711,
45
  "step": 50
46
  },
47
  {
48
  "epoch": 0.24564994882292732,
49
+ "grad_norm": 5.674424171447754,
50
  "learning_rate": 5.9e-06,
51
+ "loss": 5.6220745086669925,
52
  "step": 60
53
  },
54
  {
55
  "epoch": 0.2865916069600819,
56
+ "grad_norm": 5.3739800453186035,
57
  "learning_rate": 6.9e-06,
58
+ "loss": 5.0251930236816404,
59
  "step": 70
60
  },
61
  {
62
  "epoch": 0.32753326509723646,
63
+ "grad_norm": 6.372293472290039,
64
  "learning_rate": 7.9e-06,
65
+ "loss": 5.033017349243164,
66
  "step": 80
67
  },
68
  {
69
  "epoch": 0.368474923234391,
70
+ "grad_norm": 7.22106409072876,
71
  "learning_rate": 8.900000000000001e-06,
72
+ "loss": 3.514456939697266,
73
  "step": 90
74
  },
75
  {
76
  "epoch": 0.4094165813715456,
77
+ "grad_norm": 4.990958213806152,
78
  "learning_rate": 9.9e-06,
79
+ "loss": 2.256962776184082,
80
  "step": 100
81
  },
82
  {
83
  "epoch": 0.4503582395087001,
84
+ "grad_norm": 3.2661993503570557,
85
  "learning_rate": 9.76923076923077e-06,
86
+ "loss": 2.0482555389404298,
87
  "step": 110
88
  },
89
  {
90
  "epoch": 0.49129989764585463,
91
+ "grad_norm": 2.997323513031006,
92
  "learning_rate": 9.512820512820514e-06,
93
+ "loss": 1.6077747344970703,
94
  "step": 120
95
  },
96
  {
97
  "epoch": 0.5322415557830092,
98
+ "grad_norm": 2.422741413116455,
99
  "learning_rate": 9.256410256410257e-06,
100
+ "loss": 1.538028621673584,
101
  "step": 130
102
  },
103
  {
104
  "epoch": 0.5731832139201638,
105
+ "grad_norm": 2.4594290256500244,
106
  "learning_rate": 9e-06,
107
+ "loss": 1.5614049911499024,
108
  "step": 140
109
  },
110
  {
111
  "epoch": 0.6141248720573184,
112
+ "grad_norm": 2.7256577014923096,
113
  "learning_rate": 8.743589743589743e-06,
114
+ "loss": 1.7180768966674804,
115
  "step": 150
116
  },
117
  {
118
  "epoch": 0.6550665301944729,
119
+ "grad_norm": 2.6614902019500732,
120
  "learning_rate": 8.487179487179488e-06,
121
+ "loss": 1.6412044525146485,
122
  "step": 160
123
  },
124
  {
125
  "epoch": 0.6960081883316275,
126
+ "grad_norm": 2.1517934799194336,
127
  "learning_rate": 8.230769230769232e-06,
128
+ "loss": 1.729467010498047,
129
  "step": 170
130
  },
131
  {
132
  "epoch": 0.736949846468782,
133
+ "grad_norm": 2.4588229656219482,
134
  "learning_rate": 7.974358974358975e-06,
135
+ "loss": 1.305363941192627,
136
  "step": 180
137
  },
138
  {
139
  "epoch": 0.7778915046059366,
140
+ "grad_norm": 2.5282061100006104,
141
  "learning_rate": 7.717948717948718e-06,
142
+ "loss": 1.4986873626708985,
143
  "step": 190
144
  },
145
  {
146
  "epoch": 0.8188331627430911,
147
+ "grad_norm": 3.699396848678589,
148
  "learning_rate": 7.461538461538462e-06,
149
+ "loss": 1.5059691429138184,
150
  "step": 200
151
  }
152
  ],