zd21 committed
Commit cb3c378 · verified · 1 Parent(s): ad172b5

Upload folder using huggingface_hub
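The commit message indicates the folder was pushed with the huggingface_hub client. A minimal sketch of that kind of upload (the local path and repo id below are placeholders, not taken from this commit):

    from huggingface_hub import HfApi

    api = HfApi()
    api.upload_folder(
        folder_path="outputs/checkpoint-120",  # local training output; placeholder path
        repo_id="user/repo",                   # placeholder; the target repo id is not shown in this view
        repo_type="model",
    )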

.gitattributes CHANGED
@@ -33,3 +33,4 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
  *.zip filter=lfs diff=lfs merge=lfs -text
  *.zst filter=lfs diff=lfs merge=lfs -text
  *tfevents* filter=lfs diff=lfs merge=lfs -text
+ checkpoint-120/tokenizer.json filter=lfs diff=lfs merge=lfs -text
checkpoint-120/added_tokens.json ADDED
@@ -0,0 +1,24 @@
+ {
+   "</tool_call>": 151658,
+   "<tool_call>": 151657,
+   "<|box_end|>": 151649,
+   "<|box_start|>": 151648,
+   "<|endoftext|>": 151643,
+   "<|file_sep|>": 151664,
+   "<|fim_middle|>": 151660,
+   "<|fim_pad|>": 151662,
+   "<|fim_prefix|>": 151659,
+   "<|fim_suffix|>": 151661,
+   "<|im_end|>": 151645,
+   "<|im_start|>": 151644,
+   "<|image_pad|>": 151655,
+   "<|object_ref_end|>": 151647,
+   "<|object_ref_start|>": 151646,
+   "<|quad_end|>": 151651,
+   "<|quad_start|>": 151650,
+   "<|repo_name|>": 151663,
+   "<|video_pad|>": 151656,
+   "<|vision_end|>": 151653,
+   "<|vision_pad|>": 151654,
+   "<|vision_start|>": 151652
+ }
checkpoint-120/config.json ADDED
@@ -0,0 +1,30 @@
+ {
+   "_name_or_path": "/home/bmm-system/data/private/zhangdan/checkpoint/Qwen2.5-Math-7B",
+   "architectures": [
+     "Qwen2ForCausalLM"
+   ],
+   "attention_dropout": 0.0,
+   "bos_token_id": 151643,
+   "eos_token_id": 151643,
+   "hidden_act": "silu",
+   "hidden_size": 3584,
+   "initializer_range": 0.02,
+   "intermediate_size": 18944,
+   "max_position_embeddings": 4096,
+   "max_window_layers": 28,
+   "model_type": "qwen2",
+   "num_attention_heads": 28,
+   "num_hidden_layers": 28,
+   "num_key_value_heads": 4,
+   "rms_norm_eps": 1e-06,
+   "rope_scaling": null,
+   "rope_theta": 10000,
+   "sliding_window": 4096,
+   "tie_word_embeddings": false,
+   "torch_dtype": "bfloat16",
+   "transformers_version": "4.49.0",
+   "use_cache": false,
+   "use_mrope": false,
+   "use_sliding_window": false,
+   "vocab_size": 152064
+ }
checkpoint-120/generation_config.json ADDED
@@ -0,0 +1,6 @@
+ {
+   "bos_token_id": 151643,
+   "eos_token_id": 151643,
+   "max_new_tokens": 2048,
+   "transformers_version": "4.49.0"
+ }
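Taken together, config.json describes a plain Qwen2 causal LM (28 layers, hidden size 3584, bf16) fine-tuned from Qwen2.5-Math-7B, and generation_config.json only pins the BOS/EOS ids and max_new_tokens=2048. A minimal loading-and-generation sketch with transformers, assuming the folder has been downloaded locally (path and prompt are placeholders):

    import torch
    from transformers import AutoModelForCausalLM, AutoTokenizer

    ckpt = "checkpoint-120"  # placeholder: local path to this uploaded folder
    tokenizer = AutoTokenizer.from_pretrained(ckpt)
    model = AutoModelForCausalLM.from_pretrained(ckpt, torch_dtype=torch.bfloat16)

    prompt = "What is 12 * 13?"  # invented example prompt
    inputs = tokenizer(prompt, return_tensors="pt")
    # generate() picks up eos_token_id=151643 and max_new_tokens=2048 from generation_config.json.
    output = model.generate(**inputs)
    print(tokenizer.decode(output[0], skip_special_tokens=True))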
checkpoint-120/merges.txt ADDED
The diff for this file is too large to render. See raw diff
 
checkpoint-120/model-00001-of-00004.safetensors ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:9fad3d41b93759c8f1c5098128d7f34a2010dfd07d5f644b097cd2c5afa129ab
+ size 4877660776
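This entry (like the other shards and tokenizer.json below) is stored via Git LFS, so the diff shows only a pointer file: the spec version, the SHA-256 of the real blob, and its size in bytes (~4.9 GB here). An illustrative parser for that simple "key value" format, not part of the repo:

    def parse_lfs_pointer(text: str) -> dict:
        """Parse the 'key value' lines of a Git LFS pointer file."""
        fields = dict(line.strip().split(" ", 1) for line in text.strip().splitlines())
        return {"oid": fields["oid"], "size": int(fields["size"])}

    pointer = """version https://git-lfs.github.com/spec/v1
    oid sha256:9fad3d41b93759c8f1c5098128d7f34a2010dfd07d5f644b097cd2c5afa129ab
    size 4877660776"""
    print(parse_lfs_pointer(pointer))  # {'oid': 'sha256:9fad...', 'size': 4877660776}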
checkpoint-120/model-00002-of-00004.safetensors ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:011cdd9aaa01ec97cc70a64b363804105e0a9597d99ff73fce2d7de461b29e6f
+ size 4932751008
checkpoint-120/model-00003-of-00004.safetensors ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:314cb6a9dbd1b073d56c1b4bf01b4a3a7ff8eb221adc127fbf188d9ff9a986e4
+ size 4330865200
checkpoint-120/model-00004-of-00004.safetensors ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:38b6dc0894bacfe5b66f10533ca033e9ea325f8d1aeb7175ab3b27fb712c8bba
+ size 1089994880
checkpoint-120/model.safetensors.index.json ADDED
@@ -0,0 +1,346 @@
1
+ {
2
+ "metadata": {
3
+ "total_size": 15231233024
4
+ },
5
+ "weight_map": {
6
+ "lm_head.weight": "model-00004-of-00004.safetensors",
7
+ "model.embed_tokens.weight": "model-00001-of-00004.safetensors",
8
+ "model.layers.0.input_layernorm.weight": "model-00001-of-00004.safetensors",
9
+ "model.layers.0.mlp.down_proj.weight": "model-00001-of-00004.safetensors",
10
+ "model.layers.0.mlp.gate_proj.weight": "model-00001-of-00004.safetensors",
11
+ "model.layers.0.mlp.up_proj.weight": "model-00001-of-00004.safetensors",
12
+ "model.layers.0.post_attention_layernorm.weight": "model-00001-of-00004.safetensors",
13
+ "model.layers.0.self_attn.k_proj.bias": "model-00001-of-00004.safetensors",
14
+ "model.layers.0.self_attn.k_proj.weight": "model-00001-of-00004.safetensors",
15
+ "model.layers.0.self_attn.o_proj.weight": "model-00001-of-00004.safetensors",
16
+ "model.layers.0.self_attn.q_proj.bias": "model-00001-of-00004.safetensors",
17
+ "model.layers.0.self_attn.q_proj.weight": "model-00001-of-00004.safetensors",
18
+ "model.layers.0.self_attn.v_proj.bias": "model-00001-of-00004.safetensors",
19
+ "model.layers.0.self_attn.v_proj.weight": "model-00001-of-00004.safetensors",
20
+ "model.layers.1.input_layernorm.weight": "model-00001-of-00004.safetensors",
21
+ "model.layers.1.mlp.down_proj.weight": "model-00001-of-00004.safetensors",
22
+ "model.layers.1.mlp.gate_proj.weight": "model-00001-of-00004.safetensors",
23
+ "model.layers.1.mlp.up_proj.weight": "model-00001-of-00004.safetensors",
24
+ "model.layers.1.post_attention_layernorm.weight": "model-00001-of-00004.safetensors",
25
+ "model.layers.1.self_attn.k_proj.bias": "model-00001-of-00004.safetensors",
26
+ "model.layers.1.self_attn.k_proj.weight": "model-00001-of-00004.safetensors",
27
+ "model.layers.1.self_attn.o_proj.weight": "model-00001-of-00004.safetensors",
28
+ "model.layers.1.self_attn.q_proj.bias": "model-00001-of-00004.safetensors",
29
+ "model.layers.1.self_attn.q_proj.weight": "model-00001-of-00004.safetensors",
30
+ "model.layers.1.self_attn.v_proj.bias": "model-00001-of-00004.safetensors",
31
+ "model.layers.1.self_attn.v_proj.weight": "model-00001-of-00004.safetensors",
32
+ "model.layers.10.input_layernorm.weight": "model-00002-of-00004.safetensors",
33
+ "model.layers.10.mlp.down_proj.weight": "model-00002-of-00004.safetensors",
34
+ "model.layers.10.mlp.gate_proj.weight": "model-00002-of-00004.safetensors",
35
+ "model.layers.10.mlp.up_proj.weight": "model-00002-of-00004.safetensors",
36
+ "model.layers.10.post_attention_layernorm.weight": "model-00002-of-00004.safetensors",
37
+ "model.layers.10.self_attn.k_proj.bias": "model-00002-of-00004.safetensors",
38
+ "model.layers.10.self_attn.k_proj.weight": "model-00002-of-00004.safetensors",
39
+ "model.layers.10.self_attn.o_proj.weight": "model-00002-of-00004.safetensors",
40
+ "model.layers.10.self_attn.q_proj.bias": "model-00002-of-00004.safetensors",
41
+ "model.layers.10.self_attn.q_proj.weight": "model-00002-of-00004.safetensors",
42
+ "model.layers.10.self_attn.v_proj.bias": "model-00002-of-00004.safetensors",
43
+ "model.layers.10.self_attn.v_proj.weight": "model-00002-of-00004.safetensors",
44
+ "model.layers.11.input_layernorm.weight": "model-00002-of-00004.safetensors",
45
+ "model.layers.11.mlp.down_proj.weight": "model-00002-of-00004.safetensors",
46
+ "model.layers.11.mlp.gate_proj.weight": "model-00002-of-00004.safetensors",
47
+ "model.layers.11.mlp.up_proj.weight": "model-00002-of-00004.safetensors",
48
+ "model.layers.11.post_attention_layernorm.weight": "model-00002-of-00004.safetensors",
49
+ "model.layers.11.self_attn.k_proj.bias": "model-00002-of-00004.safetensors",
50
+ "model.layers.11.self_attn.k_proj.weight": "model-00002-of-00004.safetensors",
51
+ "model.layers.11.self_attn.o_proj.weight": "model-00002-of-00004.safetensors",
52
+ "model.layers.11.self_attn.q_proj.bias": "model-00002-of-00004.safetensors",
53
+ "model.layers.11.self_attn.q_proj.weight": "model-00002-of-00004.safetensors",
54
+ "model.layers.11.self_attn.v_proj.bias": "model-00002-of-00004.safetensors",
55
+ "model.layers.11.self_attn.v_proj.weight": "model-00002-of-00004.safetensors",
56
+ "model.layers.12.input_layernorm.weight": "model-00002-of-00004.safetensors",
57
+ "model.layers.12.mlp.down_proj.weight": "model-00002-of-00004.safetensors",
58
+ "model.layers.12.mlp.gate_proj.weight": "model-00002-of-00004.safetensors",
59
+ "model.layers.12.mlp.up_proj.weight": "model-00002-of-00004.safetensors",
60
+ "model.layers.12.post_attention_layernorm.weight": "model-00002-of-00004.safetensors",
61
+ "model.layers.12.self_attn.k_proj.bias": "model-00002-of-00004.safetensors",
62
+ "model.layers.12.self_attn.k_proj.weight": "model-00002-of-00004.safetensors",
63
+ "model.layers.12.self_attn.o_proj.weight": "model-00002-of-00004.safetensors",
64
+ "model.layers.12.self_attn.q_proj.bias": "model-00002-of-00004.safetensors",
65
+ "model.layers.12.self_attn.q_proj.weight": "model-00002-of-00004.safetensors",
66
+ "model.layers.12.self_attn.v_proj.bias": "model-00002-of-00004.safetensors",
67
+ "model.layers.12.self_attn.v_proj.weight": "model-00002-of-00004.safetensors",
68
+ "model.layers.13.input_layernorm.weight": "model-00002-of-00004.safetensors",
69
+ "model.layers.13.mlp.down_proj.weight": "model-00002-of-00004.safetensors",
70
+ "model.layers.13.mlp.gate_proj.weight": "model-00002-of-00004.safetensors",
71
+ "model.layers.13.mlp.up_proj.weight": "model-00002-of-00004.safetensors",
72
+ "model.layers.13.post_attention_layernorm.weight": "model-00002-of-00004.safetensors",
73
+ "model.layers.13.self_attn.k_proj.bias": "model-00002-of-00004.safetensors",
74
+ "model.layers.13.self_attn.k_proj.weight": "model-00002-of-00004.safetensors",
75
+ "model.layers.13.self_attn.o_proj.weight": "model-00002-of-00004.safetensors",
76
+ "model.layers.13.self_attn.q_proj.bias": "model-00002-of-00004.safetensors",
77
+ "model.layers.13.self_attn.q_proj.weight": "model-00002-of-00004.safetensors",
78
+ "model.layers.13.self_attn.v_proj.bias": "model-00002-of-00004.safetensors",
79
+ "model.layers.13.self_attn.v_proj.weight": "model-00002-of-00004.safetensors",
80
+ "model.layers.14.input_layernorm.weight": "model-00002-of-00004.safetensors",
81
+ "model.layers.14.mlp.down_proj.weight": "model-00002-of-00004.safetensors",
82
+ "model.layers.14.mlp.gate_proj.weight": "model-00002-of-00004.safetensors",
83
+ "model.layers.14.mlp.up_proj.weight": "model-00002-of-00004.safetensors",
84
+ "model.layers.14.post_attention_layernorm.weight": "model-00002-of-00004.safetensors",
85
+ "model.layers.14.self_attn.k_proj.bias": "model-00002-of-00004.safetensors",
86
+ "model.layers.14.self_attn.k_proj.weight": "model-00002-of-00004.safetensors",
87
+ "model.layers.14.self_attn.o_proj.weight": "model-00002-of-00004.safetensors",
88
+ "model.layers.14.self_attn.q_proj.bias": "model-00002-of-00004.safetensors",
89
+ "model.layers.14.self_attn.q_proj.weight": "model-00002-of-00004.safetensors",
90
+ "model.layers.14.self_attn.v_proj.bias": "model-00002-of-00004.safetensors",
91
+ "model.layers.14.self_attn.v_proj.weight": "model-00002-of-00004.safetensors",
92
+ "model.layers.15.input_layernorm.weight": "model-00002-of-00004.safetensors",
93
+ "model.layers.15.mlp.down_proj.weight": "model-00002-of-00004.safetensors",
94
+ "model.layers.15.mlp.gate_proj.weight": "model-00002-of-00004.safetensors",
95
+ "model.layers.15.mlp.up_proj.weight": "model-00002-of-00004.safetensors",
96
+ "model.layers.15.post_attention_layernorm.weight": "model-00002-of-00004.safetensors",
97
+ "model.layers.15.self_attn.k_proj.bias": "model-00002-of-00004.safetensors",
98
+ "model.layers.15.self_attn.k_proj.weight": "model-00002-of-00004.safetensors",
99
+ "model.layers.15.self_attn.o_proj.weight": "model-00002-of-00004.safetensors",
100
+ "model.layers.15.self_attn.q_proj.bias": "model-00002-of-00004.safetensors",
101
+ "model.layers.15.self_attn.q_proj.weight": "model-00002-of-00004.safetensors",
102
+ "model.layers.15.self_attn.v_proj.bias": "model-00002-of-00004.safetensors",
103
+ "model.layers.15.self_attn.v_proj.weight": "model-00002-of-00004.safetensors",
104
+ "model.layers.16.input_layernorm.weight": "model-00002-of-00004.safetensors",
105
+ "model.layers.16.mlp.down_proj.weight": "model-00002-of-00004.safetensors",
106
+ "model.layers.16.mlp.gate_proj.weight": "model-00002-of-00004.safetensors",
107
+ "model.layers.16.mlp.up_proj.weight": "model-00002-of-00004.safetensors",
108
+ "model.layers.16.post_attention_layernorm.weight": "model-00002-of-00004.safetensors",
109
+ "model.layers.16.self_attn.k_proj.bias": "model-00002-of-00004.safetensors",
110
+ "model.layers.16.self_attn.k_proj.weight": "model-00002-of-00004.safetensors",
111
+ "model.layers.16.self_attn.o_proj.weight": "model-00002-of-00004.safetensors",
112
+ "model.layers.16.self_attn.q_proj.bias": "model-00002-of-00004.safetensors",
113
+ "model.layers.16.self_attn.q_proj.weight": "model-00002-of-00004.safetensors",
114
+ "model.layers.16.self_attn.v_proj.bias": "model-00002-of-00004.safetensors",
115
+ "model.layers.16.self_attn.v_proj.weight": "model-00002-of-00004.safetensors",
116
+ "model.layers.17.input_layernorm.weight": "model-00002-of-00004.safetensors",
117
+ "model.layers.17.mlp.down_proj.weight": "model-00002-of-00004.safetensors",
118
+ "model.layers.17.mlp.gate_proj.weight": "model-00002-of-00004.safetensors",
119
+ "model.layers.17.mlp.up_proj.weight": "model-00002-of-00004.safetensors",
120
+ "model.layers.17.post_attention_layernorm.weight": "model-00002-of-00004.safetensors",
121
+ "model.layers.17.self_attn.k_proj.bias": "model-00002-of-00004.safetensors",
122
+ "model.layers.17.self_attn.k_proj.weight": "model-00002-of-00004.safetensors",
123
+ "model.layers.17.self_attn.o_proj.weight": "model-00002-of-00004.safetensors",
124
+ "model.layers.17.self_attn.q_proj.bias": "model-00002-of-00004.safetensors",
125
+ "model.layers.17.self_attn.q_proj.weight": "model-00002-of-00004.safetensors",
126
+ "model.layers.17.self_attn.v_proj.bias": "model-00002-of-00004.safetensors",
127
+ "model.layers.17.self_attn.v_proj.weight": "model-00002-of-00004.safetensors",
128
+ "model.layers.18.input_layernorm.weight": "model-00003-of-00004.safetensors",
129
+ "model.layers.18.mlp.down_proj.weight": "model-00003-of-00004.safetensors",
130
+ "model.layers.18.mlp.gate_proj.weight": "model-00002-of-00004.safetensors",
131
+ "model.layers.18.mlp.up_proj.weight": "model-00002-of-00004.safetensors",
132
+ "model.layers.18.post_attention_layernorm.weight": "model-00003-of-00004.safetensors",
133
+ "model.layers.18.self_attn.k_proj.bias": "model-00002-of-00004.safetensors",
134
+ "model.layers.18.self_attn.k_proj.weight": "model-00002-of-00004.safetensors",
135
+ "model.layers.18.self_attn.o_proj.weight": "model-00002-of-00004.safetensors",
136
+ "model.layers.18.self_attn.q_proj.bias": "model-00002-of-00004.safetensors",
137
+ "model.layers.18.self_attn.q_proj.weight": "model-00002-of-00004.safetensors",
138
+ "model.layers.18.self_attn.v_proj.bias": "model-00002-of-00004.safetensors",
139
+ "model.layers.18.self_attn.v_proj.weight": "model-00002-of-00004.safetensors",
140
+ "model.layers.19.input_layernorm.weight": "model-00003-of-00004.safetensors",
141
+ "model.layers.19.mlp.down_proj.weight": "model-00003-of-00004.safetensors",
142
+ "model.layers.19.mlp.gate_proj.weight": "model-00003-of-00004.safetensors",
143
+ "model.layers.19.mlp.up_proj.weight": "model-00003-of-00004.safetensors",
144
+ "model.layers.19.post_attention_layernorm.weight": "model-00003-of-00004.safetensors",
145
+ "model.layers.19.self_attn.k_proj.bias": "model-00003-of-00004.safetensors",
146
+ "model.layers.19.self_attn.k_proj.weight": "model-00003-of-00004.safetensors",
147
+ "model.layers.19.self_attn.o_proj.weight": "model-00003-of-00004.safetensors",
148
+ "model.layers.19.self_attn.q_proj.bias": "model-00003-of-00004.safetensors",
149
+ "model.layers.19.self_attn.q_proj.weight": "model-00003-of-00004.safetensors",
150
+ "model.layers.19.self_attn.v_proj.bias": "model-00003-of-00004.safetensors",
151
+ "model.layers.19.self_attn.v_proj.weight": "model-00003-of-00004.safetensors",
152
+ "model.layers.2.input_layernorm.weight": "model-00001-of-00004.safetensors",
153
+ "model.layers.2.mlp.down_proj.weight": "model-00001-of-00004.safetensors",
154
+ "model.layers.2.mlp.gate_proj.weight": "model-00001-of-00004.safetensors",
155
+ "model.layers.2.mlp.up_proj.weight": "model-00001-of-00004.safetensors",
156
+ "model.layers.2.post_attention_layernorm.weight": "model-00001-of-00004.safetensors",
157
+ "model.layers.2.self_attn.k_proj.bias": "model-00001-of-00004.safetensors",
158
+ "model.layers.2.self_attn.k_proj.weight": "model-00001-of-00004.safetensors",
159
+ "model.layers.2.self_attn.o_proj.weight": "model-00001-of-00004.safetensors",
160
+ "model.layers.2.self_attn.q_proj.bias": "model-00001-of-00004.safetensors",
161
+ "model.layers.2.self_attn.q_proj.weight": "model-00001-of-00004.safetensors",
162
+ "model.layers.2.self_attn.v_proj.bias": "model-00001-of-00004.safetensors",
163
+ "model.layers.2.self_attn.v_proj.weight": "model-00001-of-00004.safetensors",
164
+ "model.layers.20.input_layernorm.weight": "model-00003-of-00004.safetensors",
165
+ "model.layers.20.mlp.down_proj.weight": "model-00003-of-00004.safetensors",
166
+ "model.layers.20.mlp.gate_proj.weight": "model-00003-of-00004.safetensors",
167
+ "model.layers.20.mlp.up_proj.weight": "model-00003-of-00004.safetensors",
168
+ "model.layers.20.post_attention_layernorm.weight": "model-00003-of-00004.safetensors",
169
+ "model.layers.20.self_attn.k_proj.bias": "model-00003-of-00004.safetensors",
170
+ "model.layers.20.self_attn.k_proj.weight": "model-00003-of-00004.safetensors",
171
+ "model.layers.20.self_attn.o_proj.weight": "model-00003-of-00004.safetensors",
172
+ "model.layers.20.self_attn.q_proj.bias": "model-00003-of-00004.safetensors",
173
+ "model.layers.20.self_attn.q_proj.weight": "model-00003-of-00004.safetensors",
174
+ "model.layers.20.self_attn.v_proj.bias": "model-00003-of-00004.safetensors",
175
+ "model.layers.20.self_attn.v_proj.weight": "model-00003-of-00004.safetensors",
176
+ "model.layers.21.input_layernorm.weight": "model-00003-of-00004.safetensors",
177
+ "model.layers.21.mlp.down_proj.weight": "model-00003-of-00004.safetensors",
178
+ "model.layers.21.mlp.gate_proj.weight": "model-00003-of-00004.safetensors",
179
+ "model.layers.21.mlp.up_proj.weight": "model-00003-of-00004.safetensors",
180
+ "model.layers.21.post_attention_layernorm.weight": "model-00003-of-00004.safetensors",
181
+ "model.layers.21.self_attn.k_proj.bias": "model-00003-of-00004.safetensors",
182
+ "model.layers.21.self_attn.k_proj.weight": "model-00003-of-00004.safetensors",
183
+ "model.layers.21.self_attn.o_proj.weight": "model-00003-of-00004.safetensors",
184
+ "model.layers.21.self_attn.q_proj.bias": "model-00003-of-00004.safetensors",
185
+ "model.layers.21.self_attn.q_proj.weight": "model-00003-of-00004.safetensors",
186
+ "model.layers.21.self_attn.v_proj.bias": "model-00003-of-00004.safetensors",
187
+ "model.layers.21.self_attn.v_proj.weight": "model-00003-of-00004.safetensors",
188
+ "model.layers.22.input_layernorm.weight": "model-00003-of-00004.safetensors",
189
+ "model.layers.22.mlp.down_proj.weight": "model-00003-of-00004.safetensors",
190
+ "model.layers.22.mlp.gate_proj.weight": "model-00003-of-00004.safetensors",
191
+ "model.layers.22.mlp.up_proj.weight": "model-00003-of-00004.safetensors",
192
+ "model.layers.22.post_attention_layernorm.weight": "model-00003-of-00004.safetensors",
193
+ "model.layers.22.self_attn.k_proj.bias": "model-00003-of-00004.safetensors",
194
+ "model.layers.22.self_attn.k_proj.weight": "model-00003-of-00004.safetensors",
195
+ "model.layers.22.self_attn.o_proj.weight": "model-00003-of-00004.safetensors",
196
+ "model.layers.22.self_attn.q_proj.bias": "model-00003-of-00004.safetensors",
197
+ "model.layers.22.self_attn.q_proj.weight": "model-00003-of-00004.safetensors",
198
+ "model.layers.22.self_attn.v_proj.bias": "model-00003-of-00004.safetensors",
199
+ "model.layers.22.self_attn.v_proj.weight": "model-00003-of-00004.safetensors",
200
+ "model.layers.23.input_layernorm.weight": "model-00003-of-00004.safetensors",
201
+ "model.layers.23.mlp.down_proj.weight": "model-00003-of-00004.safetensors",
202
+ "model.layers.23.mlp.gate_proj.weight": "model-00003-of-00004.safetensors",
203
+ "model.layers.23.mlp.up_proj.weight": "model-00003-of-00004.safetensors",
204
+ "model.layers.23.post_attention_layernorm.weight": "model-00003-of-00004.safetensors",
205
+ "model.layers.23.self_attn.k_proj.bias": "model-00003-of-00004.safetensors",
206
+ "model.layers.23.self_attn.k_proj.weight": "model-00003-of-00004.safetensors",
207
+ "model.layers.23.self_attn.o_proj.weight": "model-00003-of-00004.safetensors",
208
+ "model.layers.23.self_attn.q_proj.bias": "model-00003-of-00004.safetensors",
209
+ "model.layers.23.self_attn.q_proj.weight": "model-00003-of-00004.safetensors",
210
+ "model.layers.23.self_attn.v_proj.bias": "model-00003-of-00004.safetensors",
211
+ "model.layers.23.self_attn.v_proj.weight": "model-00003-of-00004.safetensors",
212
+ "model.layers.24.input_layernorm.weight": "model-00003-of-00004.safetensors",
213
+ "model.layers.24.mlp.down_proj.weight": "model-00003-of-00004.safetensors",
214
+ "model.layers.24.mlp.gate_proj.weight": "model-00003-of-00004.safetensors",
215
+ "model.layers.24.mlp.up_proj.weight": "model-00003-of-00004.safetensors",
216
+ "model.layers.24.post_attention_layernorm.weight": "model-00003-of-00004.safetensors",
217
+ "model.layers.24.self_attn.k_proj.bias": "model-00003-of-00004.safetensors",
218
+ "model.layers.24.self_attn.k_proj.weight": "model-00003-of-00004.safetensors",
219
+ "model.layers.24.self_attn.o_proj.weight": "model-00003-of-00004.safetensors",
220
+ "model.layers.24.self_attn.q_proj.bias": "model-00003-of-00004.safetensors",
221
+ "model.layers.24.self_attn.q_proj.weight": "model-00003-of-00004.safetensors",
222
+ "model.layers.24.self_attn.v_proj.bias": "model-00003-of-00004.safetensors",
223
+ "model.layers.24.self_attn.v_proj.weight": "model-00003-of-00004.safetensors",
224
+ "model.layers.25.input_layernorm.weight": "model-00003-of-00004.safetensors",
225
+ "model.layers.25.mlp.down_proj.weight": "model-00003-of-00004.safetensors",
226
+ "model.layers.25.mlp.gate_proj.weight": "model-00003-of-00004.safetensors",
227
+ "model.layers.25.mlp.up_proj.weight": "model-00003-of-00004.safetensors",
228
+ "model.layers.25.post_attention_layernorm.weight": "model-00003-of-00004.safetensors",
229
+ "model.layers.25.self_attn.k_proj.bias": "model-00003-of-00004.safetensors",
230
+ "model.layers.25.self_attn.k_proj.weight": "model-00003-of-00004.safetensors",
231
+ "model.layers.25.self_attn.o_proj.weight": "model-00003-of-00004.safetensors",
232
+ "model.layers.25.self_attn.q_proj.bias": "model-00003-of-00004.safetensors",
233
+ "model.layers.25.self_attn.q_proj.weight": "model-00003-of-00004.safetensors",
234
+ "model.layers.25.self_attn.v_proj.bias": "model-00003-of-00004.safetensors",
235
+ "model.layers.25.self_attn.v_proj.weight": "model-00003-of-00004.safetensors",
236
+ "model.layers.26.input_layernorm.weight": "model-00003-of-00004.safetensors",
237
+ "model.layers.26.mlp.down_proj.weight": "model-00003-of-00004.safetensors",
238
+ "model.layers.26.mlp.gate_proj.weight": "model-00003-of-00004.safetensors",
239
+ "model.layers.26.mlp.up_proj.weight": "model-00003-of-00004.safetensors",
240
+ "model.layers.26.post_attention_layernorm.weight": "model-00003-of-00004.safetensors",
241
+ "model.layers.26.self_attn.k_proj.bias": "model-00003-of-00004.safetensors",
242
+ "model.layers.26.self_attn.k_proj.weight": "model-00003-of-00004.safetensors",
243
+ "model.layers.26.self_attn.o_proj.weight": "model-00003-of-00004.safetensors",
244
+ "model.layers.26.self_attn.q_proj.bias": "model-00003-of-00004.safetensors",
245
+ "model.layers.26.self_attn.q_proj.weight": "model-00003-of-00004.safetensors",
246
+ "model.layers.26.self_attn.v_proj.bias": "model-00003-of-00004.safetensors",
247
+ "model.layers.26.self_attn.v_proj.weight": "model-00003-of-00004.safetensors",
248
+ "model.layers.27.input_layernorm.weight": "model-00003-of-00004.safetensors",
249
+ "model.layers.27.mlp.down_proj.weight": "model-00003-of-00004.safetensors",
250
+ "model.layers.27.mlp.gate_proj.weight": "model-00003-of-00004.safetensors",
251
+ "model.layers.27.mlp.up_proj.weight": "model-00003-of-00004.safetensors",
252
+ "model.layers.27.post_attention_layernorm.weight": "model-00003-of-00004.safetensors",
253
+ "model.layers.27.self_attn.k_proj.bias": "model-00003-of-00004.safetensors",
254
+ "model.layers.27.self_attn.k_proj.weight": "model-00003-of-00004.safetensors",
255
+ "model.layers.27.self_attn.o_proj.weight": "model-00003-of-00004.safetensors",
256
+ "model.layers.27.self_attn.q_proj.bias": "model-00003-of-00004.safetensors",
257
+ "model.layers.27.self_attn.q_proj.weight": "model-00003-of-00004.safetensors",
258
+ "model.layers.27.self_attn.v_proj.bias": "model-00003-of-00004.safetensors",
259
+ "model.layers.27.self_attn.v_proj.weight": "model-00003-of-00004.safetensors",
260
+ "model.layers.3.input_layernorm.weight": "model-00001-of-00004.safetensors",
261
+ "model.layers.3.mlp.down_proj.weight": "model-00001-of-00004.safetensors",
262
+ "model.layers.3.mlp.gate_proj.weight": "model-00001-of-00004.safetensors",
263
+ "model.layers.3.mlp.up_proj.weight": "model-00001-of-00004.safetensors",
264
+ "model.layers.3.post_attention_layernorm.weight": "model-00001-of-00004.safetensors",
265
+ "model.layers.3.self_attn.k_proj.bias": "model-00001-of-00004.safetensors",
266
+ "model.layers.3.self_attn.k_proj.weight": "model-00001-of-00004.safetensors",
267
+ "model.layers.3.self_attn.o_proj.weight": "model-00001-of-00004.safetensors",
268
+ "model.layers.3.self_attn.q_proj.bias": "model-00001-of-00004.safetensors",
269
+ "model.layers.3.self_attn.q_proj.weight": "model-00001-of-00004.safetensors",
270
+ "model.layers.3.self_attn.v_proj.bias": "model-00001-of-00004.safetensors",
271
+ "model.layers.3.self_attn.v_proj.weight": "model-00001-of-00004.safetensors",
272
+ "model.layers.4.input_layernorm.weight": "model-00001-of-00004.safetensors",
273
+ "model.layers.4.mlp.down_proj.weight": "model-00001-of-00004.safetensors",
274
+ "model.layers.4.mlp.gate_proj.weight": "model-00001-of-00004.safetensors",
275
+ "model.layers.4.mlp.up_proj.weight": "model-00001-of-00004.safetensors",
276
+ "model.layers.4.post_attention_layernorm.weight": "model-00001-of-00004.safetensors",
277
+ "model.layers.4.self_attn.k_proj.bias": "model-00001-of-00004.safetensors",
278
+ "model.layers.4.self_attn.k_proj.weight": "model-00001-of-00004.safetensors",
279
+ "model.layers.4.self_attn.o_proj.weight": "model-00001-of-00004.safetensors",
280
+ "model.layers.4.self_attn.q_proj.bias": "model-00001-of-00004.safetensors",
281
+ "model.layers.4.self_attn.q_proj.weight": "model-00001-of-00004.safetensors",
282
+ "model.layers.4.self_attn.v_proj.bias": "model-00001-of-00004.safetensors",
283
+ "model.layers.4.self_attn.v_proj.weight": "model-00001-of-00004.safetensors",
284
+ "model.layers.5.input_layernorm.weight": "model-00001-of-00004.safetensors",
285
+ "model.layers.5.mlp.down_proj.weight": "model-00001-of-00004.safetensors",
286
+ "model.layers.5.mlp.gate_proj.weight": "model-00001-of-00004.safetensors",
287
+ "model.layers.5.mlp.up_proj.weight": "model-00001-of-00004.safetensors",
288
+ "model.layers.5.post_attention_layernorm.weight": "model-00001-of-00004.safetensors",
289
+ "model.layers.5.self_attn.k_proj.bias": "model-00001-of-00004.safetensors",
290
+ "model.layers.5.self_attn.k_proj.weight": "model-00001-of-00004.safetensors",
291
+ "model.layers.5.self_attn.o_proj.weight": "model-00001-of-00004.safetensors",
292
+ "model.layers.5.self_attn.q_proj.bias": "model-00001-of-00004.safetensors",
293
+ "model.layers.5.self_attn.q_proj.weight": "model-00001-of-00004.safetensors",
294
+ "model.layers.5.self_attn.v_proj.bias": "model-00001-of-00004.safetensors",
295
+ "model.layers.5.self_attn.v_proj.weight": "model-00001-of-00004.safetensors",
296
+ "model.layers.6.input_layernorm.weight": "model-00001-of-00004.safetensors",
297
+ "model.layers.6.mlp.down_proj.weight": "model-00001-of-00004.safetensors",
298
+ "model.layers.6.mlp.gate_proj.weight": "model-00001-of-00004.safetensors",
299
+ "model.layers.6.mlp.up_proj.weight": "model-00001-of-00004.safetensors",
300
+ "model.layers.6.post_attention_layernorm.weight": "model-00001-of-00004.safetensors",
301
+ "model.layers.6.self_attn.k_proj.bias": "model-00001-of-00004.safetensors",
302
+ "model.layers.6.self_attn.k_proj.weight": "model-00001-of-00004.safetensors",
303
+ "model.layers.6.self_attn.o_proj.weight": "model-00001-of-00004.safetensors",
304
+ "model.layers.6.self_attn.q_proj.bias": "model-00001-of-00004.safetensors",
305
+ "model.layers.6.self_attn.q_proj.weight": "model-00001-of-00004.safetensors",
306
+ "model.layers.6.self_attn.v_proj.bias": "model-00001-of-00004.safetensors",
307
+ "model.layers.6.self_attn.v_proj.weight": "model-00001-of-00004.safetensors",
308
+ "model.layers.7.input_layernorm.weight": "model-00001-of-00004.safetensors",
309
+ "model.layers.7.mlp.down_proj.weight": "model-00001-of-00004.safetensors",
310
+ "model.layers.7.mlp.gate_proj.weight": "model-00001-of-00004.safetensors",
311
+ "model.layers.7.mlp.up_proj.weight": "model-00001-of-00004.safetensors",
312
+ "model.layers.7.post_attention_layernorm.weight": "model-00001-of-00004.safetensors",
313
+ "model.layers.7.self_attn.k_proj.bias": "model-00001-of-00004.safetensors",
314
+ "model.layers.7.self_attn.k_proj.weight": "model-00001-of-00004.safetensors",
315
+ "model.layers.7.self_attn.o_proj.weight": "model-00001-of-00004.safetensors",
316
+ "model.layers.7.self_attn.q_proj.bias": "model-00001-of-00004.safetensors",
317
+ "model.layers.7.self_attn.q_proj.weight": "model-00001-of-00004.safetensors",
318
+ "model.layers.7.self_attn.v_proj.bias": "model-00001-of-00004.safetensors",
319
+ "model.layers.7.self_attn.v_proj.weight": "model-00001-of-00004.safetensors",
320
+ "model.layers.8.input_layernorm.weight": "model-00002-of-00004.safetensors",
321
+ "model.layers.8.mlp.down_proj.weight": "model-00002-of-00004.safetensors",
322
+ "model.layers.8.mlp.gate_proj.weight": "model-00002-of-00004.safetensors",
323
+ "model.layers.8.mlp.up_proj.weight": "model-00002-of-00004.safetensors",
324
+ "model.layers.8.post_attention_layernorm.weight": "model-00002-of-00004.safetensors",
325
+ "model.layers.8.self_attn.k_proj.bias": "model-00001-of-00004.safetensors",
326
+ "model.layers.8.self_attn.k_proj.weight": "model-00001-of-00004.safetensors",
327
+ "model.layers.8.self_attn.o_proj.weight": "model-00001-of-00004.safetensors",
328
+ "model.layers.8.self_attn.q_proj.bias": "model-00001-of-00004.safetensors",
329
+ "model.layers.8.self_attn.q_proj.weight": "model-00001-of-00004.safetensors",
330
+ "model.layers.8.self_attn.v_proj.bias": "model-00001-of-00004.safetensors",
331
+ "model.layers.8.self_attn.v_proj.weight": "model-00001-of-00004.safetensors",
332
+ "model.layers.9.input_layernorm.weight": "model-00002-of-00004.safetensors",
333
+ "model.layers.9.mlp.down_proj.weight": "model-00002-of-00004.safetensors",
334
+ "model.layers.9.mlp.gate_proj.weight": "model-00002-of-00004.safetensors",
335
+ "model.layers.9.mlp.up_proj.weight": "model-00002-of-00004.safetensors",
336
+ "model.layers.9.post_attention_layernorm.weight": "model-00002-of-00004.safetensors",
337
+ "model.layers.9.self_attn.k_proj.bias": "model-00002-of-00004.safetensors",
338
+ "model.layers.9.self_attn.k_proj.weight": "model-00002-of-00004.safetensors",
339
+ "model.layers.9.self_attn.o_proj.weight": "model-00002-of-00004.safetensors",
340
+ "model.layers.9.self_attn.q_proj.bias": "model-00002-of-00004.safetensors",
341
+ "model.layers.9.self_attn.q_proj.weight": "model-00002-of-00004.safetensors",
342
+ "model.layers.9.self_attn.v_proj.bias": "model-00002-of-00004.safetensors",
343
+ "model.layers.9.self_attn.v_proj.weight": "model-00002-of-00004.safetensors",
344
+ "model.norm.weight": "model-00003-of-00004.safetensors"
345
+ }
346
+ }
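model.safetensors.index.json maps every parameter name to the shard that stores it, and metadata.total_size (15,231,233,024 bytes) counts tensor data only, which is why the four shard files above add up to slightly more: each shard also carries its own small safetensors header. A minimal sketch of reading the index directly (the path is a placeholder):

    import json

    with open("checkpoint-120/model.safetensors.index.json") as f:  # placeholder path
        index = json.load(f)

    print(index["weight_map"]["model.embed_tokens.weight"])  # "model-00001-of-00004.safetensors"
    print(index["metadata"]["total_size"])                   # 15231233024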
checkpoint-120/special_tokens_map.json ADDED
@@ -0,0 +1,31 @@
+ {
+   "additional_special_tokens": [
+     "<|im_start|>",
+     "<|im_end|>",
+     "<|object_ref_start|>",
+     "<|object_ref_end|>",
+     "<|box_start|>",
+     "<|box_end|>",
+     "<|quad_start|>",
+     "<|quad_end|>",
+     "<|vision_start|>",
+     "<|vision_end|>",
+     "<|vision_pad|>",
+     "<|image_pad|>",
+     "<|video_pad|>"
+   ],
+   "eos_token": {
+     "content": "<|endoftext|>",
+     "lstrip": false,
+     "normalized": false,
+     "rstrip": false,
+     "single_word": false
+   },
+   "pad_token": {
+     "content": "<|endoftext|>",
+     "lstrip": false,
+     "normalized": false,
+     "rstrip": false,
+     "single_word": false
+   }
+ }
checkpoint-120/tokenizer.json ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:5eee858c5123a4279c3e1f7b81247343f356ac767940b2692a928ad929543214
+ size 11422063
checkpoint-120/tokenizer_config.json ADDED
@@ -0,0 +1,208 @@
1
+ {
2
+ "add_bos_token": false,
3
+ "add_prefix_space": false,
4
+ "added_tokens_decoder": {
5
+ "151643": {
6
+ "content": "<|endoftext|>",
7
+ "lstrip": false,
8
+ "normalized": false,
9
+ "rstrip": false,
10
+ "single_word": false,
11
+ "special": true
12
+ },
13
+ "151644": {
14
+ "content": "<|im_start|>",
15
+ "lstrip": false,
16
+ "normalized": false,
17
+ "rstrip": false,
18
+ "single_word": false,
19
+ "special": true
20
+ },
21
+ "151645": {
22
+ "content": "<|im_end|>",
23
+ "lstrip": false,
24
+ "normalized": false,
25
+ "rstrip": false,
26
+ "single_word": false,
27
+ "special": true
28
+ },
29
+ "151646": {
30
+ "content": "<|object_ref_start|>",
31
+ "lstrip": false,
32
+ "normalized": false,
33
+ "rstrip": false,
34
+ "single_word": false,
35
+ "special": true
36
+ },
37
+ "151647": {
38
+ "content": "<|object_ref_end|>",
39
+ "lstrip": false,
40
+ "normalized": false,
41
+ "rstrip": false,
42
+ "single_word": false,
43
+ "special": true
44
+ },
45
+ "151648": {
46
+ "content": "<|box_start|>",
47
+ "lstrip": false,
48
+ "normalized": false,
49
+ "rstrip": false,
50
+ "single_word": false,
51
+ "special": true
52
+ },
53
+ "151649": {
54
+ "content": "<|box_end|>",
55
+ "lstrip": false,
56
+ "normalized": false,
57
+ "rstrip": false,
58
+ "single_word": false,
59
+ "special": true
60
+ },
61
+ "151650": {
62
+ "content": "<|quad_start|>",
63
+ "lstrip": false,
64
+ "normalized": false,
65
+ "rstrip": false,
66
+ "single_word": false,
67
+ "special": true
68
+ },
69
+ "151651": {
70
+ "content": "<|quad_end|>",
71
+ "lstrip": false,
72
+ "normalized": false,
73
+ "rstrip": false,
74
+ "single_word": false,
75
+ "special": true
76
+ },
77
+ "151652": {
78
+ "content": "<|vision_start|>",
79
+ "lstrip": false,
80
+ "normalized": false,
81
+ "rstrip": false,
82
+ "single_word": false,
83
+ "special": true
84
+ },
85
+ "151653": {
86
+ "content": "<|vision_end|>",
87
+ "lstrip": false,
88
+ "normalized": false,
89
+ "rstrip": false,
90
+ "single_word": false,
91
+ "special": true
92
+ },
93
+ "151654": {
94
+ "content": "<|vision_pad|>",
95
+ "lstrip": false,
96
+ "normalized": false,
97
+ "rstrip": false,
98
+ "single_word": false,
99
+ "special": true
100
+ },
101
+ "151655": {
102
+ "content": "<|image_pad|>",
103
+ "lstrip": false,
104
+ "normalized": false,
105
+ "rstrip": false,
106
+ "single_word": false,
107
+ "special": true
108
+ },
109
+ "151656": {
110
+ "content": "<|video_pad|>",
111
+ "lstrip": false,
112
+ "normalized": false,
113
+ "rstrip": false,
114
+ "single_word": false,
115
+ "special": true
116
+ },
117
+ "151657": {
118
+ "content": "<tool_call>",
119
+ "lstrip": false,
120
+ "normalized": false,
121
+ "rstrip": false,
122
+ "single_word": false,
123
+ "special": false
124
+ },
125
+ "151658": {
126
+ "content": "</tool_call>",
127
+ "lstrip": false,
128
+ "normalized": false,
129
+ "rstrip": false,
130
+ "single_word": false,
131
+ "special": false
132
+ },
133
+ "151659": {
134
+ "content": "<|fim_prefix|>",
135
+ "lstrip": false,
136
+ "normalized": false,
137
+ "rstrip": false,
138
+ "single_word": false,
139
+ "special": false
140
+ },
141
+ "151660": {
142
+ "content": "<|fim_middle|>",
143
+ "lstrip": false,
144
+ "normalized": false,
145
+ "rstrip": false,
146
+ "single_word": false,
147
+ "special": false
148
+ },
149
+ "151661": {
150
+ "content": "<|fim_suffix|>",
151
+ "lstrip": false,
152
+ "normalized": false,
153
+ "rstrip": false,
154
+ "single_word": false,
155
+ "special": false
156
+ },
157
+ "151662": {
158
+ "content": "<|fim_pad|>",
159
+ "lstrip": false,
160
+ "normalized": false,
161
+ "rstrip": false,
162
+ "single_word": false,
163
+ "special": false
164
+ },
165
+ "151663": {
166
+ "content": "<|repo_name|>",
167
+ "lstrip": false,
168
+ "normalized": false,
169
+ "rstrip": false,
170
+ "single_word": false,
171
+ "special": false
172
+ },
173
+ "151664": {
174
+ "content": "<|file_sep|>",
175
+ "lstrip": false,
176
+ "normalized": false,
177
+ "rstrip": false,
178
+ "single_word": false,
179
+ "special": false
180
+ }
181
+ },
182
+ "additional_special_tokens": [
183
+ "<|im_start|>",
184
+ "<|im_end|>",
185
+ "<|object_ref_start|>",
186
+ "<|object_ref_end|>",
187
+ "<|box_start|>",
188
+ "<|box_end|>",
189
+ "<|quad_start|>",
190
+ "<|quad_end|>",
191
+ "<|vision_start|>",
192
+ "<|vision_end|>",
193
+ "<|vision_pad|>",
194
+ "<|image_pad|>",
195
+ "<|video_pad|>"
196
+ ],
197
+ "bos_token": null,
198
+ "chat_template": "{%- if tools %}\n {{- '<|im_start|>system\\n' }}\n {%- if messages[0]['role'] == 'system' %}\n {{- messages[0]['content'] }}\n {%- else %}\n {{- 'Please reason step by step, and put your final answer within \\\\boxed{}.' }}\n {%- endif %}\n {{- \"\\n\\n# Tools\\n\\nYou may call one or more functions to assist with the user query.\\n\\nYou are provided with function signatures within <tools></tools> XML tags:\\n<tools>\" }}\n {%- for tool in tools %}\n {{- \"\\n\" }}\n {{- tool | tojson }}\n {%- endfor %}\n {{- \"\\n</tools>\\n\\nFor each function call, return a json object with function name and arguments within <tool_call></tool_call> XML tags:\\n<tool_call>\\n{\\\"name\\\": <function-name>, \\\"arguments\\\": <args-json-object>}\\n</tool_call><|im_end|>\\n\" }}\n{%- else %}\n {%- if messages[0]['role'] == 'system' %}\n {{- '<|im_start|>system\\n' + messages[0]['content'] + '<|im_end|>\\n' }}\n {%- else %}\n {{- '<|im_start|>system\\nPlease reason step by step, and put your final answer within \\\\boxed{}.<|im_end|>\\n' }}\n {%- endif %}\n{%- endif %}\n{%- for message in messages %}\n {%- if (message.role == \"user\") or (message.role == \"system\" and not loop.first) or (message.role == \"assistant\" and not message.tool_calls) %}\n {{- '<|im_start|>' + message.role + '\\n' + message.content + '<|im_end|>' + '\\n' }}\n {%- elif message.role == \"assistant\" %}\n {{- '<|im_start|>' + message.role }}\n {%- if message.content %}\n {{- '\\n' + message.content }}\n {%- endif %}\n {%- for tool_call in message.tool_calls %}\n {%- if tool_call.function is defined %}\n {%- set tool_call = tool_call.function %}\n {%- endif %}\n {{- '\\n<tool_call>\\n{\"name\": \"' }}\n {{- tool_call.name }}\n {{- '\", \"arguments\": ' }}\n {{- tool_call.arguments | tojson }}\n {{- '}\\n</tool_call>' }}\n {%- endfor %}\n {{- '<|im_end|>\\n' }}\n {%- elif message.role == \"tool\" %}\n {%- if (loop.index0 == 0) or (messages[loop.index0 - 1].role != \"tool\") %}\n {{- '<|im_start|>user' }}\n {%- endif %}\n {{- '\\n<tool_response>\\n' }}\n {{- message.content }}\n {{- '\\n</tool_response>' }}\n {%- if loop.last or (messages[loop.index0 + 1].role != \"tool\") %}\n {{- '<|im_end|>\\n' }}\n {%- endif %}\n {%- endif %}\n{%- endfor %}\n{%- if add_generation_prompt %}\n {{- '<|im_start|>assistant\\n' }}\n{%- endif %}\n",
199
+ "clean_up_tokenization_spaces": false,
200
+ "eos_token": "<|endoftext|>",
201
+ "errors": "replace",
202
+ "extra_special_tokens": {},
203
+ "model_max_length": 131072,
204
+ "pad_token": "<|endoftext|>",
205
+ "split_special_tokens": false,
206
+ "tokenizer_class": "Qwen2Tokenizer",
207
+ "unk_token": null
208
+ }
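Besides the per-token flags, tokenizer_config.json ships a ChatML-style chat_template whose default system prompt is "Please reason step by step, and put your final answer within \boxed{}." A minimal sketch of applying it (path and message are placeholders):

    from transformers import AutoTokenizer

    tok = AutoTokenizer.from_pretrained("checkpoint-120")  # placeholder: local path to this folder
    messages = [{"role": "user", "content": "Compute 3^4 - 5."}]  # invented example
    text = tok.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)
    # With no system message, the template falls back to the step-by-step \boxed{} system prompt.
    print(text)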
checkpoint-120/trainer_state.json ADDED
@@ -0,0 +1,1809 @@
1
+ {
2
+ "best_metric": null,
3
+ "best_model_checkpoint": null,
4
+ "epoch": 0.38095238095238093,
5
+ "eval_steps": 10,
6
+ "global_step": 120,
7
+ "is_hyper_param_search": false,
8
+ "is_local_process_zero": true,
9
+ "is_world_process_zero": true,
10
+ "log_history": [
11
+ {
12
+ "clip_ratio": 0.0,
13
+ "completion_length": 973.6428833007812,
14
+ "epoch": 0.0031746031746031746,
15
+ "grad_norm": 2.777648063498741,
16
+ "kl": 0.0,
17
+ "learning_rate": 1e-07,
18
+ "loss": 0.1021,
19
+ "reward": 0.9749529957771301,
20
+ "reward_std": 0.8150795698165894,
21
+ "rewards/": 3.446193218231201,
22
+ "rewards/math_compute_score": 0.3571428656578064,
23
+ "step": 1
24
+ },
25
+ {
26
+ "clip_ratio": 0.0,
27
+ "completion_length": 708.4285888671875,
28
+ "epoch": 0.006349206349206349,
29
+ "grad_norm": 0.8174320460798857,
30
+ "kl": 0.0,
31
+ "learning_rate": 2e-07,
32
+ "loss": 0.0372,
33
+ "reward": 1.2198399305343628,
34
+ "reward_std": 0.6601618528366089,
35
+ "rewards/": 3.9563426971435547,
36
+ "rewards/math_compute_score": 0.535714328289032,
37
+ "step": 2
38
+ },
39
+ {
40
+ "clip_ratio": 0.0,
41
+ "completion_length": 891.607177734375,
42
+ "epoch": 0.009523809523809525,
43
+ "grad_norm": 4.681867395183935,
44
+ "kl": 0.00020313262939453125,
45
+ "learning_rate": 3e-07,
46
+ "loss": -0.0069,
47
+ "reward": 0.8394444584846497,
48
+ "reward_std": 0.7244595885276794,
49
+ "rewards/": 3.268650770187378,
50
+ "rewards/math_compute_score": 0.2321428656578064,
51
+ "step": 3
52
+ },
53
+ {
54
+ "clip_ratio": 0.0,
55
+ "completion_length": 672.0535888671875,
56
+ "epoch": 0.012698412698412698,
57
+ "grad_norm": 1.9071109509944568,
58
+ "kl": 0.00019073486328125,
59
+ "learning_rate": 4e-07,
60
+ "loss": 0.1393,
61
+ "reward": 0.837346613407135,
62
+ "reward_std": 0.7451086640357971,
63
+ "rewards/": 2.186732769012451,
64
+ "rewards/math_compute_score": 0.5,
65
+ "step": 4
66
+ },
67
+ {
68
+ "clip_ratio": 0.0,
69
+ "completion_length": 843.0178833007812,
70
+ "epoch": 0.015873015873015872,
71
+ "grad_norm": 3.3988529526704374,
72
+ "kl": 0.00021457672119140625,
73
+ "learning_rate": 5e-07,
74
+ "loss": -0.0491,
75
+ "reward": 1.047133207321167,
76
+ "reward_std": 0.6228165626525879,
77
+ "rewards/": 3.7356655597686768,
78
+ "rewards/math_compute_score": 0.3750000298023224,
79
+ "step": 5
80
+ },
81
+ {
82
+ "clip_ratio": 0.0,
83
+ "completion_length": 953.6785888671875,
84
+ "epoch": 0.01904761904761905,
85
+ "grad_norm": 2.637160712333027,
86
+ "kl": 0.00018310546875,
87
+ "learning_rate": 6e-07,
88
+ "loss": 0.1669,
89
+ "reward": 1.0524885654449463,
90
+ "reward_std": 0.8631003499031067,
91
+ "rewards/": 3.9767284393310547,
92
+ "rewards/math_compute_score": 0.3214285969734192,
93
+ "step": 6
94
+ },
95
+ {
96
+ "clip_ratio": 0.0,
97
+ "completion_length": 767.0357666015625,
98
+ "epoch": 0.022222222222222223,
99
+ "grad_norm": 4.305675207109899,
100
+ "kl": 0.0002536773681640625,
101
+ "learning_rate": 7e-07,
102
+ "loss": 0.2786,
103
+ "reward": 1.2252023220062256,
104
+ "reward_std": 0.6430084109306335,
105
+ "rewards/": 3.6974401473999023,
106
+ "rewards/math_compute_score": 0.6071428656578064,
107
+ "step": 7
108
+ },
109
+ {
110
+ "clip_ratio": 0.0,
111
+ "completion_length": 639.1785888671875,
112
+ "epoch": 0.025396825396825397,
113
+ "grad_norm": 3.7743752869617477,
114
+ "kl": 0.000499725341796875,
115
+ "learning_rate": 8e-07,
116
+ "loss": 0.1966,
117
+ "reward": 1.1576731204986572,
118
+ "reward_std": 0.6582887172698975,
119
+ "rewards/": 3.788365125656128,
120
+ "rewards/math_compute_score": 0.5,
121
+ "step": 8
122
+ },
123
+ {
124
+ "clip_ratio": 0.0,
125
+ "completion_length": 806.607177734375,
126
+ "epoch": 0.02857142857142857,
127
+ "grad_norm": 2.64690799480574,
128
+ "kl": 0.000507354736328125,
129
+ "learning_rate": 9e-07,
130
+ "loss": 0.1066,
131
+ "reward": 0.9516462087631226,
132
+ "reward_std": 0.6632688045501709,
133
+ "rewards/": 3.1153740882873535,
134
+ "rewards/math_compute_score": 0.4107142984867096,
135
+ "step": 9
136
+ },
137
+ {
138
+ "epoch": 0.031746031746031744,
139
+ "grad_norm": 2.1254890231202204,
140
+ "learning_rate": 1e-06,
141
+ "loss": 0.0976,
142
+ "step": 10
143
+ },
144
+ {
145
+ "epoch": 0.031746031746031744,
146
+ "eval_clip_ratio": 0.0,
147
+ "eval_completion_length": 881.2059733072916,
148
+ "eval_kl": 0.0006243387858072916,
149
+ "eval_loss": 0.10283354669809341,
150
+ "eval_reward": 1.3367964426676433,
151
+ "eval_reward_std": 0.7034762501716614,
152
+ "eval_rewards/": 4.350648641586304,
153
+ "eval_rewards/math_compute_score": 0.5833333730697632,
154
+ "eval_runtime": 110.5471,
155
+ "eval_samples_per_second": 0.181,
156
+ "eval_steps_per_second": 0.009,
157
+ "step": 10
158
+ },
159
+ {
160
+ "clip_ratio": 0.0,
161
+ "completion_length": 852.982177734375,
162
+ "epoch": 0.03492063492063492,
163
+ "grad_norm": 0.9239839479287824,
164
+ "kl": 0.0006341934204101562,
165
+ "learning_rate": 9.99973476170006e-07,
166
+ "loss": 0.0414,
167
+ "reward": 1.064503788948059,
168
+ "reward_std": 0.7399034202098846,
169
+ "rewards/": 3.608232855796814,
170
+ "rewards/math_compute_score": 0.4285714626312256,
171
+ "step": 11
172
+ },
173
+ {
174
+ "clip_ratio": 0.0,
175
+ "completion_length": 678.5892944335938,
176
+ "epoch": 0.0380952380952381,
177
+ "grad_norm": 1.877318279677736,
178
+ "kl": 0.000621795654296875,
179
+ "learning_rate": 9.998939074940787e-07,
180
+ "loss": 0.1241,
181
+ "reward": 1.0163637399673462,
182
+ "reward_std": 0.5952743887901306,
183
+ "rewards/": 3.224675416946411,
184
+ "rewards/math_compute_score": 0.4642857313156128,
185
+ "step": 12
186
+ },
187
+ {
188
+ "clip_ratio": 0.0,
189
+ "completion_length": 605.7678833007812,
190
+ "epoch": 0.04126984126984127,
191
+ "grad_norm": 2.4604298210996953,
192
+ "kl": 0.00101470947265625,
193
+ "learning_rate": 9.997613024140818e-07,
194
+ "loss": 0.0281,
195
+ "reward": 1.1692917346954346,
196
+ "reward_std": 0.7951300740242004,
197
+ "rewards/": 3.989314556121826,
198
+ "rewards/math_compute_score": 0.4642857313156128,
199
+ "step": 13
200
+ },
201
+ {
202
+ "clip_ratio": 0.0,
203
+ "completion_length": 706.5714721679688,
204
+ "epoch": 0.044444444444444446,
205
+ "grad_norm": 1.7791723044352024,
206
+ "kl": 0.00089263916015625,
207
+ "learning_rate": 9.995756749987941e-07,
208
+ "loss": 0.1917,
209
+ "reward": 1.2231166362762451,
210
+ "reward_std": 0.5831072926521301,
211
+ "rewards/": 3.829869031906128,
212
+ "rewards/math_compute_score": 0.5714285969734192,
213
+ "step": 14
214
+ },
215
+ {
216
+ "clip_ratio": 0.0,
217
+ "completion_length": 752.1964721679688,
218
+ "epoch": 0.047619047619047616,
219
+ "grad_norm": 2.2482045589194892,
220
+ "kl": 0.0009918212890625,
221
+ "learning_rate": 9.993370449424152e-07,
222
+ "loss": 0.0832,
223
+ "reward": 1.3100411891937256,
224
+ "reward_std": 0.7690545320510864,
225
+ "rewards/": 4.335920333862305,
226
+ "rewards/math_compute_score": 0.5535714626312256,
227
+ "step": 15
228
+ },
229
+ {
230
+ "clip_ratio": 0.0,
231
+ "completion_length": 766.5535888671875,
232
+ "epoch": 0.050793650793650794,
233
+ "grad_norm": 3.1628124598253606,
234
+ "kl": 0.000843048095703125,
235
+ "learning_rate": 9.990454375624776e-07,
236
+ "loss": 0.38,
237
+ "reward": 1.3946969509124756,
238
+ "reward_std": 0.43135303258895874,
239
+ "rewards/": 4.687770366668701,
240
+ "rewards/math_compute_score": 0.5714285969734192,
241
+ "step": 16
242
+ },
243
+ {
244
+ "clip_ratio": 0.0,
245
+ "completion_length": 838.9285888671875,
246
+ "epoch": 0.05396825396825397,
247
+ "grad_norm": 0.8809772166643767,
248
+ "kl": 0.0009765625,
249
+ "learning_rate": 9.987008837971594e-07,
250
+ "loss": 0.2734,
251
+ "reward": 1.2452915906906128,
252
+ "reward_std": 0.6284029483795166,
253
+ "rewards/": 4.369315147399902,
254
+ "rewards/math_compute_score": 0.4642857313156128,
255
+ "step": 17
256
+ },
257
+ {
258
+ "clip_ratio": 0.0,
259
+ "completion_length": 623.8214721679688,
260
+ "epoch": 0.05714285714285714,
261
+ "grad_norm": 1.037602508755578,
262
+ "kl": 0.00156402587890625,
263
+ "learning_rate": 9.98303420202003e-07,
264
+ "loss": 0.1688,
265
+ "reward": 1.3429791927337646,
266
+ "reward_std": 0.6546451449394226,
267
+ "rewards/": 4.357753276824951,
268
+ "rewards/math_compute_score": 0.5892857313156128,
269
+ "step": 18
270
+ },
271
+ {
272
+ "clip_ratio": 0.0,
273
+ "completion_length": 785.5535888671875,
274
+ "epoch": 0.06031746031746032,
275
+ "grad_norm": 0.9507117942449174,
276
+ "kl": 0.0013580322265625,
277
+ "learning_rate": 9.978530889460349e-07,
278
+ "loss": 0.0918,
279
+ "reward": 1.3252233266830444,
280
+ "reward_std": 0.42738255858421326,
281
+ "rewards/": 3.9832592010498047,
282
+ "rewards/math_compute_score": 0.660714328289032,
283
+ "step": 19
284
+ },
285
+ {
286
+ "epoch": 0.06349206349206349,
287
+ "grad_norm": 1.0293500184503375,
288
+ "learning_rate": 9.973499378072946e-07,
289
+ "loss": 0.0123,
290
+ "step": 20
291
+ },
292
+ {
293
+ "epoch": 0.06349206349206349,
294
+ "eval_clip_ratio": 0.0,
295
+ "eval_completion_length": 773.2273966471354,
296
+ "eval_kl": 0.004353841145833333,
297
+ "eval_loss": 0.17447665333747864,
298
+ "eval_reward": 1.47151780128479,
299
+ "eval_reward_std": 0.5722967584927877,
300
+ "eval_rewards/": 4.524255673090617,
301
+ "eval_rewards/math_compute_score": 0.7083333730697632,
302
+ "eval_runtime": 105.3036,
303
+ "eval_samples_per_second": 0.19,
304
+ "eval_steps_per_second": 0.009,
305
+ "step": 20
306
+ },
307
+ {
308
+ "clip_ratio": 0.0,
309
+ "completion_length": 580.1339721679688,
310
+ "epoch": 0.06666666666666667,
311
+ "grad_norm": 1.9887336347273274,
312
+ "kl": 0.00174713134765625,
313
+ "learning_rate": 9.967940201677625e-07,
314
+ "loss": 0.2513,
315
+ "reward": 1.5847508311271667,
316
+ "reward_std": 0.5660464465618134,
317
+ "rewards/": 4.9237542152404785,
318
+ "rewards/math_compute_score": 0.7500000298023224,
319
+ "step": 21
320
+ },
321
+ {
322
+ "clip_ratio": 0.0,
323
+ "completion_length": 699.7142944335938,
324
+ "epoch": 0.06984126984126984,
325
+ "grad_norm": 0.8687096793533198,
326
+ "kl": 0.000827789306640625,
327
+ "learning_rate": 9.96185395007699e-07,
328
+ "loss": 0.1428,
329
+ "reward": 1.5009138584136963,
330
+ "reward_std": 0.5467565655708313,
331
+ "rewards/": 4.3617119789123535,
332
+ "rewards/math_compute_score": 0.785714328289032,
333
+ "step": 22
334
+ },
335
+ {
336
+ "clip_ratio": 0.0,
337
+ "completion_length": 662.9464721679688,
338
+ "epoch": 0.07301587301587302,
339
+ "grad_norm": 1.281704967643667,
340
+ "kl": 0.0009765625,
341
+ "learning_rate": 9.95524126899385e-07,
342
+ "loss": 0.2754,
343
+ "reward": 1.4940431118011475,
344
+ "reward_std": 0.49719491600990295,
345
+ "rewards/": 4.684500694274902,
346
+ "rewards/math_compute_score": 0.6964285969734192,
347
+ "step": 23
348
+ },
349
+ {
350
+ "clip_ratio": 0.0,
351
+ "completion_length": 593.1785888671875,
352
+ "epoch": 0.0761904761904762,
353
+ "grad_norm": 2.6268302071766283,
354
+ "kl": 0.00179290771484375,
355
+ "learning_rate": 9.94810286000272e-07,
356
+ "loss": 0.1202,
357
+ "reward": 1.4679704904556274,
358
+ "reward_std": 0.5678864121437073,
359
+ "rewards/": 3.9827098846435547,
360
+ "rewards/math_compute_score": 0.8392857313156128,
361
+ "step": 24
362
+ },
363
+ {
364
+ "clip_ratio": 0.0,
365
+ "completion_length": 816.5178833007812,
366
+ "epoch": 0.07936507936507936,
367
+ "grad_norm": 1.6260567552924308,
368
+ "kl": 0.00225830078125,
369
+ "learning_rate": 9.940439480455385e-07,
370
+ "loss": 0.1839,
371
+ "reward": 1.5339845418930054,
372
+ "reward_std": 0.5200893878936768,
373
+ "rewards/": 5.027064800262451,
374
+ "rewards/math_compute_score": 0.660714328289032,
375
+ "step": 25
376
+ },
377
+ {
378
+ "clip_ratio": 0.0,
379
+ "completion_length": 586.8035888671875,
380
+ "epoch": 0.08253968253968254,
381
+ "grad_norm": 3.176548889137236,
382
+ "kl": 0.0013885498046875,
383
+ "learning_rate": 9.932251943400553e-07,
384
+ "loss": 0.0802,
385
+ "reward": 1.6504465341567993,
386
+ "reward_std": 0.38086333870887756,
387
+ "rewards/": 4.895089626312256,
388
+ "rewards/math_compute_score": 0.8392857313156128,
389
+ "step": 26
390
+ },
391
+ {
392
+ "clip_ratio": 0.0,
393
+ "completion_length": 666.607177734375,
394
+ "epoch": 0.08571428571428572,
395
+ "grad_norm": 2.5123015337902737,
396
+ "kl": 0.00098419189453125,
397
+ "learning_rate": 9.923541117497585e-07,
398
+ "loss": 0.0904,
399
+ "reward": 1.4960030317306519,
400
+ "reward_std": 0.601716160774231,
401
+ "rewards/": 4.694301128387451,
402
+ "rewards/math_compute_score": 0.6964285969734192,
403
+ "step": 27
404
+ },
405
+ {
406
+ "clip_ratio": 0.0,
407
+ "completion_length": 636.4285888671875,
408
+ "epoch": 0.08888888888888889,
409
+ "grad_norm": 1.2874582194461706,
410
+ "kl": 0.00299072265625,
411
+ "learning_rate": 9.914307926924344e-07,
412
+ "loss": 0.1484,
413
+ "reward": 1.4589931964874268,
414
+ "reward_std": 0.6707652807235718,
415
+ "rewards/": 4.223536968231201,
416
+ "rewards/math_compute_score": 0.7678571939468384,
417
+ "step": 28
418
+ },
419
+ {
420
+ "clip_ratio": 0.0,
421
+ "completion_length": 690.5892944335938,
422
+ "epoch": 0.09206349206349207,
423
+ "grad_norm": 2.693607802209877,
424
+ "kl": 0.0023345947265625,
425
+ "learning_rate": 9.904553351279136e-07,
426
+ "loss": 0.1137,
427
+ "reward": 1.4248205423355103,
428
+ "reward_std": 0.6116536259651184,
429
+ "rewards/": 4.409816265106201,
430
+ "rewards/math_compute_score": 0.6785714626312256,
431
+ "step": 29
432
+ },
433
+ {
434
+ "epoch": 0.09523809523809523,
435
+ "grad_norm": 1.5994282855822706,
436
+ "learning_rate": 9.894278425476788e-07,
437
+ "loss": 0.1336,
438
+ "step": 30
439
+ },
440
+ {
441
+ "epoch": 0.09523809523809523,
442
+ "eval_clip_ratio": 0.0,
443
+ "eval_completion_length": 845.4524129231771,
444
+ "eval_kl": 0.009012858072916666,
445
+ "eval_loss": 0.13642674684524536,
446
+ "eval_reward": 1.517225941022237,
447
+ "eval_reward_std": 0.5625964005788168,
448
+ "eval_rewards/": 4.728986581166585,
449
+ "eval_rewards/math_compute_score": 0.7142857511838278,
450
+ "eval_runtime": 106.4531,
451
+ "eval_samples_per_second": 0.188,
452
+ "eval_steps_per_second": 0.009,
453
+ "step": 30
454
+ },
455
+ {
456
+ "clip_ratio": 0.0,
457
+ "completion_length": 710.9107360839844,
458
+ "epoch": 0.09841269841269841,
459
+ "grad_norm": 0.9974627094215855,
460
+ "kl": 0.00218963623046875,
461
+ "learning_rate": 9.88348423963884e-07,
462
+ "loss": 0.2872,
463
+ "reward": 1.5053049325942993,
464
+ "reward_std": 0.5521042495965958,
465
+ "rewards/": 4.9550957679748535,
466
+ "rewards/math_compute_score": 0.6428571939468384,
467
+ "step": 31
468
+ },
469
+ {
470
+ "clip_ratio": 0.0,
471
+ "completion_length": 708.1250610351562,
472
+ "epoch": 0.10158730158730159,
473
+ "grad_norm": 2.7592195903517727,
474
+ "kl": 0.0031280517578125,
475
+ "learning_rate": 9.872171938977893e-07,
476
+ "loss": 0.2145,
477
+ "reward": 1.5618199110031128,
478
+ "reward_std": 0.49139147996902466,
479
+ "rewards/": 4.451956748962402,
480
+ "rewards/math_compute_score": 0.8392857313156128,
481
+ "step": 32
482
+ },
483
+ {
484
+ "clip_ratio": 0.0,
485
+ "completion_length": 653.625,
486
+ "epoch": 0.10476190476190476,
487
+ "grad_norm": 1.4899641756718722,
488
+ "kl": 0.0025787353515625,
489
+ "learning_rate": 9.860342723676104e-07,
490
+ "loss": 0.2142,
491
+ "reward": 1.5433281660079956,
492
+ "reward_std": 0.5415727496147156,
493
+ "rewards/": 4.502354621887207,
494
+ "rewards/math_compute_score": 0.8035714626312256,
495
+ "step": 33
496
+ },
497
+ {
498
+ "clip_ratio": 0.0,
499
+ "completion_length": 646.9464721679688,
500
+ "epoch": 0.10793650793650794,
501
+ "grad_norm": 0.7771150726203957,
502
+ "kl": 0.002044677734375,
503
+ "learning_rate": 9.847997848757854e-07,
504
+ "loss": 0.1302,
505
+ "reward": 1.4443954229354858,
506
+ "reward_std": 0.5867473483085632,
507
+ "rewards/": 4.293405055999756,
508
+ "rewards/math_compute_score": 0.7321428656578064,
509
+ "step": 34
510
+ },
511
+ {
512
+ "clip_ratio": 0.0,
513
+ "completion_length": 601.8392944335938,
514
+ "epoch": 0.1111111111111111,
515
+ "grad_norm": 1.8258651083690958,
516
+ "kl": 0.0035247802734375,
517
+ "learning_rate": 9.835138623956602e-07,
518
+ "loss": -0.0484,
519
+ "reward": 1.6701312065124512,
520
+ "reward_std": 0.38416627049446106,
521
+ "rewards/": 4.493513107299805,
522
+ "rewards/math_compute_score": 0.9642857313156128,
523
+ "step": 35
524
+ },
525
+ {
526
+ "clip_ratio": 0.0,
527
+ "completion_length": 633.232177734375,
528
+ "epoch": 0.11428571428571428,
529
+ "grad_norm": 1.9714220532856719,
530
+ "kl": 0.0037384033203125,
531
+ "learning_rate": 9.821766413575914e-07,
532
+ "loss": 0.057,
533
+ "reward": 1.6821569204330444,
534
+ "reward_std": 0.4740881323814392,
535
+ "rewards/": 5.553641319274902,
536
+ "rewards/math_compute_score": 0.7142857313156128,
537
+ "step": 36
538
+ },
539
+ {
540
+ "clip_ratio": 0.0,
541
+ "completion_length": 620.75,
542
+ "epoch": 0.11746031746031746,
543
+ "grad_norm": 1.6010557178740101,
544
+ "kl": 0.00238037109375,
545
+ "learning_rate": 9.80788263634473e-07,
546
+ "loss": 0.0491,
547
+ "reward": 1.4776437282562256,
548
+ "reward_std": 0.514848530292511,
549
+ "rewards/": 4.459647178649902,
550
+ "rewards/math_compute_score": 0.7321428656578064,
551
+ "step": 37
552
+ },
553
+ {
554
+ "clip_ratio": 0.0,
555
+ "completion_length": 663.375,
556
+ "epoch": 0.12063492063492064,
557
+ "grad_norm": 3.177882116146041,
558
+ "kl": 0.0025634765625,
559
+ "learning_rate": 9.793488765266838e-07,
560
+ "loss": 0.2726,
561
+ "reward": 1.4468194246292114,
562
+ "reward_std": 0.5143493413925171,
563
+ "rewards/": 4.519810676574707,
564
+ "rewards/math_compute_score": 0.6785714626312256,
565
+ "step": 38
566
+ },
567
+ {
568
+ "clip_ratio": 0.0,
569
+ "completion_length": 662.8035888671875,
570
+ "epoch": 0.12380952380952381,
571
+ "grad_norm": 0.9741870004361116,
572
+ "kl": 0.00445556640625,
573
+ "learning_rate": 9.778586327464597e-07,
574
+ "loss": 0.1164,
575
+ "reward": 1.3739641904830933,
576
+ "reward_std": 0.5605769157409668,
577
+ "rewards/": 4.012678146362305,
578
+ "rewards/math_compute_score": 0.7142857313156128,
579
+ "step": 39
580
+ },
581
+ {
582
+ "epoch": 0.12698412698412698,
583
+ "grad_norm": 1.1959685615621873,
584
+ "learning_rate": 9.763176904016913e-07,
585
+ "loss": 0.1094,
586
+ "step": 40
587
+ },
588
+ {
589
+ "epoch": 0.12698412698412698,
590
+ "eval_clip_ratio": 0.0,
591
+ "eval_completion_length": 792.1571655273438,
592
+ "eval_kl": 0.025288899739583332,
593
+ "eval_loss": 0.11785109341144562,
594
+ "eval_reward": 1.5712554057439168,
595
+ "eval_reward_std": 0.5905763109525045,
596
+ "eval_rewards/": 4.7134199142456055,
597
+ "eval_rewards/math_compute_score": 0.785714328289032,
598
+ "eval_runtime": 105.8496,
599
+ "eval_samples_per_second": 0.189,
600
+ "eval_steps_per_second": 0.009,
601
+ "step": 40
602
+ },
603
+ {
604
+ "clip_ratio": 0.0,
605
+ "completion_length": 713.8839721679688,
606
+ "epoch": 0.13015873015873017,
607
+ "grad_norm": 0.8987806956191959,
608
+ "kl": 0.002960205078125,
609
+ "learning_rate": 9.747262129791495e-07,
610
+ "loss": 0.0717,
611
+ "reward": 1.5012034177780151,
612
+ "reward_std": 0.5221634805202484,
613
+ "rewards/": 4.827445030212402,
614
+ "rewards/math_compute_score": 0.6696428656578064,
615
+ "step": 41
616
+ },
617
+ {
618
+ "clip_ratio": 0.0,
619
+ "completion_length": 840.2857666015625,
620
+ "epoch": 0.13333333333333333,
621
+ "grad_norm": 1.08064899340382,
622
+ "kl": 0.00262451171875,
623
+ "learning_rate": 9.730843693271413e-07,
624
+ "loss": 0.1193,
625
+ "reward": 1.4552539587020874,
626
+ "reward_std": 0.6179671287536621,
627
+ "rewards/": 4.776269435882568,
628
+ "rewards/math_compute_score": 0.625,
629
+ "step": 42
630
+ },
631
+ {
632
+ "clip_ratio": 0.0,
633
+ "completion_length": 641.0357666015625,
634
+ "epoch": 0.1365079365079365,
635
+ "grad_norm": 0.766263291574053,
636
+ "kl": 0.0024871826171875,
637
+ "learning_rate": 9.713923336375936e-07,
638
+ "loss": 0.1407,
639
+ "reward": 1.6835448741912842,
640
+ "reward_std": 0.5881065726280212,
641
+ "rewards/": 5.703439235687256,
642
+ "rewards/math_compute_score": 0.6785714626312256,
643
+ "step": 43
644
+ },
645
+ {
646
+ "clip_ratio": 0.0,
647
+ "completion_length": 638.7857666015625,
648
+ "epoch": 0.13968253968253969,
649
+ "grad_norm": 1.032192482137044,
650
+ "kl": 0.002838134765625,
651
+ "learning_rate": 9.696502854275748e-07,
652
+ "loss": 0.1778,
653
+ "reward": 1.4399136304855347,
654
+ "reward_std": 0.570347249507904,
655
+ "rewards/": 4.27099609375,
656
+ "rewards/math_compute_score": 0.7321428656578064,
657
+ "step": 44
658
+ },
659
+ {
660
+ "clip_ratio": 0.0,
661
+ "completion_length": 716.5714721679688,
662
+ "epoch": 0.14285714285714285,
663
+ "grad_norm": 1.0335824481684432,
664
+ "kl": 0.0021820068359375,
665
+ "learning_rate": 9.678584095202469e-07,
666
+ "loss": 0.2178,
667
+ "reward": 1.5996861457824707,
668
+ "reward_std": 0.7114024758338928,
669
+ "rewards/": 5.355573654174805,
670
+ "rewards/math_compute_score": 0.660714328289032,
671
+ "step": 45
672
+ },
673
+ {
674
+ "clip_ratio": 0.0,
675
+ "completion_length": 502.0357360839844,
676
+ "epoch": 0.14603174603174604,
677
+ "grad_norm": 0.8112268442860173,
678
+ "kl": 0.004425048828125,
679
+ "learning_rate": 9.660168960252575e-07,
680
+ "loss": 0.0694,
681
+ "reward": 1.4711426496505737,
682
+ "reward_std": 0.35930201411247253,
683
+ "rewards/": 4.355712890625,
684
+ "rewards/math_compute_score": 0.7500000596046448,
685
+ "step": 46
686
+ },
687
+ {
688
+ "clip_ratio": 0.0,
689
+ "completion_length": 811.8214721679688,
690
+ "epoch": 0.1492063492063492,
691
+ "grad_norm": 0.859203113128272,
692
+ "kl": 0.00098419189453125,
693
+ "learning_rate": 9.641259403185704e-07,
694
+ "loss": 0.1777,
695
+ "reward": 1.622921347618103,
696
+ "reward_std": 0.4903363585472107,
697
+ "rewards/": 5.900321006774902,
698
+ "rewards/math_compute_score": 0.5535714626312256,
699
+ "step": 47
700
+ },
701
+ {
702
+ "clip_ratio": 0.0,
703
+ "completion_length": 765.607177734375,
704
+ "epoch": 0.1523809523809524,
705
+ "grad_norm": 0.8078928566280016,
706
+ "kl": 0.0020294189453125,
707
+ "learning_rate": 9.621857430217365e-07,
708
+ "loss": 0.2052,
709
+ "reward": 1.5972657203674316,
710
+ "reward_std": 0.3894428610801697,
711
+ "rewards/": 5.414899826049805,
712
+ "rewards/math_compute_score": 0.6428571939468384,
713
+ "step": 48
714
+ },
715
+ {
716
+ "clip_ratio": 0.0,
717
+ "completion_length": 637.0178833007812,
718
+ "epoch": 0.15555555555555556,
719
+ "grad_norm": 4.86317816941763,
720
+ "kl": 0.002838134765625,
721
+ "learning_rate": 9.601965099806084e-07,
722
+ "loss": 0.1317,
723
+ "reward": 1.5043946504592896,
724
+ "reward_std": 0.5993847250938416,
725
+ "rewards/": 4.379115581512451,
726
+ "rewards/math_compute_score": 0.785714328289032,
727
+ "step": 49
728
+ },
729
+ {
730
+ "epoch": 0.15873015873015872,
731
+ "grad_norm": 0.6567682116832987,
732
+ "learning_rate": 9.581584522435023e-07,
733
+ "loss": 0.1677,
734
+ "step": 50
735
+ },
736
+ {
737
+ "epoch": 0.15873015873015872,
738
+ "eval_clip_ratio": 0.0,
739
+ "eval_completion_length": 841.1322021484375,
740
+ "eval_kl": 0.004923502604166667,
741
+ "eval_loss": 0.10817983746528625,
742
+ "eval_reward": 1.6196069717407227,
743
+ "eval_reward_std": 0.539500097433726,
744
+ "eval_rewards/": 5.074225107828776,
745
+ "eval_rewards/math_compute_score": 0.7559524178504944,
746
+ "eval_runtime": 107.9307,
747
+ "eval_samples_per_second": 0.185,
748
+ "eval_steps_per_second": 0.009,
749
+ "step": 50
750
+ },
751
+ {
752
+ "clip_ratio": 0.0,
753
+ "completion_length": 603.4107513427734,
754
+ "epoch": 0.1619047619047619,
755
+ "grad_norm": 0.7984893297808615,
756
+ "kl": 0.00357818603515625,
757
+ "learning_rate": 9.56071786038806e-07,
758
+ "loss": 0.1832,
759
+ "reward": 1.4798433780670166,
760
+ "reward_std": 0.45238083600997925,
761
+ "rewards/": 4.363502264022827,
762
+ "rewards/math_compute_score": 0.7589285969734192,
763
+ "step": 51
764
+ },
765
+ {
766
+ "clip_ratio": 0.0,
767
+ "completion_length": 742.607177734375,
768
+ "epoch": 0.16507936507936508,
769
+ "grad_norm": 0.5100556612157693,
770
+ "kl": 0.00167083740234375,
771
+ "learning_rate": 9.53936732752038e-07,
772
+ "loss": -0.0658,
773
+ "reward": 1.6291016340255737,
774
+ "reward_std": 0.38449862599372864,
775
+ "rewards/": 5.359793663024902,
776
+ "rewards/math_compute_score": 0.6964285969734192,
777
+ "step": 52
778
+ },
779
+ {
780
+ "clip_ratio": 0.0,
781
+ "completion_length": 679.2857666015625,
782
+ "epoch": 0.16825396825396827,
783
+ "grad_norm": 0.5788655761199448,
784
+ "kl": 0.002838134765625,
785
+ "learning_rate": 9.517535189023601e-07,
786
+ "loss": 0.0362,
787
+ "reward": 1.597886562347412,
788
+ "reward_std": 0.4612652361392975,
789
+ "rewards/": 4.418004035949707,
790
+ "rewards/math_compute_score": 0.8928571939468384,
791
+ "step": 53
792
+ },
793
+ {
794
+ "clip_ratio": 0.0,
795
+ "completion_length": 677.6785888671875,
796
+ "epoch": 0.17142857142857143,
797
+ "grad_norm": 1.116601202976549,
798
+ "kl": 0.00189971923828125,
799
+ "learning_rate": 9.495223761185441e-07,
800
+ "loss": 0.1404,
801
+ "reward": 1.7574794292449951,
802
+ "reward_std": 0.4653353691101074,
803
+ "rewards/": 5.858826160430908,
804
+ "rewards/math_compute_score": 0.7321428656578064,
805
+ "step": 54
806
+ },
807
+ {
808
+ "clip_ratio": 0.0,
809
+ "completion_length": 571.9642944335938,
810
+ "epoch": 0.1746031746031746,
811
+ "grad_norm": 5.49249475780532,
812
+ "kl": 0.0086669921875,
813
+ "learning_rate": 9.472435411143977e-07,
814
+ "loss": 0.1024,
815
+ "reward": 1.579323172569275,
816
+ "reward_std": 0.49340134859085083,
817
+ "rewards/": 4.825186729431152,
818
+ "rewards/math_compute_score": 0.7678571939468384,
819
+ "step": 55
820
+ },
821
+ {
822
+ "clip_ratio": 0.0,
823
+ "completion_length": 534.5714721679688,
824
+ "epoch": 0.17777777777777778,
825
+ "grad_norm": 0.6735986705301703,
826
+ "kl": 0.003204345703125,
827
+ "learning_rate": 9.449172556636497e-07,
828
+ "loss": 0.0415,
829
+ "reward": 1.7549248933792114,
830
+ "reward_std": 0.41859832406044006,
831
+ "rewards/": 5.346052169799805,
832
+ "rewards/math_compute_score": 0.8571429252624512,
833
+ "step": 56
834
+ },
835
+ {
836
+ "clip_ratio": 0.0,
837
+ "completion_length": 786.3750610351562,
838
+ "epoch": 0.18095238095238095,
839
+ "grad_norm": 1.2145506026406598,
840
+ "kl": 0.0016632080078125,
841
+ "learning_rate": 9.425437665742997e-07,
842
+ "loss": 0.1078,
843
+ "reward": 1.463396430015564,
844
+ "reward_std": 0.5382161736488342,
845
+ "rewards/": 4.602696418762207,
846
+ "rewards/math_compute_score": 0.6785714626312256,
847
+ "step": 57
848
+ },
849
+ {
850
+ "clip_ratio": 0.0,
851
+ "completion_length": 651.6607666015625,
852
+ "epoch": 0.18412698412698414,
853
+ "grad_norm": 1.6110105562648922,
854
+ "kl": 0.0028839111328125,
855
+ "learning_rate": 9.401233256624316e-07,
856
+ "loss": 0.0788,
857
+ "reward": 1.6237270832061768,
858
+ "reward_std": 0.5656346082687378,
859
+ "rewards/": 4.7614922523498535,
860
+ "rewards/math_compute_score": 0.8392857313156128,
861
+ "step": 58
862
+ },
863
+ {
864
+ "clip_ratio": 0.0,
865
+ "completion_length": 692.2678833007812,
866
+ "epoch": 0.1873015873015873,
867
+ "grad_norm": 0.6039435728831022,
868
+ "kl": 0.0020599365234375,
869
+ "learning_rate": 9.376561897254987e-07,
870
+ "loss": 0.0331,
871
+ "reward": 1.7711775302886963,
872
+ "reward_std": 0.4943082630634308,
873
+ "rewards/": 5.570173263549805,
874
+ "rewards/math_compute_score": 0.8214285969734192,
875
+ "step": 59
876
+ },
877
+ {
878
+ "epoch": 0.19047619047619047,
879
+ "grad_norm": 0.6468115451400324,
880
+ "learning_rate": 9.351426205150776e-07,
881
+ "loss": 0.0748,
882
+ "step": 60
883
+ },
884
+ {
885
+ "epoch": 0.19047619047619047,
886
+ "eval_clip_ratio": 0.0,
887
+ "eval_completion_length": 789.8095296223959,
888
+ "eval_kl": 0.023050944010416668,
889
+ "eval_loss": 0.12776777148246765,
890
+ "eval_reward": 1.6391727129618328,
891
+ "eval_reward_std": 0.5289413332939148,
892
+ "eval_rewards/": 5.076815923055013,
893
+ "eval_rewards/math_compute_score": 0.7797619501749674,
894
+ "eval_runtime": 107.6708,
895
+ "eval_samples_per_second": 0.186,
896
+ "eval_steps_per_second": 0.009,
897
+ "step": 60
898
+ },
899
+ {
900
+ "clip_ratio": 0.0,
901
+ "completion_length": 599.1518249511719,
902
+ "epoch": 0.19365079365079366,
903
+ "grad_norm": 0.62860654069817,
904
+ "kl": 0.002593994140625,
905
+ "learning_rate": 9.32582884709098e-07,
906
+ "loss": -0.0003,
907
+ "reward": 1.5030343532562256,
908
+ "reward_std": 0.49661755561828613,
909
+ "rewards/": 4.658028841018677,
910
+ "rewards/math_compute_score": 0.7142857313156128,
911
+ "step": 61
912
+ },
913
+ {
914
+ "clip_ratio": 0.0,
915
+ "completion_length": 664.8035888671875,
916
+ "epoch": 0.19682539682539682,
917
+ "grad_norm": 1.3582868033212785,
918
+ "kl": 0.0115966796875,
919
+ "learning_rate": 9.299772538835491e-07,
920
+ "loss": 0.1527,
921
+ "reward": 1.7017022371292114,
922
+ "reward_std": 0.452860027551651,
923
+ "rewards/": 4.937081813812256,
924
+ "rewards/math_compute_score": 0.8928571939468384,
925
+ "step": 62
926
+ },
927
+ {
928
+ "clip_ratio": 0.0,
929
+ "completion_length": 746.6428833007812,
930
+ "epoch": 0.2,
931
+ "grad_norm": 0.7888407267831482,
932
+ "kl": 0.002288818359375,
933
+ "learning_rate": 9.273260044836673e-07,
934
+ "loss": 0.1191,
935
+ "reward": 1.4725233316421509,
936
+ "reward_std": 0.5076785087585449,
937
+ "rewards/": 4.505473613739014,
938
+ "rewards/math_compute_score": 0.7142857313156128,
939
+ "step": 63
940
+ },
941
+ {
942
+ "clip_ratio": 0.0,
943
+ "completion_length": 630.9285888671875,
944
+ "epoch": 0.20317460317460317,
945
+ "grad_norm": 0.5701838250380957,
946
+ "kl": 0.003143310546875,
947
+ "learning_rate": 9.246294177946062e-07,
948
+ "loss": 0.175,
949
+ "reward": 1.4169644117355347,
950
+ "reward_std": 0.4153939485549927,
951
+ "rewards/": 4.084821701049805,
952
+ "rewards/math_compute_score": 0.7500000596046448,
953
+ "step": 64
954
+ },
955
+ {
956
+ "clip_ratio": 0.0,
957
+ "completion_length": 746.1607666015625,
958
+ "epoch": 0.20634920634920634,
959
+ "grad_norm": 0.43485838557829426,
960
+ "kl": 0.003326416015625,
961
+ "learning_rate": 9.218877799115927e-07,
962
+ "loss": 0.0591,
963
+ "reward": 1.405147910118103,
964
+ "reward_std": 0.6980858445167542,
965
+ "rewards/": 4.1685967445373535,
966
+ "rewards/math_compute_score": 0.7142857313156128,
967
+ "step": 65
968
+ },
969
+ {
970
+ "clip_ratio": 0.0,
971
+ "completion_length": 754.3750610351562,
972
+ "epoch": 0.20952380952380953,
973
+ "grad_norm": 0.5397143184004072,
974
+ "kl": 0.0023040771484375,
975
+ "learning_rate": 9.191013817095761e-07,
976
+ "loss": 0.1156,
977
+ "reward": 1.5029298067092896,
978
+ "reward_std": 0.5369086265563965,
979
+ "rewards/": 4.657505989074707,
980
+ "rewards/math_compute_score": 0.7142857313156128,
981
+ "step": 66
982
+ },
983
+ {
984
+ "clip_ratio": 0.0,
985
+ "completion_length": 473.4107360839844,
986
+ "epoch": 0.2126984126984127,
987
+ "grad_norm": 0.6430268032171578,
988
+ "kl": 0.0027313232421875,
989
+ "learning_rate": 9.162705188123646e-07,
990
+ "loss": -0.0227,
991
+ "reward": 1.8193360567092896,
992
+ "reward_std": 0.5557112097740173,
993
+ "rewards/": 5.1681084632873535,
994
+ "rewards/math_compute_score": 0.9821429252624512,
995
+ "step": 67
996
+ },
997
+ {
998
+ "clip_ratio": 0.0,
999
+ "completion_length": 584.4642944335938,
1000
+ "epoch": 0.21587301587301588,
1001
+ "grad_norm": 0.6983774100649138,
1002
+ "kl": 0.00494384765625,
1003
+ "learning_rate": 9.133954915612634e-07,
1004
+ "loss": 0.062,
1005
+ "reward": 1.5165389776229858,
1006
+ "reward_std": 0.41740962862968445,
1007
+ "rewards/": 4.296979904174805,
1008
+ "rewards/math_compute_score": 0.8214285969734192,
1009
+ "step": 68
1010
+ },
1011
+ {
1012
+ "clip_ratio": 0.0,
1013
+ "completion_length": 609.1607666015625,
1014
+ "epoch": 0.21904761904761905,
1015
+ "grad_norm": 0.6699339431654112,
1016
+ "kl": 0.00439453125,
1017
+ "learning_rate": 9.104766049832087e-07,
1018
+ "loss": 0.0443,
1019
+ "reward": 1.5293978452682495,
1020
+ "reward_std": 0.5475279092788696,
1021
+ "rewards/": 4.575560569763184,
1022
+ "rewards/math_compute_score": 0.7678571939468384,
1023
+ "step": 69
1024
+ },
1025
+ {
1026
+ "epoch": 0.2222222222222222,
1027
+ "grad_norm": 0.7988695058458649,
1028
+ "learning_rate": 9.075141687584056e-07,
1029
+ "loss": -0.0167,
1030
+ "step": 70
1031
+ },
1032
+ {
1033
+ "epoch": 0.2222222222222222,
1034
+ "eval_clip_ratio": 0.0,
1035
+ "eval_completion_length": 901.5059814453125,
1036
+ "eval_kl": 0.2381591796875,
1037
+ "eval_loss": 0.2867256999015808,
1038
+ "eval_reward": 1.6091902653376262,
1039
+ "eval_reward_std": 0.5438455939292908,
1040
+ "eval_rewards/": 4.926903247833252,
1041
+ "eval_rewards/math_compute_score": 0.7797619501749674,
1042
+ "eval_runtime": 111.1812,
1043
+ "eval_samples_per_second": 0.18,
1044
+ "eval_steps_per_second": 0.009,
1045
+ "step": 70
1046
+ },
1047
+ {
1048
+ "clip_ratio": 0.0,
1049
+ "completion_length": 625.7410888671875,
1050
+ "epoch": 0.2253968253968254,
1051
+ "grad_norm": 0.5540727634053975,
1052
+ "kl": 0.003814697265625,
1053
+ "learning_rate": 9.045084971874737e-07,
1054
+ "loss": -0.0056,
1055
+ "reward": 1.7840471863746643,
1056
+ "reward_std": 0.4358938932418823,
1057
+ "rewards/": 5.313093185424805,
1058
+ "rewards/math_compute_score": 0.9017857611179352,
1059
+ "step": 71
1060
+ },
1061
+ {
1062
+ "clip_ratio": 0.0,
1063
+ "completion_length": 791.2678833007812,
1064
+ "epoch": 0.22857142857142856,
1065
+ "grad_norm": 0.49864634439649874,
1066
+ "kl": 0.0018157958984375,
1067
+ "learning_rate": 9.014599091580998e-07,
1068
+ "loss": 0.0311,
1069
+ "reward": 1.6201382875442505,
1070
+ "reward_std": 0.5860312581062317,
1071
+ "rewards/": 5.314976692199707,
1072
+ "rewards/math_compute_score": 0.6964285969734192,
1073
+ "step": 72
1074
+ },
1075
+ {
1076
+ "clip_ratio": 0.0,
1077
+ "completion_length": 515.8392944335938,
1078
+ "epoch": 0.23174603174603176,
1079
+ "grad_norm": 0.9973211460805157,
1080
+ "kl": 0.00482177734375,
1081
+ "learning_rate": 8.983687281112064e-07,
1082
+ "loss": -0.0697,
1083
+ "reward": 1.6238142251968384,
1084
+ "reward_std": 0.35913076996803284,
1085
+ "rewards/": 4.761928081512451,
1086
+ "rewards/math_compute_score": 0.8392857313156128,
1087
+ "step": 73
1088
+ },
1089
+ {
1090
+ "clip_ratio": 0.0,
1091
+ "completion_length": 643.357177734375,
1092
+ "epoch": 0.23492063492063492,
1093
+ "grad_norm": 0.877503000341696,
1094
+ "kl": 0.002105712890625,
1095
+ "learning_rate": 8.952352820066358e-07,
1096
+ "loss": -0.0014,
1097
+ "reward": 1.4080777168273926,
1098
+ "reward_std": 0.5432181358337402,
1099
+ "rewards/": 4.397531032562256,
1100
+ "rewards/math_compute_score": 0.660714328289032,
1101
+ "step": 74
1102
+ },
1103
+ {
1104
+ "clip_ratio": 0.0,
1105
+ "completion_length": 478.7857360839844,
1106
+ "epoch": 0.23809523809523808,
1107
+ "grad_norm": 0.6202183598487062,
1108
+ "kl": 0.0023345947265625,
1109
+ "learning_rate": 8.920599032883552e-07,
1110
+ "loss": 0.1503,
1111
+ "reward": 1.7339845895767212,
1112
+ "reward_std": 0.3223447799682617,
1113
+ "rewards/": 5.2413506507873535,
1114
+ "rewards/math_compute_score": 0.8571429252624512,
1115
+ "step": 75
1116
+ },
1117
+ {
1118
+ "clip_ratio": 0.0,
1119
+ "completion_length": 727.3392944335938,
1120
+ "epoch": 0.24126984126984127,
1121
+ "grad_norm": 0.9654723599639436,
1122
+ "kl": 0.00201416015625,
1123
+ "learning_rate": 8.888429288491855e-07,
1124
+ "loss": 0.1141,
1125
+ "reward": 1.7490864992141724,
1126
+ "reward_std": 0.5959608554840088,
1127
+ "rewards/": 5.0311455726623535,
1128
+ "rewards/math_compute_score": 0.9285714626312256,
1129
+ "step": 76
1130
+ },
1131
+ {
1132
+ "clip_ratio": 0.0,
1133
+ "completion_length": 607.982177734375,
1134
+ "epoch": 0.24444444444444444,
1135
+ "grad_norm": 0.7445671259261306,
1136
+ "kl": 0.00186920166015625,
1137
+ "learning_rate": 8.855846999950595e-07,
1138
+ "loss": 0.0564,
1139
+ "reward": 1.5656529664993286,
1140
+ "reward_std": 0.44271785020828247,
1141
+ "rewards/": 4.113978862762451,
1142
+ "rewards/math_compute_score": 0.9285714626312256,
1143
+ "step": 77
1144
+ },
1145
+ {
1146
+ "clip_ratio": 0.0,
1147
+ "completion_length": 730.5357666015625,
1148
+ "epoch": 0.24761904761904763,
1149
+ "grad_norm": 1.1259492375136075,
1150
+ "kl": 0.00174713134765625,
1151
+ "learning_rate": 8.822855624088097e-07,
1152
+ "loss": 0.0594,
1153
+ "reward": 1.7164901494979858,
1154
+ "reward_std": 0.46802079677581787,
1155
+ "rewards/": 5.296735763549805,
1156
+ "rewards/math_compute_score": 0.8214285969734192,
1157
+ "step": 78
1158
+ },
1159
+ {
1160
+ "clip_ratio": 0.0,
1161
+ "completion_length": 520.625,
1162
+ "epoch": 0.2507936507936508,
1163
+ "grad_norm": 0.6608772516145822,
1164
+ "kl": 0.0037689208984375,
1165
+ "learning_rate": 8.789458661134942e-07,
1166
+ "loss": 0.113,
1167
+ "reward": 1.6773438453674316,
1168
+ "reward_std": 0.41135743260383606,
1169
+ "rewards/": 4.9581475257873535,
1170
+ "rewards/math_compute_score": 0.8571429252624512,
1171
+ "step": 79
1172
+ },
1173
+ {
1174
+ "epoch": 0.25396825396825395,
1175
+ "grad_norm": 1.1086447024025683,
1176
+ "learning_rate": 8.755659654352599e-07,
1177
+ "loss": 0.0852,
1178
+ "step": 80
1179
+ },
1180
+ {
1181
+ "epoch": 0.25396825396825395,
1182
+ "eval_clip_ratio": 0.0,
1183
+ "eval_completion_length": 762.126220703125,
1184
+ "eval_kl": 0.057149251302083336,
1185
+ "eval_loss": 0.1757144182920456,
1186
+ "eval_reward": 1.6997971932093303,
1187
+ "eval_reward_std": 0.5428444643815359,
1188
+ "eval_rewards/": 5.284700234731038,
1189
+ "eval_rewards/math_compute_score": 0.8035714824994405,
1190
+ "eval_runtime": 105.3925,
1191
+ "eval_samples_per_second": 0.19,
1192
+ "eval_steps_per_second": 0.009,
1193
+ "step": 80
1194
+ },
1195
+ {
1196
+ "clip_ratio": 0.0,
1197
+ "completion_length": 554.4107208251953,
1198
+ "epoch": 0.2571428571428571,
1199
+ "grad_norm": 0.6297992518515485,
1200
+ "kl": 0.0034942626953125,
1201
+ "learning_rate": 8.721462189657509e-07,
1202
+ "loss": 0.0229,
1203
+ "reward": 1.6316068172454834,
1204
+ "reward_std": 0.46411074697971344,
1205
+ "rewards/": 5.0508904457092285,
1206
+ "rewards/math_compute_score": 0.7767857611179352,
1207
+ "step": 81
1208
+ },
1209
+ {
1210
+ "clip_ratio": 0.0,
1211
+ "completion_length": 612.8035888671875,
1212
+ "epoch": 0.26031746031746034,
1213
+ "grad_norm": 0.5403276172661425,
1214
+ "kl": 0.0034637451171875,
1215
+ "learning_rate": 8.686869895240631e-07,
1216
+ "loss": 0.0812,
1217
+ "reward": 1.6373475790023804,
1218
+ "reward_std": 0.49845850467681885,
1219
+ "rewards/": 4.901022911071777,
1220
+ "rewards/math_compute_score": 0.8214285969734192,
1221
+ "step": 82
1222
+ },
1223
+ {
1224
+ "clip_ratio": 0.0,
1225
+ "completion_length": 608.0535888671875,
1226
+ "epoch": 0.2634920634920635,
1227
+ "grad_norm": 0.915800232301419,
1228
+ "kl": 0.003448486328125,
1229
+ "learning_rate": 8.651886441182508e-07,
1230
+ "loss": 0.0353,
1231
+ "reward": 1.547105312347412,
1232
+ "reward_std": 0.6321852803230286,
1233
+ "rewards/": 5.235526084899902,
1234
+ "rewards/math_compute_score": 0.625,
1235
+ "step": 83
1236
+ },
1237
+ {
1238
+ "clip_ratio": 0.0,
1239
+ "completion_length": 619.4107666015625,
1240
+ "epoch": 0.26666666666666666,
1241
+ "grad_norm": 0.4435988410022463,
1242
+ "kl": 0.0033111572265625,
1243
+ "learning_rate": 8.616515539063894e-07,
1244
+ "loss": -0.0685,
1245
+ "reward": 1.8496233224868774,
1246
+ "reward_std": 0.49867764115333557,
1247
+ "rewards/": 5.676688194274902,
1248
+ "rewards/math_compute_score": 0.8928571939468384,
1249
+ "step": 84
1250
+ },
1251
+ {
1252
+ "clip_ratio": 0.0,
1253
+ "completion_length": 751.5000610351562,
1254
+ "epoch": 0.2698412698412698,
1255
+ "grad_norm": 0.8497969196784788,
1256
+ "kl": 0.0025177001953125,
1257
+ "learning_rate": 8.580760941571966e-07,
1258
+ "loss": 0.2484,
1259
+ "reward": 1.459954023361206,
1260
+ "reward_std": 0.6413030624389648,
1261
+ "rewards/": 4.871198654174805,
1262
+ "rewards/math_compute_score": 0.6071428656578064,
1263
+ "step": 85
1264
+ },
1265
+ {
1266
+ "clip_ratio": 0.0,
1267
+ "completion_length": 565.1607666015625,
1268
+ "epoch": 0.273015873015873,
1269
+ "grad_norm": 0.6258059646403542,
1270
+ "kl": 0.0048828125,
1271
+ "learning_rate": 8.544626442102187e-07,
1272
+ "loss": 0.038,
1273
+ "reward": 1.6110281944274902,
1274
+ "reward_std": 0.610092043876648,
1275
+ "rewards/": 5.197998046875,
1276
+ "rewards/math_compute_score": 0.7142857313156128,
1277
+ "step": 86
1278
+ },
1279
+ {
1280
+ "clip_ratio": 0.0,
1281
+ "completion_length": 488.39288330078125,
1282
+ "epoch": 0.2761904761904762,
1283
+ "grad_norm": 0.8138401874703006,
1284
+ "kl": 0.004638671875,
1285
+ "learning_rate": 8.508115874355839e-07,
1286
+ "loss": 0.0085,
1287
+ "reward": 1.3848702907562256,
1288
+ "reward_std": 0.39325088262557983,
1289
+ "rewards/": 4.210065841674805,
1290
+ "rewards/math_compute_score": 0.6785714626312256,
1291
+ "step": 87
1292
+ },
1293
+ {
1294
+ "clip_ratio": 0.0,
1295
+ "completion_length": 536.5892944335938,
1296
+ "epoch": 0.27936507936507937,
1297
+ "grad_norm": 2.4320450027460465,
1298
+ "kl": 0.00323486328125,
1299
+ "learning_rate": 8.47123311193329e-07,
1300
+ "loss": -0.0545,
1301
+ "reward": 1.82899010181427,
1302
+ "reward_std": 0.4799407124519348,
1303
+ "rewards/": 5.430664539337158,
1304
+ "rewards/math_compute_score": 0.9285714626312256,
1305
+ "step": 88
1306
+ },
1307
+ {
1308
+ "clip_ratio": 0.0,
1309
+ "completion_length": 728.8928833007812,
1310
+ "epoch": 0.28253968253968254,
1311
+ "grad_norm": 0.5888691350508202,
1312
+ "kl": 0.0026397705078125,
1313
+ "learning_rate": 8.433982067923021e-07,
1314
+ "loss": 0.1658,
1315
+ "reward": 1.526370644569397,
1316
+ "reward_std": 0.6073156595230103,
1317
+ "rewards/": 4.560424327850342,
1318
+ "rewards/math_compute_score": 0.7678571939468384,
1319
+ "step": 89
1320
+ },
1321
+ {
1322
+ "epoch": 0.2857142857142857,
1323
+ "grad_norm": 0.672547715543188,
1324
+ "learning_rate": 8.396366694486466e-07,
1325
+ "loss": -0.0451,
1326
+ "step": 90
1327
+ },
1328
+ {
1329
+ "epoch": 0.2857142857142857,
1330
+ "eval_clip_ratio": 0.0,
1331
+ "eval_completion_length": 751.4702657063802,
1332
+ "eval_kl": 0.01361083984375,
1333
+ "eval_loss": 0.09576481580734253,
1334
+ "eval_reward": 1.6055153608322144,
1335
+ "eval_reward_std": 0.609013577302297,
1336
+ "eval_rewards/": 4.837100187937419,
1337
+ "eval_rewards/math_compute_score": 0.797619084517161,
1338
+ "eval_runtime": 104.6076,
1339
+ "eval_samples_per_second": 0.191,
1340
+ "eval_steps_per_second": 0.01,
1341
+ "step": 90
1342
+ },
1343
+ {
1344
+ "clip_ratio": 0.0,
1345
+ "completion_length": 719.5357666015625,
1346
+ "epoch": 0.28888888888888886,
1347
+ "grad_norm": 0.5180346375049427,
1348
+ "kl": 0.002506256103515625,
1349
+ "learning_rate": 8.358390982438705e-07,
1350
+ "loss": 0.1519,
1351
+ "reward": 1.9254953861236572,
1352
+ "reward_std": 0.503593385219574,
1353
+ "rewards/": 6.09176230430603,
1354
+ "rewards/math_compute_score": 0.8839286267757416,
1355
+ "step": 91
1356
+ },
1357
+ {
1358
+ "clip_ratio": 0.0,
1359
+ "completion_length": 452.1607360839844,
1360
+ "epoch": 0.2920634920634921,
1361
+ "grad_norm": 0.9788308883352828,
1362
+ "kl": 0.0079345703125,
1363
+ "learning_rate": 8.320058960825058e-07,
1364
+ "loss": 0.0102,
1365
+ "reward": 1.6602121591567993,
1366
+ "reward_std": 0.3053475022315979,
1367
+ "rewards/": 4.658203125,
1368
+ "rewards/math_compute_score": 0.910714328289032,
1369
+ "step": 92
1370
+ },
1371
+ {
1372
+ "clip_ratio": 0.0,
1373
+ "completion_length": 693.5535888671875,
1374
+ "epoch": 0.29523809523809524,
1375
+ "grad_norm": 0.5847042829798909,
1376
+ "kl": 0.00244140625,
1377
+ "learning_rate": 8.281374696493626e-07,
1378
+ "loss": 0.2312,
1379
+ "reward": 1.8354074954986572,
1380
+ "reward_std": 0.37926599383354187,
1381
+ "rewards/": 6.248465538024902,
1382
+ "rewards/math_compute_score": 0.7321428656578064,
1383
+ "step": 93
1384
+ },
1385
+ {
1386
+ "clip_ratio": 0.0,
1387
+ "completion_length": 660.232177734375,
1388
+ "epoch": 0.2984126984126984,
1389
+ "grad_norm": 0.5205561829484078,
1390
+ "kl": 0.0027008056640625,
1391
+ "learning_rate": 8.242342293663809e-07,
1392
+ "loss": 0.1094,
1393
+ "reward": 1.840959906578064,
1394
+ "reward_std": 0.3830914795398712,
1395
+ "rewards/": 5.347656726837158,
1396
+ "rewards/math_compute_score": 0.9642857313156128,
1397
+ "step": 94
1398
+ },
1399
+ {
1400
+ "clip_ratio": 0.0,
1401
+ "completion_length": 609.8035888671875,
1402
+ "epoch": 0.30158730158730157,
1403
+ "grad_norm": 0.44482437448212625,
1404
+ "kl": 0.004425048828125,
1405
+ "learning_rate": 8.202965893490876e-07,
1406
+ "loss": 0.0873,
1407
+ "reward": 1.7079870700836182,
1408
+ "reward_std": 0.5448174476623535,
1409
+ "rewards/": 5.111363410949707,
1410
+ "rewards/math_compute_score": 0.8571429252624512,
1411
+ "step": 95
1412
+ },
1413
+ {
1414
+ "clip_ratio": 0.0,
1415
+ "completion_length": 671.5357666015625,
1416
+ "epoch": 0.3047619047619048,
1417
+ "grad_norm": 1.1320823935790385,
1418
+ "kl": 0.002166748046875,
1419
+ "learning_rate": 8.163249673626602e-07,
1420
+ "loss": 0.0481,
1421
+ "reward": 1.4774868488311768,
1422
+ "reward_std": 0.45228156447410583,
1423
+ "rewards/": 4.9588623046875,
1424
+ "rewards/math_compute_score": 0.6071428656578064,
1425
+ "step": 96
1426
+ },
1427
+ {
1428
+ "clip_ratio": 0.0,
1429
+ "completion_length": 544.3214721679688,
1430
+ "epoch": 0.30793650793650795,
1431
+ "grad_norm": 0.5094127089703688,
1432
+ "kl": 0.003997802734375,
1433
+ "learning_rate": 8.123197847776042e-07,
1434
+ "loss": 0.0533,
1435
+ "reward": 1.765764594078064,
1436
+ "reward_std": 0.5276929140090942,
1437
+ "rewards/": 5.757394313812256,
1438
+ "rewards/math_compute_score": 0.7678571939468384,
1439
+ "step": 97
1440
+ },
1441
+ {
1442
+ "clip_ratio": 0.0,
1443
+ "completion_length": 593.8214721679688,
1444
+ "epoch": 0.3111111111111111,
1445
+ "grad_norm": 0.5934812502311124,
1446
+ "kl": 0.0042724609375,
1447
+ "learning_rate": 8.082814665250476e-07,
1448
+ "loss": -0.0354,
1449
+ "reward": 1.6365864276885986,
1450
+ "reward_std": 0.46695929765701294,
1451
+ "rewards/": 5.040074348449707,
1452
+ "rewards/math_compute_score": 0.785714328289032,
1453
+ "step": 98
1454
+ },
1455
+ {
1456
+ "clip_ratio": 0.0,
1457
+ "completion_length": 511.4464416503906,
1458
+ "epoch": 0.3142857142857143,
1459
+ "grad_norm": 0.6289350290275575,
1460
+ "kl": 0.004669189453125,
1461
+ "learning_rate": 8.042104410516575e-07,
1462
+ "loss": -0.0663,
1463
+ "reward": 1.6093192100524902,
1464
+ "reward_std": 0.39656367897987366,
1465
+ "rewards/": 4.546596050262451,
1466
+ "rewards/math_compute_score": 0.8750000596046448,
1467
+ "step": 99
1468
+ },
1469
+ {
1470
+ "epoch": 0.31746031746031744,
1471
+ "grad_norm": 0.5581381958692002,
1472
+ "learning_rate": 8.001071402741842e-07,
1473
+ "loss": 0.0344,
1474
+ "step": 100
1475
+ },
1476
+ {
1477
+ "epoch": 0.31746031746031744,
1478
+ "eval_clip_ratio": 0.0,
1479
+ "eval_completion_length": 700.452392578125,
1480
+ "eval_kl": 0.01019287109375,
1481
+ "eval_loss": -0.026086583733558655,
1482
+ "eval_reward": 1.755845586458842,
1483
+ "eval_reward_std": 0.4914539158344269,
1484
+ "eval_rewards/": 5.422084490458171,
1485
+ "eval_rewards/math_compute_score": 0.8392857710520426,
1486
+ "eval_runtime": 91.4693,
1487
+ "eval_samples_per_second": 0.219,
1488
+ "eval_steps_per_second": 0.011,
1489
+ "step": 100
1490
+ },
1491
+ {
1492
+ "clip_ratio": 0.0,
1493
+ "completion_length": 552.2410888671875,
1494
+ "epoch": 0.32063492063492066,
1495
+ "grad_norm": 0.7136178771883936,
1496
+ "kl": 0.00354766845703125,
1497
+ "learning_rate": 7.959719995336363e-07,
1498
+ "loss": 0.0693,
1499
+ "reward": 1.6664237976074219,
1500
+ "reward_std": 0.429623618721962,
1501
+ "rewards/": 4.939261436462402,
1502
+ "rewards/math_compute_score": 0.848214328289032,
1503
+ "step": 101
1504
+ },
1505
+ {
1506
+ "clip_ratio": 0.0,
1507
+ "completion_length": 663.25,
1508
+ "epoch": 0.3238095238095238,
1509
+ "grad_norm": 0.5439969780060891,
1510
+ "kl": 0.0037994384765625,
1511
+ "learning_rate": 7.918054575490943e-07,
1512
+ "loss": 0.0424,
1513
+ "reward": 1.4987915754318237,
1514
+ "reward_std": 0.5196777582168579,
1515
+ "rewards/": 4.208243370056152,
1516
+ "rewards/math_compute_score": 0.8214285969734192,
1517
+ "step": 102
1518
+ },
1519
+ {
1520
+ "clip_ratio": 0.0,
1521
+ "completion_length": 471.8750305175781,
1522
+ "epoch": 0.326984126984127,
1523
+ "grad_norm": 0.7220192489284563,
1524
+ "kl": 0.0033416748046875,
1525
+ "learning_rate": 7.876079563711631e-07,
1526
+ "loss": 0.1121,
1527
+ "reward": 1.7968298196792603,
1528
+ "reward_std": 0.36027005314826965,
1529
+ "rewards/": 5.127005577087402,
1530
+ "rewards/math_compute_score": 0.9642857313156128,
1531
+ "step": 103
1532
+ },
1533
+ {
1534
+ "clip_ratio": 0.0,
1535
+ "completion_length": 640.5892944335938,
1536
+ "epoch": 0.33015873015873015,
1537
+ "grad_norm": 0.8015986414478035,
1538
+ "kl": 0.004974365234375,
1539
+ "learning_rate": 7.83379941335073e-07,
1540
+ "loss": 0.2035,
1541
+ "reward": 1.7956823110580444,
1542
+ "reward_std": 0.35563212633132935,
1543
+ "rewards/": 5.621268272399902,
1544
+ "rewards/math_compute_score": 0.8392857313156128,
1545
+ "step": 104
1546
+ },
1547
+ {
1548
+ "clip_ratio": 0.0,
1549
+ "completion_length": 799.8750610351562,
1550
+ "epoch": 0.3333333333333333,
1551
+ "grad_norm": 0.6682617366427315,
1552
+ "kl": 0.00250244140625,
1553
+ "learning_rate": 7.791218610134322e-07,
1554
+ "loss": 0.2286,
1555
+ "reward": 1.4982212781906128,
1556
+ "reward_std": 0.5870651602745056,
1557
+ "rewards/": 5.4911065101623535,
1558
+ "rewards/math_compute_score": 0.5,
1559
+ "step": 105
1560
+ },
1561
+ {
1562
+ "clip_ratio": 0.0,
1563
+ "completion_length": 680.9107666015625,
1564
+ "epoch": 0.33650793650793653,
1565
+ "grad_norm": 0.6146294713011484,
1566
+ "kl": 0.002471923828125,
1567
+ "learning_rate": 7.748341671686354e-07,
1568
+ "loss": 0.0038,
1569
+ "reward": 1.9384348392486572,
1570
+ "reward_std": 0.49243679642677307,
1571
+ "rewards/": 6.192173957824707,
1572
+ "rewards/math_compute_score": 0.8750000596046448,
1573
+ "step": 106
1574
+ },
1575
+ {
1576
+ "clip_ratio": 0.0,
1577
+ "completion_length": 663.4285888671875,
1578
+ "epoch": 0.3396825396825397,
1579
+ "grad_norm": 0.7562852855642146,
1580
+ "kl": 0.0038299560546875,
1581
+ "learning_rate": 7.705173147049325e-07,
1582
+ "loss": 0.1061,
1583
+ "reward": 1.6097028255462646,
1584
+ "reward_std": 0.5770988464355469,
1585
+ "rewards/": 4.691371440887451,
1586
+ "rewards/math_compute_score": 0.8392857313156128,
1587
+ "step": 107
1588
+ },
1589
+ {
1590
+ "clip_ratio": 0.0,
1591
+ "completion_length": 760.1250610351562,
1592
+ "epoch": 0.34285714285714286,
1593
+ "grad_norm": 0.8460361870531922,
1594
+ "kl": 0.005615234375,
1595
+ "learning_rate": 7.661717616201668e-07,
1596
+ "loss": 0.1169,
1597
+ "reward": 1.7678015232086182,
1598
+ "reward_std": 0.46296006441116333,
1599
+ "rewards/": 5.910435676574707,
1600
+ "rewards/math_compute_score": 0.7321428656578064,
1601
+ "step": 108
1602
+ },
1603
+ {
1604
+ "clip_ratio": 0.0,
1605
+ "completion_length": 544.4107666015625,
1606
+ "epoch": 0.346031746031746,
1607
+ "grad_norm": 0.8867581981573962,
1608
+ "kl": 0.00518798828125,
1609
+ "learning_rate": 7.617979689571839e-07,
1610
+ "loss": 0.0749,
1611
+ "reward": 1.6010881662368774,
1612
+ "reward_std": 0.40945473313331604,
1613
+ "rewards/": 4.7197265625,
1614
+ "rewards/math_compute_score": 0.8214285969734192,
1615
+ "step": 109
1616
+ },
1617
+ {
1618
+ "epoch": 0.3492063492063492,
1619
+ "grad_norm": 0.613697351604427,
1620
+ "learning_rate": 7.573964007549154e-07,
1621
+ "loss": -0.0219,
1622
+ "step": 110
1623
+ },
1624
+ {
1625
+ "epoch": 0.3492063492063492,
1626
+ "eval_clip_ratio": 0.0,
1627
+ "eval_completion_length": 747.9107259114584,
1628
+ "eval_kl": 0.0235595703125,
1629
+ "eval_loss": 0.026410922408103943,
1630
+ "eval_reward": 1.7149322032928467,
1631
+ "eval_reward_std": 0.49799948930740356,
1632
+ "eval_rewards/": 5.3365654945373535,
1633
+ "eval_rewards/math_compute_score": 0.8095238407452902,
1634
+ "eval_runtime": 103.4902,
1635
+ "eval_samples_per_second": 0.193,
1636
+ "eval_steps_per_second": 0.01,
1637
+ "step": 110
1638
+ },
1639
+ {
1640
+ "clip_ratio": 0.0,
1641
+ "completion_length": 603.4642944335938,
1642
+ "epoch": 0.3523809523809524,
1643
+ "grad_norm": 0.9237700659841631,
1644
+ "kl": 0.0062713623046875,
1645
+ "learning_rate": 7.529675239991482e-07,
1646
+ "loss": -0.0713,
1647
+ "reward": 1.6208950281143188,
1648
+ "reward_std": 0.5017957538366318,
1649
+ "rewards/": 5.03304648399353,
1650
+ "rewards/math_compute_score": 0.7678571939468384,
1651
+ "step": 111
1652
+ },
1653
+ {
1654
+ "clip_ratio": 0.0,
1655
+ "completion_length": 537.6785888671875,
1656
+ "epoch": 0.35555555555555557,
1657
+ "grad_norm": 0.5989679853358942,
1658
+ "kl": 0.003326416015625,
1659
+ "learning_rate": 7.485118085729789e-07,
1660
+ "loss": 0.0532,
1661
+ "reward": 1.7066690921783447,
1662
+ "reward_std": 0.5952469110488892,
1663
+ "rewards/": 4.819059371948242,
1664
+ "rewards/math_compute_score": 0.9285714626312256,
1665
+ "step": 112
1666
+ },
1667
+ {
1668
+ "clip_ratio": 0.0,
1669
+ "completion_length": 557.6607666015625,
1670
+ "epoch": 0.35873015873015873,
1671
+ "grad_norm": 0.792413828293772,
1672
+ "kl": 0.0034637451171875,
1673
+ "learning_rate": 7.440297272069614e-07,
1674
+ "loss": 0.0875,
1675
+ "reward": 1.5029088258743286,
1676
+ "reward_std": 0.5444967746734619,
1677
+ "rewards/": 4.800258159637451,
1678
+ "rewards/math_compute_score": 0.6785714626312256,
1679
+ "step": 113
1680
+ },
1681
+ {
1682
+ "clip_ratio": 0.0,
1683
+ "completion_length": 674.9464721679688,
1684
+ "epoch": 0.3619047619047619,
1685
+ "grad_norm": 0.8399630521739422,
1686
+ "kl": 0.002349853515625,
1687
+ "learning_rate": 7.395217554289523e-07,
1688
+ "loss": 0.065,
1689
+ "reward": 1.6198941469192505,
1690
+ "reward_std": 0.5176466107368469,
1691
+ "rewards/": 4.956613063812256,
1692
+ "rewards/math_compute_score": 0.785714328289032,
1693
+ "step": 114
1694
+ },
1695
+ {
1696
+ "clip_ratio": 0.0,
1697
+ "completion_length": 476.21429443359375,
1698
+ "epoch": 0.36507936507936506,
1699
+ "grad_norm": 1.01617746278495,
1700
+ "kl": 0.004150390625,
1701
+ "learning_rate": 7.3498837151366e-07,
1702
+ "loss": -0.0269,
1703
+ "reward": 1.5831892490386963,
1704
+ "reward_std": 0.44976264238357544,
1705
+ "rewards/": 5.058803081512451,
1706
+ "rewards/math_compute_score": 0.7142857313156128,
1707
+ "step": 115
1708
+ },
1709
+ {
1710
+ "clip_ratio": 0.0,
1711
+ "completion_length": 715.6964721679688,
1712
+ "epoch": 0.3682539682539683,
1713
+ "grad_norm": 0.6977639971716434,
1714
+ "kl": 0.00323486328125,
1715
+ "learning_rate": 7.304300564319013e-07,
1716
+ "loss": 0.0349,
1717
+ "reward": 1.998925805091858,
1718
+ "reward_std": 0.49372294545173645,
1719
+ "rewards/": 6.423200607299805,
1720
+ "rewards/math_compute_score": 0.8928571939468384,
1721
+ "step": 116
1722
+ },
1723
+ {
1724
+ "clip_ratio": 0.0,
1725
+ "completion_length": 738.1250610351562,
1726
+ "epoch": 0.37142857142857144,
1727
+ "grad_norm": 0.5710554895504717,
1728
+ "kl": 0.0037689208984375,
1729
+ "learning_rate": 7.258472937995735e-07,
1730
+ "loss": -0.023,
1731
+ "reward": 1.9152623414993286,
1732
+ "reward_std": 0.43735089898109436,
1733
+ "rewards/": 6.004883289337158,
1734
+ "rewards/math_compute_score": 0.8928571939468384,
1735
+ "step": 117
1736
+ },
1737
+ {
1738
+ "clip_ratio": 0.0,
1739
+ "completion_length": 696.607177734375,
1740
+ "epoch": 0.3746031746031746,
1741
+ "grad_norm": 0.9187825212513634,
1742
+ "kl": 0.0034637451171875,
1743
+ "learning_rate": 7.212405698263446e-07,
1744
+ "loss": -0.0131,
1745
+ "reward": 1.705580472946167,
1746
+ "reward_std": 0.509798526763916,
1747
+ "rewards/": 6.027902126312256,
1748
+ "rewards/math_compute_score": 0.625,
1749
+ "step": 118
1750
+ },
1751
+ {
1752
+ "clip_ratio": 0.0,
1753
+ "completion_length": 647.75,
1754
+ "epoch": 0.37777777777777777,
1755
+ "grad_norm": 0.8884977568265239,
1756
+ "kl": 0.005096435546875,
1757
+ "learning_rate": 7.166103732640681e-07,
1758
+ "loss": 0.1126,
1759
+ "reward": 1.5224602222442627,
1760
+ "reward_std": 0.5471193194389343,
1761
+ "rewards/": 4.3265862464904785,
1762
+ "rewards/math_compute_score": 0.8214285969734192,
1763
+ "step": 119
1764
+ },
1765
+ {
1766
+ "epoch": 0.38095238095238093,
1767
+ "grad_norm": 0.5323455430197922,
1768
+ "learning_rate": 7.119571953549304e-07,
1769
+ "loss": 0.0312,
1770
+ "step": 120
1771
+ },
1772
+ {
1773
+ "epoch": 0.38095238095238093,
1774
+ "eval_clip_ratio": 0.0,
1775
+ "eval_completion_length": 752.0238240559896,
1776
+ "eval_kl": 0.0135345458984375,
1777
+ "eval_loss": -0.006800096947699785,
1778
+ "eval_reward": 1.8065431118011475,
1779
+ "eval_reward_std": 0.4533117413520813,
1780
+ "eval_rewards/": 5.604143778483073,
1781
+ "eval_rewards/math_compute_score": 0.8571429053942362,
1782
+ "eval_runtime": 102.4477,
1783
+ "eval_samples_per_second": 0.195,
1784
+ "eval_steps_per_second": 0.01,
1785
+ "step": 120
1786
+ }
1787
+ ],
1788
+ "logging_steps": 1.0,
1789
+ "max_steps": 315,
1790
+ "num_input_tokens_seen": 0,
1791
+ "num_train_epochs": 1,
1792
+ "save_steps": 40,
1793
+ "stateful_callbacks": {
1794
+ "TrainerControl": {
1795
+ "args": {
1796
+ "should_epoch_stop": false,
1797
+ "should_evaluate": false,
1798
+ "should_log": false,
1799
+ "should_save": true,
1800
+ "should_training_stop": false
1801
+ },
1802
+ "attributes": {}
1803
+ }
1804
+ },
1805
+ "total_flos": 0.0,
1806
+ "train_batch_size": 8,
1807
+ "trial_name": null,
1808
+ "trial_params": null
1809
+ }
checkpoint-120/training_args.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:805b96e67670080dcd197ea7ae416ee337e68c776b2b7e456a99325a78e9a85e
3
+ size 8312
checkpoint-120/vocab.json ADDED
The diff for this file is too large to render. See raw diff