ywxzml3juser28 committed on
Commit
b9e0a97
·
0 Parent(s):
This view is limited to 50 files because it contains too many changes.   See raw diff
Files changed (50) hide show
  1. .gitattributes +36 -0
  2. GRPO-qwenMath-1B-cluster/global_step_250/actor/hf_model/added_tokens.json +3 -0
  3. GRPO-qwenMath-1B-cluster/global_step_250/actor/hf_model/chat_template.jinja +54 -0
  4. GRPO-qwenMath-1B-cluster/global_step_250/actor/hf_model/config.json +3 -0
  5. GRPO-qwenMath-1B-cluster/global_step_250/actor/hf_model/generation_config.json +3 -0
  6. GRPO-qwenMath-1B-cluster/global_step_250/actor/hf_model/merges.txt +0 -0
  7. GRPO-qwenMath-1B-cluster/global_step_250/actor/hf_model/model.safetensors +3 -0
  8. GRPO-qwenMath-1B-cluster/global_step_250/actor/hf_model/output_eval/aime2025_global_step_250.parquet +3 -0
  9. GRPO-qwenMath-1B-cluster/global_step_250/actor/hf_model/output_eval/aime2025_global_step_250.parquet.pass.csv +2 -0
  10. GRPO-qwenMath-1B-cluster/global_step_250/actor/hf_model/output_eval/aime2025_global_step_250.parquet.results.json +3 -0
  11. GRPO-qwenMath-1B-cluster/global_step_250/actor/hf_model/output_eval/aime24_global_step_250.parquet +3 -0
  12. GRPO-qwenMath-1B-cluster/global_step_250/actor/hf_model/output_eval/aime24_global_step_250.parquet.pass.csv +2 -0
  13. GRPO-qwenMath-1B-cluster/global_step_250/actor/hf_model/output_eval/aime24_global_step_250.parquet.results.json +3 -0
  14. GRPO-qwenMath-1B-cluster/global_step_250/actor/hf_model/output_eval/amc23_global_step_250.parquet +3 -0
  15. GRPO-qwenMath-1B-cluster/global_step_250/actor/hf_model/output_eval/amc23_global_step_250.parquet.pass.csv +2 -0
  16. GRPO-qwenMath-1B-cluster/global_step_250/actor/hf_model/output_eval/amc23_global_step_250.parquet.results.json +3 -0
  17. GRPO-qwenMath-1B-cluster/global_step_250/actor/hf_model/output_eval/math_global_step_250.parquet +3 -0
  18. GRPO-qwenMath-1B-cluster/global_step_250/actor/hf_model/output_eval/math_global_step_250.parquet.pass.csv +2 -0
  19. GRPO-qwenMath-1B-cluster/global_step_250/actor/hf_model/output_eval/math_global_step_250.parquet.results.json +3 -0
  20. GRPO-qwenMath-1B-cluster/global_step_250/actor/hf_model/output_eval/minerva_global_step_250.parquet +3 -0
  21. GRPO-qwenMath-1B-cluster/global_step_250/actor/hf_model/output_eval/minerva_global_step_250.parquet.pass.csv +2 -0
  22. GRPO-qwenMath-1B-cluster/global_step_250/actor/hf_model/output_eval/minerva_global_step_250.parquet.results.json +3 -0
  23. GRPO-qwenMath-1B-cluster/global_step_250/actor/hf_model/output_eval/olympiad_global_step_250.parquet +3 -0
  24. GRPO-qwenMath-1B-cluster/global_step_250/actor/hf_model/output_eval/olympiad_global_step_250.parquet.pass.csv +2 -0
  25. GRPO-qwenMath-1B-cluster/global_step_250/actor/hf_model/output_eval/olympiad_global_step_250.parquet.results.json +3 -0
  26. GRPO-qwenMath-1B-cluster/global_step_250/actor/hf_model/special_tokens_map.json +3 -0
  27. GRPO-qwenMath-1B-cluster/global_step_250/actor/hf_model/tokenizer.json +3 -0
  28. GRPO-qwenMath-1B-cluster/global_step_250/actor/hf_model/tokenizer_config.json +3 -0
  29. GRPO-qwenMath-1B-cluster/global_step_250/actor/hf_model/vocab.json +3 -0
  30. step-1B-top10-cluster-k2/global_step_250/actor/hf_model/added_tokens.json +3 -0
  31. step-1B-top10-cluster-k2/global_step_250/actor/hf_model/chat_template.jinja +54 -0
  32. step-1B-top10-cluster-k2/global_step_250/actor/hf_model/config.json +3 -0
  33. step-1B-top10-cluster-k2/global_step_250/actor/hf_model/eval_output/hf_model_aime24.parquet +3 -0
  34. step-1B-top10-cluster-k2/global_step_250/actor/hf_model/eval_output/hf_model_aime24.parquet.pass.csv +2 -0
  35. step-1B-top10-cluster-k2/global_step_250/actor/hf_model/eval_output/hf_model_aime24.parquet.results.json +3 -0
  36. step-1B-top10-cluster-k2/global_step_250/actor/hf_model/eval_output/hf_model_amc23.parquet +3 -0
  37. step-1B-top10-cluster-k2/global_step_250/actor/hf_model/eval_output/hf_model_amc23.parquet.pass.csv +2 -0
  38. step-1B-top10-cluster-k2/global_step_250/actor/hf_model/eval_output/hf_model_amc23.parquet.results.json +3 -0
  39. step-1B-top10-cluster-k2/global_step_250/actor/hf_model/eval_output/hf_model_math.parquet +3 -0
  40. step-1B-top10-cluster-k2/global_step_250/actor/hf_model/eval_output/hf_model_math.parquet.pass.csv +2 -0
  41. step-1B-top10-cluster-k2/global_step_250/actor/hf_model/eval_output/hf_model_math.parquet.results.json +3 -0
  42. step-1B-top10-cluster-k2/global_step_250/actor/hf_model/eval_output/hf_model_olympiad.parquet +3 -0
  43. step-1B-top10-cluster-k2/global_step_250/actor/hf_model/eval_output/hf_model_olympiad.parquet.pass.csv +2 -0
  44. step-1B-top10-cluster-k2/global_step_250/actor/hf_model/eval_output/hf_model_olympiad.parquet.results.json +3 -0
  45. step-1B-top10-cluster-k2/global_step_250/actor/hf_model/generation_config.json +3 -0
  46. step-1B-top10-cluster-k2/global_step_250/actor/hf_model/merges.txt +0 -0
  47. step-1B-top10-cluster-k2/global_step_250/actor/hf_model/model.safetensors +3 -0
  48. step-1B-top10-cluster-k2/global_step_250/actor/hf_model/output_eval/aime24_global_step_250.parquet +3 -0
  49. step-1B-top10-cluster-k2/global_step_250/actor/hf_model/output_eval/aime24_global_step_250.parquet.pass.csv +2 -0
  50. step-1B-top10-cluster-k2/global_step_250/actor/hf_model/output_eval/aime24_global_step_250.parquet.results.json +3 -0
.gitattributes ADDED
@@ -0,0 +1,36 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ *.7z filter=lfs diff=lfs merge=lfs -text
2
+ *.arrow filter=lfs diff=lfs merge=lfs -text
3
+ *.bin filter=lfs diff=lfs merge=lfs -text
4
+ *.bz2 filter=lfs diff=lfs merge=lfs -text
5
+ *.ckpt filter=lfs diff=lfs merge=lfs -text
6
+ *.ftz filter=lfs diff=lfs merge=lfs -text
7
+ *.gz filter=lfs diff=lfs merge=lfs -text
8
+ *.h5 filter=lfs diff=lfs merge=lfs -text
9
+ *.joblib filter=lfs diff=lfs merge=lfs -text
10
+ *.lfs.* filter=lfs diff=lfs merge=lfs -text
11
+ *.mlmodel filter=lfs diff=lfs merge=lfs -text
12
+ *.model filter=lfs diff=lfs merge=lfs -text
13
+ *.msgpack filter=lfs diff=lfs merge=lfs -text
14
+ *.npy filter=lfs diff=lfs merge=lfs -text
15
+ *.npz filter=lfs diff=lfs merge=lfs -text
16
+ *.onnx filter=lfs diff=lfs merge=lfs -text
17
+ *.ot filter=lfs diff=lfs merge=lfs -text
18
+ *.parquet filter=lfs diff=lfs merge=lfs -text
19
+ *.pb filter=lfs diff=lfs merge=lfs -text
20
+ *.pickle filter=lfs diff=lfs merge=lfs -text
21
+ *.pkl filter=lfs diff=lfs merge=lfs -text
22
+ *.pt filter=lfs diff=lfs merge=lfs -text
23
+ *.pth filter=lfs diff=lfs merge=lfs -text
24
+ *.rar filter=lfs diff=lfs merge=lfs -text
25
+ *.safetensors filter=lfs diff=lfs merge=lfs -text
26
+ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
27
+ *.tar.* filter=lfs diff=lfs merge=lfs -text
28
+ *.tar filter=lfs diff=lfs merge=lfs -text
29
+ *.tflite filter=lfs diff=lfs merge=lfs -text
30
+ *.tgz filter=lfs diff=lfs merge=lfs -text
31
+ *.wasm filter=lfs diff=lfs merge=lfs -text
32
+ *.xz filter=lfs diff=lfs merge=lfs -text
33
+ *.zip filter=lfs diff=lfs merge=lfs -text
34
+ *.zst filter=lfs diff=lfs merge=lfs -text
35
+ *tfevents* filter=lfs diff=lfs merge=lfs -text
36
+ *.json filter=lfs diff=lfs merge=lfs -text
GRPO-qwenMath-1B-cluster/global_step_250/actor/hf_model/added_tokens.json ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:58b54bbe36fc752f79a24a271ef66a0a0830054b4dfad94bde757d851968060b
3
+ size 605
GRPO-qwenMath-1B-cluster/global_step_250/actor/hf_model/chat_template.jinja ADDED
@@ -0,0 +1,54 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {%- if tools %}
2
+ {{- '<|im_start|>system\n' }}
3
+ {%- if messages[0]['role'] == 'system' %}
4
+ {{- messages[0]['content'] }}
5
+ {%- else %}
6
+ {{- 'Please reason step by step, and put your final answer within \\boxed{}.' }}
7
+ {%- endif %}
8
+ {{- "\n\n# Tools\n\nYou may call one or more functions to assist with the user query.\n\nYou are provided with function signatures within <tools></tools> XML tags:\n<tools>" }}
9
+ {%- for tool in tools %}
10
+ {{- "\n" }}
11
+ {{- tool | tojson }}
12
+ {%- endfor %}
13
+ {{- "\n</tools>\n\nFor each function call, return a json object with function name and arguments within <tool_call></tool_call> XML tags:\n<tool_call>\n{\"name\": <function-name>, \"arguments\": <args-json-object>}\n</tool_call><|im_end|>\n" }}
14
+ {%- else %}
15
+ {%- if messages[0]['role'] == 'system' %}
16
+ {{- '<|im_start|>system\n' + messages[0]['content'] + '<|im_end|>\n' }}
17
+ {%- else %}
18
+ {{- '<|im_start|>system\nPlease reason step by step, and put your final answer within \\boxed{}.<|im_end|>\n' }}
19
+ {%- endif %}
20
+ {%- endif %}
21
+ {%- for message in messages %}
22
+ {%- if (message.role == "user") or (message.role == "system" and not loop.first) or (message.role == "assistant" and not message.tool_calls) %}
23
+ {{- '<|im_start|>' + message.role + '\n' + message.content + '<|im_end|>' + '\n' }}
24
+ {%- elif message.role == "assistant" %}
25
+ {{- '<|im_start|>' + message.role }}
26
+ {%- if message.content %}
27
+ {{- '\n' + message.content }}
28
+ {%- endif %}
29
+ {%- for tool_call in message.tool_calls %}
30
+ {%- if tool_call.function is defined %}
31
+ {%- set tool_call = tool_call.function %}
32
+ {%- endif %}
33
+ {{- '\n<tool_call>\n{"name": "' }}
34
+ {{- tool_call.name }}
35
+ {{- '", "arguments": ' }}
36
+ {{- tool_call.arguments | tojson }}
37
+ {{- '}\n</tool_call>' }}
38
+ {%- endfor %}
39
+ {{- '<|im_end|>\n' }}
40
+ {%- elif message.role == "tool" %}
41
+ {%- if (loop.index0 == 0) or (messages[loop.index0 - 1].role != "tool") %}
42
+ {{- '<|im_start|>user' }}
43
+ {%- endif %}
44
+ {{- '\n<tool_response>\n' }}
45
+ {{- message.content }}
46
+ {{- '\n</tool_response>' }}
47
+ {%- if loop.last or (messages[loop.index0 + 1].role != "tool") %}
48
+ {{- '<|im_end|>\n' }}
49
+ {%- endif %}
50
+ {%- endif %}
51
+ {%- endfor %}
52
+ {%- if add_generation_prompt %}
53
+ {{- '<|im_start|>assistant\n' }}
54
+ {%- endif %}
GRPO-qwenMath-1B-cluster/global_step_250/actor/hf_model/config.json ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:feb97c8aefd9c2a096a47434cb2f88aef6db2c459af236d09ec07e1e912ad4f0
3
+ size 1339
GRPO-qwenMath-1B-cluster/global_step_250/actor/hf_model/generation_config.json ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d7402413987500866d32be7f6136c9db6e85080e3b4de4e5eccc83dc4df2548d
3
+ size 121
GRPO-qwenMath-1B-cluster/global_step_250/actor/hf_model/merges.txt ADDED
The diff for this file is too large to render. See raw diff
 
GRPO-qwenMath-1B-cluster/global_step_250/actor/hf_model/model.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:9ddc0d6c28babcf454a4041f80097a61f5edc4b9a3f28065fb7a9d303f303d31
3
+ size 3554214752
GRPO-qwenMath-1B-cluster/global_step_250/actor/hf_model/output_eval/aime2025_global_step_250.parquet ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:93ae8cd998824b9cda44e24157723b986c52fc5672b3f10ae0d3f921078ff670
3
+ size 170822
GRPO-qwenMath-1B-cluster/global_step_250/actor/hf_model/output_eval/aime2025_global_step_250.parquet.pass.csv ADDED
@@ -0,0 +1,2 @@
 
 
 
1
+ model_path,dataset,pass@1,pass@4
2
+ /home/ywxzml3j/ywxzml3juser28/workspace/StepReward-gspo/data/checkpoints/GRPO-qwenMath-1B-cluster/global_step_250/actor/hf_model,aime2025.parquet,0.075,0.16666666666666666
GRPO-qwenMath-1B-cluster/global_step_250/actor/hf_model/output_eval/aime2025_global_step_250.parquet.results.json ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:0553a742e3a635c6ac57d0921302fdaef559c4ebdc1508236403619edb1cd369
3
+ size 660
GRPO-qwenMath-1B-cluster/global_step_250/actor/hf_model/output_eval/aime24_global_step_250.parquet ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ef86602a1387677ab5a6eb00a91f54fe1cfa1f9ce675f40f85a0a16cc838d340
3
+ size 159309
GRPO-qwenMath-1B-cluster/global_step_250/actor/hf_model/output_eval/aime24_global_step_250.parquet.pass.csv ADDED
@@ -0,0 +1,2 @@
 
 
 
1
+ model_path,dataset,pass@1,pass@4
2
+ /home/ywxzml3j/ywxzml3juser28/workspace/StepReward-gspo/data/checkpoints/GRPO-qwenMath-1B-cluster/global_step_250/actor/hf_model,aime24.parquet,0.18333333333333332,0.3
GRPO-qwenMath-1B-cluster/global_step_250/actor/hf_model/output_eval/aime24_global_step_250.parquet.results.json ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:239b88c80d0e2792a9361e10e4ea9eaa42a0efac245e0c9d1cd6dc725aef3135
3
+ size 660
GRPO-qwenMath-1B-cluster/global_step_250/actor/hf_model/output_eval/amc23_global_step_250.parquet ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:50efb7083b9af4ca485ce26eb37ad68d2ada78ec67c2cda5680fc03fe9b60d0c
3
+ size 166540
GRPO-qwenMath-1B-cluster/global_step_250/actor/hf_model/output_eval/amc23_global_step_250.parquet.pass.csv ADDED
@@ -0,0 +1,2 @@
 
 
 
1
+ model_path,dataset,pass@1,pass@4
2
+ /home/ywxzml3j/ywxzml3juser28/workspace/StepReward-gspo/data/checkpoints/GRPO-qwenMath-1B-cluster/global_step_250/actor/hf_model,amc23.parquet,0.46875,0.725
GRPO-qwenMath-1B-cluster/global_step_250/actor/hf_model/output_eval/amc23_global_step_250.parquet.results.json ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:633074a2a90843324c009ab971273871cd8a7a126385c3982b400aefbc4da396
3
+ size 880
GRPO-qwenMath-1B-cluster/global_step_250/actor/hf_model/output_eval/math_global_step_250.parquet ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:1cd55fb404b59f25c30325d5ec83e703fe118e2ffd98773c42a8f50d3b3ee151
3
+ size 13696138
GRPO-qwenMath-1B-cluster/global_step_250/actor/hf_model/output_eval/math_global_step_250.parquet.pass.csv ADDED
@@ -0,0 +1,2 @@
 
 
 
1
+ model_path,dataset,pass@1,pass@4
2
+ /home/ywxzml3j/ywxzml3juser28/workspace/StepReward-gspo/data/checkpoints/GRPO-qwenMath-1B-cluster/global_step_250/actor/hf_model,math.parquet,0.687875150060024,0.8207282913165266
GRPO-qwenMath-1B-cluster/global_step_250/actor/hf_model/output_eval/math_global_step_250.parquet.results.json ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:63993dc04dfecf5c6a342582b4e6e9ea81435790b7ac42064df49bcd6c8b888a
3
+ size 109956
GRPO-qwenMath-1B-cluster/global_step_250/actor/hf_model/output_eval/minerva_global_step_250.parquet ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:2902648ba58f5246c4a2138dde39d5cd054443c7a323553317564047f3ffbd4a
3
+ size 849590
GRPO-qwenMath-1B-cluster/global_step_250/actor/hf_model/output_eval/minerva_global_step_250.parquet.pass.csv ADDED
@@ -0,0 +1,2 @@
 
 
 
1
+ model_path,dataset,pass@1,pass@4
2
+ /home/ywxzml3j/ywxzml3juser28/workspace/StepReward-gspo/data/checkpoints/GRPO-qwenMath-1B-cluster/global_step_250/actor/hf_model,minerva.parquet,0.029411764705882353,0.04411764705882353
GRPO-qwenMath-1B-cluster/global_step_250/actor/hf_model/output_eval/minerva_global_step_250.parquet.results.json ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:5e654042bb37a638b238f280e173a10c6a4c4fd187c87a78581e55f5e044553a
3
+ size 5984
GRPO-qwenMath-1B-cluster/global_step_250/actor/hf_model/output_eval/olympiad_global_step_250.parquet ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:2d2b695bfaa04fe38882808aeddbf53f23a24f4bdff2d1f82c451dba67b007c0
3
+ size 2489202
GRPO-qwenMath-1B-cluster/global_step_250/actor/hf_model/output_eval/olympiad_global_step_250.parquet.pass.csv ADDED
@@ -0,0 +1,2 @@
 
 
 
1
+ model_path,dataset,pass@1,pass@4
2
+ /home/ywxzml3j/ywxzml3juser28/workspace/StepReward-gspo/data/checkpoints/GRPO-qwenMath-1B-cluster/global_step_250/actor/hf_model,olympiad.parquet,0.300148588410104,0.43536404160475484
GRPO-qwenMath-1B-cluster/global_step_250/actor/hf_model/output_eval/olympiad_global_step_250.parquet.results.json ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d7b4d32429fa6d1618a2b6a179fc1b15886d9a5c5d66e874b2cfe3e14296ac76
3
+ size 14806
GRPO-qwenMath-1B-cluster/global_step_250/actor/hf_model/special_tokens_map.json ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:6676f091c8bc4d1b50146427cfde92073402866b87b6e39223227931b70083e9
3
+ size 616
GRPO-qwenMath-1B-cluster/global_step_250/actor/hf_model/tokenizer.json ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:9c5ae00e602b8860cbd784ba82a8aa14e8feecec692e7076590d014d7b7fdafa
3
+ size 11421896
GRPO-qwenMath-1B-cluster/global_step_250/actor/hf_model/tokenizer_config.json ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:60f6e7bc948cedd377a20f01d022ef664d026fbc93d72e0a9bdac233f8632181
3
+ size 4689
GRPO-qwenMath-1B-cluster/global_step_250/actor/hf_model/vocab.json ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ca10d7e9fb3ed18575dd1e277a2579c16d108e32f27439684afa0e10b1440910
3
+ size 2776833
step-1B-top10-cluster-k2/global_step_250/actor/hf_model/added_tokens.json ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:58b54bbe36fc752f79a24a271ef66a0a0830054b4dfad94bde757d851968060b
3
+ size 605
step-1B-top10-cluster-k2/global_step_250/actor/hf_model/chat_template.jinja ADDED
@@ -0,0 +1,54 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {%- if tools %}
2
+ {{- '<|im_start|>system\n' }}
3
+ {%- if messages[0]['role'] == 'system' %}
4
+ {{- messages[0]['content'] }}
5
+ {%- else %}
6
+ {{- 'Please reason step by step, and put your final answer within \\boxed{}.' }}
7
+ {%- endif %}
8
+ {{- "\n\n# Tools\n\nYou may call one or more functions to assist with the user query.\n\nYou are provided with function signatures within <tools></tools> XML tags:\n<tools>" }}
9
+ {%- for tool in tools %}
10
+ {{- "\n" }}
11
+ {{- tool | tojson }}
12
+ {%- endfor %}
13
+ {{- "\n</tools>\n\nFor each function call, return a json object with function name and arguments within <tool_call></tool_call> XML tags:\n<tool_call>\n{\"name\": <function-name>, \"arguments\": <args-json-object>}\n</tool_call><|im_end|>\n" }}
14
+ {%- else %}
15
+ {%- if messages[0]['role'] == 'system' %}
16
+ {{- '<|im_start|>system\n' + messages[0]['content'] + '<|im_end|>\n' }}
17
+ {%- else %}
18
+ {{- '<|im_start|>system\nPlease reason step by step, and put your final answer within \\boxed{}.<|im_end|>\n' }}
19
+ {%- endif %}
20
+ {%- endif %}
21
+ {%- for message in messages %}
22
+ {%- if (message.role == "user") or (message.role == "system" and not loop.first) or (message.role == "assistant" and not message.tool_calls) %}
23
+ {{- '<|im_start|>' + message.role + '\n' + message.content + '<|im_end|>' + '\n' }}
24
+ {%- elif message.role == "assistant" %}
25
+ {{- '<|im_start|>' + message.role }}
26
+ {%- if message.content %}
27
+ {{- '\n' + message.content }}
28
+ {%- endif %}
29
+ {%- for tool_call in message.tool_calls %}
30
+ {%- if tool_call.function is defined %}
31
+ {%- set tool_call = tool_call.function %}
32
+ {%- endif %}
33
+ {{- '\n<tool_call>\n{"name": "' }}
34
+ {{- tool_call.name }}
35
+ {{- '", "arguments": ' }}
36
+ {{- tool_call.arguments | tojson }}
37
+ {{- '}\n</tool_call>' }}
38
+ {%- endfor %}
39
+ {{- '<|im_end|>\n' }}
40
+ {%- elif message.role == "tool" %}
41
+ {%- if (loop.index0 == 0) or (messages[loop.index0 - 1].role != "tool") %}
42
+ {{- '<|im_start|>user' }}
43
+ {%- endif %}
44
+ {{- '\n<tool_response>\n' }}
45
+ {{- message.content }}
46
+ {{- '\n</tool_response>' }}
47
+ {%- if loop.last or (messages[loop.index0 + 1].role != "tool") %}
48
+ {{- '<|im_end|>\n' }}
49
+ {%- endif %}
50
+ {%- endif %}
51
+ {%- endfor %}
52
+ {%- if add_generation_prompt %}
53
+ {{- '<|im_start|>assistant\n' }}
54
+ {%- endif %}
step-1B-top10-cluster-k2/global_step_250/actor/hf_model/config.json ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:feb97c8aefd9c2a096a47434cb2f88aef6db2c459af236d09ec07e1e912ad4f0
3
+ size 1339
step-1B-top10-cluster-k2/global_step_250/actor/hf_model/eval_output/hf_model_aime24.parquet ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b6734843a1ced06046d0aad997cef1129063c7f05aed08d55a8943e9a500bd31
3
+ size 147871
step-1B-top10-cluster-k2/global_step_250/actor/hf_model/eval_output/hf_model_aime24.parquet.pass.csv ADDED
@@ -0,0 +1,2 @@
 
 
 
1
+ model_path,dataset,pass@1,pass@4
2
+ /home/ywxzml3j/ywxzml3juser28/workspace/StepReward-gspo/data/checkpoints/step-1B-top10-cluster-k2/global_step_250/actor/hf_model,aime24.parquet,0.15833333333333333,0.3
step-1B-top10-cluster-k2/global_step_250/actor/hf_model/eval_output/hf_model_aime24.parquet.results.json ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:086d9b4024e350178ca329be512a3abfb5e1dfa5dcf6ef023f4b01cfc825f2af
3
+ size 660
step-1B-top10-cluster-k2/global_step_250/actor/hf_model/eval_output/hf_model_amc23.parquet ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:802de162fbf3a1eaf2a4023e76bb90d33a367427f7e0fb1daf71adf9e99a4f2a
3
+ size 146462
step-1B-top10-cluster-k2/global_step_250/actor/hf_model/eval_output/hf_model_amc23.parquet.pass.csv ADDED
@@ -0,0 +1,2 @@
 
 
 
1
+ model_path,dataset,pass@1,pass@4
2
+ /home/ywxzml3j/ywxzml3juser28/workspace/StepReward-gspo/data/checkpoints/step-1B-top10-cluster-k2/global_step_250/actor/hf_model,amc23.parquet,0.55,0.775
step-1B-top10-cluster-k2/global_step_250/actor/hf_model/eval_output/hf_model_amc23.parquet.results.json ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c312146b158247745f5402a5ba6210e5bde2a8fb1583c885f566d2bef6e8e664
3
+ size 880
step-1B-top10-cluster-k2/global_step_250/actor/hf_model/eval_output/hf_model_math.parquet ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b901e0eb1bfb9da1c1a2b2209f7460a92d24b1220a40f5c30a11597c492a97f0
3
+ size 11918410
step-1B-top10-cluster-k2/global_step_250/actor/hf_model/eval_output/hf_model_math.parquet.pass.csv ADDED
@@ -0,0 +1,2 @@
 
 
 
1
+ model_path,dataset,pass@1,pass@4
2
+ /home/ywxzml3j/ywxzml3juser28/workspace/StepReward-gspo/data/checkpoints/step-1B-top10-cluster-k2/global_step_250/actor/hf_model,math.parquet,0.7197879151660664,0.8313325330132053
step-1B-top10-cluster-k2/global_step_250/actor/hf_model/eval_output/hf_model_math.parquet.results.json ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:256daa7f022f2a65e52b90dcb1ab4a1420db60e16655620782f3cc3772dea903
3
+ size 109956
step-1B-top10-cluster-k2/global_step_250/actor/hf_model/eval_output/hf_model_olympiad.parquet ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d68f4d86e67077b1b75a1626e556535abc1aeae63641fcdff4cec7e31d34e5fd
3
+ size 2254482
step-1B-top10-cluster-k2/global_step_250/actor/hf_model/eval_output/hf_model_olympiad.parquet.pass.csv ADDED
@@ -0,0 +1,2 @@
 
 
 
1
+ model_path,dataset,pass@1,pass@4
2
+ /home/ywxzml3j/ywxzml3juser28/workspace/StepReward-gspo/data/checkpoints/step-1B-top10-cluster-k2/global_step_250/actor/hf_model,olympiad.parquet,0.3187221396731055,0.4472511144130758
step-1B-top10-cluster-k2/global_step_250/actor/hf_model/eval_output/hf_model_olympiad.parquet.results.json ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:46517c1449a4f0f7e9403dd63c667dc584fe92bdfc6428cd8aac41f3f7cc9fa5
3
+ size 14806
step-1B-top10-cluster-k2/global_step_250/actor/hf_model/generation_config.json ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d7402413987500866d32be7f6136c9db6e85080e3b4de4e5eccc83dc4df2548d
3
+ size 121
step-1B-top10-cluster-k2/global_step_250/actor/hf_model/merges.txt ADDED
The diff for this file is too large to render. See raw diff
 
step-1B-top10-cluster-k2/global_step_250/actor/hf_model/model.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:2b3be0c4b111d722be36721ed7c0d0e03a85e6f398dd1e8ffdd64956d7f5b338
3
+ size 3554214752
step-1B-top10-cluster-k2/global_step_250/actor/hf_model/output_eval/aime24_global_step_250.parquet ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b6734843a1ced06046d0aad997cef1129063c7f05aed08d55a8943e9a500bd31
3
+ size 147871
step-1B-top10-cluster-k2/global_step_250/actor/hf_model/output_eval/aime24_global_step_250.parquet.pass.csv ADDED
@@ -0,0 +1,2 @@
 
 
 
1
+ model_path,dataset,pass@1,pass@4
2
+ /home/ywxzml3j/ywxzml3juser28/workspace/StepReward-gspo/data/checkpoints/step-1B-top10-cluster-k2/global_step_250/actor/hf_model,aime24.parquet,0.15833333333333333,0.3
step-1B-top10-cluster-k2/global_step_250/actor/hf_model/output_eval/aime24_global_step_250.parquet.results.json ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:086d9b4024e350178ca329be512a3abfb5e1dfa5dcf6ef023f4b01cfc825f2af
3
+ size 660