ywxzml3juser28 committed on
Commit ·
b9e0a97
0
Parent(s):
beifen
Browse files
This view is limited to 50 files because it contains too many changes. See raw diff
- .gitattributes +36 -0
- GRPO-qwenMath-1B-cluster/global_step_250/actor/hf_model/added_tokens.json +3 -0
- GRPO-qwenMath-1B-cluster/global_step_250/actor/hf_model/chat_template.jinja +54 -0
- GRPO-qwenMath-1B-cluster/global_step_250/actor/hf_model/config.json +3 -0
- GRPO-qwenMath-1B-cluster/global_step_250/actor/hf_model/generation_config.json +3 -0
- GRPO-qwenMath-1B-cluster/global_step_250/actor/hf_model/merges.txt +0 -0
- GRPO-qwenMath-1B-cluster/global_step_250/actor/hf_model/model.safetensors +3 -0
- GRPO-qwenMath-1B-cluster/global_step_250/actor/hf_model/output_eval/aime2025_global_step_250.parquet +3 -0
- GRPO-qwenMath-1B-cluster/global_step_250/actor/hf_model/output_eval/aime2025_global_step_250.parquet.pass.csv +2 -0
- GRPO-qwenMath-1B-cluster/global_step_250/actor/hf_model/output_eval/aime2025_global_step_250.parquet.results.json +3 -0
- GRPO-qwenMath-1B-cluster/global_step_250/actor/hf_model/output_eval/aime24_global_step_250.parquet +3 -0
- GRPO-qwenMath-1B-cluster/global_step_250/actor/hf_model/output_eval/aime24_global_step_250.parquet.pass.csv +2 -0
- GRPO-qwenMath-1B-cluster/global_step_250/actor/hf_model/output_eval/aime24_global_step_250.parquet.results.json +3 -0
- GRPO-qwenMath-1B-cluster/global_step_250/actor/hf_model/output_eval/amc23_global_step_250.parquet +3 -0
- GRPO-qwenMath-1B-cluster/global_step_250/actor/hf_model/output_eval/amc23_global_step_250.parquet.pass.csv +2 -0
- GRPO-qwenMath-1B-cluster/global_step_250/actor/hf_model/output_eval/amc23_global_step_250.parquet.results.json +3 -0
- GRPO-qwenMath-1B-cluster/global_step_250/actor/hf_model/output_eval/math_global_step_250.parquet +3 -0
- GRPO-qwenMath-1B-cluster/global_step_250/actor/hf_model/output_eval/math_global_step_250.parquet.pass.csv +2 -0
- GRPO-qwenMath-1B-cluster/global_step_250/actor/hf_model/output_eval/math_global_step_250.parquet.results.json +3 -0
- GRPO-qwenMath-1B-cluster/global_step_250/actor/hf_model/output_eval/minerva_global_step_250.parquet +3 -0
- GRPO-qwenMath-1B-cluster/global_step_250/actor/hf_model/output_eval/minerva_global_step_250.parquet.pass.csv +2 -0
- GRPO-qwenMath-1B-cluster/global_step_250/actor/hf_model/output_eval/minerva_global_step_250.parquet.results.json +3 -0
- GRPO-qwenMath-1B-cluster/global_step_250/actor/hf_model/output_eval/olympiad_global_step_250.parquet +3 -0
- GRPO-qwenMath-1B-cluster/global_step_250/actor/hf_model/output_eval/olympiad_global_step_250.parquet.pass.csv +2 -0
- GRPO-qwenMath-1B-cluster/global_step_250/actor/hf_model/output_eval/olympiad_global_step_250.parquet.results.json +3 -0
- GRPO-qwenMath-1B-cluster/global_step_250/actor/hf_model/special_tokens_map.json +3 -0
- GRPO-qwenMath-1B-cluster/global_step_250/actor/hf_model/tokenizer.json +3 -0
- GRPO-qwenMath-1B-cluster/global_step_250/actor/hf_model/tokenizer_config.json +3 -0
- GRPO-qwenMath-1B-cluster/global_step_250/actor/hf_model/vocab.json +3 -0
- step-1B-top10-cluster-k2/global_step_250/actor/hf_model/added_tokens.json +3 -0
- step-1B-top10-cluster-k2/global_step_250/actor/hf_model/chat_template.jinja +54 -0
- step-1B-top10-cluster-k2/global_step_250/actor/hf_model/config.json +3 -0
- step-1B-top10-cluster-k2/global_step_250/actor/hf_model/eval_output/hf_model_aime24.parquet +3 -0
- step-1B-top10-cluster-k2/global_step_250/actor/hf_model/eval_output/hf_model_aime24.parquet.pass.csv +2 -0
- step-1B-top10-cluster-k2/global_step_250/actor/hf_model/eval_output/hf_model_aime24.parquet.results.json +3 -0
- step-1B-top10-cluster-k2/global_step_250/actor/hf_model/eval_output/hf_model_amc23.parquet +3 -0
- step-1B-top10-cluster-k2/global_step_250/actor/hf_model/eval_output/hf_model_amc23.parquet.pass.csv +2 -0
- step-1B-top10-cluster-k2/global_step_250/actor/hf_model/eval_output/hf_model_amc23.parquet.results.json +3 -0
- step-1B-top10-cluster-k2/global_step_250/actor/hf_model/eval_output/hf_model_math.parquet +3 -0
- step-1B-top10-cluster-k2/global_step_250/actor/hf_model/eval_output/hf_model_math.parquet.pass.csv +2 -0
- step-1B-top10-cluster-k2/global_step_250/actor/hf_model/eval_output/hf_model_math.parquet.results.json +3 -0
- step-1B-top10-cluster-k2/global_step_250/actor/hf_model/eval_output/hf_model_olympiad.parquet +3 -0
- step-1B-top10-cluster-k2/global_step_250/actor/hf_model/eval_output/hf_model_olympiad.parquet.pass.csv +2 -0
- step-1B-top10-cluster-k2/global_step_250/actor/hf_model/eval_output/hf_model_olympiad.parquet.results.json +3 -0
- step-1B-top10-cluster-k2/global_step_250/actor/hf_model/generation_config.json +3 -0
- step-1B-top10-cluster-k2/global_step_250/actor/hf_model/merges.txt +0 -0
- step-1B-top10-cluster-k2/global_step_250/actor/hf_model/model.safetensors +3 -0
- step-1B-top10-cluster-k2/global_step_250/actor/hf_model/output_eval/aime24_global_step_250.parquet +3 -0
- step-1B-top10-cluster-k2/global_step_250/actor/hf_model/output_eval/aime24_global_step_250.parquet.pass.csv +2 -0
- step-1B-top10-cluster-k2/global_step_250/actor/hf_model/output_eval/aime24_global_step_250.parquet.results.json +3 -0
.gitattributes
ADDED
|
@@ -0,0 +1,36 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
*.7z filter=lfs diff=lfs merge=lfs -text
|
| 2 |
+
*.arrow filter=lfs diff=lfs merge=lfs -text
|
| 3 |
+
*.bin filter=lfs diff=lfs merge=lfs -text
|
| 4 |
+
*.bz2 filter=lfs diff=lfs merge=lfs -text
|
| 5 |
+
*.ckpt filter=lfs diff=lfs merge=lfs -text
|
| 6 |
+
*.ftz filter=lfs diff=lfs merge=lfs -text
|
| 7 |
+
*.gz filter=lfs diff=lfs merge=lfs -text
|
| 8 |
+
*.h5 filter=lfs diff=lfs merge=lfs -text
|
| 9 |
+
*.joblib filter=lfs diff=lfs merge=lfs -text
|
| 10 |
+
*.lfs.* filter=lfs diff=lfs merge=lfs -text
|
| 11 |
+
*.mlmodel filter=lfs diff=lfs merge=lfs -text
|
| 12 |
+
*.model filter=lfs diff=lfs merge=lfs -text
|
| 13 |
+
*.msgpack filter=lfs diff=lfs merge=lfs -text
|
| 14 |
+
*.npy filter=lfs diff=lfs merge=lfs -text
|
| 15 |
+
*.npz filter=lfs diff=lfs merge=lfs -text
|
| 16 |
+
*.onnx filter=lfs diff=lfs merge=lfs -text
|
| 17 |
+
*.ot filter=lfs diff=lfs merge=lfs -text
|
| 18 |
+
*.parquet filter=lfs diff=lfs merge=lfs -text
|
| 19 |
+
*.pb filter=lfs diff=lfs merge=lfs -text
|
| 20 |
+
*.pickle filter=lfs diff=lfs merge=lfs -text
|
| 21 |
+
*.pkl filter=lfs diff=lfs merge=lfs -text
|
| 22 |
+
*.pt filter=lfs diff=lfs merge=lfs -text
|
| 23 |
+
*.pth filter=lfs diff=lfs merge=lfs -text
|
| 24 |
+
*.rar filter=lfs diff=lfs merge=lfs -text
|
| 25 |
+
*.safetensors filter=lfs diff=lfs merge=lfs -text
|
| 26 |
+
saved_model/**/* filter=lfs diff=lfs merge=lfs -text
|
| 27 |
+
*.tar.* filter=lfs diff=lfs merge=lfs -text
|
| 28 |
+
*.tar filter=lfs diff=lfs merge=lfs -text
|
| 29 |
+
*.tflite filter=lfs diff=lfs merge=lfs -text
|
| 30 |
+
*.tgz filter=lfs diff=lfs merge=lfs -text
|
| 31 |
+
*.wasm filter=lfs diff=lfs merge=lfs -text
|
| 32 |
+
*.xz filter=lfs diff=lfs merge=lfs -text
|
| 33 |
+
*.zip filter=lfs diff=lfs merge=lfs -text
|
| 34 |
+
*.zst filter=lfs diff=lfs merge=lfs -text
|
| 35 |
+
*tfevents* filter=lfs diff=lfs merge=lfs -text
|
| 36 |
+
*.json filter=lfs diff=lfs merge=lfs -text
|
GRPO-qwenMath-1B-cluster/global_step_250/actor/hf_model/added_tokens.json
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:58b54bbe36fc752f79a24a271ef66a0a0830054b4dfad94bde757d851968060b
|
| 3 |
+
size 605
|
GRPO-qwenMath-1B-cluster/global_step_250/actor/hf_model/chat_template.jinja
ADDED
|
@@ -0,0 +1,54 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{%- if tools %}
|
| 2 |
+
{{- '<|im_start|>system\n' }}
|
| 3 |
+
{%- if messages[0]['role'] == 'system' %}
|
| 4 |
+
{{- messages[0]['content'] }}
|
| 5 |
+
{%- else %}
|
| 6 |
+
{{- 'Please reason step by step, and put your final answer within \\boxed{}.' }}
|
| 7 |
+
{%- endif %}
|
| 8 |
+
{{- "\n\n# Tools\n\nYou may call one or more functions to assist with the user query.\n\nYou are provided with function signatures within <tools></tools> XML tags:\n<tools>" }}
|
| 9 |
+
{%- for tool in tools %}
|
| 10 |
+
{{- "\n" }}
|
| 11 |
+
{{- tool | tojson }}
|
| 12 |
+
{%- endfor %}
|
| 13 |
+
{{- "\n</tools>\n\nFor each function call, return a json object with function name and arguments within <tool_call></tool_call> XML tags:\n<tool_call>\n{\"name\": <function-name>, \"arguments\": <args-json-object>}\n</tool_call><|im_end|>\n" }}
|
| 14 |
+
{%- else %}
|
| 15 |
+
{%- if messages[0]['role'] == 'system' %}
|
| 16 |
+
{{- '<|im_start|>system\n' + messages[0]['content'] + '<|im_end|>\n' }}
|
| 17 |
+
{%- else %}
|
| 18 |
+
{{- '<|im_start|>system\nPlease reason step by step, and put your final answer within \\boxed{}.<|im_end|>\n' }}
|
| 19 |
+
{%- endif %}
|
| 20 |
+
{%- endif %}
|
| 21 |
+
{%- for message in messages %}
|
| 22 |
+
{%- if (message.role == "user") or (message.role == "system" and not loop.first) or (message.role == "assistant" and not message.tool_calls) %}
|
| 23 |
+
{{- '<|im_start|>' + message.role + '\n' + message.content + '<|im_end|>' + '\n' }}
|
| 24 |
+
{%- elif message.role == "assistant" %}
|
| 25 |
+
{{- '<|im_start|>' + message.role }}
|
| 26 |
+
{%- if message.content %}
|
| 27 |
+
{{- '\n' + message.content }}
|
| 28 |
+
{%- endif %}
|
| 29 |
+
{%- for tool_call in message.tool_calls %}
|
| 30 |
+
{%- if tool_call.function is defined %}
|
| 31 |
+
{%- set tool_call = tool_call.function %}
|
| 32 |
+
{%- endif %}
|
| 33 |
+
{{- '\n<tool_call>\n{"name": "' }}
|
| 34 |
+
{{- tool_call.name }}
|
| 35 |
+
{{- '", "arguments": ' }}
|
| 36 |
+
{{- tool_call.arguments | tojson }}
|
| 37 |
+
{{- '}\n</tool_call>' }}
|
| 38 |
+
{%- endfor %}
|
| 39 |
+
{{- '<|im_end|>\n' }}
|
| 40 |
+
{%- elif message.role == "tool" %}
|
| 41 |
+
{%- if (loop.index0 == 0) or (messages[loop.index0 - 1].role != "tool") %}
|
| 42 |
+
{{- '<|im_start|>user' }}
|
| 43 |
+
{%- endif %}
|
| 44 |
+
{{- '\n<tool_response>\n' }}
|
| 45 |
+
{{- message.content }}
|
| 46 |
+
{{- '\n</tool_response>' }}
|
| 47 |
+
{%- if loop.last or (messages[loop.index0 + 1].role != "tool") %}
|
| 48 |
+
{{- '<|im_end|>\n' }}
|
| 49 |
+
{%- endif %}
|
| 50 |
+
{%- endif %}
|
| 51 |
+
{%- endfor %}
|
| 52 |
+
{%- if add_generation_prompt %}
|
| 53 |
+
{{- '<|im_start|>assistant\n' }}
|
| 54 |
+
{%- endif %}
|
GRPO-qwenMath-1B-cluster/global_step_250/actor/hf_model/config.json
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:feb97c8aefd9c2a096a47434cb2f88aef6db2c459af236d09ec07e1e912ad4f0
|
| 3 |
+
size 1339
|
GRPO-qwenMath-1B-cluster/global_step_250/actor/hf_model/generation_config.json
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:d7402413987500866d32be7f6136c9db6e85080e3b4de4e5eccc83dc4df2548d
|
| 3 |
+
size 121
|
GRPO-qwenMath-1B-cluster/global_step_250/actor/hf_model/merges.txt
ADDED
|
The diff for this file is too large to render.
See raw diff
|
|
|
GRPO-qwenMath-1B-cluster/global_step_250/actor/hf_model/model.safetensors
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:9ddc0d6c28babcf454a4041f80097a61f5edc4b9a3f28065fb7a9d303f303d31
|
| 3 |
+
size 3554214752
|
GRPO-qwenMath-1B-cluster/global_step_250/actor/hf_model/output_eval/aime2025_global_step_250.parquet
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:93ae8cd998824b9cda44e24157723b986c52fc5672b3f10ae0d3f921078ff670
|
| 3 |
+
size 170822
|
GRPO-qwenMath-1B-cluster/global_step_250/actor/hf_model/output_eval/aime2025_global_step_250.parquet.pass.csv
ADDED
|
@@ -0,0 +1,2 @@
|
|
|
|
|
|
|
|
|
|
| 1 |
+
model_path,dataset,pass@1,pass@4
|
| 2 |
+
/home/ywxzml3j/ywxzml3juser28/workspace/StepReward-gspo/data/checkpoints/GRPO-qwenMath-1B-cluster/global_step_250/actor/hf_model,aime2025.parquet,0.075,0.16666666666666666
|
GRPO-qwenMath-1B-cluster/global_step_250/actor/hf_model/output_eval/aime2025_global_step_250.parquet.results.json
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:0553a742e3a635c6ac57d0921302fdaef559c4ebdc1508236403619edb1cd369
|
| 3 |
+
size 660
|
GRPO-qwenMath-1B-cluster/global_step_250/actor/hf_model/output_eval/aime24_global_step_250.parquet
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:ef86602a1387677ab5a6eb00a91f54fe1cfa1f9ce675f40f85a0a16cc838d340
|
| 3 |
+
size 159309
|
GRPO-qwenMath-1B-cluster/global_step_250/actor/hf_model/output_eval/aime24_global_step_250.parquet.pass.csv
ADDED
|
@@ -0,0 +1,2 @@
|
|
|
|
|
|
|
|
|
|
| 1 |
+
model_path,dataset,pass@1,pass@4
|
| 2 |
+
/home/ywxzml3j/ywxzml3juser28/workspace/StepReward-gspo/data/checkpoints/GRPO-qwenMath-1B-cluster/global_step_250/actor/hf_model,aime24.parquet,0.18333333333333332,0.3
|
GRPO-qwenMath-1B-cluster/global_step_250/actor/hf_model/output_eval/aime24_global_step_250.parquet.results.json
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:239b88c80d0e2792a9361e10e4ea9eaa42a0efac245e0c9d1cd6dc725aef3135
|
| 3 |
+
size 660
|
GRPO-qwenMath-1B-cluster/global_step_250/actor/hf_model/output_eval/amc23_global_step_250.parquet
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:50efb7083b9af4ca485ce26eb37ad68d2ada78ec67c2cda5680fc03fe9b60d0c
|
| 3 |
+
size 166540
|
GRPO-qwenMath-1B-cluster/global_step_250/actor/hf_model/output_eval/amc23_global_step_250.parquet.pass.csv
ADDED
|
@@ -0,0 +1,2 @@
|
|
|
|
|
|
|
|
|
|
| 1 |
+
model_path,dataset,pass@1,pass@4
|
| 2 |
+
/home/ywxzml3j/ywxzml3juser28/workspace/StepReward-gspo/data/checkpoints/GRPO-qwenMath-1B-cluster/global_step_250/actor/hf_model,amc23.parquet,0.46875,0.725
|
GRPO-qwenMath-1B-cluster/global_step_250/actor/hf_model/output_eval/amc23_global_step_250.parquet.results.json
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:633074a2a90843324c009ab971273871cd8a7a126385c3982b400aefbc4da396
|
| 3 |
+
size 880
|
GRPO-qwenMath-1B-cluster/global_step_250/actor/hf_model/output_eval/math_global_step_250.parquet
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:1cd55fb404b59f25c30325d5ec83e703fe118e2ffd98773c42a8f50d3b3ee151
|
| 3 |
+
size 13696138
|
GRPO-qwenMath-1B-cluster/global_step_250/actor/hf_model/output_eval/math_global_step_250.parquet.pass.csv
ADDED
|
@@ -0,0 +1,2 @@
|
|
|
|
|
|
|
|
|
|
| 1 |
+
model_path,dataset,pass@1,pass@4
|
| 2 |
+
/home/ywxzml3j/ywxzml3juser28/workspace/StepReward-gspo/data/checkpoints/GRPO-qwenMath-1B-cluster/global_step_250/actor/hf_model,math.parquet,0.687875150060024,0.8207282913165266
|
GRPO-qwenMath-1B-cluster/global_step_250/actor/hf_model/output_eval/math_global_step_250.parquet.results.json
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:63993dc04dfecf5c6a342582b4e6e9ea81435790b7ac42064df49bcd6c8b888a
|
| 3 |
+
size 109956
|
GRPO-qwenMath-1B-cluster/global_step_250/actor/hf_model/output_eval/minerva_global_step_250.parquet
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:2902648ba58f5246c4a2138dde39d5cd054443c7a323553317564047f3ffbd4a
|
| 3 |
+
size 849590
|
GRPO-qwenMath-1B-cluster/global_step_250/actor/hf_model/output_eval/minerva_global_step_250.parquet.pass.csv
ADDED
|
@@ -0,0 +1,2 @@
|
|
|
|
|
|
|
|
|
|
| 1 |
+
model_path,dataset,pass@1,pass@4
|
| 2 |
+
/home/ywxzml3j/ywxzml3juser28/workspace/StepReward-gspo/data/checkpoints/GRPO-qwenMath-1B-cluster/global_step_250/actor/hf_model,minerva.parquet,0.029411764705882353,0.04411764705882353
|
GRPO-qwenMath-1B-cluster/global_step_250/actor/hf_model/output_eval/minerva_global_step_250.parquet.results.json
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:5e654042bb37a638b238f280e173a10c6a4c4fd187c87a78581e55f5e044553a
|
| 3 |
+
size 5984
|
GRPO-qwenMath-1B-cluster/global_step_250/actor/hf_model/output_eval/olympiad_global_step_250.parquet
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:2d2b695bfaa04fe38882808aeddbf53f23a24f4bdff2d1f82c451dba67b007c0
|
| 3 |
+
size 2489202
|
GRPO-qwenMath-1B-cluster/global_step_250/actor/hf_model/output_eval/olympiad_global_step_250.parquet.pass.csv
ADDED
|
@@ -0,0 +1,2 @@
|
|
|
|
|
|
|
|
|
|
| 1 |
+
model_path,dataset,pass@1,pass@4
|
| 2 |
+
/home/ywxzml3j/ywxzml3juser28/workspace/StepReward-gspo/data/checkpoints/GRPO-qwenMath-1B-cluster/global_step_250/actor/hf_model,olympiad.parquet,0.300148588410104,0.43536404160475484
|
GRPO-qwenMath-1B-cluster/global_step_250/actor/hf_model/output_eval/olympiad_global_step_250.parquet.results.json
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:d7b4d32429fa6d1618a2b6a179fc1b15886d9a5c5d66e874b2cfe3e14296ac76
|
| 3 |
+
size 14806
|
GRPO-qwenMath-1B-cluster/global_step_250/actor/hf_model/special_tokens_map.json
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:6676f091c8bc4d1b50146427cfde92073402866b87b6e39223227931b70083e9
|
| 3 |
+
size 616
|
GRPO-qwenMath-1B-cluster/global_step_250/actor/hf_model/tokenizer.json
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:9c5ae00e602b8860cbd784ba82a8aa14e8feecec692e7076590d014d7b7fdafa
|
| 3 |
+
size 11421896
|
GRPO-qwenMath-1B-cluster/global_step_250/actor/hf_model/tokenizer_config.json
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:60f6e7bc948cedd377a20f01d022ef664d026fbc93d72e0a9bdac233f8632181
|
| 3 |
+
size 4689
|
GRPO-qwenMath-1B-cluster/global_step_250/actor/hf_model/vocab.json
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:ca10d7e9fb3ed18575dd1e277a2579c16d108e32f27439684afa0e10b1440910
|
| 3 |
+
size 2776833
|
step-1B-top10-cluster-k2/global_step_250/actor/hf_model/added_tokens.json
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:58b54bbe36fc752f79a24a271ef66a0a0830054b4dfad94bde757d851968060b
|
| 3 |
+
size 605
|
step-1B-top10-cluster-k2/global_step_250/actor/hf_model/chat_template.jinja
ADDED
|
@@ -0,0 +1,54 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{%- if tools %}
|
| 2 |
+
{{- '<|im_start|>system\n' }}
|
| 3 |
+
{%- if messages[0]['role'] == 'system' %}
|
| 4 |
+
{{- messages[0]['content'] }}
|
| 5 |
+
{%- else %}
|
| 6 |
+
{{- 'Please reason step by step, and put your final answer within \\boxed{}.' }}
|
| 7 |
+
{%- endif %}
|
| 8 |
+
{{- "\n\n# Tools\n\nYou may call one or more functions to assist with the user query.\n\nYou are provided with function signatures within <tools></tools> XML tags:\n<tools>" }}
|
| 9 |
+
{%- for tool in tools %}
|
| 10 |
+
{{- "\n" }}
|
| 11 |
+
{{- tool | tojson }}
|
| 12 |
+
{%- endfor %}
|
| 13 |
+
{{- "\n</tools>\n\nFor each function call, return a json object with function name and arguments within <tool_call></tool_call> XML tags:\n<tool_call>\n{\"name\": <function-name>, \"arguments\": <args-json-object>}\n</tool_call><|im_end|>\n" }}
|
| 14 |
+
{%- else %}
|
| 15 |
+
{%- if messages[0]['role'] == 'system' %}
|
| 16 |
+
{{- '<|im_start|>system\n' + messages[0]['content'] + '<|im_end|>\n' }}
|
| 17 |
+
{%- else %}
|
| 18 |
+
{{- '<|im_start|>system\nPlease reason step by step, and put your final answer within \\boxed{}.<|im_end|>\n' }}
|
| 19 |
+
{%- endif %}
|
| 20 |
+
{%- endif %}
|
| 21 |
+
{%- for message in messages %}
|
| 22 |
+
{%- if (message.role == "user") or (message.role == "system" and not loop.first) or (message.role == "assistant" and not message.tool_calls) %}
|
| 23 |
+
{{- '<|im_start|>' + message.role + '\n' + message.content + '<|im_end|>' + '\n' }}
|
| 24 |
+
{%- elif message.role == "assistant" %}
|
| 25 |
+
{{- '<|im_start|>' + message.role }}
|
| 26 |
+
{%- if message.content %}
|
| 27 |
+
{{- '\n' + message.content }}
|
| 28 |
+
{%- endif %}
|
| 29 |
+
{%- for tool_call in message.tool_calls %}
|
| 30 |
+
{%- if tool_call.function is defined %}
|
| 31 |
+
{%- set tool_call = tool_call.function %}
|
| 32 |
+
{%- endif %}
|
| 33 |
+
{{- '\n<tool_call>\n{"name": "' }}
|
| 34 |
+
{{- tool_call.name }}
|
| 35 |
+
{{- '", "arguments": ' }}
|
| 36 |
+
{{- tool_call.arguments | tojson }}
|
| 37 |
+
{{- '}\n</tool_call>' }}
|
| 38 |
+
{%- endfor %}
|
| 39 |
+
{{- '<|im_end|>\n' }}
|
| 40 |
+
{%- elif message.role == "tool" %}
|
| 41 |
+
{%- if (loop.index0 == 0) or (messages[loop.index0 - 1].role != "tool") %}
|
| 42 |
+
{{- '<|im_start|>user' }}
|
| 43 |
+
{%- endif %}
|
| 44 |
+
{{- '\n<tool_response>\n' }}
|
| 45 |
+
{{- message.content }}
|
| 46 |
+
{{- '\n</tool_response>' }}
|
| 47 |
+
{%- if loop.last or (messages[loop.index0 + 1].role != "tool") %}
|
| 48 |
+
{{- '<|im_end|>\n' }}
|
| 49 |
+
{%- endif %}
|
| 50 |
+
{%- endif %}
|
| 51 |
+
{%- endfor %}
|
| 52 |
+
{%- if add_generation_prompt %}
|
| 53 |
+
{{- '<|im_start|>assistant\n' }}
|
| 54 |
+
{%- endif %}
|
step-1B-top10-cluster-k2/global_step_250/actor/hf_model/config.json
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:feb97c8aefd9c2a096a47434cb2f88aef6db2c459af236d09ec07e1e912ad4f0
|
| 3 |
+
size 1339
|
step-1B-top10-cluster-k2/global_step_250/actor/hf_model/eval_output/hf_model_aime24.parquet
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:b6734843a1ced06046d0aad997cef1129063c7f05aed08d55a8943e9a500bd31
|
| 3 |
+
size 147871
|
step-1B-top10-cluster-k2/global_step_250/actor/hf_model/eval_output/hf_model_aime24.parquet.pass.csv
ADDED
|
@@ -0,0 +1,2 @@
|
|
|
|
|
|
|
|
|
|
| 1 |
+
model_path,dataset,pass@1,pass@4
|
| 2 |
+
/home/ywxzml3j/ywxzml3juser28/workspace/StepReward-gspo/data/checkpoints/step-1B-top10-cluster-k2/global_step_250/actor/hf_model,aime24.parquet,0.15833333333333333,0.3
|
step-1B-top10-cluster-k2/global_step_250/actor/hf_model/eval_output/hf_model_aime24.parquet.results.json
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:086d9b4024e350178ca329be512a3abfb5e1dfa5dcf6ef023f4b01cfc825f2af
|
| 3 |
+
size 660
|
step-1B-top10-cluster-k2/global_step_250/actor/hf_model/eval_output/hf_model_amc23.parquet
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:802de162fbf3a1eaf2a4023e76bb90d33a367427f7e0fb1daf71adf9e99a4f2a
|
| 3 |
+
size 146462
|
step-1B-top10-cluster-k2/global_step_250/actor/hf_model/eval_output/hf_model_amc23.parquet.pass.csv
ADDED
|
@@ -0,0 +1,2 @@
|
|
|
|
|
|
|
|
|
|
| 1 |
+
model_path,dataset,pass@1,pass@4
|
| 2 |
+
/home/ywxzml3j/ywxzml3juser28/workspace/StepReward-gspo/data/checkpoints/step-1B-top10-cluster-k2/global_step_250/actor/hf_model,amc23.parquet,0.55,0.775
|
step-1B-top10-cluster-k2/global_step_250/actor/hf_model/eval_output/hf_model_amc23.parquet.results.json
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:c312146b158247745f5402a5ba6210e5bde2a8fb1583c885f566d2bef6e8e664
|
| 3 |
+
size 880
|
step-1B-top10-cluster-k2/global_step_250/actor/hf_model/eval_output/hf_model_math.parquet
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:b901e0eb1bfb9da1c1a2b2209f7460a92d24b1220a40f5c30a11597c492a97f0
|
| 3 |
+
size 11918410
|
step-1B-top10-cluster-k2/global_step_250/actor/hf_model/eval_output/hf_model_math.parquet.pass.csv
ADDED
|
@@ -0,0 +1,2 @@
|
|
|
|
|
|
|
|
|
|
| 1 |
+
model_path,dataset,pass@1,pass@4
|
| 2 |
+
/home/ywxzml3j/ywxzml3juser28/workspace/StepReward-gspo/data/checkpoints/step-1B-top10-cluster-k2/global_step_250/actor/hf_model,math.parquet,0.7197879151660664,0.8313325330132053
|
step-1B-top10-cluster-k2/global_step_250/actor/hf_model/eval_output/hf_model_math.parquet.results.json
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:256daa7f022f2a65e52b90dcb1ab4a1420db60e16655620782f3cc3772dea903
|
| 3 |
+
size 109956
|
step-1B-top10-cluster-k2/global_step_250/actor/hf_model/eval_output/hf_model_olympiad.parquet
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:d68f4d86e67077b1b75a1626e556535abc1aeae63641fcdff4cec7e31d34e5fd
|
| 3 |
+
size 2254482
|
step-1B-top10-cluster-k2/global_step_250/actor/hf_model/eval_output/hf_model_olympiad.parquet.pass.csv
ADDED
|
@@ -0,0 +1,2 @@
|
|
|
|
|
|
|
|
|
|
| 1 |
+
model_path,dataset,pass@1,pass@4
|
| 2 |
+
/home/ywxzml3j/ywxzml3juser28/workspace/StepReward-gspo/data/checkpoints/step-1B-top10-cluster-k2/global_step_250/actor/hf_model,olympiad.parquet,0.3187221396731055,0.4472511144130758
|
step-1B-top10-cluster-k2/global_step_250/actor/hf_model/eval_output/hf_model_olympiad.parquet.results.json
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:46517c1449a4f0f7e9403dd63c667dc584fe92bdfc6428cd8aac41f3f7cc9fa5
|
| 3 |
+
size 14806
|
step-1B-top10-cluster-k2/global_step_250/actor/hf_model/generation_config.json
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:d7402413987500866d32be7f6136c9db6e85080e3b4de4e5eccc83dc4df2548d
|
| 3 |
+
size 121
|
step-1B-top10-cluster-k2/global_step_250/actor/hf_model/merges.txt
ADDED
|
The diff for this file is too large to render.
See raw diff
|
|
|
step-1B-top10-cluster-k2/global_step_250/actor/hf_model/model.safetensors
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:2b3be0c4b111d722be36721ed7c0d0e03a85e6f398dd1e8ffdd64956d7f5b338
|
| 3 |
+
size 3554214752
|
step-1B-top10-cluster-k2/global_step_250/actor/hf_model/output_eval/aime24_global_step_250.parquet
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:b6734843a1ced06046d0aad997cef1129063c7f05aed08d55a8943e9a500bd31
|
| 3 |
+
size 147871
|
step-1B-top10-cluster-k2/global_step_250/actor/hf_model/output_eval/aime24_global_step_250.parquet.pass.csv
ADDED
|
@@ -0,0 +1,2 @@
|
|
|
|
|
|
|
|
|
|
| 1 |
+
model_path,dataset,pass@1,pass@4
|
| 2 |
+
/home/ywxzml3j/ywxzml3juser28/workspace/StepReward-gspo/data/checkpoints/step-1B-top10-cluster-k2/global_step_250/actor/hf_model,aime24.parquet,0.15833333333333333,0.3
|
step-1B-top10-cluster-k2/global_step_250/actor/hf_model/output_eval/aime24_global_step_250.parquet.results.json
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:086d9b4024e350178ca329be512a3abfb5e1dfa5dcf6ef023f4b01cfc825f2af
|
| 3 |
+
size 660
|