hour1 commited on
Commit
9114cf2
·
verified ·
1 Parent(s): e2656ab

Upload folder using huggingface_hub

Browse files
This view is limited to 50 files because it contains too many changes.   See raw diff
Files changed (50) hide show
  1. .gemini/config.yaml +10 -0
  2. .gitattributes +14 -0
  3. .github/CODEOWNERS +30 -0
  4. .github/ISSUE_TEMPLATE/bug-report.yml +65 -0
  5. .github/ISSUE_TEMPLATE/config.yml +2 -0
  6. .github/ISSUE_TEMPLATE/feature-request.yml +32 -0
  7. .github/PULL_REQUEST_TEMPLATE.md +40 -0
  8. .github/dependabot.yml +9 -0
  9. .github/workflows/.deprecate/e2e_eval_aime24.yml +147 -0
  10. .github/workflows/.deprecate/e2e_ppo_trainer.yml +133 -0
  11. .github/workflows/.deprecate/e2e_ppo_trainer_megatron_sglang.yml +155 -0
  12. .github/workflows/.deprecate/e2e_prime.yml +66 -0
  13. .github/workflows/.deprecate/e2e_spin.yml +119 -0
  14. .github/workflows/.deprecate/e2e_sppo.yml +118 -0
  15. .github/workflows/README.md +73 -0
  16. .github/workflows/check-pr-title.yml +58 -0
  17. .github/workflows/checkpoint_converter.yml +175 -0
  18. .github/workflows/cpu_unit_tests.yml +89 -0
  19. .github/workflows/doc.yml +100 -0
  20. .github/workflows/e2e_ascend.yml +156 -0
  21. .github/workflows/e2e_dapo.yml +145 -0
  22. .github/workflows/e2e_genrm_remote.yml +141 -0
  23. .github/workflows/e2e_one_step_off_policy.yml +178 -0
  24. .github/workflows/e2e_ppo_trainer.yml +79 -0
  25. .github/workflows/e2e_ppo_trainer_megatron_sglang.yml +281 -0
  26. .github/workflows/e2e_ppo_trainer_megatron_sglang_2.yml +275 -0
  27. .github/workflows/e2e_ppo_trainer_megatron_vllm.yml +292 -0
  28. .github/workflows/e2e_ppo_trainer_megatron_vllm_2.yml +420 -0
  29. .github/workflows/e2e_sft.yml +161 -0
  30. .github/workflows/gpu_unit_tests.yml +113 -0
  31. .github/workflows/model.yml +230 -0
  32. .github/workflows/pre-commit.yml +40 -0
  33. .github/workflows/reward_model.yml +131 -0
  34. .github/workflows/sanity.yml +109 -0
  35. .github/workflows/scorecard.yml +66 -0
  36. .github/workflows/secrets_scan.yml +22 -0
  37. .github/workflows/sgl.yml +178 -0
  38. .github/workflows/type-coverage-check.yml +31 -0
  39. .github/workflows/vllm.yml +145 -0
  40. .gitignore +128 -0
  41. .pre-commit-config.yaml +37 -0
  42. .readthedocs.yaml +19 -0
  43. CONTRIBUTING.md +89 -0
  44. LICENSE +202 -0
  45. Notice.txt +1 -0
  46. README.md +264 -0
  47. TODO.md +3 -0
  48. build_vllm.sh +11 -0
  49. debug.txt +25 -0
  50. docker/Apptainerfile.rocm +57 -0
.gemini/config.yaml ADDED
@@ -0,0 +1,10 @@
 
 
 
 
 
 
 
 
 
 
 
1
+ have_fun: false
2
+ code_review:
3
+ disable: false
4
+ comment_severity_threshold: HIGH
5
+ max_review_comments: -1
6
+ pull_request_opened:
7
+ help: false
8
+ summary: false
9
+ code_review: true
10
+ ignore_patterns: []
.gitattributes CHANGED
@@ -33,3 +33,17 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
33
  *.zip filter=lfs diff=lfs merge=lfs -text
34
  *.zst filter=lfs diff=lfs merge=lfs -text
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
33
  *.zip filter=lfs diff=lfs merge=lfs -text
34
  *.zst filter=lfs diff=lfs merge=lfs -text
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
36
+ flash_attn-2.6.0.post1+cu122torch2.4cxx11abiFALSE-cp311-cp311-linux_x86_64.whl filter=lfs diff=lfs merge=lfs -text
37
+ flash_attn-2.6.0.post1+cu122torch2.4cxx11abiTRUE-cp311-cp311-linux_x86_64.whl filter=lfs diff=lfs merge=lfs -text
38
+ flash_attn-2.7.2.post1+cu12torch2.6cxx11abiFALSE-cp310-cp310-linux_x86_64.whl filter=lfs diff=lfs merge=lfs -text
39
+ flash_attn-2.7.2.post1+cu12torch2.6cxx11abiFALSE-cp311-cp311-linux_x86_64.whl filter=lfs diff=lfs merge=lfs -text
40
+ flash_attn-2.7.4.post1+cu12torch2.6cxx11abiFALSE-cp310-cp310-linux_x86_64.whl filter=lfs diff=lfs merge=lfs -text
41
+ flash_attn-2.7.4.post1+cu12torch2.6cxx11abiFALSE-cp311-cp311-linux_x86_64.whl filter=lfs diff=lfs merge=lfs -text
42
+ flash_attn-2.7.4.post1+cu12torch2.7cxx11abiFALSE-cp311-cp311-linux_x86_64.whl filter=lfs diff=lfs merge=lfs -text
43
+ flash_attn-2.8.2+cu12torch2.7cxx11abiFALSE-cp311-cp311-linux_x86_64.whl filter=lfs diff=lfs merge=lfs -text
44
+ flashinfer_python-0.2.2.post1+cu124torch2.6-cp38-abi3-linux_x86_64.whl filter=lfs diff=lfs merge=lfs -text
45
+ my_output/Qwen2.5-3B-Instruct-judge_model_Qwen2.5-14B-Instruct-math-hard-interactivity_0.3/log.txt filter=lfs diff=lfs merge=lfs -text
46
+ my_output/Qwen2.5-3B-Instruct-judge_model_Qwen2.5-14B-Instruct-math-hard-interactivity_0.7/log.txt filter=lfs diff=lfs merge=lfs -text
47
+ my_output/Qwen2.5-3B-Instruct-judge_model_Qwen2.5-14B-Instruct-math-hard-interactivity_0.7-rollout_4/log.txt filter=lfs diff=lfs merge=lfs -text
48
+ my_output/Qwen2.5-3B-Instruct-judge_model_Qwen2.5-14B-Instruct-math-hard-interactivity_1.0/log.txt filter=lfs diff=lfs merge=lfs -text
49
+ my_output/Qwen2.5-3B-Instruct-judge_model_Qwen2.5-14B-Instruct-math-hard-new/log.txt filter=lfs diff=lfs merge=lfs -text
.github/CODEOWNERS ADDED
@@ -0,0 +1,30 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ /docs @eric-haibin-lin @zhaochenyang20 @hongpeng-guo
2
+ /docs/amd_tutorial @yushengsu-thu
3
+ /docs/slang_multiturn @zhaochenyang20 @SwordFaith
4
+ /docs/ascend_tutorial @FightingZhen
5
+
6
+ /recipe/dapo @tongyx361 @PeterSH6 @vermouth1992 @tardis-key @FightingZhen @ji-huazhong
7
+ /recipe/spin @zhaochenyang20
8
+ /recipe/sppo @zhaochenyang20
9
+
10
+ /third_party/sglang @zhaochenyang20 @SwordFaith
11
+ /third_party/vllm @PeterSH6 @wuxibin89
12
+
13
+ /examples/grpo_trainer @vermouth1992 @PeterSH6 @tardis-key @FightingZhen @ji-huazhong
14
+
15
+ /verl/single_controller @zw0610 @wuxibin89 @hongpeng-guo
16
+ /verl/trainer @eric-haibin-lin @vermouth1992 @tongyx361 @PeterSH6
17
+ /verl/models/mcore @ISEEKYAN @vermouth1992
18
+ /verl/models/transformers @vermouth1992 @PeterSH6 @tardis-key @FightingZhen @ji-huazhong
19
+ /verl/workers/engine @eric-haibin-lin @vermouth1992 @ZihengJiang
20
+ /verl/workers/roles @eric-haibin-lin @vermouth1992 @ZihengJiang
21
+ /verl/workers/engine/fsdp @eric-haibin-lin @vermouth1992 @ZihengJiang
22
+ /verl/workers/rollout/vllm_rollout @wuxibin89 @PeterSH6 @chenhaiq
23
+ /verl/workers/rollout/sglang_rollout @zhaochenyang20 @SwordFaith @chenhaiq
24
+ /verl/workers/actor/megatron_actor.py @ISEEKYAN @vermouth1992
25
+ /verl/workers/critic/megatron_critic.py @ISEEKYAN @vermouth1992
26
+ /verl/workers/megatron_workers.py @ISEEKYAN @vermouth1992
27
+
28
+ /tests/single_controller @zw0610 @wuxibin89
29
+ /tests/trainer @eric-haibin-lin @vermouth1992 @tongyx361 @PeterSH6
30
+ /tests/workers/rollout/vllm_rollout @wuxibin89 @PeterSH6 @chenhaiq
.github/ISSUE_TEMPLATE/bug-report.yml ADDED
@@ -0,0 +1,65 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # modified from https://github.com/huggingface/transformers/blob/main/.github/ISSUE_TEMPLATE/bug-report.yml?plain=1
2
+ name: "\U0001F41B Bug Report"
3
+ description: Submit a bug report to help us improve verl
4
+ labels: [ "bug" ]
5
+ body:
6
+ - type: markdown
7
+ attributes:
8
+ value: |
9
+ Thanks for taking the time to fill out this bug report! 🤗
10
+
11
+ - type: textarea
12
+ id: system-info
13
+ attributes:
14
+ label: System Info
15
+ description: Please share your system info with us. You can run the command `python scripts/diagnose.py` and copy-paste its output below.
16
+ placeholder: verl version, platform, python version, ...
17
+ validations:
18
+ required: true
19
+
20
+ - type: checkboxes
21
+ id: information-scripts-examples
22
+ attributes:
23
+ label: Information
24
+ description: 'The problem arises when using:'
25
+ options:
26
+ - label: "The official example scripts"
27
+ - label: "My own modified scripts"
28
+
29
+ - type: checkboxes
30
+ id: information-tasks
31
+ attributes:
32
+ label: Tasks
33
+ description: "The tasks I am working on are:"
34
+ options:
35
+ - label: "An officially supported task in the `examples` folder (such as GLUE/SQuAD, ...)"
36
+ - label: "My own task or dataset (give details below)"
37
+
38
+ - type: textarea
39
+ id: reproduction
40
+ validations:
41
+ required: true
42
+ attributes:
43
+ label: Reproduction
44
+ description: |
45
+ Please provide a code sample that reproduces the problem you ran into. It can be a Colab link or just a code snippet.
46
+ Please include relevant config information with your code.
47
+ If you have code snippets, error messages, stack traces please provide them here as well.
48
+ Important! Use code tags to correctly format your code. See https://help.github.com/en/github/writing-on-github/creating-and-highlighting-code-blocks#syntax-highlighting
49
+ Do not use screenshots, as they are hard to read and (more importantly) don't allow others to copy-and-paste your code.
50
+
51
+ placeholder: |
52
+ Steps to reproduce the behavior:
53
+
54
+ 1.
55
+ 2.
56
+ 3.
57
+
58
+
59
+ - type: textarea
60
+ id: expected-behavior
61
+ validations:
62
+ required: true
63
+ attributes:
64
+ label: Expected behavior
65
+ description: "A clear and concise description of what you would expect to happen."
.github/ISSUE_TEMPLATE/config.yml ADDED
@@ -0,0 +1,2 @@
 
 
 
1
+ blank_issues_enabled: true
2
+ version: 0.1
.github/ISSUE_TEMPLATE/feature-request.yml ADDED
@@ -0,0 +1,32 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # modified from https://github.com/huggingface/transformers/blob/main/.github/ISSUE_TEMPLATE/feature-request.yml?plain=1
2
+ name: "\U0001F680 Feature request"
3
+ description: Submit a proposal/request for a new verl feature
4
+ labels: [ "Feature request" ]
5
+ body:
6
+ - type: textarea
7
+ id: feature-request
8
+ validations:
9
+ required: true
10
+ attributes:
11
+ label: Feature request
12
+ description: |
13
+ A clear and concise description of the feature proposal. Please provide a link to the paper and code in case they exist.
14
+
15
+ - type: textarea
16
+ id: motivation
17
+ validations:
18
+ required: true
19
+ attributes:
20
+ label: Motivation
21
+ description: |
22
+ Please outline the motivation for the proposal. Is your feature request related to a problem? e.g., I'm always frustrated when [...]. If this is related to another GitHub issue, please link here too.
23
+
24
+
25
+ - type: textarea
26
+ id: contribution
27
+ validations:
28
+ required: true
29
+ attributes:
30
+ label: Your contribution
31
+ description: |
32
+ Is there any way that you could help, e.g. by submitting a PR? Make sure to read the CONTRIBUTING.MD [readme](https://github.com/volcengine/verl/blob/main/CONTRIBUTING.md)
.github/PULL_REQUEST_TEMPLATE.md ADDED
@@ -0,0 +1,40 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ ### What does this PR do?
2
+
3
+ > Add **concise** overview of what this PR aims to achieve or accomplish. Reference related GitHub issues and PRs that help with the review.
4
+
5
+ ### Checklist Before Starting
6
+
7
+ - [ ] Search for similar PRs. Paste at least one query link here: ...
8
+ - [ ] Format the PR title as `[{modules}] {type}: {description}` (This will be checked by the CI)
9
+ - `{modules}` include `fsdp`, `megatron`, `sglang`, `vllm`, `rollout`, `trainer`, `ci`, `training_utils`, `recipe`, `hardware`, `deployment`, `ray`, `worker`, `single_controller`, `misc`, `perf`, `model`, `algo`, `env`, `tool`, `ckpt`, `doc`, `data`
10
+ - If this PR involves multiple modules, separate them with `,` like `[megatron, fsdp, doc]`
11
+ - `{type}` is in `feat`, `fix`, `refactor`, `chore`, `test`
12
+ - If this PR breaks any API (CLI arguments, config, function signature, etc.), add `[BREAKING]` to the beginning of the title.
13
+ - Example: `[BREAKING][fsdp, megatron] feat: dynamic batching`
14
+
15
+ ### Test
16
+
17
+ > For changes that can not be tested by CI (e.g., algorithm implementation, new model support), validate by experiment(s) and show results like training curve plots, evaluation results, etc.
18
+
19
+ ### API and Usage Example
20
+
21
+ > Demonstrate how the API changes if any, and provide usage example(s) if possible.
22
+
23
+ ```python
24
+ # Add code snippet or script demonstrating how to use this
25
+ ```
26
+
27
+ ### Design & Code Changes
28
+
29
+ > Demonstrate the high-level design if this PR is complex, and list the specific changes.
30
+
31
+ ### Checklist Before Submitting
32
+
33
+ > [!IMPORTANT]
34
+ > Please check all the following items before requesting a review, otherwise the reviewer might deprioritize this PR for review.
35
+
36
+ - [ ] Read the [Contribute Guide](https://github.com/volcengine/verl/blob/main/CONTRIBUTING.md).
37
+ - [ ] Apply [pre-commit checks](https://github.com/volcengine/verl/blob/main/CONTRIBUTING.md#code-linting-and-formatting): `pre-commit install && pre-commit run --all-files --show-diff-on-failure --color=always`
38
+ - [ ] Add / Update [the documentation](https://github.com/volcengine/verl/tree/main/docs).
39
+ - [ ] Add unit or end-to-end test(s) to [the CI workflow](https://github.com/volcengine/verl/tree/main/.github/workflows) to cover all the code. If not feasible, explain why: ...
40
+ - [ ] Once your PR is ready for CI, send a message in [the `ci-request` channel](https://verl-project.slack.com/archives/C091TCESWB1) in [the `verl` Slack workspace](https://join.slack.com/t/verl-project/shared_invite/zt-3855yhg8g-CTkqXu~hKojPCmo7k_yXTQ). (If not accessible, please try [the Feishu group (飞书群)](https://applink.larkoffice.com/client/chat/chatter/add_by_link?link_token=772jd4f1-cd91-441e-a820-498c6614126a).)
.github/dependabot.yml ADDED
@@ -0,0 +1,9 @@
 
 
 
 
 
 
 
 
 
 
1
+ ## Enabled the dependabot to check the dependencies of the project
2
+ ## Dependabot will open pull requests to update dependencies automatically
3
+
4
+ version: 2
5
+ updates:
6
+ - package-ecosystem: pip
7
+ directory: "/"
8
+ schedule:
9
+ interval: weekly
.github/workflows/.deprecate/e2e_eval_aime24.yml ADDED
@@ -0,0 +1,147 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # # Tests layout
2
+
3
+ # Each folder under tests/ corresponds to a test category for a sub-namespace in verl. For instance:
4
+ # - `tests/trainer` for testing functionality related to `verl/trainer`
5
+ # - `tests/models` for testing functionality related to `verl/models`
6
+ # - ...
7
+
8
+ # There are a few folders with `special_` prefix, created for special purposes:
9
+ # - `special_distributed`: unit tests that must run with multiple GPUs
10
+ # - `special_e2e`: end-to-end tests with training/generation scripts
11
+ # - `special_npu`: tests for NPUs
12
+ # - `special_sanity`: a suite of quick sanity tests
13
+ # - `special_standalone`: a set of test that are designed to run in dedicated environments
14
+
15
+ # Accelerators for tests
16
+ # - By default tests are run with GPU available, except for the ones under `special_npu`, and any test script whose name ends with `on_cpu.py`.
17
+ # - For test scripts with `on_cpu.py` name suffix would be tested on CPU resources in linux environment.
18
+
19
+ # # Workflow layout
20
+
21
+ # All CI tests are configured by yaml files in `.github/workflows/`. Here's an overview of all test configs:
22
+ # 1. A list of always triggered CPU sanity tests: `check-pr-title.yml`, `secrets_scan.yml`, `check-pr-title,yml`, `pre-commit.yml`, `doc.yml`
23
+ # 2. Some heavy multi-GPU unit tests, such as `model.yml`, `vllm.yml`, `sgl.yml`
24
+ # 3. End-to-end tests: `e2e_*.yml`
25
+ # 4. Unit tests
26
+ # - `cpu_unit_tests.yml`, run pytest on all scripts with file name pattern `tests/**/test_*_on_cpu.py`
27
+ # - `gpu_unit_tests.yml`, run pytest on all scripts with file without the `on_cpu.py` suffix.
28
+ # - Since cpu/gpu unit tests by default runs all tests under `tests`, please make sure tests are manually excluded in them when
29
+ # - new workflow yaml is added to `.github/workflows`
30
+ # - new tests are added to workflow mentioned in 2.
31
+
32
+
33
+ name: e2e_eval_aime24
34
+
35
+ on:
36
+ # Trigger the workflow on push or pull request,
37
+ # but only for the main branch
38
+ # For push, for now only anti-patterns are specified so it is more conservative
39
+ # and achieves higher coverage.
40
+ push:
41
+ branches:
42
+ - main
43
+ - v0.*
44
+ paths:
45
+ - "**/*.py"
46
+ # Other entrypoints
47
+ - "!*.md"
48
+ - "!docker/**"
49
+ - "!docs/**"
50
+ - "!examples/**"
51
+ - "!tests/**"
52
+ - "!verl/trainer/main_*.py"
53
+ - "!verl/trainer/fsdp_sft_trainer.py"
54
+ - "!recipe/**"
55
+ - "recipe/r1"
56
+ - "!recipe/r1/README.md"
57
+ pull_request:
58
+ branches:
59
+ - main
60
+ paths:
61
+ - "**/*.py"
62
+ # Other entrypoints
63
+ - "!*.md"
64
+ - "!docker/**"
65
+ - "!docs/**"
66
+ - "!examples/**"
67
+ - "!tests/**"
68
+ - "!verl/trainer/main_*.py"
69
+ - "!verl/trainer/fsdp_sft_trainer.py"
70
+ # Home
71
+ - "recipe/r1"
72
+ - "!recipe/r1/README.md"
73
+ # Other recipes
74
+ - "!recipe/**"
75
+ # Entrypoints
76
+ - ".github/workflows/e2e_eval_aime24.yml"
77
+ - "tests/special_e2e/run_r1_distill_qwen_aime24_eval.sh"
78
+ - "verl/trainer/main_generation.py"
79
+ - "verl/trainer/config/generation.yaml"
80
+
81
+ # Cancel jobs on the same ref if a new one is triggered
82
+ concurrency:
83
+ group: ${{ github.workflow }}-${{ github.ref }}
84
+ cancel-in-progress: ${{ github.ref != 'refs/heads/main' }}
85
+
86
+ # Declare permissions just read content.
87
+ permissions:
88
+ contents: read
89
+
90
+ env:
91
+ IMAGE: "verl-ci-cn-beijing.cr.volces.com/verlai/verl:app-verl0.5-transformers4.55.4-vllm0.10.0-mcore0.13.0-te2.2"
92
+ DYNAMIC_RUNNER_ENDPOINT: "https://sd10g3clalm04ug7alq90.apigateway-cn-beijing.volceapi.com/runner"
93
+
94
+ jobs:
95
+ setup:
96
+ if: github.repository_owner == 'volcengine'
97
+ runs-on: ubuntu-latest
98
+ outputs:
99
+ runner-label: ${{ steps.create-runner.outputs.runner-label }}
100
+ mlp-task-id: ${{ steps.create-runner.outputs.mlp-task-id }}
101
+ steps:
102
+ - uses: actions/checkout@v4
103
+ - id: create-runner
104
+ uses: volcengine/vemlp-github-runner@v1
105
+ with:
106
+ mode: "create"
107
+ faas-url: "${{ env.DYNAMIC_RUNNER_ENDPOINT }}"
108
+ mlp-image: "${{ env.IMAGE }}"
109
+
110
+ e2e_eval_aime24:
111
+ needs: setup
112
+ runs-on: ["${{ needs.setup.outputs.runner-label || 'L20x8' }}"]
113
+ timeout-minutes: 40 # Increase this timeout value as needed
114
+ env:
115
+ HTTP_PROXY: ${{ secrets.PROXY_HTTP }}
116
+ HTTPS_PROXY: ${{ secrets.PROXY_HTTPS }}
117
+ NO_PROXY: "localhost,127.0.0.1,hf-mirror.com"
118
+ HF_ENDPOINT: "https://hf-mirror.com"
119
+ HF_HUB_ENABLE_HF_TRANSFER: "0" # This is more stable
120
+ steps:
121
+ - uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2
122
+ with:
123
+ fetch-depth: 0
124
+ - name: Install the current repository
125
+ run: |
126
+ pip3 install --no-deps -e .[test,gpu,math]
127
+ pip3 install math-verify transformers==4.56.2
128
+ - name: Prepare aime24 dataset
129
+ run: |
130
+ ray stop --force
131
+ python3 recipe/r1/data_process.py --task aime2024
132
+ - name: Running generation and evaluation in AIME 2024
133
+ run: |
134
+ ray stop --force
135
+ bash tests/special_e2e/run_r1_distill_qwen_aime24_eval.sh
136
+
137
+ cleanup:
138
+ runs-on: ubuntu-latest
139
+ needs: [setup, e2e_eval_aime24]
140
+ if: always()
141
+ steps:
142
+ - id: destroy-runner
143
+ uses: volcengine/vemlp-github-runner@v1
144
+ with:
145
+ mode: "destroy"
146
+ faas-url: "${{ env.DYNAMIC_RUNNER_ENDPOINT }}"
147
+ mlp-task-id: "${{ needs.setup.outputs.mlp-task-id }}"
.github/workflows/.deprecate/e2e_ppo_trainer.yml ADDED
@@ -0,0 +1,133 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ name: e2e_ppo_trainer_deprecate
2
+
3
+ on:
4
+ # Trigger the workflow on push or pull request,
5
+ # but only for the main branch
6
+ # For push, for now only anti-patterns are specified so it is more conservative
7
+ # and achieves higher coverage.
8
+ push:
9
+ branches:
10
+ - disabled_ci
11
+ pull_request:
12
+ branches:
13
+ - disabled_ci
14
+ paths:
15
+ - "**/*.py"
16
+ # Other entrypoints
17
+ - "!**/*.md"
18
+ - "!docker/**"
19
+ - "!examples/**"
20
+ - "!tests/**"
21
+ - "!verl/trainer/main_*.py"
22
+ - "!verl/trainer/fsdp_sft_trainer.py"
23
+ # Docs
24
+ - "!docs/**"
25
+ # Recipes
26
+ - "!recipe/**"
27
+ # Megatron
28
+ - "!verl/workers/**/megatron_*.py"
29
+ # Entrypoints
30
+ - ".github/workflows/e2e_ppo_trainer.yml"
31
+ - "examples/data_preprocess/gsm8k.py"
32
+ - "examples/data_preprocess/geo3k.py"
33
+ - "tests/special_e2e/ppo_trainer"
34
+ - "verl/trainer/main_ppo.py"
35
+ - "verl/trainer/config/ppo_trainer.yaml"
36
+
37
+ # Cancel jobs on the same ref if a new one is triggered
38
+ concurrency:
39
+ group: ${{ github.workflow }}-${{ github.ref }}
40
+ cancel-in-progress: ${{ github.ref != 'refs/heads/main' }}
41
+
42
+ # Declare permissions just read content.
43
+ permissions:
44
+ contents: read
45
+
46
+ jobs:
47
+ pre_commit_for_ppo:
48
+ runs-on: ubuntu-latest
49
+ strategy:
50
+ matrix:
51
+ python-version: ["3.12"]
52
+ steps:
53
+ - uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2
54
+ - name: Set up Python ${{ matrix.python-version }}
55
+ uses: actions/setup-python@0b93645e9fea7318ecaed2b359559ac225c90a2b # v5.3.0
56
+ with:
57
+ python-version: ${{ matrix.python-version }}
58
+ - name: Install the current repository
59
+ run: |
60
+ pip install -e .
61
+ - name: Set ruff --output-format=github
62
+ run: |
63
+ sed -i 's/--output-format=full/--output-format=github/' .pre-commit-config.yaml
64
+ git add .pre-commit-config.yaml
65
+ - uses: pre-commit/action@v3.0.1
66
+ with:
67
+ extra_args: "" # Overriding default "--all-files"
68
+
69
+ e2e_ppo_trainer_sglang_multiturn_with_tool:
70
+ runs-on: [L20x8]
71
+ needs: pre_commit_for_ppo
72
+ timeout-minutes: 40 # Increase this timeout value as needed
73
+ env:
74
+ HTTP_PROXY: ${{ secrets.PROXY_HTTP }}
75
+ HTTPS_PROXY: ${{ secrets.PROXY_HTTPS }}
76
+ NO_PROXY: "localhost,127.0.0.1,hf-mirror.com"
77
+ HF_ENDPOINT: "https://hf-mirror.com"
78
+ HF_HUB_ENABLE_HF_TRANSFER: "0" # This is more stable
79
+ container:
80
+ image: verlai/verl:app-verl0.6-transformers4.56.1-sglang0.5.2-mcore0.13.0-te2.2
81
+ options: --gpus all --shm-size=10g
82
+ steps:
83
+ - uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2
84
+ with:
85
+ fetch-depth: 0
86
+ - name: Install the current repository
87
+ run: |
88
+ pip3 install -e .[test,gpu,sglang]
89
+ - name: Prepare gsm8k dataset with tool
90
+ run: |
91
+ ray stop --force
92
+ python3 examples/data_preprocess/gsm8k_multiturn_w_tool.py --local_save_dir $HOME/data/gsm8k_verl_sgl_multi_turn_preprocessed
93
+ - name: Running GSM8K with tool E2E training tests on 8 L20 GPUs with rmpad using function rm and save ckpt with sglang
94
+ run: |
95
+ ray stop --force
96
+ bash tests/special_e2e/run_gsm8k_fsdp_sgl_multiturn_w_tool.sh
97
+ - name: Running GSM8K with tool E2E training tests with FSDP2
98
+ run: |
99
+ ray stop --force
100
+ FSDP_STRATEGY=fsdp2 bash tests/special_e2e/run_gsm8k_fsdp_sgl_multiturn_w_tool.sh
101
+
102
+ e2e_ppo_trainer_sglang_vlm_multiturn_with_tool:
103
+ runs-on: [L20x8]
104
+ needs: pre_commit_for_ppo
105
+ timeout-minutes: 40 # Increase this timeout value as needed
106
+ env:
107
+ HTTP_PROXY: ${{ secrets.PROXY_HTTP }}
108
+ HTTPS_PROXY: ${{ secrets.PROXY_HTTPS }}
109
+ NO_PROXY: "localhost,127.0.0.1,hf-mirror.com"
110
+ HF_ENDPOINT: "https://hf-mirror.com"
111
+ HF_HUB_ENABLE_HF_TRANSFER: "0" # This is more stable
112
+ container:
113
+ image: verlai/verl:app-verl0.6-transformers4.56.1-sglang0.5.2-mcore0.13.0-te2.2
114
+ options: --gpus all --shm-size=10g
115
+ steps:
116
+ - uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2
117
+ with:
118
+ fetch-depth: 0
119
+ - name: Install the current repository
120
+ run: |
121
+ pip3 install -e .[test,geo,gpu,sglang]
122
+ - name: Prepare geo3k dataset with tool
123
+ run: |
124
+ ray stop --force
125
+ python3 examples/data_preprocess/geo3k_multiturn_w_tool.py --local_dir $HOME/data/geo3k_verl_sgl_multi_turn_preprocessed
126
+ - name: Running GEO3K with tool E2E training tests on 8 L20 GPUs with rmpad using function rm and save ckpt with sglang
127
+ run: |
128
+ ray stop --force
129
+ bash tests/special_e2e/run_geo3k_fsdp_sgl_multiturn_w_tool.sh
130
+ - name: Running GEO3K with tool E2E training tests with FSDP2
131
+ run: |
132
+ ray stop --force
133
+ FSDP_STRATEGY=fsdp2 bash tests/special_e2e/run_geo3k_fsdp_sgl_multiturn_w_tool.sh
.github/workflows/.deprecate/e2e_ppo_trainer_megatron_sglang.yml ADDED
@@ -0,0 +1,155 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # # Tests layout
2
+
3
+ # Each folder under tests/ corresponds to a test category for a sub-namespace in verl. For instance:
4
+ # - `tests/trainer` for testing functionality related to `verl/trainer`
5
+ # - `tests/models` for testing functionality related to `verl/models`
6
+ # - ...
7
+
8
+ # There are a few folders with `special_` prefix, created for special purposes:
9
+ # - `special_distributed`: unit tests that must run with multiple GPUs
10
+ # - `special_e2e`: end-to-end tests with training/generation scripts
11
+ # - `special_npu`: tests for NPUs
12
+ # - `special_sanity`: a suite of quick sanity tests
13
+ # - `special_standalone`: a set of test that are designed to run in dedicated environments
14
+
15
+ # Accelerators for tests
16
+ # - By default tests are run with GPU available, except for the ones under `special_npu`, and any test script whose name ends with `on_cpu.py`.
17
+ # - For test scripts with `on_cpu.py` name suffix would be tested on CPU resources in linux environment.
18
+
19
+ # # Workflow layout
20
+
21
+ # All CI tests are configured by yaml files in `.github/workflows/`. Here's an overview of all test configs:
22
+ # 1. A list of always triggered CPU sanity tests: `check-pr-title.yml`, `secrets_scan.yml`, `check-pr-title,yml`, `pre-commit.yml`, `doc.yml`
23
+ # 2. Some heavy multi-GPU unit tests, such as `model.yml`, `vllm.yml`, `sgl.yml`
24
+ # 3. End-to-end tests: `e2e_*.yml`
25
+ # 4. Unit tests
26
+ # - `cpu_unit_tests.yml`, run pytest on all scripts with file name pattern `tests/**/test_*_on_cpu.py`
27
+ # - `gpu_unit_tests.yml`, run pytest on all scripts with file without the `on_cpu.py` suffix.
28
+ # - Since cpu/gpu unit tests by default runs all tests under `tests`, please make sure tests are manually excluded in them when
29
+ # - new workflow yaml is added to `.github/workflows`
30
+ # - new tests are added to workflow mentioned in 2.
31
+
32
+ name: e2e_ppo_trainer_megatron_sglang_deprecate
33
+
34
+ on:
35
+ # Trigger the workflow on push or pull request,
36
+ # but only for the main branch.
37
+ # For push, for now only anti-patterns are specified so it is more conservative
38
+ # and achieves higher coverage.
39
+ push:
40
+ branches:
41
+ - disabled_ci
42
+ pull_request:
43
+ branches:
44
+ - disabled_ci
45
+ paths:
46
+ - "**/*.py"
47
+ # Other entrypoints
48
+ - "!docker/**"
49
+ # Docs
50
+ - "!**/*.md"
51
+ - "!docs/**"
52
+ - "!examples/**"
53
+ - "!tests/**"
54
+ - "!verl/trainer/main_*.py"
55
+ - "!verl/trainer/fsdp_sft_trainer.py"
56
+ # Recipes
57
+ - "!recipe/**"
58
+ # FSDP
59
+ - "!verl/workers/**/*dp_*.py"
60
+ # Entrypoints
61
+ - ".github/workflows/e2e_ppo_trainer_megatron_sglang.yml"
62
+ - "examples/data_preprocess/gsm8k.py"
63
+ - "examples/data_preprocess/geo3k.py"
64
+ - "tests/special_e2e/run_ppo_trainer_megatron.sh"
65
+ - "verl/trainer/main_ppo.py"
66
+ - "verl/trainer/config/ppo_megatron_trainer.yaml"
67
+
68
+ # Cancel jobs on the same ref if a new one is triggered
69
+ concurrency:
70
+ group: ${{ github.workflow }}-${{ github.ref }}
71
+ cancel-in-progress: ${{ github.ref != 'refs/heads/main' }}
72
+
73
+ # Declare permissions just read content.
74
+ permissions:
75
+ contents: read
76
+
77
+ env:
78
+ IMAGE: "verl-ci-cn-beijing.cr.volces.com/verlai/verl:app-verl0.6-transformers4.56.1-sglang0.5.2-mcore0.13.0-te2.2"
79
+ DYNAMIC_RUNNER_ENDPOINT: "https://sd10g3clalm04ug7alq90.apigateway-cn-beijing.volceapi.com/runner"
80
+
81
+ jobs:
82
+ setup:
83
+ if: github.repository_owner == 'volcengine'
84
+ runs-on: ubuntu-latest
85
+ outputs:
86
+ runner-label: ${{ steps.create-runner.outputs.runner-label }}
87
+ mlp-task-id: ${{ steps.create-runner.outputs.mlp-task-id }}
88
+ steps:
89
+ - uses: actions/checkout@v4
90
+ - id: create-runner
91
+ uses: volcengine/vemlp-github-runner@v1
92
+ with:
93
+ mode: "create"
94
+ faas-url: "${{ env.DYNAMIC_RUNNER_ENDPOINT }}"
95
+ mlp-image: "${{ env.IMAGE }}"
96
+
97
+ e2e_ppo_trainer_megatron-qwen3:
98
+ needs: setup
99
+ runs-on: ["${{ needs.setup.outputs.runner-label || 'L20x8' }}"]
100
+ timeout-minutes: 60 # Increase this timeout value as needed
101
+ env:
102
+ HTTP_PROXY: ${{ secrets.PROXY_HTTP }}
103
+ HTTPS_PROXY: ${{ secrets.PROXY_HTTPS }}
104
+ NO_PROXY: "localhost,127.0.0.1,hf-mirror.com"
105
+ HF_ENDPOINT: "https://hf-mirror.com"
106
+ HF_HUB_ENABLE_HF_TRANSFER: "0" # This is more stable
107
+ steps:
108
+ - uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2
109
+ with:
110
+ fetch-depth: 0
111
+ - name: Install the current repository
112
+ run: |
113
+ pip3 install --no-deps -e .[test]
114
+ - name: Prepare GSM8K dataset
115
+ run: |
116
+ python3 examples/data_preprocess/gsm8k.py
117
+ - name: Running GSM8K E2E training tests with 3D parallelism on 8 L20 GPUs with Megatron (Qwen3) with validation and saving
118
+ run: |
119
+ ray stop --force
120
+ ENGINE=sglang ALL_OFFLOAD=True VAL_BEFORE_TRAIN=True TEST_FREQ=1 SAVE_FREQ=1 MODEL_ID=Qwen/Qwen3-0.6B bash tests/special_e2e/run_ppo_trainer_megatron.sh
121
+ - name: Running GSM8K E2E training tests with 3D parallelism on 8 L20 GPUs with Megatron (Qwen3) testing learning rate scheduler
122
+ run: |
123
+ ray stop --force
124
+ ENGINE=sglang LR_WARMUP_STEPS=1 TOTAL_TRAIN_STEPS=2 MODEL_ID=Qwen/Qwen3-0.6B bash tests/special_e2e/run_ppo_trainer_megatron.sh
125
+
126
+ - name: Test Megatron checkpoints merging function (Qwen3 Actor and Critic)
127
+ run: |
128
+ exp_name="qwen3-0.6b-megatron-gsm8k-minimal"
129
+ python -m verl.model_merger test --backend megatron --tie-word-embedding --local_dir checkpoints/verl-test/${exp_name}/global_step_1/actor --test_hf_dir checkpoints/verl-test/${exp_name}/global_step_1/actor/huggingface
130
+ python -m verl.model_merger test --backend megatron --is-value-model --local_dir checkpoints/verl-test/${exp_name}/global_step_1/critic --test_hf_dir checkpoints/verl-test/${exp_name}/global_step_1/critic/huggingface
131
+ - name: clean up
132
+ run: |
133
+ rm -rf checkpoints
134
+
135
+ cleanup:
136
+ runs-on: ubuntu-latest
137
+ needs:
138
+ [
139
+ setup,
140
+ e2e_ppo_trainer_megatron-deepseek,
141
+ e2e_ppo_trainer_megatron-qwen3,
142
+ e2e_ppo_trainer_megatron-different-train-infer-tp-qwen-tie-embedding,
143
+ e2e_ppo_trainer_megatron-qwen-override-transformer-config,
144
+ e2e_ppo_trainer_megatron-deepseek-override-transformer-config,
145
+ e2e_ppo_trainer_megatron-moe-expert-parallel,
146
+ e2e_ppo_trainer_megatron-qwen2_5vl-3b,
147
+ ]
148
+ if: always()
149
+ steps:
150
+ - id: destroy-runner
151
+ uses: volcengine/vemlp-github-runner@v1
152
+ with:
153
+ mode: "destroy"
154
+ faas-url: "${{ env.DYNAMIC_RUNNER_ENDPOINT }}"
155
+ mlp-task-id: "${{ needs.setup.outputs.mlp-task-id }}"
.github/workflows/.deprecate/e2e_prime.yml ADDED
@@ -0,0 +1,66 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ name: e2e_prime_deprecate
2
+
3
+ on:
4
+ # Trigger the workflow on push or pull request,
5
+ # but only for the main branch
6
+ push:
7
+ branches:
8
+ - disabled_ci
9
+ pull_request:
10
+ branches:
11
+ - disabled_ci
12
+ paths:
13
+ - "**/*.py"
14
+ # Other entrypoints
15
+ - "!examples/**"
16
+ - "!tests/**"
17
+ - "!verl/trainer/main_*.py"
18
+ - "!verl/trainer/fsdp_sft_trainer.py"
19
+ # Other recipes
20
+ - "!recipe/**"
21
+ # Megatron
22
+ - "!verl/workers/**/megatron_*.py"
23
+ # Home
24
+ - "recipe/prime"
25
+ # Entrypoints
26
+ - ".github/workflows/e2e_prime.yml"
27
+ - "examples/data_preprocess/gsm8k.py"
28
+ - "tests/special_e2e/run_prime.sh"
29
+
30
+ # Cancel jobs on the same ref if a new one is triggered
31
+ concurrency:
32
+ group: ${{ github.workflow }}-${{ github.ref }}
33
+ cancel-in-progress: ${{ github.ref != 'refs/heads/main' }}
34
+
35
+ # Declare permissions just read content.
36
+ permissions:
37
+ contents: read
38
+
39
+ jobs:
40
+ e2e_prime:
41
+ runs-on: [L20x8]
42
+ timeout-minutes: 50 # Increase this timeout value as needed
43
+ env:
44
+ HTTP_PROXY: ${{ secrets.PROXY_HTTP }}
45
+ HTTPS_PROXY: ${{ secrets.PROXY_HTTPS }}
46
+ NO_PROXY: "localhost,127.0.0.1,hf-mirror.com"
47
+ HF_ENDPOINT: "https://hf-mirror.com"
48
+ HF_HUB_ENABLE_HF_TRANSFER: "0" # This is more stable
49
+ container:
50
+ image: whatcanyousee/verl:ngc-cu124-vllm0.8.5-sglang0.4.6.post5-mcore0.12.0-te2.3
51
+ options: --gpus all --shm-size=10g
52
+ steps:
53
+ - uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2
54
+ with:
55
+ fetch-depth: 0
56
+ - name: Install the current repository
57
+ run: |
58
+ pip3 install --no-deps -e .[test,gpu]
59
+ - name: Prepare gsm8k dataset
60
+ run: |
61
+ ray stop --force
62
+ python3 examples/data_preprocess/gsm8k.py
63
+ - name: Running GSM8K E2E with prime alg
64
+ run: |
65
+ ray stop --force
66
+ bash tests/special_e2e/run_prime.sh
.github/workflows/.deprecate/e2e_spin.yml ADDED
@@ -0,0 +1,119 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ name: e2e_spin
2
+
3
+ on:
4
+ # Trigger the workflow on push or pull request,
5
+ # but only for the main branch
6
+ push:
7
+ branches:
8
+ - main
9
+ - v0.*
10
+ paths:
11
+ - "**/*.py"
12
+ # Other entrypoints
13
+ - "!examples/**"
14
+ - "!tests/**"
15
+ - "!verl/trainer/main_*.py"
16
+ - "!verl/trainer/fsdp_sft_trainer.py"
17
+ # Other recipes
18
+ - "!recipe/**"
19
+ # Megatron
20
+ - "!verl/workers/**/megatron_*.py"
21
+ # Home
22
+ - "recipe/spin"
23
+ # Entrypoints
24
+ - ".github/workflows/e2e_spin.yml"
25
+ - "examples/data_preprocess/gsm8k.py"
26
+ - "tests/special_e2e/run_spin.sh"
27
+ - "!examples"
28
+ pull_request:
29
+ branches:
30
+ - main
31
+ - v0.*
32
+ paths:
33
+ - "**/*.py"
34
+ # Other entrypoints
35
+ - "!examples/**"
36
+ - "!tests/**"
37
+ - "!verl/trainer/main_*.py"
38
+ - "!verl/trainer/fsdp_sft_trainer.py"
39
+ # Other recipes
40
+ - "!recipe/**"
41
+ # Megatron
42
+ - "!verl/workers/**/megatron_*.py"
43
+ # Home
44
+ - "recipe/spin"
45
+ # Entrypoints
46
+ - ".github/workflows/e2e_spin.yml"
47
+ - "examples/data_preprocess/gsm8k.py"
48
+ - "tests/special_e2e/run_spin.sh"
49
+ - "!examples"
50
+
51
+ # Declare permissions just read content.
52
+ permissions:
53
+ contents: read
54
+
55
+ env:
56
+ IMAGE: "verl-ci-cn-beijing.cr.volces.com/verlai/verl:app-verl0.6-transformers4.56.1-sglang0.5.2-mcore0.13.0-te2.2"
57
+ DYNAMIC_RUNNER_ENDPOINT: "https://sd10g3clalm04ug7alq90.apigateway-cn-beijing.volceapi.com/runner"
58
+
59
+ # Cancel jobs on the same ref if a new one is triggered
60
+ concurrency:
61
+ group: ${{ github.workflow }}-${{ github.ref }}
62
+ cancel-in-progress: ${{ github.ref != 'refs/heads/main' }}
63
+
64
+ jobs:
65
+ setup:
66
+ if: github.repository_owner == 'volcengine'
67
+ runs-on: ubuntu-latest
68
+ outputs:
69
+ runner-label: ${{ steps.create-runner.outputs.runner-label }}
70
+ mlp-task-id: ${{ steps.create-runner.outputs.mlp-task-id }}
71
+ steps:
72
+ - uses: actions/checkout@v4
73
+ - id: create-runner
74
+ uses: volcengine/vemlp-github-runner@v1
75
+ with:
76
+ mode: "create"
77
+ faas-url: "${{ env.DYNAMIC_RUNNER_ENDPOINT }}"
78
+ mlp-image: "${{ env.IMAGE }}"
79
+
80
+ e2e_spin:
81
+ needs: setup
82
+ runs-on: [ "${{ needs.setup.outputs.runner-label || 'L20x8' }}" ]
83
+ timeout-minutes: 40 # Increase this timeout value as needed
84
+ env:
85
+ HTTP_PROXY: ${{ secrets.PROXY_HTTP }}
86
+ HTTPS_PROXY: ${{ secrets.PROXY_HTTPS }}
87
+ NO_PROXY: "localhost,127.0.0.1,hf-mirror.com"
88
+ HF_ENDPOINT: "https://hf-mirror.com"
89
+ HF_HUB_ENABLE_HF_TRANSFER: "0" # This is more stable
90
+ steps:
91
+ - uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2
92
+ with:
93
+ fetch-depth: 0
94
+ - name: Install the current repository
95
+ run: |
96
+ pip3 install -e .[test,gpu,sglang]
97
+ - name: Prepare GSM8K dataset
98
+ run: |
99
+ python3 examples/data_preprocess/gsm8k.py --local_dataset_path ${HOME}/models/hf_data/gsm8k
100
+ - name: Running the E2E test with the spin algorithm
101
+ run: |
102
+ ray stop --force
103
+ bash tests/special_e2e/run_spin.sh
104
+
105
+ cleanup:
106
+ runs-on: ubuntu-latest
107
+ needs:
108
+ [
109
+ setup,
110
+ e2e_spin
111
+ ]
112
+ if: always()
113
+ steps:
114
+ - id: destroy-runner
115
+ uses: volcengine/vemlp-github-runner@v1
116
+ with:
117
+ mode: "destroy"
118
+ faas-url: "${{ env.DYNAMIC_RUNNER_ENDPOINT }}"
119
+ mlp-task-id: "${{ needs.setup.outputs.mlp-task-id }}"
.github/workflows/.deprecate/e2e_sppo.yml ADDED
@@ -0,0 +1,118 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ name: e2e_sppo
2
+
3
+ on:
4
+ # Trigger the workflow on push or pull request,
5
+ # but only for the main branch
6
+ push:
7
+ branches:
8
+ - main
9
+ - v0.*
10
+ paths:
11
+ - "**/*.py"
12
+ # Other entrypoints
13
+ - "!examples/**"
14
+ - "!tests/**"
15
+ - "!verl/trainer/main_*.py"
16
+ - "!verl/trainer/fsdp_sft_trainer.py"
17
+ # Other recipes
18
+ - "!recipe/**"
19
+ # Megatron
20
+ - "!verl/workers/**/megatron_*.py"
21
+ # Home
22
+ - "recipe/sppo"
23
+ # Entrypoints
24
+ - ".github/workflows/e2e_sppo.yml"
25
+ - "examples/data_preprocess/gsm8k.py"
26
+ - "tests/special_e2e/run_sppo.sh"
27
+ pull_request:
28
+ branches:
29
+ - main
30
+ - v0.*
31
+ paths:
32
+ - "**/*.py"
33
+ # Other entrypoints
34
+ - "!examples/**"
35
+ - "!tests/**"
36
+ - "!verl/trainer/main_*.py"
37
+ - "!verl/trainer/fsdp_sft_trainer.py"
38
+ # Other recipes
39
+ - "!recipe/**"
40
+ # Megatron
41
+ - "!verl/workers/**/megatron_*.py"
42
+ # Home
43
+ - "recipe/sppo"
44
+ # Entrypoints
45
+ - ".github/workflows/e2e_sppo.yml"
46
+ - "examples/data_preprocess/gsm8k.py"
47
+ - "tests/special_e2e/run_sppo.sh"
48
+
49
+ # Declare permissions just read content.
50
+ permissions:
51
+ contents: read
52
+
53
+ # Cancel jobs on the same ref if a new one is triggered
54
+ concurrency:
55
+ group: ${{ github.workflow }}-${{ github.ref }}
56
+ cancel-in-progress: ${{ github.ref != 'refs/heads/main' }}
57
+
58
+ env:
59
+ IMAGE: "verl-ci-cn-beijing.cr.volces.com/verlai/verl:app-verl0.6-transformers4.56.1-sglang0.5.2-mcore0.13.0-te2.2"
60
+ DYNAMIC_RUNNER_ENDPOINT: "https://sd10g3clalm04ug7alq90.apigateway-cn-beijing.volceapi.com/runner"
61
+ TRANSFORMERS_VERSION: "4.56.2"
62
+
63
+ jobs:
64
+ setup:
65
+ if: github.repository_owner == 'volcengine'
66
+ runs-on: ubuntu-latest
67
+ outputs:
68
+ runner-label: ${{ steps.create-runner.outputs.runner-label }}
69
+ mlp-task-id: ${{ steps.create-runner.outputs.mlp-task-id }}
70
+ steps:
71
+ - uses: actions/checkout@v4
72
+ - id: create-runner
73
+ uses: volcengine/vemlp-github-runner@v1
74
+ with:
75
+ mode: "create"
76
+ faas-url: "${{ env.DYNAMIC_RUNNER_ENDPOINT }}"
77
+ mlp-image: "${{ env.IMAGE }}"
78
+
79
+ e2e_sppo:
80
+ needs: setup
81
+ runs-on: [ "${{ needs.setup.outputs.runner-label || 'L20x8' }}" ]
82
+ timeout-minutes: 40 # Increase this timeout value as needed
83
+ env:
84
+ HTTP_PROXY: ${{ secrets.PROXY_HTTP }}
85
+ HTTPS_PROXY: ${{ secrets.PROXY_HTTPS }}
86
+ NO_PROXY: "localhost,127.0.0.1,hf-mirror.com"
87
+ HF_ENDPOINT: "https://hf-mirror.com"
88
+ HF_HUB_ENABLE_HF_TRANSFER: "0" # This is more stable
89
+ steps:
90
+ - uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2
91
+ with:
92
+ fetch-depth: 0
93
+ - name: Install the current repository
94
+ run: |
95
+ pip3 install -e .[test,gpu,sglang]
96
+ - name: Prepare MATH dataset
97
+ run: |
98
+ python3 examples/data_preprocess/math_dataset.py --local_dataset_path $HOME/models/hf_data/DigitalLearningGmbH/MATH-lighteval
99
+ - name: Running the E2E test with the SPPO algorithm
100
+ run: |
101
+ ray stop --force
102
+ bash tests/special_e2e/run_sppo.sh
103
+
104
+ cleanup:
105
+ runs-on: ubuntu-latest
106
+ needs:
107
+ [
108
+ setup,
109
+ e2e_sppo
110
+ ]
111
+ if: always()
112
+ steps:
113
+ - id: destroy-runner
114
+ uses: volcengine/vemlp-github-runner@v1
115
+ with:
116
+ mode: "destroy"
117
+ faas-url: "${{ env.DYNAMIC_RUNNER_ENDPOINT }}"
118
+ mlp-task-id: "${{ needs.setup.outputs.mlp-task-id }}"
.github/workflows/README.md ADDED
@@ -0,0 +1,73 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ ### Adding a New Workflow
2
+
3
+ When adding a new workflow for continuous integration (CI), you have two runner options: a fixed runner or a machine from the vemlp.
4
+
5
+ - **Fixed Runner**: To use a fixed runner, specify it in your workflow using the `runs-on` keyword, like `runs-on: [L20x8]`.
6
+ - **Vemlp Runner**: Opting for a Vemlp machine allows you to launch tasks elastically.
7
+
8
+ Here is a template to assist you. This template is designed for using Vemlp machines. Currently, for each workflow, you need to create a `setup` and a `cleanup` job. When using this template, the main parts you need to modify are the `IMAGE` environment variable and the specific `job steps`.
9
+
10
+ ```yaml
11
+ name: Your Default Workflow
12
+
13
+ on:
14
+ push:
15
+ branches:
16
+ - main
17
+ - v0.*
18
+ pull_request:
19
+ branches:
20
+ - main
21
+ - v0.*
22
+ paths:
23
+ - "**/*.py"
24
+ - ".github/workflows/template.yml"
25
+
26
+ concurrency:
27
+ group: ${{ github.workflow }}-${{ github.ref }}
28
+ cancel-in-progress: ${{ github.ref != 'refs/heads/main' }}
29
+
30
+ permissions:
31
+ contents: read
32
+
33
+ env:
34
+ IMAGE: "your vemlp image" # e.g. "verl-ci-cn-beijing.cr.volces.com/verlai/verl:app-verl0.4-vllm0.8.5-mcore0.12.2"
35
+ DYNAMIC_RUNNER_URL: "https://sd10g3clalm04ug7alq90.apigateway-cn-beijing.volceapi.com/runner" # public veFaas api
36
+
37
+ jobs:
38
+ setup:
39
+ if: github.repository_owner == 'volcengine'
40
+ runs-on: ubuntu-latest
41
+ outputs:
42
+ runner-label: ${{ steps.create-runner.outputs.runner-label }}
43
+ task-id: ${{ steps.create-runner.outputs.task-id }}
44
+ steps:
45
+ - uses: actions/checkout@v4
46
+ - id: create-runner
47
+ uses: volcengine/vemlp-github-runner@v1
48
+ with:
49
+ mode: "create"
50
+ faas-url: "${{ env.DYNAMIC_RUNNER_URL }}"
51
+ image: "${{ env.IMAGE }}"
52
+
53
+ your_job:
54
+ needs: setup
55
+ runs-on: ["${{ needs.setup.outputs.runner-label || 'default-runner' }}"]
56
+ steps:
57
+ xxxx # your jobs
58
+
59
+ cleanup:
60
+ runs-on: ubuntu-latest
61
+ needs: [setup, your_job]
62
+ if: always()
63
+ steps:
64
+ - id: destroy-runner
65
+ uses: volcengine/vemlp-github-runner@v1
66
+ with:
67
+ mode: "destroy"
68
+ faas-url: "${{ env.DYNAMIC_RUNNER_URL }}"
69
+ task-id: "${{ needs.setup.outputs.task-id }}"
70
+ ```
71
+
72
+ ### Model and Dataset
73
+ To avoid CI relying on the network, we pre-download datasets to an NFS on the CI machine. The path for models is \${HOME}/models and the path for datasets is \${HOME}/models/hf_data.
.github/workflows/check-pr-title.yml ADDED
@@ -0,0 +1,58 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # # Tests layout
2
+
3
+ # Each folder under tests/ corresponds to a test category for a sub-namespace in verl. For instance:
4
+ # - `tests/trainer` for testing functionality related to `verl/trainer`
5
+ # - `tests/models` for testing functionality related to `verl/models`
6
+ # - ...
7
+
8
+ # There are a few folders with `special_` prefix, created for special purposes:
9
+ # - `special_distributed`: unit tests that must run with multiple GPUs
10
+ # - `special_e2e`: end-to-end tests with training/generation scripts
11
+ # - `special_npu`: tests for NPUs
12
+ # - `special_sanity`: a suite of quick sanity tests
13
+ # - `special_standalone`: a set of tests that are designed to run in dedicated environments
14
+
15
+ # Accelerators for tests
16
+ # - By default tests are run with GPU available, except for the ones under `special_npu`, and any test script whose name ends with `on_cpu.py`.
17
+ # - For test scripts with `on_cpu.py` name suffix would be tested on CPU resources in linux environment.
18
+
19
+ # # Workflow layout
20
+
21
+ # All CI tests are configured by yaml files in `.github/workflows/`. Here's an overview of all test configs:
22
+ # 1. A list of always triggered CPU sanity tests: `check-pr-title.yml`, `secrets_scan.yml`, `pre-commit.yml`, `doc.yml`
23
+ # 2. Some heavy multi-GPU unit tests, such as `model.yml`, `vllm.yml`, `sgl.yml`
24
+ # 3. End-to-end tests: `e2e_*.yml`
25
+ # 4. Unit tests
26
+ # - `cpu_unit_tests.yml`, run pytest on all scripts with file name pattern `tests/**/test_*_on_cpu.py`
27
+ # - `gpu_unit_tests.yml`, run pytest on all test scripts whose file names lack the `on_cpu.py` suffix.
28
+ # - Since cpu/gpu unit tests by default runs all tests under `tests`, please make sure tests are manually excluded in them when
29
+ # - new workflow yaml is added to `.github/workflows`
30
+ # - new tests are added to workflow mentioned in 2.
31
+
32
+
33
+ on:
34
+ pull_request:
35
+ types: [opened, edited, synchronize]
36
+
37
+ jobs:
38
+ check-title:
39
+ runs-on: ubuntu-latest
40
+ steps:
41
+ - name: Checkout code
42
+ uses: actions/checkout@v4
43
+
44
+ - name: Set up Python
45
+ uses: actions/setup-python@v5
46
+ with:
47
+ python-version: '3.11'
48
+
49
+ - name: Run PR title checker
50
+ run: python3 tests/special_sanity/check_pr_title.py
51
+ env:
52
+ PR_TITLE: ${{ github.event.pull_request.title }}
53
+
54
+ - name: Run PR description checker
55
+ run: python3 tests/special_sanity/check_pr_description.py
56
+ env:
57
+ PR_TITLE: ${{ github.event.pull_request.title }}
58
+ GITHUB_EVENT_PATH: ${{ github.event_path }}
.github/workflows/checkpoint_converter.yml ADDED
@@ -0,0 +1,175 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # # Tests layout
2
+
3
+ # Each folder under tests/ corresponds to a test category for a sub-namespace in verl. For instance:
4
+ # - `tests/trainer` for testing functionality related to `verl/trainer`
5
+ # - `tests/models` for testing functionality related to `verl/models`
6
+ # - ...
7
+
8
+ # There are a few folders with `special_` prefix, created for special purposes:
9
+ # - `special_distributed`: unit tests that must run with multiple GPUs
10
+ # - `special_e2e`: end-to-end tests with training/generation scripts
11
+ # - `special_npu`: tests for NPUs
12
+ # - `special_sanity`: a suite of quick sanity tests
13
+ # - `special_standalone`: a set of tests that are designed to run in dedicated environments
14
+
15
+ # Accelerators for tests
16
+ # - By default tests are run with GPU available, except for the ones under `special_npu`, and any test script whose name ends with `on_cpu.py`.
17
+ # - For test scripts with `on_cpu.py` name suffix would be tested on CPU resources in linux environment.
18
+
19
+ # # Workflow layout
20
+
21
+ # All CI tests are configured by yaml files in `.github/workflows/`. Here's an overview of all test configs:
22
+ # 1. A list of always triggered CPU sanity tests: `check-pr-title.yml`, `secrets_scan.yml`, `pre-commit.yml`, `doc.yml`
23
+ # 2. Some heavy multi-GPU unit tests, such as `model.yml`, `vllm.yml`, `sgl.yml`
24
+ # 3. End-to-end tests: `e2e_*.yml`
25
+ # 4. Unit tests
26
+ # - `cpu_unit_tests.yml`, run pytest on all scripts with file name pattern `tests/**/test_*_on_cpu.py`
27
+ # - `gpu_unit_tests.yml`, run pytest on all test scripts whose file names lack the `on_cpu.py` suffix.
28
+ # - Since cpu/gpu unit tests by default runs all tests under `tests`, please make sure tests are manually excluded in them when
29
+ # - new workflow yaml is added to `.github/workflows`
30
+ # - new tests are added to workflow mentioned in 2.
31
+
32
+ name: checkpoint_converter
33
+ # latest version: Megatron-LM core_r0.11.0 https://github.com/NVIDIA/Megatron-LM/tree/core_r0.11.0
34
+
35
+ on:
36
+ # Trigger the workflow on push or pull request,
37
+ # but only for the main branch
38
+ push:
39
+ branches:
40
+ - main
41
+ - v0.*
42
+ pull_request:
43
+ branches:
44
+ - main
45
+ - v0.*
46
+ paths:
47
+ - "**/*.py"
48
+ # Other entrypoints
49
+ - "!examples/**"
50
+ - "!tests/**"
51
+ - "!verl/trainer/main_*.py"
52
+ - "!verl/trainer/fsdp_sft_trainer.py"
53
+ # Recipes
54
+ - "!recipe/**"
55
+ # FSDP
56
+ - "!verl/workers/**/*dp_*.py"
57
+ # Entrypoints
58
+ - ".github/workflows/checkpoint_converter.yml"
59
+ - ".github/workflows/e2e_ppo_trainer_megatron.yml"
60
+ - "examples/data_preprocess/gsm8k.py"
61
+ - "tests/special_e2e/run_ppo_trainer_megatron.sh"
62
+ - "verl/trainer/main_ppo.py"
63
+ - "verl/trainer/config/ppo_megatron_trainer.yaml"
64
+
65
+ # Cancel jobs on the same ref if a new one is triggered
66
+ concurrency:
67
+ group: ${{ github.workflow }}-${{ github.ref }}
68
+ cancel-in-progress: ${{ github.ref != 'refs/heads/main' }}
69
+
70
+ # Declare permissions just read content.
71
+ permissions:
72
+ contents: read
73
+
74
+ env:
75
+ IMAGE: "verl-ci-cn-beijing.cr.volces.com/verlai/verl:app-verl0.6-transformers4.56.1-sglang0.5.2-mcore0.13.0-te2.2"
76
+ DYNAMIC_RUNNER_ENDPOINT: "https://sd10g3clalm04ug7alq90.apigateway-cn-beijing.volceapi.com/runner"
77
+
78
+ jobs:
79
+ setup:
80
+ if: github.repository_owner == 'volcengine'
81
+ runs-on: ubuntu-latest
82
+ outputs:
83
+ runner-label: ${{ steps.create-runner.outputs.runner-label }}
84
+ mlp-task-id: ${{ steps.create-runner.outputs.mlp-task-id }}
85
+ steps:
86
+ - uses: actions/checkout@v4
87
+ - id: create-runner
88
+ uses: volcengine/vemlp-github-runner@v1
89
+ with:
90
+ mode: "create"
91
+ faas-url: "${{ env.DYNAMIC_RUNNER_ENDPOINT }}"
92
+ mlp-image: "${{ env.IMAGE }}"
93
+
94
+ checkpoint_converter:
95
+ needs: setup
96
+ runs-on: [ "${{ needs.setup.outputs.runner-label || 'L20x8' }}" ]
97
+ timeout-minutes: 20 # Increase this timeout value as needed
98
+ env:
99
+ HTTP_PROXY: ${{ secrets.PROXY_HTTP }}
100
+ HTTPS_PROXY: ${{ secrets.PROXY_HTTPS }}
101
+ NO_PROXY: "localhost,127.0.0.1"
102
+ HF_HUB_ENABLE_HF_TRANSFER: "0" # This is more stable
103
+ steps:
104
+ - uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2
105
+ with:
106
+ fetch-depth: 0
107
+ - name: Install the current repository
108
+ run: |
109
+ pip3 install -e .[test]
110
+ # - name: Download Model to Use
111
+ # run: |
112
+ # huggingface-cli download Qwen/Qwen2.5-0.5B --local-dir ${HOME}/models/Qwen/Qwen2.5-0.5B
113
+ # huggingface-cli download deepseek-ai/deepseek-coder-1.3b-instruct --local-dir ${HOME}/models/deepseek-ai/deepseek-coder-1.3b-instruct
114
+ # export HF_HUB_OFFLINE=1
115
+ - name: Running Huggingface to Megatron dist_ckpt converter (Qwen/Qwen2.5-0.5B)
116
+ run: |
117
+ ray stop --force
118
+ python scripts/converter_hf_to_mcore.py --hf_model_path=${HOME}/models/Qwen/Qwen2.5-0.5B --output_path checkpoints/Qwen/Qwen2.5-0.5B --test
119
+ - name: Running Huggingface to Megatron dist_ckpt converter (deepseek-ai/deepseek-coder-1.3b-instruct)
120
+ run: |
121
+ ray stop --force
122
+ python scripts/converter_hf_to_mcore.py --hf_model_path=${HOME}/models/deepseek-ai/deepseek-coder-1.3b-instruct --output_path checkpoints/deepseek-ai/deepseek-coder-1.3b-instruct --test
123
+ - name: Clean up
124
+ run: |
125
+ rm -rf checkpoints
126
+
127
+ checkpoint_converter_large_moe_models:
128
+ needs: setup
129
+ runs-on: [ "${{ needs.setup.outputs.runner-label || 'L20x8' }}" ]
130
+ timeout-minutes: 30 # Increase this timeout value as needed
131
+ env:
132
+ HTTP_PROXY: ${{ secrets.PROXY_HTTP }}
133
+ HTTPS_PROXY: ${{ secrets.PROXY_HTTPS }}
134
+ NO_PROXY: "localhost,127.0.0.1"
135
+ HF_HUB_ENABLE_HF_TRANSFER: "0" # This is more stable
136
+ HF_ENDPOINT: "https://hf-mirror.com"
137
+ steps:
138
+ - uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2
139
+ with:
140
+ fetch-depth: 0
141
+ - name: Install the current repository
142
+ run: |
143
+ pip3 install -e .[test]
144
+ # - name: Download Model to Use
145
+ # run: |
146
+ # huggingface-cli download Qwen/Qwen1.5-MoE-A2.7B-Chat --local-dir ${HOME}/models/Qwen/Qwen1.5-MoE-A2.7B-Chat
147
+ # export HF_HUB_OFFLINE=1
148
+ - name: Running Huggingface to Megatron dist_ckpt CPU converter (Qwen/Qwen1.5-MoE-A2.7B-Chat)
149
+ run: |
150
+ ray stop --force
151
+ python scripts/converter_hf_to_mcore.py --hf_model_path=${HOME}/models/Qwen/Qwen1.5-MoE-A2.7B-Chat --output_path checkpoints/Qwen/Qwen1.5-MoE-A2.7B-Chat --use_cpu_initialization
152
+ - name: Running distributed Huggingface to Megatron dist_ckpt CPU converter (Qwen/Qwen1.5-MoE-A2.7B-Chat)
153
+ run: |
154
+ ray stop --force
155
+ torchrun --nproc_per_node 8 --nnodes 1 scripts/converter_hf_to_mcore.py --hf_model_path=${HOME}/models/Qwen/Qwen1.5-MoE-A2.7B-Chat --output_path checkpoints/Qwen/Qwen1.5-MoE-A2.7B-Chat_dist --use_cpu_initialization
156
+ - name: clean up
157
+ run: |
158
+ rm -rf checkpoints
159
+
160
+ cleanup:
161
+ runs-on: ubuntu-latest
162
+ needs:
163
+ [
164
+ setup,
165
+ checkpoint_converter,
166
+ checkpoint_converter_large_moe_models
167
+ ]
168
+ if: always()
169
+ steps:
170
+ - id: destroy-runner
171
+ uses: volcengine/vemlp-github-runner@v1
172
+ with:
173
+ mode: "destroy"
174
+ faas-url: "${{ env.DYNAMIC_RUNNER_ENDPOINT }}"
175
+ mlp-task-id: "${{ needs.setup.outputs.mlp-task-id }}"
.github/workflows/cpu_unit_tests.yml ADDED
@@ -0,0 +1,89 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # # Tests layout
2
+
3
+ # Each folder under tests/ corresponds to a test category for a sub-namespace in verl. For instance:
4
+ # - `tests/trainer` for testing functionality related to `verl/trainer`
5
+ # - `tests/models` for testing functionality related to `verl/models`
6
+ # - ...
7
+
8
+ # There are a few folders with `special_` prefix, created for special purposes:
9
+ # - `special_distributed`: unit tests that must run with multiple GPUs
10
+ # - `special_e2e`: end-to-end tests with training/generation scripts
11
+ # - `special_npu`: tests for NPUs
12
+ # - `special_sanity`: a suite of quick sanity tests
13
+ # - `special_standalone`: a set of tests that are designed to run in dedicated environments
14
+
15
+ # Accelerators for tests
16
+ # - By default tests are run with GPU available, except for the ones under `special_npu`, and any test script whose name ends with `on_cpu.py`.
17
+ # - For test scripts with `on_cpu.py` name suffix would be tested on CPU resources in linux environment.
18
+
19
+ # # Workflow layout
20
+
21
+ # All CI tests are configured by yaml files in `.github/workflows/`. Here's an overview of all test configs:
22
+ # 1. A list of always triggered CPU sanity tests: `check-pr-title.yml`, `secrets_scan.yml`, `pre-commit.yml`, `doc.yml`
23
+ # 2. Some heavy multi-GPU unit tests, such as `model.yml`, `vllm.yml`, `sgl.yml`
24
+ # 3. End-to-end tests: `e2e_*.yml`
25
+ # 4. Unit tests
26
+ # - `cpu_unit_tests.yml`, run pytest on all scripts with file name pattern `tests/**/test_*_on_cpu.py`
27
+ # - `gpu_unit_tests.yml`, run pytest on all test scripts whose file names lack the `on_cpu.py` suffix.
28
+ # - Since cpu/gpu unit tests by default runs all tests under `tests`, please make sure tests are manually excluded in them when
29
+ # - new workflow yaml is added to `.github/workflows`
30
+ # - new tests are added to workflow mentioned in 2.
31
+
32
+
33
+ name: cpu_unit_tests
34
+
35
+ on:
36
+ # Trigger the workflow on push or pull request,
37
+ # but only for the main branch
38
+ push:
39
+ branches:
40
+ - main
41
+ - v0.*
42
+ pull_request:
43
+ branches:
44
+ - main
45
+ - v0.*
46
+ paths:
47
+ - "**/*.py"
48
+ - .github/workflows/cpu_unit_tests.yml
49
+ - "!recipe/**/*.py"
50
+
51
+ # Cancel jobs on the same ref if a new one is triggered
52
+ concurrency:
53
+ group: ${{ github.workflow }}-${{ github.ref }}
54
+ cancel-in-progress: ${{ github.ref != 'refs/heads/main' }}
55
+
56
+ # Declare permissions just read content.
57
+ permissions:
58
+ contents: read
59
+
60
+ jobs:
61
+ cpu_unit_tests:
62
+ if: github.repository_owner == 'volcengine'
63
+ runs-on: [L20x8]
64
+ timeout-minutes: 20 # Increase this timeout value as needed
65
+ env:
66
+ HTTP_PROXY: ${{ secrets.PROXY_HTTP }}
67
+ HTTPS_PROXY: ${{ secrets.PROXY_HTTPS }}
68
+ NO_PROXY: "localhost,127.0.0.1,hf-mirror.com"
69
+ HF_ENDPOINT: "https://hf-mirror.com"
70
+ HF_HUB_ENABLE_HF_TRANSFER: "0" # This is more stable
71
+ container:
72
+ image: verlai/verl:app-verl0.5-transformers4.55.4-vllm0.10.0-mcore0.13.0-te2.2
73
+ steps:
74
+ - uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2
75
+ with:
76
+ fetch-depth: 0
77
+ - name: Install the current repository
78
+ run: |
79
+ pip install -e .[test,prime,geo]
80
+ pip install --upgrade "ray>=2.40.0" pillow
81
+ - name: Download datasets
82
+ run: |
83
+ huggingface-cli download verl-team/gsm8k-v0.4.1 --repo-type dataset --local-dir ~/verl-data/gsm8k
84
+ python3 examples/data_preprocess/geo3k.py
85
+ - name: Running CPU unit tests
86
+ run: |
87
+ echo '[pytest]' > pytest.ini
88
+ echo 'python_files = *_on_cpu.py' >> pytest.ini
89
+ pytest -s -x --asyncio-mode=auto tests/
.github/workflows/doc.yml ADDED
@@ -0,0 +1,100 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # # Tests layout
2
+
3
+ # Each folder under tests/ corresponds to a test category for a sub-namespace in verl. For instance:
4
+ # - `tests/trainer` for testing functionality related to `verl/trainer`
5
+ # - `tests/models` for testing functionality related to `verl/models`
6
+ # - ...
7
+
8
+ # There are a few folders with `special_` prefix, created for special purposes:
9
+ # - `special_distributed`: unit tests that must run with multiple GPUs
10
+ # - `special_e2e`: end-to-end tests with training/generation scripts
11
+ # - `special_npu`: tests for NPUs
12
+ # - `special_sanity`: a suite of quick sanity tests
13
+ # - `special_standalone`: a set of test that are designed to run in dedicated environments
14
+
15
+ # Accelerators for tests
16
+ # - By default tests are run with GPU available, except for the ones under `special_npu`, and any test script whose name ends with `on_cpu.py`.
17
+ # - For test scripts with `on_cpu.py` name suffix would be tested on CPU resources in linux environment.
18
+
19
+ # # Workflow layout
20
+
21
+ # All CI tests are configured by yaml files in `.github/workflows/`. Here's an overview of all test configs:
22
+ # 1. A list of always triggered CPU sanity tests: `check-pr-title.yml`, `secrets_scan.yml`, `check-pr-title,yml`, `pre-commit.yml`, `doc.yml`
23
+ # 2. Some heavy multi-GPU unit tests, such as `model.yml`, `vllm.yml`, `sgl.yml`
24
+ # 3. End-to-end tests: `e2e_*.yml`
25
+ # 4. Unit tests
26
+ # - `cpu_unit_tests.yml`, run pytest on all scripts with file name pattern `tests/**/test_*_on_cpu.py`
27
+ # - `gpu_unit_tests.yml`, run pytest on all scripts with file without the `on_cpu.py` suffix.
28
+ # - Since cpu/gpu unit tests by default runs all tests under `tests`, please make sure tests are manually excluded in them when
29
+ # - new workflow yaml is added to `.github/workflows`
30
+ # - new tests are added to workflow mentioned in 2.
31
+
32
+
33
+ name: doc_test
34
+
35
+ on:
36
+ # Trigger the workflow on push or pull request,
37
+ # but only for the main branch
38
+ push:
39
+ branches:
40
+ - main
41
+ - v0.*
42
+ pull_request:
43
+ branches:
44
+ - main
45
+ - v0.*
46
+ paths:
47
+ - "**/*.py"
48
+ - "docs/**"
49
+ - .github/workflows/doc.yml
50
+
51
+ # Cancel jobs on the same ref if a new one is triggered
52
+ concurrency:
53
+ group: ${{ github.workflow }}-${{ github.ref }}
54
+ cancel-in-progress: ${{ github.ref != 'refs/heads/main' }}
55
+
56
+ # Declare permissions just read content.
57
+ permissions:
58
+ contents: read # for checkout
59
+ pages: write # for deploy-pages
60
+ id-token: write # for deploy-pages
61
+
62
+ jobs:
63
+ doc_test:
64
+ runs-on: ubuntu-latest
65
+ timeout-minutes: 5 # Increase this timeout value as needed
66
+ strategy:
67
+ matrix:
68
+ python-version: ["3.10"]
69
+ steps:
70
+ - uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2
71
+ - name: Set up Python ${{ matrix.python-version }}
72
+ uses: actions/setup-python@0b93645e9fea7318ecaed2b359559ac225c90a2b # v5.3.0
73
+ with:
74
+ python-version: ${{ matrix.python-version }}
75
+ - name: Install the current repository
76
+ run: |
77
+ pip install -e .[test] --no-deps
78
+ pip install -r docs/requirements-docs.txt
79
+
80
+ - name: Run doc make html
81
+ run: |
82
+ cd docs
83
+ make clean
84
+ make html SPHINXOPTS="--keep-going -w _build/sphinx.log"
85
+ if grep -q ": ERROR:" _build/sphinx.log; then
86
+ echo "🚨 Sphinx doc build contained ERRORs - see _build/sphinx.log"
87
+ exit 1
88
+ fi
89
+ if grep -q "WARNING: document isn't included in any toctree" _build/sphinx.log; then
90
+ echo "🚨 Sphinx doc build contained WARNING. Please include newly added docs in index.rst. See _build/sphinx.log for details"
91
+ exit 1
92
+ fi
93
+ if grep -q "WARNING: Inline emphasis" _build/sphinx.log; then
94
+ echo "🚨 Sphinx doc build contained WARNING. Please check inline emphasis is correct. See _build/sphinx.log for details"
95
+ exit 1
96
+ fi
97
+ if grep -q "WARNING: Definition list ends without a blank line" _build/sphinx.log; then
98
+ echo "🚨 Sphinx doc build contained WARNING. Please check if the indentation is correct. See _build/sphinx.log for details"
99
+ exit 1
100
+ fi
.github/workflows/e2e_ascend.yml ADDED
@@ -0,0 +1,156 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # # Tests layout
2
+
3
+ # Each folder under tests/ corresponds to a test category for a sub-namespace in verl. For instance:
4
+ # - `tests/trainer` for testing functionality related to `verl/trainer`
5
+ # - `tests/models` for testing functionality related to `verl/models`
6
+ # - ...
7
+
8
+ # There are a few folders with `special_` prefix, created for special purposes:
9
+ # - `special_distributed`: unit tests that must run with multiple GPUs
10
+ # - `special_e2e`: end-to-end tests with training/generation scripts
11
+ # - `special_npu`: tests for NPUs
12
+ # - `special_sanity`: a suite of quick sanity tests
13
+ # - `special_standalone`: a set of test that are designed to run in dedicated environments
14
+
15
+ # Accelerators for tests
16
+ # - By default tests are run with GPU available, except for the ones under `special_npu`, and any test script whose name ends with `on_cpu.py`.
17
+ # - For test scripts with `on_cpu.py` name suffix would be tested on CPU resources in linux environment.
18
+
19
+ # # Workflow layout
20
+
21
+ # All CI tests are configured by yaml files in `.github/workflows/`. Here's an overview of all test configs:
22
+ # 1. A list of always triggered CPU sanity tests: `check-pr-title.yml`, `secrets_scan.yml`, `check-pr-title,yml`, `pre-commit.yml`, `doc.yml`
23
+ # 2. Some heavy multi-GPU unit tests, such as `model.yml`, `vllm.yml`, `sgl.yml`
24
+ # 3. End-to-end tests: `e2e_*.yml`
25
+ # 4. Unit tests
26
+ # - `cpu_unit_tests.yml`, run pytest on all scripts with file name pattern `tests/**/test_*_on_cpu.py`
27
+ # - `gpu_unit_tests.yml`, run pytest on all scripts with file without the `on_cpu.py` suffix.
28
+ # - Since cpu/gpu unit tests by default runs all tests under `tests`, please make sure tests are manually excluded in them when
29
+ # - new workflow yaml is added to `.github/workflows`
30
+ # - new tests are added to workflow mentioned in 2.
31
+
32
+
33
+ name: e2e_ascend
34
+
35
+ on:
36
+ # Trigger the workflow on push or pull request,
37
+ # but only for the main branch
38
+ push:
39
+ branches:
40
+ - main
41
+ - v0.*
42
+ pull_request:
43
+ branches:
44
+ - main
45
+ paths:
46
+ - ".github/workflows/e2e_ascend.yml"
47
+ - "**/*.py"
48
+ - "docs/ascend_tutorial/**"
49
+ - "examples/**"
50
+ - "recipe/**"
51
+ - "tests/special_npu/**"
52
+ - "tests/special_sanity/**"
53
+ - "verl/**"
54
+ - "pyproject.toml"
55
+ - "requirements-npu.txt"
56
+ - "setup.py"
57
+
58
+ # Cancel jobs on the same ref if a new one is triggered
59
+ concurrency:
60
+ group: ${{ github.workflow }}-${{ github.ref }}
61
+ cancel-in-progress: ${{ github.ref != 'refs/heads/main' }}
62
+
63
+ permissions:
64
+ contents: read
65
+
66
+ jobs:
67
+ test:
68
+ if: github.repository_owner == 'volcengine'
69
+ name: verl Ascend test (self-host)
70
+ runs-on: [self-hosted, npu-0]
71
+ timeout-minutes: 40 # Increase this timeout value as needed
72
+ container:
73
+ image: crispig/verl_npu:cann8.1rc1-py3.10-torch2.5.1-vllm-ascend0.7.3.post1-mindspeed0121-250731
74
+ volumes:
75
+ - /usr/local/dcmi:/usr/local/dcmi
76
+ - /usr/local/bin/npu-smi:/usr/local/bin/npu-smi
77
+ - /usr/local/Ascend/driver/lib64/:/usr/local/Ascend/driver/lib64/
78
+ - /usr/local/Ascend/driver/version.info:/usr/local/Ascend/driver/version.info
79
+ - /etc/ascend_install.info:/etc/ascend_install.info
80
+ - /data00/dataset:/github/home/dataset
81
+ - /data00/models:/github/home/models
82
+ # Use self-host cache speed up pip and model download
83
+ # - /home/action/actions-runner/_work/cache:/github/home/.cache/
84
+ options: >-
85
+ --device /dev/davinci0
86
+ --device /dev/davinci_manager
87
+ --device /dev/devmm_svm
88
+ --device /dev/hisi_hdc
89
+ --network host
90
+ --privileged
91
+ --shm-size 16g
92
+ env:
93
+ HTTP_PROXY: ${{ secrets.PROXY_HTTP }}
94
+ HTTPS_PROXY: ${{ secrets.PROXY_HTTPS }}
95
+ NO_PROXY: "localhost,127.0.0.1,hf-mirror.com"
96
+ HF_ENDPOINT: "https://hf-mirror.com"
97
+ HF_HUB_ENABLE_HF_TRANSFER: "0" # This is more stable
98
+ steps:
99
+ - name: Check npu and CANN info
100
+ run: |
101
+ cat /usr/local/Ascend/ascend-toolkit/latest/"$(uname -i)"-linux/ascend_toolkit_install.info
102
+ npu-smi info
103
+ - name: Checkout volcengine/verl repo
104
+ uses: actions/checkout@v4
105
+ - name: Install the current repository
106
+ run: |
107
+ pip3 install hf_transfer peft
108
+ pip3 install -r requirements-npu.txt
109
+ pip install -e .
110
+ - name: Install torchvision
111
+ run: |
112
+ pip install torchvision==0.20.1+cpu --index-url https://download.pytorch.org/whl/cpu
113
+ - name: Uninstall Triton
114
+ run: |
115
+ pip uninstall -y triton
116
+ - name: Preprocess gsm8k dataset
117
+ run: |
118
+ python examples/data_preprocess/gsm8k.py --local_dataset_path ${HOME}/dataset/openai/gsm8k
119
+ - name: Preprocess geo3k dataset
120
+ run: |
121
+ python examples/data_preprocess/geo3k.py --local_dataset_path ${HOME}/dataset/hiyouga/geometry3k
122
+ - name: Running gsm8k e2e qwen3 training tests with PPO on ASCEND NPU
123
+ run: |
124
+ ray stop --force
125
+ bash tests/special_npu/run_qwen3_06b_ppo.sh
126
+ rm -rf $HOME/ckpts
127
+ - name: Running gsm8k e2e training tests with peft sft on ASCEND NPU
128
+ run: |
129
+ ray stop --force
130
+ bash tests/special_npu/run_qwen2_5_05b_sft_peft_sp2.sh
131
+ rm -rf $HOME/ckpts
132
+ - name: Running gsm8k e2e training tests with GRPO on ASCEND NPU
133
+ run: |
134
+ ray stop --force
135
+ bash tests/special_npu/run_qwen2_5_05b_grpo.sh
136
+ rm -rf $HOME/ckpts
137
+ - name: Running geo3k e2e training tests with GRPO on ASCEND NPU
138
+ run: |
139
+ ray stop --force
140
+ bash tests/special_npu/run_qwen2_5_vl_3b_npu.sh
141
+ rm -rf $HOME/ckpts
142
+ - name: Running gsm8k e2e training tests with DAPO on ASCEND NPU
143
+ run: |
144
+ ray stop --force
145
+ bash tests/special_npu/run_qwen2_5_05b_dapo.sh
146
+ rm -rf $HOME/ckpts
147
+ - name: Running gsm8k e2e training tests with GRPO MindSpeed on ASCEND NPU
148
+ run: |
149
+ ray stop --force
150
+ USE_DIST_CKPT=True bash tests/special_npu/run_qwen2_5_05b_grpo_mindspeed.sh
151
+ rm -rf $HOME/dist_ckpt/qwen2_5_05b_grpo_mindspeed
152
+ rm -rf $HOME/ckpts
153
+ - name: Running NPU profiling unit tests
154
+ run: |
155
+ ray stop --force
156
+ pytest -s -x tests/utils/test_special_mstx_profile.py
.github/workflows/e2e_dapo.yml ADDED
@@ -0,0 +1,145 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # # Tests layout
2
+
3
+ # Each folder under tests/ corresponds to a test category for a sub-namespace in verl. For instance:
4
+ # - `tests/trainer` for testing functionality related to `verl/trainer`
5
+ # - `tests/models` for testing functionality related to `verl/models`
6
+ # - ...
7
+
8
+ # There are a few folders with `special_` prefix, created for special purposes:
9
+ # - `special_distributed`: unit tests that must run with multiple GPUs
10
+ # - `special_e2e`: end-to-end tests with training/generation scripts
11
+ # - `special_npu`: tests for NPUs
12
+ # - `special_sanity`: a suite of quick sanity tests
13
+ # - `special_standalone`: a set of test that are designed to run in dedicated environments
14
+
15
+ # Accelerators for tests
16
+ # - By default tests are run with GPU available, except for the ones under `special_npu`, and any test script whose name ends with `on_cpu.py`.
17
+ # - For test scripts with `on_cpu.py` name suffix would be tested on CPU resources in linux environment.
18
+
19
+ # # Workflow layout
20
+
21
+ # All CI tests are configured by yaml files in `.github/workflows/`. Here's an overview of all test configs:
22
+ # 1. A list of always triggered CPU sanity tests: `check-pr-title.yml`, `secrets_scan.yml`, `check-pr-title,yml`, `pre-commit.yml`, `doc.yml`
23
+ # 2. Some heavy multi-GPU unit tests, such as `model.yml`, `vllm.yml`, `sgl.yml`
24
+ # 3. End-to-end tests: `e2e_*.yml`
25
+ # 4. Unit tests
26
+ # - `cpu_unit_tests.yml`, run pytest on all scripts with file name pattern `tests/**/test_*_on_cpu.py`
27
+ # - `gpu_unit_tests.yml`, run pytest on all scripts with file without the `on_cpu.py` suffix.
28
+ # - Since cpu/gpu unit tests by default runs all tests under `tests`, please make sure tests are manually excluded in them when
29
+ # - new workflow yaml is added to `.github/workflows`
30
+ # - new tests are added to workflow mentioned in 2.
31
+
32
+
33
+ name: e2e_dapo
34
+
35
+ on:
36
+ # Trigger the workflow on push or pull request,
37
+ # but only for the main branch
38
+ # For push, for now only anti-patterns are specified so it is more conservative
39
+ # and achieves higher coverage.
40
+ push:
41
+ branches:
42
+ - main
43
+ - v0.*
44
+ paths:
45
+ - "verl/*.py"
46
+ # Other entrypoints
47
+ - "!examples/*trainer*"
48
+ - "!tests/**"
49
+ - "!verl/trainer/main_*.py"
50
+ - "!verl/trainer/fsdp_sft_trainer.py"
51
+ # Megatron
52
+ - "!verl/workers/**/megatron_*.py"
53
+ - "!recipe/**"
54
+ - "recipe/dapo"
55
+ pull_request:
56
+ branches:
57
+ - main
58
+ - v0.*
59
+ paths:
60
+ - "**/*.py"
61
+ # Other entrypoints
62
+ - "!examples/**"
63
+ - "!tests/**"
64
+ - "!verl/trainer/main_*.py"
65
+ - "!verl/trainer/fsdp_sft_trainer.py"
66
+ # Other recipes
67
+ - "!recipe/**"
68
+ # Megatron
69
+ - "!verl/workers/**/megatron_*.py"
70
+ # Home
71
+ - "recipe/dapo"
72
+ # Entrypoints
73
+ - ".github/workflows/e2e_dapo.yml"
74
+ - "examples/data_preprocess/gsm8k.py"
75
+ - "tests/special_e2e/run_dapo.sh"
76
+
77
+ # Cancel jobs on the same ref if a new one is triggered
78
+ concurrency:
79
+ group: ${{ github.workflow }}-${{ github.ref }}
80
+ cancel-in-progress: ${{ github.ref != 'refs/heads/main' }}
81
+
82
+ # Declare permissions just read content.
83
+ permissions:
84
+ contents: read
85
+
86
+ env:
87
+ IMAGE: "verl-ci-cn-beijing.cr.volces.com/verlai/verl:app-verl0.5-transformers4.55.4-vllm0.10.0-mcore0.13.0-te2.2"
88
+ DYNAMIC_RUNNER_ENDPOINT: "https://sd10g3clalm04ug7alq90.apigateway-cn-beijing.volceapi.com/runner"
89
+
90
+ jobs:
91
+ setup:
92
+ if: github.repository_owner == 'volcengine'
93
+ runs-on: ubuntu-latest
94
+ outputs:
95
+ runner-label: ${{ steps.create-runner.outputs.runner-label }}
96
+ mlp-task-id: ${{ steps.create-runner.outputs.mlp-task-id }}
97
+ steps:
98
+ - uses: actions/checkout@v4
99
+ - id: create-runner
100
+ uses: volcengine/vemlp-github-runner@v1
101
+ with:
102
+ mode: "create"
103
+ faas-url: "${{ env.DYNAMIC_RUNNER_ENDPOINT }}"
104
+ mlp-image: "${{ env.IMAGE }}"
105
+
106
+ e2e_dapo:
107
+ needs: setup
108
+ runs-on: [ "${{ needs.setup.outputs.runner-label || 'L20x8' }}" ]
109
+ timeout-minutes: 40 # Increase this timeout value as needed
110
+ env:
111
+ HTTP_PROXY: ${{ secrets.PROXY_HTTP }}
112
+ HTTPS_PROXY: ${{ secrets.PROXY_HTTPS }}
113
+ NO_PROXY: "localhost,127.0.0.1,hf-mirror.com"
114
+ HF_ENDPOINT: "https://hf-mirror.com"
115
+ HF_HUB_ENABLE_HF_TRANSFER: "0" # This is more stable
116
+ steps:
117
+ - uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2
118
+ with:
119
+ fetch-depth: 0
120
+ - name: Install the current repository
121
+ run: |
122
+ pip3 install --no-deps -e .[test,gpu]
123
+ - name: Prepare GSM8K dataset
124
+ run: |
125
+ python3 examples/data_preprocess/gsm8k.py --local_dataset_path ${HOME}/models/hf_data/gsm8k
126
+ - name: Running the E2E test with the DAPO algorithm
127
+ run: |
128
+ ray stop --force
129
+ bash tests/special_e2e/run_dapo.sh
130
+
131
+ cleanup:
132
+ runs-on: ubuntu-latest
133
+ needs:
134
+ [
135
+ setup,
136
+ e2e_dapo
137
+ ]
138
+ if: always()
139
+ steps:
140
+ - id: destroy-runner
141
+ uses: volcengine/vemlp-github-runner@v1
142
+ with:
143
+ mode: "destroy"
144
+ faas-url: "${{ env.DYNAMIC_RUNNER_ENDPOINT }}"
145
+ mlp-task-id: "${{ needs.setup.outputs.mlp-task-id }}"
.github/workflows/e2e_genrm_remote.yml ADDED
@@ -0,0 +1,141 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # # Tests layout
2
+
3
+ # Each folder under tests/ corresponds to a test category for a sub-namespace in verl. For instance:
4
+ # - `tests/trainer` for testing functionality related to `verl/trainer`
5
+ # - `tests/models` for testing functionality related to `verl/models`
6
+ # - ...
7
+
8
+ # There are a few folders with `special_` prefix, created for special purposes:
9
+ # - `special_distributed`: unit tests that must run with multiple GPUs
10
+ # - `special_e2e`: end-to-end tests with training/generation scripts
11
+ # - `special_npu`: tests for NPUs
12
+ # - `special_sanity`: a suite of quick sanity tests
13
+ # - `special_standalone`: a set of test that are designed to run in dedicated environments
14
+
15
+ # Accelerators for tests
16
+ # - By default tests are run with GPU available, except for the ones under `special_npu`, and any test script whose name ends with `on_cpu.py`.
17
+ # - For test scripts with `on_cpu.py` name suffix would be tested on CPU resources in linux environment.
18
+
19
+ # # Workflow layout
20
+
21
+ # All CI tests are configured by yaml files in `.github/workflows/`. Here's an overview of all test configs:
22
+ # 1. A list of always triggered CPU sanity tests: `check-pr-title.yml`, `secrets_scan.yml`, `check-pr-title,yml`, `pre-commit.yml`, `doc.yml`
23
+ # 2. Some heavy multi-GPU unit tests, such as `model.yml`, `vllm.yml`, `sgl.yml`
24
+ # 3. End-to-end tests: `e2e_*.yml`
25
+ # 4. Unit tests
26
+ # - `cpu_unit_tests.yml`, run pytest on all scripts with file name pattern `tests/**/test_*_on_cpu.py`
27
+ # - `gpu_unit_tests.yml`, run pytest on all scripts with file without the `on_cpu.py` suffix.
28
+ # - Since cpu/gpu unit tests by default runs all tests under `tests`, please make sure tests are manually excluded in them when
29
+ # - new workflow yaml is added to `.github/workflows`
30
+ # - new tests are added to workflow mentioned in 2.
31
+
32
+
33
+ name: e2e_genrm_remote
34
+
35
+ on:
36
+ # Trigger the workflow on push or pull request,
37
+ # but only for the main branch
38
+ push:
39
+ branches:
40
+ - main
41
+ - v0.*
42
+ paths:
43
+ - "**/*.py"
44
+ - "tests/**"
45
+ - "!recipe/**"
46
+ - "recipe/genrm_remote"
47
+ pull_request:
48
+ branches:
49
+ - main
50
+ - v0.*
51
+ paths:
52
+ - "**/*.py"
53
+ # Other entrypoints
54
+ - "!examples/**"
55
+ - "!tests/**"
56
+ - "!verl/trainer/main_*.py"
57
+ - "!verl/trainer/fsdp_sft_trainer.py"
58
+ # Other recipes
59
+ - "!recipe/**"
60
+ # Megatron
61
+ - "!verl/workers/**/megatron_*.py"
62
+ # Home
63
+ - "recipe/genrm_remote"
64
+ - "!recipe/genrm_remote/README.md"
65
+ # Entrypoints
66
+ - ".github/workflows/e2e_genrm_remote.yml"
67
+ - "examples/data_preprocess/gsm8k.py"
68
+ - "tests/special_e2e/run_genrm_remote.sh"
69
+ - "tests/special_e2e/generation/run_gen_qwen05_server.sh"
70
+
71
+ # Cancel jobs on the same ref if a new one is triggered
72
+ concurrency:
73
+ group: ${{ github.workflow }}-${{ github.ref }}
74
+ cancel-in-progress: ${{ github.ref != 'refs/heads/main' }}
75
+
76
+ # Declare permissions just read content.
77
+ permissions:
78
+ contents: read
79
+
80
+ env:
81
+ IMAGE: "verl-ci-cn-beijing.cr.volces.com/verlai/verl:app-verl0.5-transformers4.55.4-vllm0.10.0-mcore0.13.0-te2.2"
82
+ DYNAMIC_RUNNER_ENDPOINT: "https://sd10g3clalm04ug7alq90.apigateway-cn-beijing.volceapi.com/runner"
83
+
84
+ jobs:
85
+ setup:
86
+ if: github.repository_owner == 'volcengine'
87
+ runs-on: ubuntu-latest
88
+ outputs:
89
+ runner-label: ${{ steps.create-runner.outputs.runner-label }}
90
+ mlp-task-id: ${{ steps.create-runner.outputs.mlp-task-id }}
91
+ steps:
92
+ - uses: actions/checkout@v4
93
+ - id: create-runner
94
+ uses: volcengine/vemlp-github-runner@v1
95
+ with:
96
+ mode: "create"
97
+ faas-url: "${{ env.DYNAMIC_RUNNER_ENDPOINT }}"
98
+ mlp-image: "${{ env.IMAGE }}"
99
+
100
+ e2e_genrm_remote:
101
+ needs: setup
102
+ runs-on: [ "${{ needs.setup.outputs.runner-label || 'L20x8' }}" ]
103
+ timeout-minutes: 40 # Increase this timeout value as needed
104
+ env:
105
+ HTTP_PROXY: ${{ secrets.PROXY_HTTP }}
106
+ HTTPS_PROXY: ${{ secrets.PROXY_HTTPS }}
107
+ NO_PROXY: "localhost,127.0.0.1,hf-mirror.com"
108
+ HF_ENDPOINT: "https://hf-mirror.com"
109
+ HF_HUB_ENABLE_HF_TRANSFER: "0" # This is more stable
110
+ steps:
111
+ - uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2
112
+ with:
113
+ fetch-depth: 0
114
+ - name: Install the current repository
115
+ run: |
116
+ pip3 install --no-deps -e .[test,gpu]
117
+ - name: Prepare GSM8K dataset
118
+ run: |
119
+ python3 examples/data_preprocess/gsm8k.py --local_dataset_path ${HOME}/models/hf_data/gsm8k
120
+ - name: Running the E2E test with the Generative Reward Model
121
+ run: |
122
+ ray stop --force
123
+ bash tests/special_e2e/run_genrm_remote.sh
124
+ ray stop --force
125
+ bash tests/special_e2e/generation/run_gen_qwen05_server.sh
126
+
127
+ cleanup:
128
+ runs-on: ubuntu-latest
129
+ needs:
130
+ [
131
+ setup,
132
+ e2e_genrm_remote
133
+ ]
134
+ if: always()
135
+ steps:
136
+ - id: destroy-runner
137
+ uses: volcengine/vemlp-github-runner@v1
138
+ with:
139
+ mode: "destroy"
140
+ faas-url: "${{ env.DYNAMIC_RUNNER_ENDPOINT }}"
141
+ mlp-task-id: "${{ needs.setup.outputs.mlp-task-id }}"
.github/workflows/e2e_one_step_off_policy.yml ADDED
@@ -0,0 +1,178 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # # Tests layout
2
+
3
+ # Each folder under tests/ corresponds to a test category for a sub-namespace in verl. For instance:
4
+ # - `tests/trainer` for testing functionality related to `verl/trainer`
5
+ # - `tests/models` for testing functionality related to `verl/models`
6
+ # - ...
7
+
8
+ # There are a few folders with `special_` prefix, created for special purposes:
9
+ # - `special_distributed`: unit tests that must run with multiple GPUs
10
+ # - `special_e2e`: end-to-end tests with training/generation scripts
11
+ # - `special_npu`: tests for NPUs
12
+ # - `special_sanity`: a suite of quick sanity tests
13
+ # - `special_standalone`: a set of test that are designed to run in dedicated environments
14
+
15
+ # Accelerators for tests
16
+ # - By default tests are run with GPU available, except for the ones under `special_npu`, and any test script whose name ends with `on_cpu.py`.
17
+ # - For test scripts with `on_cpu.py` name suffix would be tested on CPU resources in linux environment.
18
+
19
+ # # Workflow layout
20
+
21
+ # All CI tests are configured by yaml files in `.github/workflows/`. Here's an overview of all test configs:
22
+ # 1. A list of always triggered CPU sanity tests: `check-pr-title.yml`, `secrets_scan.yml`, `check-pr-title,yml`, `pre-commit.yml`, `doc.yml`
23
+ # 2. Some heavy multi-GPU unit tests, such as `model.yml`, `vllm.yml`, `sgl.yml`
24
+ # 3. End-to-end tests: `e2e_*.yml`
25
+ # 4. Unit tests
26
+ # - `cpu_unit_tests.yml`, run pytest on all scripts with file name pattern `tests/**/test_*_on_cpu.py`
27
+ # - `gpu_unit_tests.yml`, run pytest on all scripts with file without the `on_cpu.py` suffix.
28
+ # - Since cpu/gpu unit tests by default runs all tests under `tests`, please make sure tests are manually excluded in them when
29
+ # - new workflow yaml is added to `.github/workflows`
30
+ # - new tests are added to workflow mentioned in 2.
31
+
32
+
33
+ name: e2e_one_step_off_policy
34
+
35
+ on:
36
+ # Trigger the workflow on push or pull request,
37
+ # but only for the main branch
38
+ # For push, for now only anti-patterns are specified so it is more conservative
39
+ # and achieves higher coverage.
40
+ push:
41
+ branches:
42
+ - main
43
+ - v0.*
44
+ paths:
45
+ - "**/*.py"
46
+ - "!**/*.md"
47
+ - "!**/*.sh"
48
+ # Other entrypoints
49
+ - "!examples/*trainer*"
50
+ - "!tests/**"
51
+ - "!verl/trainer/main_*.py"
52
+ - "!verl/trainer/fsdp_sft_trainer.py"
53
+ - "!recipe/**"
54
+ - "recipe/one_step_off_policy"
55
+ pull_request:
56
+ branches:
57
+ - main
58
+ - v0.*
59
+ paths:
60
+ - "**/*.py"
61
+ - "!**/*.md"
62
+ - "!**/*.sh"
63
+ # Other entrypoints
64
+ - "!examples/**"
65
+ - "!tests/**"
66
+ - "!verl/trainer/main_*.py"
67
+ - "!verl/trainer/fsdp_sft_trainer.py"
68
+ # Other recipes
69
+ - "!recipe/**"
70
+ # Home
71
+ - "recipe/one_step_off_policy"
72
+ # Entrypoints
73
+ - ".github/workflows/e2e_one_step_off_policy.yml"
74
+ - "examples/data_preprocess/gsm8k.py"
75
+ - "tests/special_e2e/run_one_step_off_policy.sh"
76
+
77
+ # Cancel jobs on the same ref if a new one is triggered
78
+ concurrency:
79
+ group: ${{ github.workflow }}-${{ github.ref }}
80
+ cancel-in-progress: ${{ github.ref != 'refs/heads/main' }}
81
+
82
+ # Declare permissions just read content.
83
+ permissions:
84
+ contents: read
85
+
86
+ env:
87
+ IMAGE: "verl-ci-cn-beijing.cr.volces.com/verlai/verl:app-verl0.5-transformers4.55.4-vllm0.10.0-mcore0.13.0-te2.2"
88
+ DYNAMIC_RUNNER_ENDPOINT: "https://sd10g3clalm04ug7alq90.apigateway-cn-beijing.volceapi.com/runner"
89
+ TRANSFORMERS_VERSION: "4.56.2"
90
+
91
+ jobs:
92
+ setup:
93
+ if: github.repository_owner == 'volcengine'
94
+ runs-on: ubuntu-latest
95
+ outputs:
96
+ runner-label: ${{ steps.create-runner.outputs.runner-label }}
97
+ mlp-task-id: ${{ steps.create-runner.outputs.mlp-task-id }}
98
+ steps:
99
+ - uses: actions/checkout@v4
100
+ - id: create-runner
101
+ uses: volcengine/vemlp-github-runner@v1
102
+ with:
103
+ mode: "create"
104
+ faas-url: "${{ env.DYNAMIC_RUNNER_ENDPOINT }}"
105
+ mlp-image: "${{ env.IMAGE }}"
106
+
107
+ # Test FSDP2 strategy
108
+ e2e_one_step_off_policy_fsdp2:
109
+ needs: setup
110
+ runs-on: [ "${{ needs.setup.outputs.runner-label || 'L20x8' }}" ]
111
+ timeout-minutes: 10 # Increase timeout for async training
112
+ env:
113
+ HTTP_PROXY: ${{ secrets.PROXY_HTTP }}
114
+ HTTPS_PROXY: ${{ secrets.PROXY_HTTPS }}
115
+ NO_PROXY: "localhost,127.0.0.1,hf-mirror.com"
116
+ HF_ENDPOINT: "https://hf-mirror.com"
117
+ HF_HUB_ENABLE_HF_TRANSFER: "0" # This is more stable
118
+ ACTOR_STRATEGY: "fsdp2"
119
+ steps:
120
+ - uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2
121
+ with:
122
+ fetch-depth: 0
123
+ - name: Install the current repository
124
+ run: |
125
+ pip3 install --no-deps -e .[test,gpu]
126
+ pip3 install transformers==$TRANSFORMERS_VERSION
127
+ - name: Prepare GSM8K dataset
128
+ run: |
129
+ python3 examples/data_preprocess/gsm8k.py --local_dataset_path ${HOME}/models/hf_data/gsm8k
130
+ - name: Running the E2E test with one_step_off_policy algorithm (FSDP2)
131
+ run: |
132
+ ray stop --force
133
+ bash tests/special_e2e/run_one_step_off_policy.sh
134
+
135
+ # Test Megatron strategy
136
+ e2e_one_step_off_policy_megatron:
137
+ needs: setup
138
+ runs-on: [ "${{ needs.setup.outputs.runner-label || 'L20x8' }}" ]
139
+ timeout-minutes: 10 # Increase timeout for async training
140
+ env:
141
+ HTTP_PROXY: ${{ secrets.PROXY_HTTP }}
142
+ HTTPS_PROXY: ${{ secrets.PROXY_HTTPS }}
143
+ NO_PROXY: "localhost,127.0.0.1,hf-mirror.com"
144
+ HF_ENDPOINT: "https://hf-mirror.com"
145
+ HF_HUB_ENABLE_HF_TRANSFER: "0" # This is more stable
146
+ ACTOR_STRATEGY: "megatron"
147
+ steps:
148
+ - uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2
149
+ with:
150
+ fetch-depth: 0
151
+ - name: Install the current repository
152
+ run: |
153
+ pip3 install --no-deps -e .[test,gpu]
154
+ pip3 install transformers==$TRANSFORMERS_VERSION
155
+ - name: Prepare GSM8K dataset
156
+ run: |
157
+ python3 examples/data_preprocess/gsm8k.py --local_dataset_path ${HOME}/models/hf_data/gsm8k
158
+ - name: Running the E2E test with one_step_off_policy algorithm (Megatron)
159
+ run: |
160
+ ray stop --force
161
+ bash tests/special_e2e/run_one_step_off_policy.sh
162
+
163
+ cleanup:
164
+ runs-on: ubuntu-latest
165
+ needs:
166
+ [
167
+ setup,
168
+ e2e_one_step_off_policy_fsdp2,
169
+ e2e_one_step_off_policy_megatron
170
+ ]
171
+ if: always()
172
+ steps:
173
+ - id: destroy-runner
174
+ uses: volcengine/vemlp-github-runner@v1
175
+ with:
176
+ mode: "destroy"
177
+ faas-url: "${{ env.DYNAMIC_RUNNER_ENDPOINT }}"
178
+ mlp-task-id: "${{ needs.setup.outputs.mlp-task-id }}"
.github/workflows/e2e_ppo_trainer.yml ADDED
@@ -0,0 +1,79 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ name: e2e_ppo_trainer
2
+
3
+ on:
4
+ # Trigger the workflow on push or pull request,
5
+ # but only for the main branch
6
+ # For push, for now only anti-patterns are specified so it is more conservative
7
+ # and achieves higher coverage.
8
+ push:
9
+ branches:
10
+ - main
11
+ - v0.*
12
+ paths:
13
+ - "**/*.py"
14
+ # Other entrypoints
15
+ - "!verl/trainer/fsdp_sft_trainer.py"
16
+ # Recipes
17
+ - "!recipe/**"
18
+ # Megatron
19
+ - "!verl/workers/**/megatron_*.py"
20
+
21
+ pull_request:
22
+ branches:
23
+ - main
24
+ - v0.*
25
+ paths:
26
+ - "**/*.py"
27
+ # Other entrypoints
28
+ - "!**/*.md"
29
+ - "!docker/**"
30
+ - "!examples/**"
31
+ - "!tests/**"
32
+ - "!verl/trainer/main_*.py"
33
+ - "!verl/trainer/fsdp_sft_trainer.py"
34
+ # Docs
35
+ - "!docs/**"
36
+ # Recipes
37
+ - "!recipe/**"
38
+ # Megatron
39
+ - "!verl/workers/**/megatron_*.py"
40
+ # Entrypoints
41
+ - ".github/workflows/e2e_ppo_trainer.yml"
42
+ - "examples/data_preprocess/gsm8k.py"
43
+ - "examples/data_preprocess/geo3k.py"
44
+ - "tests/special_e2e/ppo_trainer"
45
+ - "verl/trainer/main_ppo.py"
46
+ - "verl/trainer/config/ppo_trainer.yaml"
47
+
48
+ # Cancel jobs on the same ref if a new one is triggered
49
+ concurrency:
50
+ group: ${{ github.workflow }}-${{ github.ref }}
51
+ cancel-in-progress: ${{ github.ref != 'refs/heads/main' }}
52
+
53
+ # Declare permissions just read content.
54
+ permissions:
55
+ contents: read
56
+
57
+ jobs:
58
+ pre_commit_for_ppo:
59
+ runs-on: ubuntu-latest
60
+ strategy:
61
+ matrix:
62
+ python-version: ["3.12"]
63
+ steps:
64
+ - uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2
65
+ - name: Set up Python ${{ matrix.python-version }}
66
+ uses: actions/setup-python@0b93645e9fea7318ecaed2b359559ac225c90a2b # v5.3.0
67
+ with:
68
+ python-version: ${{ matrix.python-version }}
69
+ - name: Install the current repository
70
+ run: |
71
+ pip install -e .
72
+ - name: Set ruff --output-format=github
73
+ run: |
74
+ sed -i 's/--output-format=full/--output-format=github/' .pre-commit-config.yaml
75
+ git add .pre-commit-config.yaml
76
+ - uses: pre-commit/action@v3.0.1
77
+ with:
78
+ extra_args: "" # Overriding default "--all-files"
79
+
.github/workflows/e2e_ppo_trainer_megatron_sglang.yml ADDED
@@ -0,0 +1,281 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # # Tests layout
2
+
3
+ # Each folder under tests/ corresponds to a test category for a sub-namespace in verl. For instance:
4
+ # - `tests/trainer` for testing functionality related to `verl/trainer`
5
+ # - `tests/models` for testing functionality related to `verl/models`
6
+ # - ...
7
+
8
+ # There are a few folders with `special_` prefix, created for special purposes:
9
+ # - `special_distributed`: unit tests that must run with multiple GPUs
10
+ # - `special_e2e`: end-to-end tests with training/generation scripts
11
+ # - `special_npu`: tests for NPUs
12
+ # - `special_sanity`: a suite of quick sanity tests
13
+ # - `special_standalone`: a set of test that are designed to run in dedicated environments
14
+
15
+ # Accelerators for tests
16
+ # - By default tests are run with GPU available, except for the ones under `special_npu`, and any test script whose name ends with `on_cpu.py`.
17
+ # - For test scripts with `on_cpu.py` name suffix would be tested on CPU resources in linux environment.
18
+
19
+ # # Workflow layout
20
+
21
+ # All CI tests are configured by yaml files in `.github/workflows/`. Here's an overview of all test configs:
22
+ # 1. A list of always triggered CPU sanity tests: `check-pr-title.yml`, `secrets_scan.yml`, `check-pr-title,yml`, `pre-commit.yml`, `doc.yml`
23
+ # 2. Some heavy multi-GPU unit tests, such as `model.yml`, `vllm.yml`, `sgl.yml`
24
+ # 3. End-to-end tests: `e2e_*.yml`
25
+ # 4. Unit tests
26
+ # - `cpu_unit_tests.yml`, run pytest on all scripts with file name pattern `tests/**/test_*_on_cpu.py`
27
+ # - `gpu_unit_tests.yml`, run pytest on all scripts with file without the `on_cpu.py` suffix.
28
+ # - Since cpu/gpu unit tests by default runs all tests under `tests`, please make sure tests are manually excluded in them when
29
+ # - new workflow yaml is added to `.github/workflows`
30
+ # - new tests are added to workflow mentioned in 2.
31
+
32
+ name: e2e_ppo_trainer_megatron_sglang
33
+
34
+ on:
35
+ # Trigger the workflow on push or pull request,
36
+ # but only for the main branch.
37
+ # For push, for now only anti-patterns are specified so it is more conservative
38
+ # and achieves higher coverage.
39
+ push:
40
+ branches:
41
+ - main
42
+ - v0.*
43
+ paths:
44
+ - "**/*.py"
45
+ # Other entrypoints
46
+ - "!verl/trainer/fsdp_sft_trainer.py"
47
+ # Recipes
48
+ - "!recipe/**"
49
+ # FSDP
50
+ - "!verl/workers/**/*dp_*.py"
51
+ pull_request:
52
+ branches:
53
+ - main
54
+ - v0.*
55
+ paths:
56
+ - "**/*.py"
57
+ # Other entrypoints
58
+ - "!docker/**"
59
+ # Docs
60
+ - "!**/*.md"
61
+ - "!docs/**"
62
+ - "!examples/**"
63
+ - "!tests/**"
64
+ - "!verl/trainer/main_*.py"
65
+ - "!verl/trainer/fsdp_sft_trainer.py"
66
+ # Recipes
67
+ - "!recipe/**"
68
+ # FSDP
69
+ - "!verl/workers/**/*dp_*.py"
70
+ # Entrypoints
71
+ - "verl/worksers/rollout/sglang_rollout/*"
72
+ - ".github/workflows/e2e_ppo_trainer_megatron_sglang.yml"
73
+ - "examples/data_preprocess/gsm8k.py"
74
+ - "examples/data_preprocess/geo3k.py"
75
+ - "tests/special_e2e/run_ppo_trainer_megatron.sh"
76
+ - "verl/trainer/main_ppo.py"
77
+ - "verl/trainer/config/ppo_megatron_trainer.yaml"
78
+
79
+ # Cancel jobs on the same ref if a new one is triggered
80
+ concurrency:
81
+ group: ${{ github.workflow }}-${{ github.ref }}
82
+ cancel-in-progress: ${{ github.ref != 'refs/heads/main' }}
83
+
84
+ # Declare permissions just read content.
85
+ permissions:
86
+ contents: read
87
+
88
+ env:
89
+ IMAGE: "verl-ci-cn-beijing.cr.volces.com/verlai/verl:app-verl0.6-transformers4.56.1-sglang0.5.2-mcore0.13.0-te2.2"
90
+ DYNAMIC_RUNNER_ENDPOINT: "https://sd10g3clalm04ug7alq90.apigateway-cn-beijing.volceapi.com/runner"
91
+
92
+ jobs:
93
+ setup:
94
+ if: github.repository_owner == 'volcengine'
95
+ runs-on: ubuntu-latest
96
+ outputs:
97
+ runner-label: ${{ steps.create-runner.outputs.runner-label }}
98
+ mlp-task-id: ${{ steps.create-runner.outputs.mlp-task-id }}
99
+ steps:
100
+ - uses: actions/checkout@v4
101
+ - id: create-runner
102
+ uses: volcengine/vemlp-github-runner@v1
103
+ with:
104
+ mode: "create"
105
+ faas-url: "${{ env.DYNAMIC_RUNNER_ENDPOINT }}"
106
+ mlp-image: "${{ env.IMAGE }}"
107
+
108
+ e2e_ppo_trainer_megatron-deepseek:
109
+ needs: setup
110
+ runs-on: ["${{ needs.setup.outputs.runner-label || 'L20x8' }}"]
111
+ timeout-minutes: 60 # Increase this timeout value as needed
112
+ env:
113
+ HTTP_PROXY: ${{ secrets.PROXY_HTTP }}
114
+ HTTPS_PROXY: ${{ secrets.PROXY_HTTPS }}
115
+ NO_PROXY: "localhost,127.0.0.1,hf-mirror.com"
116
+ HF_ENDPOINT: "https://hf-mirror.com"
117
+ HF_HUB_ENABLE_HF_TRANSFER: "0" # This is more stable
118
+ steps:
119
+ - uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2
120
+ with:
121
+ fetch-depth: 0
122
+ - name: Install the current repository
123
+ run: |
124
+ pip3 install --no-deps -e .[test]
125
+ - name: Prepare GSM8K dataset
126
+ run: |
127
+ python3 examples/data_preprocess/gsm8k.py --local_dataset_path ${HOME}/models/hf_data/gsm8k
128
+ - name: Running GSM8K E2E training tests with 3D parallelism on 8 L20 GPUs with Megatron (DeepSeek)
129
+ run: |
130
+ ray stop --force
131
+ OPTIM_MEMORY_EFFICIENT=True ENGINE=sglang SAVE_FREQ=1 MODEL_ID=deepseek-ai/deepseek-coder-1.3b-instruct bash tests/special_e2e/run_ppo_trainer_megatron.sh
132
+ - name: Running GSM8K E2E training tests with 3D parallelism on 8 L20 GPUs with Megatron (DeepSeek)
133
+ run: |
134
+ ray stop --force
135
+ export VLLM_USE_V1=1
136
+ ray start --head
137
+ ENGINE=sglang MODE=async RESUME_MODE=auto MODEL_ID=deepseek-ai/deepseek-coder-1.3b-instruct TOTAL_TRAIN_STEPS=2 bash tests/special_e2e/run_ppo_trainer_megatron.sh
138
+ - name: Test Megatron checkpoints merging function (DeepSeek Actor and Critic)
139
+ run: |
140
+ exp_name="deepseek-coder-1.3b-instruct-megatron-gsm8k-minimal"
141
+ python -m verl.model_merger test --backend megatron --local_dir checkpoints/verl-test/${exp_name}/global_step_1/actor --test_hf_dir checkpoints/verl-test/${exp_name}/global_step_1/actor/huggingface
142
+ python -m verl.model_merger test --backend megatron --is-value-model --local_dir checkpoints/verl-test/${exp_name}/global_step_1/critic --test_hf_dir checkpoints/verl-test/${exp_name}/global_step_1/critic/huggingface
143
+ - name: Profiling GRPO GSM8K E2E training tests with 3D parallelism on 8 L20 GPUs with Megatron (Deepseek)
144
+ run: |
145
+ ray stop --force
146
+ PROFILE_ENABLE=True ENGINE=sglang ADV_ESTIMATOR=grpo USE_DYNAMIC_BSZ=False MODEL_ID=deepseek-ai/deepseek-coder-1.3b-instruct bash tests/special_e2e/run_ppo_trainer_megatron.sh
147
+ if [ -z "$( ls -A '/tmp/ray/session_latest/logs/nsight/' )" ]; then
148
+ echo "[ERROR] not found any profiling files"
149
+ exit 1
150
+ else
151
+ echo "[SUCCESS] profile success"
152
+ fi
153
+ - name: clean up
154
+ run: |
155
+ rm -rf checkpoints
156
+
157
+ e2e_ppo_trainer_megatron-different-train-infer-tp-qwen-tie-embedding:
158
+ needs: setup
159
+ runs-on: ["${{ needs.setup.outputs.runner-label || 'L20x8' }}"]
160
+ timeout-minutes: 60 # Increase this timeout value as needed
161
+ env:
162
+ HTTP_PROXY: ${{ secrets.PROXY_HTTP }}
163
+ HTTPS_PROXY: ${{ secrets.PROXY_HTTPS }}
164
+ NO_PROXY: "localhost,127.0.0.1,hf-mirror.com"
165
+ HF_ENDPOINT: "https://hf-mirror.com"
166
+ HF_HUB_ENABLE_HF_TRANSFER: "0" # This is more stable
167
+ steps:
168
+ - uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2
169
+ with:
170
+ fetch-depth: 0
171
+ - name: Install the current repository
172
+ run: |
173
+ pip3 install --no-deps -e .[test]
174
+ - name: Prepare GSM8K dataset
175
+ run: |
176
+ python3 examples/data_preprocess/gsm8k.py --local_dataset_path ${HOME}/models/hf_data/gsm8k
177
+ - name: Running GSM8K E2E training tests with 3D parallelism on 8 L20 GPUs with tie-embedding Megatron (Qwen) with train tp > infer tp
178
+ run: |
179
+ ray stop --force
180
+ ENGINE=sglang VAL_BEFORE_TRAIN=True TEST_FREQ=1 SAVE_FREQ=1 TRAIN_TP=2 INFER_TP=1 MODEL_ID=Qwen/Qwen2.5-1.5B bash tests/special_e2e/run_ppo_trainer_megatron.sh
181
+ - name: Running GSM8K E2E training tests with 3D parallelism on 8 L20 GPUs with Megatron (Qwen) with train tp < infer tp
182
+ run: |
183
+ ray stop --force
184
+ ENGINE=sglang VAL_BEFORE_TRAIN=True TEST_FREQ=1 SAVE_FREQ=1 TRAIN_TP=1 INFER_TP=2 MODEL_ID=Qwen/Qwen2.5-1.5B bash tests/special_e2e/run_ppo_trainer_megatron.sh
185
+ - name: clean up
186
+ run: |
187
+ rm -rf checkpoints
188
+
189
+ e2e_ppo_trainer_megatron-qwen-override-transformer-config:
190
+ needs: setup
191
+ runs-on: ["${{ needs.setup.outputs.runner-label || 'L20x8' }}"]
192
+ timeout-minutes: 60 # Increase this timeout value as needed
193
+ env:
194
+ HTTP_PROXY: ${{ secrets.PROXY_HTTP }}
195
+ HTTPS_PROXY: ${{ secrets.PROXY_HTTPS }}
196
+ NO_PROXY: "localhost,127.0.0.1,hf-mirror.com"
197
+ HF_ENDPOINT: "https://hf-mirror.com"
198
+ HF_HUB_ENABLE_HF_TRANSFER: "0" # This is more stable
199
+ steps:
200
+ - uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2
201
+ with:
202
+ fetch-depth: 0
203
+ - name: Install the current repository
204
+ run: |
205
+ pip3 install --no-deps -e .[test]
206
+ - name: Prepare GSM8K dataset
207
+ run: |
208
+ python3 examples/data_preprocess/gsm8k.py --local_dataset_path ${HOME}/models/hf_data/gsm8k
209
+ # - name: Download Model to Use
210
+ # run: |
211
+ # huggingface-cli download Qwen/Qwen2.5-0.5B --local-dir ${HOME}/models/Qwen/Qwen2.5-0.5B
212
+ # export HF_HUB_OFFLINE=1
213
+ - name: Prepare dist_ckpt of Qwen2.5-0.5B, uneven layer distribution only supports dist_ckpt
214
+ run: |
215
+ python3 scripts/converter_hf_to_mcore.py --hf_model_path ${HOME}/models/Qwen/Qwen2.5-0.5B --output_path checkpoints/verl-test/qwen2.5-0.5b-megatron
216
+ - name: Running GSM8K E2E training tests with 3D parallelism on 8 L20 GPUs with Megatron (Qwen)
217
+ run: |
218
+ ray stop --force
219
+ ENGINE=sglang SAVE_FREQ=1 COMMON_PP=4 COMMON_VPP=null COMMON_CP=1 SKIP_SAVE_HF_MODEL=1 bash tests/special_e2e/run_ppo_trainer_megatron.sh +actor_rollout_ref.actor.megatron.override_transformer_config.num_layers_in_first_pipeline_stage=8 +actor_rollout_ref.actor.megatron.override_transformer_config.num_layers_in_last_pipeline_stage=4 actor_rollout_ref.actor.megatron.use_dist_checkpointing=true actor_rollout_ref.actor.megatron.dist_checkpointing_path=checkpoints/verl-test/qwen2.5-0.5b-megatron actor_rollout_ref.ref.megatron.use_dist_checkpointing=true actor_rollout_ref.ref.megatron.dist_checkpointing_path=checkpoints/verl-test/qwen2.5-0.5b-megatron critic.megatron.use_dist_checkpointing=true critic.megatron.dist_checkpointing_path=checkpoints/verl-test/qwen2.5-0.5b-megatron reward_model.megatron.use_dist_checkpointing=true reward_model.megatron.dist_checkpointing_path=checkpoints/verl-test/qwen2.5-0.5b-megatron
220
+ cp -r checkpoints checkpoints-dut
221
+ ENGINE=sglang SAVE_FREQ=1 COMMON_PP=4 COMMON_VPP=null COMMON_CP=1 bash tests/special_e2e/run_ppo_trainer_megatron.sh
222
+ - name: Test Megatron checkpoints merging function (Qwen Actor and Critic)
223
+ run: |
224
+ exp_name="qwen2.5-0.5b-megatron-gsm8k-minimal"
225
+ python -m verl.model_merger test --backend megatron --tie-word-embedding --local_dir checkpoints-dut/verl-test/${exp_name}/global_step_1/actor --test_hf_dir checkpoints/verl-test/${exp_name}/global_step_1/actor/huggingface
226
+ python -m verl.model_merger test --backend megatron --is-value-model --local_dir checkpoints-dut/verl-test/${exp_name}/global_step_1/critic --test_hf_dir checkpoints/verl-test/${exp_name}/global_step_1/critic/huggingface
227
+ - name: clean up
228
+ run: |
229
+ rm -rf checkpoints
230
+
231
+ e2e_ppo_trainer_megatron-deepseek-override-transformer-config:
232
+ needs: setup
233
+ runs-on: ["${{ needs.setup.outputs.runner-label || 'L20x8' }}"]
234
+ timeout-minutes: 60 # Increase this timeout value as needed
235
+ env:
236
+ HTTP_PROXY: ${{ secrets.PROXY_HTTP }}
237
+ HTTPS_PROXY: ${{ secrets.PROXY_HTTPS }}
238
+ NO_PROXY: "localhost,127.0.0.1,hf-mirror.com"
239
+ HF_ENDPOINT: "https://hf-mirror.com"
240
+ HF_HUB_ENABLE_HF_TRANSFER: "0" # This is more stable
241
+ steps:
242
+ - uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2
243
+ with:
244
+ fetch-depth: 0
245
+ - name: Install the current repository
246
+ run: |
247
+ pip3 install --no-deps -e .[test]
248
+ - name: Prepare GSM8K dataset
249
+ run: |
250
+ python3 examples/data_preprocess/gsm8k.py --local_dataset_path ${HOME}/models/hf_data/gsm8k
251
+ - name: Running GSM8K E2E training tests with 3D parallelism on 8 L20 GPUs with Megatron (DeepSeek)
252
+ run: |
253
+ ray stop --force
254
+ ENGINE=sglang SAVE_FREQ=1 MODEL_ID=deepseek-ai/deepseek-coder-1.3b-instruct COMMON_PP=2 COMMON_VPP=null bash tests/special_e2e/run_ppo_trainer_megatron.sh +actor_rollout_ref.actor.megatron.override_transformer_config.account_for_embedding_in_pipeline_split=true +actor_rollout_ref.actor.megatron.override_transformer_config.account_for_loss_in_pipeline_split=true
255
+ - name: Test Megatron checkpoints merging function (DeepSeek Actor and Critic)
256
+ run: |
257
+ exp_name="deepseek-coder-1.3b-instruct-megatron-gsm8k-minimal"
258
+ python -m verl.model_merger test --backend megatron --local_dir checkpoints/verl-test/${exp_name}/global_step_1/actor --test_hf_dir checkpoints/verl-test/${exp_name}/global_step_1/actor/huggingface
259
+ python -m verl.model_merger test --backend megatron --is-value-model --local_dir checkpoints/verl-test/${exp_name}/global_step_1/critic --test_hf_dir checkpoints/verl-test/${exp_name}/global_step_1/critic/huggingface
260
+ - name: clean up
261
+ run: |
262
+ rm -rf checkpoints
263
+
264
+ cleanup:
265
+ runs-on: ubuntu-latest
266
+ needs:
267
+ [
268
+ setup,
269
+ e2e_ppo_trainer_megatron-deepseek,
270
+ e2e_ppo_trainer_megatron-different-train-infer-tp-qwen-tie-embedding,
271
+ e2e_ppo_trainer_megatron-qwen-override-transformer-config,
272
+ e2e_ppo_trainer_megatron-deepseek-override-transformer-config,
273
+ ]
274
+ if: always()
275
+ steps:
276
+ - id: destroy-runner
277
+ uses: volcengine/vemlp-github-runner@v1
278
+ with:
279
+ mode: "destroy"
280
+ faas-url: "${{ env.DYNAMIC_RUNNER_ENDPOINT }}"
281
+ mlp-task-id: "${{ needs.setup.outputs.mlp-task-id }}"
.github/workflows/e2e_ppo_trainer_megatron_sglang_2.yml ADDED
@@ -0,0 +1,275 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # # Tests layout
2
+
3
+ # Each folder under tests/ corresponds to a test category for a sub-namespace in verl. For instance:
4
+ # - `tests/trainer` for testing functionality related to `verl/trainer`
5
+ # - `tests/models` for testing functionality related to `verl/models`
6
+ # - ...
7
+
8
+ # There are a few folders with `special_` prefix, created for special purposes:
9
+ # - `special_distributed`: unit tests that must run with multiple GPUs
10
+ # - `special_e2e`: end-to-end tests with training/generation scripts
11
+ # - `special_npu`: tests for NPUs
12
+ # - `special_sanity`: a suite of quick sanity tests
13
+ # - `special_standalone`: a set of tests that are designed to run in dedicated environments
14
+
15
+ # Accelerators for tests
16
+ # - By default tests are run with GPU available, except for the ones under `special_npu`, and any test script whose name ends with `on_cpu.py`.
17
+ # - For test scripts with `on_cpu.py` name suffix would be tested on CPU resources in linux environment.
18
+
19
+ # # Workflow layout
20
+
21
+ # All CI tests are configured by yaml files in `.github/workflows/`. Here's an overview of all test configs:
22
+ # 1. A list of always triggered CPU sanity tests: `check-pr-title.yml`, `secrets_scan.yml`, `pre-commit.yml`, `doc.yml`
23
+ # 2. Some heavy multi-GPU unit tests, such as `model.yml`, `vllm.yml`, `sgl.yml`
24
+ # 3. End-to-end tests: `e2e_*.yml`
25
+ # 4. Unit tests
26
+ # - `cpu_unit_tests.yml`, run pytest on all scripts with file name pattern `tests/**/test_*_on_cpu.py`
27
+ # - `gpu_unit_tests.yml`, run pytest on all test scripts whose file names do not have the `on_cpu.py` suffix.
28
+ # - Since cpu/gpu unit tests by default runs all tests under `tests`, please make sure tests are manually excluded in them when
29
+ # - new workflow yaml is added to `.github/workflows`
30
+ # - new tests are added to workflow mentioned in 2.
31
+
32
+ name: e2e_ppo_trainer_megatron_sglang_2
33
+
34
+ on:
35
+ # Trigger the workflow on push or pull request,
36
+ # but only for the main branch.
37
+ # For push, for now only anti-patterns are specified so it is more conservative
38
+ # and achieves higher coverage.
39
+ push:
40
+ branches:
41
+ - main
42
+ - v0.*
43
+ paths:
44
+ - "**/*.py"
45
+ # Other entrypoints
46
+ - "!verl/trainer/fsdp_sft_trainer.py"
47
+ # Recipes
48
+ - "!recipe/**"
49
+ # FSDP
50
+ - "!verl/workers/**/*dp_*.py"
51
+ pull_request:
52
+ branches:
53
+ - main
54
+ - v0.*
55
+ paths:
56
+ - "**/*.py"
57
+ # Other entrypoints
58
+ - "!docker/**"
59
+ # Docs
60
+ - "!**/*.md"
61
+ - "!docs/**"
62
+ - "!examples/**"
63
+ - "!tests/**"
64
+ - "!verl/trainer/main_*.py"
65
+ - "!verl/trainer/fsdp_sft_trainer.py"
66
+ # Recipes
67
+ - "!recipe/**"
68
+ # FSDP
69
+ - "!verl/workers/**/*dp_*.py"
70
+ # Entrypoints
71
+ - "verl/workers/rollout/sglang_rollout/*"
72
+ - ".github/workflows/e2e_ppo_trainer_megatron_sglang_2.yml"
73
+ - "examples/data_preprocess/gsm8k.py"
74
+ - "examples/data_preprocess/geo3k.py"
75
+ - "tests/special_e2e/run_ppo_trainer_megatron.sh"
76
+ - "verl/trainer/main_ppo.py"
77
+ - "verl/trainer/config/ppo_megatron_trainer.yaml"
78
+
79
+ # Cancel jobs on the same ref if a new one is triggered
80
+ concurrency:
81
+ group: ${{ github.workflow }}-${{ github.ref }}
82
+ cancel-in-progress: ${{ github.ref != 'refs/heads/main' }}
83
+
84
+ # Declare permissions just read content.
85
+ permissions:
86
+ contents: read
87
+
88
+ env:
89
+ IMAGE: "verl-ci-cn-beijing.cr.volces.com/verlai/verl:app-verl0.6-transformers4.56.1-sglang0.5.2-mcore0.13.0-te2.2"
90
+ DYNAMIC_RUNNER_ENDPOINT: "https://sd10g3clalm04ug7alq90.apigateway-cn-beijing.volceapi.com/runner"
91
+
92
+ jobs:
93
+ setup:
94
+ if: github.repository_owner == 'volcengine'
95
+ runs-on: ubuntu-latest
96
+ outputs:
97
+ runner-label: ${{ steps.create-runner.outputs.runner-label }}
98
+ mlp-task-id: ${{ steps.create-runner.outputs.mlp-task-id }}
99
+ steps:
100
+ - uses: actions/checkout@v4
101
+ - id: create-runner
102
+ uses: volcengine/vemlp-github-runner@v1
103
+ with:
104
+ mode: "create"
105
+ faas-url: "${{ env.DYNAMIC_RUNNER_ENDPOINT }}"
106
+ mlp-image: "${{ env.IMAGE }}"
107
+
108
+ e2e_ppo_trainer_megatron-moe-expert-parallel:
109
+ needs: setup
110
+ runs-on: ["${{ needs.setup.outputs.runner-label || 'L20x8' }}"]
111
+ timeout-minutes: 60 # Increase this timeout value as needed
112
+ env:
113
+ HTTP_PROXY: ${{ secrets.PROXY_HTTP }}
114
+ HTTPS_PROXY: ${{ secrets.PROXY_HTTPS }}
115
+ NO_PROXY: "localhost,127.0.0.1,hf-mirror.com"
116
+ HF_ENDPOINT: "https://hf-mirror.com"
117
+ HF_HUB_ENABLE_HF_TRANSFER: "0" # This is more stable
118
+ steps:
119
+ - uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2
120
+ with:
121
+ fetch-depth: 0
122
+ - name: Install the current repository
123
+ run: |
124
+ pip3 install --no-deps -e .[test]
125
+ - name: Prepare GSM8K dataset
126
+ run: |
127
+ python3 examples/data_preprocess/gsm8k.py --local_dataset_path ${HOME}/models/hf_data/gsm8k
128
+ - name: Running GSM8K E2E training tests with 3D parallelism on 8 L20 GPUs with Megatron (DeepSeek)
129
+ run: |
130
+ ray stop --force
131
+ MEGATRON_CI_DISABLE_EXPANDABLE_SEGMENTS=1 \
132
+ ADV_ESTIMATOR=grpo USE_DUMMY_MODEL=True DUMMY_MODEL_CONFIG_PATH=tests/special_e2e/ppo_trainer/expert_parallel/qwen2moe_minimal.json \
133
+ PPO_MAX_TOKEN_LEN=512 FWD_MAX_TOKEN_LEN=512 \
134
+ MAX_PROMPT_LENGTH=256 MAX_RESPONSE_LENGTH=256 \
135
+ MODEL_ID=Qwen/Qwen1.5-MoE-A2.7B-Chat \
136
+ ENGINE=sglang COMMON_PP=2 COMMON_VPP=null COMMON_CP=1 COMMON_TP=4 COMMON_EP=4 COMMON_ETP=1 INFER_TP=8 \
137
+ USE_DIST_CKPT=True ALL_OFFLOAD=True SKIP_SAVE_HF_MODEL=1 bash tests/special_e2e/run_ppo_trainer_megatron.sh
138
+ - name: clean up
139
+ run: |
140
+ rm -rf checkpoints
141
+
142
+ e2e_ppo_trainer_megatron-qwen2_5vl-3b:
143
+ needs: setup
144
+ runs-on: ["${{ needs.setup.outputs.runner-label || 'L20x8' }}"]
145
+ timeout-minutes: 60 # Increase this timeout value as needed
146
+ env:
147
+ HTTP_PROXY: ${{ secrets.PROXY_HTTP }}
148
+ HTTPS_PROXY: ${{ secrets.PROXY_HTTPS }}
149
+ NO_PROXY: "localhost,127.0.0.1,hf-mirror.com"
150
+ HF_ENDPOINT: "https://hf-mirror.com"
151
+ HF_HUB_ENABLE_HF_TRANSFER: "0" # This is more stable
152
+ steps:
153
+ - uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2
154
+ with:
155
+ fetch-depth: 0
156
+ - name: Install the current repository
157
+ run: |
158
+ pip3 install --no-deps -e .[test]
159
+ - name: Prepare Geo3k dataset
160
+ run: |
161
+ python3 examples/data_preprocess/geo3k.py --local_dataset_path ${HOME}/models/hf_data/hiyouga/geometry3k/
162
+ - name: Prepare dist_ckpt of Qwen2.5-VL-3B, only supports dist_ckpt
163
+ run: |
164
+ python3 scripts/converter_hf_to_mcore.py --hf_model_path ${HOME}/models/Qwen/Qwen2.5-VL-3B-Instruct --output_path checkpoints/verl-test/qwen2.5-vl-3b-megatron
165
+ - name: Running Geo3k E2E training tests with 3D parallelism on 8 L20 GPUs with Megatron (Qwen)
166
+ run: |
167
+ ray stop --force
168
+ ENGINE=sglang TRAIN_FILES=${HOME}/data/geo3k/train.parquet VAL_FILES=${HOME}/data/geo3k/test.parquet MAX_PROMPT_LENGTH=1024 MAX_RESPONSE_LENGTH=2048 MODEL_ID=Qwen/Qwen2.5-VL-3B-Instruct ADV_ESTIMATOR=grpo USE_DYNAMIC_BSZ=False SKIP_SAVE_HF_MODEL=1 COMMON_PP=4 COMMON_VPP=null COMMON_CP=1 COMMON_TP=2 USE_DIST_CKPT=true DIST_CKPT_PATH=checkpoints/verl-test/qwen2.5-vl-3b-megatron bash tests/special_e2e/run_ppo_trainer_megatron.sh
169
+ - name: clean up
170
+ run: |
171
+ rm -rf checkpoints
172
+
173
+ e2e_ppo_trainer_sglang:
174
+ needs: setup
175
+ runs-on: [ "${{ needs.setup.outputs.runner-label || 'L20x8' }}" ]
176
+ timeout-minutes: 40 # Increase this timeout value as needed
177
+ env:
178
+ HTTP_PROXY: ${{ secrets.PROXY_HTTP }}
179
+ HTTPS_PROXY: ${{ secrets.PROXY_HTTPS }}
180
+ NO_PROXY: "localhost,127.0.0.1,hf-mirror.com"
181
+ HF_ENDPOINT: "https://hf-mirror.com"
182
+ HF_HUB_ENABLE_HF_TRANSFER: "0" # This is more stable
183
+ steps:
184
+ - uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2
185
+ with:
186
+ fetch-depth: 0
187
+ - name: Install the current repository
188
+ run: |
189
+ pip3 install -e .[test,gpu,sglang]
190
+ - name: Prepare gsm8k dataset
191
+ run: |
192
+ ray stop --force
193
+ python3 examples/data_preprocess/gsm8k.py --local_dataset_path ${HOME}/models/hf_data/gsm8k
194
+ - name: Running GSM8K E2E training tests on 8 L20 GPUs with rmpad using function rm and save ckpt
195
+ run: |
196
+ ray stop --force
197
+ ENGINE=sglang bash tests/special_e2e/ppo_trainer/run_function_reward.sh
198
+ - name: Running GSM8K E2E training tests on sglang async
199
+ run: |
200
+ ray stop --force
201
+ TOTAL_TRAIN_STEPS=2 ENGINE=sglang ROLLOUT_MODE=async bash tests/special_e2e/ppo_trainer/run_function_reward.sh
202
+
203
+ e2e_ppo_trainer_sglang_vlm:
204
+ needs: setup
205
+ runs-on: [ "${{ needs.setup.outputs.runner-label || 'L20x8' }}" ]
206
+ timeout-minutes: 60 # Increase this timeout value as needed
207
+ env:
208
+ HTTP_PROXY: ${{ secrets.PROXY_HTTP }}
209
+ HTTPS_PROXY: ${{ secrets.PROXY_HTTPS }}
210
+ NO_PROXY: "localhost,127.0.0.1,hf-mirror.com"
211
+ HF_ENDPOINT: "https://hf-mirror.com"
212
+ HF_HUB_ENABLE_HF_TRANSFER: "0" # This is more stable
213
+ steps:
214
+ - uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2
215
+ with:
216
+ fetch-depth: 0
217
+ - name: Install the current repository
218
+ run: |
219
+ pip3 install -e .[test,geo,gpu,sglang] --no-deps
220
+ # Geo3k
221
+ - name: Prepare GEO3K dataset
222
+ run: |
223
+ ray stop --force
224
+ python3 examples/data_preprocess/geo3k.py --local_dataset_path ${HOME}/models/hf_data/hiyouga/geometry3k/
225
+ - name: Running GEO3K VLM E2E training tests on 8 L20 GPUs with rmpad using function rm
226
+ run: |
227
+ ray stop --force
228
+ TRAIN_FILES=$HOME/data/geo3k/train.parquet VAL_FILES=$HOME/data/geo3k/test.parquet \
229
+ MAX_PROMPT_LEN=1536 MAX_RESPONSE_LEN=1536 \
230
+ MODEL_ID=Qwen/Qwen2-VL-2B-Instruct \
231
+ ADV_ESTIMATOR=grpo RM_PAD=True USE_KL=True ENABLE_CHUNKED_PREFILL=False \
232
+ ENGINE=sglang GPU_MEMORY_UTILIZATION=0.6 ACTOR_FSDP_PARAM_OFFLOAD=True \
233
+ ACTOR_FSDP_OPTIMIZER_OFFLOAD=True REF_FSDP_PARAM_OFFLOAD=True \
234
+ bash tests/special_e2e/ppo_trainer/run_function_reward.sh
235
+ - name: Running GEO3K VLM E2E with rmpad using torch fused kernel (Qwen2.5-VL)
236
+ run: |
237
+ ray stop --force
238
+ FUSED_KERNELS=True TRAIN_FILES=$HOME/data/geo3k/train.parquet VAL_FILES=$HOME/data/geo3k/test.parquet \
239
+ MAX_PROMPT_LEN=1536 MAX_RESPONSE_LEN=1536 \
240
+ MODEL_ID=Qwen/Qwen2.5-VL-3B-Instruct \
241
+ ADV_ESTIMATOR=grpo RM_PAD=True USE_KL=True ENABLE_CHUNKED_PREFILL=False \
242
+ ENGINE=sglang GPU_MEMORY_UTILIZATION=0.6 ACTOR_FSDP_PARAM_OFFLOAD=True \
243
+ ACTOR_FSDP_OPTIMIZER_OFFLOAD=True REF_FSDP_PARAM_OFFLOAD=True \
244
+ bash tests/special_e2e/ppo_trainer/run_function_reward.sh
245
+ - name: Running GEO3K VLM E2E with rmpad using triton fused kernel (Qwen2.5-VL)
246
+ run: |
247
+ ray stop --force
248
+ FUSED_KERNELS=True FUSED_KERNEL_BACKEND=triton \
249
+ TRAIN_FILES=$HOME/data/geo3k/train.parquet VAL_FILES=$HOME/data/geo3k/test.parquet \
250
+ MAX_PROMPT_LEN=1536 MAX_RESPONSE_LEN=1536 \
251
+ MODEL_ID=Qwen/Qwen2.5-VL-3B-Instruct \
252
+ ADV_ESTIMATOR=grpo RM_PAD=True USE_KL=True ENABLE_CHUNKED_PREFILL=False \
253
+ ENGINE=sglang GPU_MEMORY_UTILIZATION=0.6 ACTOR_FSDP_PARAM_OFFLOAD=True \
254
+ ACTOR_FSDP_OPTIMIZER_OFFLOAD=True REF_FSDP_PARAM_OFFLOAD=True \
255
+ bash tests/special_e2e/ppo_trainer/run_function_reward.sh
256
+
257
+
258
+ cleanup:
259
+ runs-on: ubuntu-latest
260
+ needs:
261
+ [
262
+ setup,
263
+ e2e_ppo_trainer_megatron-moe-expert-parallel,
264
+ e2e_ppo_trainer_megatron-qwen2_5vl-3b,
265
+ e2e_ppo_trainer_sglang,
266
+ e2e_ppo_trainer_sglang_vlm
267
+ ]
268
+ if: always()
269
+ steps:
270
+ - id: destroy-runner
271
+ uses: volcengine/vemlp-github-runner@v1
272
+ with:
273
+ mode: "destroy"
274
+ faas-url: "${{ env.DYNAMIC_RUNNER_ENDPOINT }}"
275
+ mlp-task-id: "${{ needs.setup.outputs.mlp-task-id }}"
.github/workflows/e2e_ppo_trainer_megatron_vllm.yml ADDED
@@ -0,0 +1,292 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # # Tests layout
2
+
3
+ # Each folder under tests/ corresponds to a test category for a sub-namespace in verl. For instance:
4
+ # - `tests/trainer` for testing functionality related to `verl/trainer`
5
+ # - `tests/models` for testing functionality related to `verl/models`
6
+ # - ...
7
+
8
+ # There are a few folders with `special_` prefix, created for special purposes:
9
+ # - `special_distributed`: unit tests that must run with multiple GPUs
10
+ # - `special_e2e`: end-to-end tests with training/generation scripts
11
+ # - `special_npu`: tests for NPUs
12
+ # - `special_sanity`: a suite of quick sanity tests
13
+ # - `special_standalone`: a set of tests that are designed to run in dedicated environments
14
+
15
+ # Accelerators for tests
16
+ # - By default tests are run with GPU available, except for the ones under `special_npu`, and any test script whose name ends with `on_cpu.py`.
17
+ # - For test scripts with `on_cpu.py` name suffix would be tested on CPU resources in linux environment.
18
+
19
+ # # Workflow layout
20
+
21
+ # All CI tests are configured by yaml files in `.github/workflows/`. Here's an overview of all test configs:
22
+ # 1. A list of always triggered CPU sanity tests: `check-pr-title.yml`, `secrets_scan.yml`, `pre-commit.yml`, `doc.yml`
23
+ # 2. Some heavy multi-GPU unit tests, such as `model.yml`, `vllm.yml`, `sgl.yml`
24
+ # 3. End-to-end tests: `e2e_*.yml`
25
+ # 4. Unit tests
26
+ # - `cpu_unit_tests.yml`, run pytest on all scripts with file name pattern `tests/**/test_*_on_cpu.py`
27
+ # - `gpu_unit_tests.yml`, run pytest on all test scripts whose file names do not have the `on_cpu.py` suffix.
28
+ # - Since cpu/gpu unit tests by default runs all tests under `tests`, please make sure tests are manually excluded in them when
29
+ # - new workflow yaml is added to `.github/workflows`
30
+ # - new tests are added to workflow mentioned in 2.
31
+
32
+ name: e2e_ppo_trainer_megatron_vllm
33
+
34
+ on:
35
+ # Trigger the workflow on push or pull request,
36
+ # but only for the main branch.
37
+ # For push, for now only anti-patterns are specified so it is more conservative
38
+ # and achieves higher coverage.
39
+ push:
40
+ branches:
41
+ - main
42
+ - v0.*
43
+ paths:
44
+ - "**/*.py"
45
+ # Other entrypoints
46
+ - "!verl/trainer/fsdp_sft_trainer.py"
47
+ # Recipes
48
+ - "!recipe/**"
49
+ # FSDP
50
+ - "!verl/workers/**/*dp_*.py"
51
+ pull_request:
52
+ branches:
53
+ - main
54
+ - v0.*
55
+ paths:
56
+ - "**/*.py"
57
+ # Other entrypoints
58
+ - "!docker/**"
59
+ # Docs
60
+ - "!**/*.md"
61
+ - "!docs/**"
62
+ - "!examples/**"
63
+ - "!tests/**"
64
+ - "!verl/trainer/main_*.py"
65
+ - "!verl/trainer/fsdp_sft_trainer.py"
66
+ # Recipes
67
+ - "!recipe/**"
68
+ # FSDP
69
+ - "!verl/workers/**/*dp_*.py"
70
+ # Entrypoints
71
+ - ".github/workflows/e2e_ppo_trainer_megatron_vllm.yml"
72
+ - "examples/data_preprocess/gsm8k.py"
73
+ - "examples/data_preprocess/geo3k.py"
74
+ - "tests/special_e2e/run_ppo_trainer_megatron.sh"
75
+ - "verl/trainer/main_ppo.py"
76
+ - "verl/trainer/config/ppo_megatron_trainer.yaml"
77
+
78
+ # Cancel jobs on the same ref if a new one is triggered
79
+ concurrency:
80
+ group: ${{ github.workflow }}-${{ github.ref }}
81
+ cancel-in-progress: ${{ github.ref != 'refs/heads/main' }}
82
+
83
+ # Declare permissions just read content.
84
+ permissions:
85
+ contents: read
86
+
87
+ env:
88
+ IMAGE: "verl-ci-cn-beijing.cr.volces.com/verlai/verl:app-verl0.5-transformers4.55.4-vllm0.10.0-mcore0.13.0-te2.2"
89
+ DYNAMIC_RUNNER_ENDPOINT: "https://sd10g3clalm04ug7alq90.apigateway-cn-beijing.volceapi.com/runner"
90
+ TRANSFORMERS_VERSION: "4.56.2"
91
+
92
+ jobs:
93
+ setup:
94
+ if: github.repository_owner == 'volcengine'
95
+ runs-on: ubuntu-latest
96
+ outputs:
97
+ runner-label: ${{ steps.create-runner.outputs.runner-label }}
98
+ mlp-task-id: ${{ steps.create-runner.outputs.mlp-task-id }}
99
+ steps:
100
+ - uses: actions/checkout@v4
101
+ - id: create-runner
102
+ uses: volcengine/vemlp-github-runner@v1
103
+ with:
104
+ mode: "create"
105
+ faas-url: "${{ env.DYNAMIC_RUNNER_ENDPOINT }}"
106
+ mlp-image: "${{ env.IMAGE }}"
107
+
108
+ e2e_ppo_trainer_megatron-deepseek:
109
+ needs: setup
110
+ runs-on: ["${{ needs.setup.outputs.runner-label || 'L20x8' }}"]
111
+ timeout-minutes: 60 # Increase this timeout value as needed
112
+ env:
113
+ HTTP_PROXY: ${{ secrets.PROXY_HTTP }}
114
+ HTTPS_PROXY: ${{ secrets.PROXY_HTTPS }}
115
+ NO_PROXY: "localhost,127.0.0.1,hf-mirror.com"
116
+ HF_ENDPOINT: "https://hf-mirror.com"
117
+ HF_HUB_ENABLE_HF_TRANSFER: "0" # This is more stable
118
+ steps:
119
+ - uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2
120
+ with:
121
+ fetch-depth: 0
122
+ - name: Install the current repository
123
+ run: |
124
+ pip3 install --no-deps -e .[test]
125
+ pip3 install math-verify transformers==$TRANSFORMERS_VERSION
126
+ - name: Prepare GSM8K dataset
127
+ run: |
128
+ python3 examples/data_preprocess/gsm8k.py --local_dataset_path ${HOME}/models/hf_data/gsm8k
129
+ - name: Running GSM8K E2E training tests with 3D parallelism on 8 L20 GPUs with Megatron, use mbridge e2e to pre-load and save (Deepseek)
130
+ run: |
131
+ ray stop --force
132
+ ALL_OFFLOAD=True SAVE_FREQ=1 MODEL_ID=deepseek-ai/deepseek-coder-1.3b-instruct COMMON_PP=4 COMMON_VPP=null COMMON_CP=1 USE_MBRIDGE=True USE_DIST_CKPT=False \
133
+ bash tests/special_e2e/run_ppo_trainer_megatron.sh
134
+ - name: Running GSM8K E2E training tests with 3D parallelism on 8 L20 GPUs with Megatron, use mbridge e2e to pre-load and save (Deepseek)
135
+ run: |
136
+ ray stop --force
137
+ RESUME_MODE=auto MODEL_ID=deepseek-ai/deepseek-coder-1.3b-instruct TOTAL_TRAIN_STEPS=2 SAVE_FREQ=1 COMMON_PP=4 COMMON_VPP=null COMMON_CP=1 USE_MBRIDGE=True USE_DIST_CKPT=False \
138
+ bash tests/special_e2e/run_ppo_trainer_megatron.sh
139
+ - name: Running GSM8K E2E training tests with 3D parallelism on 8 L20 GPUs with Megatron (DeepSeek)
140
+ run: |
141
+ ray stop --force
142
+ export VLLM_USE_V1=1
143
+ ray start --head
144
+ MODE=async USE_FUSED_KERNELS=True MODEL_ID=deepseek-ai/deepseek-coder-1.3b-instruct TOTAL_TRAIN_STEPS=2 SAVE_FREQ=2 bash tests/special_e2e/run_ppo_trainer_megatron.sh
145
+ - name: Test Megatron checkpoints merging function (DeepSeek Actor and Critic)
146
+ run: |
147
+ exp_name="deepseek-coder-1.3b-instruct-megatron-gsm8k-minimal"
148
+ python -m verl.model_merger test --backend megatron --local_dir checkpoints/verl-test/${exp_name}/global_step_2/actor --test_hf_dir checkpoints/verl-test/${exp_name}/global_step_2/actor/huggingface
149
+ python -m verl.model_merger test --backend megatron --is-value-model --local_dir checkpoints/verl-test/${exp_name}/global_step_2/critic --test_hf_dir checkpoints/verl-test/${exp_name}/global_step_2/critic/huggingface
150
+ - name: Test Megatron distributed checkpoints merging function (DeepSeek)
151
+ run: |
152
+ exp_name="deepseek-coder-1.3b-instruct-megatron-gsm8k-minimal"
153
+ torchrun --nproc_per_node 4 --nnodes 1 -m verl.model_merger merge --backend megatron --local_dir checkpoints/verl-test/${exp_name}/global_step_2/actor --target_dir checkpoints/verl-test/${exp_name}/global_step_2/actor/hf_model
154
+ - name: Running GRPO GSM8K E2E training tests with 3D parallelism on 8 L20 GPUs with Megatron (Deepseek)
155
+ run: |
156
+ ray stop --force
157
+ ADV_ESTIMATOR=grpo USE_DYNAMIC_BSZ=False MODEL_ID=deepseek-ai/deepseek-coder-1.3b-instruct bash tests/special_e2e/run_ppo_trainer_megatron.sh
158
+ - name: clean up
159
+ run: |
160
+ rm -rf checkpoints
161
+ e2e_ppo_trainer_megatron-qwen3:
162
+ needs: setup
163
+ runs-on: ["${{ needs.setup.outputs.runner-label || 'L20x8' }}"]
164
+ timeout-minutes: 60 # Increase this timeout value as needed
165
+ env:
166
+ HTTP_PROXY: ${{ secrets.PROXY_HTTP }}
167
+ HTTPS_PROXY: ${{ secrets.PROXY_HTTPS }}
168
+ NO_PROXY: "localhost,127.0.0.1,hf-mirror.com"
169
+ HF_ENDPOINT: "https://hf-mirror.com"
170
+ HF_HUB_ENABLE_HF_TRANSFER: "0" # This is more stable
171
+ steps:
172
+ - uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2
173
+ with:
174
+ fetch-depth: 0
175
+ - name: Install the current repository
176
+ run: |
177
+ pip3 install --no-deps -e .[test]
178
+ pip3 install math-verify transformers==$TRANSFORMERS_VERSION
179
+ - name: Prepare GSM8K dataset
180
+ run: |
181
+ python3 examples/data_preprocess/gsm8k.py --local_dataset_path ${HOME}/models/hf_data/gsm8k
182
+ - name: Running GSM8K E2E training tests with 3D parallelism on 8 L20 GPUs with Megatron (Qwen3) with validation and saving
183
+ run: |
184
+ ray stop --force
185
+ ALL_OFFLOAD=True VAL_BEFORE_TRAIN=True TEST_FREQ=1 SAVE_FREQ=1 MODEL_ID=Qwen/Qwen3-0.6B bash tests/special_e2e/run_ppo_trainer_megatron.sh
186
+ - name: Running GSM8K E2E training tests with 3D parallelism on 8 L20 GPUs with Megatron (Qwen3) testing learning rate scheduler
187
+ run: |
188
+ ray stop --force
189
+ LR_WARMUP_STEPS=1 TOTAL_TRAIN_STEPS=2 MODEL_ID=Qwen/Qwen3-0.6B bash tests/special_e2e/run_ppo_trainer_megatron.sh
190
+
191
+ - name: Test Megatron checkpoints merging function (Qwen3 Actor and Critic)
192
+ run: |
193
+ exp_name="qwen3-0.6b-megatron-gsm8k-minimal"
194
+ python -m verl.model_merger test --backend megatron --tie-word-embedding --local_dir checkpoints/verl-test/${exp_name}/global_step_1/actor --test_hf_dir checkpoints/verl-test/${exp_name}/global_step_1/actor/huggingface
195
+ python -m verl.model_merger test --backend megatron --is-value-model --local_dir checkpoints/verl-test/${exp_name}/global_step_1/critic --test_hf_dir checkpoints/verl-test/${exp_name}/global_step_1/critic/huggingface
196
+ - name: clean up
197
+ run: |
198
+ rm -rf checkpoints
199
+ e2e_ppo_trainer_megatron-different-train-infer-tp-qwen-tie-embedding:
200
+ needs: setup
201
+ runs-on: ["${{ needs.setup.outputs.runner-label || 'L20x8' }}"]
202
+ timeout-minutes: 60 # Increase this timeout value as needed
203
+ env:
204
+ HTTP_PROXY: ${{ secrets.PROXY_HTTP }}
205
+ HTTPS_PROXY: ${{ secrets.PROXY_HTTPS }}
206
+ NO_PROXY: "localhost,127.0.0.1,hf-mirror.com"
207
+ HF_ENDPOINT: "https://hf-mirror.com"
208
+ HF_HUB_ENABLE_HF_TRANSFER: "0" # This is more stable
209
+ steps:
210
+ - uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2
211
+ with:
212
+ fetch-depth: 0
213
+ - name: Install the current repository
214
+ run: |
215
+ pip3 install --no-deps -e .[test]
216
+ pip3 install math-verify transformers==$TRANSFORMERS_VERSION
217
+ - name: Prepare GSM8K dataset
218
+ run: |
219
+ python3 examples/data_preprocess/gsm8k.py --local_dataset_path ${HOME}/models/hf_data/gsm8k
220
+ - name: Running GSM8K E2E training tests with 3D parallelism on 8 L20 GPUs with tie-embedding Megatron (Qwen) with train tp > infer tp
221
+ run: |
222
+ ray stop --force
223
+ VAL_BEFORE_TRAIN=True TEST_FREQ=1 SAVE_FREQ=1 TRAIN_TP=2 INFER_TP=1 MODEL_ID=Qwen/Qwen2.5-1.5B bash tests/special_e2e/run_ppo_trainer_megatron.sh
224
+ - name: Running GSM8K E2E training tests with 3D parallelism on 8 L20 GPUs with Megatron (Qwen) with train tp < infer tp
225
+ run: |
226
+ ray stop --force
227
+ VAL_BEFORE_TRAIN=True TEST_FREQ=1 SAVE_FREQ=1 TRAIN_TP=1 INFER_TP=2 ALL_OFFLOAD=True MODEL_ID=Qwen/Qwen2.5-1.5B bash tests/special_e2e/run_ppo_trainer_megatron.sh
228
+ - name: clean up
229
+ run: |
230
+ rm -rf checkpoints
231
+ e2e_ppo_trainer_megatron-qwen-override-transformer-config:
232
+ needs: setup
233
+ runs-on: ["${{ needs.setup.outputs.runner-label || 'L20x8' }}"]
234
+ timeout-minutes: 60 # Increase this timeout value as needed
235
+ env:
236
+ HTTP_PROXY: ${{ secrets.PROXY_HTTP }}
237
+ HTTPS_PROXY: ${{ secrets.PROXY_HTTPS }}
238
+ NO_PROXY: "localhost,127.0.0.1,hf-mirror.com"
239
+ HF_ENDPOINT: "https://hf-mirror.com"
240
+ HF_HUB_ENABLE_HF_TRANSFER: "0" # This is more stable
241
+ steps:
242
+ - uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2
243
+ with:
244
+ fetch-depth: 0
245
+ - name: Install the current repository
246
+ run: |
247
+ pip3 install --no-deps -e .[test]
248
+ pip3 install math-verify transformers==$TRANSFORMERS_VERSION
249
+ - name: Prepare GSM8K dataset
250
+ run: |
251
+ python3 examples/data_preprocess/gsm8k.py --local_dataset_path ${HOME}/models/hf_data/gsm8k
252
+ # - name: Download Model to Use
253
+ # run: |
254
+ # huggingface-cli download Qwen/Qwen2.5-0.5B --local-dir ${HOME}/models/Qwen/Qwen2.5-0.5B
255
+ # export HF_HUB_OFFLINE=1
256
+ - name: Prepare dist_ckpt of Qwen2.5-0.5B, uneven layer distribution only supports dist_ckpt
257
+ run: |
258
+ python3 scripts/converter_hf_to_mcore.py --hf_model_path ${HOME}/models/Qwen/Qwen2.5-0.5B --output_path checkpoints/verl-test/qwen2.5-0.5b-megatron
259
+ - name: Running GSM8K E2E training tests with 3D parallelism on 8 L20 GPUs with Megatron (Qwen)
260
+ run: |
261
+ ray stop --force
262
+ SAVE_FREQ=1 COMMON_PP=4 COMMON_VPP=null COMMON_CP=1 SKIP_SAVE_HF_MODEL=1 USE_DIST_CKPT=True DIST_CKPT_PATH=checkpoints/verl-test/qwen2.5-0.5b-megatron \
263
+ bash tests/special_e2e/run_ppo_trainer_megatron.sh +actor_rollout_ref.actor.megatron.override_transformer_config.num_layers_in_first_pipeline_stage=8 +actor_rollout_ref.actor.megatron.override_transformer_config.num_layers_in_last_pipeline_stage=4
264
+ cp -r checkpoints checkpoints-dut
265
+ SAVE_FREQ=1 COMMON_PP=4 COMMON_VPP=null COMMON_CP=1 bash tests/special_e2e/run_ppo_trainer_megatron.sh
266
+ - name: Test Megatron checkpoints merging function (Qwen Actor and Critic)
267
+ run: |
268
+ exp_name="qwen2.5-0.5b-megatron-gsm8k-minimal"
269
+ python -m verl.model_merger test --backend megatron --tie-word-embedding --local_dir checkpoints-dut/verl-test/${exp_name}/global_step_1/actor --test_hf_dir checkpoints/verl-test/${exp_name}/global_step_1/actor/huggingface
270
+ python -m verl.model_merger test --backend megatron --is-value-model --local_dir checkpoints-dut/verl-test/${exp_name}/global_step_1/critic --test_hf_dir checkpoints/verl-test/${exp_name}/global_step_1/critic/huggingface
271
+ - name: clean up
272
+ run: |
273
+ rm -rf checkpoints
274
+
275
+ cleanup:
276
+ runs-on: ubuntu-latest
277
+ needs:
278
+ [
279
+ setup,
280
+ e2e_ppo_trainer_megatron-deepseek,
281
+ e2e_ppo_trainer_megatron-qwen3,
282
+ e2e_ppo_trainer_megatron-different-train-infer-tp-qwen-tie-embedding,
283
+ e2e_ppo_trainer_megatron-qwen-override-transformer-config,
284
+ ]
285
+ if: always()
286
+ steps:
287
+ - id: destroy-runner
288
+ uses: volcengine/vemlp-github-runner@v1
289
+ with:
290
+ mode: "destroy"
291
+ faas-url: "${{ env.DYNAMIC_RUNNER_ENDPOINT }}"
292
+ mlp-task-id: "${{ needs.setup.outputs.mlp-task-id }}"
.github/workflows/e2e_ppo_trainer_megatron_vllm_2.yml ADDED
@@ -0,0 +1,420 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # # Tests layout
2
+
3
+ # Each folder under tests/ corresponds to a test category for a sub-namespace in verl. For instance:
4
+ # - `tests/trainer` for testing functionality related to `verl/trainer`
5
+ # - `tests/models` for testing functionality related to `verl/models`
6
+ # - ...
7
+
8
+ # There are a few folders with `special_` prefix, created for special purposes:
9
+ # - `special_distributed`: unit tests that must run with multiple GPUs
10
+ # - `special_e2e`: end-to-end tests with training/generation scripts
11
+ # - `special_npu`: tests for NPUs
12
+ # - `special_sanity`: a suite of quick sanity tests
13
+ # - `special_standalone`: a set of test that are designed to run in dedicated environments
14
+
15
+ # Accelerators for tests
16
+ # - By default tests are run with GPU available, except for the ones under `special_npu`, and any test script whose name ends with `on_cpu.py`.
17
+ # - For test scripts with `on_cpu.py` name suffix would be tested on CPU resources in linux environment.
18
+
19
+ # # Workflow layout
20
+
21
+ # All CI tests are configured by yaml files in `.github/workflows/`. Here's an overview of all test configs:
22
+ # 1. A list of always triggered CPU sanity tests: `check-pr-title.yml`, `secrets_scan.yml`, `check-pr-title,yml`, `pre-commit.yml`, `doc.yml`
23
+ # 2. Some heavy multi-GPU unit tests, such as `model.yml`, `vllm.yml`, `sgl.yml`
24
+ # 3. End-to-end tests: `e2e_*.yml`
25
+ # 4. Unit tests
26
+ # - `cpu_unit_tests.yml`, run pytest on all scripts with file name pattern `tests/**/test_*_on_cpu.py`
27
+ # - `gpu_unit_tests.yml`, run pytest on all scripts with file without the `on_cpu.py` suffix.
28
+ # - Since cpu/gpu unit tests by default runs all tests under `tests`, please make sure tests are manually excluded in them when
29
+ # - new workflow yaml is added to `.github/workflows`
30
+ # - new tests are added to workflow mentioned in 2.
31
+
32
+ name: e2e_ppo_trainer_megatron_vllm_2
33
+
34
+ on:
35
+ # Trigger the workflow on push or pull request,
36
+ # but only for the main branch.
37
+ # For push, for now only anti-patterns are specified so it is more conservative
38
+ # and achieves higher coverage.
39
+ push:
40
+ branches:
41
+ - main
42
+ - v0.*
43
+ paths:
44
+ - "**/*.py"
45
+ # Other entrypoints
46
+ - "!verl/trainer/fsdp_sft_trainer.py"
47
+ # Recipes
48
+ - "!recipe/**"
49
+ # FSDP
50
+ - "!verl/workers/**/*dp_*.py"
51
+ pull_request:
52
+ branches:
53
+ - main
54
+ - v0.*
55
+ paths:
56
+ - "**/*.py"
57
+ # Other entrypoints
58
+ - "!docker/**"
59
+ # Docs
60
+ - "!**/*.md"
61
+ - "!docs/**"
62
+ - "!examples/**"
63
+ - "!tests/**"
64
+ - "!verl/trainer/main_*.py"
65
+ - "!verl/trainer/fsdp_sft_trainer.py"
66
+ # Recipes
67
+ - "!recipe/**"
68
+ # FSDP
69
+ - "!verl/workers/**/*dp_*.py"
70
+ # Entrypoints
71
+ - ".github/workflows/e2e_ppo_trainer_megatron_vllm.yml"
72
+ - "examples/data_preprocess/gsm8k.py"
73
+ - "examples/data_preprocess/geo3k.py"
74
+ - "tests/special_e2e/run_ppo_trainer_megatron.sh"
75
+ - "verl/trainer/main_ppo.py"
76
+ - "verl/trainer/config/ppo_megatron_trainer.yaml"
77
+
78
+ # Cancel jobs on the same ref if a new one is triggered
79
+ concurrency:
80
+ group: ${{ github.workflow }}-${{ github.ref }}
81
+ cancel-in-progress: ${{ github.ref != 'refs/heads/main' }}
82
+
83
+ # Declare permissions just read content.
84
+ permissions:
85
+ contents: read
86
+
87
+ env:
88
+ IMAGE: "verl-ci-cn-beijing.cr.volces.com/verlai/verl:app-verl0.5-transformers4.55.4-vllm0.10.0-mcore0.13.0-te2.2"
89
+ DYNAMIC_RUNNER_ENDPOINT: "https://sd10g3clalm04ug7alq90.apigateway-cn-beijing.volceapi.com/runner"
90
+ TRANSFORMERS_VERSION: "4.56.2"
91
+
92
+ jobs:
93
+ setup:
94
+ if: github.repository_owner == 'volcengine'
95
+ runs-on: ubuntu-latest
96
+ outputs:
97
+ runner-label: ${{ steps.create-runner.outputs.runner-label }}
98
+ mlp-task-id: ${{ steps.create-runner.outputs.mlp-task-id }}
99
+ steps:
100
+ - uses: actions/checkout@v4
101
+ - id: create-runner
102
+ uses: volcengine/vemlp-github-runner@v1
103
+ with:
104
+ mode: "create"
105
+ faas-url: "${{ env.DYNAMIC_RUNNER_ENDPOINT }}"
106
+ mlp-image: "${{ env.IMAGE }}"
107
+
108
+ e2e_ppo_trainer_megatron-deepseek-override-transformer-config:
109
+ needs: setup
110
+ runs-on: ["${{ needs.setup.outputs.runner-label || 'L20x8' }}"]
111
+ timeout-minutes: 60 # Increase this timeout value as needed
112
+ env:
113
+ HTTP_PROXY: ${{ secrets.PROXY_HTTP }}
114
+ HTTPS_PROXY: ${{ secrets.PROXY_HTTPS }}
115
+ NO_PROXY: "localhost,127.0.0.1,hf-mirror.com"
116
+ HF_ENDPOINT: "https://hf-mirror.com"
117
+ HF_HUB_ENABLE_HF_TRANSFER: "0" # This is more stable
118
+ steps:
119
+ - uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2
120
+ with:
121
+ fetch-depth: 0
122
+ - name: Install the current repository
123
+ run: |
124
+ pip3 install --no-deps -e .[test]
125
+ pip3 install transformers==$TRANSFORMERS_VERSION
126
+ - name: Prepare GSM8K dataset
127
+ run: |
128
+ python3 examples/data_preprocess/gsm8k.py --local_dataset_path ${HOME}/models/hf_data/gsm8k
129
+ - name: Running GSM8K E2E training tests with 3D parallelism on 8 L20 GPUs with Megatron (DeepSeek)
130
+ run: |
131
+ ray stop --force
132
+ SAVE_FREQ=1 MODEL_ID=deepseek-ai/deepseek-coder-1.3b-instruct COMMON_PP=2 COMMON_VPP=null bash tests/special_e2e/run_ppo_trainer_megatron.sh +actor_rollout_ref.actor.megatron.override_transformer_config.account_for_embedding_in_pipeline_split=true +actor_rollout_ref.actor.megatron.override_transformer_config.account_for_loss_in_pipeline_split=true
133
+ - name: Test Megatron checkpoints merging function (DeepSeek Actor and Critic)
134
+ run: |
135
+ exp_name="deepseek-coder-1.3b-instruct-megatron-gsm8k-minimal"
136
+ python -m verl.model_merger test --backend megatron --local_dir checkpoints/verl-test/${exp_name}/global_step_1/actor --test_hf_dir checkpoints/verl-test/${exp_name}/global_step_1/actor/huggingface
137
+ python -m verl.model_merger test --backend megatron --is-value-model --local_dir checkpoints/verl-test/${exp_name}/global_step_1/critic --test_hf_dir checkpoints/verl-test/${exp_name}/global_step_1/critic/huggingface
138
+ - name: clean up
139
+ run: |
140
+ rm -rf checkpoints
141
+ e2e_ppo_trainer_megatron-moe-expert-parallel:
142
+ needs: setup
143
+ runs-on: ["${{ needs.setup.outputs.runner-label || 'L20x8' }}"]
144
+ timeout-minutes: 60 # Increase this timeout value as needed
145
+ env:
146
+ HTTP_PROXY: ${{ secrets.PROXY_HTTP }}
147
+ HTTPS_PROXY: ${{ secrets.PROXY_HTTPS }}
148
+ NO_PROXY: "localhost,127.0.0.1,hf-mirror.com"
149
+ HF_ENDPOINT: "https://hf-mirror.com"
150
+ HF_HUB_ENABLE_HF_TRANSFER: "0" # This is more stable
151
+ steps:
152
+ - uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2
153
+ with:
154
+ fetch-depth: 0
155
+ - name: Install the current repository
156
+ run: |
157
+ pip3 install --no-deps -e .[test]
158
+ pip3 install mbridge
159
+ pip3 install transformers==$TRANSFORMERS_VERSION
160
+ - name: Prepare GSM8K dataset
161
+ run: |
162
+ python3 examples/data_preprocess/gsm8k.py --local_dataset_path ${HOME}/models/hf_data/gsm8k
163
+ - name: Running GSM8K E2E training tests with 3D parallelism on 8 L20 GPUs with Megatron (DeepSeek)
164
+ run: |
165
+ ray stop --force
166
+ ADV_ESTIMATOR=grpo USE_DUMMY_MODEL=True DUMMY_MODEL_CONFIG_PATH=tests/special_e2e/ppo_trainer/expert_parallel/qwen2moe_minimal.json \
167
+ PPO_MAX_TOKEN_LEN=512 FWD_MAX_TOKEN_LEN=512 \
168
+ MAX_PROMPT_LENGTH=256 MAX_RESPONSE_LENGTH=256 \
169
+ MODEL_ID=Qwen/Qwen1.5-MoE-A2.7B-Chat USE_MBRIDGE=True \
170
+ COMMON_PP=2 COMMON_VPP=null COMMON_CP=1 COMMON_TP=4 COMMON_EP=4 COMMON_ETP=1 INFER_TP=8 \
171
+ USE_DIST_CKPT=True ALL_OFFLOAD=True SKIP_SAVE_HF_MODEL=1 bash tests/special_e2e/run_ppo_trainer_megatron.sh
172
+ - name: clean up
173
+ run: |
174
+ rm -rf checkpoints
175
+ e2e_ppo_trainer_megatron-qwen2_5vl-3b:
176
+ needs: setup
177
+ runs-on: ["${{ needs.setup.outputs.runner-label || 'L20x8' }}"]
178
+ timeout-minutes: 60 # Increase this timeout value as needed
179
+ env:
180
+ HTTP_PROXY: ${{ secrets.PROXY_HTTP }}
181
+ HTTPS_PROXY: ${{ secrets.PROXY_HTTPS }}
182
+ NO_PROXY: "localhost,127.0.0.1,hf-mirror.com"
183
+ HF_ENDPOINT: "https://hf-mirror.com"
184
+ HF_HUB_ENABLE_HF_TRANSFER: "0" # This is more stable
185
+ steps:
186
+ - uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2
187
+ with:
188
+ fetch-depth: 0
189
+ - name: Install the current repository
190
+ run: |
191
+ pip3 install --no-deps -e .[test]
192
+ pip3 install transformers==$TRANSFORMERS_VERSION
193
+ - name: Prepare Geo3k dataset
194
+ run: |
195
+ python3 examples/data_preprocess/geo3k.py --local_dataset_path ${HOME}/models/hf_data/hiyouga/geometry3k/
196
+ - name: Prepare dist_ckpt of Qwen2.5-VL-3B, only supports dist_ckpt
197
+ run: |
198
+ python3 scripts/converter_hf_to_mcore.py --hf_model_path ${HOME}/models/Qwen/Qwen2.5-VL-3B-Instruct --output_path checkpoints/verl-test/qwen2.5-vl-3b-megatron
199
+ - name: Running Geo3k E2E training tests with 3D parallelism on 8 L20 GPUs with Megatron (Qwen)
200
+ run: |
201
+ ray stop --force
202
+ TRAIN_FILES=${HOME}/data/geo3k/train.parquet VAL_FILES=${HOME}/data/geo3k/test.parquet \
203
+ MAX_PROMPT_LENGTH=1024 MAX_RESPONSE_LENGTH=2048 MODEL_ID=Qwen/Qwen2.5-VL-3B-Instruct ADV_ESTIMATOR=grpo \
204
+ USE_DYNAMIC_BSZ=False USE_FUSED_KERNELS=True SKIP_SAVE_HF_MODEL=1 \
205
+ COMMON_PP=4 COMMON_VPP=null COMMON_CP=1 COMMON_TP=2 USE_DIST_CKPT=true \
206
+ DIST_CKPT_PATH=checkpoints/verl-test/qwen2.5-vl-3b-megatron bash tests/special_e2e/run_ppo_trainer_megatron.sh
207
+ - name: clean up
208
+ run: |
209
+ rm -rf checkpoints
210
+ e2e_ppo_trainer_vllm:
211
+ needs: setup
212
+ runs-on: [ "${{ needs.setup.outputs.runner-label || 'L20x8' }}" ]
213
+ timeout-minutes: 60 # Increase this timeout value as needed
214
+ env:
215
+ HTTP_PROXY: ${{ secrets.PROXY_HTTP }}
216
+ HTTPS_PROXY: ${{ secrets.PROXY_HTTPS }}
217
+ NO_PROXY: "localhost,127.0.0.1,hf-mirror.com"
218
+ HF_ENDPOINT: "https://hf-mirror.com"
219
+ HF_HUB_ENABLE_HF_TRANSFER: "0" # This is more stable
220
+ steps:
221
+ - uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2
222
+ with:
223
+ fetch-depth: 0
224
+ - name: Install the current repository
225
+ run: |
226
+ pip3 install --no-deps -e .[test,vllm]
227
+ pip3 install transformers==$TRANSFORMERS_VERSION
228
+ - name: Prepare GSM8K dataset
229
+ run: |
230
+ ray stop --force
231
+ python3 examples/data_preprocess/gsm8k.py --local_dataset_path ${HOME}/models/hf_data/gsm8k
232
+ # HF sanity
233
+ # - name: Running GSM8K E2E training tests on 1 L20 GPU with hf for sanity
234
+ # run: |
235
+ # ray stop --force
236
+ # bash tests/special_e2e/ppo_trainer/run_single_gpu.sh
237
+ # # HF sanity
238
+ # - name: Running GSM8K E2E training tests on 1 L20 GPU with engine interface for sanity.
239
+ # run: |
240
+ # ray stop --force
241
+ # bash tests/special_e2e/ppo_trainer/run_single_gpu_with_engine.sh
242
+ # Function RM
243
+ - name: Running GSM8K E2E training tests on 8 L20 GPUs with rmpad using function rm with validation and saving (FSDP_SIZE=8)
244
+ run: |
245
+ ray stop --force
246
+ VAL_BEFORE_TRAIN=True TEST_FREQ=1 SAVE_FREQ=1 SAVE_HF_MODEL=True VERL_EXP_NAME="qwen2.5-0.5b-function-reward-minimal-fsdp-size8" bash tests/special_e2e/ppo_trainer/run_function_reward.sh
247
+ - name: Running GSM8K E2E training tests on 8 L20 GPUs with rmpad using function rm after resuming
248
+ run: |
249
+ ray stop --force
250
+ RESUME_MODE=auto VERL_EXP_NAME="qwen2.5-0.5b-function-reward-minimal-fsdp-size8" bash tests/special_e2e/ppo_trainer/run_function_reward.sh
251
+ - name: Test merging FSDP checkpoints (Qwen Actor)
252
+ run: |
253
+ exp_name="qwen2.5-0.5b-function-reward-minimal-fsdp-size8"
254
+ python -m verl.model_merger test --backend fsdp --local_dir checkpoints/verl-test/${exp_name}/global_step_1/actor --test_hf_dir checkpoints/verl-test/${exp_name}/global_step_1/actor/huggingface
255
+ - name: Running GSM8K E2E training tests on 8 L20 GPUs with rmpad using function rm with validation and saving (DDP_SIZE=2, FSDP_SIZE=4)
256
+ run: |
257
+ ray stop --force
258
+ VAL_BEFORE_TRAIN=True TEST_FREQ=1 SAVE_FREQ=1 SAVE_HF_MODEL=True FSDP_SIZE=4 VERL_EXP_NAME="qwen2.5-0.5b-function-reward-minimal-ddp-size2-fsdp-size4" bash tests/special_e2e/ppo_trainer/run_function_reward.sh
259
+ - name: Test merging DDP+FSDP checkpoints (Qwen Actor)
260
+ run: |
261
+ exp_name="qwen2.5-0.5b-function-reward-minimal-ddp-size2-fsdp-size4"
262
+ python -m verl.model_merger test --backend fsdp --local_dir checkpoints/verl-test/${exp_name}/global_step_1/actor --test_hf_dir checkpoints/verl-test/${exp_name}/global_step_1/actor/huggingface
263
+ - name: Running GSM8K E2E training tests on 8 L20 GPUs with rmpad using function rm with validation and saving (FSDP2)
264
+ run: |
265
+ ray stop --force
266
+ VAL_BEFORE_TRAIN=True TEST_FREQ=1 SAVE_FREQ=1 SAVE_HF_MODEL=True VERL_EXP_NAME="qwen2.5-0.5b-function-reward-minimal-fsdp2-size8" STRATEGY=fsdp2 bash tests/special_e2e/ppo_trainer/run_function_reward.sh
267
+ - name: Test merging FSDP2 checkpoints (Qwen Actor)
268
+ run: |
269
+ exp_name="qwen2.5-0.5b-function-reward-minimal-fsdp2-size8"
270
+ python -m verl.model_merger test --backend fsdp --local_dir checkpoints/verl-test/${exp_name}/global_step_1/actor --test_hf_dir checkpoints/verl-test/${exp_name}/global_step_1/actor/huggingface
271
+ - name: Running GSM8K E2E without rmpad using function rm
272
+ run: |
273
+ ray stop --force
274
+ RM_PAD=False bash tests/special_e2e/ppo_trainer/run_function_reward.sh
275
+ - name: Running GSM8K E2E training tests on 8 L20 GPUs with rmpad using function rm (GRPO)
276
+ run: |
277
+ ray stop --force
278
+ ADV_ESTIMATOR=grpo USE_KL=True bash tests/special_e2e/ppo_trainer/run_function_reward.sh
279
+ - name: Running GSM8K E2E training tests on 8 L20 GPUs with rmpad using function rm (ReMax)
280
+ run: |
281
+ ray stop --force
282
+ ADV_ESTIMATOR=remax USE_KL=True bash tests/special_e2e/ppo_trainer/run_function_reward.sh
283
+ - name: Running GSM8K E2E training tests on 8 L20 GPUs with rmpad using customized reward function
284
+ run: |
285
+ ray stop --force
286
+ CUSTOM_REWARD_FN=True bash tests/special_e2e/ppo_trainer/run_function_reward.sh
287
+ - name: Running GSM8K E2E training tests on 8 L20 GPUs with rmpad using function rm with in-reward kl and kl loss
288
+ run: |
289
+ ray stop --force
290
+ USE_KL=True bash tests/special_e2e/ppo_trainer/run_function_reward.sh
291
+ # LoRA tests
292
+ - name: Running GSM8K E2E training tests on 8 L20 GPUs with grpo lora using function rm with use_shm
293
+ run: |
294
+ ray stop --force
295
+ ADV_ESTIMATOR=grpo USE_SHM=True LORA_RANK=32 LOAD_FORMAT=safetensors bash tests/special_e2e/ppo_trainer/run_function_reward.sh
296
+ - name: Running GSM8K E2E training tests on 8 L20 GPUs with grpo lora using function rm with use_shm and layered_summon
297
+ run: |
298
+ ray stop --force
299
+ ADV_ESTIMATOR=grpo USE_SHM=True LORA_RANK=32 LOAD_FORMAT=safetensors LAYERED_SUMMON=True TOTAL_TRAIN_STEPS=1 SAVE_FREQ=1 FSDP_SIZE=4 VERL_EXP_NAME="qwen2.5-0.5b-function-reward-minimal" bash tests/special_e2e/ppo_trainer/run_function_reward.sh
300
+ - name: Test GRPO LoRA checkpoints merging function
301
+ run: |
302
+ export EXP_NAME="qwen2.5-0.5b-function-reward-minimal"
303
+ ls checkpoints/verl-test/${EXP_NAME}/global_step_1/actor
304
+ cat checkpoints/verl-test/${EXP_NAME}/global_step_1/actor/huggingface/config.json
305
+ python3 -m verl.model_merger merge --backend fsdp --local_dir checkpoints/verl-test/${EXP_NAME}/global_step_1/actor/ --target_dir checkpoints/verl-test/${EXP_NAME}/global_step_1/actor/huggingface
306
+ - name: Running GSM8K E2E training tests on 8 L20 GPUs with grpo lora using function rm with use_shm and layered_summon with fsdp2
307
+ run: |
308
+ ray stop --force
309
+ ADV_ESTIMATOR=grpo USE_SHM=True LORA_RANK=32 LOAD_FORMAT=safetensors LAYERED_SUMMON=True STRATEGY=fsdp2 bash tests/special_e2e/ppo_trainer/run_function_reward.sh
310
+ # Model RM
311
+ - name: Running GRPO GSM8K E2E training tests with FSDP on 8 L20 GPUs (DeepSeek)
312
+ run: |
313
+ ray stop --force
314
+ MODEL_ID=deepseek-ai/deepseek-coder-1.3b-instruct bash tests/special_e2e/ppo_trainer/run_function_reward.sh
315
+ - name: Running GSM8K E2E with rmpad using model rm
316
+ run: |
317
+ ray stop --force
318
+ bash tests/special_e2e/ppo_trainer/run_model_reward.sh
319
+ - name: Running GSM8K E2E without rmpad using model rm
320
+ run: |
321
+ ray stop --force
322
+ RM_PAD=False bash tests/special_e2e/ppo_trainer/run_model_reward.sh
323
+ - name: Running GSM8K E2E with rmpad using model rm and ulysses sp=2
324
+ run: |
325
+ ray stop --force
326
+ SP_SIZE=2 bash tests/special_e2e/ppo_trainer/run_model_reward.sh
327
+ - name: Running GSM8K E2E with rmpad using model rm and dynamic batch size
328
+ run: |
329
+ ray stop --force
330
+ SEQ_BALANCE=True bash tests/special_e2e/ppo_trainer/run_model_reward.sh
331
+ - name: Running GSM8K E2E with rmpad using model rm with Liger Kernel enabled
332
+ run: |
333
+ ray stop --force
334
+ LIGER=True bash tests/special_e2e/ppo_trainer/run_model_reward.sh
335
+ - name: Running GSM8K E2E with rmpad using model rm with Fused Kernel enabled
336
+ run: |
337
+ ray stop --force
338
+ FUSED_KERNELS=True bash tests/special_e2e/ppo_trainer/run_model_reward.sh
339
+ - name: Running GSM8K E2E with rmpad using model rm with Fused Kernel enabled
340
+ run: |
341
+ ray stop --force
342
+ FUSED_KERNEL=True FUSED_KERNEL_BACKEND=triton bash tests/special_e2e/ppo_trainer/run_model_reward.sh
343
+ - name: Running GSM8K E2E training tests on vllm async
344
+ run: |
345
+ ray stop --force
346
+ export VLLM_USE_V1=1
347
+ ray start --head
348
+ TOTAL_TRAIN_STEPS=2 ENGINE=vllm ROLLOUT_MODE=async bash tests/special_e2e/ppo_trainer/run_function_reward.sh
349
+
350
+ e2e_ppo_trainer_vllm_vlm:
351
+ needs: setup
352
+ runs-on: [ "${{ needs.setup.outputs.runner-label || 'L20x8' }}" ]
353
+ timeout-minutes: 40 # Increase this timeout value as needed
354
+ env:
355
+ HTTP_PROXY: ${{ secrets.PROXY_HTTP }}
356
+ HTTPS_PROXY: ${{ secrets.PROXY_HTTPS }}
357
+ NO_PROXY: "localhost,127.0.0.1,hf-mirror.com"
358
+ HF_ENDPOINT: "https://hf-mirror.com"
359
+ HF_HUB_ENABLE_HF_TRANSFER: "0" # This is more stable
360
+ steps:
361
+ - uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2
362
+ with:
363
+ fetch-depth: 0
364
+ - name: Install the current repository
365
+ run: |
366
+ pip3 install --no-deps -e .[test,gpu,vllm,geo,trl]
367
+ pip3 install transformers==$TRANSFORMERS_VERSION
368
+ # Geo3k
369
+ - name: Prepare GEO3K dataset
370
+ run: |
371
+ python3 examples/data_preprocess/geo3k.py --local_dataset_path ${HOME}/models/hf_data/hiyouga/geometry3k/
372
+ - name: Running GEO3K VLM GRPO E2E training tests on 8 L20 GPUs with rmpad using function rm
373
+ run: |
374
+ ray stop --force
375
+ TRAIN_FILES=$HOME/data/geo3k/train.parquet VAL_FILES=$HOME/data/geo3k/test.parquet \
376
+ MAX_PROMPT_LEN=1536 MAX_RESPONSE_LEN=1536 \
377
+ MODEL_ID=Qwen/Qwen2-VL-2B-Instruct \
378
+ ADV_ESTIMATOR=grpo RM_PAD=True USE_KL=True ENABLE_CHUNKED_PREFILL=False \
379
+ SP_SIZE=2 \
380
+ bash tests/special_e2e/ppo_trainer/run_function_reward.sh
381
+
382
+ - name: Running GEO3K VLM PPO E2E training tests on 8 L20 GPUs with rmpad using function rm
383
+ run: |
384
+ ray stop --force
385
+ TRAIN_FILES=$HOME/data/geo3k/train.parquet VAL_FILES=$HOME/data/geo3k/test.parquet \
386
+ MAX_PROMPT_LEN=1536 MAX_RESPONSE_LEN=1536 \
387
+ MODEL_ID=Qwen/Qwen2-VL-2B-Instruct \
388
+ ADV_ESTIMATOR=gae RM_PAD=True USE_KL=True ENABLE_CHUNKED_PREFILL=False \
389
+ SP_SIZE=2 \
390
+ bash tests/special_e2e/ppo_trainer/run_function_reward.sh
391
+ - name: Running GEO3K VLM GRPO E2E lora training tests on 8 L20 GPUs with rmpad using function rm
392
+ run: |
393
+ ray stop --force
394
+ TRAIN_FILES=$HOME/data/geo3k/train.parquet VAL_FILES=$HOME/data/geo3k/test.parquet \
395
+ MAX_PROMPT_LEN=1536 MAX_RESPONSE_LEN=1536 \
396
+ MODEL_ID=Qwen/Qwen2-VL-2B-Instruct \
397
+ ADV_ESTIMATOR=grpo RM_PAD=True USE_KL=True ENABLE_CHUNKED_PREFILL=False \
398
+ SP_SIZE=2 \
399
+ LORA_RANK=32 LORA_EXCLUDE=".*visual.*" \
400
+ bash tests/special_e2e/ppo_trainer/run_function_reward.sh
401
+
402
+ cleanup:
403
+ runs-on: ubuntu-latest
404
+ needs:
405
+ [
406
+ setup,
407
+ e2e_ppo_trainer_megatron-deepseek-override-transformer-config,
408
+ e2e_ppo_trainer_megatron-moe-expert-parallel,
409
+ e2e_ppo_trainer_megatron-qwen2_5vl-3b,
410
+ e2e_ppo_trainer_vllm,
411
+ e2e_ppo_trainer_vllm_vlm
412
+ ]
413
+ if: always()
414
+ steps:
415
+ - id: destroy-runner
416
+ uses: volcengine/vemlp-github-runner@v1
417
+ with:
418
+ mode: "destroy"
419
+ faas-url: "${{ env.DYNAMIC_RUNNER_ENDPOINT }}"
420
+ mlp-task-id: "${{ needs.setup.outputs.mlp-task-id }}"
.github/workflows/e2e_sft.yml ADDED
@@ -0,0 +1,161 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # # Tests layout
2
+
3
+ # Each folder under tests/ corresponds to a test category for a sub-namespace in verl. For instance:
4
+ # - `tests/trainer` for testing functionality related to `verl/trainer`
5
+ # - `tests/models` for testing functionality related to `verl/models`
6
+ # - ...
7
+
8
+ # There are a few folders with `special_` prefix, created for special purposes:
9
+ # - `special_distributed`: unit tests that must run with multiple GPUs
10
+ # - `special_e2e`: end-to-end tests with training/generation scripts
11
+ # - `special_npu`: tests for NPUs
12
+ # - `special_sanity`: a suite of quick sanity tests
13
+ # - `special_standalone`: a set of test that are designed to run in dedicated environments
14
+
15
+ # Accelerators for tests
16
+ # - By default tests are run with GPU available, except for the ones under `special_npu`, and any test script whose name ends with `on_cpu.py`.
17
+ # - For test scripts with `on_cpu.py` name suffix would be tested on CPU resources in linux environment.
18
+
19
+ # # Workflow layout
20
+
21
+ # All CI tests are configured by yaml files in `.github/workflows/`. Here's an overview of all test configs:
22
+ # 1. A list of always triggered CPU sanity tests: `check-pr-title.yml`, `secrets_scan.yml`, `check-pr-title,yml`, `pre-commit.yml`, `doc.yml`
23
+ # 2. Some heavy multi-GPU unit tests, such as `model.yml`, `vllm.yml`, `sgl.yml`
24
+ # 3. End-to-end tests: `e2e_*.yml`
25
+ # 4. Unit tests
26
+ # - `cpu_unit_tests.yml`, run pytest on all scripts with file name pattern `tests/**/test_*_on_cpu.py`
27
+ # - `gpu_unit_tests.yml`, run pytest on all scripts with file without the `on_cpu.py` suffix.
28
+ # - Since cpu/gpu unit tests by default runs all tests under `tests`, please make sure tests are manually excluded in them when
29
+ # - new workflow yaml is added to `.github/workflows`
30
+ # - new tests are added to workflow mentioned in 2.
31
+
32
+ name: e2e_sft
33
+
34
+ on:
35
+ # Trigger the workflow on push or pull request,
36
+ # but only for the main branch
37
+ push:
38
+ branches:
39
+ - main
40
+ - v0.*
41
+ pull_request:
42
+ branches:
43
+ - main
44
+ - v0.*
45
+ paths:
46
+ - "**/*.py"
47
+ # Other entrypoints
48
+ - "!examples/**"
49
+ - "!tests/**"
50
+ - "!verl/trainer/main_*.py"
51
+ - "!verl/trainer/fsdp_sft_trainer.py"
52
+ # Recipes
53
+ - "!recipe/**"
54
+ # Megatron
55
+ - "!verl/workers/**/megatron_*.py"
56
+ # Entrypoints
57
+ - ".github/workflows/e2e_sft.yml"
58
+ - "examples/data_preprocess/gsm8k.py"
59
+ - "tests/special_e2e/sft"
60
+ - "verl/trainer/fsdp_sft_trainer.py"
61
+ - "verl/trainer/config/sft_trainer.yaml"
62
+
63
+ # Cancel jobs on the same ref if a new one is triggered
64
+ concurrency:
65
+ group: ${{ github.workflow }}-${{ github.ref }}
66
+ cancel-in-progress: ${{ github.ref != 'refs/heads/main' }}
67
+
68
+ # Declare permissions just read content.
69
+ permissions:
70
+ contents: read
71
+
72
+ env:
73
+ IMAGE: "verl-ci-cn-beijing.cr.volces.com/verlai/verl:app-verl0.6-transformers4.56.1-sglang0.5.2-mcore0.13.0-te2.2"
74
+ DYNAMIC_RUNNER_ENDPOINT: "https://sd10g3clalm04ug7alq90.apigateway-cn-beijing.volceapi.com/runner"
75
+
76
+ jobs:
77
+ setup:
78
+ if: github.repository_owner == 'volcengine'
79
+ runs-on: ubuntu-latest
80
+ outputs:
81
+ runner-label: ${{ steps.create-runner.outputs.runner-label }}
82
+ mlp-task-id: ${{ steps.create-runner.outputs.mlp-task-id }}
83
+ steps:
84
+ - uses: actions/checkout@v4
85
+ - id: create-runner
86
+ uses: volcengine/vemlp-github-runner@v1
87
+ with:
88
+ mode: "create"
89
+ faas-url: "${{ env.DYNAMIC_RUNNER_ENDPOINT }}"
90
+ mlp-image: "${{ env.IMAGE }}"
91
+ e2e_sft:
92
+ needs: setup
93
+ runs-on: ["${{ needs.setup.outputs.runner-label || 'L20x8' }}"]
94
+ timeout-minutes: 25 # Increase this timeout value as needed
95
+ env:
96
+ HTTP_PROXY: ${{ secrets.PROXY_HTTP }}
97
+ HTTPS_PROXY: ${{ secrets.PROXY_HTTPS }}
98
+ NO_PROXY: "localhost,127.0.0.1,hf-mirror.com"
99
+ HF_ENDPOINT: "https://hf-mirror.com"
100
+ HF_HUB_ENABLE_HF_TRANSFER: "0" # This is more stable
101
+ steps:
102
+ - uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2
103
+ with:
104
+ fetch-depth: 0
105
+ - name: Install the current repository
106
+ run: |
107
+ pip3 install peft
108
+ pip3 install --no-deps -e .[test,gpu]
109
+ - name: Prepare gsm8k dataset
110
+ run: |
111
+ ray stop --force
112
+ python3 examples/data_preprocess/gsm8k.py --local_dataset_path ${HOME}/models/hf_data/gsm8k
113
+ - name: Running GSM8K E2E training tests on 8 L20 GPUs with rmpad using function rm
114
+ run: |
115
+ ray stop --force
116
+ bash tests/special_e2e/sft/run_sft.sh
117
+ - name: Running GSM8K E2E training tests on 8 L20 GPUs w/o rmpad using function rm
118
+ run: |
119
+ ray stop --force
120
+ RM_PAD=False bash tests/special_e2e/sft/run_sft.sh
121
+ - name: Running GSM8K E2E training tests on 8 L20 GPUs with sequence parallism
122
+ run: |
123
+ ray stop --force
124
+ SP_SIZE=2 bash tests/special_e2e/sft/run_sft.sh
125
+ - name: Check loss difference between sequence parallel vs. default implementation
126
+ run: |
127
+ ray stop --force
128
+ ENTRYPOINT="tests/special_e2e/sft/test_sp_loss_match.py" SP_SIZE=2 bash tests/special_e2e/sft/run_sft.sh
129
+ - name: Running GSM8K E2E training tests on 8 L20 GPUs with sequence parallism and liger
130
+ run: |
131
+ ray stop --force
132
+ SP_SIZE=2 LIGER=True bash tests/special_e2e/sft/run_sft.sh
133
+ - name: Running GSM8K E2E training tests with LoRA
134
+ run: |
135
+ ray stop --force
136
+ LORA_RANK=32 bash tests/special_e2e/sft/run_sft.sh
137
+ - name: Run GSM8K E2E training and resume tests resuming from the checkpoint manager
138
+ run: |
139
+ ray stop --force
140
+ LORA_RANK=32 RESUME_MODE=auto TOTAL_TRAIN_STEP=2 bash tests/special_e2e/sft/run_sft.sh
141
+ # TODO: multiturn
142
+ - name: Prepare gsm8k dataset
143
+ run: |
144
+ ray stop --force
145
+ python3 examples/data_preprocess/gsm8k_multiturn_sft.py --local_dataset_path ${HOME}/models/hf_data/gsm8k
146
+ - name: Running GSM8K E2E training tests with multiturn and various configs and compare results
147
+ run: |
148
+ bash tests/special_e2e/sft/test_sft_engine_all.sh
149
+
150
+
151
+ cleanup:
152
+ runs-on: ubuntu-latest
153
+ needs: [setup, e2e_sft]
154
+ if: always()
155
+ steps:
156
+ - id: destroy-runner
157
+ uses: volcengine/vemlp-github-runner@v1
158
+ with:
159
+ mode: "destroy"
160
+ faas-url: "${{ env.DYNAMIC_RUNNER_ENDPOINT }}"
161
+ mlp-task-id: "${{ needs.setup.outputs.mlp-task-id }}"
.github/workflows/gpu_unit_tests.yml ADDED
@@ -0,0 +1,113 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # # Tests layout
2
+
3
+ # Each folder under tests/ corresponds to a test category for a sub-namespace in verl. For instance:
4
+ # - `tests/trainer` for testing functionality related to `verl/trainer`
5
+ # - `tests/models` for testing functionality related to `verl/models`
6
+ # - ...
7
+
8
+ # There are a few folders with `special_` prefix, created for special purposes:
9
+ # - `special_distributed`: unit tests that must run with multiple GPUs
10
+ # - `special_e2e`: end-to-end tests with training/generation scripts
11
+ # - `special_npu`: tests for NPUs
12
+ # - `special_sanity`: a suite of quick sanity tests
13
+ # - `special_standalone`: a set of test that are designed to run in dedicated environments
14
+
15
+ # Accelerators for tests
16
+ # - By default tests are run with GPU available, except for the ones under `special_npu`, and any test script whose name ends with `on_cpu.py`.
17
+ # - For test scripts with `on_cpu.py` name suffix would be tested on CPU resources in linux environment.
18
+
19
+ # # Workflow layout
20
+
21
+ # All CI tests are configured by yaml files in `.github/workflows/`. Here's an overview of all test configs:
22
+ # 1. A list of always triggered CPU sanity tests: `check-pr-title.yml`, `secrets_scan.yml`, `check-pr-title,yml`, `pre-commit.yml`, `doc.yml`
23
+ # 2. Some heavy multi-GPU unit tests, such as `model.yml`, `vllm.yml`, `sgl.yml`
24
+ # 3. End-to-end tests: `e2e_*.yml`
25
+ # 4. Unit tests
26
+ # - `cpu_unit_tests.yml`, run pytest on all scripts with file name pattern `tests/**/test_*_on_cpu.py`
27
+ # - `gpu_unit_tests.yml`, run pytest on all scripts with file without the `on_cpu.py` suffix.
28
+ # - Since cpu/gpu unit tests by default runs all tests under `tests`, please make sure tests are manually excluded in them when
29
+ # - new workflow yaml is added to `.github/workflows`
30
+ # - new tests are added to workflow mentioned in 2.
31
+
32
+ name: GPU unit tests
33
+
34
+ on:
35
+ # Trigger the workflow on push or pull request,
36
+ # but only for the main branch
37
+ push:
38
+ branches:
39
+ - main
40
+ - v0.4.x
41
+ paths:
42
+ - "**/*.py"
43
+ - .github/workflows/gpu_unit_tests.yml
44
+ pull_request:
45
+ branches:
46
+ - main
47
+ - v0.4.x
48
+ paths:
49
+ # The order that you define paths patterns matters:
50
+ # A matching negative pattern (prefixed with !) after a positive match will exclude the path.
51
+ # A matching positive pattern after a negative match will include the path again.
52
+ - "**/*.py"
53
+ # Other entrypoints
54
+ - "!examples/**"
55
+ - "!verl/trainer/main_*.py"
56
+ - "!verl/trainer/fsdp_sft_trainer.py"
57
+ - "!recipe/**"
58
+ # Entrypoints
59
+ - .github/workflows/gpu_unit_tests.yml
60
+ - "tests/**test_*.py"
61
+ # Ignore CPU tests
62
+ - "!tests/*_on_cpu.py"
63
+
64
+ # Cancel jobs on the same ref if a new one is triggered
65
+ concurrency:
66
+ group: ${{ github.workflow }}-${{ github.ref }}
67
+ cancel-in-progress: ${{ github.ref != 'refs/heads/main' }}
68
+
69
+ # Declare permissions just read content.
70
+ permissions:
71
+ contents: read
72
+
73
+ jobs:
74
+ gpu_unit_tests:
75
+ if: github.repository_owner == 'volcengine'
76
+ runs-on: [L20x8]
77
+ timeout-minutes: 60 # Increase this timeout value as needed
78
+ env:
79
+ HTTP_PROXY: ${{ secrets.PROXY_HTTP }}
80
+ HTTPS_PROXY: ${{ secrets.PROXY_HTTPS }}
81
+ NO_PROXY: "localhost,127.0.0.1"
82
+ HF_HUB_ENABLE_HF_TRANSFER: 1
83
+ container:
84
+ image: verlai/verl:app-verl0.6-transformers4.56.1-sglang0.5.2-mcore0.13.0-te2.2
85
+ options: --gpus all --shm-size=10g
86
+ steps:
87
+ - uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2
88
+ with:
89
+ fetch-depth: 0
90
+ - name: Install the current repository
91
+ run: |
92
+ pip3 install hf_transfer
93
+ pip3 install --no-deps -e .[test]
94
+ pip3 install --upgrade "ray>=2.40.0"
95
+ pip3 install cupy-cuda12x
96
+ - name: Download Model to Use
97
+ run: |
98
+ huggingface-cli download Qwen/Qwen2.5-0.5B-Instruct
99
+ huggingface-cli download Qwen/Qwen2.5-1.5B-Instruct
100
+ export HF_HUB_OFFLINE=1
101
+ # Disable requests to avoid network errors
102
+ - name: Run all GPU unit tests
103
+ run: |
104
+ pytest -s -x --ignore-glob="*test_special_*.py" --ignore-glob='*on_cpu.py' --ignore-glob="*test_vllm*" --ignore-glob="*_sglang*" --ignore-glob="*_hf_rollout*" --ignore-glob="tests/models/" --ignore-glob='tests/special*' --ignore-glob="tests/experimental" --ignore-glob="tests/workers/reward_model" tests/
105
+ - name: Testing LinearCrossEntropyTP Correctness, Computation Time and Memory Consumption
106
+ run: |
107
+ LOW_MEMORY=True torchrun --standalone --nnodes=1 --nproc-per-node=8 tests/utils/test_special_linear_cross_entropy_tp.py
108
+ - name: Testing FSDP2 actor functionality
109
+ run: |
110
+ torchrun --standalone --nnodes=1 --nproc-per-node=2 tests/workers/actor/test_special_dp_actor.py
111
+ - name: Testing FSDP2 critic functionality
112
+ run: |
113
+ torchrun --standalone --nnodes=1 --nproc-per-node=2 tests/workers/critic/test_special_dp_critic.py
.github/workflows/model.yml ADDED
@@ -0,0 +1,230 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # # Tests layout
2
+
3
+ # Each folder under tests/ corresponds to a test category for a sub-namespace in verl. For instance:
4
+ # - `tests/trainer` for testing functionality related to `verl/trainer`
5
+ # - `tests/models` for testing functionality related to `verl/models`
6
+ # - ...
7
+
8
+ # There are a few folders with `special_` prefix, created for special purposes:
9
+ # - `special_distributed`: unit tests that must run with multiple GPUs
10
+ # - `special_e2e`: end-to-end tests with training/generation scripts
11
+ # - `special_npu`: tests for NPUs
12
+ # - `special_sanity`: a suite of quick sanity tests
13
+ # - `special_standalone`: a set of test that are designed to run in dedicated environments
14
+
15
+ # Accelerators for tests
16
+ # - By default tests are run with GPU available, except for the ones under `special_npu`, and any test script whose name ends with `on_cpu.py`.
17
+ # - For test scripts with `on_cpu.py` name suffix would be tested on CPU resources in linux environment.
18
+
19
+ # # Workflow layout
20
+
21
+ # All CI tests are configured by yaml files in `.github/workflows/`. Here's an overview of all test configs:
22
+ # 1. A list of always triggered CPU sanity tests: `check-pr-title.yml`, `secrets_scan.yml`, `check-pr-title,yml`, `pre-commit.yml`, `doc.yml`
23
+ # 2. Some heavy multi-GPU unit tests, such as `model.yml`, `vllm.yml`, `sgl.yml`
24
+ # 3. End-to-end tests: `e2e_*.yml`
25
+ # 4. Unit tests
26
+ # - `cpu_unit_tests.yml`, run pytest on all scripts with file name pattern `tests/**/test_*_on_cpu.py`
27
+ # - `gpu_unit_tests.yml`, run pytest on all scripts with file without the `on_cpu.py` suffix.
28
+ # - Since cpu/gpu unit tests by default runs all tests under `tests`, please make sure tests are manually excluded in them when
29
+ # - new workflow yaml is added to `.github/workflows`
30
+ # - new tests are added to workflow mentioned in 2.
31
+ # name: Check PR Title
32
+
33
+ name: model
34
+
35
+ on:
36
+ # Trigger the workflow on push or pull request,
37
+ # but only for the main branch
38
+ push:
39
+ branches:
40
+ - main
41
+ - v0.*
42
+ pull_request:
43
+ branches:
44
+ - main
45
+ - v0.*
46
+ paths:
47
+ - "verl/**/*.py"
48
+ # Entrypoints
49
+ - ".github/workflows/model.yml"
50
+ - "tests/special_distributed/test_fsdp_ckpt.py"
51
+ - "tests/special_distributed/test_mcore_config_converter.py"
52
+ - "tests/special_distributed/test_tensor_dict.py"
53
+ - "tests/models/**"
54
+ - "tests/special_distributed/run_all.sh"
55
+
56
+ # Declare permissions just read content.
57
+ permissions:
58
+ contents: read
59
+
60
+ # Cancel jobs on the same ref if a new one is triggered
61
+ concurrency:
62
+ group: ${{ github.workflow }}-${{ github.ref }}
63
+ cancel-in-progress: ${{ github.ref != 'refs/heads/main' }}
64
+
65
+
66
+ env:
67
+ IMAGE: "verl-ci-cn-beijing.cr.volces.com/verlai/verl:app-verl0.5-transformers4.55.4-vllm0.10.0-mcore0.13.0-te2.2"
68
+ DYNAMIC_RUNNER_ENDPOINT: "https://sd10g3clalm04ug7alq90.apigateway-cn-beijing.volceapi.com/runner"
69
+
70
+ jobs:
71
+ setup:
72
+ if: github.repository_owner == 'volcengine'
73
+ runs-on: ubuntu-latest
74
+ outputs:
75
+ runner-label: ${{ steps.create-runner.outputs.runner-label }}
76
+ mlp-task-id: ${{ steps.create-runner.outputs.mlp-task-id }}
77
+ steps:
78
+ - uses: actions/checkout@v4
79
+ - id: create-runner
80
+ uses: volcengine/vemlp-github-runner@v1
81
+ with:
82
+ mode: "create"
83
+ faas-url: "${{ env.DYNAMIC_RUNNER_ENDPOINT }}"
84
+ mlp-image: "${{ env.IMAGE }}"
85
+
86
+ model_rmpad:
87
+ needs: setup
88
+ runs-on: ["${{ needs.setup.outputs.runner-label || 'L20x8' }}"]
89
+ timeout-minutes: 20 # Increase this timeout value as needed
90
+ env:
91
+ HTTP_PROXY: ${{ secrets.PROXY_HTTP }}
92
+ HTTPS_PROXY: ${{ secrets.PROXY_HTTPS }}
93
+ NO_PROXY: "localhost,127.0.0.1,hf-mirror.com"
94
+ HF_ENDPOINT: "https://hf-mirror.com"
95
+ HF_HUB_ENABLE_HF_TRANSFER: "0" # This is more stable
96
+ steps:
97
+ - uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2
98
+ with:
99
+ fetch-depth: 0
100
+ - name: Install the current repository and upgrade to latest transformers(4.54.0)/flash_attn, transformers 4.55.0 has strange behavior with model backward
101
+ run: |
102
+ pip3 install --no-deps -e .[test]
103
+ pip3 install --upgrade transformers
104
+ - name: Running rmpad model tests on 8 L20 GPUs + flash_attn 2.5.8
105
+ run: |
106
+ pytest -s tests/models/test_transformer.py
107
+ - name: Running rmpad model tests on 8 L20 GPUs + latest flash_attn
108
+ run: |
109
+ pytest -s tests/models/test_transformer.py
110
+ - name: Running FSDP rmpad model tests on 8 L20 GPUs + latest flash_attn
111
+ run: |
112
+ STRATEGY=fsdp torchrun --nproc_per_node=8 tests/special_distributed/test_fsdp_ckpt.py
113
+ - name: Running transformers ulysses tests on 8 L20 GPUs + latest transformers
114
+ run: |
115
+ torchrun --nproc_per_node=8 -m pytest tests/models/test_transformers_ulysses.py
116
+ - name: Running transformers ulysses tests on 8 L20 GPUs + transformers 4.54.1
117
+ run: |
118
+ pip3 install transformers==4.54.1
119
+ torchrun --nproc_per_node=8 -m pytest tests/models/test_transformers_ulysses.py
120
+ - name: Running transformers ulysses tests on 8 L20 GPUs + transformers 4.53.2
121
+ run: |
122
+ pip3 install transformers==4.53.2
123
+ torchrun --nproc_per_node=8 -m pytest tests/models/test_transformers_ulysses.py
124
+ - name: Running transformers ulysses tests on 8 L20 GPUs + transformers 4.52.0
125
+ run: |
126
+ pip3 install transformers==4.52.0
127
+ torchrun --nproc_per_node=8 -m pytest tests/models/test_transformers_ulysses.py
128
+ - name: Run distributed test
129
+ run: |
130
+ bash tests/special_distributed/run_all.sh
131
+
132
+ # TODO: Move this back to model_rmpad once FSDP2 is stable.
133
+ # NOTE: List as an independent job to make rerun easier.
134
+ model_rmpad_fsdp2_unstable:
135
+ needs: setup
136
+ runs-on: [ "${{ needs.setup.outputs.runner-label || 'L20x8' }}" ]
137
+ timeout-minutes: 20 # Increase this timeout value as needed
138
+ env:
139
+ HTTP_PROXY: ${{ secrets.PROXY_HTTP }}
140
+ HTTPS_PROXY: ${{ secrets.PROXY_HTTPS }}
141
+ NO_PROXY: "localhost,127.0.0.1,hf-mirror.com"
142
+ HF_ENDPOINT: "https://hf-mirror.com"
143
+ HF_HUB_ENABLE_HF_TRANSFER: "0" # This is more stable
144
+ steps:
145
+ - uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2
146
+ with:
147
+ fetch-depth: 0
148
+ - name: Install the current repository and upgrade to latest transformers/flash_attn
149
+ run: |
150
+ pip3 install --no-deps -e .[test]
151
+ pip3 install --upgrade transformers
152
+ - name: Running FSDP2 rmpad model tests on 8 L20 GPUs + latest flash_attn
153
+ run: |
154
+ STRATEGY=fsdp2 torchrun --nproc_per_node=8 tests/special_distributed/test_fsdp_ckpt.py
155
+
156
+ mcore_config_converter:
157
+ needs: setup
158
+ runs-on: [ "${{ needs.setup.outputs.runner-label || 'L20x8' }}" ]
159
+ timeout-minutes: 20 # Increase this timeout value as needed
160
+ env:
161
+ HTTP_PROXY: ${{ secrets.PROXY_HTTP }}
162
+ HTTPS_PROXY: ${{ secrets.PROXY_HTTPS }}
163
+ NO_PROXY: "localhost,127.0.0.1,hf-mirror.com"
164
+ HF_ENDPOINT: "https://hf-mirror.com"
165
+ HF_HUB_ENABLE_HF_TRANSFER: "0" # This is more stable
166
+ steps:
167
+ - uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2
168
+ with:
169
+ fetch-depth: 0
170
+ - name: Install the current repository
171
+ run: |
172
+ pip3 install --no-deps -e .[test]
173
+ pip install --upgrade "huggingface_hub[cli]"
174
+ # - name: Download model config files
175
+ # run: |
176
+ # hf download Qwen/Qwen2.5-7B config.json --local-dir $HOME/configs/Qwen/Qwen2.5-7B
177
+ # hf download Qwen/Qwen3-8B config.json --local-dir $HOME/configs/Qwen/Qwen3-8B
178
+ # hf download deepseek-ai/deepseek-coder-1.3b-instruct config.json --local-dir $HOME/configs/deepseek-ai/deepseek-coder-1.3b-instruct
179
+ # hf download Qwen/Qwen2-57B-A14B config.json --local-dir $HOME/configs/Qwen/Qwen2-57B-A14B
180
+ # hf download Qwen/Qwen3-30B-A3B config.json --local-dir $HOME/configs/Qwen/Qwen3-30B-A3B
181
+ # hf download deepseek-ai/DeepSeek-V3-Base config.json --local-dir $HOME/configs/deepseek-ai/DeepSeek-V3-Base
182
+ - name: Running mcore config converter tests on 8 L20 GPUs
183
+ run: |
184
+ torchrun --nproc_per_node=8 tests/special_distributed/test_mcore_config_converter.py
185
+
186
+ model_engine:
187
+ needs: setup
188
+ runs-on: [ "${{ needs.setup.outputs.runner-label || 'L20x8' }}" ]
189
+ timeout-minutes: 20 # Increase this timeout value as needed
190
+ env:
191
+ HTTP_PROXY: ${{ secrets.PROXY_HTTP }}
192
+ HTTPS_PROXY: ${{ secrets.PROXY_HTTPS }}
193
+ NO_PROXY: "localhost,127.0.0.1,hf-mirror.com"
194
+ HF_ENDPOINT: "https://hf-mirror.com"
195
+ HF_HUB_ENABLE_HF_TRANSFER: "0" # This is more stable
196
+ steps:
197
+ - uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2
198
+ with:
199
+ fetch-depth: 0
200
+ - name: Install the current repository
201
+ run: |
202
+ pip3 install --no-deps -e .[test]
203
+ pip3 install --upgrade tensordict transformers
204
+ pip install --upgrade "huggingface_hub[cli]"
205
+ - name: Download model config files
206
+ run: |
207
+ hf download Qwen/Qwen2.5-0.5B-Instruct --local-dir $HOME/models/Qwen/Qwen2.5-0.5B-Instruct
208
+
209
+ - name: Running mcore engine tests on 8 L20 GPUs
210
+ run: |
211
+ pytest -s -x tests/models/test_engine.py
212
+
213
+ cleanup:
214
+ runs-on: ubuntu-latest
215
+ needs:
216
+ [
217
+ setup,
218
+ model_rmpad,
219
+ model_rmpad_fsdp2_unstable,
220
+ mcore_config_converter,
221
+ model_engine
222
+ ]
223
+ if: always()
224
+ steps:
225
+ - id: destroy-runner
226
+ uses: volcengine/vemlp-github-runner@v1
227
+ with:
228
+ mode: "destroy"
229
+ faas-url: "${{ env.DYNAMIC_RUNNER_ENDPOINT }}"
230
+ mlp-task-id: "${{ needs.setup.outputs.mlp-task-id }}"
.github/workflows/pre-commit.yml ADDED
@@ -0,0 +1,40 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # c.f. https://github.com/pre-commit/action?tab=readme-ov-file#using-this-action
2
+ name: pre-commit
3
+
4
+ # No need to avoid / cancel lightweight pre-commit jobs
5
+ on:
6
+ schedule:
7
+ - cron: "0 0 * * 0"
8
+ pull_request:
9
+ push:
10
+ branches:
11
+ - main
12
+ - v0.*
13
+ # Allow manual triggering
14
+ workflow_dispatch:
15
+
16
+ # Declare permissions just read content.
17
+ permissions:
18
+ contents: read
19
+
20
+ jobs:
21
+ pre-commit:
22
+ runs-on: ubuntu-latest
23
+ strategy:
24
+ matrix:
25
+ python-version: ["3.12"]
26
+ steps:
27
+ - uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2
28
+ - name: Set up Python ${{ matrix.python-version }}
29
+ uses: actions/setup-python@0b93645e9fea7318ecaed2b359559ac225c90a2b # v5.3.0
30
+ with:
31
+ python-version: ${{ matrix.python-version }}
32
+ - name: Install the current repository
33
+ run: |
34
+ pip install -e .
35
+ - name: Set ruff --output-format=github
36
+ run: |
37
+ sed -i 's/--output-format=full/--output-format=github/' .pre-commit-config.yaml
38
+ git add .pre-commit-config.yaml
39
+ # Check "--all-files" by default
40
+ - uses: pre-commit/action@v3.0.1
.github/workflows/reward_model.yml ADDED
@@ -0,0 +1,131 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # # Tests layout
2
+
3
+ # Each folder under tests/ corresponds to a test category for a sub-namespace in verl. For instance:
4
+ # - `tests/trainer` for testing functionality related to `verl/trainer`
5
+ # - `tests/models` for testing functionality related to `verl/models`
6
+ # - ...
7
+
8
+ # There are a few folders with `special_` prefix, created for special purposes:
9
+ # - `special_distributed`: unit tests that must run with multiple GPUs
10
+ # - `special_e2e`: end-to-end tests with training/generation scripts
11
+ # - `special_npu`: tests for NPUs
12
+ # - `special_sanity`: a suite of quick sanity tests
13
+ # - `special_standalone`: a set of test that are designed to run in dedicated environments
14
+
15
+ # Accelerators for tests
16
+ # - By default tests are run with GPU available, except for the ones under `special_npu`, and any test script whose name ends with `on_cpu.py`.
17
+ # - For test scripts with `on_cpu.py` name suffix would be tested on CPU resources in linux environment.
18
+
19
+ # # Workflow layout
20
+
21
+ # All CI tests are configured by yaml files in `.github/workflows/`. Here's an overview of all test configs:
22
+ # 1. A list of always triggered CPU sanity tests: `check-pr-title.yml`, `secrets_scan.yml`, `check-pr-title,yml`, `pre-commit.yml`, `doc.yml`
23
+ # 2. Some heavy multi-GPU unit tests, such as `model.yml`, `vllm.yml`, `sgl.yml`
24
+ # 3. End-to-end tests: `e2e_*.yml`
25
+ # 4. Unit tests
26
+ # - `cpu_unit_tests.yml`, run pytest on all scripts with file name pattern `tests/**/test_*_on_cpu.py`
27
+ # - `gpu_unit_tests.yml`, run pytest on all scripts with file without the `on_cpu.py` suffix.
28
+ # - Since cpu/gpu unit tests by default runs all tests under `tests`, please make sure tests are manually excluded in them when
29
+ # - new workflow yaml is added to `.github/workflows`
30
+ # - new tests are added to workflow mentioned in 2.
31
+ # name: Check PR Title
32
+
33
+ name: reward_model
34
+
35
+ on:
36
+ # Trigger the workflow on push or pull request,
37
+ # but only for the main branch
38
+ push:
39
+ branches:
40
+ - main
41
+ - v0.*
42
+ pull_request:
43
+ branches:
44
+ - main
45
+ - v0.*
46
+ paths:
47
+ - "verl/**/*.py"
48
+ # Entrypoints
49
+ - ".github/workflows/reward_model.yml"
50
+ - "tests/workers/reward_model/**"
51
+
52
+ # Declare permissions just read content.
53
+ permissions:
54
+ contents: read
55
+
56
+ # Cancel jobs on the same ref if a new one is triggered
57
+ concurrency:
58
+ group: ${{ github.workflow }}-${{ github.ref }}
59
+ cancel-in-progress: ${{ github.ref != 'refs/heads/main' }}
60
+
61
+
62
+ env:
63
+ IMAGE: "verl-ci-cn-beijing.cr.volces.com/verlai/verl:app-verl0.5-transformers4.55.4-sglang0.4.10.post2-mcore0.13.0-te2.2"
64
+ DYNAMIC_RUNNER_ENDPOINT: "https://sd10g3clalm04ug7alq90.apigateway-cn-beijing.volceapi.com/runner"
65
+ TRANSFORMERS_VERSION: "4.56.2"
66
+
67
+
68
+ jobs:
69
+ setup:
70
+ if: github.repository_owner == 'volcengine'
71
+ runs-on: ubuntu-latest
72
+ outputs:
73
+ runner-label: ${{ steps.create-runner.outputs.runner-label }}
74
+ mlp-task-id: ${{ steps.create-runner.outputs.mlp-task-id }}
75
+ steps:
76
+ - uses: actions/checkout@v4
77
+ - id: create-runner
78
+ uses: volcengine/vemlp-github-runner@v1
79
+ with:
80
+ mode: "create"
81
+ faas-url: "${{ env.DYNAMIC_RUNNER_ENDPOINT }}"
82
+ mlp-image: "${{ env.IMAGE }}"
83
+
84
+ reward_model:
85
+ needs: setup
86
+ runs-on: [ "${{ needs.setup.outputs.runner-label || 'L20x8' }}" ]
87
+ timeout-minutes: 20 # Increase this timeout value as needed
88
+ env:
89
+ HTTP_PROXY: ${{ secrets.PROXY_HTTP }}
90
+ HTTPS_PROXY: ${{ secrets.PROXY_HTTPS }}
91
+ NO_PROXY: "localhost,127.0.0.1,hf-mirror.com"
92
+ HF_ENDPOINT: "https://hf-mirror.com"
93
+ HF_HUB_ENABLE_HF_TRANSFER: "0" # This is more stable
94
+ SGL_DISABLE_TP_MEMORY_INBALANCE_CHECK: "True"
95
+ NCCL_SHM_DISABLE: "1"
96
+ NCCL_P2P_DISABLE: "1"
97
+ steps:
98
+ - uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2
99
+ with:
100
+ fetch-depth: 0
101
+ - name: Install the current repository
102
+ run: |
103
+ pip3 install -e .[test]
104
+ # - name: Download model config files
105
+ # run: |
106
+ # hf download Skywork/Skywork-Reward-V2-Llama-3.2-1B --local-dir $HOME/models/Skywork/Skywork-Reward-V2-Llama-3.2-1B
107
+ # hf download verl-team/GenRM-CI-Test-1.5B --local-dir $HOME/models/verl-team/GenRM-CI-Test-1.5B
108
+ - name: Running discriminative reward model tests on 8 L20 GPUs
109
+ run: |
110
+ unset http_proxy https_proxy HTTP_PROXY HTTPS_PROXY
111
+ pytest -s -x tests/workers/reward_model/test_discriminative_reward_model.py
112
+ - name: Running generative reward model tests on 8 L20 GPUs
113
+ run: |
114
+ unset http_proxy https_proxy HTTP_PROXY HTTPS_PROXY
115
+ pytest -s -x tests/workers/reward_model/test_generative_reward_model.py
116
+
117
+ cleanup:
118
+ runs-on: ubuntu-latest
119
+ needs:
120
+ [
121
+ setup,
122
+ reward_model
123
+ ]
124
+ if: always()
125
+ steps:
126
+ - id: destroy-runner
127
+ uses: volcengine/vemlp-github-runner@v1
128
+ with:
129
+ mode: "destroy"
130
+ faas-url: "${{ env.DYNAMIC_RUNNER_ENDPOINT }}"
131
+ mlp-task-id: "${{ needs.setup.outputs.mlp-task-id }}"
.github/workflows/sanity.yml ADDED
@@ -0,0 +1,109 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # # Tests layout
2
+
3
+ # Each folder under tests/ corresponds to a test category for a sub-namespace in verl. For instance:
4
+ # - `tests/trainer` for testing functionality related to `verl/trainer`
5
+ # - `tests/models` for testing functionality related to `verl/models`
6
+ # - ...
7
+
8
+ # There are a few folders with `special_` prefix, created for special purposes:
9
+ # - `special_distributed`: unit tests that must run with multiple GPUs
10
+ # - `special_e2e`: end-to-end tests with training/generation scripts
11
+ # - `special_npu`: tests for NPUs
12
+ # - `special_sanity`: a suite of quick sanity tests
13
+ # - `special_standalone`: a set of test that are designed to run in dedicated environments
14
+
15
+ # Accelerators for tests
16
+ # - By default tests are run with GPU available, except for the ones under `special_npu`, and any test script whose name ends with `on_cpu.py`.
17
+ # - For test scripts with `on_cpu.py` name suffix would be tested on CPU resources in linux environment.
18
+
19
+ # # Workflow layout
20
+
21
+ # All CI tests are configured by yaml files in `.github/workflows/`. Here's an overview of all test configs:
22
+ # 1. A list of always triggered CPU sanity tests: `check-pr-title.yml`, `secrets_scan.yml`, `check-pr-title,yml`, `pre-commit.yml`, `doc.yml`
23
+ # 2. Some heavy multi-GPU unit tests, such as `model.yml`, `vllm.yml`, `sgl.yml`
24
+ # 3. End-to-end tests: `e2e_*.yml`
25
+ # 4. Unit tests
26
+ # - `cpu_unit_tests.yml`, run pytest on all scripts with file name pattern `tests/**/test_*_on_cpu.py`
27
+ # - `gpu_unit_tests.yml`, run pytest on all scripts with file without the `on_cpu.py` suffix.
28
+ # - Since cpu/gpu unit tests by default runs all tests under `tests`, please make sure tests are manually excluded in them when
29
+ # - new workflow yaml is added to `.github/workflows`
30
+ # - new tests are added to workflow mentioned in 2.
31
+ # name: Check PR Title
32
+
33
+ name: sanity
34
+
35
+ on:
36
+ # Trigger the workflow on push or pull request,
37
+ # but only for the main branch
38
+ push:
39
+ branches:
40
+ - main
41
+ - v0.*
42
+ pull_request:
43
+ branches:
44
+ - main
45
+ - v0.*
46
+ paths:
47
+ - "**/*.py"
48
+ - .github/workflows/sanity.yml
49
+ - "tests/special_sanity/**"
50
+
51
+ # Cancel jobs on the same ref if a new one is triggered
52
+ concurrency:
53
+ group: ${{ github.workflow }}-${{ github.ref }}
54
+ cancel-in-progress: ${{ github.ref != 'refs/heads/main' }}
55
+
56
+ # Declare permissions just read content.
57
+ permissions:
58
+ contents: read
59
+
60
+ jobs:
61
+ sanity:
62
+ runs-on: ubuntu-latest
63
+ timeout-minutes: 5 # Increase this timeout value as needed
64
+ strategy:
65
+ matrix:
66
+ python-version: ["3.10"]
67
+ steps:
68
+ - uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2
69
+ - name: Set up Python ${{ matrix.python-version }}
70
+ uses: actions/setup-python@0b93645e9fea7318ecaed2b359559ac225c90a2b # v5.3.0
71
+ with:
72
+ python-version: ${{ matrix.python-version }}
73
+ - name: Install the current repository
74
+ run: |
75
+ pip3 install torch torchvision --index-url https://download.pytorch.org/whl/cpu
76
+ pip3 install -r requirements.txt
77
+ pip install -e .[test]
78
+ - name: Run sanity test
79
+ run: |
80
+ pytest -s -x tests/special_sanity
81
+ - name: Run license test
82
+ run: |
83
+ python3 tests/special_sanity/check_license.py --directories .
84
+ - name: Assert naming convention
85
+ run: |
86
+ if grep -rIn --exclude-dir=.git --exclude-dir=.github --exclude-dir=venv --exclude-dir=__pycache__ 'veRL' .; then
87
+ echo "Please use verl instead of veRL in the codebase"
88
+ exit 1
89
+ fi
90
+ - name: Assert SGLang naming convention
91
+ run: |
92
+ if grep -rIn --exclude-dir=.git --exclude-dir=.github --exclude-dir=venv --exclude-dir=__pycache__ -E 'Sglang|sgLang|sglAng|sglaNg|sglanG' .; then
93
+ echo "Please use SGLang or sglang as the formal name of SGLang rollout engine"
94
+ exit 1
95
+ fi
96
+ - name: Validate test folder structure
97
+ run: python3 tests/special_sanity/validate_structure.py
98
+ - name: Assert documentation requirement for functions
99
+ run: python3 tests/special_sanity/validate_imported_docs.py
100
+ - name: Assert device api usage in verl/recipe
101
+ run: python3 tests/special_sanity/check_device_api_usage.py --directory ./recipe
102
+ - name: Assert device api usage in verl/verl
103
+ run: python3 tests/special_sanity/check_device_api_usage.py --directory ./verl
104
+ - name: Assert documentation time info
105
+ run: python3 tests/special_sanity/check_docs_time_info.py
106
+ - name: Check docstrings for specified files
107
+ run: python3 tests/special_sanity/check_docstrings.py
108
+ - name: Check DataProto for specified folders
109
+ run: python3 tests/special_sanity/check_dataproto_usage.py -d ./verl/workers/engine
.github/workflows/scorecard.yml ADDED
@@ -0,0 +1,66 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # This workflow uses actions that are not certified by GitHub. They are provided
2
+ # by a third-party and are governed by separate terms of service, privacy
3
+ # policy, and support documentation.
4
+
5
+ name: Scorecard supply-chain security
6
+ on:
7
+ # For Branch-Protection check. Only the default branch is supported. See
8
+ # https://github.com/ossf/scorecard/blob/main/docs/checks.md#branch-protection
9
+ branch_protection_rule:
10
+ # To guarantee Maintained check is occasionally updated. See
11
+ # https://github.com/ossf/scorecard/blob/main/docs/checks.md#maintained
12
+ schedule:
13
+ - cron: "27 7 * * 1"
14
+ push:
15
+ branches:
16
+ - main
17
+ - v0.*
18
+
19
+ # Declare default permissions as read only.
20
+ permissions: read-all
21
+
22
+ jobs:
23
+ analysis:
24
+ name: Scorecard analysis
25
+ runs-on: ubuntu-latest
26
+ permissions:
27
+ # Needed to upload the results to code-scanning dashboard.
28
+ security-events: write
29
+ # Needed to publish results and get a badge (see publish_results below).
30
+ id-token: write
31
+ # Uncomment the permissions below if installing in a private repository.
32
+ # contents: read
33
+ # actions: read
34
+
35
+ steps:
36
+ - name: "Checkout code"
37
+ uses: actions/checkout@b4ffde65f46336ab88eb53be808477a3936bae11 # v4.1.1
38
+ with:
39
+ persist-credentials: false
40
+
41
+ - name: "Run analysis"
42
+ uses: ossf/scorecard-action@0864cf19026789058feabb7e87baa5f140aac736 # v2.3.1
43
+ with:
44
+ results_file: results.sarif
45
+ results_format: sarif
46
+ # (Optional) "write" PAT token. Uncomment the `repo_token` line below if:
47
+ # - you want to enable the Branch-Protection check on a *public* repository, or
48
+ # - you are installing Scorecard on a *private* repository
49
+ # To create the PAT, follow the steps in https://github.com/ossf/scorecard-action?tab=readme-ov-file#authentication-with-fine-grained-pat-optional.
50
+ # repo_token: ${{ secrets.SCORECARD_TOKEN }}
51
+
52
+ # Public repositories:
53
+ # - Publish results to OpenSSF REST API for easy access by consumers
54
+ # - Allows the repository to include the Scorecard badge.
55
+ # - See https://github.com/ossf/scorecard-action#publishing-results.
56
+ # For private repositories:
57
+ # - `publish_results` will always be set to `false`, regardless
58
+ # of the value entered here.
59
+ publish_results: true
60
+
61
+ # Upload the results to GitHub's code scanning dashboard (optional).
62
+ # Commenting out will disable upload of results to your repo's Code Scanning dashboard
63
+ - name: "Upload to code-scanning"
64
+ uses: github/codeql-action/upload-sarif@9e8d0789d4a0fa9ceb6b1738f7e269594bdd67f0 #v3.28.9
65
+ with:
66
+ sarif_file: results.sarif
.github/workflows/secrets_scan.yml ADDED
@@ -0,0 +1,22 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ on:
2
+ push:
3
+ branches:
4
+ - main
5
+ - v0.*
6
+ pull_request:
7
+
8
+ permissions:
9
+ contents: read
10
+
11
+ jobs:
12
+ test:
13
+ runs-on: ubuntu-latest
14
+ steps:
15
+ - name: Checkout code
16
+ uses: actions/checkout@b4ffde65f46336ab88eb53be808477a3936bae11 # v4.1.1
17
+ with:
18
+ fetch-depth: 0
19
+ - name: Secret Scanning
20
+ uses: trufflesecurity/trufflehog@7dc056a193116ba8d82154bf0549381c8fb8545c # v3.88.14
21
+ with:
22
+ extra_args: --results=verified,unknown
.github/workflows/sgl.yml ADDED
@@ -0,0 +1,178 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # # Tests layout
2
+
3
+ # Each folder under tests/ corresponds to a test category for a sub-namespace in verl. For instance:
4
+ # - `tests/trainer` for testing functionality related to `verl/trainer`
5
+ # - `tests/models` for testing functionality related to `verl/models`
6
+ # - ...
7
+
8
+ # There are a few folders with `special_` prefix, created for special purposes:
9
+ # - `special_distributed`: unit tests that must run with multiple GPUs
10
+ # - `special_e2e`: end-to-end tests with training/generation scripts
11
+ # - `special_npu`: tests for NPUs
12
+ # - `special_sanity`: a suite of quick sanity tests
13
+ # - `special_standalone`: a set of tests that are designed to run in dedicated environments
14
+
15
+ # Accelerators for tests
16
+ # - By default tests are run with GPU available, except for the ones under `special_npu`, and any test script whose name ends with `on_cpu.py`.
17
+ # - Test scripts with the `on_cpu.py` name suffix are tested on CPU resources in a Linux environment.
18
+
19
+ # # Workflow layout
20
+
21
+ # All CI tests are configured by yaml files in `.github/workflows/`. Here's an overview of all test configs:
22
+ # 1. A list of always triggered CPU sanity tests: `check-pr-title.yml`, `secrets_scan.yml`, `pre-commit.yml`, `doc.yml`
23
+ # 2. Some heavy multi-GPU unit tests, such as `model.yml`, `vllm.yml`, `sgl.yml`
24
+ # 3. End-to-end tests: `e2e_*.yml`
25
+ # 4. Unit tests
26
+ # - `cpu_unit_tests.yml`, run pytest on all scripts with file name pattern `tests/**/test_*_on_cpu.py`
27
+ # - `gpu_unit_tests.yml`, run pytest on all scripts with file without the `on_cpu.py` suffix.
28
+ # - Since cpu/gpu unit tests by default runs all tests under `tests`, please make sure tests are manually excluded in them when
29
+ # - new workflow yaml is added to `.github/workflows`
30
+ # - new tests are added to workflow mentioned in 2.
31
+
32
+ name: sgl
33
+
34
+ on:
35
+ # workflow_dispatch: # Manual
36
+ # Trigger the workflow on push or pull request,
37
+ # but only for the main branch
38
+ push:
39
+ branches:
40
+ - main
41
+ - v0.*
42
+ paths:
43
+ - "**/*.py"
44
+ - .github/workflows/sgl.yml
45
+ pull_request:
46
+ branches:
47
+ - main
48
+ - v0.*
49
+ paths:
50
+ - "**/*.py"
51
+ # Other entrypoints
52
+ - "!examples/**"
53
+ - "!tests/**"
54
+ - "!verl/trainer/main_*.py"
55
+ - "!verl/trainer/fsdp_sft_trainer.py"
56
+ # FSDP
57
+ - "!verl/workers/**/*dp_*.py"
58
+ # Megatron
59
+ - "!verl/workers/**/megatron_*.py"
60
+ # vLLM
61
+ - "!**/*vllm*"
62
+ # Recipes
63
+ - "!recipe/**"
64
+ # Entrypoints
65
+ - ".github/workflows/sgl.yml"
66
+ - "tests/rollout/*sglang*"
67
+ - "tests/rollout/async_rollout_utils.py"
68
+ - "tests/workers/rollout/*interaction*"
69
+
70
+ # Cancel jobs on the same ref if a new one is triggered
71
+ concurrency:
72
+ group: ${{ github.workflow }}-${{ github.ref }}
73
+ cancel-in-progress: ${{ github.ref != 'refs/heads/main' }}
74
+
75
+ # Declare permissions just read content.
76
+ permissions:
77
+ contents: read
78
+
79
+ env:
80
+ IMAGE: "verl-ci-cn-beijing.cr.volces.com/verlai/verl:app-verl0.6-transformers4.56.1-sglang0.5.2-mcore0.13.0-te2.2"
81
+ DYNAMIC_RUNNER_ENDPOINT: "https://sd10g3clalm04ug7alq90.apigateway-cn-beijing.volceapi.com/runner"
82
+
83
+ jobs:
84
+ setup:
85
+ if: github.repository_owner == 'volcengine'
86
+ runs-on: ubuntu-latest
87
+ outputs:
88
+ runner-label: ${{ steps.create-runner.outputs.runner-label }}
89
+ mlp-task-id: ${{ steps.create-runner.outputs.mlp-task-id }}
90
+ steps:
91
+ - uses: actions/checkout@v4
92
+ - id: create-runner
93
+ uses: volcengine/vemlp-github-runner@v1
94
+ with:
95
+ mode: "create"
96
+ faas-url: "${{ env.DYNAMIC_RUNNER_ENDPOINT }}"
97
+ mlp-image: "${{ env.IMAGE }}"
98
+
99
+ sgl:
100
+ needs: setup
101
+ runs-on: ["${{ needs.setup.outputs.runner-label || 'L20x8' }}"]
102
+ timeout-minutes: 35 # Increase this timeout value as needed
103
+ env:
104
+ HTTP_PROXY: ${{ secrets.PROXY_HTTP }}
105
+ HTTPS_PROXY: ${{ secrets.PROXY_HTTPS }}
106
+ NO_PROXY: "localhost,127.0.0.1,hf-mirror.com"
107
+ HF_ENDPOINT: "https://hf-mirror.com"
108
+ HF_HUB_ENABLE_HF_TRANSFER: 1
109
+ SGL_DISABLE_TP_MEMORY_INBALANCE_CHECK: "True"
110
+ NCCL_SHM_DISABLE: "1"
111
+ NCCL_P2P_DISABLE: "1"
112
+ steps:
113
+ - uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2
114
+ with:
115
+ fetch-depth: 0
116
+ - name: Install the current repository
117
+ run: |
118
+ pip3 install hf_transfer fastmcp
119
+ pip3 install -e .[test]
120
+ # - name: Download Model to Use
121
+ # run: |
122
+ # huggingface-cli download Qwen/Qwen2.5-0.5B --local-dir ${HOME}/models/Qwen/Qwen2.5-0.5B
123
+ # huggingface-cli download Qwen/Qwen2.5-1.5B-Instruct --local-dir ${HOME}/models/Qwen/Qwen2.5-1.5B-Instruct
124
+ # huggingface-cli download Qwen/Qwen2.5-VL-3B-Instruct --local-dir ${HOME}/models/Qwen/Qwen2.5-VL-3B-Instruct
125
+ # export HF_HUB_OFFLINE=1
126
+ - name: Prepare gsm8k dataset
127
+ run: |
128
+ ray stop --force
129
+ python3 examples/data_preprocess/gsm8k.py --local_dataset_path ${HOME}/models/hf_data/gsm8k
130
+ - name: Test the latest SGLang Rollout async with agent loop
131
+ run: |
132
+ ROLLOUT_NAME=sglang pytest -svvv tests/experimental/agent_loop
133
+ # huggingface-cli download verl-team/gsm8k-v0.4.1 --repo-type dataset --local-dir ~/verl-data/gsm8k
134
+ - name: Test the latest SGLang
135
+ run: |
136
+ cd tests/workers/rollout
137
+ torchrun --nnodes=1 --nproc_per_node=2 $(which pytest) -s test_sglang_spmd.py
138
+ - name: Test the latest SGLang Rollout async with interaction
139
+ run: |
140
+ cd tests/workers/rollout
141
+ torchrun --nnodes=1 --nproc_per_node=2 $(which pytest) -s test_sglang_async_rollout_w_interaction.py
142
+ - name: Test the latest SGLang Multi Interaction
143
+ run: |
144
+ cd tests/workers/rollout
145
+ torchrun --nnodes=1 --nproc_per_node=2 $(which pytest) -s test_sglang_multi_interaction.py
146
+ - name: Test the latest SGLang Rollout async with tool
147
+ run: |
148
+ cd tests/workers/rollout
149
+ torchrun --nnodes=1 --nproc_per_node=2 $(which pytest) -s test_sglang_async_rollout_w_tools.py
150
+ - name: Test the latest SGLang Rollout async with sandbox fusion tool
151
+ run: |
152
+ cd tests/workers/rollout
153
+ pytest -s test_sglang_async_rollout_sf_tools.py
154
+ - name: Test the latest SGLang Rollout async with search tool
155
+ run: |
156
+ cd tests/workers/rollout
157
+ pytest -s test_sglang_async_rollout_search_tools.py
158
+ - name: Test the latest SGLang Rollout async with mcp search tool
159
+ run: |
160
+ cd tests/workers/rollout
161
+ pytest -s test_sglang_async_rollout_mcp_tools.py
162
+ # Note(haibin.lin): for any new test, please update gpu_unit_tests.yaml to avoid repeated tests
163
+ - name: Test the latest SGLang Rollout async with multimodal delta
164
+ run: |
165
+ cd tests/workers/rollout
166
+ pytest -s test_sglang_async_rollout_multimodal_delta.py
167
+
168
+ cleanup:
169
+ runs-on: ubuntu-latest
170
+ needs: [setup, sgl]
171
+ if: always()
172
+ steps:
173
+ - id: destroy-runner
174
+ uses: volcengine/vemlp-github-runner@v1
175
+ with:
176
+ mode: "destroy"
177
+ faas-url: "${{ env.DYNAMIC_RUNNER_ENDPOINT }}"
178
+ mlp-task-id: "${{ needs.setup.outputs.mlp-task-id }}"
.github/workflows/type-coverage-check.yml ADDED
@@ -0,0 +1,31 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ name: Type Annotation and Docstring Coverage
2
+
3
+ on:
4
+ pull_request:
5
+ paths:
6
+ - '**/*.py'
7
+ - '.github/workflows/type-coverage-check.yml'
8
+
9
+ jobs:
10
+ type-coverage-check:
11
+ runs-on: ubuntu-latest
12
+ steps:
13
+ - uses: actions/checkout@v4
14
+ with:
15
+ fetch-depth: 0 # 🚨 Important: fetch full history so `origin/main` is available
16
+ - name: Set up Python
17
+ uses: actions/setup-python@v5
18
+ with:
19
+ python-version: '3.10'
20
+
21
+ - name: Install dependencies
22
+ run: |
23
+ pip3 install torch torchvision --index-url https://download.pytorch.org/whl/cpu
24
+ pip3 install -r requirements.txt
25
+ pip3 install -e . --no-deps
26
+ - name: Run type annotation coverage check
27
+ run: |
28
+ python3 tests/special_sanity/type_coverage_check.py
29
+ - name: Run docstring coverage check
30
+ run: |
31
+ python3 tests/special_sanity/check_api_docs.py verl
.github/workflows/vllm.yml ADDED
@@ -0,0 +1,145 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # # Tests layout
2
+
3
+ # Each folder under tests/ corresponds to a test category for a sub-namespace in verl. For instance:
4
+ # - `tests/trainer` for testing functionality related to `verl/trainer`
5
+ # - `tests/models` for testing functionality related to `verl/models`
6
+ # - ...
7
+
8
+ # There are a few folders with `special_` prefix, created for special purposes:
9
+ # - `special_distributed`: unit tests that must run with multiple GPUs
10
+ # - `special_e2e`: end-to-end tests with training/generation scripts
11
+ # - `special_npu`: tests for NPUs
12
+ # - `special_sanity`: a suite of quick sanity tests
13
+ # - `special_standalone`: a set of tests that are designed to run in dedicated environments
14
+
15
+ # Accelerators for tests
16
+ # - By default tests are run with GPU available, except for the ones under `special_npu`, and any test script whose name ends with `on_cpu.py`.
17
+ # - Test scripts with the `on_cpu.py` name suffix are tested on CPU resources in a Linux environment.
18
+
19
+ # # Workflow layout
20
+
21
+ # All CI tests are configured by yaml files in `.github/workflows/`. Here's an overview of all test configs:
22
+ # 1. A list of always triggered CPU sanity tests: `check-pr-title.yml`, `secrets_scan.yml`, `pre-commit.yml`, `doc.yml`
23
+ # 2. Some heavy multi-GPU unit tests, such as `model.yml`, `vllm.yml`, `sgl.yml`
24
+ # 3. End-to-end tests: `e2e_*.yml`
25
+ # 4. Unit tests
26
+ # - `cpu_unit_tests.yml`, run pytest on all scripts with file name pattern `tests/**/test_*_on_cpu.py`
27
+ # - `gpu_unit_tests.yml`, run pytest on all scripts with file without the `on_cpu.py` suffix.
28
+ # - Since cpu/gpu unit tests by default runs all tests under `tests`, please make sure tests are manually excluded in them when
29
+ # - new workflow yaml is added to `.github/workflows`
30
+ # - new tests are added to workflow mentioned in 2.
31
+
32
+ name: vllm
33
+
34
+ on:
35
+ # Trigger the workflow on push or pull request,
36
+ # but only for the main branch
37
+ push:
38
+ branches:
39
+ - main
40
+ - v0.*
41
+ pull_request:
42
+ branches:
43
+ - main
44
+ - v0.*
45
+ paths:
46
+ - "**/*.py"
47
+ # Other entrypoints
48
+ - "!examples/**"
49
+ - "!tests/**"
50
+ - "!verl/trainer/main_*.py"
51
+ - "!verl/trainer/fsdp_sft_trainer.py"
52
+ # Recipes
53
+ - "!recipe/**"
54
+ # FSDP
55
+ - "!verl/workers/**/*dp_*.py"
56
+ # Megatron
57
+ - "!verl/workers/**/megatron_*.py"
58
+ # SGLang
59
+ - "!**/*sglang*"
60
+ # Entrypoints
61
+ - ".github/workflows/vllm.yml"
62
+ - "tests/special_e2e/generation"
63
+ - "tests/workers/rollout"
64
+ - "verl/trainer/main_generation.py"
65
+ - "verl/trainer/config/generation.yaml"
66
+
67
+ # Cancel jobs on the same ref if a new one is triggered
68
+ concurrency:
69
+ group: ${{ github.workflow }}-${{ github.ref }}
70
+ cancel-in-progress: ${{ github.ref != 'refs/heads/main' }}
71
+
72
+ # Declare permissions just read content.
73
+ permissions:
74
+ contents: read
75
+
76
+ env:
77
+ IMAGE: "verl-ci-cn-beijing.cr.volces.com/verlai/verl:app-verl0.5-transformers4.55.4-vllm0.10.0-mcore0.13.0-te2.2"
78
+ DYNAMIC_RUNNER_ENDPOINT: "https://sd10g3clalm04ug7alq90.apigateway-cn-beijing.volceapi.com/runner"
79
+
80
+ jobs:
81
+ setup:
82
+ if: github.repository_owner == 'volcengine'
83
+ runs-on: ubuntu-latest
84
+ outputs:
85
+ runner-label: ${{ steps.create-runner.outputs.runner-label }}
86
+ mlp-task-id: ${{ steps.create-runner.outputs.mlp-task-id }}
87
+ steps:
88
+ - uses: actions/checkout@v4
89
+ - id: create-runner
90
+ uses: volcengine/vemlp-github-runner@v1
91
+ with:
92
+ mode: "create"
93
+ faas-url: "${{ env.DYNAMIC_RUNNER_ENDPOINT }}"
94
+ mlp-image: "${{ env.IMAGE }}"
95
+
96
+ vllm:
97
+ needs: setup
98
+ runs-on: ["${{ needs.setup.outputs.runner-label || 'L20x8' }}"]
99
+ timeout-minutes: 35 # Increase this timeout value as needed
100
+ env:
101
+ HTTP_PROXY: ${{ secrets.PROXY_HTTP }}
102
+ HTTPS_PROXY: ${{ secrets.PROXY_HTTPS }}
103
+ NO_PROXY: "localhost,127.0.0.1,hf-mirror.com"
104
+ HF_ENDPOINT: "https://hf-mirror.com"
105
+ HF_HUB_ENABLE_HF_TRANSFER: "0" # This is more stable
106
+ steps:
107
+ - uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2
108
+ with:
109
+ fetch-depth: 0
110
+ - name: Install the current repository
111
+ run: |
112
+ pip3 install -e .[test]
113
+ # - name: Download Model to Use
114
+ # run: |
115
+ # huggingface-cli download Qwen/Qwen2.5-0.5B-Instruct --local-dir ${HOME}/models/Qwen/Qwen2.5-0.5B-Instruct
116
+ # huggingface-cli download Qwen/Qwen2.5-1.5B-Instruct --local-dir ${HOME}/models/Qwen/Qwen2.5-1.5B-Instruct
117
+ # huggingface-cli download Qwen/Qwen2.5-VL-3B-Instruct --local-dir ${HOME}/models/Qwen/Qwen2.5-VL-3B-Instruct
118
+ # huggingface-cli download OldKingMeister/Qwen2.5-1.5B-Instruct-YaRN --local-dir ${HOME}/models/OldKingMeister/Qwen2.5-1.5B-Instruct-YaRN
119
+ # export HF_HUB_OFFLINE=1
120
+ - name: Prepare gsm8k dataset
121
+ run: |
122
+ ray stop --force
123
+ python3 examples/data_preprocess/gsm8k.py --local_dataset_path ${HOME}/models/hf_data/gsm8k
124
+ - name: Test the latest vLLM Rollout async with agent loop
125
+ run: |
126
+ ROLLOUT_NAME=vllm pytest -svvv tests/experimental/agent_loop
127
+ - name: Test the latest vLLM
128
+ run: |
129
+ torchrun --standalone --nnodes=1 --nproc_per_node=4 $(which pytest) -s tests/workers/rollout/rollout_vllm/test_vllm_spmd.py
130
+ - name: Test the latest vLLM on model with rope scaling
131
+ run: |
132
+ torchrun --standalone --nnodes=1 --nproc_per_node=4 $(which pytest) -s tests/workers/rollout/rollout_vllm/test_vllm_model_rope_scaling.py
133
+ # Note(haibin.lin): for any new test, please update gpu_unit_tests.yaml to avoid repeated tests
134
+
135
+ cleanup:
136
+ runs-on: ubuntu-latest
137
+ needs: [setup, vllm]
138
+ if: always()
139
+ steps:
140
+ - id: destroy-runner
141
+ uses: volcengine/vemlp-github-runner@v1
142
+ with:
143
+ mode: "destroy"
144
+ faas-url: "${{ env.DYNAMIC_RUNNER_ENDPOINT }}"
145
+ mlp-task-id: "${{ needs.setup.outputs.mlp-task-id }}"
.gitignore ADDED
@@ -0,0 +1,128 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+
2
+ **/*.pt
3
+ **/checkpoints
4
+ **/wget-log
5
+ **/_build/
6
+ **/*.ckpt
7
+ **/outputs
8
+ **/*.tar.gz
9
+ **/playground
10
+ **/wandb
11
+
12
+ # Byte-compiled / optimized / DLL files
13
+ __pycache__/
14
+ *.py[cod]
15
+ *$py.class
16
+ dataset/*
17
+ tensorflow/my_graph/*
18
+ .idea/
19
+ # C extensions
20
+ *.so
21
+
22
+ # Distribution / packaging
23
+ .Python
24
+ env/
25
+ build/
26
+ develop-eggs/
27
+ dist/
28
+ downloads/
29
+ eggs/
30
+ .eggs/
31
+ lib/
32
+ lib64/
33
+ parts/
34
+ sdist/
35
+ var/
36
+ tmp/
37
+ *.egg-info/
38
+ .installed.cfg
39
+ *.egg
40
+
41
+ # PyInstaller
42
+ # Usually these files are written by a python script from a template
43
+ # before PyInstaller builds the exe, so as to inject date/other infos into it.
44
+ *.manifest
45
+ *.spec
46
+
47
+ # Installer logs
48
+ pip-log.txt
49
+ pip-delete-this-directory.txt
50
+
51
+ # Unit test / coverage reports
52
+ htmlcov/
53
+ .tox/
54
+ .coverage
55
+ .coverage.*
56
+ .cache
57
+ nosetests.xml
58
+ coverage.xml
59
+ *,cover
60
+ .hypothesis/
61
+ pytest.ini
62
+ output.txt
63
+
64
+ # Translations
65
+ *.mo
66
+ *.pot
67
+
68
+ # Django stuff:
69
+ *.log
70
+ local_settings.py
71
+
72
+ # Flask stuff:
73
+ instance/
74
+ .webassets-cache
75
+
76
+ # Scrapy stuff:
77
+ .scrapy
78
+
79
+ # Sphinx documentation
80
+ docs/_build/
81
+
82
+ # PyBuilder
83
+ target/
84
+
85
+ # IPython Notebook
86
+ .ipynb_checkpoints
87
+
88
+ # pyenv
89
+ .python-version
90
+
91
+ # celery beat schedule file
92
+ celerybeat-schedule
93
+
94
+ # dotenv
95
+ .env
96
+
97
+ # virtualenv
98
+ venv/
99
+ .venv/
100
+ ENV/
101
+
102
+ # Spyder project settings
103
+ .spyderproject
104
+
105
+ # Rope project settings
106
+ .ropeproject
107
+
108
+ # vscode
109
+ .vscode
110
+
111
+ # Mac
112
+ .DS_Store
113
+
114
+ # vim
115
+ *.swp
116
+
117
+ # ckpt
118
+ *.lock
119
+
120
+ # data
121
+ *.parquet
122
+
123
+
124
+ # local logs
125
+ logs
126
+ log
127
+ outputs
128
+ .history
.pre-commit-config.yaml ADDED
@@ -0,0 +1,37 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ repos:
2
+ - repo: https://github.com/astral-sh/ruff-pre-commit
3
+ rev: "v0.12.2"
4
+ hooks:
5
+ - id: ruff
6
+ args: ["--fix", "--show-fixes", "--output-format=full"]
7
+ exclude: ^.*\.(ipynb)$
8
+ - id: ruff-format
9
+
10
+ - repo: https://github.com/pre-commit/mirrors-mypy
11
+ rev: 'v1.17.0'
12
+ hooks:
13
+ - id: mypy
14
+
15
+ - repo: local
16
+ hooks:
17
+ - id: autogen-trainer-cfg
18
+ name: Generate and verify verl/trainer/config/_generated_*.yaml
19
+ entry: scripts/generate_trainer_config.sh
20
+ language: script
21
+ pass_filenames: false
22
+
23
+ - repo: local
24
+ hooks:
25
+ - id: check-docstrings
26
+ name: Check doc string coverage
27
+ entry: python3 tests/special_sanity/check_docstrings.py
28
+ language: python
29
+ pass_filenames: false
30
+
31
+ - repo: local
32
+ hooks:
33
+ - id: check-license
34
+ name: Check license
35
+ entry: python3 tests/special_sanity/check_license.py --directories examples recipe scripts tests verl setup.py
36
+ language: python
37
+ pass_filenames: false
.readthedocs.yaml ADDED
@@ -0,0 +1,19 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Read the Docs configuration file
2
+ # See https://docs.readthedocs.io/en/stable/config-file/v2.html for details
3
+
4
+ version: 2
5
+
6
+ build:
7
+ os: ubuntu-22.04
8
+ tools:
9
+ python: "3.11"
10
+ rust: "1.70"
11
+
12
+ sphinx:
13
+ configuration: docs/conf.py
14
+
15
+ python:
16
+ install:
17
+ - requirements: docs/requirements-docs.txt
18
+ - method: pip
19
+ path: .
CONTRIBUTING.md ADDED
@@ -0,0 +1,89 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Contributing to verl
2
+
3
+ Thank you for considering a contribution to verl! We welcome contributions of any kind - bug fixes, enhancements, documentation improvements, or even just feedback. Whether you're an experienced developer or this is your first open-source project, your help is invaluable.
4
+
5
+ Your support can take many forms:
6
+ - Report issues or unexpected behaviors.
7
+ - Suggest or implement new features.
8
+ - Improve or expand documentation.
9
+ - Review pull requests and assist other contributors.
10
+ - Spread the word: share verl in blog posts, social media, or give the repo a ⭐.
11
+
12
+ ## Finding Issues to Contribute
13
+
14
+ Looking for ways to dive in? Check out these issues:
15
+ - [Good first issues](https://github.com/volcengine/verl/issues?q=is%3Aissue%20state%3Aopen%20label%3A%22good%20first%20issue%22)
16
+ - [Call for contribution](https://github.com/volcengine/verl/issues?q=is%3Aissue%20state%3Aopen%20label%3A%22call%20for%20contribution%22)
17
+ Furthermore, you can learn the development plan and roadmap via [RFC](https://github.com/volcengine/verl/issues?q=is%3Aissue%20state%3Aopen%20label%3ARFC) and [Roadmap](https://github.com/volcengine/verl/issues?q=state%3Aopen%20label%3A%22roadmap%22).
18
+
19
+
20
+ ## Developing
21
+
22
+ - **Python-only**: install verl via `pip install -e .[test,vllm]` or `pip install -e .[test,sglang]` and iterate quickly. For full dependency setup, check out the verl [installation doc](https://verl.readthedocs.io/en/latest/start/install.html).
23
+
24
+ ## Code Linting and Formatting
25
+
26
+ We rely on pre-commit to keep our code consistent. To set it up:
27
+
28
+ ```bash
29
+ pip install pre-commit
30
+ pre-commit install
31
+ # for staged changes
32
+ pre-commit run
33
+ # for all files in the repo
34
+ pre-commit run --all-files
35
+ # run a specific hook with pre-commit
36
+ # pre-commit run --all-files --show-diff-on-failure --color=always <hood-id>
37
+ pre-commit run --all-files --show-diff-on-failure --color=always ruff
38
+ pre-commit run --all-files --show-diff-on-failure --color=always autogen-trainer-cfg
39
+ ```
40
+
41
+ ## Testing
42
+
43
+ Our test suites run on GitHub Actions. Check these workflows for details:
44
+ - [GPU unit tests](https://github.com/volcengine/verl/blob/main/.github/workflows/gpu_unit_tests.yml)
45
+ - [CPU unit tests](https://github.com/volcengine/verl/blob/main/.github/workflows/cpu_unit_tests.yml)
46
+ - [vLLM tests](https://github.com/volcengine/verl/blob/main/.github/workflows/vllm.yml)
47
+ - [SGLang tests](https://github.com/volcengine/verl/blob/main/.github/workflows/sgl.yml)
48
+
49
+ ### Adding CI tests
50
+
51
+ If possible, please add CI test(s) for your new feature:
52
+
53
+ 1. Find the most relevant workflow yml file, which usually corresponds to a `hydra` default config (e.g. `ppo_trainer`, `ppo_megatron_trainer`, `sft_trainer`, etc).
54
+ 2. Add related path patterns to the `paths` section if not already included.
55
+ 3. Minimize the workload of the test script(s) (see existing scripts for examples).
56
+
57
+ ## Building the Docs
58
+ ```
59
+ # Ensure verl is on your PYTHONPATH, e.g.:
60
+ pip install -e .[test]
61
+
62
+ # Install documentation dependencies
63
+ pip install -r requirements-docs.txt
64
+
65
+ # Generate HTML docs
66
+ make clean
67
+ make html
68
+
69
+ # Preview locally
70
+ python -m http.server -d _build/html/
71
+ ```
72
+ Open your browser at http://localhost:8000 to explore the docs.
73
+
74
+ ## Pull Requests & Code Reviews
75
+
76
+ Thanks for submitting a PR! To streamline reviews:
77
+ - Follow our Pull Request Template for title format and checklist.
78
+ - Adhere to our pre-commit lint rules and ensure all checks pass.
79
+ - Update docs for any user-facing changes.
80
+ - Add or update tests in the CI workflows, or explain why tests aren't applicable.
81
+
82
+ ## License
83
+
84
+ See the [LICENSE](https://github.com/volcengine/verl/blob/main/LICENSE) file for full details.
85
+
86
+ ## Thank You
87
+
88
+ We appreciate your contributions to verl. Your efforts help make the project stronger and more user-friendly. Happy coding!
89
+
LICENSE ADDED
@@ -0,0 +1,202 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+
2
+ Apache License
3
+ Version 2.0, January 2004
4
+ http://www.apache.org/licenses/
5
+
6
+ TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
7
+
8
+ 1. Definitions.
9
+
10
+ "License" shall mean the terms and conditions for use, reproduction,
11
+ and distribution as defined by Sections 1 through 9 of this document.
12
+
13
+ "Licensor" shall mean the copyright owner or entity authorized by
14
+ the copyright owner that is granting the License.
15
+
16
+ "Legal Entity" shall mean the union of the acting entity and all
17
+ other entities that control, are controlled by, or are under common
18
+ control with that entity. For the purposes of this definition,
19
+ "control" means (i) the power, direct or indirect, to cause the
20
+ direction or management of such entity, whether by contract or
21
+ otherwise, or (ii) ownership of fifty percent (50%) or more of the
22
+ outstanding shares, or (iii) beneficial ownership of such entity.
23
+
24
+ "You" (or "Your") shall mean an individual or Legal Entity
25
+ exercising permissions granted by this License.
26
+
27
+ "Source" form shall mean the preferred form for making modifications,
28
+ including but not limited to software source code, documentation
29
+ source, and configuration files.
30
+
31
+ "Object" form shall mean any form resulting from mechanical
32
+ transformation or translation of a Source form, including but
33
+ not limited to compiled object code, generated documentation,
34
+ and conversions to other media types.
35
+
36
+ "Work" shall mean the work of authorship, whether in Source or
37
+ Object form, made available under the License, as indicated by a
38
+ copyright notice that is included in or attached to the work
39
+ (an example is provided in the Appendix below).
40
+
41
+ "Derivative Works" shall mean any work, whether in Source or Object
42
+ form, that is based on (or derived from) the Work and for which the
43
+ editorial revisions, annotations, elaborations, or other modifications
44
+ represent, as a whole, an original work of authorship. For the purposes
45
+ of this License, Derivative Works shall not include works that remain
46
+ separable from, or merely link (or bind by name) to the interfaces of,
47
+ the Work and Derivative Works thereof.
48
+
49
+ "Contribution" shall mean any work of authorship, including
50
+ the original version of the Work and any modifications or additions
51
+ to that Work or Derivative Works thereof, that is intentionally
52
+ submitted to Licensor for inclusion in the Work by the copyright owner
53
+ or by an individual or Legal Entity authorized to submit on behalf of
54
+ the copyright owner. For the purposes of this definition, "submitted"
55
+ means any form of electronic, verbal, or written communication sent
56
+ to the Licensor or its representatives, including but not limited to
57
+ communication on electronic mailing lists, source code control systems,
58
+ and issue tracking systems that are managed by, or on behalf of, the
59
+ Licensor for the purpose of discussing and improving the Work, but
60
+ excluding communication that is conspicuously marked or otherwise
61
+ designated in writing by the copyright owner as "Not a Contribution."
62
+
63
+ "Contributor" shall mean Licensor and any individual or Legal Entity
64
+ on behalf of whom a Contribution has been received by Licensor and
65
+ subsequently incorporated within the Work.
66
+
67
+ 2. Grant of Copyright License. Subject to the terms and conditions of
68
+ this License, each Contributor hereby grants to You a perpetual,
69
+ worldwide, non-exclusive, no-charge, royalty-free, irrevocable
70
+ copyright license to reproduce, prepare Derivative Works of,
71
+ publicly display, publicly perform, sublicense, and distribute the
72
+ Work and such Derivative Works in Source or Object form.
73
+
74
+ 3. Grant of Patent License. Subject to the terms and conditions of
75
+ this License, each Contributor hereby grants to You a perpetual,
76
+ worldwide, non-exclusive, no-charge, royalty-free, irrevocable
77
+ (except as stated in this section) patent license to make, have made,
78
+ use, offer to sell, sell, import, and otherwise transfer the Work,
79
+ where such license applies only to those patent claims licensable
80
+ by such Contributor that are necessarily infringed by their
81
+ Contribution(s) alone or by combination of their Contribution(s)
82
+ with the Work to which such Contribution(s) was submitted. If You
83
+ institute patent litigation against any entity (including a
84
+ cross-claim or counterclaim in a lawsuit) alleging that the Work
85
+ or a Contribution incorporated within the Work constitutes direct
86
+ or contributory patent infringement, then any patent licenses
87
+ granted to You under this License for that Work shall terminate
88
+ as of the date such litigation is filed.
89
+
90
+ 4. Redistribution. You may reproduce and distribute copies of the
91
+ Work or Derivative Works thereof in any medium, with or without
92
+ modifications, and in Source or Object form, provided that You
93
+ meet the following conditions:
94
+
95
+ (a) You must give any other recipients of the Work or
96
+ Derivative Works a copy of this License; and
97
+
98
+ (b) You must cause any modified files to carry prominent notices
99
+ stating that You changed the files; and
100
+
101
+ (c) You must retain, in the Source form of any Derivative Works
102
+ that You distribute, all copyright, patent, trademark, and
103
+ attribution notices from the Source form of the Work,
104
+ excluding those notices that do not pertain to any part of
105
+ the Derivative Works; and
106
+
107
+ (d) If the Work includes a "NOTICE" text file as part of its
108
+ distribution, then any Derivative Works that You distribute must
109
+ include a readable copy of the attribution notices contained
110
+ within such NOTICE file, excluding those notices that do not
111
+ pertain to any part of the Derivative Works, in at least one
112
+ of the following places: within a NOTICE text file distributed
113
+ as part of the Derivative Works; within the Source form or
114
+ documentation, if provided along with the Derivative Works; or,
115
+ within a display generated by the Derivative Works, if and
116
+ wherever such third-party notices normally appear. The contents
117
+ of the NOTICE file are for informational purposes only and
118
+ do not modify the License. You may add Your own attribution
119
+ notices within Derivative Works that You distribute, alongside
120
+ or as an addendum to the NOTICE text from the Work, provided
121
+ that such additional attribution notices cannot be construed
122
+ as modifying the License.
123
+
124
+ You may add Your own copyright statement to Your modifications and
125
+ may provide additional or different license terms and conditions
126
+ for use, reproduction, or distribution of Your modifications, or
127
+ for any such Derivative Works as a whole, provided Your use,
128
+ reproduction, and distribution of the Work otherwise complies with
129
+ the conditions stated in this License.
130
+
131
+ 5. Submission of Contributions. Unless You explicitly state otherwise,
132
+ any Contribution intentionally submitted for inclusion in the Work
133
+ by You to the Licensor shall be under the terms and conditions of
134
+ this License, without any additional terms or conditions.
135
+ Notwithstanding the above, nothing herein shall supersede or modify
136
+ the terms of any separate license agreement you may have executed
137
+ with Licensor regarding such Contributions.
138
+
139
+ 6. Trademarks. This License does not grant permission to use the trade
140
+ names, trademarks, service marks, or product names of the Licensor,
141
+ except as required for reasonable and customary use in describing the
142
+ origin of the Work and reproducing the content of the NOTICE file.
143
+
144
+ 7. Disclaimer of Warranty. Unless required by applicable law or
145
+ agreed to in writing, Licensor provides the Work (and each
146
+ Contributor provides its Contributions) on an "AS IS" BASIS,
147
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
148
+ implied, including, without limitation, any warranties or conditions
149
+ of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
150
+ PARTICULAR PURPOSE. You are solely responsible for determining the
151
+ appropriateness of using or redistributing the Work and assume any
152
+ risks associated with Your exercise of permissions under this License.
153
+
154
+ 8. Limitation of Liability. In no event and under no legal theory,
155
+ whether in tort (including negligence), contract, or otherwise,
156
+ unless required by applicable law (such as deliberate and grossly
157
+ negligent acts) or agreed to in writing, shall any Contributor be
158
+ liable to You for damages, including any direct, indirect, special,
159
+ incidental, or consequential damages of any character arising as a
160
+ result of this License or out of the use or inability to use the
161
+ Work (including but not limited to damages for loss of goodwill,
162
+ work stoppage, computer failure or malfunction, or any and all
163
+ other commercial damages or losses), even if such Contributor
164
+ has been advised of the possibility of such damages.
165
+
166
+ 9. Accepting Warranty or Additional Liability. While redistributing
167
+ the Work or Derivative Works thereof, You may choose to offer,
168
+ and charge a fee for, acceptance of support, warranty, indemnity,
169
+ or other liability obligations and/or rights consistent with this
170
+ License. However, in accepting such obligations, You may act only
171
+ on Your own behalf and on Your sole responsibility, not on behalf
172
+ of any other Contributor, and only if You agree to indemnify,
173
+ defend, and hold each Contributor harmless for any liability
174
+ incurred by, or claims asserted against, such Contributor by reason
175
+ of your accepting any such warranty or additional liability.
176
+
177
+ END OF TERMS AND CONDITIONS
178
+
179
+ APPENDIX: How to apply the Apache License to your work.
180
+
181
+ To apply the Apache License to your work, attach the following
182
+ boilerplate notice, with the fields enclosed by brackets "[]"
183
+ replaced with your own identifying information. (Don't include
184
+ the brackets!) The text should be enclosed in the appropriate
185
+ comment syntax for the file format. We also recommend that a
186
+ file or class name and description of purpose be included on the
187
+ same "printed page" as the copyright notice for easier
188
+ identification within third-party archives.
189
+
190
+ Copyright [yyyy] [name of copyright owner]
191
+
192
+ Licensed under the Apache License, Version 2.0 (the "License");
193
+ you may not use this file except in compliance with the License.
194
+ You may obtain a copy of the License at
195
+
196
+ http://www.apache.org/licenses/LICENSE-2.0
197
+
198
+ Unless required by applicable law or agreed to in writing, software
199
+ distributed under the License is distributed on an "AS IS" BASIS,
200
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
201
+ See the License for the specific language governing permissions and
202
+ limitations under the License.
Notice.txt ADDED
@@ -0,0 +1 @@
 
 
1
+ Copyright 2023-2024 Bytedance Ltd. and/or its affiliates
README.md ADDED
@@ -0,0 +1,264 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ <div align="center">
2
+ 👋 Hi, everyone!
3
+ verl is a RL training library initiated by <b>ByteDance Seed team</b> and maintained by the verl community.
4
+ <br>
5
+ <br>
6
+ </div>
7
+
8
+ <div align="center">
9
+
10
+ <a href="https://deepwiki.com/volcengine/verl"><img src="https://devin.ai/assets/deepwiki-badge.png" alt="Ask DeepWiki.com" style="height:20px;"></a>
11
+ [![GitHub Repo stars](https://img.shields.io/github/stars/volcengine/verl)](https://github.com/volcengine/verl/stargazers)
12
+ [![Twitter](https://img.shields.io/twitter/follow/verl_project)](https://twitter.com/verl_project)
13
+ <a href="https://join.slack.com/t/verl-project/shared_invite/zt-3c6mc2khw-v0lo6NfDPuFP6OnkrZwfqw"><img src="https://img.shields.io/badge/Slack-verl-blueviolet?logo=slack&amp"></a>
14
+ <a href="https://arxiv.org/pdf/2409.19256"><img src="https://img.shields.io/static/v1?label=EuroSys&message=Paper&color=red"></a>
15
+ [![Documentation](https://img.shields.io/badge/documentation-blue)](https://verl.readthedocs.io/en/latest/)
16
+ <a href="https://raw.githubusercontent.com/eric-haibin-lin/verl-community/refs/heads/main/WeChat.JPG"><img src="https://img.shields.io/badge/微信-green?logo=wechat&amp"></a>
17
+
18
+ </div>
19
+
20
+ ![seed logo](https://github.com/user-attachments/assets/c42e675e-497c-4508-8bb9-093ad4d1f216)
21
+
22
+ <h1 style="text-align: center;">verl: Volcano Engine Reinforcement Learning for LLMs</h1>
23
+
24
+ verl is a flexible, efficient and production-ready RL training library for large language models (LLMs).
25
+
26
+ verl is the open-source version of **[HybridFlow: A Flexible and Efficient RLHF Framework](https://arxiv.org/abs/2409.19256v2)** paper.
27
+
28
+ verl is flexible and easy to use with:
29
+
30
+ - **Easy extension of diverse RL algorithms**: The hybrid-controller programming model enables flexible representation and efficient execution of complex post-training dataflows. Build RL dataflows such as GRPO, PPO in a few lines of code.
31
+
32
+ - **Seamless integration of existing LLM infra with modular APIs**: Decouples computation and data dependencies, enabling seamless integration with existing LLM frameworks, such as FSDP, Megatron-LM, vLLM, SGLang, etc
33
+
34
+ - **Flexible device mapping**: Supports various placement of models onto different sets of GPUs for efficient resource utilization and scalability across different cluster sizes.
35
+
36
+ - Ready integration with popular HuggingFace models
37
+
38
+ verl is fast with:
39
+
40
+ - **State-of-the-art throughput**: SOTA LLM training and inference engine integrations and SOTA RL throughput.
41
+
42
+ - **Efficient actor model resharding with 3D-HybridEngine**: Eliminates memory redundancy and significantly reduces communication overhead during transitions between training and generation phases.
43
+
44
+ </p>
45
+
46
+ ## News
47
+ - [2025/08] verl is presented in the [PyTorch Expert Exchange Webinar](https://www.youtube.com/watch?v=Vd79NmmqY3Q&t=2s). [Slides](https://github.com/eric-haibin-lin/verl-community/blob/main/slides/verl_talk_pytorch_2025_08.pdf) available.
48
+ - [2025/07] The [ReTool](https://arxiv.org/pdf/2504.11536) recipe is fully open sourced. [Blog](https://www.notion.so/verl-reTool-recipe-Using-multi-round-conversations-and-code-sandboxing-to-improve-the-math-of-large-23a8b5b7feba80b386b2e5b5e3c1cde0)
49
+ - [2025/07] The first verl meetup will be held at ICML Vancouver on July 16th! Please [join us](https://lu.ma/0ek2nyao) if you are at ICML! (onsite only)
50
+ - [2025/06] verl with Megatron backend enables large MoE models such as [DeepSeek-671B and Qwen3-235B](https://verl.readthedocs.io/en/latest/perf/dpsk.html).
51
+ - [2025/03] [DAPO](https://dapo-sia.github.io/) is the open-sourced SOTA RL algorithm that achieves 50 points on AIME 2024 based on the Qwen2.5-32B pre-trained model, surpassing the previous SOTA achieved by DeepSeek's GRPO (DeepSeek-R1-Zero-Qwen-32B). DAPO's training is fully powered by verl and the reproduction code is available in `recipe/dapo` now.
52
+ <details><summary> more... </summary>
53
+ <ul>
54
+ <li>[2025/04] [Seed-Thinking-v1.5](https://github.com/ByteDance-Seed/Seed-Thinking-v1.5/blob/main/seed-thinking-v1.5.pdf) tech report is released! Trained with verl, Seed-Thinking-v1.5 achieves 86.7 on AIME 2024, 55.0 on Codeforces and 77.3 on GPQA, demonstrating excellent reasoning abilities in STEM and coding. Beyond reasoning tasks, the method demonstrates notable generalization across diverse domains.</li>
55
+ <li>[2025/07] verl keynote at [AWS AI Hours Singapore](https://pages.awscloud.com/aws-ai-hours-sg.html#agenda) on 7/8, verl & verl-agent project updates at [Agent for SWE meetup](https://lu.ma/e498qhsi) by LF AI & Data Singapore on 7/11.</li>
56
+ <li>[2025/06] verl team will provide latest project updates at [PyTorch Day China](https://www.lfasiallc.com/pytorch-day-china/) on June 7th. Meet our dev team in Beijing!</li>
57
+ <li> [2025/04] [VAPO](https://arxiv.org/pdf/2504.05118) (value-based augmented PPO) paper covers our latest RL method for reasoning models. Trained from Qwen-32B-base model, VAPO achieves 60.4 on AIME 2024, outperforming DAPO-32B.</li>
58
+ <li>[2025/05] [PF-PPO](https://arxiv.org/abs/2409.06957), accepted to ICML 2025, is now supported in verl! PF-PPO enhances policy learning efficiency and robustness by filtering potentially noisy reward signals and reusing high-quality experiences via a replay buffer.</li>
59
+ <li>[2025/04] We will give a tutorial about latest post-training techniques and programming guide for verl at [ICLR 2025 Expo](https://iclr.cc/virtual/2025/calendar?filter_events=Expo+Talk+Panel&filter_rooms=), [SCI-FM workshop](https://open-foundation-model.github.io/) and [LMSys afterparty](https://lu.ma/d23nyynm). Talk materials available [here](https://github.com/eric-haibin-lin/verl-community/tree/main/iclr25). </li>
60
+ <li>[2025/03] verl v0.3.0.post1 is released! See [release note](https://github.com/volcengine/verl/releases/) for details. It achieves [~1.4x speedup](https://tongyx361.github.io/blogs/posts/verl-intro/#/verl-flexible-and-efficient-rl-for-llms) compared to prev versions.</li>
61
+ <li>[2025/05] verl will be presented at [A2M Shanghai](https://a2m.msup.com.cn/home/?aid=4488&city=shanghai) on 5/16 - 5/17.</li>
62
+ <li>[2025/05] verl will be presented at [GOSIM x PyTorch Day 2025](https://paris2025.gosim.org/). See you in Paris! </li>
63
+ <li>[2025/03] We introduced the programming model of verl at the [vLLM Beijing Meetup](https://mp.weixin.qq.com/s/n77GibL2corAtQHtVEAzfg) and [verl intro and updates](https://github.com/eric-haibin-lin/verl-community/blob/main/slides/verl-lmsys-meetup.pdf) at the [SGLang-LMSYS Org Meetup](https://lu.ma/ntjrr7ig) in Sunnyvale mid-March.</li>
64
+ <li>[2025/03] We will present verl(HybridFlow) at EuroSys 2025. See you in Rotterdam!</li>
65
+ <li>[2025/02] verl v0.2.0.post2 is released!</li>
66
+ <li>[2025/02] We presented verl in the <a href="https://lu.ma/ji7atxux">Bytedance/NVIDIA/Anyscale Ray Meetup</a>. See you in San Jose!</li>
67
+ <li>[2025/01] [Doubao-1.5-pro](https://team.doubao.com/zh/special/doubao_1_5_pro) is released with SOTA-level performance on LLM & VLM. The RL scaling preview model is trained using verl, reaching OpenAI O1-level performance on math benchmarks (70.0 pass@1 on AIME).</li>
68
+ <li>[2024/12] verl is presented at Ray Forward 2024. Slides available <a href="https://github.com/eric-haibin-lin/verl-community/blob/main/slides/Ray_Forward_2024_%E5%B7%AB%E9%94%A1%E6%96%8C.pdf">here</a></li>
69
+ <li>[2024/12] The team presented <a href="https://neurips.cc/Expo/Conferences/2024/workshop/100677">Post-training LLMs: From Algorithms to Infrastructure</a> at NeurIPS 2024. <a href="https://github.com/eric-haibin-lin/verl-data/tree/neurips">Slides</a> and <a href="https://neurips.cc/Expo/Conferences/2024/workshop/100677">video</a> available.</li>
70
+ <li>[2024/10] verl is presented at Ray Summit. <a href="https://www.youtube.com/watch?v=MrhMcXkXvJU&list=PLzTswPQNepXntmT8jr9WaNfqQ60QwW7-U&index=37">Youtube video</a> available.</li>
71
+ <li>[2024/08] HybridFlow (verl) is accepted to EuroSys 2025.</li>
72
+ </ul>
73
+ </details>
74
+
75
+ ## Key Features
76
+
77
+ - **FSDP**, **FSDP2** and **Megatron-LM** for training.
78
+ - **vLLM**, **SGLang** and **HF Transformers** for rollout generation.
79
+ - Compatible with Hugging Face Transformers and Modelscope Hub: [Qwen-3](https://github.com/volcengine/verl/blob/main/examples/grpo_trainer/run_qwen3-8b.sh), Qwen-2.5, Llama3.1, Gemma2, DeepSeek-LLM, etc
80
+ - Supervised fine-tuning.
81
+ - Reinforcement learning with [PPO](examples/ppo_trainer/), [GRPO](examples/grpo_trainer/), [GSPO](recipe/gspo/), [ReMax](examples/remax_trainer/), [REINFORCE++](https://verl.readthedocs.io/en/latest/examples/config.html#algorithm), [RLOO](examples/rloo_trainer/), [PRIME](recipe/prime/), [DAPO](recipe/dapo/), [DrGRPO](recipe/drgrpo), [KL_Cov & Clip_Cov](recipe/entropy) etc.
82
+ - Support model-based reward and function-based reward (verifiable reward) for math, [coding](https://github.com/volcengine/verl/tree/main/recipe/dapo), etc
83
+ - Support vision-language models (VLMs) and [multi-modal RL](examples/grpo_trainer/run_qwen2_5_vl-7b.sh) with Qwen2.5-vl, Kimi-VL
84
+ - [Multi-turn with tool calling](https://github.com/volcengine/verl/tree/main/examples/sglang_multiturn)
85
+ - LLM alignment recipes such as [Self-play preference optimization (SPPO)](https://github.com/volcengine/verl/tree/main/recipe/sppo)
86
+ - Flash attention 2, [sequence packing](examples/ppo_trainer/run_qwen2-7b_seq_balance.sh), [sequence parallelism](examples/ppo_trainer/run_deepseek7b_llm_sp2.sh) support via DeepSpeed Ulysses, [LoRA](examples/sft/gsm8k/run_qwen_05_peft.sh), [Liger-kernel](examples/sft/gsm8k/run_qwen_05_sp2_liger.sh).
87
+ - Scales up to 671B models and hundreds of GPUs with [expert parallelism](https://github.com/volcengine/verl/pull/1467)
88
+ - Multi-gpu [LoRA RL](https://verl.readthedocs.io/en/latest/advance/ppo_lora.html) support to save memory.
89
+ - Experiment tracking with wandb, swanlab, mlflow and tensorboard.
90
+
91
+ ## Upcoming Features and Changes
92
+
93
+ - Q3 Roadmap https://github.com/volcengine/verl/issues/2388
94
+ - DeepSeek 671b optimizations with Megatron https://github.com/volcengine/verl/issues/1033
95
+ - Multi-turn rollout and tools using optimizations https://github.com/volcengine/verl/issues/1882
96
+ - [Agent integration](https://github.com/volcengine/verl/tree/main/verl/experimental/agent_loop)
97
+ - Async and off-policy architecture https://github.com/volcengine/verl/pull/2231
98
+ - List of breaking changes since v0.4 https://github.com/volcengine/verl/discussions/2270
99
+
100
+ ## Getting Started
101
+
102
+ <a href="https://verl.readthedocs.io/en/latest/index.html"><b>Documentation</b></a>
103
+
104
+ **Quickstart:**
105
+
106
+ - [Installation](https://verl.readthedocs.io/en/latest/start/install.html)
107
+ - [Quickstart](https://verl.readthedocs.io/en/latest/start/quickstart.html)
108
+ - [Programming Guide](https://verl.readthedocs.io/en/latest/hybrid_flow.html) & [Tech Talk](https://hcqnc.xetlk.com/sl/3vACOK) (in Chinese)
109
+ - [PPO in verl](https://verl.readthedocs.io/en/latest/algo/ppo.html)
110
+ - [GRPO in verl](https://verl.readthedocs.io/en/latest/algo/grpo.html)
111
+
112
+ **Running a PPO example step-by-step:**
113
+
114
+ - [Prepare Data for Post-Training](https://verl.readthedocs.io/en/latest/preparation/prepare_data.html)
115
+ - [Implement Reward Function for Dataset](https://verl.readthedocs.io/en/latest/preparation/reward_function.html)
116
+ - [PPO Example Architecture](https://verl.readthedocs.io/en/latest/examples/ppo_code_architecture.html)
117
+ - [Config Explanation](https://verl.readthedocs.io/en/latest/examples/config.html)
118
+
119
+ **Reproducible algorithm baselines:**
120
+
121
+ - [RL performance on coding, math](https://verl.readthedocs.io/en/latest/algo/baseline.html)
122
+
123
+ **For code explanation and advance usage (extension):**
124
+
125
+ - PPO Trainer and Workers
126
+ - [PPO Ray Trainer](https://verl.readthedocs.io/en/latest/workers/ray_trainer.html)
127
+ - [PyTorch FSDP Backend](https://verl.readthedocs.io/en/latest/workers/fsdp_workers.html)
128
+ - [Megatron-LM Backend](https://verl.readthedocs.io/en/latest/index.html)
129
+
130
+ - Advanced Usage and Extension
131
+ - [Add Models with the FSDP Backend](https://verl.readthedocs.io/en/latest/advance/fsdp_extension.html)
132
+ - [Add Models with the Megatron-LM Backend](https://verl.readthedocs.io/en/latest/advance/megatron_extension.html)
133
+ - [Multi-turn Rollout Support](https://verl.readthedocs.io/en/latest/sglang_multiturn/multiturn.html)
134
+ - [Search Tool Integration](https://verl.readthedocs.io/en/latest/sglang_multiturn/search_tool_example.html)
135
+ - [Sandbox Fusion Integration](https://verl.readthedocs.io/en/latest/examples/sandbox_fusion_example.html)
136
+ - [Deployment using Separate GPU Resources](https://github.com/volcengine/verl/tree/main/examples/split_placement)
137
+ - [Extend to Other RL(HF) algorithms](https://verl.readthedocs.io/en/latest/advance/dpo_extension.html)
138
+ - [Ray API design tutorial](https://verl.readthedocs.io/en/latest/advance/placement.html)
139
+
140
+ **Blogs from the community**
141
+
142
+ - [When Reasoning Models Break Tokenization: The Hidden Complexity of Multiturn Training](https://github.com/zhaochenyang20/Awesome-ML-SYS-Tutorial/blob/main/rlhf/verl/multi-turn/fast_tokenization/multiturn_tokenization_and_masking.md)
143
+ - [verl deployment on AWS SageMaker](https://medium.com/@kaige.yang0110/run-verl-on-sagemaker-using-4x8-l40s-gpus-8e6d5c3c61d3)
144
+ - [verl x SGLang Multi-turn Code Walkthrough](https://github.com/zhaochenyang20/Awesome-ML-SYS-Tutorial/blob/main/rlhf/verl/multi-turn/code-walk-through/readme_EN.md)
145
+ - [Optimizing SGLang Memory Usage in verl](https://hebiao064.github.io/rl-memory-management)
146
+ - [SGLang, verl, OpenBMB and Tsinghua University: Pioneering End-to-End Multi-Turn RLHF](https://github.com/zhaochenyang20/Awesome-ML-SYS-Tutorial/blob/main/rlhf/verl/multi-turn/verl-multiturn-rollout-Release.md)
147
+ - [Reinforcement Learning from Human Feedback on AMD GPUs with verl and ROCm Integration](https://rocm.blogs.amd.com/artificial-intelligence/verl-large-scale/README.html)
148
+ - [veMLP x verl :玩转强化学习训练](https://mp.weixin.qq.com/s/7nbqxk4knMGd-hQE9ls2tA)
149
+ - [使用 verl 进行 GRPO 分布式强化学习训练最佳实践](https://www.volcengine.com/docs/6459/1463942)
150
+ - [HybridFlow verl 原文浅析](https://github.com/zhaochenyang20/Awesome-ML-SYS-Tutorial/blob/main/rlhf/verl/readme.md)
151
+ - [最高提升 20 倍吞吐量!豆包大模型团队发布全新 RLHF 框架,现已开源!](https://team.doubao.com/en/blog/%E6%9C%80%E9%AB%98%E6%8F%90%E5%8D%8720%E5%80%8D%E5%90%9E%E5%90%90%E9%87%8F-%E8%B1%86%E5%8C%85%E5%A4%A7%E6%A8%A1%E5%9E%8B%E5%9B%A2%E9%98%9F%E5%8F%91%E5%B8%83%E5%85%A8%E6%96%B0-rlhf-%E6%A1%86%E6%9E%B6-%E7%8E%B0%E5%B7%B2%E5%BC%80%E6%BA%90)
152
+
153
+ ## Performance Tuning Guide
154
+
155
+ Performance is essential for on-policy RL algorithms. We have written a detailed [performance tuning guide](https://verl.readthedocs.io/en/latest/perf/perf_tuning.html) to help you optimize performance.
156
+
157
+ ## Upgrade to vLLM >= v0.8.2
158
+
159
+ verl now supports vLLM>=0.8.2 when using FSDP as the training backend. Please refer to [this document](https://github.com/volcengine/verl/blob/main/docs/README_vllm0.8.md) for the installation guide and more information. Please avoid vllm 0.7.x, which contains bugs that may lead to OOMs and unexpected errors.
160
+
161
+ ## Use Latest SGLang
162
+
163
+ SGLang is fully supported with verl, and SGLang RL Group is working extensively on building unique features, including multi-turn agentic RL, VLM RLHF, server-based RL, and partial rollout. Please refer to [this document](https://verl.readthedocs.io/en/latest/workers/sglang_worker.html) for the installation guide and more information.
164
+
165
+ ## Upgrade to FSDP2
166
+
167
+ verl is fully embracing FSDP2! FSDP2 is recommended by torch distributed team, providing better throughput and memory usage, and is composable with other features (e.g. torch.compile). To enable FSDP2, simply use verl main and set the following options:
168
+ ```
169
+ actor_rollout_ref.ref.strategy=fsdp2
170
+ actor_rollout_ref.actor.strategy=fsdp2
171
+ critic.strategy=fsdp2
172
+ reward_model.strategy=fsdp2
173
+ ```
174
+ Furthermore, FSDP2 cpu offloading is compatible with gradient accumulation. You can turn it on to save memory with `actor_rollout_ref.actor.fsdp_config.offload_policy=True`. For more details, see https://github.com/volcengine/verl/pull/1026
175
+
176
+ ## AMD Support (ROCm Kernel)
177
+
178
+ verl now supports FSDP as the training engine (Megatron support coming soon) and integrates with both vLLM and SGLang as inference engines. Please refer to [this document](https://github.com/volcengine/verl/blob/main/docs/amd_tutorial/amd_build_dockerfile_page.rst) for the installation guide and more information, and [this document](https://github.com/volcengine/verl/blob/main/docs/amd_tutorial/amd_vllm_page.rst) for the vLLM performance tuning for ROCm.
179
+
180
+
181
+ ## Citation and acknowledgement
182
+
183
+ If you find the project helpful, please cite:
184
+
185
+ - [HybridFlow: A Flexible and Efficient RLHF Framework](https://arxiv.org/abs/2409.19256v2)
186
+ - [A Framework for Training Large Language Models for Code Generation via Proximal Policy Optimization](https://i.cs.hku.hk/~cwu/papers/gmsheng-NL2Code24.pdf)
187
+
188
+ ```bibtex
189
+ @article{sheng2024hybridflow,
190
+ title = {HybridFlow: A Flexible and Efficient RLHF Framework},
191
+ author = {Guangming Sheng and Chi Zhang and Zilingfeng Ye and Xibin Wu and Wang Zhang and Ru Zhang and Yanghua Peng and Haibin Lin and Chuan Wu},
192
+ year = {2024},
193
+ journal = {arXiv preprint arXiv: 2409.19256}
194
+ }
195
+ ```
196
+
197
+ verl is inspired by the design of Nemo-Aligner, Deepspeed-chat and OpenRLHF. The project is adopted and contributed by Bytedance, Anyscale, LMSys.org, [Alibaba Qwen team](https://github.com/QwenLM/), Shanghai AI Lab, Tsinghua University, UC Berkeley, UCLA, UIUC, University of Hong Kong, ke.com, [All Hands AI](https://www.all-hands.dev/), [ModelBest](http://modelbest.cn/), JD AI Lab, Microsoft Research, [StepFun](https://www.stepfun.com/), Amazon, LinkedIn, Meituan, [Camel-AI](https://www.camel-ai.org/), [OpenManus](https://github.com/OpenManus), Xiaomi, NVIDIA research, [Baichuan](https://www.baichuan-ai.com/home), [RedNote](https://www.xiaohongshu.com/), [SwissAI](https://www.swiss-ai.org/), [Moonshot AI (Kimi)](https://www.moonshot-ai.com/), Baidu, Snowflake, Skywork.ai, JetBrains, [IceSword Lab](https://www.iceswordlab.com), and many more.
198
+
199
+ ## Awesome work using verl
200
+
201
+ - [TinyZero](https://github.com/Jiayi-Pan/TinyZero): a reproduction of **DeepSeek R1 Zero** recipe for reasoning tasks ![GitHub Repo stars](https://img.shields.io/github/stars/Jiayi-Pan/TinyZero)
202
+ - [SkyThought](https://github.com/NovaSky-AI/SkyThought): RL training for Sky-T1-7B by NovaSky AI team. ![GitHub Repo stars](https://img.shields.io/github/stars/NovaSky-AI/SkyThought)
203
+ - [simpleRL-reason](https://github.com/hkust-nlp/simpleRL-reason): SimpleRL-Zoo: Investigating and Taming Zero Reinforcement Learning for Open Base Models in the Wild ![GitHub Repo stars](https://img.shields.io/github/stars/hkust-nlp/simpleRL-reason)
204
+ - [Easy-R1](https://github.com/hiyouga/EasyR1): **Multi-modal** RL training framework ![GitHub Repo stars](https://img.shields.io/github/stars/hiyouga/EasyR1)
205
+ - [OpenManus-RL](https://github.com/OpenManus/OpenManus-RL): LLM Agents RL tuning framework for multiple agent environments. ![GitHub Repo stars](https://img.shields.io/github/stars/OpenManus/OpenManus-RL)
206
+ - [rllm](https://github.com/agentica-project/rllm): async RL training with [verl-pipeline](https://github.com/agentica-project/verl-pipeline) ![GitHub Repo stars](https://img.shields.io/github/stars/agentica-project/rllm)
207
+ - [RAGEN](https://github.com/ZihanWang314/ragen): a general-purpose reasoning **agent** training framework ![GitHub Repo stars](https://img.shields.io/github/stars/ZihanWang314/ragen)
208
+ - [Search-R1](https://github.com/PeterGriffinJin/Search-R1): RL with reasoning and **searching (tool-call)** interleaved LLMs ![GitHub Repo stars](https://img.shields.io/github/stars/PeterGriffinJin/Search-R1)
209
+ - [ReSearch](https://github.com/Agent-RL/ReSearch): Learning to **Re**ason with **Search** for LLMs via Reinforcement Learning ![GitHub Repo stars](https://img.shields.io/github/stars/Agent-RL/ReSearch)
210
+ - [Skywork-OR1](https://github.com/SkyworkAI/Skywork-OR1): Skywork open reasoner series ![GitHub Repo stars](https://img.shields.io/github/stars/SkyworkAI/Skywork-OR1)
211
+ - [ToRL](https://github.com/GAIR-NLP/ToRL): Scaling tool-integrated RL ![GitHub Repo stars](https://img.shields.io/github/stars/GAIR-NLP/ToRL)
212
+ - [Absolute Zero Reasoner](https://github.com/LeapLabTHU/Absolute-Zero-Reasoner): [A no human curated data self-play framework for reasoning](https://arxiv.org/abs/2505.03335) ![GitHub Repo stars](https://img.shields.io/github/stars/LeapLabTHU/Absolute-Zero-Reasoner)
213
+ - [verl-agent](https://github.com/langfengQ/verl-agent): A scalable training framework for **long-horizon LLM/VLM agents**, along with a new algorithm **GiGPO** ![GitHub Repo stars](https://img.shields.io/github/stars/langfengQ/verl-agent)
214
+ - [RL-Factory](https://github.com/Simple-Efficient/RL-Factory): An easy and efficient RL post-training framework for Agentic Learning ![GitHub Repo stars](https://img.shields.io/github/stars/Simple-Efficient/RL-Factory)
215
+ - [ReTool](https://retool-rl.github.io/): ReTool: reinforcement learning for strategic tool use in LLMs. Code release is in progress...
216
+ - [verl-tool](https://github.com/TIGER-AI-Lab/verl-tool): A unified and easy-to-extend tool-agent training framework based on verl ![GitHub Repo stars](https://img.shields.io/github/stars/TIGER-AI-Lab/verl-tool)
217
+ - [PRIME](https://github.com/PRIME-RL/PRIME): Process reinforcement through implicit rewards ![GitHub Repo stars](https://img.shields.io/github/stars/PRIME-RL/PRIME)
218
+ - [MemAgent](https://github.com/BytedTsinghua-SIA/MemAgent): MemAgent: Reshaping Long-Context LLM with Multi-Conv RL based Memory Agent ![GitHub Repo stars](https://img.shields.io/github/stars/BytedTsinghua-SIA/MemAgent)
219
+ - [POLARIS](https://github.com/ChenxinAn-fdu/POLARIS): A Post-training recipe for scaling RL on Advanced Reasoning models ![GitHub Repo stars](https://img.shields.io/github/stars/ChenxinAn-fdu/POLARIS)
220
+ - [GUI-R1](https://github.com/ritzz-ai/GUI-R1): **GUI-R1**: A Generalist R1-style Vision-Language Action Model For **GUI Agents** ![GitHub Repo stars](https://img.shields.io/github/stars/ritzz-ai/GUI-R1)
221
+ - [DeepRetrieval](https://github.com/pat-jj/DeepRetrieval): RL Training of **Search Agent** with **Search/Retrieval Outcome** ![GitHub Repo stars](https://img.shields.io/github/stars/pat-jj/DeepRetrieval)
222
+ - [Code-R1](https://github.com/ganler/code-r1): Reproducing R1 for **Code** with Reliable Rewards ![GitHub Repo stars](https://img.shields.io/github/stars/ganler/code-r1)
223
+ - [DeepResearcher](https://github.com/GAIR-NLP/DeepResearcher): Scaling deep research via reinforcement learning in real-world environments ![GitHub Repo stars](https://img.shields.io/github/stars/GAIR-NLP/DeepResearcher)
224
+ - [VAGEN](https://github.com/RAGEN-AI/VAGEN): Training VLM agents with multi-turn reinforcement learning ![GitHub Repo stars](https://img.shields.io/github/stars/RAGEN-AI/VAGEN)
225
+ - [RM-R1](https://arxiv.org/abs/2505.02387): RL training of reasoning reward models ![GitHub Repo stars](https://img.shields.io/github/stars/RM-R1-UIUC/RM-R1)
226
+ - [LUFFY](https://arxiv.org/pdf/2504.14945): Learning to Reason under Off-Policy Guidance![GitHub Repo stars](https://img.shields.io/github/stars/ElliottYan/LUFFY)
227
+ - [DeepMath](https://github.com/zwhe99/DeepMath): DeepMath-103K data and series models for math reasoning![GitHub Repo stars](https://img.shields.io/github/stars/zwhe99/DeepMath)
228
+ - [PACS](https://github.com/ritzz-ai/PACS): Implicit Actor Critic Coupling via a Supervised Learning Framework for RLVR ![GitHub Repo stars](https://img.shields.io/github/stars/ritzz-ai/PACS)
229
+ - [Entropy Mechanism of RL](https://github.com/PRIME-RL/Entropy-Mechanism-of-RL): The Entropy Mechanism of Reinforcement Learning for Large Language Model Reasoning![GitHub Repo stars](https://img.shields.io/github/stars/PRIME-RL/Entropy-Mechanism-of-RL)
230
+ - [LLaSA-TTS-GRPO](https://github.com/channel-io/ch-tts-llasa-rl-grpo): TTS fine-tuning with GRPO optimization based on LLASA models ![GitHub Repo stars](https://img.shields.io/github/stars/channel-io/ch-tts-llasa-rl-grpo)
231
+ - [PF-PPO](https://arxiv.org/abs/2409.06957): Policy Filtration for PPO based on the reliability of reward signals for more efficient and robust RLHF.
232
+ - [RACRO](https://github.com/gyhdog99/RACRO2): Build multi-modal reasoning models via decoupling it into query-conditioned captioning and text-only reasoning ![GitHub Repo stars](https://img.shields.io/github/stars/gyhdog99/RACRO2)
233
+ - [Agent Lightning](https://github.com/microsoft/agent-lightning): A flexible and extensible framework that enables seamless agent optimization for any existing agent framework. ![GitHub Repo stars](https://img.shields.io/github/stars/microsoft/agent-lightning)
234
+ - [VTool-R1](https://github.com/VTOOL-R1/vtool-r1): VLMs Learn to Think with Images via Reinforcement Learning on Multimodal Tool Use. ![GitHub Repo stars](https://img.shields.io/github/stars/VTOOL-R1/vtool-r1)
235
+ - [Kimina-Prover-RL](https://github.com/project-numina/kimina-prover-rl/tree/main/recipe/kimina_prover_rl): Training pipeline for formal theorem proving, based on a paradigm inspired by DeepSeek-R1.
236
+ - [RL-PLUS](https://github.com/YihongDong/RL-PLUS): Countering Capability Boundary Collapse of LLMs in Reinforcement Learning with Hybrid-policy Optimization.
237
+ - [rStar2-Agent](https://github.com/microsoft/rStar): Using reinforcement learning with multi-step tool-calling for math tasks, rStar2-Agent-14B reaches frontier-level math reasoning in just 510 RL training steps ![GitHub Repo stars](https://img.shields.io/github/stars/microsoft/rStar)
238
+ - [Vision-SR1](https://github.com/zli12321/Vision-SR1): Self-Rewarding Vision-Language Model via Reasoning Decomposition ![GitHub Repo stars](https://img.shields.io/github/stars/zli12321/Vision-SR1)
239
+ - [SimpleVLA-RL](https://github.com/PRIME-RL/SimpleVLA-RL): SimpleVLA-RL: A Simple yet Effective Vision-Language Action Model for Reinforcement Learning ![GitHub Repo stars](https://img.shields.io/github/stars/PRIME-RL/SimpleVLA-RL)
240
+ - [Table-R1](https://github.com/Table-R1/Table-R1): Table-R1: Inference-Time Scaling for Table Reasoning ![GitHub Repo stars](https://img.shields.io/github/stars/Table-R1/Table-R1)
241
+
242
+ and many more awesome work listed in [recipe](recipe/README.md).
243
+
244
+ ## Contribution Guide
245
+
246
+ See [contributions guide](CONTRIBUTING.md)
247
+
248
+ ## About [ByteDance Seed Team](https://team.doubao.com/)
249
+
250
+ Founded in 2023, ByteDance Seed Team is dedicated to crafting the industry's most advanced AI foundation models. The team aspires to become a world-class research team and make significant contributions to the advancement of science and society. You can get to know Bytedance Seed better through the following channels👇
251
+ <div>
252
+ <a href="https://team.doubao.com/">
253
+ <img src="https://img.shields.io/badge/Website-%231e37ff?style=for-the-badge&logo=bytedance&logoColor=white"></a>
254
+ <a href="https://github.com/user-attachments/assets/469535a8-42f2-4797-acdf-4f7a1d4a0c3e">
255
+ <img src="https://img.shields.io/badge/WeChat-07C160?style=for-the-badge&logo=wechat&logoColor=white"></a>
256
+ <a href="https://www.xiaohongshu.com/user/profile/668e7e15000000000303157d?xsec_token=ABl2-aqekpytY6A8TuxjrwnZskU-6BsMRE_ufQQaSAvjc%3D&xsec_source=pc_search">
257
+ <img src="https://img.shields.io/badge/Xiaohongshu-%23FF2442?style=for-the-badge&logo=xiaohongshu&logoColor=white"></a>
258
+ <a href="https://www.zhihu.com/org/dou-bao-da-mo-xing-tuan-dui/">
259
+ <img src="https://img.shields.io/badge/zhihu-%230084FF?style=for-the-badge&logo=zhihu&logoColor=white"></a>
260
+
261
+ </div>
262
+ ---
263
+
264
+ We are HIRING! Send us an [email](mailto:the.verl.project@gmail.com) if you are interested in internship/FTE opportunities in RL for agents.
TODO.md ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ - 优化Metric中的实现
2
+ - 其需要给出一个json,包含thought和score,考虑去掉thought?
3
+ - 可以大幅减少调用api开支和打分时间
build_vllm.sh ADDED
@@ -0,0 +1,11 @@
 
 
 
 
 
 
 
 
 
 
 
 
1
+ CUDA_VISIBLE_DEVICES=0 python -m vllm.entrypoints.openai.api_server \
2
+ --model /home/hector5/models/Qwen2.5-14B-Instruct/ \
3
+ --served-model-name Qwen2.5-14B-Instruct \
4
+ --dtype auto \
5
+ --max-model-len 10000 \
6
+ --tensor-parallel-size 1 \
7
+ --port 8000
8
+ # --served-model-name Llama-2-7b-chat
9
+ # --host 0.0.0.0 \
10
+
11
+ # CUDA_VISIBLE_DEVICES=3 vllm serve /data1/public/models/Qwen2.5-7B-Instruct --port 8000 --served-model-name Qwen2.5-7B-Instruct --dtype auto --max-model-len 10000 --tensor-parallel-size 1 --enable_prefix_caching
debug.txt ADDED
@@ -0,0 +1,25 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ https://github.com/Dao-AILab/flash-attention/releases/
2
+
3
+ - 2.7.2.post1: cuda版本-torch版本-python版本
4
+
5
+
6
+ - ImportError: /lib64/libc.so.6: version `GLIBC_2.32' not found
7
+ -- https://github.com/Dao-AILab/flash-attention/issues/1708 (flash-attn版本问题)
8
+ - ImportError: cannot import name 'get_tcp_uri' from 'vllm.utils'
9
+ -- https://github.com/volcengine/verl/issues/3638(貌似vllm和flash_attn都不干净)
10
+ -- 看上去 flash_attn-2.7.4.post1 (cu12 / torch2.6.0-cu14 / python 3.11) + vllm 0.10.0 可以
11
+ - 更新至0.10.0后又有以下错误
12
+ - - vllm/_C.abi3.so: undefined symbol: _ZN3c106ivalue14ConstantString6createENSt7__cxx1112basic_stringIcSt11char_traitsIcESaIcEEE
13
+ - 最后可能有效的版本(python 3.11 + torch 2.7.1(vllm required) + fa-2.6.0.post1 (abiTrue) + vllm 0.10.0)
14
+
15
+
16
+ - ModuleNotFoundError: No module named 'sglang.srt.weight_sync'
17
+ -- https://github.com/volcengine/verl/issues/2921
18
+ -- sglang版本应该为sglang[all]==0.4.9.post6 (flash-attn 2.8.2 / 看上去2.7.4.post1也能work)
19
+ - ModuleNotFoundError: No module named 'sgl_kernel.kvcacheio'
20
+ -- https://github.com/sgl-project/sglang/issues/3687
21
+ - [sgl_kernel] CRITICAL: Could not load any common_ops library!
22
+
23
+
24
+ - ImportError: cannot import name 'layer_type_validation' from 'transformers.configuration_utils'
25
+ -- https://github.com/huggingface/transformers/issues/38617 (更新至4.52.3 / 4.56.1)
docker/Apptainerfile.rocm ADDED
@@ -0,0 +1,57 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ Bootstrap: docker
2
+
3
+ # Support - Training: fsdp; Inference: vllm
4
+ # FROM: rocm/vllm:rocm6.2_mi300_ubuntu20.04_py3.9_vllm_0.6.4
5
+ # Support - Training: fsdp; Inference: vllm, sglang
6
+ FROM lmsysorg/sglang:v0.4.5-rocm630
7
+
8
+ %environment
9
+ export PYTORCH_ROCM_ARCH="gfx90a;gfx942"
10
+
11
+ export HIPCC_COMPILE_FLAGS_APPEND="--amdgpu-target=gfx90a;gfx942 -D__HIP_PLATFORM_AMD__"
12
+ export CFLAGS="-D__HIP_PLATFORM_AMD__"
13
+ export CXXFLAGS="-D__HIP_PLATFORM_AMD__"
14
+
15
+ %post
16
+ # Create source directory
17
+ mkdir -p /opt/src
18
+
19
+ # Uninstall and reinstall vllm
20
+ pip uninstall -y vllm
21
+ cd /opt/src
22
+ git clone -b v0.6.3 https://github.com/vllm-project/vllm.git
23
+ cd vllm
24
+ MAX_JOBS=$(nproc) python3 setup.py install
25
+ cd /opt
26
+ rm -rf /opt/src/vllm
27
+
28
+ # Install dependencies
29
+ pip install "tensordict<0.6" --no-deps
30
+ pip install accelerate \
31
+ codetiming \
32
+ datasets \
33
+ dill \
34
+ hydra-core \
35
+ liger-kernel \
36
+ numpy \
37
+ pandas \
38
+ peft \
39
+ "pyarrow>=15.0.0" \
40
+ pylatexenc \
41
+ "ray[data,train,tune,serve]" \
42
+ torchdata \
43
+ transformers \
44
+ wandb \
45
+ orjson \
46
+ pybind11
47
+
48
+ # Clone and install verl from GitHub
49
+ cd /opt
50
+ git clone https://github.com/volcengine/verl.git
51
+ cd verl
52
+ # Uncomment to use a specific version
53
+ # git checkout v0.3.0.post0
54
+ pip install -e . --no-deps
55
+
56
+ # Install torch_memory_saver
57
+ pip install git+https://github.com/ExtremeViscent/torch_memory_saver.git --no-deps