Lekr0 committed on
Commit
4024ed7
·
verified ·
1 Parent(s): 7c50656

Add files using upload-large-folder tool

Browse files
This view is limited to 50 files because it contains too many changes.   See raw diff
Files changed (50) hide show
  1. SpecForge-ext/.devcontainer/Dockerfile +32 -0
  2. SpecForge-ext/.devcontainer/devcontainer.json +30 -0
  3. SpecForge-ext/.editorconfig +25 -0
  4. SpecForge-ext/.github/CODEOWNERS +11 -0
  5. SpecForge-ext/.github/pull_request_template.md +30 -0
  6. SpecForge-ext/.isort.cfg +3 -0
  7. SpecForge-ext/LICENSE +21 -0
  8. SpecForge-ext/MANIFEST.in +2 -0
  9. SpecForge-ext/README.md +70 -0
  10. SpecForge-ext/analyze_accept_length.sh +91 -0
  11. SpecForge-ext/assets/logo.svg +0 -0
  12. SpecForge-ext/configs/deepseek-v2-lite-eagle3.json +39 -0
  13. SpecForge-ext/configs/deepseek-v3-671b-eagle3.json +32 -0
  14. SpecForge-ext/configs/gemma3-1b-eagle3.json +32 -0
  15. SpecForge-ext/configs/gpt-oss-120B-eagle3.json +30 -0
  16. SpecForge-ext/configs/gpt-oss-20B-eagle3.json +30 -0
  17. SpecForge-ext/configs/ling-flash-2.0-eagle3.json +24 -0
  18. SpecForge-ext/configs/llama3-70B-ealge3.json +37 -0
  19. SpecForge-ext/configs/llama3-8B-eagle3.json +24 -0
  20. SpecForge-ext/configs/llama4-scout-17B-16E-eagle3.json +22 -0
  21. SpecForge-ext/configs/longcat-flash-dflash.json +41 -0
  22. SpecForge-ext/configs/longcat-flash-eagle3.json +31 -0
  23. SpecForge-ext/configs/phi4-eagle3.json +27 -0
  24. SpecForge-ext/configs/qwen2.5-7b-eagle3.json +30 -0
  25. SpecForge-ext/configs/qwen2.5-vl-32b-eagle3.json +40 -0
  26. SpecForge-ext/configs/qwen3-235B-A22B-eagle3.json +36 -0
  27. SpecForge-ext/configs/qwen3-30B-A3B-eagle3.json +31 -0
  28. SpecForge-ext/configs/qwen3-32b-eagle3.json +31 -0
  29. SpecForge-ext/configs/qwen3-4b-eagle3.json +31 -0
  30. SpecForge-ext/configs/qwen3-8b-dflash.json +41 -0
  31. SpecForge-ext/configs/qwen3-8b-eagle3.json +31 -0
  32. SpecForge-ext/configs/qwen3-8b-qwen3eagle-5layer.json +31 -0
  33. SpecForge-ext/configs/qwen3-coder-30B-A3B-instruct-eagle3.json +31 -0
  34. SpecForge-ext/configs/qwen3-coder-480B-A35B-instruct-eagle3.json +31 -0
  35. SpecForge-ext/configs/qwen3-next-80b-a3b-eagle3.json +29 -0
  36. SpecForge-ext/configs/qwq-32B-eagle3.json +28 -0
  37. SpecForge-ext/datasets/README.md +5 -0
  38. SpecForge-ext/datasets/download_laion.sh +36 -0
  39. SpecForge-ext/docs/Makefile +58 -0
  40. SpecForge-ext/docs/README.md +55 -0
  41. SpecForge-ext/docs/conf.py +188 -0
  42. SpecForge-ext/docs/deploy.py +22 -0
  43. SpecForge-ext/docs/index.rst +53 -0
  44. SpecForge-ext/docs/requirements.txt +20 -0
  45. SpecForge-ext/docs/serve.sh +3 -0
  46. SpecForge-ext/examples/run_deepseek_v3_671b_eagle3_online.sh +29 -0
  47. SpecForge-ext/examples/run_qwen3_30b_a3b_eagle3_online.sh +29 -0
  48. SpecForge-ext/examples/run_qwq_eagle3_online.sh +28 -0
  49. SpecForge-ext/logs/baseline_gsm8k_20260213_100853.log +5 -0
  50. SpecForge-ext/logs/baseline_humaneval_20260213_100956.log +5 -0
SpecForge-ext/.devcontainer/Dockerfile ADDED
@@ -0,0 +1,32 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ FROM lmsysorg/sglang:dev
2
+
3
+ # Create non-root user with specified UID and GID
4
+ # NOTE: Replace with your own UID and GID. This is a workaround from https://github.com/microsoft/vscode-remote-release/issues/49#issuecomment-489060908.
5
+ ARG HOST_UID=1003
6
+ ARG HOST_GID=1003
7
+ RUN groupadd -g $HOST_GID devuser && \
8
+ useradd -m -u $HOST_UID -g $HOST_GID -s /bin/zsh devuser
9
+
10
+ # Give devuser sudo access
11
+ RUN apt-get update && apt-get install -y sudo && \
12
+ echo "devuser ALL=(ALL) NOPASSWD:ALL" > /etc/sudoers.d/devuser && \
13
+ rm -rf /var/lib/apt/lists/* && \
14
+ apt-get clean
15
+
16
+ # Set up oh-my-zsh for devuser
17
+ RUN cp -r /root/.oh-my-zsh /home/devuser/.oh-my-zsh && \
18
+ cp /root/.zshrc /home/devuser/.zshrc && \
19
+ cp /root/.vimrc /home/devuser/.vimrc && \
20
+ cp /root/.tmux.conf /home/devuser/.tmux.conf && \
21
+ sed -i 's|/root/.oh-my-zsh|/home/devuser/.oh-my-zsh|g' /home/devuser/.zshrc && \
22
+ chown -R devuser:devuser /home/devuser/
23
+
24
+ # Set workspace directory and ownership
25
+ WORKDIR /sgl-workspace/sglang
26
+ RUN chown -R devuser:devuser /sgl-workspace
27
+
28
+ # Switch to devuser
29
+ USER devuser
30
+
31
+ # Install rust
32
+ RUN curl --proto '=https' --tlsv1.2 -sSf https://sh.rustup.rs | sh -s -- -y
SpecForge-ext/.devcontainer/devcontainer.json ADDED
@@ -0,0 +1,30 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "name": "sglang",
3
+ "build": {
4
+ "dockerfile": "Dockerfile"
5
+ },
6
+ "remoteUser": "devuser",
7
+ "customizations": {
8
+ "vscode": {
9
+ "extensions": [
10
+ // Python development
11
+ "ms-python.python",
12
+ "charliermarsh.ruff",
13
+ // Rust development
14
+ "rust-lang.rust-analyzer",
15
+ "tamasfe.even-better-toml"
16
+ ]
17
+ }
18
+ },
19
+ "forwardPorts": [],
20
+ "runArgs": [
21
+ "--gpus",
22
+ "all"
23
+ ],
24
+ // The two lines below ensure that your local changes in this
25
+ // repo are automatically synced to /sgl-workspace/specforge
26
+ // in the dev docker container. You can remove / comment out these
27
+ // two lines if you prefer to sync code changes manually.
28
+ "workspaceMount": "source=${localWorkspaceFolder},target=/sgl-workspace/specforge,type=bind",
29
+ "workspaceFolder": "/sgl-workspace/specforge"
30
+ }
SpecForge-ext/.editorconfig ADDED
@@ -0,0 +1,25 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # https://editorconfig.org/
2
+
3
+ root = true
4
+
5
+ [*]
6
+ charset = utf-8
7
+ end_of_line = lf
8
+ indent_style = space
9
+ indent_size = 4
10
+ trim_trailing_whitespace = true
11
+ insert_final_newline = true
12
+
13
+ [*.{json,yaml,yml}]
14
+ indent_size = 2
15
+
16
+ [*.md]
17
+ indent_size = 2
18
+ x-soft-wrap-text = true
19
+
20
+ [*.rst]
21
+ indent_size = 4
22
+ x-soft-wrap-text = true
23
+
24
+ [Makefile]
25
+ indent_style = tab
SpecForge-ext/.github/CODEOWNERS ADDED
@@ -0,0 +1,11 @@
 
 
 
 
 
 
 
 
 
 
 
 
1
+ .github @FrankLeeeee
2
+ /specforge/core @FrankLeeeee
3
+ /specforge/data @zyksir @sleepcoo @shuaills
4
+ /specforge/layers @FrankLeeeee @FlamingoPg @sleepcoo @shuaills
5
+ /specforge/modeling @FlamingoPg @sleepcoo @shuaills @FrankLeeeee
6
+ /tests @FrankLeeeee
7
+ /assets @FrankLeeeee @zhyncs
8
+ /examples @shuaills @sleepcoo @FlamingoPg
9
+ /configs @FrankLeeeee @FlamingoPg
10
+ /benchmarks @FrankLeeeee
11
+ /scripts @shuaills @sleepcoo @FlamingoPg
SpecForge-ext/.github/pull_request_template.md ADDED
@@ -0,0 +1,30 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ <!-- Thank you for your contribution! We appreciate it. The following guidelines will help improve your pull request and facilitate feedback. If anything is unclear, don't hesitate to submit your pull request and ask the maintainers for assistance. -->
2
+
3
+ ## Motivation
4
+
5
+ <!-- Explain the purpose of this PR and the goals it aims to achieve. -->
6
+
7
+ ## Modifications
8
+
9
+ <!-- Describe the changes made in this PR. -->
10
+
11
+ ## Related Issues
12
+
13
+ <!-- Link to any related issues here. e.g. "Fixes #123" or "Closes #456" -->
14
+
15
+ ## Accuracy Test
16
+
17
+ <!-- If this PR affects model-side code (e.g., kernels, model architecture), please provide accuracy test results. Ref: https://docs.sglang.ai/references/accuracy_evaluation.html -->
18
+
19
+ ## Benchmark & Profiling
20
+
21
+ <!-- If this PR is expected to impact performance, please provide benchmark and profiling results. Ref: https://docs.sglang.ai/references/benchmark_and_profiling.html -->
22
+
23
+ ## Checklist
24
+
25
+ - [ ] Format your code according to the [Code Formatting with Pre-Commit](https://docs.sglang.ai/references/contribution_guide.html#code-formatting-with-pre-commit).
26
+ - [ ] Add unit tests as outlined in the [Running Unit Tests](https://docs.sglang.ai/references/contribution_guide.html#running-unit-tests-adding-to-ci).
27
+ - [ ] Update documentation / docstrings / example tutorials as needed, according to [Writing Documentation](https://docs.sglang.ai/references/contribution_guide.html#writing-documentation-running-docs-ci).
28
+ - [ ] Provide throughput / latency benchmark results and accuracy evaluation results as needed, according to [Benchmark and Profiling](https://docs.sglang.ai/references/benchmark_and_profiling.html) and [Accuracy Results](https://docs.sglang.ai/references/accuracy_evaluation.html).
29
+ - [ ] For reviewers: If you haven't made any contributions to this PR and are only assisting with merging the main branch, please remove yourself as a co-author when merging the PR.
30
+ - [ ] Please feel free to join our Slack channel at https://sgl-fru7574.slack.com/archives/C09784E3EN6 to discuss your PR.
SpecForge-ext/.isort.cfg ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ [settings]
2
+ profile=black
3
+ known_first_party=sgl-eagle
SpecForge-ext/LICENSE ADDED
@@ -0,0 +1,21 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ MIT License
2
+
3
+ Copyright (c) 2025 sgl-project
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining a copy
6
+ of this software and associated documentation files (the "Software"), to deal
7
+ in the Software without restriction, including without limitation the rights
8
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9
+ copies of the Software, and to permit persons to whom the Software is
10
+ furnished to do so, subject to the following conditions:
11
+
12
+ The above copyright notice and this permission notice shall be included in all
13
+ copies or substantial portions of the Software.
14
+
15
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21
+ SOFTWARE.
SpecForge-ext/MANIFEST.in ADDED
@@ -0,0 +1,2 @@
 
 
 
1
+ include requirements.txt
2
+ include version.txt
SpecForge-ext/README.md ADDED
@@ -0,0 +1,70 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ <div align="center" id="sglangtop">
2
+ <img src="./assets/logo.png" alt="logo" width="400" margin="10px"></img>
3
+
4
+ [![documentation](https://img.shields.io/badge/📖-Documentation-red.svg?style=flat)](https://docs.sglang.ai/SpecForge/)
5
+ [![SpecBundle](https://img.shields.io/badge/🤗%20SpecBundle-yellow.svg?style=flat)](https://huggingface.co/collections/lmsys/specbundle)
6
+ [![DeepWiki](https://img.shields.io/badge/DeepWiki-SpecForge-blue.svg?logo=data:image/png;base64,iVBORw0KGgoAAAANSUhEUgAAACwAAAAyCAYAAAAnWDnqAAAAAXNSR0IArs4c6QAAA05JREFUaEPtmUtyEzEQhtWTQyQLHNak2AB7ZnyXZMEjXMGeK/AIi+QuHrMnbChYY7MIh8g01fJoopFb0uhhEqqcbWTp06/uv1saEDv4O3n3dV60RfP947Mm9/SQc0ICFQgzfc4CYZoTPAswgSJCCUJUnAAoRHOAUOcATwbmVLWdGoH//PB8mnKqScAhsD0kYP3j/Yt5LPQe2KvcXmGvRHcDnpxfL2zOYJ1mFwrryWTz0advv1Ut4CJgf5uhDuDj5eUcAUoahrdY/56ebRWeraTjMt/00Sh3UDtjgHtQNHwcRGOC98BJEAEymycmYcWwOprTgcB6VZ5JK5TAJ+fXGLBm3FDAmn6oPPjR4rKCAoJCal2eAiQp2x0vxTPB3ALO2CRkwmDy5WohzBDwSEFKRwPbknEggCPB/imwrycgxX2NzoMCHhPkDwqYMr9tRcP5qNrMZHkVnOjRMWwLCcr8ohBVb1OMjxLwGCvjTikrsBOiA6fNyCrm8V1rP93iVPpwaE+gO0SsWmPiXB+jikdf6SizrT5qKasx5j8ABbHpFTx+vFXp9EnYQmLx02h1QTTrl6eDqxLnGjporxl3NL3agEvXdT0WmEost648sQOYAeJS9Q7bfUVoMGnjo4AZdUMQku50McDcMWcBPvr0SzbTAFDfvJqwLzgxwATnCgnp4wDl6Aa+Ax283gghmj+vj7feE2KBBRMW3FzOpLOADl0Isb5587h/U4gGvkt5v60Z1VLG8BhYjbzRwyQZemwAd6cCR5/XFWLYZRIMpX39AR0tjaGGiGzLVyhse5C9RKC6ai42ppWPKiBagOvaYk8lO7DajerabOZP46Lby5wKjw1HCRx7p9sVMOWGzb/vA1hwiWc6jm3MvQDTogQkiqIhJV0nBQBTU+3okKCFDy9WwferkHjtxib7t3xIUQtHxnIwtx4mpg26/HfwVNVDb4oI9RHmx5WGelRVlrtiw43zboCLaxv46AZeB3IlTkwouebTr1y2NjSpHz68WNFjHvupy3q8TFn3Hos2IAk4Ju5dCo8B3wP7VPr/FGaKiG+T+v+TQqIrOqMTL1VdWV1DdmcbO8KXBz6esmYWYKPwDL5b5FA1a0hwapHiom0r/cKaoqr+27/XcrS5UwSMbQAAAABJRU5ErkJggg==)](https://deepwiki.com/sgl-project/SpecForge)
7
+
8
+ [![github badge](https://img.shields.io/badge/📃%20LMSYS-Blog-black.svg?style=flat)](https://lmsys.org/blog/2025-07-25-spec-forge/)
9
+ [![slack badge](https://img.shields.io/badge/Slack-join-blueviolet?logo=slack)](https://sgl-fru7574.slack.com/archives/C09784E3EN6)
10
+ [![license](https://img.shields.io/badge/License-MIT-blue)](./LICENSE)
11
+
12
+ </div>
13
+
14
+ ## 📍 Overview
15
+
16
+ SpecForge is an ecosystem project developed by the SGLang team. It is a framework for training speculative decoding models so that you can smoothly port them over to the SGLang serving framework to speed up your inference.
17
+
18
+ We have seen many open-source projects for speculative decoding, but most of them are not well-maintained or not directly compatible with SGLang. We prepared this project because we want the open-source community to enjoy a speculative decoding framework that is
19
+ - regularly maintained by the SpecForge team: the code is runnable out-of-the-box
20
+ - directly compatible with SGLang: no additional effort is needed to port models to SGLang
21
+ - equipped with performant training capabilities: we provide online/offline/tensor-parallel/FSDP training to suit your needs
22
+
23
+
24
+ Check out [**our documentation**](https://docs.sglang.ai/SpecForge/) to get started.
25
+
26
+
27
+ ## 🚀 Accelerate with SpecBundle
28
+
29
+ SpecBundle is a collection of production-grade speculative decoding models that are released by the SpecForge team and our industry partners. They provide a higher acceptance rate than the existing open-source checkpoints across a wide range of domains. Together with SGLang, you can experience up to 4x speedup for inference. Check out our resources below:
30
+
31
+
32
+ | Item | Link |
33
+ | --- | --- |
34
+ | 📝 Documentation | [Link](https://docs.sglang.io/SpecForge/community_resources/specbundle.html) |
35
+ | 📊 Performance Dashboard | [Link](https://docs.sglang.io/SpecForge/SpecBundle/index.html) |
36
+ | 🤗 Hugging Face Collection | [Link](https://huggingface.co/collections/lmsys/specbundle) |
37
+
38
+
39
+ ## 🎉 News
40
+
41
+ - [2025-12] 🎉 Released SpecBundle (phase 1) and SpecForge v0.2. Check out our blog at [LMSYS.org](https://lmsys.org/blog/2025-12-23-spec-bundle-phase-1/)
42
+ - [2025-12] 🔔 Released the roadmap for 2026 Q1.
43
+ - [2025-08] 🔔 SpecForge is listed as a [flagship project](https://lmsys.org/about/) in LMSYS. Congratulations to the SpecForge team!
44
+ - [2025-08] 🔥 SpecForge powered the Eagle3 draft model for GPT-OSS. Check out the blog at [LMSYS.org](https://lmsys.org/blog/2025-08-27-gpt-oss/)
45
+ - [2025-07] 🔥 SpecForge is released together with Llama4-Eagle3 checkpoints. Check out our blog at [LMSYS.org](https://lmsys.org/blog/2025-07-25-spec-forge/)
46
+
47
+ ## ✨ Acknowledgements
48
+
49
+ <img src="./assets/acknowledgements.png" alt="acknowledgements"></img>
50
+
51
+ We would like to express our sincere gratitude to the official EAGLE team, especially Hongyang Zhang and Yuhui Li, for their invaluable contributions and support. Our thanks also go to the NVIDIA team—particularly Avery H and Izzy Putterman—and to the Google team, especially Ying Wang, for their insightful discussions and generous assistance throughout the project.
52
+
53
+ We are especially grateful to Meituan for their strong backing and meaningful contributions, which played a vital role in driving this project forward.
54
+
55
+ This project has also been inspired by many outstanding open-source projects from the LLM community, including [EAGLE](https://github.com/SafeAILab/EAGLE), [BaldEagle](https://github.com/NickL77/BaldEagle), [TensorRT-Model-Optimizer](https://github.com/NVIDIA/TensorRT-Model-Optimizer), and others. Their contributions and shared knowledge have greatly benefited our work.
56
+
57
+ ## 💡 Special Thanks to Voltage Park
58
+
59
+ We would like to extend our sincere thanks to [Voltage Park](https://www.voltagepark.com/), our official infrastructure partner. As part of a formal collaboration with the SGLang team, Voltage Park provided critical GPU resources that empowered us to train and evaluate large-scale speculative decoding models efficiently and reliably. This partnership was instrumental in making SpecForge possible. We deeply appreciate Voltage Park’s mission to make cutting-edge AI infrastructure more accessible, and we look forward to continued collaboration as we push the boundaries of open-source LLM serving and optimization.
60
+
61
+ ## 📃 Citation
62
+
63
+ ```bibtex
64
+ @misc{specforge2025,
65
+ title={SpecForge: Train speculative decoding models effortlessly},
66
+ author={Shenggui Li and Yikai Zhu and Chao Wang and Fan Yin and Shuai Shi and Yubo Wang and Yi Zhang and Yingyi Huang and Haoshuai Zheng and Yineng Zhang},
67
+ year={2025},
68
+ publisher={GitHub},
69
+ howpublished={\url{https://github.com/sgl-project/specforge}},
70
+ }
SpecForge-ext/analyze_accept_length.sh ADDED
@@ -0,0 +1,91 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ #!/bin/bash
2
+
3
+ # Script for analyzing accept length
4
+
5
+ echo "=========================================="
6
+ echo "Accept Length Analysis"
7
+ echo "=========================================="
8
+ echo ""
9
+
10
+ # Check that the results directory exists
11
+ if [ ! -d "results" ]; then
12
+ echo "Error: results directory not found"
13
+ exit 1
14
+ fi
15
+
16
+ # Find all result files
17
+ result_files=$(ls results/*.jsonl 2>/dev/null)
18
+
19
+ if [ -z "$result_files" ]; then
20
+ echo "No result files found in results/ directory"
21
+ echo ""
22
+ echo "Please run the benchmark first:"
23
+ echo " python benchmarks/bench_eagle3.py ..."
24
+ exit 1
25
+ fi
26
+
27
+ echo "Found result files:"
28
+ ls -lh results/*.jsonl
29
+ echo ""
30
+ echo "=========================================="
31
+ echo ""
32
+
33
+ # Analyze each result file
34
+ for file in $result_files; do
35
+ filename=$(basename "$file")
36
+ echo "File: $filename"
37
+ echo "----------------------------------------"
38
+
39
+ # Check whether the file contains mtbench results
40
+ if grep -q "mtbench" "$file"; then
41
+ # Extract accept_length
42
+ echo "Accept lengths:"
43
+ cat "$file" | jq -r '.mtbench[0].metrics[] | " Sample \(.sample_id): accept_length=\(.accept_length // "N/A"), output_tokens=\(.output_tokens // "N/A")"' 2>/dev/null
44
+
45
+ echo ""
46
+ echo "Statistics:"
47
+ # Compute the average
48
+ avg_accept=$(cat "$file" | jq -r '.mtbench[0].metrics[] | .accept_length' 2>/dev/null | awk '{sum+=$1; count++} END {if(count>0) printf " Average accept_length: %.4f\n", sum/count; else print " No data"}')
49
+ echo "$avg_accept"
50
+
51
+ # Compute the minimum and maximum
52
+ min_accept=$(cat "$file" | jq -r '.mtbench[0].metrics[] | .accept_length' 2>/dev/null | sort -n | head -1)
53
+ max_accept=$(cat "$file" | jq -r '.mtbench[0].metrics[] | .accept_length' 2>/dev/null | sort -n | tail -1)
54
+ echo " Min accept_length: $min_accept"
55
+ echo " Max accept_length: $max_accept"
56
+
57
+ # Number of samples
58
+ sample_count=$(cat "$file" | jq -r '.mtbench[0].metrics | length' 2>/dev/null)
59
+ echo " Total samples: $sample_count"
60
+ else
61
+ echo " No mtbench results found in this file"
62
+ fi
63
+
64
+ echo ""
65
+ echo "=========================================="
66
+ echo ""
67
+ done
68
+
69
+ # If both baseline and trained results exist, compare them
70
+ baseline_file=$(ls results/baseline*.jsonl 2>/dev/null | head -1)
71
+ trained_file=$(ls results/trained*.jsonl 2>/dev/null | head -1)
72
+
73
+ if [ -n "$baseline_file" ] && [ -n "$trained_file" ]; then
74
+ echo "Comparison: Baseline vs Trained"
75
+ echo "----------------------------------------"
76
+
77
+ baseline_avg=$(cat "$baseline_file" | jq -r '.mtbench[0].metrics[] | .accept_length' 2>/dev/null | awk '{sum+=$1; count++} END {if(count>0) print sum/count}')
78
+ trained_avg=$(cat "$trained_file" | jq -r '.mtbench[0].metrics[] | .accept_length' 2>/dev/null | awk '{sum+=$1; count++} END {if(count>0) print sum/count}')
79
+
80
+ if [ -n "$baseline_avg" ] && [ -n "$trained_avg" ]; then
81
+ echo "Baseline average: $baseline_avg"
82
+ echo "Trained average: $trained_avg"
83
+
84
+ # Compute the improvement percentage
85
+ improvement=$(echo "$baseline_avg $trained_avg" | awk '{printf "%.2f%%", ($2-$1)/$1*100}')
86
+ echo "Improvement: $improvement"
87
+ fi
88
+ echo ""
89
+ fi
90
+
91
+ echo "Done!"
SpecForge-ext/assets/logo.svg ADDED
SpecForge-ext/configs/deepseek-v2-lite-eagle3.json ADDED
@@ -0,0 +1,39 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "architectures": [
3
+ "LlamaForCausalLMEagle3"
4
+ ],
5
+ "attention_bias": false,
6
+ "attention_dropout": 0.0,
7
+ "bos_token_id": 100000,
8
+ "eos_token_id": 100001,
9
+ "head_dim": 128,
10
+ "hidden_act": "silu",
11
+ "hidden_size": 2048,
12
+ "initializer_range": 0.02,
13
+ "intermediate_size": 10944,
14
+ "max_position_embeddings": 163840,
15
+ "max_window_layers": 64,
16
+ "model_type": "llama",
17
+ "num_attention_heads": 16,
18
+ "num_hidden_layers": 1,
19
+ "num_key_value_heads": 16,
20
+ "rms_norm_eps": 1e-06,
21
+ "rope_scaling": {
22
+ "beta_fast": 32.0,
23
+ "beta_slow": 1.0,
24
+ "factor": 40.0,
25
+ "mscale": 0.707,
26
+ "mscale_all_dim": 0.707,
27
+ "original_max_position_embeddings": 4096,
28
+ "rope_type": "yarn"
29
+ },
30
+ "rope_theta": 10000,
31
+ "sliding_window": null,
32
+ "tie_word_embeddings": false,
33
+ "torch_dtype": "bfloat16",
34
+ "transformers_version": "4.33.1",
35
+ "use_cache": true,
36
+ "use_sliding_window": false,
37
+ "vocab_size": 102400,
38
+ "draft_vocab_size": 32000
39
+ }
SpecForge-ext/configs/deepseek-v3-671b-eagle3.json ADDED
@@ -0,0 +1,32 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "architectures": [
3
+ "LlamaForCausalLMEagle3"
4
+ ],
5
+ "eagle_config": {
6
+ "eagle_aux_hidden_state_layer_ids": [
7
+ 1,
8
+ 29,
9
+ 57
10
+ ],
11
+ "use_aux_hidden_state": true
12
+ },
13
+ "bos_token_id": 151643,
14
+ "eos_token_id": 151645,
15
+ "hidden_act": "silu",
16
+ "hidden_size": 7168,
17
+ "initializer_range": 0.02,
18
+ "intermediate_size": 40960,
19
+ "max_position_embeddings": 163840,
20
+ "model_type": "llama",
21
+ "num_attention_heads": 56,
22
+ "num_key_value_heads": 8,
23
+ "num_hidden_layers": 1,
24
+ "pad_token_id": 0,
25
+ "rms_norm_eps": 1e-05,
26
+ "tie_word_embeddings": false,
27
+ "torch_dtype": "float16",
28
+ "transformers_version": "4.51.0",
29
+ "use_cache": true,
30
+ "vocab_size": 129280,
31
+ "draft_vocab_size": 32000
32
+ }
SpecForge-ext/configs/gemma3-1b-eagle3.json ADDED
@@ -0,0 +1,32 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "architectures": [
3
+ "LlamaForCausalLMEagle3"
4
+ ],
5
+ "attention_bias": false,
6
+ "attention_dropout": 0.0,
7
+ "bos_token_id": 2,
8
+ "eos_token_id": 1,
9
+ "pad_token_id": 0,
10
+ "head_dim": 256,
11
+ "hidden_act": "silu",
12
+ "hidden_size": 1152,
13
+ "initializer_range": 0.02,
14
+ "intermediate_size": 6912,
15
+ "max_position_embeddings": 32768,
16
+ "model_type": "llama",
17
+ "num_attention_heads": 4,
18
+ "num_hidden_layers": 1,
19
+ "num_key_value_heads": 1,
20
+ "rms_norm_eps": 1e-06,
21
+ "rope_scaling": null,
22
+ "rope_theta": 1000000,
23
+ "sliding_window": 512,
24
+ "tie_word_embeddings": false,
25
+ "torch_dtype": "bfloat16",
26
+ "transformers_version": "4.50.0",
27
+ "use_cache": true,
28
+ "use_sliding_window": false,
29
+ "vocab_size": 262145,
30
+ "draft_vocab_size": 32000,
31
+ "target_model_type": "gemma3_text"
32
+ }
SpecForge-ext/configs/gpt-oss-120B-eagle3.json ADDED
@@ -0,0 +1,30 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "architectures": [
3
+ "LlamaForCausalLMEagle3"
4
+ ],
5
+ "eagle_config": {
6
+ "eagle_aux_hidden_state_layer_ids": [
7
+ 1,
8
+ 17,
9
+ 33
10
+ ]
11
+ },
12
+ "head_dim": 64,
13
+ "hidden_act": "silu",
14
+ "hidden_size": 2880,
15
+ "initializer_range": 0.02,
16
+ "intermediate_size": 17280,
17
+ "max_position_embeddings": 4096,
18
+ "model_type": "llama",
19
+ "num_attention_heads": 64,
20
+ "num_key_value_heads": 8,
21
+ "num_hidden_layers": 1,
22
+ "pad_token_id": 0,
23
+ "rms_norm_eps": 1e-05,
24
+ "tie_word_embeddings": false,
25
+ "torch_dtype": "bfloat16",
26
+ "transformers_version": "4.52.3",
27
+ "use_cache": true,
28
+ "vocab_size": 201088,
29
+ "draft_vocab_size": 32000
30
+ }
SpecForge-ext/configs/gpt-oss-20B-eagle3.json ADDED
@@ -0,0 +1,30 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "architectures": [
3
+ "LlamaForCausalLMEagle3"
4
+ ],
5
+ "eagle_config": {
6
+ "eagle_aux_hidden_state_layer_ids": [
7
+ 1,
8
+ 11,
9
+ 21
10
+ ]
11
+ },
12
+ "head_dim": 64,
13
+ "hidden_act": "silu",
14
+ "hidden_size": 2880,
15
+ "initializer_range": 0.02,
16
+ "intermediate_size": 17280,
17
+ "max_position_embeddings": 4096,
18
+ "model_type": "llama",
19
+ "num_attention_heads": 64,
20
+ "num_key_value_heads": 8,
21
+ "num_hidden_layers": 1,
22
+ "pad_token_id": 0,
23
+ "rms_norm_eps": 1e-05,
24
+ "tie_word_embeddings": false,
25
+ "torch_dtype": "bfloat16",
26
+ "transformers_version": "4.52.3",
27
+ "use_cache": true,
28
+ "vocab_size": 201088,
29
+ "draft_vocab_size": 32000
30
+ }
SpecForge-ext/configs/ling-flash-2.0-eagle3.json ADDED
@@ -0,0 +1,24 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "architectures": [
3
+ "LlamaForCausalLMEagle3"
4
+ ],
5
+ "bos_token_id": 163584,
6
+ "eos_token_id": 163585,
7
+ "hidden_act": "silu",
8
+ "hidden_size": 4096,
9
+ "initializer_range": 0.02,
10
+ "intermediate_size": 14336,
11
+ "max_position_embeddings": 32768,
12
+ "model_type": "llama",
13
+ "num_attention_heads": 32,
14
+ "num_key_value_heads": 8,
15
+ "num_hidden_layers": 1,
16
+ "pad_token_id": 0,
17
+ "rms_norm_eps": 1e-05,
18
+ "tie_word_embeddings": false,
19
+ "torch_dtype": "bfloat16",
20
+ "transformers_version": "4.57.1",
21
+ "use_cache": true,
22
+ "vocab_size": 157184,
23
+ "draft_vocab_size": 32000
24
+ }
SpecForge-ext/configs/llama3-70B-ealge3.json ADDED
@@ -0,0 +1,37 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "architectures": [
3
+ "LlamaForCausalLMEagle3"
4
+ ],
5
+ "bos_token_id": 128000,
6
+ "eos_token_id": [
7
+ 128001,
8
+ 128008,
9
+ 128009
10
+ ],
11
+ "head_dim": 128,
12
+ "hidden_act": "silu",
13
+ "hidden_size": 8192,
14
+ "initializer_range": 0.02,
15
+ "intermediate_size": 28672,
16
+ "max_position_embeddings": 4096,
17
+ "model_type": "llama",
18
+ "num_attention_heads": 64,
19
+ "num_key_value_heads": 8,
20
+ "num_hidden_layers": 1,
21
+ "pad_token_id": 0,
22
+ "rms_norm_eps": 1e-05,
23
+ "rope_scaling": {
24
+ "factor": 8.0,
25
+ "high_freq_factor": 4.0,
26
+ "low_freq_factor": 1.0,
27
+ "original_max_position_embeddings": 4096,
28
+ "rope_type": "llama3"
29
+ },
30
+ "rope_theta": 500000.0,
31
+ "tie_word_embeddings": false,
32
+ "torch_dtype": "float16",
33
+ "transformers_version": "4.28.1",
34
+ "use_cache": true,
35
+ "vocab_size": 128256,
36
+ "draft_vocab_size": 32000
37
+ }
SpecForge-ext/configs/llama3-8B-eagle3.json ADDED
@@ -0,0 +1,24 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "architectures": [
3
+ "LlamaForCausalLMEagle3"
4
+ ],
5
+ "bos_token_id": 128000,
6
+ "eos_token_id": 128001,
7
+ "hidden_act": "silu",
8
+ "hidden_size": 4096,
9
+ "initializer_range": 0.02,
10
+ "intermediate_size": 14336,
11
+ "max_position_embeddings": 2048,
12
+ "model_type": "llama",
13
+ "num_attention_heads": 32,
14
+ "num_key_value_heads": 8,
15
+ "num_hidden_layers": 1,
16
+ "pad_token_id": 0,
17
+ "rms_norm_eps": 1e-05,
18
+ "tie_word_embeddings": false,
19
+ "torch_dtype": "float16",
20
+ "transformers_version": "4.28.1",
21
+ "use_cache": true,
22
+ "vocab_size": 128256,
23
+ "draft_vocab_size": 32000
24
+ }
SpecForge-ext/configs/llama4-scout-17B-16E-eagle3.json ADDED
@@ -0,0 +1,22 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "architectures": [
3
+ "LlamaForCausalLMEagle3"
4
+ ],
5
+ "hidden_act": "silu",
6
+ "hidden_size": 5120,
7
+ "initializer_range": 0.02,
8
+ "intermediate_size": 32768,
9
+ "max_position_embeddings": 2048,
10
+ "model_type": "llama",
11
+ "num_attention_heads": 40,
12
+ "num_key_value_heads": 8,
13
+ "num_hidden_layers": 1,
14
+ "pad_token_id": 0,
15
+ "rms_norm_eps": 1e-05,
16
+ "tie_word_embeddings": false,
17
+ "torch_dtype": "bfloat16",
18
+ "transformers_version": "4.52.3",
19
+ "use_cache": true,
20
+ "vocab_size": 202048,
21
+ "draft_vocab_size": 32000
22
+ }
SpecForge-ext/configs/longcat-flash-dflash.json ADDED
@@ -0,0 +1,41 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "architectures": [
3
+ "DFlashDraftModel"
4
+ ],
5
+ "attention_bias": false,
6
+ "attention_dropout": 0.0,
7
+ "auto_map": {
8
+ "AutoModel": "modeling_dflash.DFlashDraftModel"
9
+ },
10
+ "block_size": 16,
11
+ "bos_token_id": 1,
12
+ "dtype": "bfloat16",
13
+ "eos_token_id": 2,
14
+ "head_dim": 128,
15
+ "hidden_act": "silu",
16
+ "hidden_size": 6144,
17
+ "initializer_range": 0.02,
18
+ "intermediate_size": 12288,
19
+ "layer_types": [
20
+ "full_attention",
21
+ "full_attention",
22
+ "full_attention",
23
+ "full_attention",
24
+ "full_attention"
25
+ ],
26
+ "max_position_embeddings": 40960,
27
+ "max_window_layers": 5,
28
+ "model_type": "qwen3",
29
+ "num_attention_heads": 32,
30
+ "num_hidden_layers": 5,
31
+ "num_key_value_heads": 8,
32
+ "num_target_layers": 28,
33
+ "rms_norm_eps": 1e-06,
34
+ "rope_scaling": null,
35
+ "rope_theta": 1000000,
36
+ "sliding_window": null,
37
+ "tie_word_embeddings": false,
38
+ "use_cache": true,
39
+ "use_sliding_window": false,
40
+ "vocab_size": 131072
41
+ }
SpecForge-ext/configs/longcat-flash-eagle3.json ADDED
@@ -0,0 +1,31 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "architectures": [
3
+ "LlamaForCausalLMEagle3"
4
+ ],
5
+ "attention_bias": false,
6
+ "attention_dropout": 0.0,
7
+ "bos_token_id": 1,
8
+ "eos_token_id": 2,
9
+ "head_dim": 128,
10
+ "hidden_act": "silu",
11
+ "hidden_size": 6144,
12
+ "initializer_range": 0.02,
13
+ "intermediate_size": 12288,
14
+ "max_position_embeddings": 131072,
15
+ "max_window_layers": 48,
16
+ "model_type": "llama",
17
+ "num_attention_heads": 64,
18
+ "num_hidden_layers": 1,
19
+ "num_key_value_heads":16,
20
+ "rms_norm_eps": 1e-05,
21
+ "rope_scaling": null,
22
+ "rope_theta": 10000000.0,
23
+ "sliding_window": null,
24
+ "tie_word_embeddings": false,
25
+ "torch_dtype": "bfloat16",
26
+ "transformers_version": "4.53.2",
27
+ "use_cache": true,
28
+ "use_sliding_window": false,
29
+ "vocab_size": 131072,
30
+ "draft_vocab_size": 131072
31
+ }
SpecForge-ext/configs/phi4-eagle3.json ADDED
@@ -0,0 +1,27 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "architectures": [
3
+ "LlamaForCausalLMEagle3"
4
+ ],
5
+ "attention_bias": false,
6
+ "attention_dropout": 0.0,
7
+ "bos_token_id": 100257,
8
+ "eos_token_id": 100257,
9
+ "pad_token_id": 100257,
10
+ "hidden_act": "silu",
11
+ "hidden_size": 5120,
12
+ "initializer_range": 0.02,
13
+ "intermediate_size": 17920,
14
+ "max_position_embeddings": 16384,
15
+ "model_type": "phi3",
16
+ "num_attention_heads": 40,
17
+ "num_hidden_layers": 1,
18
+ "num_key_value_heads": 10,
19
+ "rms_norm_eps": 1e-05,
20
+ "rope_theta": 250000.0,
21
+ "tie_word_embeddings": false,
22
+ "torch_dtype": "bfloat16",
23
+ "transformers_version": "4.47.0",
24
+ "use_cache": true,
25
+ "vocab_size": 100352,
26
+ "draft_vocab_size": 32000
27
+ }
SpecForge-ext/configs/qwen2.5-7b-eagle3.json ADDED
@@ -0,0 +1,30 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "architectures": [
3
+ "LlamaForCausalLMEagle3"
4
+ ],
5
+ "attention_bias": false,
6
+ "attention_dropout": 0.0,
7
+ "bos_token_id": 151643,
8
+ "eos_token_id": 151645,
9
+ "hidden_act": "silu",
10
+ "hidden_size": 3584,
11
+ "initializer_range": 0.02,
12
+ "intermediate_size": 18944,
13
+ "max_position_embeddings": 32768,
14
+ "max_window_layers": 28,
15
+ "model_type": "llama",
16
+ "num_attention_heads": 28,
17
+ "num_hidden_layers": 1,
18
+ "num_key_value_heads": 4,
19
+ "rms_norm_eps": 1e-06,
20
+ "rope_scaling": null,
21
+ "rope_theta": 1000000.0,
22
+ "sliding_window": 131072,
23
+ "tie_word_embeddings": false,
24
+ "torch_dtype": "bfloat16",
25
+ "transformers_version": "4.51.0",
26
+ "use_cache": true,
27
+ "use_sliding_window": false,
28
+ "vocab_size": 152064,
29
+ "draft_vocab_size": 16000
30
+ }
SpecForge-ext/configs/qwen2.5-vl-32b-eagle3.json ADDED
@@ -0,0 +1,40 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "architectures": [
3
+ "LlamaForCausalLMEagle3"
4
+ ],
5
+ "attention_bias": false,
6
+ "attention_dropout": 0.0,
7
+ "bos_token_id": 151643,
8
+ "eos_token_id": 151645,
9
+ "head_dim": 128,
10
+ "hidden_act": "silu",
11
+ "hidden_size": 5120,
12
+ "initializer_range": 0.02,
13
+ "intermediate_size": 18944,
14
+ "max_position_embeddings": 8192,
15
+ "max_window_layers": 28,
16
+ "model_type": "llama",
17
+ "target_model_type": "qwen2_5_vl",
18
+ "num_attention_heads": 28,
19
+ "num_hidden_layers": 1,
20
+ "num_key_value_heads": 4,
21
+ "rms_norm_eps": 1e-06,
22
+ "pretraining_tp": 1,
23
+ "rope_scaling": {
24
+ "type": "mrope",
25
+ "mrope_section": [
26
+ 16,
27
+ 24,
28
+ 24
29
+ ]
30
+ },
31
+ "rope_theta": 1000000,
32
+ "sliding_window": 32768,
33
+ "tie_word_embeddings": false,
34
+ "torch_dtype": "bfloat16",
35
+ "transformers_version": "4.51.0",
36
+ "use_cache": true,
37
+ "use_sliding_window": false,
38
+ "vocab_size": 152064,
39
+ "draft_vocab_size": 32000
40
+ }
SpecForge-ext/configs/qwen3-235B-A22B-eagle3.json ADDED
@@ -0,0 +1,36 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "architectures": [
3
+ "LlamaForCausalLMEagle3"
4
+ ],
5
+ "eagle_config": {
6
+ "eagle_aux_hidden_state_layer_ids": [
7
+ 1,
8
+ 46,
9
+ 90
10
+ ],
11
+ "use_aux_hidden_state": true
12
+ },
13
+ "attention_bias": false,
14
+ "attention_dropout": 0.0,
15
+ "bos_token_id": 151643,
16
+ "draft_vocab_size": 32000,
17
+ "eos_token_id": 151645,
18
+ "head_dim": 128,
19
+ "hidden_act": "silu",
20
+ "hidden_size": 4096,
21
+ "initializer_range": 0.02,
22
+ "intermediate_size": 24576,
23
+ "max_position_embeddings": 40960,
24
+ "model_type": "llama",
25
+ "num_attention_heads": 64,
26
+ "num_hidden_layers": 1,
27
+ "num_key_value_heads": 4,
28
+ "rms_norm_eps": 1e-06,
29
+ "rope_theta": 1000000.0,
30
+ "rope_scaling": null,
31
+ "tie_word_embeddings": false,
32
+ "torch_dtype": "bfloat16",
33
+ "transformers_version": "4.51.0",
34
+ "use_cache": true,
35
+ "vocab_size": 151936
36
+ }
SpecForge-ext/configs/qwen3-30B-A3B-eagle3.json ADDED
@@ -0,0 +1,31 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "architectures": [
3
+ "LlamaForCausalLMEagle3"
4
+ ],
5
+ "attention_bias": false,
6
+ "attention_dropout": 0.0,
7
+ "bos_token_id": 151643,
8
+ "eos_token_id": 151645,
9
+ "head_dim": 128,
10
+ "hidden_act": "silu",
11
+ "hidden_size": 2048,
12
+ "initializer_range": 0.02,
13
+ "intermediate_size": 12288,
14
+ "max_position_embeddings": 2048,
15
+ "max_window_layers": 48,
16
+ "model_type": "llama",
17
+ "num_attention_heads": 32,
18
+ "num_hidden_layers": 1,
19
+ "num_key_value_heads":4,
20
+ "rms_norm_eps": 1e-06,
21
+ "rope_scaling": null,
22
+ "rope_theta": 1000000.0,
23
+ "sliding_window": null,
24
+ "tie_word_embeddings": false,
25
+ "torch_dtype": "bfloat16",
26
+ "transformers_version": "4.53.2",
27
+ "use_cache": true,
28
+ "use_sliding_window": false,
29
+ "vocab_size": 151936,
30
+ "draft_vocab_size": 32000
31
+ }
SpecForge-ext/configs/qwen3-32b-eagle3.json ADDED
@@ -0,0 +1,31 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "architectures": [
3
+ "LlamaForCausalLMEagle3"
4
+ ],
5
+ "attention_bias": false,
6
+ "attention_dropout": 0.0,
7
+ "bos_token_id": 151643,
8
+ "eos_token_id": 151645,
9
+ "head_dim": 128,
10
+ "hidden_act": "silu",
11
+ "hidden_size": 5120,
12
+ "initializer_range": 0.02,
13
+ "intermediate_size": 25600,
14
+ "max_position_embeddings": 40960,
15
+ "max_window_layers": 64,
16
+ "model_type": "llama",
17
+ "num_attention_heads": 64,
18
+ "num_hidden_layers": 1,
19
+ "num_key_value_heads": 8,
20
+ "rms_norm_eps": 1e-06,
21
+ "rope_scaling": null,
22
+ "rope_theta": 1000000,
23
+ "sliding_window": null,
24
+ "tie_word_embeddings": false,
25
+ "torch_dtype": "bfloat16",
26
+ "transformers_version": "4.51.0",
27
+ "use_cache": true,
28
+ "use_sliding_window": false,
29
+ "vocab_size": 151936,
30
+ "draft_vocab_size": 32000
31
+ }
SpecForge-ext/configs/qwen3-4b-eagle3.json ADDED
@@ -0,0 +1,31 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "architectures": [
3
+ "LlamaForCausalLMEagle3"
4
+ ],
5
+ "attention_bias": false,
6
+ "attention_dropout": 0.0,
7
+ "bos_token_id": 151643,
8
+ "eos_token_id": 151645,
9
+ "head_dim": 128,
10
+ "hidden_act": "silu",
11
+ "hidden_size": 2560,
12
+ "initializer_range": 0.02,
13
+ "intermediate_size": 9728,
14
+ "max_position_embeddings": 40960,
15
+ "max_window_layers": 36,
16
+ "model_type": "llama",
17
+ "num_attention_heads": 32,
18
+ "num_hidden_layers": 1,
19
+ "num_key_value_heads": 8,
20
+ "rms_norm_eps": 1e-06,
21
+ "rope_scaling": null,
22
+ "rope_theta": 1000000,
23
+ "sliding_window": null,
24
+ "tie_word_embeddings": false,
25
+ "torch_dtype": "bfloat16",
26
+ "transformers_version": "4.51.0",
27
+ "use_cache": true,
28
+ "use_sliding_window": false,
29
+ "vocab_size": 151936,
30
+ "draft_vocab_size": 32000
31
+ }
SpecForge-ext/configs/qwen3-8b-dflash.json ADDED
@@ -0,0 +1,41 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "architectures": [
3
+ "DFlashDraftModel"
4
+ ],
5
+ "attention_bias": false,
6
+ "attention_dropout": 0.0,
7
+ "auto_map": {
8
+ "AutoModel": "modeling_dflash.DFlashDraftModel"
9
+ },
10
+ "block_size": 16,
11
+ "bos_token_id": 151643,
12
+ "dtype": "bfloat16",
13
+ "eos_token_id": 151645,
14
+ "head_dim": 128,
15
+ "hidden_act": "silu",
16
+ "hidden_size": 4096,
17
+ "initializer_range": 0.02,
18
+ "intermediate_size": 12288,
19
+ "layer_types": [
20
+ "full_attention",
21
+ "full_attention",
22
+ "full_attention",
23
+ "full_attention",
24
+ "full_attention"
25
+ ],
26
+ "max_position_embeddings": 40960,
27
+ "max_window_layers": 5,
28
+ "model_type": "qwen3",
29
+ "num_attention_heads": 32,
30
+ "num_hidden_layers": 5,
31
+ "num_key_value_heads": 8,
32
+ "num_target_layers": 36,
33
+ "rms_norm_eps": 1e-06,
34
+ "rope_scaling": null,
35
+ "rope_theta": 1000000,
36
+ "sliding_window": null,
37
+ "tie_word_embeddings": false,
38
+ "use_cache": true,
39
+ "use_sliding_window": false,
40
+ "vocab_size": 151936
41
+ }
SpecForge-ext/configs/qwen3-8b-eagle3.json ADDED
@@ -0,0 +1,31 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "architectures": [
3
+ "LlamaForCausalLMEagle3"
4
+ ],
5
+ "attention_bias": false,
6
+ "attention_dropout": 0.0,
7
+ "bos_token_id": 151643,
8
+ "eos_token_id": 151645,
9
+ "head_dim": 128,
10
+ "hidden_act": "silu",
11
+ "hidden_size": 4096,
12
+ "initializer_range": 0.02,
13
+ "intermediate_size": 12288,
14
+ "max_position_embeddings": 40960,
15
+ "max_window_layers": 36,
16
+ "model_type": "llama",
17
+ "num_attention_heads": 32,
18
+ "num_hidden_layers": 1,
19
+ "num_key_value_heads":8 ,
20
+ "rms_norm_eps": 1e-06,
21
+ "rope_scaling": null,
22
+ "rope_theta": 1000000,
23
+ "sliding_window": null,
24
+ "tie_word_embeddings": false,
25
+ "torch_dtype": "bfloat16",
26
+ "transformers_version": "4.51.0",
27
+ "use_cache": true,
28
+ "use_sliding_window": false,
29
+ "vocab_size": 151936,
30
+ "draft_vocab_size": 32000
31
+ }
SpecForge-ext/configs/qwen3-8b-qwen3eagle-5layer.json ADDED
@@ -0,0 +1,31 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "architectures": [
3
+ "LlamaForCausalLMEagle3"
4
+ ],
5
+ "attention_bias": false,
6
+ "attention_dropout": 0.0,
7
+ "bos_token_id": 151643,
8
+ "eos_token_id": 151645,
9
+ "head_dim": 128,
10
+ "hidden_act": "silu",
11
+ "hidden_size": 4096,
12
+ "initializer_range": 0.02,
13
+ "intermediate_size": 12288,
14
+ "max_position_embeddings": 40960,
15
+ "max_window_layers": 36,
16
+ "model_type": "llama",
17
+ "num_attention_heads": 32,
18
+ "num_hidden_layers": 5,
19
+ "num_key_value_heads": 8,
20
+ "rms_norm_eps": 1e-06,
21
+ "rope_scaling": null,
22
+ "rope_theta": 1000000,
23
+ "sliding_window": null,
24
+ "tie_word_embeddings": false,
25
+ "torch_dtype": "bfloat16",
26
+ "transformers_version": "4.51.0",
27
+ "use_cache": true,
28
+ "use_sliding_window": false,
29
+ "vocab_size": 151936,
30
+ "draft_vocab_size": 32000
31
+ }
SpecForge-ext/configs/qwen3-coder-30B-A3B-instruct-eagle3.json ADDED
@@ -0,0 +1,31 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "architectures": [
3
+ "LlamaForCausalLMEagle3"
4
+ ],
5
+ "attention_bias": false,
6
+ "attention_dropout": 0.0,
7
+ "bos_token_id": 151643,
8
+ "eos_token_id": 151645,
9
+ "head_dim": 128,
10
+ "hidden_act": "silu",
11
+ "hidden_size": 2048,
12
+ "initializer_range": 0.02,
13
+ "intermediate_size": 12288,
14
+ "max_position_embeddings": 2048,
15
+ "max_window_layers": 48,
16
+ "model_type": "llama",
17
+ "num_attention_heads": 32,
18
+ "num_hidden_layers": 1,
19
+ "num_key_value_heads": 4,
20
+ "rms_norm_eps": 1e-06,
21
+ "rope_scaling": null,
22
+ "rope_theta": 1000000.0,
23
+ "sliding_window": null,
24
+ "tie_word_embeddings": false,
25
+ "torch_dtype": "bfloat16",
26
+ "transformers_version": "4.53.2",
27
+ "use_cache": true,
28
+ "use_sliding_window": false,
29
+ "vocab_size": 151936,
30
+ "draft_vocab_size": 32000
31
+ }
SpecForge-ext/configs/qwen3-coder-480B-A35B-instruct-eagle3.json ADDED
@@ -0,0 +1,31 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "architectures": [
3
+ "LlamaForCausalLMEagle3"
4
+ ],
5
+ "attention_bias": false,
6
+ "attention_dropout": 0.0,
7
+ "bos_token_id": 151643,
8
+ "eos_token_id": 151645,
9
+ "head_dim": 128,
10
+ "hidden_act": "silu",
11
+ "hidden_size": 6144,
12
+ "initializer_range": 0.02,
13
+ "intermediate_size": 16384,
14
+ "max_position_embeddings": 262144,
15
+ "max_window_layers": 62,
16
+ "model_type": "llama",
17
+ "num_attention_heads": 96,
18
+ "num_hidden_layers": 1,
19
+ "num_key_value_heads":8,
20
+ "rms_norm_eps": 1e-06,
21
+ "rope_scaling": null,
22
+ "rope_theta": 1000000,
23
+ "sliding_window": null,
24
+ "tie_word_embeddings": false,
25
+ "torch_dtype": "bfloat16",
26
+ "transformers_version": "4.51.0",
27
+ "use_cache": true,
28
+ "use_sliding_window": false,
29
+ "vocab_size": 151936,
30
+ "draft_vocab_size": 32000
31
+ }
SpecForge-ext/configs/qwen3-next-80b-a3b-eagle3.json ADDED
@@ -0,0 +1,29 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "architectures": [
3
+ "LlamaForCausalLMEagle3"
4
+ ],
5
+ "attention_dropout": 0.0,
6
+ "bos_token_id": 151643,
7
+ "decoder_sparse_step": 1,
8
+ "eos_token_id": 151645,
9
+ "head_dim": 256,
10
+ "hidden_act": "silu",
11
+ "hidden_size": 2048,
12
+ "initializer_range": 0.02,
13
+ "intermediate_size": 16384,
14
+ "max_position_embeddings": 262144,
15
+ "model_type": "llama",
16
+ "num_attention_heads": 16,
17
+ "num_hidden_layers": 1,
18
+ "num_key_value_heads": 2,
19
+ "rms_norm_eps": 1e-06,
20
+ "rope_scaling": null,
21
+ "rope_theta": 10000000,
22
+ "tie_word_embeddings": false,
23
+ "torch_dtype": "bfloat16",
24
+ "transformers_version": "4.57.0.dev0",
25
+ "use_cache": true,
26
+ "use_sliding_window": false,
27
+ "vocab_size": 151936,
28
+ "draft_vocab_size": 32000
29
+ }
SpecForge-ext/configs/qwq-32B-eagle3.json ADDED
@@ -0,0 +1,28 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "architectures": [
3
+ "LlamaForCausalLMEagle3"
4
+ ],
5
+ "attention_dropout": 0.0,
6
+ "bos_token_id": 151643,
7
+ "eos_token_id": 151645,
8
+ "hidden_act": "silu",
9
+ "hidden_size": 5120,
10
+ "initializer_range": 0.02,
11
+ "intermediate_size": 27648,
12
+ "max_position_embeddings": 40960,
13
+ "max_window_layers": 64,
14
+ "model_type": "qwen2",
15
+ "num_attention_heads": 40,
16
+ "num_hidden_layers": 1,
17
+ "num_key_value_heads": 8,
18
+ "rms_norm_eps": 1e-05,
19
+ "rope_theta": 1000000.0,
20
+ "sliding_window": 32768,
21
+ "tie_word_embeddings": false,
22
+ "torch_dtype": "bfloat16",
23
+ "transformers_version": "4.43.1",
24
+ "use_cache": true,
25
+ "use_sliding_window": false,
26
+ "vocab_size": 152064,
27
+ "draft_vocab_size": 32000
28
+ }
SpecForge-ext/datasets/README.md ADDED
@@ -0,0 +1,5 @@
 
 
 
 
 
 
1
+ ## Comprehensive Dataset Download Scripts
2
+
3
+ | DatasetName | Github | Huggingface | command |
4
+ | -------- | -------- | -------- | -------- |
5
+ | ALLaVA-4V | [link](https://github.com/FreedomIntelligence/ALLaVA) | [link](https://huggingface.co/datasets/FreedomIntelligence/ALLaVA-4V) | download_laion.sh |
SpecForge-ext/datasets/download_laion.sh ADDED
@@ -0,0 +1,36 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+
2
+
3
+ laion_root="allava_laion"
4
+
5
+ mkdir $laion_root
6
+ cd $laion_root
7
+
8
+
9
+ # 1. download annotation files
10
+ ## 1.1 caption
11
+ wget -c -O ALLaVA-Caption-LAION-4V.json https://huggingface.co/datasets/FreedomIntelligence/ALLaVA-4V/resolve/main/allava_laion/ALLaVA-Caption-LAION-4V.json?download=true
12
+
13
+ ## 1.2 instruction
14
+ wget -c -O ALLaVA-Instruct-LAION-4V.json https://huggingface.co/datasets/FreedomIntelligence/ALLaVA-4V/resolve/main/allava_laion/ALLaVA-Instruct-LAION-4V.json?download=true
15
+
16
+
17
+ # 2. download and unzip images
18
+ mkdir image_chunks
19
+
20
+ ## 2.1 download
21
+ for ((i=0; i<10; i++))
22
+ do
23
+ wget -c -O image_chunks/images_$i.zip https://huggingface.co/datasets/FreedomIntelligence/ALLaVA-4V/resolve/main/allava_laion/image_chunks/images_$i.zip?download=true &
24
+ done
25
+
26
+ mkdir -p images/
27
+ wait
28
+
29
+ ## 2.2 unzip
30
+ for ((i=0; i<10; i++))
31
+ do
32
+ unzip -j -o image_chunks/images_$i.zip -d images/ & # wait patiently, it takes a while...
33
+ done
34
+
35
+ wait
36
+ echo "All done!"
SpecForge-ext/docs/Makefile ADDED
@@ -0,0 +1,58 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Minimal Makefile for Sphinx documentation
2
+ SPHINXOPTS ?=
3
+ SPHINXBUILD ?= sphinx-build
4
+ SPHINXAUTOBUILD ?= sphinx-autobuild
5
+ SOURCEDIR = .
6
+ BUILDDIR = _build
7
+ PORT ?= 8003
8
+
9
+ help:
10
+ @$(SPHINXBUILD) -M help "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O)
11
+ @echo ""
12
+ @echo "Additional targets:"
13
+ @echo " serve to build and serve documentation with auto-build and live reload"
14
+
15
+ # Compile Notebook files and record execution time
16
+ compile:
17
+ @set -e; \
18
+ echo "Starting Notebook compilation..."; \
19
+ mkdir -p logs; \
20
+ echo "Notebook execution timings:" > logs/timing.log; \
21
+ START_TOTAL=$$(date +%s); \
22
+ find $(SOURCEDIR) -path "*/_build/*" -prune -o -name "*.ipynb" -print0 | \
23
+ parallel -0 -j3 --halt soon,fail=1 ' \
24
+ NB_NAME=$$(basename {}); \
25
+ START_TIME=$$(date +%s); \
26
+ retry --delay=0 --times=2 -- \
27
+ jupyter nbconvert --to notebook --execute --inplace "{}" \
28
+ --ExecutePreprocessor.timeout=600 \
29
+ --ExecutePreprocessor.kernel_name=python3; \
30
+ RET_CODE=$$?; \
31
+ END_TIME=$$(date +%s); \
32
+ ELAPSED_TIME=$$((END_TIME - START_TIME)); \
33
+ echo "$${NB_NAME}: $${ELAPSED_TIME}s" >> logs/timing.log; \
34
+ exit $$RET_CODE' || exit 1; \
35
+ END_TOTAL=$$(date +%s); \
36
+ TOTAL_ELAPSED=$$((END_TOTAL - START_TOTAL)); \
37
+ echo "---------------------------------" >> logs/timing.log; \
38
+ echo "Total execution time: $${TOTAL_ELAPSED}s" >> logs/timing.log; \
39
+ echo "All Notebook execution timings:" && cat logs/timing.log
40
+
41
+ # Serve documentation with auto-build and live reload
42
+ serve:
43
+ @echo "Starting auto-build server at http://0.0.0.0:$(PORT)"
44
+ @$(SPHINXAUTOBUILD) "$(SOURCEDIR)" "$(BUILDDIR)/html" \
45
+ --host 0.0.0.0 \
46
+ --port $(PORT) \
47
+ --watch $(SOURCEDIR) \
48
+ --re-ignore ".*\.(ipynb_checkpoints|pyc|pyo|pyd|git)"
49
+
50
+ .PHONY: help Makefile compile clean serve
51
+
52
+ %: Makefile
53
+ @$(SPHINXBUILD) -M $@ "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O)
54
+
55
+ clean:
56
+ find . -name "*.ipynb" -exec nbstripout {} \;
57
+ rm -rf $(BUILDDIR)
58
+ rm -rf logs
SpecForge-ext/docs/README.md ADDED
@@ -0,0 +1,55 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # SpecForge Documentation
2
+
3
+ We recommend that new contributors start by writing documentation, which helps you quickly understand the SpecForge codebase.
4
+ Most documentation files are located under the `docs/` folder.
5
+
6
+ ## Docs Workflow
7
+
8
+ ### Install Dependency
9
+
10
+ ```bash
11
+ apt-get update && apt-get install -y pandoc parallel retry
12
+ pip install -r requirements.txt
13
+ ```
14
+
15
+ ### Update Documentation
16
+
17
+ Update your Jupyter notebooks in the appropriate subdirectories under `docs/`. If you add new files, remember to update `index.rst` (or relevant `.rst` files) accordingly.
18
+
19
+ - **`pre-commit run --all-files`** manually runs all configured checks, applying fixes if possible. If it fails the first time, re-run it to ensure lint errors are fully resolved. Make sure your code passes all checks **before** creating a Pull Request.
20
+
21
+ ```bash
22
+ # 1) Compile all Jupyter notebooks
23
+ make compile # This step can take a long time (10+ mins). You can consider skipping this step if you can make sure your added files are correct.
24
+ make html
25
+
26
+ # 2) Compile and Preview documentation locally with auto-build
27
+ # This will automatically rebuild docs when files change
28
+ # Open your browser at the displayed port to view the docs
29
+ bash serve.sh
30
+
31
+ # 2a) Alternative ways to serve documentation
32
+ # Directly use make serve
33
+ make serve
34
+ # With custom port
35
+ PORT=8080 make serve
36
+
37
+ # 3) Clean notebook outputs
38
+ # nbstripout removes notebook outputs so your PR stays clean
39
+ pip install nbstripout
40
+ find . -name '*.ipynb' -exec nbstripout {} \;
41
+
42
+ # 4) Pre-commit checks and create a PR
43
+ # After these checks pass, push your changes and open a PR on your branch
44
+ pre-commit run --all-files
45
+ ```
46
+ ---
47
+
48
+ ## Documentation Style Guidelines
49
+
50
+ - For common functionalities, we prefer **Jupyter Notebooks** over Markdown so that all examples can be executed and validated by our docs CI pipeline. For complex features (e.g., distributed serving), Markdown is preferred.
51
+ - Keep in mind the documentation execution time when writing interactive Jupyter notebooks. Each interactive notebook will be run and compiled against every commit to ensure they are runnable, so it is important to apply some tips to reduce the documentation compilation time:
52
+ - Use small models (e.g., `qwen/qwen2.5-0.5b-instruct`) for most cases to reduce server launch time.
53
+ - Reuse the launched server as much as possible to reduce server launch time.
54
+ - Do not use absolute links (e.g., `https://docs.sglang.ai/get_started/install.html`). Always prefer relative links (e.g., `../get_started/install.md`).
55
+ - Follow the existing examples to learn how to launch a server, send a query and other common styles.
SpecForge-ext/docs/conf.py ADDED
@@ -0,0 +1,188 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ import sys
3
+ from datetime import datetime
4
+ from pathlib import Path
5
+
6
+ sys.path.insert(0, os.path.abspath("../.."))
7
+
8
+ DOCS_PATH = Path(__file__).parent
9
+ ROOT_PATH = DOCS_PATH.parent
10
+
11
+ version_file = ROOT_PATH.joinpath("version.txt")
12
+ with open(version_file, "r") as f:
13
+ __version__ = f.read().strip()
14
+
15
+ project = "SGLang"
16
+ copyright = f"2025-{datetime.now().year}, SpecForge"
17
+ author = "SpecForge Team"
18
+
19
+ version = __version__
20
+ release = __version__
21
+
22
+ extensions = [
23
+ "sphinx.ext.autodoc",
24
+ "sphinx.ext.autosummary",
25
+ "sphinx.ext.napoleon",
26
+ "sphinx.ext.viewcode",
27
+ "sphinx.ext.autosectionlabel",
28
+ "sphinx.ext.intersphinx",
29
+ "sphinx_tabs.tabs",
30
+ "myst_parser",
31
+ "sphinx_copybutton",
32
+ "sphinxcontrib.mermaid",
33
+ "nbsphinx",
34
+ "sphinx.ext.mathjax",
35
+ ]
36
+
37
+ nbsphinx_allow_errors = True
38
+ nbsphinx_execute = "never"
39
+
40
+ autosectionlabel_prefix_document = True
41
+ nbsphinx_allow_directives = True
42
+
43
+
44
+ myst_enable_extensions = [
45
+ "dollarmath",
46
+ "amsmath",
47
+ "deflist",
48
+ "colon_fence",
49
+ "html_image",
50
+ "substitution",
51
+ ]
52
+
53
+ myst_heading_anchors = 5
54
+
55
+ nbsphinx_kernel_name = "python3"
56
+ nbsphinx_execute_arguments = [
57
+ "--InlineBackend.figure_formats={'svg', 'pdf'}",
58
+ "--InlineBackend.rc={'figure.dpi': 96}",
59
+ ]
60
+
61
+
62
+ nb_render_priority = {
63
+ "html": (
64
+ "application/vnd.jupyter.widget-view+json",
65
+ "application/javascript",
66
+ "text/html",
67
+ "image/svg+xml",
68
+ "image/png",
69
+ "image/jpeg",
70
+ "text/markdown",
71
+ "text/latex",
72
+ "text/plain",
73
+ )
74
+ }
75
+
76
+ myst_ref_domains = ["std", "py"]
77
+
78
+ templates_path = ["_templates"]
79
+
80
+ source_suffix = {
81
+ ".rst": "restructuredtext",
82
+ ".md": "markdown",
83
+ }
84
+
85
+ master_doc = "index"
86
+
87
+ language = "en"
88
+
89
+ exclude_patterns = ["_build", "Thumbs.db", ".DS_Store"]
90
+
91
+ pygments_style = "sphinx"
92
+
93
+ html_theme = "sphinx_book_theme"
94
+ html_logo = ROOT_PATH.joinpath("assets/logo.png").as_posix()
95
+ html_favicon = ROOT_PATH.joinpath("assets/logo.ico").as_posix()
96
+ html_title = project
97
+ html_copy_source = True
98
+ html_last_updated_fmt = ""
99
+
100
+ html_theme_options = {
101
+ "repository_url": "https://github.com/sgl-project/sgl-project.github.io",
102
+ "repository_branch": "main",
103
+ "show_navbar_depth": 3,
104
+ "max_navbar_depth": 4,
105
+ "collapse_navbar": True,
106
+ "use_edit_page_button": True,
107
+ "use_source_button": True,
108
+ "use_issues_button": True,
109
+ "use_repository_button": True,
110
+ "use_download_button": True,
111
+ "use_sidenotes": True,
112
+ "show_toc_level": 2,
113
+ }
114
+
115
+ html_context = {
116
+ "display_github": True,
117
+ "github_user": "sgl-project",
118
+ "github_repo": "sgl-project.github.io",
119
+ "github_version": "main",
120
+ "conf_py_path": "/docs/",
121
+ }
122
+
123
+ html_static_path = ["_static", "spec_bundle/public"]
124
+ html_css_files = ["css/custom_log.css"]
125
+
126
+
127
+ def setup(app):
128
+ app.add_css_file("css/custom_log.css")
129
+
130
+
131
+ htmlhelp_basename = "sglangdoc"
132
+
133
+ latex_elements = {}
134
+
135
+ latex_documents = [
136
+ (master_doc, "sglang.tex", "sglang Documentation", "SGLang Team", "manual"),
137
+ ]
138
+
139
+ man_pages = [(master_doc, "sglang", "sglang Documentation", [author], 1)]
140
+
141
+ texinfo_documents = [
142
+ (
143
+ master_doc,
144
+ "sglang",
145
+ "sglang Documentation",
146
+ author,
147
+ "sglang",
148
+ "One line description of project.",
149
+ "Miscellaneous",
150
+ ),
151
+ ]
152
+
153
+ epub_title = project
154
+
155
+ epub_exclude_files = ["search.html"]
156
+
157
+ copybutton_prompt_text = r">>> |\.\.\. "
158
+ copybutton_prompt_is_regexp = True
159
+
160
+ autodoc_preserve_defaults = True
161
+ navigation_with_keys = False
162
+
163
+ autodoc_mock_imports = [
164
+ "torch",
165
+ "transformers",
166
+ "triton",
167
+ ]
168
+
169
+ intersphinx_mapping = {
170
+ "python": ("https://docs.python.org/3.12", None),
171
+ "typing_extensions": ("https://typing-extensions.readthedocs.io/en/latest", None),
172
+ "pillow": ("https://pillow.readthedocs.io/en/stable", None),
173
+ "numpy": ("https://numpy.org/doc/stable", None),
174
+ "torch": ("https://pytorch.org/docs/stable", None),
175
+ }
176
+
177
+ html_theme = "sphinx_book_theme"
178
+
179
+
180
+ nbsphinx_prolog = """
181
+ .. raw:: html
182
+
183
+ <style>
184
+ .output_area.stderr, .output_area.stdout {
185
+ color: #d3d3d3 !important; /* light gray */
186
+ }
187
+ </style>
188
+ """
SpecForge-ext/docs/deploy.py ADDED
@@ -0,0 +1,22 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Deploy the documents
2
+
3
+ import os
4
+ from datetime import datetime
5
+
6
+
7
+ def run_cmd(cmd):
8
+ print(cmd)
9
+ os.system(cmd)
10
+
11
+
12
+ run_cmd("cd $DOC_SITE_PATH; git pull")
13
+
14
+ # (Optional) Remove old files
15
+ # run_cmd("rm -rf $DOC_SITE_PATH/*")
16
+
17
+ run_cmd("cp -r _build/html/* $DOC_SITE_PATH")
18
+
19
+ cmd_message = f"Update {datetime.now().strftime('%Y-%m-%d %H:%M:%S')}"
20
+ run_cmd(
21
+ f"cd $DOC_SITE_PATH; git add .; git commit -m '{cmd_message}'; git push origin main"
22
+ )
SpecForge-ext/docs/index.rst ADDED
@@ -0,0 +1,53 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ SpecForge Documentation
2
+ =======================
3
+
4
+ SpecForge is an ecosystem project developed by the SGLang team. It is a framework for training speculative decoding models so that you can smoothly port them over to the SGLang serving framework to speed up your inference.
5
+
6
+
7
+ .. toctree::
8
+ :maxdepth: 1
9
+ :caption: Get Started
10
+
11
+ get_started/installation.md
12
+ get_started/about.md
13
+
14
+ .. toctree::
15
+ :maxdepth: 1
16
+ :caption: Concepts
17
+
18
+ concepts/speculative_decoding.md
19
+ concepts/EAGLE3.md
20
+
21
+
22
+ .. toctree::
23
+ :maxdepth: 1
24
+ :caption: Basic Usage
25
+
26
+ basic_usage/data_preparation.md
27
+ basic_usage/training.md
28
+
29
+ .. toctree::
30
+ :maxdepth: 1
31
+ :caption: Advanced Features
32
+
33
+ advanced_features/customization.md
34
+
35
+ .. toctree::
36
+ :maxdepth: 1
37
+ :caption: Community Resources
38
+
39
+ community_resources/specbundle.md
40
+ community_resources/dashboard.md
41
+
42
+ .. toctree::
43
+ :maxdepth: 1
44
+ :caption: Examples
45
+
46
+ examples/llama3-eagle3-online.md
47
+ examples/llama3-eagle3-offline.md
48
+
49
+ .. toctree::
50
+ :maxdepth: 1
51
+ :caption: Benchmarks
52
+
53
+ benchmarks/benchmark.md
SpecForge-ext/docs/requirements.txt ADDED
@@ -0,0 +1,20 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ ipykernel
2
+ ipywidgets
3
+ jupyter_client
4
+ markdown>=3.4.0
5
+ matplotlib
6
+ myst-parser
7
+ nbconvert
8
+ nbsphinx
9
+ pandoc
10
+ pillow
11
+ pydantic
12
+ sphinx
13
+ sphinx-book-theme
14
+ sphinx-copybutton
15
+ sphinx-tabs
16
+ nbstripout
17
+ sphinxcontrib-mermaid
18
+ urllib3<2.0.0
19
+ gguf>=0.10.0
20
+ sphinx-autobuild
SpecForge-ext/docs/serve.sh ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ # Clean and serve documentation with auto-build
2
+ make clean
3
+ make serve
SpecForge-ext/examples/run_deepseek_v3_671b_eagle3_online.sh ADDED
@@ -0,0 +1,29 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+
2
+ SCRIPT_DIR=$( cd -- "$( dirname -- "${BASH_SOURCE[0]}" )" &> /dev/null && pwd )
3
+ ROOT_DIR=$(dirname $SCRIPT_DIR)
4
+
5
+ # train eagle3 for deepseek-v3
6
+ NUM_GPUS=${1:-8}
7
+ TP_SIZE=${2:-8}
8
+ BUILD_DATASET_NUM_PROC=${BUILD_DATASET_NUM_PROC:-64}
9
+
10
+ # train eagle3 online
11
+ torchrun \
12
+ --standalone \
13
+ --nproc_per_node $NUM_GPUS \
14
+ $ROOT_DIR/scripts/train_eagle3.py \
15
+ --target-model-path deepseek-ai/DeepSeek-V3 \
16
+ --draft-model-config $ROOT_DIR/configs/deepseek-v3-671b-eagle3.json \
17
+ --train-data-path $ROOT_DIR/cache/dataset/perfect-blend.jsonl \
18
+ --build-dataset-num-proc $BUILD_DATASET_NUM_PROC \
19
+ --output-dir $ROOT_DIR/outputs/deepseek-v3-671B-eagle3-perfect-blend-online \
20
+ --tp-size $TP_SIZE \
21
+ --target-model-backend sglang \
22
+ --num-epochs 10 \
23
+ --batch-size 1 \
24
+ --learning-rate 5e-5 \
25
+ --max-length 2048 \
26
+ --chat-template deepseek-v3 \
27
+ --cache-dir $ROOT_DIR/cache \
28
+ --dist-timeout 60 \
29
+ --sglang-mem-fraction-static 0.75
SpecForge-ext/examples/run_qwen3_30b_a3b_eagle3_online.sh ADDED
@@ -0,0 +1,29 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ #!/bin/bash
2
+
3
+ SCRIPT_DIR=$( cd -- "$( dirname -- "${BASH_SOURCE[0]}" )" &> /dev/null && pwd )
4
+ ROOT_DIR=$(dirname $SCRIPT_DIR)
5
+ export TORCHINDUCTOR_CACHE_DIR=$ROOT_DIR/cache/compiled_kernels
6
+
7
+ # train eagle3 for Qwen3-30B-A3B (supports TP4/TP8)
8
+ NUM_GPUS=${1:-4}
9
+ TP_SIZE=${2:-4}
10
+ BUILD_DATASET_NUM_PROC=${BUILD_DATASET_NUM_PROC:-64}
11
+
12
+ torchrun \
13
+ --standalone \
14
+ --nproc_per_node $NUM_GPUS \
15
+ $ROOT_DIR/scripts/train_eagle3.py \
16
+ --target-model-path Qwen/Qwen3-30B-A3B-Instruct-2507 \
17
+ --draft-model-config $ROOT_DIR/configs/qwen3-30B-A3B-eagle3.json \
18
+ --train-data-path $ROOT_DIR/cache/dataset/sharegpt_train.jsonl \
19
+ --build-dataset-num-proc $BUILD_DATASET_NUM_PROC \
20
+ --output-dir $ROOT_DIR/outputs/qwen3-30b-a3b-instruct-eagle3-sharegpt \
21
+ --num-epochs 10 \
22
+ --batch-size 1 \
23
+ --learning-rate 1e-4 \
24
+ --max-length 4096 \
25
+ --chat-template qwen \
26
+ --cache-dir $ROOT_DIR/cache \
27
+ --embedding-key model.embed_tokens.weight \
28
+ --tp-size $TP_SIZE \
29
+ --target-model-backend sglang
SpecForge-ext/examples/run_qwq_eagle3_online.sh ADDED
@@ -0,0 +1,28 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ SCRIPT_DIR=$( cd -- "$( dirname -- "${BASH_SOURCE[0]}" )" &> /dev/null && pwd )
2
+ ROOT_DIR=$(dirname $SCRIPT_DIR)
3
+
4
+ export TORCHINDUCTOR_CACHE_DIR=$ROOT_DIR/cache/compiled_kernels
5
+
6
+ # train eagle3 for qwq-32b
7
+ NUM_GPUS=${1:-4}
8
+ TP_SIZE=${2:-4}
9
+ BUILD_DATASET_NUM_PROC=${BUILD_DATASET_NUM_PROC:-64}
10
+
11
+ torchrun \
12
+ --standalone \
13
+ --nproc_per_node $NUM_GPUS \
14
+ $ROOT_DIR/scripts/train_eagle3.py \
15
+ --target-model-path Qwen/QwQ-32B \
16
+ --draft-model-config $ROOT_DIR/configs/qwq-32B-eagle3.json \
17
+ --train-data-path $ROOT_DIR/cache/dataset/sharegpt_train.jsonl \
18
+ --build-dataset-num-proc $BUILD_DATASET_NUM_PROC \
19
+ --output-dir $ROOT_DIR/outputs/qwq-32b-eagle3-sharegpt \
20
+ --num-epochs 10 \
21
+ --batch-size 1 \
22
+ --learning-rate 1e-4 \
23
+ --max-length 4096 \
24
+ --chat-template qwen \
25
+ --cache-dir $ROOT_DIR/cache \
26
+ --embedding-key model.embed_tokens.weight \
27
+ --tp-size $TP_SIZE \
28
+ --target-model-backend sglang
SpecForge-ext/logs/baseline_gsm8k_20260213_100853.log ADDED
@@ -0,0 +1,5 @@
 
 
 
 
0
  0%| | 0/100 [00:00<?, ?it/s]
1
  1%| | 1/100 [00:00<00:27, 3.65it/s]
2
  2%|▏ | 2/100 [00:00<00:24, 4.02it/s]
3
  3%|▎ | 3/100 [00:00<00:34, 2.82it/s]
4
  4%|▍ | 4/100 [00:01<00:29, 3.24it/s]
5
  5%|▌ | 5/100 [00:01<00:33, 2.82it/s]
6
  6%|▌ | 6/100 [00:02<00:43, 2.17it/s]
7
  7%|▋ | 7/100 [00:02<00:44, 2.11it/s]
8
  8%|▊ | 8/100 [00:03<00:55, 1.65it/s]
9
  9%|▉ | 9/100 [00:04<01:08, 1.32it/s]
10
  10%|█ | 10/100 [00:05<01:02, 1.44it/s]
11
  11%|█ | 11/100 [00:05<00:57, 1.55it/s]
12
  12%|█▏ | 12/100 [00:06<00:56, 1.55it/s]
13
  13%|█▎ | 13/100 [00:07<00:54, 1.58it/s]
14
  14%|█▍ | 14/100 [00:07<01:00, 1.43it/s]
15
  15%|█▌ | 15/100 [00:08<01:02, 1.36it/s]
16
  16%|█▌ | 16/100 [00:09<00:57, 1.46it/s]
17
  17%|█▋ | 17/100 [00:09<00:44, 1.86it/s]
18
  18%|█▊ | 18/100 [00:10<00:45, 1.80it/s]
19
  19%|█▉ | 19/100 [00:10<00:41, 1.96it/s]
20
  20%|██ | 20/100 [00:11<00:45, 1.76it/s]
21
  21%|██ | 21/100 [00:12<00:54, 1.45it/s]
22
  22%|██▏ | 22/100 [00:12<00:49, 1.57it/s]
23
  23%|██▎ | 23/100 [00:13<00:45, 1.68it/s]
24
  24%|██▍ | 24/100 [00:13<00:37, 2.02it/s]
25
  25%|██▌ | 25/100 [00:13<00:33, 2.23it/s]
26
  26%|██▌ | 26/100 [00:14<00:41, 1.77it/s]
27
  27%|██▋ | 27/100 [00:15<00:44, 1.64it/s]
28
  28%|██▊ | 28/100 [00:15<00:41, 1.73it/s]
29
  29%|██▉ | 29/100 [00:16<00:37, 1.91it/s]
30
  30%|███ | 30/100 [00:16<00:33, 2.12it/s]
31
  31%|███ | 31/100 [00:17<00:33, 2.04it/s]
32
  32%|███▏ | 32/100 [00:17<00:36, 1.88it/s]
33
  33%|███▎ | 33/100 [00:18<00:32, 2.07it/s]
34
  34%|███▍ | 34/100 [00:18<00:31, 2.10it/s]
35
  35%|███▌ | 35/100 [00:19<00:30, 2.11it/s]
36
  36%|███▌ | 36/100 [00:19<00:27, 2.32it/s]
37
  37%|███▋ | 37/100 [00:19<00:27, 2.30it/s]
38
  38%|███▊ | 38/100 [00:20<00:38, 1.62it/s]
39
  39%|███▉ | 39/100 [00:21<00:37, 1.63it/s]
40
  40%|████ | 40/100 [00:22<00:36, 1.64it/s]
41
  41%|████ | 41/100 [00:22<00:29, 1.97it/s]
42
  42%|████▏ | 42/100 [00:22<00:28, 2.06it/s]
43
  43%|████▎ | 43/100 [00:23<00:25, 2.21it/s]
44
  44%|████▍ | 44/100 [00:24<00:32, 1.74it/s]
45
  45%|████▌ | 45/100 [00:24<00:31, 1.75it/s]
46
  46%|████▌ | 46/100 [00:25<00:29, 1.81it/s]
47
  47%|████▋ | 47/100 [00:25<00:29, 1.78it/s]
48
  48%|████▊ | 48/100 [00:26<00:32, 1.61it/s]
49
  49%|████▉ | 49/100 [00:26<00:28, 1.79it/s]
50
  50%|█████ | 50/100 [00:27<00:25, 1.94it/s]
51
  51%|█████ | 51/100 [00:27<00:25, 1.95it/s]
52
  52%|█████▏ | 52/100 [00:28<00:23, 2.06it/s]
53
  53%|█████▎ | 53/100 [00:28<00:23, 2.00it/s]
54
  54%|█████▍ | 54/100 [00:29<00:23, 1.97it/s]
55
  55%|█████▌ | 55/100 [00:29<00:20, 2.20it/s]
56
  56%|█████▌ | 56/100 [00:29<00:18, 2.43it/s]
57
  57%|█████▋ | 57/100 [00:30<00:16, 2.56it/s]
58
  58%|█████▊ | 58/100 [00:30<00:17, 2.36it/s]
59
  59%|█████▉ | 59/100 [00:31<00:19, 2.12it/s]
60
  60%|██████ | 60/100 [00:31<00:17, 2.30it/s]
61
  61%|██████ | 61/100 [00:32<00:19, 2.00it/s]
62
  62%|██████▏ | 62/100 [00:32<00:19, 1.99it/s]
63
  63%|██████▎ | 63/100 [00:33<00:18, 1.99it/s]
64
  64%|██████▍ | 64/100 [00:34<00:19, 1.87it/s]
65
  65%|██████▌ | 65/100 [00:34<00:19, 1.79it/s]
66
  66%|██████▌ | 66/100 [00:35<00:18, 1.88it/s]
67
  67%|██████▋ | 67/100 [00:35<00:17, 1.91it/s]
68
  68%|██████▊ | 68/100 [00:36<00:16, 1.98it/s]
69
  69%|██████▉ | 69/100 [00:36<00:14, 2.13it/s]
70
  70%|███████ | 70/100 [00:36<00:13, 2.25it/s]
71
  71%|███████ | 71/100 [00:37<00:17, 1.65it/s]
72
  72%|███████▏ | 72/100 [00:38<00:14, 1.94it/s]
73
  73%|███████▎ | 73/100 [00:38<00:13, 2.01it/s]
74
  74%|███████▍ | 74/100 [00:39<00:14, 1.74it/s]
75
  75%|███████▌ | 75/100 [00:40<00:19, 1.29it/s]
76
  76%|███████▌ | 76/100 [00:41<00:18, 1.32it/s]
77
  77%|███████▋ | 77/100 [00:42<00:18, 1.24it/s]
78
  78%|███████▊ | 78/100 [00:42<00:15, 1.39it/s]
79
  79%|███████▉ | 79/100 [00:43<00:14, 1.42it/s]
80
  80%|████████ | 80/100 [00:43<00:11, 1.69it/s]
81
  81%|████████ | 81/100 [00:44<00:10, 1.84it/s]
82
  82%|████████▏ | 82/100 [00:44<00:10, 1.69it/s]
83
  83%|████████▎ | 83/100 [00:45<00:09, 1.77it/s]
84
  84%|████████▍ | 84/100 [00:45<00:07, 2.11it/s]
85
  85%|████████▌ | 85/100 [00:46<00:07, 1.97it/s]
86
  86%|████████▌ | 86/100 [00:46<00:06, 2.08it/s]
87
  87%|████████▋ | 87/100 [00:47<00:06, 2.08it/s]
88
  88%|████████▊ | 88/100 [00:48<00:07, 1.52it/s]
89
  89%|████████▉ | 89/100 [00:48<00:06, 1.74it/s]
90
  90%|█████████ | 90/100 [00:48<00:04, 2.02it/s]
91
  91%|█████████ | 91/100 [00:49<00:04, 1.92it/s]
92
  92%|█████████▏| 92/100 [00:49<00:03, 2.07it/s]
93
  93%|█████████▎| 93/100 [00:50<00:03, 2.19it/s]
94
  94%|█████████▍| 94/100 [00:50<00:02, 2.01it/s]
95
  95%|█████████▌| 95/100 [00:51<00:02, 1.92it/s]
96
  96%|█████████▌| 96/100 [00:51<00:02, 1.87it/s]
97
  97%|█████████▋| 97/100 [00:52<00:01, 2.15it/s]
98
  98%|█████████▊| 98/100 [00:52<00:00, 2.01it/s]
99
  99%|█████████▉| 99/100 [00:53<00:00, 2.24it/s]
 
 
1
+ WARNING:sglang.srt.server_args:Attention backend not explicitly specified. Use fa3 backend by default.
2
+ Running benchmark gsm8k with 100 prompts, batch size 1, steps None, topk None, num_draft_tokens None, subset None
3
+ Loading GSM8K data from local: /workspace/hanrui/datasets/gsm8k/test.jsonl
4
+
5
  0%| | 0/100 [00:00<?, ?it/s]
6
  1%| | 1/100 [00:00<00:27, 3.65it/s]
7
  2%|▏ | 2/100 [00:00<00:24, 4.02it/s]
8
  3%|▎ | 3/100 [00:00<00:34, 2.82it/s]
9
  4%|▍ | 4/100 [00:01<00:29, 3.24it/s]
10
  5%|▌ | 5/100 [00:01<00:33, 2.82it/s]
11
  6%|▌ | 6/100 [00:02<00:43, 2.17it/s]
12
  7%|▋ | 7/100 [00:02<00:44, 2.11it/s]
13
  8%|▊ | 8/100 [00:03<00:55, 1.65it/s]
14
  9%|▉ | 9/100 [00:04<01:08, 1.32it/s]
15
  10%|█ | 10/100 [00:05<01:02, 1.44it/s]
16
  11%|█ | 11/100 [00:05<00:57, 1.55it/s]
17
  12%|█▏ | 12/100 [00:06<00:56, 1.55it/s]
18
  13%|█▎ | 13/100 [00:07<00:54, 1.58it/s]
19
  14%|█▍ | 14/100 [00:07<01:00, 1.43it/s]
20
  15%|█▌ | 15/100 [00:08<01:02, 1.36it/s]
21
  16%|█▌ | 16/100 [00:09<00:57, 1.46it/s]
22
  17%|█▋ | 17/100 [00:09<00:44, 1.86it/s]
23
  18%|█▊ | 18/100 [00:10<00:45, 1.80it/s]
24
  19%|█▉ | 19/100 [00:10<00:41, 1.96it/s]
25
  20%|██ | 20/100 [00:11<00:45, 1.76it/s]
26
  21%|██ | 21/100 [00:12<00:54, 1.45it/s]
27
  22%|██▏ | 22/100 [00:12<00:49, 1.57it/s]
28
  23%|██▎ | 23/100 [00:13<00:45, 1.68it/s]
29
  24%|██▍ | 24/100 [00:13<00:37, 2.02it/s]
30
  25%|██▌ | 25/100 [00:13<00:33, 2.23it/s]
31
  26%|██▌ | 26/100 [00:14<00:41, 1.77it/s]
32
  27%|██▋ | 27/100 [00:15<00:44, 1.64it/s]
33
  28%|██▊ | 28/100 [00:15<00:41, 1.73it/s]
34
  29%|██▉ | 29/100 [00:16<00:37, 1.91it/s]
35
  30%|███ | 30/100 [00:16<00:33, 2.12it/s]
36
  31%|███ | 31/100 [00:17<00:33, 2.04it/s]
37
  32%|███▏ | 32/100 [00:17<00:36, 1.88it/s]
38
  33%|███▎ | 33/100 [00:18<00:32, 2.07it/s]
39
  34%|███▍ | 34/100 [00:18<00:31, 2.10it/s]
40
  35%|███▌ | 35/100 [00:19<00:30, 2.11it/s]
41
  36%|███▌ | 36/100 [00:19<00:27, 2.32it/s]
42
  37%|███▋ | 37/100 [00:19<00:27, 2.30it/s]
43
  38%|███▊ | 38/100 [00:20<00:38, 1.62it/s]
44
  39%|███▉ | 39/100 [00:21<00:37, 1.63it/s]
45
  40%|████ | 40/100 [00:22<00:36, 1.64it/s]
46
  41%|████ | 41/100 [00:22<00:29, 1.97it/s]
47
  42%|████▏ | 42/100 [00:22<00:28, 2.06it/s]
48
  43%|████▎ | 43/100 [00:23<00:25, 2.21it/s]
49
  44%|████▍ | 44/100 [00:24<00:32, 1.74it/s]
50
  45%|████▌ | 45/100 [00:24<00:31, 1.75it/s]
51
  46%|████▌ | 46/100 [00:25<00:29, 1.81it/s]
52
  47%|████▋ | 47/100 [00:25<00:29, 1.78it/s]
53
  48%|████▊ | 48/100 [00:26<00:32, 1.61it/s]
54
  49%|████▉ | 49/100 [00:26<00:28, 1.79it/s]
55
  50%|█████ | 50/100 [00:27<00:25, 1.94it/s]
56
  51%|█████ | 51/100 [00:27<00:25, 1.95it/s]
57
  52%|█████▏ | 52/100 [00:28<00:23, 2.06it/s]
58
  53%|█████▎ | 53/100 [00:28<00:23, 2.00it/s]
59
  54%|█████▍ | 54/100 [00:29<00:23, 1.97it/s]
60
  55%|█████▌ | 55/100 [00:29<00:20, 2.20it/s]
61
  56%|█████▌ | 56/100 [00:29<00:18, 2.43it/s]
62
  57%|█████▋ | 57/100 [00:30<00:16, 2.56it/s]
63
  58%|█████▊ | 58/100 [00:30<00:17, 2.36it/s]
64
  59%|█████▉ | 59/100 [00:31<00:19, 2.12it/s]
65
  60%|██████ | 60/100 [00:31<00:17, 2.30it/s]
66
  61%|██████ | 61/100 [00:32<00:19, 2.00it/s]
67
  62%|██████▏ | 62/100 [00:32<00:19, 1.99it/s]
68
  63%|██████▎ | 63/100 [00:33<00:18, 1.99it/s]
69
  64%|██████▍ | 64/100 [00:34<00:19, 1.87it/s]
70
  65%|██████▌ | 65/100 [00:34<00:19, 1.79it/s]
71
  66%|██████▌ | 66/100 [00:35<00:18, 1.88it/s]
72
  67%|██████▋ | 67/100 [00:35<00:17, 1.91it/s]
73
  68%|██████▊ | 68/100 [00:36<00:16, 1.98it/s]
74
  69%|██████▉ | 69/100 [00:36<00:14, 2.13it/s]
75
  70%|███████ | 70/100 [00:36<00:13, 2.25it/s]
76
  71%|███████ | 71/100 [00:37<00:17, 1.65it/s]
77
  72%|███████▏ | 72/100 [00:38<00:14, 1.94it/s]
78
  73%|███████▎ | 73/100 [00:38<00:13, 2.01it/s]
79
  74%|███████▍ | 74/100 [00:39<00:14, 1.74it/s]
80
  75%|███████▌ | 75/100 [00:40<00:19, 1.29it/s]
81
  76%|███████▌ | 76/100 [00:41<00:18, 1.32it/s]
82
  77%|███████▋ | 77/100 [00:42<00:18, 1.24it/s]
83
  78%|███████▊ | 78/100 [00:42<00:15, 1.39it/s]
84
  79%|███████▉ | 79/100 [00:43<00:14, 1.42it/s]
85
  80%|████████ | 80/100 [00:43<00:11, 1.69it/s]
86
  81%|████████ | 81/100 [00:44<00:10, 1.84it/s]
87
  82%|████████▏ | 82/100 [00:44<00:10, 1.69it/s]
88
  83%|████████▎ | 83/100 [00:45<00:09, 1.77it/s]
89
  84%|████████▍ | 84/100 [00:45<00:07, 2.11it/s]
90
  85%|████████▌ | 85/100 [00:46<00:07, 1.97it/s]
91
  86%|████████▌ | 86/100 [00:46<00:06, 2.08it/s]
92
  87%|████████▋ | 87/100 [00:47<00:06, 2.08it/s]
93
  88%|████████▊ | 88/100 [00:48<00:07, 1.52it/s]
94
  89%|████████▉ | 89/100 [00:48<00:06, 1.74it/s]
95
  90%|█████████ | 90/100 [00:48<00:04, 2.02it/s]
96
  91%|█████████ | 91/100 [00:49<00:04, 1.92it/s]
97
  92%|█████████▏| 92/100 [00:49<00:03, 2.07it/s]
98
  93%|█████████▎| 93/100 [00:50<00:03, 2.19it/s]
99
  94%|█████████▍| 94/100 [00:50<00:02, 2.01it/s]
100
  95%|█████████▌| 95/100 [00:51<00:02, 1.92it/s]
101
  96%|█████████▌| 96/100 [00:51<00:02, 1.87it/s]
102
  97%|█████████▋| 97/100 [00:52<00:01, 2.15it/s]
103
  98%|█████████▊| 98/100 [00:52<00:00, 2.01it/s]
104
  99%|█████████▉| 99/100 [00:53<00:00, 2.24it/s]
105
+ Results saved to ./results/baseline_gsm8k_results_20260213_100955.jsonl
SpecForge-ext/logs/baseline_humaneval_20260213_100956.log ADDED
@@ -0,0 +1,5 @@
 
 
 
 
0
  0%| | 0/164 [00:00<?, ?it/s]
1
  1%| | 1/164 [00:04<11:00, 4.06s/it]
2
  1%| | 2/164 [00:09<12:46, 4.73s/it]
3
  2%|▏ | 3/164 [00:13<11:59, 4.47s/it]
4
  2%|▏ | 4/164 [00:17<11:48, 4.43s/it]
5
  3%|▎ | 5/164 [00:21<11:20, 4.28s/it]
6
  4%|▎ | 6/164 [00:26<11:13, 4.26s/it]
7
  4%|▍ | 7/164 [00:30<11:31, 4.40s/it]
8
  5%|▍ | 8/164 [00:35<11:26, 4.40s/it]
9
  5%|▌ | 9/164 [00:39<11:20, 4.39s/it]
10
  6%|▌ | 10/164 [00:43<11:13, 4.37s/it]
11
  7%|▋ | 11/164 [00:48<11:12, 4.39s/it]
12
  7%|▋ | 12/164 [00:52<11:13, 4.43s/it]
13
  8%|▊ | 13/164 [00:57<11:00, 4.38s/it]
14
  9%|▊ | 14/164 [01:01<10:42, 4.28s/it]
15
  9%|▉ | 15/164 [01:05<10:56, 4.41s/it]
16
  10%|▉ | 16/164 [01:10<10:47, 4.38s/it]
17
  10%|█ | 17/164 [01:14<10:46, 4.40s/it]
18
  11%|█ | 18/164 [01:18<10:38, 4.38s/it]
19
  12%|█▏ | 19/164 [01:23<10:27, 4.33s/it]
20
  12%|█▏ | 20/164 [01:27<10:25, 4.34s/it]
21
  13%|█▎ | 21/164 [01:31<10:21, 4.34s/it]
22
  13%|█▎ | 22/164 [01:36<10:16, 4.34s/it]
23
  14%|█▍ | 23/164 [01:40<10:23, 4.42s/it]
24
  15%|█▍ | 24/164 [01:45<10:22, 4.45s/it]
25
  15%|█▌ | 25/164 [01:49<10:05, 4.36s/it]
26
  16%|█▌ | 26/164 [01:53<09:58, 4.34s/it]
27
  16%|█▋ | 27/164 [01:57<09:47, 4.29s/it]
28
  17%|█▋ | 28/164 [02:02<09:38, 4.26s/it]
29
  18%|█▊ | 29/164 [02:06<09:29, 4.22s/it]
30
  18%|█▊ | 30/164 [02:10<09:41, 4.34s/it]
31
  19%|█▉ | 31/164 [02:14<09:28, 4.27s/it]
32
  20%|█▉ | 32/164 [02:18<09:10, 4.17s/it]
33
  20%|██ | 33/164 [02:23<09:24, 4.31s/it]
34
  21%|██ | 34/164 [02:27<09:17, 4.29s/it]
35
  21%|██▏ | 35/164 [02:31<09:09, 4.26s/it]
36
  22%|██▏ | 36/164 [02:36<09:09, 4.30s/it]
37
  23%|██▎ | 37/164 [02:40<08:55, 4.22s/it]
38
  23%|██▎ | 38/164 [02:44<08:49, 4.21s/it]
39
  24%|██▍ | 39/164 [02:48<08:55, 4.29s/it]
40
  24%|██▍ | 40/164 [02:53<08:51, 4.28s/it]
41
  25%|██▌ | 41/164 [02:57<08:48, 4.30s/it]
42
  26%|██▌ | 42/164 [03:01<08:48, 4.33s/it]
43
  26%|██▌ | 43/164 [03:05<08:18, 4.12s/it]
44
  27%|██▋ | 44/164 [03:09<08:12, 4.10s/it]
45
  27%|██▋ | 45/164 [03:13<08:13, 4.15s/it]
46
  28%|██▊ | 46/164 [03:17<08:03, 4.10s/it]
47
  29%|██▊ | 47/164 [03:22<08:02, 4.12s/it]
48
  29%|██▉ | 48/164 [03:26<07:54, 4.09s/it]
49
  30%|██▉ | 49/164 [03:30<08:04, 4.22s/it]
50
  30%|███ | 50/164 [03:35<08:08, 4.29s/it]
51
  31%|███ | 51/164 [03:39<08:17, 4.40s/it]
52
  32%|███▏ | 52/164 [03:44<08:12, 4.40s/it]
53
  32%|███▏ | 53/164 [03:48<07:54, 4.27s/it]
54
  33%|███▎ | 54/164 [03:51<07:14, 3.95s/it]
55
  34%|███▎ | 55/164 [03:55<07:28, 4.11s/it]
56
  34%|███▍ | 56/164 [03:59<07:20, 4.07s/it]
57
  35%|███▍ | 57/164 [04:04<07:32, 4.23s/it]
58
  35%|███▌ | 58/164 [04:08<07:34, 4.29s/it]
59
  36%|███▌ | 59/164 [04:13<07:28, 4.27s/it]
60
  37%|███▋ | 60/164 [04:17<07:25, 4.28s/it]
61
  37%|███▋ | 61/164 [04:21<07:16, 4.24s/it]
62
  38%|███▊ | 62/164 [04:26<07:25, 4.36s/it]
63
  38%|███▊ | 63/164 [04:30<07:27, 4.44s/it]
64
  39%|███▉ | 64/164 [04:34<07:07, 4.27s/it]
65
  40%|███▉ | 65/164 [04:38<06:54, 4.19s/it]
66
  40%|████ | 66/164 [04:42<06:55, 4.24s/it]
67
  41%|████ | 67/164 [04:47<06:45, 4.18s/it]
68
  41%|████▏ | 68/164 [04:51<06:41, 4.19s/it]
69
  42%|████▏ | 69/164 [04:55<06:45, 4.27s/it]
70
  43%|████▎ | 70/164 [04:59<06:34, 4.20s/it]
71
  43%|████▎ | 71/164 [05:03<06:25, 4.14s/it]
72
  44%|████▍ | 72/164 [05:07<06:12, 4.05s/it]
73
  45%|████▍ | 73/164 [05:11<06:10, 4.08s/it]
74
  45%|████▌ | 74/164 [05:15<06:07, 4.08s/it]
75
  46%|████▌ | 75/164 [05:19<06:05, 4.10s/it]
76
  46%|████▋ | 76/164 [05:24<06:08, 4.18s/it]
77
  47%|████▋ | 77/164 [05:28<06:00, 4.14s/it]
78
  48%|████▊ | 78/164 [05:32<05:59, 4.18s/it]
79
  48%|████▊ | 79/164 [05:36<05:50, 4.13s/it]
80
  49%|████▉ | 80/164 [05:40<05:43, 4.09s/it]
81
  49%|████▉ | 81/164 [05:44<05:43, 4.14s/it]
82
  50%|█████ | 82/164 [05:49<05:49, 4.26s/it]
83
  51%|█████ | 83/164 [05:53<05:34, 4.13s/it]
84
  51%|█████ | 84/164 [05:57<05:28, 4.11s/it]
85
  52%|█████▏ | 85/164 [06:01<05:19, 4.04s/it]
86
  52%|█████▏ | 86/164 [06:05<05:12, 4.01s/it]
87
  53%|█████▎ | 87/164 [06:09<05:09, 4.02s/it]
88
  54%|█████▎ | 88/164 [06:13<05:08, 4.06s/it]
89
  54%|█████▍ | 89/164 [06:17<05:02, 4.03s/it]
90
  55%|█████▍ | 90/164 [06:21<05:08, 4.17s/it]
91
  55%|█████▌ | 91/164 [06:25<04:55, 4.05s/it]
92
  56%|█████▌ | 92/164 [06:29<04:55, 4.10s/it]
93
  57%|█████▋ | 93/164 [06:33<04:45, 4.03s/it]
94
  57%|█████▋ | 94/164 [06:38<04:54, 4.21s/it]
95
  58%|█████▊ | 95/164 [06:42<04:45, 4.14s/it]
96
  59%|█████▊ | 96/164 [06:46<04:40, 4.13s/it]
97
  59%|█████▉ | 97/164 [06:50<04:31, 4.06s/it]
98
  60%|█████▉ | 98/164 [06:54<04:26, 4.03s/it]
99
  60%|██████ | 99/164 [06:58<04:24, 4.08s/it]
100
  61%|██████ | 100/164 [07:01<04:09, 3.90s/it]
101
  62%|██████▏ | 101/164 [07:05<04:09, 3.96s/it]
102
  62%|██████▏ | 102/164 [07:10<04:12, 4.08s/it]
103
  63%|██████▎ | 103/164 [07:14<04:03, 4.00s/it]
104
  63%|██████▎ | 104/164 [07:17<03:55, 3.93s/it]
105
  64%|██████▍ | 105/164 [07:21<03:54, 3.97s/it]
106
  65%|██████▍ | 106/164 [07:26<03:53, 4.02s/it]
107
  65%|██████▌ | 107/164 [07:30<03:51, 4.06s/it]
108
  66%|██████▌ | 108/164 [07:34<03:44, 4.01s/it]
109
  66%|██████▋ | 109/164 [07:38<03:38, 3.98s/it]
110
  67%|██████▋ | 110/164 [07:41<03:33, 3.94s/it]
111
  68%|██████▊ | 111/164 [07:46<03:34, 4.06s/it]
112
  68%|██████▊ | 112/164 [07:50<03:30, 4.05s/it]
113
  69%|██████▉ | 113/164 [07:54<03:29, 4.11s/it]
114
  70%|██████▉ | 114/164 [07:58<03:24, 4.09s/it]
115
  70%|███████ | 115/164 [08:02<03:17, 4.04s/it]
116
  71%|███████ | 116/164 [08:06<03:17, 4.12s/it]
117
  71%|███████▏ | 117/164 [08:10<03:10, 4.04s/it]
118
  72%|███████▏ | 118/164 [08:14<03:06, 4.05s/it]
119
  73%|███████▎ | 119/164 [08:18<03:03, 4.09s/it]
120
  73%|███████▎ | 120/164 [08:23<03:08, 4.29s/it]
121
  74%|███████▍ | 121/164 [08:27<03:03, 4.26s/it]
122
  74%|███████▍ | 122/164 [08:31<02:56, 4.20s/it]
123
  75%|███████▌ | 123/164 [08:35<02:47, 4.09s/it]
124
  76%|███████▌ | 124/164 [08:39<02:43, 4.10s/it]
125
  76%|███████▌ | 125/164 [08:43<02:39, 4.09s/it]
126
  77%|███████▋ | 126/164 [08:48<02:36, 4.12s/it]
127
  77%|███████▋ | 127/164 [08:51<02:29, 4.04s/it]
128
  78%|███████▊ | 128/164 [08:56<02:26, 4.06s/it]
129
  79%|███████▊ | 129/164 [09:00<02:21, 4.04s/it]
130
  79%|███████▉ | 130/164 [09:04<02:18, 4.08s/it]
131
  80%|███████▉ | 131/164 [09:08<02:15, 4.12s/it]
132
  80%|████████ | 132/164 [09:12<02:09, 4.04s/it]
133
  81%|████████ | 133/164 [09:16<02:10, 4.20s/it]
134
  82%|████████▏ | 134/164 [09:21<02:05, 4.19s/it]
135
  82%|████████▏ | 135/164 [09:24<01:57, 4.05s/it]
136
  83%|████████▎ | 136/164 [09:28<01:53, 4.05s/it]
137
  84%|████████▎ | 137/164 [09:32<01:50, 4.09s/it]
138
  84%|████████▍ | 138/164 [09:37<01:47, 4.14s/it]
139
  85%|████████▍ | 139/164 [09:41<01:44, 4.18s/it]
140
  85%|████████▌ | 140/164 [09:45<01:39, 4.13s/it]
141
  86%|████████▌ | 141/164 [09:49<01:36, 4.21s/it]
142
  87%|████████▋ | 142/164 [09:54<01:32, 4.22s/it]
143
  87%|████████▋ | 143/164 [09:58<01:27, 4.17s/it]
144
  88%|████████▊ | 144/164 [10:02<01:22, 4.13s/it]
145
  88%|████████▊ | 145/164 [10:06<01:20, 4.22s/it]
146
  89%|████████▉ | 146/164 [10:10<01:16, 4.24s/it]
147
  90%|████████▉ | 147/164 [10:14<01:10, 4.13s/it]
148
  90%|█████████ | 148/164 [10:19<01:06, 4.15s/it]
149
  91%|█████████ | 149/164 [10:23<01:03, 4.23s/it]
150
  91%|█████████▏| 150/164 [10:27<00:59, 4.23s/it]
151
  92%|█████████▏| 151/164 [10:31<00:54, 4.21s/it]
152
  93%|█████████▎| 152/164 [10:35<00:49, 4.11s/it]
153
  93%|█████████▎| 153/164 [10:39<00:45, 4.11s/it]
154
  94%|█████████▍| 154/164 [10:43<00:41, 4.11s/it]
155
  95%|█████████▍| 155/164 [10:48<00:37, 4.22s/it]
156
  95%|█████████▌| 156/164 [10:52<00:33, 4.13s/it]
157
  96%|█████████▌| 157/164 [10:56<00:28, 4.10s/it]
158
  96%|█████████▋| 158/164 [11:00<00:24, 4.16s/it]
159
  97%|█████████▋| 159/164 [11:04<00:20, 4.20s/it]
160
  98%|█████████▊| 160/164 [11:09<00:16, 4.20s/it]
161
  98%|█████████▊| 161/164 [11:13<00:12, 4.27s/it]
162
  99%|█████████▉| 162/164 [11:17<00:08, 4.21s/it]
163
  99%|█████████▉| 163/164 [11:21<00:04, 4.16s/it]
 
 
1
+ WARNING:sglang.srt.server_args:Attention backend not explicitly specified. Use fa3 backend by default.
2
+ Running benchmark humaneval with 164 prompts, batch size 1, steps None, topk None, num_draft_tokens None, subset None
3
+ Loading HumanEval data from local: /workspace/hanrui/datasets/humaneval/test.jsonl
4
+
5
  0%| | 0/164 [00:00<?, ?it/s]
6
  1%| | 1/164 [00:04<11:00, 4.06s/it]
7
  1%| | 2/164 [00:09<12:46, 4.73s/it]
8
  2%|▏ | 3/164 [00:13<11:59, 4.47s/it]
9
  2%|▏ | 4/164 [00:17<11:48, 4.43s/it]
10
  3%|▎ | 5/164 [00:21<11:20, 4.28s/it]
11
  4%|▎ | 6/164 [00:26<11:13, 4.26s/it]
12
  4%|▍ | 7/164 [00:30<11:31, 4.40s/it]
13
  5%|▍ | 8/164 [00:35<11:26, 4.40s/it]
14
  5%|▌ | 9/164 [00:39<11:20, 4.39s/it]
15
  6%|▌ | 10/164 [00:43<11:13, 4.37s/it]
16
  7%|▋ | 11/164 [00:48<11:12, 4.39s/it]
17
  7%|▋ | 12/164 [00:52<11:13, 4.43s/it]
18
  8%|▊ | 13/164 [00:57<11:00, 4.38s/it]
19
  9%|▊ | 14/164 [01:01<10:42, 4.28s/it]
20
  9%|▉ | 15/164 [01:05<10:56, 4.41s/it]
21
  10%|▉ | 16/164 [01:10<10:47, 4.38s/it]
22
  10%|█ | 17/164 [01:14<10:46, 4.40s/it]
23
  11%|█ | 18/164 [01:18<10:38, 4.38s/it]
24
  12%|█▏ | 19/164 [01:23<10:27, 4.33s/it]
25
  12%|█▏ | 20/164 [01:27<10:25, 4.34s/it]
26
  13%|█▎ | 21/164 [01:31<10:21, 4.34s/it]
27
  13%|█▎ | 22/164 [01:36<10:16, 4.34s/it]
28
  14%|█▍ | 23/164 [01:40<10:23, 4.42s/it]
29
  15%|█▍ | 24/164 [01:45<10:22, 4.45s/it]
30
  15%|█▌ | 25/164 [01:49<10:05, 4.36s/it]
31
  16%|█▌ | 26/164 [01:53<09:58, 4.34s/it]
32
  16%|█▋ | 27/164 [01:57<09:47, 4.29s/it]
33
  17%|█▋ | 28/164 [02:02<09:38, 4.26s/it]
34
  18%|█▊ | 29/164 [02:06<09:29, 4.22s/it]
35
  18%|█▊ | 30/164 [02:10<09:41, 4.34s/it]
36
  19%|█▉ | 31/164 [02:14<09:28, 4.27s/it]
37
  20%|█▉ | 32/164 [02:18<09:10, 4.17s/it]
38
  20%|██ | 33/164 [02:23<09:24, 4.31s/it]
39
  21%|██ | 34/164 [02:27<09:17, 4.29s/it]
40
  21%|██▏ | 35/164 [02:31<09:09, 4.26s/it]
41
  22%|██▏ | 36/164 [02:36<09:09, 4.30s/it]
42
  23%|██▎ | 37/164 [02:40<08:55, 4.22s/it]
43
  23%|██▎ | 38/164 [02:44<08:49, 4.21s/it]
44
  24%|██▍ | 39/164 [02:48<08:55, 4.29s/it]
45
  24%|██▍ | 40/164 [02:53<08:51, 4.28s/it]
46
  25%|██▌ | 41/164 [02:57<08:48, 4.30s/it]
47
  26%|██▌ | 42/164 [03:01<08:48, 4.33s/it]
48
  26%|██▌ | 43/164 [03:05<08:18, 4.12s/it]
49
  27%|██▋ | 44/164 [03:09<08:12, 4.10s/it]
50
  27%|██▋ | 45/164 [03:13<08:13, 4.15s/it]
51
  28%|██▊ | 46/164 [03:17<08:03, 4.10s/it]
52
  29%|██▊ | 47/164 [03:22<08:02, 4.12s/it]
53
  29%|██▉ | 48/164 [03:26<07:54, 4.09s/it]
54
  30%|██▉ | 49/164 [03:30<08:04, 4.22s/it]
55
  30%|███ | 50/164 [03:35<08:08, 4.29s/it]
56
  31%|███ | 51/164 [03:39<08:17, 4.40s/it]
57
  32%|███▏ | 52/164 [03:44<08:12, 4.40s/it]
58
  32%|███▏ | 53/164 [03:48<07:54, 4.27s/it]
59
  33%|███▎ | 54/164 [03:51<07:14, 3.95s/it]
60
  34%|███▎ | 55/164 [03:55<07:28, 4.11s/it]
61
  34%|███▍ | 56/164 [03:59<07:20, 4.07s/it]
62
  35%|███▍ | 57/164 [04:04<07:32, 4.23s/it]
63
  35%|███▌ | 58/164 [04:08<07:34, 4.29s/it]
64
  36%|███▌ | 59/164 [04:13<07:28, 4.27s/it]
65
  37%|███▋ | 60/164 [04:17<07:25, 4.28s/it]
66
  37%|███▋ | 61/164 [04:21<07:16, 4.24s/it]
67
  38%|███▊ | 62/164 [04:26<07:25, 4.36s/it]
68
  38%|███▊ | 63/164 [04:30<07:27, 4.44s/it]
69
  39%|███▉ | 64/164 [04:34<07:07, 4.27s/it]
70
  40%|███▉ | 65/164 [04:38<06:54, 4.19s/it]
71
  40%|████ | 66/164 [04:42<06:55, 4.24s/it]
72
  41%|████ | 67/164 [04:47<06:45, 4.18s/it]
73
  41%|████▏ | 68/164 [04:51<06:41, 4.19s/it]
74
  42%|████▏ | 69/164 [04:55<06:45, 4.27s/it]
75
  43%|████▎ | 70/164 [04:59<06:34, 4.20s/it]
76
  43%|████▎ | 71/164 [05:03<06:25, 4.14s/it]
77
  44%|████▍ | 72/164 [05:07<06:12, 4.05s/it]
78
  45%|████▍ | 73/164 [05:11<06:10, 4.08s/it]
79
  45%|████▌ | 74/164 [05:15<06:07, 4.08s/it]
80
  46%|████▌ | 75/164 [05:19<06:05, 4.10s/it]
81
  46%|████▋ | 76/164 [05:24<06:08, 4.18s/it]
82
  47%|████▋ | 77/164 [05:28<06:00, 4.14s/it]
83
  48%|████▊ | 78/164 [05:32<05:59, 4.18s/it]
84
  48%|████▊ | 79/164 [05:36<05:50, 4.13s/it]
85
  49%|████▉ | 80/164 [05:40<05:43, 4.09s/it]
86
  49%|████▉ | 81/164 [05:44<05:43, 4.14s/it]
87
  50%|█████ | 82/164 [05:49<05:49, 4.26s/it]
88
  51%|█████ | 83/164 [05:53<05:34, 4.13s/it]
89
  51%|█████ | 84/164 [05:57<05:28, 4.11s/it]
90
  52%|█████▏ | 85/164 [06:01<05:19, 4.04s/it]
91
  52%|█████▏ | 86/164 [06:05<05:12, 4.01s/it]
92
  53%|█████▎ | 87/164 [06:09<05:09, 4.02s/it]
93
  54%|█████▎ | 88/164 [06:13<05:08, 4.06s/it]
94
  54%|█████▍ | 89/164 [06:17<05:02, 4.03s/it]
95
  55%|█████▍ | 90/164 [06:21<05:08, 4.17s/it]
96
  55%|█████▌ | 91/164 [06:25<04:55, 4.05s/it]
97
  56%|█████▌ | 92/164 [06:29<04:55, 4.10s/it]
98
  57%|█████▋ | 93/164 [06:33<04:45, 4.03s/it]
99
  57%|█████▋ | 94/164 [06:38<04:54, 4.21s/it]
100
  58%|█████▊ | 95/164 [06:42<04:45, 4.14s/it]
101
  59%|█████▊ | 96/164 [06:46<04:40, 4.13s/it]
102
  59%|█████▉ | 97/164 [06:50<04:31, 4.06s/it]
103
  60%|█████▉ | 98/164 [06:54<04:26, 4.03s/it]
104
  60%|██████ | 99/164 [06:58<04:24, 4.08s/it]
105
  61%|██████ | 100/164 [07:01<04:09, 3.90s/it]
106
  62%|██████▏ | 101/164 [07:05<04:09, 3.96s/it]
107
  62%|██████▏ | 102/164 [07:10<04:12, 4.08s/it]
108
  63%|██████▎ | 103/164 [07:14<04:03, 4.00s/it]
109
  63%|██████▎ | 104/164 [07:17<03:55, 3.93s/it]
110
  64%|██████▍ | 105/164 [07:21<03:54, 3.97s/it]
111
  65%|██████▍ | 106/164 [07:26<03:53, 4.02s/it]
112
  65%|██████▌ | 107/164 [07:30<03:51, 4.06s/it]
113
  66%|██████▌ | 108/164 [07:34<03:44, 4.01s/it]
114
  66%|██████▋ | 109/164 [07:38<03:38, 3.98s/it]
115
  67%|██████▋ | 110/164 [07:41<03:33, 3.94s/it]
116
  68%|██████▊ | 111/164 [07:46<03:34, 4.06s/it]
117
  68%|██████▊ | 112/164 [07:50<03:30, 4.05s/it]
118
  69%|██████▉ | 113/164 [07:54<03:29, 4.11s/it]
119
  70%|██████▉ | 114/164 [07:58<03:24, 4.09s/it]
120
  70%|███████ | 115/164 [08:02<03:17, 4.04s/it]
121
  71%|███████ | 116/164 [08:06<03:17, 4.12s/it]
122
  71%|███████▏ | 117/164 [08:10<03:10, 4.04s/it]
123
  72%|███████▏ | 118/164 [08:14<03:06, 4.05s/it]
124
  73%|███████▎ | 119/164 [08:18<03:03, 4.09s/it]
125
  73%|███████▎ | 120/164 [08:23<03:08, 4.29s/it]
126
  74%|███████▍ | 121/164 [08:27<03:03, 4.26s/it]
127
  74%|███████▍ | 122/164 [08:31<02:56, 4.20s/it]
128
  75%|███████▌ | 123/164 [08:35<02:47, 4.09s/it]
129
  76%|███████▌ | 124/164 [08:39<02:43, 4.10s/it]
130
  76%|███████▌ | 125/164 [08:43<02:39, 4.09s/it]
131
  77%|███████▋ | 126/164 [08:48<02:36, 4.12s/it]
132
  77%|███████▋ | 127/164 [08:51<02:29, 4.04s/it]
133
  78%|███████▊ | 128/164 [08:56<02:26, 4.06s/it]
134
  79%|███████▊ | 129/164 [09:00<02:21, 4.04s/it]
135
  79%|███████▉ | 130/164 [09:04<02:18, 4.08s/it]
136
  80%|███████▉ | 131/164 [09:08<02:15, 4.12s/it]
137
  80%|████████ | 132/164 [09:12<02:09, 4.04s/it]
138
  81%|████████ | 133/164 [09:16<02:10, 4.20s/it]
139
  82%|████████▏ | 134/164 [09:21<02:05, 4.19s/it]
140
  82%|████████▏ | 135/164 [09:24<01:57, 4.05s/it]
141
  83%|████████▎ | 136/164 [09:28<01:53, 4.05s/it]
142
  84%|████████▎ | 137/164 [09:32<01:50, 4.09s/it]
143
  84%|████████▍ | 138/164 [09:37<01:47, 4.14s/it]
144
  85%|████████▍ | 139/164 [09:41<01:44, 4.18s/it]
145
  85%|████████▌ | 140/164 [09:45<01:39, 4.13s/it]
146
  86%|████████▌ | 141/164 [09:49<01:36, 4.21s/it]
147
  87%|████████▋ | 142/164 [09:54<01:32, 4.22s/it]
148
  87%|████████▋ | 143/164 [09:58<01:27, 4.17s/it]
149
  88%|████████▊ | 144/164 [10:02<01:22, 4.13s/it]
150
  88%|████████▊ | 145/164 [10:06<01:20, 4.22s/it]
151
  89%|████████▉ | 146/164 [10:10<01:16, 4.24s/it]
152
  90%|████████▉ | 147/164 [10:14<01:10, 4.13s/it]
153
  90%|█████████ | 148/164 [10:19<01:06, 4.15s/it]
154
  91%|█████████ | 149/164 [10:23<01:03, 4.23s/it]
155
  91%|█████████▏| 150/164 [10:27<00:59, 4.23s/it]
156
  92%|█████████▏| 151/164 [10:31<00:54, 4.21s/it]
157
  93%|█████████▎| 152/164 [10:35<00:49, 4.11s/it]
158
  93%|█████████▎| 153/164 [10:39<00:45, 4.11s/it]
159
  94%|█████████▍| 154/164 [10:43<00:41, 4.11s/it]
160
  95%|█████████▍| 155/164 [10:48<00:37, 4.22s/it]
161
  95%|█████████▌| 156/164 [10:52<00:33, 4.13s/it]
162
  96%|█████████▌| 157/164 [10:56<00:28, 4.10s/it]
163
  96%|█████████▋| 158/164 [11:00<00:24, 4.16s/it]
164
  97%|█████████▋| 159/164 [11:04<00:20, 4.20s/it]
165
  98%|█████████▊| 160/164 [11:09<00:16, 4.20s/it]
166
  98%|█████████▊| 161/164 [11:13<00:12, 4.27s/it]
167
  99%|█████████▉| 162/164 [11:17<00:08, 4.21s/it]
168
  99%|█████████▉| 163/164 [11:21<00:04, 4.16s/it]
169
+ Results saved to ./results/baseline_humaneval_results_20260213_102128.jsonl