Lekr0 commited on 7 days ago

Commit

4024ed7

verified ·

1 Parent(s): 7c50656

Add files using upload-large-folder tool

Browse files

This view is limited to 50 files because it contains too many changes. See raw diff

Files changed (50) hide show

SpecForge-ext/.devcontainer/Dockerfile +32 -0
SpecForge-ext/.devcontainer/devcontainer.json +30 -0
SpecForge-ext/.editorconfig +25 -0
SpecForge-ext/.github/CODEOWNERS +11 -0
SpecForge-ext/.github/pull_request_template.md +30 -0
SpecForge-ext/.isort.cfg +3 -0
SpecForge-ext/LICENSE +21 -0
SpecForge-ext/MANIFEST.in +2 -0
SpecForge-ext/README.md +70 -0
SpecForge-ext/analyze_accept_length.sh +91 -0
SpecForge-ext/assets/logo.svg +0 -0
SpecForge-ext/configs/deepseek-v2-lite-eagle3.json +39 -0
SpecForge-ext/configs/deepseek-v3-671b-eagle3.json +32 -0
SpecForge-ext/configs/gemma3-1b-eagle3.json +32 -0
SpecForge-ext/configs/gpt-oss-120B-eagle3.json +30 -0
SpecForge-ext/configs/gpt-oss-20B-eagle3.json +30 -0
SpecForge-ext/configs/ling-flash-2.0-eagle3.json +24 -0
SpecForge-ext/configs/llama3-70B-ealge3.json +37 -0
SpecForge-ext/configs/llama3-8B-eagle3.json +24 -0
SpecForge-ext/configs/llama4-scout-17B-16E-eagle3.json +22 -0
SpecForge-ext/configs/longcat-flash-dflash.json +41 -0
SpecForge-ext/configs/longcat-flash-eagle3.json +31 -0
SpecForge-ext/configs/phi4-eagle3.json +27 -0
SpecForge-ext/configs/qwen2.5-7b-eagle3.json +30 -0
SpecForge-ext/configs/qwen2.5-vl-32b-eagle3.json +40 -0
SpecForge-ext/configs/qwen3-235B-A22B-eagle3.json +36 -0
SpecForge-ext/configs/qwen3-30B-A3B-eagle3.json +31 -0
SpecForge-ext/configs/qwen3-32b-eagle3.json +31 -0
SpecForge-ext/configs/qwen3-4b-eagle3.json +31 -0
SpecForge-ext/configs/qwen3-8b-dflash.json +41 -0
SpecForge-ext/configs/qwen3-8b-eagle3.json +31 -0
SpecForge-ext/configs/qwen3-8b-qwen3eagle-5layer.json +31 -0
SpecForge-ext/configs/qwen3-coder-30B-A3B-instruct-eagle3.json +31 -0
SpecForge-ext/configs/qwen3-coder-480B-A35B-instruct-eagle3.json +31 -0
SpecForge-ext/configs/qwen3-next-80b-a3b-eagle3.json +29 -0
SpecForge-ext/configs/qwq-32B-eagle3.json +28 -0
SpecForge-ext/datasets/README.md +5 -0
SpecForge-ext/datasets/download_laion.sh +36 -0
SpecForge-ext/docs/Makefile +58 -0
SpecForge-ext/docs/README.md +55 -0
SpecForge-ext/docs/conf.py +188 -0
SpecForge-ext/docs/deploy.py +22 -0
SpecForge-ext/docs/index.rst +53 -0
SpecForge-ext/docs/requirements.txt +20 -0
SpecForge-ext/docs/serve.sh +3 -0
SpecForge-ext/examples/run_deepseek_v3_671b_eagle3_online.sh +29 -0
SpecForge-ext/examples/run_qwen3_30b_a3b_eagle3_online.sh +29 -0
SpecForge-ext/examples/run_qwq_eagle3_online.sh +28 -0
SpecForge-ext/logs/baseline_gsm8k_20260213_100853.log +5 -0
SpecForge-ext/logs/baseline_humaneval_20260213_100956.log +5 -0

SpecForge-ext/.devcontainer/Dockerfile ADDED Viewed

	@@ -0,0 +1,32 @@

+FROM lmsysorg/sglang:dev
+# Run every shell-form RUN under bash with pipefail so that a failure on the
+# left-hand side of a pipe (e.g. the `curl ... | sh` rustup install at the
+# bottom of this file) fails the build instead of being silently ignored.
+SHELL ["/bin/bash", "-o", "pipefail", "-c"]
+# Create non-root user with specified UID and GID
+# NOTE: Replace with your own UID and GID (e.g. `--build-arg HOST_UID=$(id -u)`).
+# This is a workaround from https://github.com/microsoft/vscode-remote-release/issues/49#issuecomment-489060908.
+ARG HOST_UID=1003
+ARG HOST_GID=1003
+RUN groupadd -g "$HOST_GID" devuser && \
+    useradd -m -u "$HOST_UID" -g "$HOST_GID" -s /bin/zsh devuser
+# Give devuser passwordless sudo access.
+# The apt cache and package lists are removed in the same layer that created
+# them so they do not end up in the image.
+RUN apt-get update && apt-get install -y --no-install-recommends sudo && \
+    echo "devuser ALL=(ALL) NOPASSWD:ALL" > /etc/sudoers.d/devuser && \
+    apt-get clean && \
+    rm -rf /var/lib/apt/lists/*
+# Set up oh-my-zsh for devuser by copying root's shell/editor configuration
+# and rewriting the oh-my-zsh path in .zshrc to point at devuser's home.
+RUN cp -r /root/.oh-my-zsh /home/devuser/.oh-my-zsh && \
+    cp /root/.zshrc /home/devuser/.zshrc && \
+    cp /root/.vimrc /home/devuser/.vimrc && \
+    cp /root/.tmux.conf /home/devuser/.tmux.conf && \
+    sed -i 's|/root/.oh-my-zsh|/home/devuser/.oh-my-zsh|g' /home/devuser/.zshrc && \
+    chown -R devuser:devuser /home/devuser/
+# Set workspace directory and ownership
+WORKDIR /sgl-workspace/sglang
+RUN chown -R devuser:devuser /sgl-workspace
+# Switch to devuser; everything below runs without root privileges
+USER devuser
+# Install rust for devuser via rustup (non-interactive: `-y`).
+# With pipefail enabled above, a failed download aborts the build.
+RUN curl --proto '=https' --tlsv1.2 -sSf https://sh.rustup.rs | sh -s -- -y

SpecForge-ext/.devcontainer/devcontainer.json ADDED Viewed

	@@ -0,0 +1,30 @@

+{
+    "name": "sglang",
+    "build": {
+        "dockerfile": "Dockerfile"
+    },
+    "remoteUser": "devuser",
+    "customizations": {
+        "vscode": {
+            "extensions": [
+                // Python development
+                "ms-python.python",
+                "charliermarsh.ruff",
+                // Rust development
+                "rust-lang.rust-analyzer",
+                "tamasfe.even-better-toml"
+            ]
+        }
+    },
+    "forwardPorts": [],
+    "runArgs": [
+        "--gpus",
+        "all"
+    ],
+    // The two lines below ensure that your local changes in the sglang
+    // repo are automatically synced to the sglang pip package installed
+    // in the dev docker container. You can remove or comment out these
+    // two lines if you prefer to sync code changes manually.
+    "workspaceMount": "source=${localWorkspaceFolder},target=/sgl-workspace/specforge,type=bind",
+    "workspaceFolder": "/sgl-workspace/specforge"
+}

SpecForge-ext/.editorconfig ADDED Viewed

	@@ -0,0 +1,25 @@

+# https://editorconfig.org/
+root = true
+[*]
+charset = utf-8
+end_of_line = lf
+indent_style = space
+indent_size = 4
+trim_trailing_whitespace = true
+insert_final_newline = true
+[*.{json,yaml,yml}]
+indent_size = 2
+[*.md]
+indent_size = 2
+x-soft-wrap-text = true
+[*.rst]
+indent_size = 4
+x-soft-wrap-text = true
+[Makefile]
+indent_style = tab

SpecForge-ext/.github/CODEOWNERS ADDED Viewed

	@@ -0,0 +1,11 @@

+.github @FrankLeeeee
+/specforge/core @FrankLeeeee
+/specforge/data @zyksir @sleepcoo @shuaills
+/specforge/layers @FrankLeeeee @FlamingoPg @sleepcoo @shuaills
+/specforge/modeling @FlamingoPg @sleepcoo @shuaills @FrankLeeeee
+/tests @FrankLeeeee
+/assets @FrankLeeeee @zhyncs
+/examples @shuaills @sleepcoo @FlamingoPg
+/configs @FrankLeeeee @FlamingoPg
+/benchmarks @FrankLeeeee
+/scripts @shuaills @sleepcoo @FlamingoPg

SpecForge-ext/.github/pull_request_template.md ADDED Viewed

	@@ -0,0 +1,30 @@

+<!-- Thank you for your contribution! We appreciate it. The following guidelines will help improve your pull request and facilitate feedback. If anything is unclear, don't hesitate to submit your pull request and ask the maintainers for assistance. -->
+## Motivation
+<!-- Explain the purpose of this PR and the goals it aims to achieve. -->
+## Modifications
+<!-- Describe the changes made in this PR. -->
+## Related Issues
+<!-- Link to any related issues here. e.g. "Fixes #123" or "Closes #456" -->
+## Accuracy Test
+<!-- If this PR affects model-side code (e.g., kernels, model architecture), please provide accuracy test results. Ref: https://docs.sglang.ai/references/accuracy_evaluation.html -->
+## Benchmark & Profiling
+<!-- If this PR is expected to impact performance, please provide benchmark and profiling results. Ref: https://docs.sglang.ai/references/benchmark_and_profiling.html -->
+## Checklist
+- [ ] Format your code according to the [Code Formatting with Pre-Commit](https://docs.sglang.ai/references/contribution_guide.html#code-formatting-with-pre-commit).
+- [ ] Add unit tests as outlined in the [Running Unit Tests](https://docs.sglang.ai/references/contribution_guide.html#running-unit-tests-adding-to-ci).
+- [ ] Update documentation / docstrings / example tutorials as needed, according to [Writing Documentation](https://docs.sglang.ai/references/contribution_guide.html#writing-documentation-running-docs-ci).
+- [ ] Provide throughput / latency benchmark results and accuracy evaluation results as needed, according to [Benchmark and Profiling](https://docs.sglang.ai/references/benchmark_and_profiling.html) and [Accuracy Results](https://docs.sglang.ai/references/accuracy_evaluation.html).
+- [ ] For reviewers: If you haven't made any contributions to this PR and are only assisting with merging the main branch, please remove yourself as a co-author when merging the PR.
+- [ ] Please feel free to join our Slack channel at https://sgl-fru7574.slack.com/archives/C09784E3EN6 to discuss your PR.

SpecForge-ext/.isort.cfg ADDED Viewed

	@@ -0,0 +1,3 @@

+[settings]
+profile=black
+known_first_party=sgl-eagle

SpecForge-ext/LICENSE ADDED Viewed

	@@ -0,0 +1,21 @@

+MIT License
+Copyright (c) 2025 sgl-project
+Permission is hereby granted, free of charge, to any person obtaining a copy
+of this software and associated documentation files (the "Software"), to deal
+in the Software without restriction, including without limitation the rights
+to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+copies of the Software, and to permit persons to whom the Software is
+furnished to do so, subject to the following conditions:
+The above copyright notice and this permission notice shall be included in all
+copies or substantial portions of the Software.
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+SOFTWARE.

SpecForge-ext/MANIFEST.in ADDED Viewed

	@@ -0,0 +1,2 @@


1	+ include requirements.txt
2	+ include version.txt

SpecForge-ext/README.md ADDED Viewed

	@@ -0,0 +1,70 @@

+<div align="center" id="sglangtop">
+<img src="./assets/logo.png" alt="logo" width="400" margin="10px"></img>
+[![documentation](https://img.shields.io/badge/📖-Documentation-red.svg?style=flat)](https://docs.sglang.ai/SpecForge/)
+[![SpecBundle](https://img.shields.io/badge/🤗%20SpecBundle-yellow.svg?style=flat)](https://huggingface.co/collections/lmsys/specbundle)
+[![DeepWiki](https://img.shields.io/badge/DeepWiki-SpecForge-blue.svg?logo=data:image/png;base64,iVBORw0KGgoAAAANSUhEUgAAACwAAAAyCAYAAAAnWDnqAAAAAXNSR0IArs4c6QAAA05JREFUaEPtmUtyEzEQhtWTQyQLHNak2AB7ZnyXZMEjXMGeK/AIi+QuHrMnbChYY7MIh8g01fJoopFb0uhhEqqcbWTp06/uv1saEDv4O3n3dV60RfP947Mm9/SQc0ICFQgzfc4CYZoTPAswgSJCCUJUnAAoRHOAUOcATwbmVLWdGoH//PB8mnKqScAhsD0kYP3j/Yt5LPQe2KvcXmGvRHcDnpxfL2zOYJ1mFwrryWTz0advv1Ut4CJgf5uhDuDj5eUcAUoahrdY/56ebRWeraTjMt/00Sh3UDtjgHtQNHwcRGOC98BJEAEymycmYcWwOprTgcB6VZ5JK5TAJ+fXGLBm3FDAmn6oPPjR4rKCAoJCal2eAiQp2x0vxTPB3ALO2CRkwmDy5WohzBDwSEFKRwPbknEggCPB/imwrycgxX2NzoMCHhPkDwqYMr9tRcP5qNrMZHkVnOjRMWwLCcr8ohBVb1OMjxLwGCvjTikrsBOiA6fNyCrm8V1rP93iVPpwaE+gO0SsWmPiXB+jikdf6SizrT5qKasx5j8ABbHpFTx+vFXp9EnYQmLx02h1QTTrl6eDqxLnGjporxl3NL3agEvXdT0WmEost648sQOYAeJS9Q7bfUVoMGnjo4AZdUMQku50McDcMWcBPvr0SzbTAFDfvJqwLzgxwATnCgnp4wDl6Aa+Ax283gghmj+vj7feE2KBBRMW3FzOpLOADl0Isb5587h/U4gGvkt5v60Z1VLG8BhYjbzRwyQZemwAd6cCR5/XFWLYZRIMpX39AR0tjaGGiGzLVyhse5C9RKC6ai42ppWPKiBagOvaYk8lO7DajerabOZP46Lby5wKjw1HCRx7p9sVMOWGzb/vA1hwiWc6jm3MvQDTogQkiqIhJV0nBQBTU+3okKCFDy9WwferkHjtxib7t3xIUQtHxnIwtx4mpg26/HfwVNVDb4oI9RHmx5WGelRVlrtiw43zboCLaxv46AZeB3IlTkwouebTr1y2NjSpHz68WNFjHvupy3q8TFn3Hos2IAk4Ju5dCo8B3wP7VPr/FGaKiG+T+v+TQqIrOqMTL1VdWV1DdmcbO8KXBz6esmYWYKPwDL5b5FA1a0hwapHiom0r/cKaoqr+27/XcrS5UwSMbQAAAABJRU5ErkJggg==)](https://deepwiki.com/sgl-project/SpecForge)
+[![github badge](https://img.shields.io/badge/📃%20LMSYS-Blog-black.svg?style=flat)](https://lmsys.org/blog/2025-07-25-spec-forge/)
+[![slack badge](https://img.shields.io/badge/Slack-join-blueviolet?logo=slack&amp)](https://sgl-fru7574.slack.com/archives/C09784E3EN6)
+[![license](https://img.shields.io/badge/License-MIT-blue)](./LICENSE)
+</div>
+## 📍 Overview
+SpecForge is an ecosystem project developed by the SGLang team. It is a framework for training speculative decoding models so that you can smoothly port them over to the SGLang serving framework to speed up your inference.
+We have seen many open-source projects for speculative decoding, but most of them are not well-maintained or not directly compatible with SGLang. We built this project because we want the open-source community to enjoy a speculative decoding framework that is:
+- regularly maintained by the SpecForge team: the code runs out of the box
+- directly compatible with SGLang: no additional effort is needed to port models to SGLang
+- equipped with performant training capabilities: we provide online/offline/tensor-parallel/FSDP training to suit your needs
+Check out [**our documentation**](https://docs.sglang.ai/SpecForge/) to get started.
+## 🚀 Accelerate with SpecBundle
+SpecBundle is a collection of production-grade speculative decoding models that are released by the SpecForge team and our industry partners. They provide a higher acceptance rate than the existing open-source checkpoints across a wide range of domains. Together with SGLang, you can experience up to 4x speedup for inference. Check out our resources below:
+| Item | Link |
+| --- | --- |
+| 📝 Documentation | [Link](https://docs.sglang.io/SpecForge/community_resources/specbundle.html) |
+| 📊 Performance Dashboard | [Link](https://docs.sglang.io/SpecForge/SpecBundle/index.html) |
+| 🤗 Hugging Face Collection | [Link](https://huggingface.co/collections/lmsys/specbundle) |
+## 🎉 News
+- [2025-12] 🎉 Released SpecBundle (phase 1) and SpecForge v0.2. Check out our blog at [LMSYS.org](https://lmsys.org/blog/2025-12-23-spec-bundle-phase-1/)
+- [2025-12] 🔔 Released the roadmap for 2026 Q1.
+- [2025-08] 🔔 SpecForge is listed as a [flagship project](https://lmsys.org/about/) in LMSYS. Congratulations to the SpecForge team!
+- [2025-08] 🔥 SpecForge powered the Eagle3 draft model for GPT-OSS. Check out the blog at [LMSYS.org](https://lmsys.org/blog/2025-08-27-gpt-oss/)
+- [2025-07] 🔥 SpecForge is released together with Llama4-Eagle3 checkpoints. Check out our blog at [LMSYS.org](https://lmsys.org/blog/2025-07-25-spec-forge/)
+## ✨ Acknowledgements
+<img src="./assets/acknowledgements.png" alt="acknowledgements"></img>
+We would like to express our sincere gratitude to the official EAGLE team, especially Hongyang Zhang and Yuhui Li, for their invaluable contributions and support. Our thanks also go to the NVIDIA team—particularly Avery H and Izzy Putterman—and to the Google team, especially Ying Wang, for their insightful discussions and generous assistance throughout the project.
+We are especially grateful to Meituan for their strong backing and meaningful contributions, which played a vital role in driving this project forward.
+This project has also been inspired by many outstanding open-source projects from the LLM community, including [EAGLE](https://github.com/SafeAILab/EAGLE), [BaldEagle](https://github.com/NickL77/BaldEagle), [TensorRT-Model-Optimizer](https://github.com/NVIDIA/TensorRT-Model-Optimizer), and others. Their contributions and shared knowledge have greatly benefited our work.
+## 💡 Special Thanks to Voltage Park
+We would like to extend our sincere thanks to [Voltage Park](https://www.voltagepark.com/), our official infrastructure partner. As part of a formal collaboration with the SGLang team, Voltage Park provided critical GPU resources that empowered us to train and evaluate large-scale speculative decoding models efficiently and reliably. This partnership was instrumental in making SpecForge possible. We deeply appreciate Voltage Park’s mission to make cutting-edge AI infrastructure more accessible, and we look forward to continued collaboration as we push the boundaries of open-source LLM serving and optimization.
+## 📃 Citation
+```bibtex
+@misc{specforge2025,
+  title={SpecForge: Train speculative decoding models effortlessly},
+  author={Shenggui Li and Yikai Zhu and Chao Wang and Fan Yin and Shuai Shi and Yubo Wang and Yi Zhang and Yingyi Huang and Haoshuai Zheng and Yineng Zhang},
+  year={2025},
+  publisher={GitHub},
+  howpublished={\url{https://github.com/sgl-project/specforge}},
+}
+```

SpecForge-ext/analyze_accept_length.sh ADDED Viewed

	@@ -0,0 +1,91 @@

+#!/bin/bash
+# 分析accept length的脚本
+echo "=========================================="
+echo "Accept Length Analysis"
+echo "=========================================="
+echo ""
+# 检查results目录
+if [ ! -d "results" ]; then
+    echo "Error: results directory not found"
+    exit 1
+fi
+# 查找所有结果文件
+result_files=$(ls results/*.jsonl 2>/dev/null)
+if [ -z "$result_files" ]; then
+    echo "No result files found in results/ directory"
+    echo ""
+    echo "Please run the benchmark first:"
+    echo "  python benchmarks/bench_eagle3.py ..."
+    exit 1
+fi
+echo "Found result files:"
+ls -lh results/*.jsonl
+echo ""
+echo "=========================================="
+echo ""
+# 分析每个结果文件
+for file in $result_files; do
+    filename=$(basename "$file")
+    echo "File: $filename"
+    echo "----------------------------------------"
+    # 检查文件是否包含mtbench结果
+    if grep -q "mtbench" "$file"; then
+        # 提取accept_length
+        echo "Accept lengths:"
+        cat "$file" | jq -r '.mtbench[0].metrics[] | "  Sample \(.sample_id): accept_length=\(.accept_length // "N/A"), output_tokens=\(.output_tokens // "N/A")"' 2>/dev/null
+        echo ""
+        echo "Statistics:"
+        # 计算平均值
+        avg_accept=$(cat "$file" | jq -r '.mtbench[0].metrics[] | .accept_length' 2>/dev/null | awk '{sum+=$1; count++} END {if(count>0) printf "  Average accept_length: %.4f\n", sum/count; else print "  No data"}')
+        echo "$avg_accept"
+        # 计算最小值和最大值
+        min_accept=$(cat "$file" | jq -r '.mtbench[0].metrics[] | .accept_length' 2>/dev/null | sort -n | head -1)
+        max_accept=$(cat "$file" | jq -r '.mtbench[0].metrics[] | .accept_length' 2>/dev/null | sort -n | tail -1)
+        echo "  Min accept_length: $min_accept"
+        echo "  Max accept_length: $max_accept"
+        # 样本数量
+        sample_count=$(cat "$file" | jq -r '.mtbench[0].metrics | length' 2>/dev/null)
+        echo "  Total samples: $sample_count"
+    else
+        echo "  No mtbench results found in this file"
+    fi
+    echo ""
+    echo "=========================================="
+    echo ""
+done
+# 如果有baseline和trained的结果，进行对比
+baseline_file=$(ls results/baseline*.jsonl 2>/dev/null | head -1)
+trained_file=$(ls results/trained*.jsonl 2>/dev/null | head -1)
+if [ -n "$baseline_file" ] && [ -n "$trained_file" ]; then
+    echo "Comparison: Baseline vs Trained"
+    echo "----------------------------------------"
+    baseline_avg=$(cat "$baseline_file" | jq -r '.mtbench[0].metrics[] | .accept_length' 2>/dev/null | awk '{sum+=$1; count++} END {if(count>0) print sum/count}')
+    trained_avg=$(cat "$trained_file" | jq -r '.mtbench[0].metrics[] | .accept_length' 2>/dev/null | awk '{sum+=$1; count++} END {if(count>0) print sum/count}')
+    if [ -n "$baseline_avg" ] && [ -n "$trained_avg" ]; then
+        echo "Baseline average: $baseline_avg"
+        echo "Trained average:  $trained_avg"
+        # 计算提升百分比
+        improvement=$(echo "$baseline_avg $trained_avg" | awk '{printf "%.2f%%", ($2-$1)/$1*100}')
+        echo "Improvement:      $improvement"
+    fi
+    echo ""
+fi
+echo "Done!"

SpecForge-ext/assets/logo.svg ADDED Viewed

SpecForge-ext/configs/deepseek-v2-lite-eagle3.json ADDED Viewed

	@@ -0,0 +1,39 @@

+{
+  "architectures": [
+    "LlamaForCausalLMEagle3"
+  ],
+  "attention_bias": false,
+  "attention_dropout": 0.0,
+  "bos_token_id": 100000,
+  "eos_token_id": 100001,
+  "head_dim": 128,
+  "hidden_act": "silu",
+  "hidden_size": 2048,
+  "initializer_range": 0.02,
+  "intermediate_size": 10944,
+  "max_position_embeddings": 163840,
+  "max_window_layers": 64,
+  "model_type": "llama",
+  "num_attention_heads": 16,
+  "num_hidden_layers": 1,
+  "num_key_value_heads": 16,
+  "rms_norm_eps": 1e-06,
+  "rope_scaling": {
+    "beta_fast": 32.0,
+    "beta_slow": 1.0,
+    "factor": 40.0,
+    "mscale": 0.707,
+    "mscale_all_dim": 0.707,
+    "original_max_position_embeddings": 4096,
+    "rope_type": "yarn"
+  },
+  "rope_theta": 10000,
+  "sliding_window": null,
+  "tie_word_embeddings": false,
+  "torch_dtype": "bfloat16",
+  "transformers_version": "4.33.1",
+  "use_cache": true,
+  "use_sliding_window": false,
+  "vocab_size": 102400,
+  "draft_vocab_size": 32000
+}

SpecForge-ext/configs/deepseek-v3-671b-eagle3.json ADDED Viewed

	@@ -0,0 +1,32 @@

+{
+  "architectures": [
+    "LlamaForCausalLMEagle3"
+  ],
+  "eagle_config": {
+    "eagle_aux_hidden_state_layer_ids": [
+      1,
+      29,
+      57
+    ],
+    "use_aux_hidden_state": true
+  },
+  "bos_token_id": 151643,
+  "eos_token_id": 151645,
+  "hidden_act": "silu",
+  "hidden_size": 7168,
+  "initializer_range": 0.02,
+  "intermediate_size": 40960,
+  "max_position_embeddings": 163840,
+  "model_type": "llama",
+  "num_attention_heads": 56,
+  "num_key_value_heads": 8,
+  "num_hidden_layers": 1,
+  "pad_token_id": 0,
+  "rms_norm_eps": 1e-05,
+  "tie_word_embeddings": false,
+  "torch_dtype": "float16",
+  "transformers_version": "4.51.0",
+  "use_cache": true,
+  "vocab_size": 129280,
+  "draft_vocab_size": 32000
+}

SpecForge-ext/configs/gemma3-1b-eagle3.json ADDED Viewed

	@@ -0,0 +1,32 @@

+{
+  "architectures": [
+    "LlamaForCausalLMEagle3"
+  ],
+  "attention_bias": false,
+  "attention_dropout": 0.0,
+  "bos_token_id": 2,
+  "eos_token_id": 1,
+  "pad_token_id": 0,
+  "head_dim": 256,
+  "hidden_act": "silu",
+  "hidden_size": 1152,
+  "initializer_range": 0.02,
+  "intermediate_size": 6912,
+  "max_position_embeddings": 32768,
+  "model_type": "llama",
+  "num_attention_heads": 4,
+  "num_hidden_layers": 1,
+  "num_key_value_heads": 1,
+  "rms_norm_eps": 1e-06,
+  "rope_scaling": null,
+  "rope_theta": 1000000,
+  "sliding_window": 512,
+  "tie_word_embeddings": false,
+  "torch_dtype": "bfloat16",
+  "transformers_version": "4.50.0",
+  "use_cache": true,
+  "use_sliding_window": false,
+  "vocab_size": 262145,
+  "draft_vocab_size": 32000,
+  "target_model_type": "gemma3_text"
+}

SpecForge-ext/configs/gpt-oss-120B-eagle3.json ADDED Viewed

	@@ -0,0 +1,30 @@

+{
+  "architectures": [
+    "LlamaForCausalLMEagle3"
+  ],
+  "eagle_config": {
+    "eagle_aux_hidden_state_layer_ids": [
+      1,
+      17,
+      33
+    ]
+  },
+  "head_dim": 64,
+  "hidden_act": "silu",
+  "hidden_size": 2880,
+  "initializer_range": 0.02,
+  "intermediate_size": 17280,
+  "max_position_embeddings": 4096,
+  "model_type": "llama",
+  "num_attention_heads": 64,
+  "num_key_value_heads": 8,
+  "num_hidden_layers": 1,
+  "pad_token_id": 0,
+  "rms_norm_eps": 1e-05,
+  "tie_word_embeddings": false,
+  "torch_dtype": "bfloat16",
+  "transformers_version": "4.52.3",
+  "use_cache": true,
+  "vocab_size": 201088,
+  "draft_vocab_size": 32000
+}

SpecForge-ext/configs/gpt-oss-20B-eagle3.json ADDED Viewed

	@@ -0,0 +1,30 @@

+{
+  "architectures": [
+    "LlamaForCausalLMEagle3"
+  ],
+  "eagle_config": {
+    "eagle_aux_hidden_state_layer_ids": [
+      1,
+      11,
+      21
+    ]
+  },
+  "head_dim": 64,
+  "hidden_act": "silu",
+  "hidden_size": 2880,
+  "initializer_range": 0.02,
+  "intermediate_size": 17280,
+  "max_position_embeddings": 4096,
+  "model_type": "llama",
+  "num_attention_heads": 64,
+  "num_key_value_heads": 8,
+  "num_hidden_layers": 1,
+  "pad_token_id": 0,
+  "rms_norm_eps": 1e-05,
+  "tie_word_embeddings": false,
+  "torch_dtype": "bfloat16",
+  "transformers_version": "4.52.3",
+  "use_cache": true,
+  "vocab_size": 201088,
+  "draft_vocab_size": 32000
+}

SpecForge-ext/configs/ling-flash-2.0-eagle3.json ADDED Viewed

	@@ -0,0 +1,24 @@

+{
+  "architectures": [
+    "LlamaForCausalLMEagle3"
+  ],
+  "bos_token_id": 163584,
+  "eos_token_id": 163585,
+  "hidden_act": "silu",
+  "hidden_size": 4096,
+  "initializer_range": 0.02,
+  "intermediate_size": 14336,
+  "max_position_embeddings": 32768,
+  "model_type": "llama",
+  "num_attention_heads": 32,
+  "num_key_value_heads": 8,
+  "num_hidden_layers": 1,
+  "pad_token_id": 0,
+  "rms_norm_eps": 1e-05,
+  "tie_word_embeddings": false,
+  "torch_dtype": "bfloat16",
+  "transformers_version": "4.57.1",
+  "use_cache": true,
+  "vocab_size": 157184,
+  "draft_vocab_size": 32000
+}

SpecForge-ext/configs/llama3-70B-ealge3.json ADDED Viewed

	@@ -0,0 +1,37 @@

+{
+    "architectures": [
+        "LlamaForCausalLMEagle3"
+    ],
+    "bos_token_id": 128000,
+    "eos_token_id": [
+        128001,
+        128008,
+        128009
+    ],
+    "head_dim": 128,
+    "hidden_act": "silu",
+    "hidden_size": 8192,
+    "initializer_range": 0.02,
+    "intermediate_size": 28672,
+    "max_position_embeddings": 4096,
+    "model_type": "llama",
+    "num_attention_heads": 64,
+    "num_key_value_heads": 8,
+    "num_hidden_layers": 1,
+    "pad_token_id": 0,
+    "rms_norm_eps": 1e-05,
+    "rope_scaling": {
+        "factor": 8.0,
+        "high_freq_factor": 4.0,
+        "low_freq_factor": 1.0,
+        "original_max_position_embeddings": 4096,
+        "rope_type": "llama3"
+    },
+    "rope_theta": 500000.0,
+    "tie_word_embeddings": false,
+    "torch_dtype": "float16",
+    "transformers_version": "4.28.1",
+    "use_cache": true,
+    "vocab_size": 128256,
+    "draft_vocab_size": 32000
+}

SpecForge-ext/configs/llama3-8B-eagle3.json ADDED Viewed

	@@ -0,0 +1,24 @@

+{
+  "architectures": [
+    "LlamaForCausalLMEagle3"
+  ],
+  "bos_token_id": 128000,
+  "eos_token_id": 128001,
+  "hidden_act": "silu",
+  "hidden_size": 4096,
+  "initializer_range": 0.02,
+  "intermediate_size": 14336,
+  "max_position_embeddings": 2048,
+  "model_type": "llama",
+  "num_attention_heads": 32,
+  "num_key_value_heads": 8,
+  "num_hidden_layers": 1,
+  "pad_token_id": 0,
+  "rms_norm_eps": 1e-05,
+  "tie_word_embeddings": false,
+  "torch_dtype": "float16",
+  "transformers_version": "4.28.1",
+  "use_cache": true,
+  "vocab_size": 128256,
+  "draft_vocab_size": 32000
+}

SpecForge-ext/configs/llama4-scout-17B-16E-eagle3.json ADDED Viewed

	@@ -0,0 +1,22 @@

+{
+    "architectures": [
+        "LlamaForCausalLMEagle3"
+    ],
+    "hidden_act": "silu",
+    "hidden_size": 5120,
+    "initializer_range": 0.02,
+    "intermediate_size": 32768,
+    "max_position_embeddings": 2048,
+    "model_type": "llama",
+    "num_attention_heads": 40,
+    "num_key_value_heads": 8,
+    "num_hidden_layers": 1,
+    "pad_token_id": 0,
+    "rms_norm_eps": 1e-05,
+    "tie_word_embeddings": false,
+    "torch_dtype": "bfloat16",
+    "transformers_version": "4.52.3",
+    "use_cache": true,
+    "vocab_size": 202048,
+    "draft_vocab_size": 32000
+}

SpecForge-ext/configs/longcat-flash-dflash.json ADDED Viewed

	@@ -0,0 +1,41 @@

+{
+    "architectures": [
+      "DFlashDraftModel"
+    ],
+    "attention_bias": false,
+    "attention_dropout": 0.0,
+    "auto_map": {
+      "AutoModel": "modeling_dflash.DFlashDraftModel"
+    },
+    "block_size": 16,
+    "bos_token_id": 1,
+    "dtype": "bfloat16",
+    "eos_token_id": 2,
+    "head_dim": 128,
+    "hidden_act": "silu",
+    "hidden_size": 6144,
+    "initializer_range": 0.02,
+    "intermediate_size": 12288,
+    "layer_types": [
+      "full_attention",
+      "full_attention",
+      "full_attention",
+      "full_attention",
+      "full_attention"
+    ],
+    "max_position_embeddings": 40960,
+    "max_window_layers": 5,
+    "model_type": "qwen3",
+    "num_attention_heads": 32,
+    "num_hidden_layers": 5,
+    "num_key_value_heads": 8,
+    "num_target_layers": 28,
+    "rms_norm_eps": 1e-06,
+    "rope_scaling": null,
+    "rope_theta": 1000000,
+    "sliding_window": null,
+    "tie_word_embeddings": false,
+    "use_cache": true,
+    "use_sliding_window": false,
+    "vocab_size": 131072
+  }

SpecForge-ext/configs/longcat-flash-eagle3.json ADDED Viewed

	@@ -0,0 +1,31 @@

+{
+    "architectures": [
+      "LlamaForCausalLMEagle3"
+    ],
+    "attention_bias": false,
+    "attention_dropout": 0.0,
+    "bos_token_id": 1,
+    "eos_token_id": 2,
+    "head_dim": 128,
+    "hidden_act": "silu",
+    "hidden_size": 6144,
+    "initializer_range": 0.02,
+    "intermediate_size": 12288,
+    "max_position_embeddings": 131072,
+    "max_window_layers": 48,
+    "model_type": "llama",
+    "num_attention_heads": 64,
+    "num_hidden_layers": 1,
+    "num_key_value_heads":16,
+    "rms_norm_eps": 1e-05,
+    "rope_scaling": null,
+    "rope_theta": 10000000.0,
+    "sliding_window": null,
+    "tie_word_embeddings": false,
+    "torch_dtype": "bfloat16",
+    "transformers_version": "4.53.2",
+    "use_cache": true,
+    "use_sliding_window": false,
+    "vocab_size": 131072,
+    "draft_vocab_size": 131072
+  }

SpecForge-ext/configs/phi4-eagle3.json ADDED Viewed

	@@ -0,0 +1,27 @@

+{
+  "architectures": [
+    "LlamaForCausalLMEagle3"
+  ],
+  "attention_bias": false,
+  "attention_dropout": 0.0,
+  "bos_token_id": 100257,
+  "eos_token_id": 100257,
+  "pad_token_id": 100257,
+  "hidden_act": "silu",
+  "hidden_size": 5120,
+  "initializer_range": 0.02,
+  "intermediate_size": 17920,
+  "max_position_embeddings": 16384,
+  "model_type": "phi3",
+  "num_attention_heads": 40,
+  "num_hidden_layers": 1,
+  "num_key_value_heads": 10,
+  "rms_norm_eps": 1e-05,
+  "rope_theta": 250000.0,
+  "tie_word_embeddings": false,
+  "torch_dtype": "bfloat16",
+  "transformers_version": "4.47.0",
+  "use_cache": true,
+  "vocab_size": 100352,
+  "draft_vocab_size": 32000
+}

SpecForge-ext/configs/qwen2.5-7b-eagle3.json ADDED Viewed

	@@ -0,0 +1,30 @@

+{
+  "architectures": [
+    "LlamaForCausalLMEagle3"
+  ],
+  "attention_bias": false,
+  "attention_dropout": 0.0,
+  "bos_token_id": 151643,
+  "eos_token_id": 151645,
+  "hidden_act": "silu",
+  "hidden_size": 3584,
+  "initializer_range": 0.02,
+  "intermediate_size": 18944,
+  "max_position_embeddings": 32768,
+  "max_window_layers": 28,
+  "model_type": "llama",
+  "num_attention_heads": 28,
+  "num_hidden_layers": 1,
+  "num_key_value_heads": 4,
+  "rms_norm_eps": 1e-06,
+  "rope_scaling": null,
+  "rope_theta": 1000000.0,
+  "sliding_window": 131072,
+  "tie_word_embeddings": false,
+  "torch_dtype": "bfloat16",
+  "transformers_version": "4.51.0",
+  "use_cache": true,
+  "use_sliding_window": false,
+  "vocab_size": 152064,
+  "draft_vocab_size": 16000
+}

SpecForge-ext/configs/qwen2.5-vl-32b-eagle3.json ADDED Viewed

	@@ -0,0 +1,40 @@

+{
+    "architectures": [
+      "LlamaForCausalLMEagle3"
+    ],
+    "attention_bias": false,
+    "attention_dropout": 0.0,
+    "bos_token_id": 151643,
+    "eos_token_id": 151645,
+    "head_dim": 128,
+    "hidden_act": "silu",
+    "hidden_size": 5120,
+    "initializer_range": 0.02,
+    "intermediate_size": 18944,
+    "max_position_embeddings": 8192,
+    "max_window_layers": 28,
+    "model_type": "llama",
+    "target_model_type": "qwen2_5_vl",
+    "num_attention_heads": 28,
+    "num_hidden_layers": 1,
+    "num_key_value_heads": 4,
+    "rms_norm_eps": 1e-06,
+    "pretraining_tp": 1,
+    "rope_scaling": {
+        "type": "mrope",
+        "mrope_section": [
+          16,
+          24,
+          24
+        ]
+      },
+    "rope_theta": 1000000,
+    "sliding_window": 32768,
+    "tie_word_embeddings": false,
+    "torch_dtype": "bfloat16",
+    "transformers_version": "4.51.0",
+    "use_cache": true,
+    "use_sliding_window": false,
+    "vocab_size": 152064,
+    "draft_vocab_size": 32000
+  }

SpecForge-ext/configs/qwen3-235B-A22B-eagle3.json ADDED Viewed

	@@ -0,0 +1,36 @@

+{
+  "architectures": [
+    "LlamaForCausalLMEagle3"
+  ],
+  "eagle_config": {
+    "eagle_aux_hidden_state_layer_ids": [
+      1,
+      46,
+      90
+    ],
+    "use_aux_hidden_state": true
+  },
+  "attention_bias": false,
+  "attention_dropout": 0.0,
+  "bos_token_id": 151643,
+  "draft_vocab_size": 32000,
+  "eos_token_id": 151645,
+  "head_dim": 128,
+  "hidden_act": "silu",
+  "hidden_size": 4096,
+  "initializer_range": 0.02,
+  "intermediate_size": 24576,
+  "max_position_embeddings": 40960,
+  "model_type": "llama",
+  "num_attention_heads": 64,
+  "num_hidden_layers": 1,
+  "num_key_value_heads": 4,
+  "rms_norm_eps": 1e-06,
+  "rope_theta": 1000000.0,
+  "rope_scaling": null,
+  "tie_word_embeddings": false,
+  "torch_dtype": "bfloat16",
+  "transformers_version": "4.51.0",
+  "use_cache": true,
+  "vocab_size": 151936
+}

SpecForge-ext/configs/qwen3-30B-A3B-eagle3.json ADDED Viewed

	@@ -0,0 +1,31 @@

+{
+  "architectures": [
+    "LlamaForCausalLMEagle3"
+  ],
+  "attention_bias": false,
+  "attention_dropout": 0.0,
+  "bos_token_id": 151643,
+  "eos_token_id": 151645,
+  "head_dim": 128,
+  "hidden_act": "silu",
+  "hidden_size": 2048,
+  "initializer_range": 0.02,
+  "intermediate_size": 12288,
+  "max_position_embeddings": 2048,
+  "max_window_layers": 48,
+  "model_type": "llama",
+  "num_attention_heads": 32,
+  "num_hidden_layers": 1,
+  "num_key_value_heads":4,
+  "rms_norm_eps": 1e-06,
+  "rope_scaling": null,
+  "rope_theta": 1000000.0,
+  "sliding_window": null,
+  "tie_word_embeddings": false,
+  "torch_dtype": "bfloat16",
+  "transformers_version": "4.53.2",
+  "use_cache": true,
+  "use_sliding_window": false,
+  "vocab_size": 151936,
+  "draft_vocab_size": 32000
+}

SpecForge-ext/configs/qwen3-32b-eagle3.json ADDED Viewed

	@@ -0,0 +1,31 @@

+{
+  "architectures": [
+    "LlamaForCausalLMEagle3"
+  ],
+  "attention_bias": false,
+  "attention_dropout": 0.0,
+  "bos_token_id": 151643,
+  "eos_token_id": 151645,
+  "head_dim": 128,
+  "hidden_act": "silu",
+  "hidden_size": 5120,
+  "initializer_range": 0.02,
+  "intermediate_size": 25600,
+  "max_position_embeddings": 40960,
+  "max_window_layers": 64,
+  "model_type": "llama",
+  "num_attention_heads": 64,
+  "num_hidden_layers": 1,
+  "num_key_value_heads": 8,
+  "rms_norm_eps": 1e-06,
+  "rope_scaling": null,
+  "rope_theta": 1000000,
+  "sliding_window": null,
+  "tie_word_embeddings": false,
+  "torch_dtype": "bfloat16",
+  "transformers_version": "4.51.0",
+  "use_cache": true,
+  "use_sliding_window": false,
+  "vocab_size": 151936,
+  "draft_vocab_size": 32000
+}

SpecForge-ext/configs/qwen3-4b-eagle3.json ADDED Viewed

	@@ -0,0 +1,31 @@

+{
+  "architectures": [
+    "LlamaForCausalLMEagle3"
+  ],
+  "attention_bias": false,
+  "attention_dropout": 0.0,
+  "bos_token_id": 151643,
+  "eos_token_id": 151645,
+  "head_dim": 128,
+  "hidden_act": "silu",
+  "hidden_size": 2560,
+  "initializer_range": 0.02,
+  "intermediate_size": 9728,
+  "max_position_embeddings": 40960,
+  "max_window_layers": 36,
+  "model_type": "llama",
+  "num_attention_heads": 32,
+  "num_hidden_layers": 1,
+  "num_key_value_heads": 8,
+  "rms_norm_eps": 1e-06,
+  "rope_scaling": null,
+  "rope_theta": 1000000,
+  "sliding_window": null,
+  "tie_word_embeddings": false,
+  "torch_dtype": "bfloat16",
+  "transformers_version": "4.51.0",
+  "use_cache": true,
+  "use_sliding_window": false,
+  "vocab_size": 151936,
+  "draft_vocab_size": 32000
+}

SpecForge-ext/configs/qwen3-8b-dflash.json ADDED Viewed

	@@ -0,0 +1,41 @@

+{
+  "architectures": [
+    "DFlashDraftModel"
+  ],
+  "attention_bias": false,
+  "attention_dropout": 0.0,
+  "auto_map": {
+    "AutoModel": "modeling_dflash.DFlashDraftModel"
+  },
+  "block_size": 16,
+  "bos_token_id": 151643,
+  "dtype": "bfloat16",
+  "eos_token_id": 151645,
+  "head_dim": 128,
+  "hidden_act": "silu",
+  "hidden_size": 4096,
+  "initializer_range": 0.02,
+  "intermediate_size": 12288,
+  "layer_types": [
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention"
+  ],
+  "max_position_embeddings": 40960,
+  "max_window_layers": 5,
+  "model_type": "qwen3",
+  "num_attention_heads": 32,
+  "num_hidden_layers": 5,
+  "num_key_value_heads": 8,
+  "num_target_layers": 36,
+  "rms_norm_eps": 1e-06,
+  "rope_scaling": null,
+  "rope_theta": 1000000,
+  "sliding_window": null,
+  "tie_word_embeddings": false,
+  "use_cache": true,
+  "use_sliding_window": false,
+  "vocab_size": 151936
+}

SpecForge-ext/configs/qwen3-8b-eagle3.json ADDED Viewed

	@@ -0,0 +1,31 @@

+{
+  "architectures": [
+    "LlamaForCausalLMEagle3"
+  ],
+  "attention_bias": false,
+  "attention_dropout": 0.0,
+  "bos_token_id": 151643,
+  "eos_token_id": 151645,
+  "head_dim": 128,
+  "hidden_act": "silu",
+  "hidden_size": 4096,
+  "initializer_range": 0.02,
+  "intermediate_size": 12288,
+  "max_position_embeddings": 40960,
+  "max_window_layers": 36,
+  "model_type": "llama",
+  "num_attention_heads": 32,
+  "num_hidden_layers": 1,
+  "num_key_value_heads":8 ,
+  "rms_norm_eps": 1e-06,
+  "rope_scaling": null,
+  "rope_theta": 1000000,
+  "sliding_window": null,
+  "tie_word_embeddings": false,
+  "torch_dtype": "bfloat16",
+  "transformers_version": "4.51.0",
+  "use_cache": true,
+  "use_sliding_window": false,
+  "vocab_size": 151936,
+  "draft_vocab_size": 32000
+}

SpecForge-ext/configs/qwen3-8b-qwen3eagle-5layer.json ADDED Viewed

	@@ -0,0 +1,31 @@

+{
+  "architectures": [
+    "LlamaForCausalLMEagle3"
+  ],
+  "attention_bias": false,
+  "attention_dropout": 0.0,
+  "bos_token_id": 151643,
+  "eos_token_id": 151645,
+  "head_dim": 128,
+  "hidden_act": "silu",
+  "hidden_size": 4096,
+  "initializer_range": 0.02,
+  "intermediate_size": 12288,
+  "max_position_embeddings": 40960,
+  "max_window_layers": 36,
+  "model_type": "llama",
+  "num_attention_heads": 32,
+  "num_hidden_layers": 5,
+  "num_key_value_heads": 8,
+  "rms_norm_eps": 1e-06,
+  "rope_scaling": null,
+  "rope_theta": 1000000,
+  "sliding_window": null,
+  "tie_word_embeddings": false,
+  "torch_dtype": "bfloat16",
+  "transformers_version": "4.51.0",
+  "use_cache": true,
+  "use_sliding_window": false,
+  "vocab_size": 151936,
+  "draft_vocab_size": 32000
+}

SpecForge-ext/configs/qwen3-coder-30B-A3B-instruct-eagle3.json ADDED Viewed

	@@ -0,0 +1,31 @@

+{
+  "architectures": [
+    "LlamaForCausalLMEagle3"
+  ],
+  "attention_bias": false,
+  "attention_dropout": 0.0,
+  "bos_token_id": 151643,
+  "eos_token_id": 151645,
+  "head_dim": 128,
+  "hidden_act": "silu",
+  "hidden_size": 2048,
+  "initializer_range": 0.02,
+  "intermediate_size": 12288,
+  "max_position_embeddings": 2048,
+  "max_window_layers": 48,
+  "model_type": "llama",
+  "num_attention_heads": 32,
+  "num_hidden_layers": 1,
+  "num_key_value_heads": 4,
+  "rms_norm_eps": 1e-06,
+  "rope_scaling": null,
+  "rope_theta": 1000000.0,
+  "sliding_window": null,
+  "tie_word_embeddings": false,
+  "torch_dtype": "bfloat16",
+  "transformers_version": "4.53.2",
+  "use_cache": true,
+  "use_sliding_window": false,
+  "vocab_size": 151936,
+  "draft_vocab_size": 32000
+}

SpecForge-ext/configs/qwen3-coder-480B-A35B-instruct-eagle3.json ADDED Viewed

	@@ -0,0 +1,31 @@

+{
+  "architectures": [
+    "LlamaForCausalLMEagle3"
+  ],
+  "attention_bias": false,
+  "attention_dropout": 0.0,
+  "bos_token_id": 151643,
+  "eos_token_id": 151645,
+  "head_dim": 128,
+  "hidden_act": "silu",
+  "hidden_size": 6144,
+  "initializer_range": 0.02,
+  "intermediate_size": 16384,
+  "max_position_embeddings": 262144,
+  "max_window_layers": 62,
+  "model_type": "llama",
+  "num_attention_heads": 96,
+  "num_hidden_layers": 1,
+  "num_key_value_heads":8,
+  "rms_norm_eps": 1e-06,
+  "rope_scaling": null,
+  "rope_theta": 1000000,
+  "sliding_window": null,
+  "tie_word_embeddings": false,
+  "torch_dtype": "bfloat16",
+  "transformers_version": "4.51.0",
+  "use_cache": true,
+  "use_sliding_window": false,
+  "vocab_size": 151936,
+  "draft_vocab_size": 32000
+}

SpecForge-ext/configs/qwen3-next-80b-a3b-eagle3.json ADDED Viewed

	@@ -0,0 +1,29 @@

+{
+    "architectures": [
+      "LlamaForCausalLMEagle3"
+    ],
+    "attention_dropout": 0.0,
+    "bos_token_id": 151643,
+    "decoder_sparse_step": 1,
+    "eos_token_id": 151645,
+    "head_dim": 256,
+    "hidden_act": "silu",
+    "hidden_size": 2048,
+    "initializer_range": 0.02,
+    "intermediate_size": 16384,
+    "max_position_embeddings": 262144,
+    "model_type": "llama",
+    "num_attention_heads": 16,
+    "num_hidden_layers": 1,
+    "num_key_value_heads": 2,
+    "rms_norm_eps": 1e-06,
+    "rope_scaling": null,
+    "rope_theta": 10000000,
+    "tie_word_embeddings": false,
+    "torch_dtype": "bfloat16",
+    "transformers_version": "4.57.0.dev0",
+    "use_cache": true,
+    "use_sliding_window": false,
+    "vocab_size": 151936,
+    "draft_vocab_size": 32000
+  }

SpecForge-ext/configs/qwq-32B-eagle3.json ADDED Viewed

	@@ -0,0 +1,28 @@

+{
+  "architectures": [
+    "LlamaForCausalLMEagle3"
+  ],
+  "attention_dropout": 0.0,
+  "bos_token_id": 151643,
+  "eos_token_id": 151645,
+  "hidden_act": "silu",
+  "hidden_size": 5120,
+  "initializer_range": 0.02,
+  "intermediate_size": 27648,
+  "max_position_embeddings": 40960,
+  "max_window_layers": 64,
+  "model_type": "qwen2",
+  "num_attention_heads": 40,
+  "num_hidden_layers": 1,
+  "num_key_value_heads": 8,
+  "rms_norm_eps": 1e-05,
+  "rope_theta": 1000000.0,
+  "sliding_window": 32768,
+  "tie_word_embeddings": false,
+  "torch_dtype": "bfloat16",
+  "transformers_version": "4.43.1",
+  "use_cache": true,
+  "use_sliding_window": false,
+  "vocab_size": 152064,
+  "draft_vocab_size": 32000
+}

SpecForge-ext/datasets/README.md ADDED Viewed

	@@ -0,0 +1,5 @@

+## Store Comprehensive Datasets Download Scripts
+| DatasetName | Github | Huggingface | command |
+| -------- | -------- | -------- | -------- |
+| ALLaVA-4V | [link](https://github.com/FreedomIntelligence/ALLaVA) | [link](https://huggingface.co/datasets/FreedomIntelligence/ALLaVA-4V) | download_laion.sh |

SpecForge-ext/datasets/download_laion.sh ADDED Viewed

	@@ -0,0 +1,36 @@

+#!/bin/bash
+# Download the ALLaVA-4V LAION subset from Hugging Face into ./allava_laion:
+# two annotation JSON files plus ten image archives, which are then unpacked
+# into ./allava_laion/images. The C-style for loops below need bash.
+laion_root="allava_laion"
+# -p so that a re-run (wget -c below continues partial downloads) does not
+# fail just because the directory already exists.
+mkdir -p "$laion_root"
+# Stop here rather than downloading into the caller's working directory.
+cd "$laion_root" || exit 1
+# 1. download annotation files
+## 1.1 caption
+# URLs are quoted because they contain '?', which the shell treats as a glob character.
+wget -c -O ALLaVA-Caption-LAION-4V.json "https://huggingface.co/datasets/FreedomIntelligence/ALLaVA-4V/resolve/main/allava_laion/ALLaVA-Caption-LAION-4V.json?download=true"
+## 1.2 instruction
+wget -c -O ALLaVA-Instruct-LAION-4V.json "https://huggingface.co/datasets/FreedomIntelligence/ALLaVA-4V/resolve/main/allava_laion/ALLaVA-Instruct-LAION-4V.json?download=true"
+# 2. download and unzip images
+mkdir -p image_chunks
+## 2.1 download
+# All ten archives are fetched concurrently as background jobs.
+for ((i=0; i<10; i++))
+do
+    wget -c -O "image_chunks/images_$i.zip" "https://huggingface.co/datasets/FreedomIntelligence/ALLaVA-4V/resolve/main/allava_laion/image_chunks/images_$i.zip?download=true" &
+done
+mkdir -p images/
+# Block until every background download has exited before unpacking.
+wait
+## 2.2 unzip
+# -j drops the directory structure inside each archive; -o overwrites without prompting.
+for ((i=0; i<10; i++))
+do
+    unzip -j -o "image_chunks/images_$i.zip" -d images/ & # wait patiently, it takes a while...
+done
+wait
+echo "All done!"

SpecForge-ext/docs/Makefile ADDED Viewed

	@@ -0,0 +1,58 @@

+# Minimal Makefile for Sphinx documentation
+# Variables assigned with ?= can be overridden from the environment or the
+# make command line (e.g. `PORT=8080 make serve`).
+SPHINXOPTS    ?=
+SPHINXBUILD   ?= sphinx-build
+SPHINXAUTOBUILD ?= sphinx-autobuild
+SOURCEDIR     = .
+BUILDDIR      = _build
+PORT          ?= 8003
+# Print sphinx-build's own target list, followed by the extra target defined here.
+help:
+	@$(SPHINXBUILD) -M help "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O)
+	@echo ""
+	@echo "Additional targets:"
+	@echo "  serve       to build and serve documentation with auto-build and live reload"
+# Compile Notebook files and record execution time
+# Every *.ipynb under SOURCEDIR (paths under _build are pruned) is executed in
+# place with `jupyter nbconvert`, wrapped in `retry --delay=0 --times=2` and
+# dispatched through GNU parallel with -j3 and --halt soon,fail=1. Each job
+# appends "<notebook>: <seconds>s" to logs/timing.log and exits with
+# nbconvert's status; the total wall-clock time is appended at the end and the
+# log is printed. The whole recipe is one shell invocation (lines are joined
+# with backslashes), so `set -e` and the shell variables apply throughout;
+# `$$` is make's escape for a literal `$` passed to the shell.
+compile:
+	@set -e; \
+	echo "Starting Notebook compilation..."; \
+	mkdir -p logs; \
+	echo "Notebook execution timings:" > logs/timing.log; \
+	START_TOTAL=$$(date +%s); \
+	find $(SOURCEDIR) -path "*/_build/*" -prune -o -name "*.ipynb" -print0 | \
+		parallel -0 -j3 --halt soon,fail=1 ' \
+		NB_NAME=$$(basename {}); \
+		START_TIME=$$(date +%s); \
+		retry --delay=0 --times=2 -- \
+			jupyter nbconvert --to notebook --execute --inplace "{}" \
+			--ExecutePreprocessor.timeout=600 \
+			--ExecutePreprocessor.kernel_name=python3; \
+		RET_CODE=$$?; \
+		END_TIME=$$(date +%s); \
+		ELAPSED_TIME=$$((END_TIME - START_TIME)); \
+		echo "$${NB_NAME}: $${ELAPSED_TIME}s" >> logs/timing.log; \
+		exit $$RET_CODE' || exit 1; \
+	END_TOTAL=$$(date +%s); \
+	TOTAL_ELAPSED=$$((END_TOTAL - START_TOTAL)); \
+	echo "---------------------------------" >> logs/timing.log; \
+	echo "Total execution time: $${TOTAL_ELAPSED}s" >> logs/timing.log; \
+	echo "All Notebook execution timings:" && cat logs/timing.log
+# Serve documentation with auto-build and live reload
+# Runs sphinx-autobuild on host 0.0.0.0 and port $(PORT), writing HTML to
+# $(BUILDDIR)/html, watching SOURCEDIR, and passing a --re-ignore pattern for
+# notebook checkpoints, Python bytecode/extension files and git paths.
+serve:
+	@echo "Starting auto-build server at http://0.0.0.0:$(PORT)"
+	@$(SPHINXAUTOBUILD) "$(SOURCEDIR)" "$(BUILDDIR)/html" \
+		--host 0.0.0.0 \
+		--port $(PORT) \
+		--watch $(SOURCEDIR) \
+		--re-ignore ".*\.(ipynb_checkpoints|pyc|pyo|pyd|git)"
+.PHONY: help Makefile compile clean serve
+# Catch-all: any target without an explicit rule in this file (e.g. `make html`)
+# is forwarded to `sphinx-build -M <target>`.
+%: Makefile
+	@$(SPHINXBUILD) -M $@ "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O)
+# Run nbstripout on every notebook below the current directory (this rewrites
+# the .ipynb files), then delete the build directory and the timing logs.
+clean:
+	find . -name "*.ipynb" -exec nbstripout {} \;
+	rm -rf $(BUILDDIR)
+	rm -rf logs

SpecForge-ext/docs/README.md ADDED Viewed

	@@ -0,0 +1,55 @@

+# SpecForge Documentation
+We recommend that new contributors start by writing documentation, which helps you quickly understand the SpecForge codebase.
+Most documentation files are located under the `docs/` folder.
+## Docs Workflow
+### Install Dependency
+```bash
+apt-get update && apt-get install -y pandoc parallel retry
+pip install -r requirements.txt
+```
+### Update Documentation
+Update your Jupyter notebooks in the appropriate subdirectories under `docs/`. If you add new files, remember to update `index.rst` (or relevant `.rst` files) accordingly.
+- **`pre-commit run --all-files`** manually runs all configured checks, applying fixes if possible. If it fails the first time, re-run it to ensure lint errors are fully resolved. Make sure your code passes all checks **before** creating a Pull Request.
+```bash
+# 1) Compile all Jupyter notebooks
+make compile  # This step can take a long time (10+ mins). You can consider skipping this step if you can make sure your added files are correct.
+make html
+# 2) Compile and Preview documentation locally with auto-build
+# This will automatically rebuild docs when files change
+# Open your browser at the displayed port to view the docs
+bash serve.sh
+# 2a) Alternative ways to serve documentation
+# Directly use make serve
+make serve
+# With custom port
+PORT=8080 make serve
+# 3) Clean notebook outputs
+# nbstripout removes notebook outputs so your PR stays clean
+pip install nbstripout
+find . -name '*.ipynb' -exec nbstripout {} \;
+# 4) Pre-commit checks and create a PR
+# After these checks pass, push your changes and open a PR on your branch
+pre-commit run --all-files
+```
+---
+## Documentation Style Guidelines
+- For common functionalities, we prefer **Jupyter Notebooks** over Markdown so that all examples can be executed and validated by our docs CI pipeline. For complex features (e.g., distributed serving), Markdown is preferred.
+- Keep in mind the documentation execution time when writing interactive Jupyter notebooks. Each interactive notebook will be run and compiled against every commit to ensure they are runnable, so it is important to apply some tips to reduce the documentation compilation time:
+  - Use small models (e.g., `qwen/qwen2.5-0.5b-instruct`) for most cases to reduce server launch time.
+  - Reuse the launched server as much as possible to reduce server launch time.
+- Do not use absolute links (e.g., `https://docs.sglang.ai/get_started/install.html`). Always prefer relative links (e.g., `../get_started/install.md`).
+- Follow the existing examples to learn how to launch a server, send a query and other common styles.

SpecForge-ext/docs/conf.py ADDED Viewed

	@@ -0,0 +1,188 @@

+import os
+import sys
+from datetime import datetime
+from pathlib import Path
+# Prepend a directory to sys.path so that project modules can be imported
+# while the docs are built.
+# NOTE(review): "../.." is resolved against the current working directory,
+# not against this file, and points two levels up -- one level above
+# ROOT_PATH as computed below. Confirm this is the intended import root.
+sys.path.insert(0, os.path.abspath("../.."))
+DOCS_PATH = Path(__file__).parent
+ROOT_PATH = DOCS_PATH.parent
+# The version shown in the docs is the stripped content of version.txt in
+# the directory above docs/; the build fails here if that file is missing.
+version_file = ROOT_PATH.joinpath("version.txt")
+with open(version_file, "r") as f:
+    __version__ = f.read().strip()
+# NOTE(review): project is "SGLang" while copyright/author say SpecForge;
+# html_title and epub_title below are derived from this value -- confirm the
+# intended project name.
+project = "SGLang"
+copyright = f"2025-{datetime.now().year}, SpecForge"
+author = "SpecForge Team"
+version = __version__
+release = __version__
+# Sphinx extensions loaded for this build (autodoc family, MyST Markdown,
+# nbsphinx for notebooks, tabs, copy button, mermaid diagrams, MathJax).
+extensions = [
+    "sphinx.ext.autodoc",
+    "sphinx.ext.autosummary",
+    "sphinx.ext.napoleon",
+    "sphinx.ext.viewcode",
+    "sphinx.ext.autosectionlabel",
+    "sphinx.ext.intersphinx",
+    "sphinx_tabs.tabs",
+    "myst_parser",
+    "sphinx_copybutton",
+    "sphinxcontrib.mermaid",
+    "nbsphinx",
+    "sphinx.ext.mathjax",
+]
+# Notebook handling: nbsphinx is told never to execute notebooks during the
+# Sphinx build, so pages are rendered from whatever outputs are stored in the
+# .ipynb files (the docs Makefile has a separate `compile` target that
+# executes them in place).
+nbsphinx_allow_errors = True
+nbsphinx_execute = "never"
+autosectionlabel_prefix_document = True
+nbsphinx_allow_directives = True
+# Optional MyST syntax extensions enabled for Markdown sources.
+myst_enable_extensions = [
+    "dollarmath",
+    "amsmath",
+    "deflist",
+    "colon_fence",
+    "html_image",
+    "substitution",
+]
+myst_heading_anchors = 5
+nbsphinx_kernel_name = "python3"
+# NOTE(review): presumably only consulted when nbsphinx executes notebooks,
+# which nbsphinx_execute = "never" above turns off -- confirm still needed.
+nbsphinx_execute_arguments = [
+    "--InlineBackend.figure_formats={'svg', 'pdf'}",
+    "--InlineBackend.rc={'figure.dpi': 96}",
+]
+# NOTE(review): nb_render_priority looks like a myst-nb option; myst_nb is
+# not in `extensions` above, so verify that anything reads this setting.
+nb_render_priority = {
+    "html": (
+        "application/vnd.jupyter.widget-view+json",
+        "application/javascript",
+        "text/html",
+        "image/svg+xml",
+        "image/png",
+        "image/jpeg",
+        "text/markdown",
+        "text/latex",
+        "text/plain",
+    )
+}
+myst_ref_domains = ["std", "py"]
+templates_path = ["_templates"]
+# Source file extensions and the parser used for each.
+source_suffix = {
+    ".rst": "restructuredtext",
+    ".md": "markdown",
+}
+# Root document of the site (index.rst / index.md).
+master_doc = "index"
+language = "en"
+# Paths skipped when Sphinx collects source files.
+exclude_patterns = ["_build", "Thumbs.db", ".DS_Store"]
+pygments_style = "sphinx"
+# HTML output: theme, branding and theme options.
+html_theme = "sphinx_book_theme"
+# Logo and favicon are looked up under <repo root>/assets.
+# NOTE(review): confirm assets/logo.png and assets/logo.ico exist; the commit
+# file list shown for this change only mentions assets/logo.svg.
+html_logo = ROOT_PATH.joinpath("assets/logo.png").as_posix()
+html_favicon = ROOT_PATH.joinpath("assets/logo.ico").as_posix()
+# The page title follows `project`.
+html_title = project
+html_copy_source = True
+html_last_updated_fmt = ""
+# sphinx_book_theme options: repository links/buttons and navigation depth.
+# NOTE(review): repository_url and html_context below point at
+# sgl-project/sgl-project.github.io -- confirm that is where the "edit page",
+# "source" and "issues" buttons should lead for these docs.
+html_theme_options = {
+    "repository_url": "https://github.com/sgl-project/sgl-project.github.io",
+    "repository_branch": "main",
+    "show_navbar_depth": 3,
+    "max_navbar_depth": 4,
+    "collapse_navbar": True,
+    "use_edit_page_button": True,
+    "use_source_button": True,
+    "use_issues_button": True,
+    "use_repository_button": True,
+    "use_download_button": True,
+    "use_sidenotes": True,
+    "show_toc_level": 2,
+}
+html_context = {
+    "display_github": True,
+    "github_user": "sgl-project",
+    "github_repo": "sgl-project.github.io",
+    "github_version": "main",
+    "conf_py_path": "/docs/",
+}
+# Directories whose contents are copied into the built site as static files.
+html_static_path = ["_static", "spec_bundle/public"]
+html_css_files = ["css/custom_log.css"]
+# Sphinx extension hook, called with the application object.
+# NOTE(review): this registers the same stylesheet already listed in
+# html_css_files above, so it is presumably added twice -- confirm and drop one.
+def setup(app):
+    app.add_css_file("css/custom_log.css")
+# Settings for the non-HTML builders (HTML Help, LaTeX, man, Texinfo, EPUB).
+# NOTE(review): the names and titles below still say "sglang"/"SGLang Team"
+# while `author` is "SpecForge Team" -- confirm whether they should be renamed.
+htmlhelp_basename = "sglangdoc"
+latex_elements = {}
+latex_documents = [
+    (master_doc, "sglang.tex", "sglang Documentation", "SGLang Team", "manual"),
+]
+man_pages = [(master_doc, "sglang", "sglang Documentation", [author], 1)]
+texinfo_documents = [
+    (
+        master_doc,
+        "sglang",
+        "sglang Documentation",
+        author,
+        "sglang",
+        "One line description of project.",
+        "Miscellaneous",
+    ),
+]
+epub_title = project
+epub_exclude_files = ["search.html"]
+# sphinx_copybutton: treat ">>> " and "... " as prompts (the pattern is a regex).
+copybutton_prompt_text = r">>> |\.\.\. "
+copybutton_prompt_is_regexp = True
+autodoc_preserve_defaults = True
+navigation_with_keys = False
+# Modules autodoc replaces with mocks instead of importing them.
+autodoc_mock_imports = [
+    "torch",
+    "transformers",
+    "triton",
+]
+# External documentation sets that cross-references may resolve against.
+intersphinx_mapping = {
+    "python": ("https://docs.python.org/3.12", None),
+    "typing_extensions": ("https://typing-extensions.readthedocs.io/en/latest", None),
+    "pillow": ("https://pillow.readthedocs.io/en/stable", None),
+    "numpy": ("https://numpy.org/doc/stable", None),
+    "torch": ("https://pytorch.org/docs/stable", None),
+}
+# NOTE(review): html_theme is already set to the same value earlier in this
+# file; this second assignment is redundant.
+html_theme = "sphinx_book_theme"
+# reST snippet nbsphinx places before each notebook: a raw-HTML <style> block
+# that colors the stdout/stderr output areas light gray.
+nbsphinx_prolog = """
+.. raw:: html
+    <style>
+        .output_area.stderr, .output_area.stdout {
+            color: #d3d3d3 !important; /* light gray */
+        }
+    </style>
+"""

SpecForge-ext/docs/deploy.py ADDED Viewed

	@@ -0,0 +1,22 @@

+# Deploy the documents
+import os
+from datetime import datetime
+def run_cmd(cmd):
+    print(cmd)
+    os.system(cmd)
+run_cmd("cd $DOC_SITE_PATH; git pull")
+# (Optional) Remove old files
+# run_cmd("rm -rf $ALPA_SITE_PATH/*")
+run_cmd("cp -r _build/html/* $DOC_SITE_PATH")
+cmd_message = f"Update {datetime.now().strftime('%Y-%m-%d %H:%M:%S')}"
+run_cmd(
+    f"cd $DOC_SITE_PATH; git add .; git commit -m '{cmd_message}'; git push origin main"
+)

SpecForge-ext/docs/index.rst ADDED Viewed

	@@ -0,0 +1,53 @@

+SpecForge Documentation
+=======================
+SpecForge is an ecosystem project developed by the SGLang team. It is a framework for training speculative decoding models so that you can smoothly port them over to the SGLang serving framework to speed up your inference.
+.. toctree::
+   :maxdepth: 1
+   :caption: Get Started
+   get_started/installation.md
+   get_started/about.md
+.. toctree::
+   :maxdepth: 1
+   :caption: Concepts
+   concepts/speculative_decoding.md
+   concepts/EAGLE3.md
+.. toctree::
+   :maxdepth: 1
+   :caption: Basic Usage
+   basic_usage/data_preparation.md
+   basic_usage/training.md
+.. toctree::
+   :maxdepth: 1
+   :caption: Advanced Features
+   advanced_features/customization.md
+.. toctree::
+   :maxdepth: 1
+   :caption: Community Resources
+   community_resources/specbundle.md
+   community_resources/dashboard.md
+.. toctree::
+   :maxdepth: 1
+   :caption: Examples
+   examples/llama3-eagle3-online.md
+   examples/llama3-eagle3-offline.md
+.. toctree::
+   :maxdepth: 1
+   :caption: Benchmarks
+   benchmarks/benchmark.md

SpecForge-ext/docs/requirements.txt ADDED Viewed

	@@ -0,0 +1,20 @@

+ipykernel
+ipywidgets
+jupyter_client
+markdown>=3.4.0
+matplotlib
+myst-parser
+nbconvert
+nbsphinx
+pandoc
+pillow
+pydantic
+sphinx
+sphinx-book-theme
+sphinx-copybutton
+sphinx-tabs
+nbstripout
+sphinxcontrib-mermaid
+urllib3<2.0.0
+gguf>=0.10.0
+sphinx-autobuild

SpecForge-ext/docs/serve.sh ADDED Viewed

	@@ -0,0 +1,3 @@

+# Clean and serve documentation with auto-build
+make clean
+make serve

SpecForge-ext/examples/run_deepseek_v3_671b_eagle3_online.sh ADDED Viewed

	@@ -0,0 +1,29 @@

+SCRIPT_DIR=$( cd -- "$( dirname -- "${BASH_SOURCE[0]}" )" &> /dev/null && pwd )
+ROOT_DIR=$(dirname $SCRIPT_DIR)
+# train eagle3 for deepseek-v3
+NUM_GPUS=${1:-8}
+TP_SIZE=${2:-8}
+BUILD_DATASET_NUM_PROC=${BUILD_DATASET_NUM_PROC:-64}
+# train eagle3 online
+torchrun \
+    --standalone \
+    --nproc_per_node $NUM_GPUS \
+    $ROOT_DIR/scripts/train_eagle3.py \
+    --target-model-path deepseek-ai/DeepSeek-V3  \
+    --draft-model-config $ROOT_DIR/configs/deepseek-v3-671b-eagle3.json  \
+    --train-data-path $ROOT_DIR/cache/dataset/perfect-blend.jsonl  \
+    --build-dataset-num-proc $BUILD_DATASET_NUM_PROC \
+    --output-dir $ROOT_DIR/outputs/deepseek-v3-671B-eagle3-perfect-blend-online \
+    --tp-size $TP_SIZE \
+    --target-model-backend sglang \
+    --num-epochs 10 \
+    --batch-size 1 \
+    --learning-rate 5e-5 \
+    --max-length 2048 \
+    --chat-template deepseek-v3 \
+    --cache-dir $ROOT_DIR/cache \
+    --dist-timeout 60 \
+    --sglang-mem-fraction-static 0.75

SpecForge-ext/examples/run_qwen3_30b_a3b_eagle3_online.sh ADDED Viewed

	@@ -0,0 +1,29 @@

+#!/bin/bash
+SCRIPT_DIR=$( cd -- "$( dirname -- "${BASH_SOURCE[0]}" )" &> /dev/null && pwd )
+ROOT_DIR=$(dirname $SCRIPT_DIR)
+export TORCHINDUCTOR_CACHE_DIR=$ROOT_DIR/cache/compiled_kernels
+# support tp4/tp8 train eagle3 for Qwen3-30B-A3B
+NUM_GPUS=${1:-4}
+TP_SIZE=${2:-4}
+BUILD_DATASET_NUM_PROC=${BUILD_DATASET_NUM_PROC:-64}
+torchrun \
+    --standalone \
+    --nproc_per_node $NUM_GPUS \
+    $ROOT_DIR/scripts/train_eagle3.py \
+    --target-model-path Qwen/Qwen3-30B-A3B-Instruct-2507 \
+    --draft-model-config $ROOT_DIR/configs/qwen3-30B-A3B-eagle3.json \
+    --train-data-path $ROOT_DIR/cache/dataset/sharegpt_train.jsonl \
+    --build-dataset-num-proc $BUILD_DATASET_NUM_PROC \
+    --output-dir $ROOT_DIR/outputs/qwen3-30b-a3b-instruct-eagle3-sharegpt \
+    --num-epochs 10 \
+    --batch-size 1 \
+    --learning-rate 1e-4 \
+    --max-length 4096 \
+    --chat-template qwen \
+    --cache-dir $ROOT_DIR/cache \
+    --embedding-key model.embed_tokens.weight \
+    --tp-size $TP_SIZE \
+    --target-model-backend sglang

SpecForge-ext/examples/run_qwq_eagle3_online.sh ADDED Viewed

	@@ -0,0 +1,28 @@

+SCRIPT_DIR=$( cd -- "$( dirname -- "${BASH_SOURCE[0]}" )" &> /dev/null && pwd )
+ROOT_DIR=$(dirname $SCRIPT_DIR)
+export TORCHINDUCTOR_CACHE_DIR=$ROOT_DIR/cache/compiled_kernels
+# train eagle3 for qwq-32b
+NUM_GPUS=${1:-4}
+TP_SIZE=${2:-4}
+BUILD_DATASET_NUM_PROC=${BUILD_DATASET_NUM_PROC:-64}
+torchrun \
+    --standalone \
+    --nproc_per_node $NUM_GPUS \
+    $ROOT_DIR/scripts/train_eagle3.py \
+    --target-model-path Qwen/QwQ-32B \
+    --draft-model-config $ROOT_DIR/configs/qwq-32B-eagle3.json \
+    --train-data-path $ROOT_DIR/cache/dataset/sharegpt_train.jsonl \
+    --build-dataset-num-proc $BUILD_DATASET_NUM_PROC \
+    --output-dir $ROOT_DIR/outputs/qwq-32b-eagle3-sharegpt \
+    --num-epochs 10 \
+    --batch-size 1 \
+    --learning-rate 1e-4 \
+    --max-length 4096 \
+    --chat-template qwen \
+    --cache-dir $ROOT_DIR/cache \
+    --embedding-key model.embed_tokens.weight \
+    --tp-size $TP_SIZE \
+    --target-model-backend sglang

SpecForge-ext/logs/baseline_gsm8k_20260213_100853.log ADDED Viewed

@@ -0,0 +1,5 @@
  0%|          | 0/100 [00:00<?, ?it/s]
  1%|          | 1/100 [00:00<00:27,  3.65it/s]
  2%|▏         | 2/100 [00:00<00:24,  4.02it/s]
  3%|▎         | 3/100 [00:00<00:34,  2.82it/s]
  4%|▍         | 4/100 [00:01<00:29,  3.24it/s]
  5%|▌         | 5/100 [00:01<00:33,  2.82it/s]
  6%|▌         | 6/100 [00:02<00:43,  2.17it/s]
  7%|▋         | 7/100 [00:02<00:44,  2.11it/s]
  8%|▊         | 8/100 [00:03<00:55,  1.65it/s]
  9%|▉         | 9/100 [00:04<01:08,  1.32it/s]
 10%|█         | 10/100 [00:05<01:02,  1.44it/s]
 11%|█         | 11/100 [00:05<00:57,  1.55it/s]
 12%|█▏        | 12/100 [00:06<00:56,  1.55it/s]
 13%|█▎        | 13/100 [00:07<00:54,  1.58it/s]
 14%|█▍        | 14/100 [00:07<01:00,  1.43it/s]
 15%|█▌        | 15/100 [00:08<01:02,  1.36it/s]
 16%|█▌        | 16/100 [00:09<00:57,  1.46it/s]
 17%|█▋        | 17/100 [00:09<00:44,  1.86it/s]
 18%|█▊        | 18/100 [00:10<00:45,  1.80it/s]
 19%|█▉        | 19/100 [00:10<00:41,  1.96it/s]
 20%|██        | 20/100 [00:11<00:45,  1.76it/s]
 21%|██        | 21/100 [00:12<00:54,  1.45it/s]
 22%|██▏       | 22/100 [00:12<00:49,  1.57it/s]
 23%|██▎       | 23/100 [00:13<00:45,  1.68it/s]
 24%|██▍       | 24/100 [00:13<00:37,  2.02it/s]
 25%|██▌       | 25/100 [00:13<00:33,  2.23it/s]
 26%|██▌       | 26/100 [00:14<00:41,  1.77it/s]
 27%|██▋       | 27/100 [00:15<00:44,  1.64it/s]
 28%|██▊       | 28/100 [00:15<00:41,  1.73it/s]
 29%|██▉       | 29/100 [00:16<00:37,  1.91it/s]
 30%|███       | 30/100 [00:16<00:33,  2.12it/s]
 31%|███       | 31/100 [00:17<00:33,  2.04it/s]
 32%|███▏      | 32/100 [00:17<00:36,  1.88it/s]
 33%|███▎      | 33/100 [00:18<00:32,  2.07it/s]
 34%|███▍      | 34/100 [00:18<00:31,  2.10it/s]
 35%|███▌      | 35/100 [00:19<00:30,  2.11it/s]
 36%|███▌      | 36/100 [00:19<00:27,  2.32it/s]
 37%|███▋      | 37/100 [00:19<00:27,  2.30it/s]
 38%|███▊      | 38/100 [00:20<00:38,  1.62it/s]
 39%|███▉      | 39/100 [00:21<00:37,  1.63it/s]
 40%|████      | 40/100 [00:22<00:36,  1.64it/s]
 41%|████      | 41/100 [00:22<00:29,  1.97it/s]
 42%|████▏     | 42/100 [00:22<00:28,  2.06it/s]
 43%|████▎     | 43/100 [00:23<00:25,  2.21it/s]
 44%|████▍     | 44/100 [00:24<00:32,  1.74it/s]
 45%|████▌     | 45/100 [00:24<00:31,  1.75it/s]
 46%|████▌     | 46/100 [00:25<00:29,  1.81it/s]
 47%|████▋     | 47/100 [00:25<00:29,  1.78it/s]
 48%|████▊     | 48/100 [00:26<00:32,  1.61it/s]
 49%|████▉     | 49/100 [00:26<00:28,  1.79it/s]
 50%|█████     | 50/100 [00:27<00:25,  1.94it/s]
 51%|█████     | 51/100 [00:27<00:25,  1.95it/s]
 52%|█████▏    | 52/100 [00:28<00:23,  2.06it/s]
 53%|█████▎    | 53/100 [00:28<00:23,  2.00it/s]
 54%|█████▍    | 54/100 [00:29<00:23,  1.97it/s]
 55%|█████▌    | 55/100 [00:29<00:20,  2.20it/s]
 56%|█████▌    | 56/100 [00:29<00:18,  2.43it/s]
 57%|█████▋    | 57/100 [00:30<00:16,  2.56it/s]
 58%|█████▊    | 58/100 [00:30<00:17,  2.36it/s]
 59%|█████▉    | 59/100 [00:31<00:19,  2.12it/s]
 60%|██████    | 60/100 [00:31<00:17,  2.30it/s]
 61%|██████    | 61/100 [00:32<00:19,  2.00it/s]
 62%|██████▏   | 62/100 [00:32<00:19,  1.99it/s]
 63%|██████▎   | 63/100 [00:33<00:18,  1.99it/s]
 64%|██████▍   | 64/100 [00:34<00:19,  1.87it/s]
 65%|██████▌   | 65/100 [00:34<00:19,  1.79it/s]
 66%|██████▌   | 66/100 [00:35<00:18,  1.88it/s]
 67%|██████▋   | 67/100 [00:35<00:17,  1.91it/s]
 68%|██████▊   | 68/100 [00:36<00:16,  1.98it/s]
 69%|██████▉   | 69/100 [00:36<00:14,  2.13it/s]
 70%|███████   | 70/100 [00:36<00:13,  2.25it/s]
 71%|███████   | 71/100 [00:37<00:17,  1.65it/s]
 72%|███████▏  | 72/100 [00:38<00:14,  1.94it/s]
 73%|███████▎  | 73/100 [00:38<00:13,  2.01it/s]
 74%|███████▍  | 74/100 [00:39<00:14,  1.74it/s]
 75%|███████▌  | 75/100 [00:40<00:19,  1.29it/s]
 76%|███████▌  | 76/100 [00:41<00:18,  1.32it/s]
 77%|███████▋  | 77/100 [00:42<00:18,  1.24it/s]
 78%|███████▊  | 78/100 [00:42<00:15,  1.39it/s]
 79%|███████▉  | 79/100 [00:43<00:14,  1.42it/s]
 80%|████████  | 80/100 [00:43<00:11,  1.69it/s]
 81%|████████  | 81/100 [00:44<00:10,  1.84it/s]
 82%|████████▏ | 82/100 [00:44<00:10,  1.69it/s]
 83%|████████▎ | 83/100 [00:45<00:09,  1.77it/s]
 84%|████████▍ | 84/100 [00:45<00:07,  2.11it/s]
 85%|████████▌ | 85/100 [00:46<00:07,  1.97it/s]
 86%|████████▌ | 86/100 [00:46<00:06,  2.08it/s]
 87%|████████▋ | 87/100 [00:47<00:06,  2.08it/s]
 88%|████████▊ | 88/100 [00:48<00:07,  1.52it/s]
 89%|████████▉ | 89/100 [00:48<00:06,  1.74it/s]
 90%|█████████ | 90/100 [00:48<00:04,  2.02it/s]
 91%|█████████ | 91/100 [00:49<00:04,  1.92it/s]
 92%|█████████▏| 92/100 [00:49<00:03,  2.07it/s]
 93%|█████████▎| 93/100 [00:50<00:03,  2.19it/s]
 94%|█████████▍| 94/100 [00:50<00:02,  2.01it/s]
 95%|█████████▌| 95/100 [00:51<00:02,  1.92it/s]
 96%|█████████▌| 96/100 [00:51<00:02,  1.87it/s]
 97%|█████████▋| 97/100 [00:52<00:01,  2.15it/s]
 98%|█████████▊| 98/100 [00:52<00:00,  2.01it/s]
 99%|█████████▉| 99/100 [00:53<00:00,  2.24it/s]

+WARNING:sglang.srt.server_args:Attention backend not explicitly specified. Use fa3 backend by default.
+Running benchmark gsm8k with 100 prompts, batch size 1, steps None, topk None, num_draft_tokens None, subset None
+Loading GSM8K data from local: /workspace/hanrui/datasets/gsm8k/test.jsonl
  0%|          | 0/100 [00:00<?, ?it/s]
  1%|          | 1/100 [00:00<00:27,  3.65it/s]
  2%|▏         | 2/100 [00:00<00:24,  4.02it/s]
  3%|▎         | 3/100 [00:00<00:34,  2.82it/s]
  4%|▍         | 4/100 [00:01<00:29,  3.24it/s]
  5%|▌         | 5/100 [00:01<00:33,  2.82it/s]
  6%|▌         | 6/100 [00:02<00:43,  2.17it/s]
  7%|▋         | 7/100 [00:02<00:44,  2.11it/s]
  8%|▊         | 8/100 [00:03<00:55,  1.65it/s]
  9%|▉         | 9/100 [00:04<01:08,  1.32it/s]
 10%|█         | 10/100 [00:05<01:02,  1.44it/s]
 11%|█         | 11/100 [00:05<00:57,  1.55it/s]
 12%|█▏        | 12/100 [00:06<00:56,  1.55it/s]
 13%|█▎        | 13/100 [00:07<00:54,  1.58it/s]
 14%|█▍        | 14/100 [00:07<01:00,  1.43it/s]
 15%|█▌        | 15/100 [00:08<01:02,  1.36it/s]
 16%|█▌        | 16/100 [00:09<00:57,  1.46it/s]
 17%|█▋        | 17/100 [00:09<00:44,  1.86it/s]
 18%|█▊        | 18/100 [00:10<00:45,  1.80it/s]
 19%|█▉        | 19/100 [00:10<00:41,  1.96it/s]
 20%|██        | 20/100 [00:11<00:45,  1.76it/s]
 21%|██        | 21/100 [00:12<00:54,  1.45it/s]
 22%|██▏       | 22/100 [00:12<00:49,  1.57it/s]
 23%|██▎       | 23/100 [00:13<00:45,  1.68it/s]
 24%|██▍       | 24/100 [00:13<00:37,  2.02it/s]
 25%|██▌       | 25/100 [00:13<00:33,  2.23it/s]
 26%|██▌       | 26/100 [00:14<00:41,  1.77it/s]
 27%|██▋       | 27/100 [00:15<00:44,  1.64it/s]
 28%|██▊       | 28/100 [00:15<00:41,  1.73it/s]
 29%|██▉       | 29/100 [00:16<00:37,  1.91it/s]
 30%|███       | 30/100 [00:16<00:33,  2.12it/s]
 31%|███       | 31/100 [00:17<00:33,  2.04it/s]
 32%|███▏      | 32/100 [00:17<00:36,  1.88it/s]
 33%|███▎      | 33/100 [00:18<00:32,  2.07it/s]
 34%|███▍      | 34/100 [00:18<00:31,  2.10it/s]
 35%|███▌      | 35/100 [00:19<00:30,  2.11it/s]
 36%|███▌      | 36/100 [00:19<00:27,  2.32it/s]
 37%|███▋      | 37/100 [00:19<00:27,  2.30it/s]
 38%|███▊      | 38/100 [00:20<00:38,  1.62it/s]
 39%|███▉      | 39/100 [00:21<00:37,  1.63it/s]
 40%|████      | 40/100 [00:22<00:36,  1.64it/s]
 41%|████      | 41/100 [00:22<00:29,  1.97it/s]
 42%|████▏     | 42/100 [00:22<00:28,  2.06it/s]
 43%|████▎     | 43/100 [00:23<00:25,  2.21it/s]
 44%|████▍     | 44/100 [00:24<00:32,  1.74it/s]
 45%|████▌     | 45/100 [00:24<00:31,  1.75it/s]
 46%|████▌     | 46/100 [00:25<00:29,  1.81it/s]
 47%|████▋     | 47/100 [00:25<00:29,  1.78it/s]
 48%|████▊     | 48/100 [00:26<00:32,  1.61it/s]
 49%|████▉     | 49/100 [00:26<00:28,  1.79it/s]
 50%|█████     | 50/100 [00:27<00:25,  1.94it/s]
 51%|█████     | 51/100 [00:27<00:25,  1.95it/s]
 52%|█████▏    | 52/100 [00:28<00:23,  2.06it/s]
 53%|█████▎    | 53/100 [00:28<00:23,  2.00it/s]
 54%|█████▍    | 54/100 [00:29<00:23,  1.97it/s]
 55%|█████▌    | 55/100 [00:29<00:20,  2.20it/s]
 56%|█████▌    | 56/100 [00:29<00:18,  2.43it/s]
 57%|█████▋    | 57/100 [00:30<00:16,  2.56it/s]
 58%|█████▊    | 58/100 [00:30<00:17,  2.36it/s]
 59%|█████▉    | 59/100 [00:31<00:19,  2.12it/s]
 60%|██████    | 60/100 [00:31<00:17,  2.30it/s]
 61%|██████    | 61/100 [00:32<00:19,  2.00it/s]
 62%|██████▏   | 62/100 [00:32<00:19,  1.99it/s]
 63%|██████▎   | 63/100 [00:33<00:18,  1.99it/s]
 64%|██████▍   | 64/100 [00:34<00:19,  1.87it/s]
 65%|██████▌   | 65/100 [00:34<00:19,  1.79it/s]
 66%|██████▌   | 66/100 [00:35<00:18,  1.88it/s]
 67%|██████▋   | 67/100 [00:35<00:17,  1.91it/s]
 68%|██████▊   | 68/100 [00:36<00:16,  1.98it/s]
 69%|██████▉   | 69/100 [00:36<00:14,  2.13it/s]
 70%|███████   | 70/100 [00:36<00:13,  2.25it/s]
 71%|███████   | 71/100 [00:37<00:17,  1.65it/s]
 72%|███████▏  | 72/100 [00:38<00:14,  1.94it/s]
 73%|███████▎  | 73/100 [00:38<00:13,  2.01it/s]
 74%|███████▍  | 74/100 [00:39<00:14,  1.74it/s]
 75%|███████▌  | 75/100 [00:40<00:19,  1.29it/s]
 76%|███████▌  | 76/100 [00:41<00:18,  1.32it/s]
 77%|███████▋  | 77/100 [00:42<00:18,  1.24it/s]
 78%|███████▊  | 78/100 [00:42<00:15,  1.39it/s]
 79%|███████▉  | 79/100 [00:43<00:14,  1.42it/s]
 80%|████████  | 80/100 [00:43<00:11,  1.69it/s]
 81%|████████  | 81/100 [00:44<00:10,  1.84it/s]
 82%|████████▏ | 82/100 [00:44<00:10,  1.69it/s]
 83%|████████▎ | 83/100 [00:45<00:09,  1.77it/s]
 84%|████████▍ | 84/100 [00:45<00:07,  2.11it/s]
 85%|████████▌ | 85/100 [00:46<00:07,  1.97it/s]
 86%|████████▌ | 86/100 [00:46<00:06,  2.08it/s]
 87%|████████▋ | 87/100 [00:47<00:06,  2.08it/s]
 88%|████████▊ | 88/100 [00:48<00:07,  1.52it/s]
 89%|████████▉ | 89/100 [00:48<00:06,  1.74it/s]
 90%|█████████ | 90/100 [00:48<00:04,  2.02it/s]
 91%|█████████ | 91/100 [00:49<00:04,  1.92it/s]
 92%|█████████▏| 92/100 [00:49<00:03,  2.07it/s]
 93%|█████████▎| 93/100 [00:50<00:03,  2.19it/s]
 94%|█████████▍| 94/100 [00:50<00:02,  2.01it/s]
 95%|█████████▌| 95/100 [00:51<00:02,  1.92it/s]
 96%|█████████▌| 96/100 [00:51<00:02,  1.87it/s]
 97%|█████████▋| 97/100 [00:52<00:01,  2.15it/s]
 98%|█████████▊| 98/100 [00:52<00:00,  2.01it/s]
 99%|█████████▉| 99/100 [00:53<00:00,  2.24it/s]
+Results saved to ./results/baseline_gsm8k_results_20260213_100955.jsonl

SpecForge-ext/logs/baseline_humaneval_20260213_100956.log ADDED Viewed

@@ -0,0 +1,5 @@
  0%|          | 0/164 [00:00<?, ?it/s]
  1%|          | 1/164 [00:04<11:00,  4.06s/it]
  1%|          | 2/164 [00:09<12:46,  4.73s/it]
  2%|▏         | 3/164 [00:13<11:59,  4.47s/it]
  2%|▏         | 4/164 [00:17<11:48,  4.43s/it]
  3%|▎         | 5/164 [00:21<11:20,  4.28s/it]
  4%|▎         | 6/164 [00:26<11:13,  4.26s/it]
  4%|▍         | 7/164 [00:30<11:31,  4.40s/it]
  5%|▍         | 8/164 [00:35<11:26,  4.40s/it]
  5%|▌         | 9/164 [00:39<11:20,  4.39s/it]
  6%|▌         | 10/164 [00:43<11:13,  4.37s/it]
  7%|▋         | 11/164 [00:48<11:12,  4.39s/it]
  7%|▋         | 12/164 [00:52<11:13,  4.43s/it]
  8%|▊         | 13/164 [00:57<11:00,  4.38s/it]
  9%|▊         | 14/164 [01:01<10:42,  4.28s/it]
  9%|▉         | 15/164 [01:05<10:56,  4.41s/it]
 10%|▉         | 16/164 [01:10<10:47,  4.38s/it]
 10%|█         | 17/164 [01:14<10:46,  4.40s/it]
 11%|█         | 18/164 [01:18<10:38,  4.38s/it]
 12%|█▏        | 19/164 [01:23<10:27,  4.33s/it]
 12%|█▏        | 20/164 [01:27<10:25,  4.34s/it]
 13%|█▎        | 21/164 [01:31<10:21,  4.34s/it]
 13%|█▎        | 22/164 [01:36<10:16,  4.34s/it]
 14%|█▍        | 23/164 [01:40<10:23,  4.42s/it]
 15%|█▍        | 24/164 [01:45<10:22,  4.45s/it]
 15%|█▌        | 25/164 [01:49<10:05,  4.36s/it]
 16%|█▌        | 26/164 [01:53<09:58,  4.34s/it]
 16%|█▋        | 27/164 [01:57<09:47,  4.29s/it]
 17%|█▋        | 28/164 [02:02<09:38,  4.26s/it]
 18%|█▊        | 29/164 [02:06<09:29,  4.22s/it]
 18%|█▊        | 30/164 [02:10<09:41,  4.34s/it]
 19%|█▉        | 31/164 [02:14<09:28,  4.27s/it]
 20%|█▉        | 32/164 [02:18<09:10,  4.17s/it]
 20%|██        | 33/164 [02:23<09:24,  4.31s/it]
 21%|██        | 34/164 [02:27<09:17,  4.29s/it]
 21%|██▏       | 35/164 [02:31<09:09,  4.26s/it]
 22%|██▏       | 36/164 [02:36<09:09,  4.30s/it]
 23%|██▎       | 37/164 [02:40<08:55,  4.22s/it]
 23%|██▎       | 38/164 [02:44<08:49,  4.21s/it]
 24%|██▍       | 39/164 [02:48<08:55,  4.29s/it]
 24%|██▍       | 40/164 [02:53<08:51,  4.28s/it]
 25%|██▌       | 41/164 [02:57<08:48,  4.30s/it]
 26%|██▌       | 42/164 [03:01<08:48,  4.33s/it]
 26%|██▌       | 43/164 [03:05<08:18,  4.12s/it]
 27%|██▋       | 44/164 [03:09<08:12,  4.10s/it]
 27%|██▋       | 45/164 [03:13<08:13,  4.15s/it]
 28%|██▊       | 46/164 [03:17<08:03,  4.10s/it]
 29%|██▊       | 47/164 [03:22<08:02,  4.12s/it]
 29%|██▉       | 48/164 [03:26<07:54,  4.09s/it]
 30%|██▉       | 49/164 [03:30<08:04,  4.22s/it]
 30%|███       | 50/164 [03:35<08:08,  4.29s/it]
 31%|███       | 51/164 [03:39<08:17,  4.40s/it]
 32%|███▏      | 52/164 [03:44<08:12,  4.40s/it]
 32%|███▏      | 53/164 [03:48<07:54,  4.27s/it]
 33%|███▎      | 54/164 [03:51<07:14,  3.95s/it]
 34%|███▎      | 55/164 [03:55<07:28,  4.11s/it]
 34%|███▍      | 56/164 [03:59<07:20,  4.07s/it]
 35%|███▍      | 57/164 [04:04<07:32,  4.23s/it]
 35%|███▌      | 58/164 [04:08<07:34,  4.29s/it]
 36%|███▌      | 59/164 [04:13<07:28,  4.27s/it]
 37%|███▋      | 60/164 [04:17<07:25,  4.28s/it]
 37%|███▋      | 61/164 [04:21<07:16,  4.24s/it]
 38%|███▊      | 62/164 [04:26<07:25,  4.36s/it]
 38%|███▊      | 63/164 [04:30<07:27,  4.44s/it]
 39%|███▉      | 64/164 [04:34<07:07,  4.27s/it]
 40%|███▉      | 65/164 [04:38<06:54,  4.19s/it]
 40%|████      | 66/164 [04:42<06:55,  4.24s/it]
 41%|████      | 67/164 [04:47<06:45,  4.18s/it]
 41%|████▏     | 68/164 [04:51<06:41,  4.19s/it]
 42%|████▏     | 69/164 [04:55<06:45,  4.27s/it]
 43%|████▎     | 70/164 [04:59<06:34,  4.20s/it]
 43%|████▎     | 71/164 [05:03<06:25,  4.14s/it]
 44%|████▍     | 72/164 [05:07<06:12,  4.05s/it]
 45%|████▍     | 73/164 [05:11<06:10,  4.08s/it]
 45%|████▌     | 74/164 [05:15<06:07,  4.08s/it]
 46%|████▌     | 75/164 [05:19<06:05,  4.10s/it]
 46%|████▋     | 76/164 [05:24<06:08,  4.18s/it]
 47%|████▋     | 77/164 [05:28<06:00,  4.14s/it]
 48%|████▊     | 78/164 [05:32<05:59,  4.18s/it]
 48%|████▊     | 79/164 [05:36<05:50,  4.13s/it]
 49%|████▉     | 80/164 [05:40<05:43,  4.09s/it]
 49%|████▉     | 81/164 [05:44<05:43,  4.14s/it]
 50%|█████     | 82/164 [05:49<05:49,  4.26s/it]
 51%|█████     | 83/164 [05:53<05:34,  4.13s/it]
 51%|█████     | 84/164 [05:57<05:28,  4.11s/it]
 52%|█████▏    | 85/164 [06:01<05:19,  4.04s/it]
 52%|█████▏    | 86/164 [06:05<05:12,  4.01s/it]
 53%|█████▎    | 87/164 [06:09<05:09,  4.02s/it]
 54%|█████▎    | 88/164 [06:13<05:08,  4.06s/it]
 54%|█████▍    | 89/164 [06:17<05:02,  4.03s/it]
 55%|█████▍    | 90/164 [06:21<05:08,  4.17s/it]
 55%|█████▌    | 91/164 [06:25<04:55,  4.05s/it]
 56%|█████▌    | 92/164 [06:29<04:55,  4.10s/it]
 57%|█████▋    | 93/164 [06:33<04:45,  4.03s/it]
 57%|█████▋    | 94/164 [06:38<04:54,  4.21s/it]
 58%|█████▊    | 95/164 [06:42<04:45,  4.14s/it]
 59%|█████▊    | 96/164 [06:46<04:40,  4.13s/it]
 59%|█████▉    | 97/164 [06:50<04:31,  4.06s/it]
 60%|█████▉    | 98/164 [06:54<04:26,  4.03s/it]
 60%|██████    | 99/164 [06:58<04:24,  4.08s/it]
 61%|██████    | 100/164 [07:01<04:09,  3.90s/it]
 62%|██████▏   | 101/164 [07:05<04:09,  3.96s/it]
 62%|██████▏   | 102/164 [07:10<04:12,  4.08s/it]
 63%|██████▎   | 103/164 [07:14<04:03,  4.00s/it]
 63%|██████▎   | 104/164 [07:17<03:55,  3.93s/it]
 64%|██████▍   | 105/164 [07:21<03:54,  3.97s/it]
 65%|██████▍   | 106/164 [07:26<03:53,  4.02s/it]
 65%|██████▌   | 107/164 [07:30<03:51,  4.06s/it]
 66%|██████▌   | 108/164 [07:34<03:44,  4.01s/it]
 66%|██████▋   | 109/164 [07:38<03:38,  3.98s/it]
 67%|██████▋   | 110/164 [07:41<03:33,  3.94s/it]
 68%|██████▊   | 111/164 [07:46<03:34,  4.06s/it]
 68%|██████▊   | 112/164 [07:50<03:30,  4.05s/it]
 69%|██████▉   | 113/164 [07:54<03:29,  4.11s/it]
 70%|██████▉   | 114/164 [07:58<03:24,  4.09s/it]
 70%|███████   | 115/164 [08:02<03:17,  4.04s/it]
 71%|███████   | 116/164 [08:06<03:17,  4.12s/it]
 71%|███████▏  | 117/164 [08:10<03:10,  4.04s/it]
 72%|███████▏  | 118/164 [08:14<03:06,  4.05s/it]
 73%|███████▎  | 119/164 [08:18<03:03,  4.09s/it]
 73%|███████▎  | 120/164 [08:23<03:08,  4.29s/it]
 74%|███████▍  | 121/164 [08:27<03:03,  4.26s/it]
 74%|███████▍  | 122/164 [08:31<02:56,  4.20s/it]
 75%|███████▌  | 123/164 [08:35<02:47,  4.09s/it]
 76%|███████▌  | 124/164 [08:39<02:43,  4.10s/it]
 76%|███████▌  | 125/164 [08:43<02:39,  4.09s/it]
 77%|███████▋  | 126/164 [08:48<02:36,  4.12s/it]
 77%|███████▋  | 127/164 [08:51<02:29,  4.04s/it]
 78%|███████▊  | 128/164 [08:56<02:26,  4.06s/it]
 79%|███████▊  | 129/164 [09:00<02:21,  4.04s/it]
 79%|███████▉  | 130/164 [09:04<02:18,  4.08s/it]
 80%|███████▉  | 131/164 [09:08<02:15,  4.12s/it]
 80%|████████  | 132/164 [09:12<02:09,  4.04s/it]
 81%|████████  | 133/164 [09:16<02:10,  4.20s/it]
 82%|████████▏ | 134/164 [09:21<02:05,  4.19s/it]
 82%|████████▏ | 135/164 [09:24<01:57,  4.05s/it]
 83%|████████▎ | 136/164 [09:28<01:53,  4.05s/it]
 84%|████████▎ | 137/164 [09:32<01:50,  4.09s/it]
 84%|████████▍ | 138/164 [09:37<01:47,  4.14s/it]
 85%|████████▍ | 139/164 [09:41<01:44,  4.18s/it]
 85%|████████▌ | 140/164 [09:45<01:39,  4.13s/it]
 86%|████████▌ | 141/164 [09:49<01:36,  4.21s/it]
 87%|████████▋ | 142/164 [09:54<01:32,  4.22s/it]
 87%|████████▋ | 143/164 [09:58<01:27,  4.17s/it]
 88%|████████▊ | 144/164 [10:02<01:22,  4.13s/it]
 88%|████████▊ | 145/164 [10:06<01:20,  4.22s/it]
 89%|████████▉ | 146/164 [10:10<01:16,  4.24s/it]
 90%|████████▉ | 147/164 [10:14<01:10,  4.13s/it]
 90%|█████████ | 148/164 [10:19<01:06,  4.15s/it]
 91%|█████████ | 149/164 [10:23<01:03,  4.23s/it]
 91%|█████████▏| 150/164 [10:27<00:59,  4.23s/it]
 92%|█████████▏| 151/164 [10:31<00:54,  4.21s/it]
 93%|█████████▎| 152/164 [10:35<00:49,  4.11s/it]
 93%|█████████▎| 153/164 [10:39<00:45,  4.11s/it]
 94%|█████████▍| 154/164 [10:43<00:41,  4.11s/it]
 95%|█████████▍| 155/164 [10:48<00:37,  4.22s/it]
 95%|█████████▌| 156/164 [10:52<00:33,  4.13s/it]
 96%|█████████▌| 157/164 [10:56<00:28,  4.10s/it]
 96%|█████████▋| 158/164 [11:00<00:24,  4.16s/it]
 97%|█████████▋| 159/164 [11:04<00:20,  4.20s/it]
 98%|█████████▊| 160/164 [11:09<00:16,  4.20s/it]
 98%|█████████▊| 161/164 [11:13<00:12,  4.27s/it]
 99%|█████████▉| 162/164 [11:17<00:08,  4.21s/it]
 99%|█████████▉| 163/164 [11:21<00:04,  4.16s/it]

+WARNING:sglang.srt.server_args:Attention backend not explicitly specified. Use fa3 backend by default.
+Running benchmark humaneval with 164 prompts, batch size 1, steps None, topk None, num_draft_tokens None, subset None
+Loading HumanEval data from local: /workspace/hanrui/datasets/humaneval/test.jsonl
  0%|          | 0/164 [00:00<?, ?it/s]
  1%|          | 1/164 [00:04<11:00,  4.06s/it]
  1%|          | 2/164 [00:09<12:46,  4.73s/it]
  2%|▏         | 3/164 [00:13<11:59,  4.47s/it]
  2%|▏         | 4/164 [00:17<11:48,  4.43s/it]
  3%|▎         | 5/164 [00:21<11:20,  4.28s/it]
  4%|▎         | 6/164 [00:26<11:13,  4.26s/it]
  4%|▍         | 7/164 [00:30<11:31,  4.40s/it]
  5%|▍         | 8/164 [00:35<11:26,  4.40s/it]
  5%|▌         | 9/164 [00:39<11:20,  4.39s/it]
  6%|▌         | 10/164 [00:43<11:13,  4.37s/it]
  7%|▋         | 11/164 [00:48<11:12,  4.39s/it]
  7%|▋         | 12/164 [00:52<11:13,  4.43s/it]
  8%|▊         | 13/164 [00:57<11:00,  4.38s/it]
  9%|▊         | 14/164 [01:01<10:42,  4.28s/it]
  9%|▉         | 15/164 [01:05<10:56,  4.41s/it]
 10%|▉         | 16/164 [01:10<10:47,  4.38s/it]
 10%|█         | 17/164 [01:14<10:46,  4.40s/it]
 11%|█         | 18/164 [01:18<10:38,  4.38s/it]
 12%|█▏        | 19/164 [01:23<10:27,  4.33s/it]
 12%|█▏        | 20/164 [01:27<10:25,  4.34s/it]
 13%|█▎        | 21/164 [01:31<10:21,  4.34s/it]
 13%|█▎        | 22/164 [01:36<10:16,  4.34s/it]
 14%|█▍        | 23/164 [01:40<10:23,  4.42s/it]
 15%|█▍        | 24/164 [01:45<10:22,  4.45s/it]
 15%|█▌        | 25/164 [01:49<10:05,  4.36s/it]
 16%|█▌        | 26/164 [01:53<09:58,  4.34s/it]
 16%|█▋        | 27/164 [01:57<09:47,  4.29s/it]
 17%|█▋        | 28/164 [02:02<09:38,  4.26s/it]
 18%|█▊        | 29/164 [02:06<09:29,  4.22s/it]
 18%|█▊        | 30/164 [02:10<09:41,  4.34s/it]
 19%|█▉        | 31/164 [02:14<09:28,  4.27s/it]
 20%|█▉        | 32/164 [02:18<09:10,  4.17s/it]
 20%|██        | 33/164 [02:23<09:24,  4.31s/it]
 21%|██        | 34/164 [02:27<09:17,  4.29s/it]
 21%|██▏       | 35/164 [02:31<09:09,  4.26s/it]
 22%|██▏       | 36/164 [02:36<09:09,  4.30s/it]
 23%|██▎       | 37/164 [02:40<08:55,  4.22s/it]
 23%|██▎       | 38/164 [02:44<08:49,  4.21s/it]
 24%|██▍       | 39/164 [02:48<08:55,  4.29s/it]
 24%|██▍       | 40/164 [02:53<08:51,  4.28s/it]
 25%|██▌       | 41/164 [02:57<08:48,  4.30s/it]
 26%|██▌       | 42/164 [03:01<08:48,  4.33s/it]
 26%|██▌       | 43/164 [03:05<08:18,  4.12s/it]
 27%|██▋       | 44/164 [03:09<08:12,  4.10s/it]
 27%|██▋       | 45/164 [03:13<08:13,  4.15s/it]
 28%|██▊       | 46/164 [03:17<08:03,  4.10s/it]
 29%|██▊       | 47/164 [03:22<08:02,  4.12s/it]
 29%|██▉       | 48/164 [03:26<07:54,  4.09s/it]
 30%|██▉       | 49/164 [03:30<08:04,  4.22s/it]
 30%|███       | 50/164 [03:35<08:08,  4.29s/it]
 31%|███       | 51/164 [03:39<08:17,  4.40s/it]
 32%|███▏      | 52/164 [03:44<08:12,  4.40s/it]
 32%|███▏      | 53/164 [03:48<07:54,  4.27s/it]
 33%|███▎      | 54/164 [03:51<07:14,  3.95s/it]
 34%|███▎      | 55/164 [03:55<07:28,  4.11s/it]
 34%|███▍      | 56/164 [03:59<07:20,  4.07s/it]
 35%|███▍      | 57/164 [04:04<07:32,  4.23s/it]
 35%|███▌      | 58/164 [04:08<07:34,  4.29s/it]
 36%|███▌      | 59/164 [04:13<07:28,  4.27s/it]
 37%|███▋      | 60/164 [04:17<07:25,  4.28s/it]
 37%|███▋      | 61/164 [04:21<07:16,  4.24s/it]
 38%|███▊      | 62/164 [04:26<07:25,  4.36s/it]
 38%|███▊      | 63/164 [04:30<07:27,  4.44s/it]
 39%|███▉      | 64/164 [04:34<07:07,  4.27s/it]
 40%|███▉      | 65/164 [04:38<06:54,  4.19s/it]
 40%|████      | 66/164 [04:42<06:55,  4.24s/it]
 41%|████      | 67/164 [04:47<06:45,  4.18s/it]
 41%|████▏     | 68/164 [04:51<06:41,  4.19s/it]
 42%|████▏     | 69/164 [04:55<06:45,  4.27s/it]
 43%|████▎     | 70/164 [04:59<06:34,  4.20s/it]
 43%|████▎     | 71/164 [05:03<06:25,  4.14s/it]
 44%|████▍     | 72/164 [05:07<06:12,  4.05s/it]
 45%|████▍     | 73/164 [05:11<06:10,  4.08s/it]
 45%|████▌     | 74/164 [05:15<06:07,  4.08s/it]
 46%|████▌     | 75/164 [05:19<06:05,  4.10s/it]
 46%|████▋     | 76/164 [05:24<06:08,  4.18s/it]
 47%|████▋     | 77/164 [05:28<06:00,  4.14s/it]
 48%|████▊     | 78/164 [05:32<05:59,  4.18s/it]
 48%|████▊     | 79/164 [05:36<05:50,  4.13s/it]
 49%|████▉     | 80/164 [05:40<05:43,  4.09s/it]
 49%|████▉     | 81/164 [05:44<05:43,  4.14s/it]
 50%|█████     | 82/164 [05:49<05:49,  4.26s/it]
 51%|█████     | 83/164 [05:53<05:34,  4.13s/it]
 51%|█████     | 84/164 [05:57<05:28,  4.11s/it]
 52%|█████▏    | 85/164 [06:01<05:19,  4.04s/it]
 52%|█████▏    | 86/164 [06:05<05:12,  4.01s/it]
 53%|█████▎    | 87/164 [06:09<05:09,  4.02s/it]
 54%|█████▎    | 88/164 [06:13<05:08,  4.06s/it]
 54%|█████▍    | 89/164 [06:17<05:02,  4.03s/it]
 55%|█████▍    | 90/164 [06:21<05:08,  4.17s/it]
 55%|█████▌    | 91/164 [06:25<04:55,  4.05s/it]
 56%|█████▌    | 92/164 [06:29<04:55,  4.10s/it]
 57%|█████▋    | 93/164 [06:33<04:45,  4.03s/it]
 57%|█████▋    | 94/164 [06:38<04:54,  4.21s/it]
 58%|█████▊    | 95/164 [06:42<04:45,  4.14s/it]
 59%|█████▊    | 96/164 [06:46<04:40,  4.13s/it]
 59%|█████▉    | 97/164 [06:50<04:31,  4.06s/it]
 60%|█████▉    | 98/164 [06:54<04:26,  4.03s/it]
 60%|██████    | 99/164 [06:58<04:24,  4.08s/it]
 61%|██████    | 100/164 [07:01<04:09,  3.90s/it]
 62%|██████▏   | 101/164 [07:05<04:09,  3.96s/it]
 62%|██████▏   | 102/164 [07:10<04:12,  4.08s/it]
 63%|██████▎   | 103/164 [07:14<04:03,  4.00s/it]
 63%|██████▎   | 104/164 [07:17<03:55,  3.93s/it]
 64%|██████▍   | 105/164 [07:21<03:54,  3.97s/it]
 65%|██████▍   | 106/164 [07:26<03:53,  4.02s/it]
 65%|██████▌   | 107/164 [07:30<03:51,  4.06s/it]
 66%|██████▌   | 108/164 [07:34<03:44,  4.01s/it]
 66%|██████▋   | 109/164 [07:38<03:38,  3.98s/it]
 67%|██████▋   | 110/164 [07:41<03:33,  3.94s/it]
 68%|██████▊   | 111/164 [07:46<03:34,  4.06s/it]
 68%|██████▊   | 112/164 [07:50<03:30,  4.05s/it]
 69%|██████▉   | 113/164 [07:54<03:29,  4.11s/it]
 70%|██████▉   | 114/164 [07:58<03:24,  4.09s/it]
 70%|███████   | 115/164 [08:02<03:17,  4.04s/it]
 71%|███████   | 116/164 [08:06<03:17,  4.12s/it]
 71%|███████▏  | 117/164 [08:10<03:10,  4.04s/it]
 72%|███████▏  | 118/164 [08:14<03:06,  4.05s/it]
 73%|███████▎  | 119/164 [08:18<03:03,  4.09s/it]
 73%|███████▎  | 120/164 [08:23<03:08,  4.29s/it]
 74%|███████▍  | 121/164 [08:27<03:03,  4.26s/it]
 74%|███████▍  | 122/164 [08:31<02:56,  4.20s/it]
 75%|███████▌  | 123/164 [08:35<02:47,  4.09s/it]
 76%|███████▌  | 124/164 [08:39<02:43,  4.10s/it]
 76%|███████▌  | 125/164 [08:43<02:39,  4.09s/it]
 77%|███████▋  | 126/164 [08:48<02:36,  4.12s/it]
 77%|███████▋  | 127/164 [08:51<02:29,  4.04s/it]
 78%|███████▊  | 128/164 [08:56<02:26,  4.06s/it]
 79%|███████▊  | 129/164 [09:00<02:21,  4.04s/it]
 79%|███████▉  | 130/164 [09:04<02:18,  4.08s/it]
 80%|███████▉  | 131/164 [09:08<02:15,  4.12s/it]
 80%|████████  | 132/164 [09:12<02:09,  4.04s/it]
 81%|████████  | 133/164 [09:16<02:10,  4.20s/it]
 82%|████████▏ | 134/164 [09:21<02:05,  4.19s/it]
 82%|████████▏ | 135/164 [09:24<01:57,  4.05s/it]
 83%|████████▎ | 136/164 [09:28<01:53,  4.05s/it]
 84%|████████▎ | 137/164 [09:32<01:50,  4.09s/it]
 84%|████████▍ | 138/164 [09:37<01:47,  4.14s/it]
 85%|████████▍ | 139/164 [09:41<01:44,  4.18s/it]
 85%|████████▌ | 140/164 [09:45<01:39,  4.13s/it]
 86%|████████▌ | 141/164 [09:49<01:36,  4.21s/it]
 87%|████████▋ | 142/164 [09:54<01:32,  4.22s/it]
 87%|████████▋ | 143/164 [09:58<01:27,  4.17s/it]
 88%|████████▊ | 144/164 [10:02<01:22,  4.13s/it]
 88%|████████▊ | 145/164 [10:06<01:20,  4.22s/it]
 89%|████████▉ | 146/164 [10:10<01:16,  4.24s/it]
 90%|████████▉ | 147/164 [10:14<01:10,  4.13s/it]
 90%|█████████ | 148/164 [10:19<01:06,  4.15s/it]
 91%|█████████ | 149/164 [10:23<01:03,  4.23s/it]
 91%|█████████▏| 150/164 [10:27<00:59,  4.23s/it]
 92%|█████████▏| 151/164 [10:31<00:54,  4.21s/it]
 93%|█████████▎| 152/164 [10:35<00:49,  4.11s/it]
 93%|█████████▎| 153/164 [10:39<00:45,  4.11s/it]
 94%|█████████▍| 154/164 [10:43<00:41,  4.11s/it]
 95%|█████████▍| 155/164 [10:48<00:37,  4.22s/it]
 95%|█████████▌| 156/164 [10:52<00:33,  4.13s/it]
 96%|█████████▌| 157/164 [10:56<00:28,  4.10s/it]
 96%|█████████▋| 158/164 [11:00<00:24,  4.16s/it]
 97%|█████████▋| 159/164 [11:04<00:20,  4.20s/it]
 98%|█████████▊| 160/164 [11:09<00:16,  4.20s/it]
 98%|█████████▊| 161/164 [11:13<00:12,  4.27s/it]
 99%|█████████▉| 162/164 [11:17<00:08,  4.21s/it]
 99%|█████████▉| 163/164 [11:21<00:04,  4.16s/it]
+Results saved to ./results/baseline_humaneval_results_20260213_102128.jsonl