ScienceOne-AI committed on
Commit
816198f
·
verified ·
1 Parent(s): 6d30fe1

Upload 61 files

Browse files
This view is limited to 50 files because it contains too many changes.   See raw diff
Files changed (50) hide show
  1. .gitattributes +17 -0
  2. LICENSE +201 -0
  3. README.md +306 -3
  4. README_en.md +306 -0
  5. assets/benchmark_performance.png +3 -0
  6. cases/case_DeepResearchIF_general_en_01.png +3 -0
  7. cases/case_DeepResearchIF_general_zh_01.png +3 -0
  8. cases/case_DeepResearchIF_science_en_01.png +3 -0
  9. cases/case_DeepResearchIF_science_zh_01.png +3 -0
  10. cases/case_deepresearch-report-writing_general_zh_01.png +3 -0
  11. cases/case_deepresearch-report-writing_science_en_01.png +3 -0
  12. cases/case_deepresearch-report-writing_science_zh_01.png +3 -0
  13. cases/case_file-understanding-generation_general_en_01.png +3 -0
  14. cases/case_file-understanding-generation_science_zh_01.png +3 -0
  15. cases/case_long-horizon-reasoning_general_en_01.png +3 -0
  16. cases/case_long-horizon-reasoning_general_en_02.png +3 -0
  17. cases/case_long-horizon-reasoning_general_zh_01.png +3 -0
  18. cases/case_long-horizon-reasoning_general_zh_02.png +3 -0
  19. cases/case_skills_science_en_01.png +3 -0
  20. cases/case_skills_science_zh_01.png +3 -0
  21. inference/README.md +224 -0
  22. inference/README_en.md +226 -0
  23. inference/inference/run_batch_inference.py +373 -0
  24. inference/inference/run_single_inference.py +354 -0
  25. inference/models/tokenizer/added_tokens.json +28 -0
  26. inference/models/tokenizer/chat_template.jinja +89 -0
  27. inference/models/tokenizer/config.json +41 -0
  28. inference/models/tokenizer/merges.txt +0 -0
  29. inference/models/tokenizer/special_tokens_map.json +31 -0
  30. inference/models/tokenizer/tokenizer.json +3 -0
  31. inference/models/tokenizer/tokenizer_config.json +239 -0
  32. inference/models/tokenizer/vocab.json +0 -0
  33. inference/requirements.txt +176 -0
  34. inference/run_batch_inference_demo.sh +150 -0
  35. inference/run_batch_inference_online_demo.sh +183 -0
  36. inference/server/llm_api.py +665 -0
  37. inference/server/tool_api.py +59 -0
  38. inference/server/tool_execution.py +73 -0
  39. inference/test_all_tools.py +123 -0
  40. inference/tool_kits/__init__.py +48 -0
  41. inference/tool_kits/ask_question_about_image_toolkit.py +32 -0
  42. inference/tool_kits/ask_question_about_video_toolkit.py +32 -0
  43. inference/tool_kits/base.py +183 -0
  44. inference/tool_kits/bash_toolkit.py +30 -0
  45. inference/tool_kits/execute_code_toolkit.py +23 -0
  46. inference/tool_kits/file_wide_parse_toolkit.py +27 -0
  47. inference/tool_kits/image_search_toolkit.py +30 -0
  48. inference/tool_kits/scholar_search_toolkit.py +29 -0
  49. inference/tool_kits/wide_search_toolkit.py +29 -0
  50. inference/tool_kits/wide_visit_toolkit.py +32 -0
.gitattributes CHANGED
@@ -34,3 +34,20 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
34
  *.zst filter=lfs diff=lfs merge=lfs -text
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
36
  tokenizer.json filter=lfs diff=lfs merge=lfs -text
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
34
  *.zst filter=lfs diff=lfs merge=lfs -text
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
36
  tokenizer.json filter=lfs diff=lfs merge=lfs -text
37
+ assets/benchmark_performance.png filter=lfs diff=lfs merge=lfs -text
38
+ cases/case_deepresearch-report-writing_general_zh_01.png filter=lfs diff=lfs merge=lfs -text
39
+ cases/case_deepresearch-report-writing_science_en_01.png filter=lfs diff=lfs merge=lfs -text
40
+ cases/case_deepresearch-report-writing_science_zh_01.png filter=lfs diff=lfs merge=lfs -text
41
+ cases/case_DeepResearchIF_general_en_01.png filter=lfs diff=lfs merge=lfs -text
42
+ cases/case_DeepResearchIF_general_zh_01.png filter=lfs diff=lfs merge=lfs -text
43
+ cases/case_DeepResearchIF_science_en_01.png filter=lfs diff=lfs merge=lfs -text
44
+ cases/case_DeepResearchIF_science_zh_01.png filter=lfs diff=lfs merge=lfs -text
45
+ cases/case_file-understanding-generation_general_en_01.png filter=lfs diff=lfs merge=lfs -text
46
+ cases/case_file-understanding-generation_science_zh_01.png filter=lfs diff=lfs merge=lfs -text
47
+ cases/case_long-horizon-reasoning_general_en_01.png filter=lfs diff=lfs merge=lfs -text
48
+ cases/case_long-horizon-reasoning_general_en_02.png filter=lfs diff=lfs merge=lfs -text
49
+ cases/case_long-horizon-reasoning_general_zh_01.png filter=lfs diff=lfs merge=lfs -text
50
+ cases/case_long-horizon-reasoning_general_zh_02.png filter=lfs diff=lfs merge=lfs -text
51
+ cases/case_skills_science_en_01.png filter=lfs diff=lfs merge=lfs -text
52
+ cases/case_skills_science_zh_01.png filter=lfs diff=lfs merge=lfs -text
53
+ inference/models/tokenizer/tokenizer.json filter=lfs diff=lfs merge=lfs -text
LICENSE ADDED
@@ -0,0 +1,201 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ Apache License
2
+ Version 2.0, January 2004
3
+ http://www.apache.org/licenses/
4
+
5
+ TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
6
+
7
+ 1. Definitions.
8
+
9
+ "License" shall mean the terms and conditions for use, reproduction,
10
+ and distribution as defined by Sections 1 through 9 of this document.
11
+
12
+ "Licensor" shall mean the copyright owner or entity authorized by
13
+ the copyright owner that is granting the License.
14
+
15
+ "Legal Entity" shall mean the union of the acting entity and all
16
+ other entities that control, are controlled by, or are under common
17
+ control with that entity. For the purposes of this definition,
18
+ "control" means (i) the power, direct or indirect, to cause the
19
+ direction or management of such entity, whether by contract or
20
+ otherwise, or (ii) ownership of fifty percent (50%) or more of the
21
+ outstanding shares, or (iii) beneficial ownership of such entity.
22
+
23
+ "You" (or "Your") shall mean an individual or Legal Entity
24
+ exercising permissions granted by this License.
25
+
26
+ "Source" form shall mean the preferred form for making modifications,
27
+ including but not limited to software source code, documentation
28
+ source, and configuration files.
29
+
30
+ "Object" form shall mean any form resulting from mechanical
31
+ transformation or translation of a Source form, including but
32
+ not limited to compiled object code, generated documentation,
33
+ and conversions to other media types.
34
+
35
+ "Work" shall mean the work of authorship, whether in Source or
36
+ Object form, made available under the License, as indicated by a
37
+ copyright notice that is included in or attached to the work
38
+ (an example is provided in the Appendix below).
39
+
40
+ "Derivative Works" shall mean any work, whether in Source or Object
41
+ form, that is based on (or derived from) the Work and for which the
42
+ editorial revisions, annotations, elaborations, or other modifications
43
+ represent, as a whole, an original work of authorship. For the purposes
44
+ of this License, Derivative Works shall not include works that remain
45
+ separable from, or merely link (or bind by name) to the interfaces of,
46
+ the Work and Derivative Works thereof.
47
+
48
+ "Contribution" shall mean any work of authorship, including
49
+ the original version of the Work and any modifications or additions
50
+ to that Work or Derivative Works thereof, that is intentionally
51
+ submitted to Licensor for inclusion in the Work by the copyright owner
52
+ or by an individual or Legal Entity authorized to submit on behalf of
53
+ the copyright owner. For the purposes of this definition, "submitted"
54
+ means any form of electronic, verbal, or written communication sent
55
+ to the Licensor or its representatives, including but not limited to
56
+ communication on electronic mailing lists, source code control systems,
57
+ and issue tracking systems that are managed by, or on behalf of, the
58
+ Licensor for the purpose of discussing and improving the Work, but
59
+ excluding communication that is conspicuously marked or otherwise
60
+ designated in writing by the copyright owner as "Not a Contribution."
61
+
62
+ "Contributor" shall mean Licensor and any individual or Legal Entity
63
+ on behalf of whom a Contribution has been received by Licensor and
64
+ subsequently incorporated within the Work.
65
+
66
+ 2. Grant of Copyright License. Subject to the terms and conditions of
67
+ this License, each Contributor hereby grants to You a perpetual,
68
+ worldwide, non-exclusive, no-charge, royalty-free, irrevocable
69
+ copyright license to reproduce, prepare Derivative Works of,
70
+ publicly display, publicly perform, sublicense, and distribute the
71
+ Work and such Derivative Works in Source or Object form.
72
+
73
+ 3. Grant of Patent License. Subject to the terms and conditions of
74
+ this License, each Contributor hereby grants to You a perpetual,
75
+ worldwide, non-exclusive, no-charge, royalty-free, irrevocable
76
+ (except as stated in this section) patent license to make, have made,
77
+ use, offer to sell, sell, import, and otherwise transfer the Work,
78
+ where such license applies only to those patent claims licensable
79
+ by such Contributor that are necessarily infringed by their
80
+ Contribution(s) alone or by combination of their Contribution(s)
81
+ with the Work to which such Contribution(s) was submitted. If You
82
+ institute patent litigation against any entity (including a
83
+ cross-claim or counterclaim in a lawsuit) alleging that the Work
84
+ or a Contribution incorporated within the Work constitutes direct
85
+ or contributory patent infringement, then any patent licenses
86
+ granted to You under this License for that Work shall terminate
87
+ as of the date such litigation is filed.
88
+
89
+ 4. Redistribution. You may reproduce and distribute copies of the
90
+ Work or Derivative Works thereof in any medium, with or without
91
+ modifications, and in Source or Object form, provided that You
92
+ meet the following conditions:
93
+
94
+ (a) You must give any other recipients of the Work or
95
+ Derivative Works a copy of this License; and
96
+
97
+ (b) You must cause any modified files to carry prominent notices
98
+ stating that You changed the files; and
99
+
100
+ (c) You must retain, in the Source form of any Derivative Works
101
+ that You distribute, all copyright, patent, trademark, and
102
+ attribution notices from the Source form of the Work,
103
+ excluding those notices that do not pertain to any part of
104
+ the Derivative Works; and
105
+
106
+ (d) If the Work includes a "NOTICE" text file as part of its
107
+ distribution, then any Derivative Works that You distribute must
108
+ include a readable copy of the attribution notices contained
109
+ within such NOTICE file, excluding those notices that do not
110
+ pertain to any part of the Derivative Works, in at least one
111
+ of the following places: within a NOTICE text file distributed
112
+ as part of the Derivative Works; within the Source form or
113
+ documentation, if provided along with the Derivative Works; or,
114
+ within a display generated by the Derivative Works, if and
115
+ wherever such third-party notices normally appear. The contents
116
+ of the NOTICE file are for informational purposes only and
117
+ do not modify the License. You may add Your own attribution
118
+ notices within Derivative Works that You distribute, alongside
119
+ or as an addendum to the NOTICE text from the Work, provided
120
+ that such additional attribution notices cannot be construed
121
+ as modifying the License.
122
+
123
+ You may add Your own copyright statement to Your modifications and
124
+ may provide additional or different license terms and conditions
125
+ for use, reproduction, or distribution of Your modifications, or
126
+ for any such Derivative Works as a whole, provided Your use,
127
+ reproduction, and distribution of the Work otherwise complies with
128
+ the conditions stated in this License.
129
+
130
+ 5. Submission of Contributions. Unless You explicitly state otherwise,
131
+ any Contribution intentionally submitted for inclusion in the Work
132
+ by You to the Licensor shall be under the terms and conditions of
133
+ this License, without any additional terms or conditions.
134
+ Notwithstanding the above, nothing herein shall supersede or modify
135
+ the terms of any separate license agreement you may have executed
136
+ with Licensor regarding such Contributions.
137
+
138
+ 6. Trademarks. This License does not grant permission to use the trade
139
+ names, trademarks, service marks, or product names of the Licensor,
140
+ except as required for reasonable and customary use in describing the
141
+ origin of the Work and reproducing the content of the NOTICE file.
142
+
143
+ 7. Disclaimer of Warranty. Unless required by applicable law or
144
+ agreed to in writing, Licensor provides the Work (and each
145
+ Contributor provides its Contributions) on an "AS IS" BASIS,
146
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
147
+ implied, including, without limitation, any warranties or conditions
148
+ of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
149
+ PARTICULAR PURPOSE. You are solely responsible for determining the
150
+ appropriateness of using or redistributing the Work and assume any
151
+ risks associated with Your exercise of permissions under this License.
152
+
153
+ 8. Limitation of Liability. In no event and under no legal theory,
154
+ whether in tort (including negligence), contract, or otherwise,
155
+ unless required by applicable law (such as deliberate and grossly
156
+ negligent acts) or agreed to in writing, shall any Contributor be
157
+ liable to You for damages, including any direct, indirect, special,
158
+ incidental, or consequential damages of any character arising as a
159
+ result of this License or out of the use or inability to use the
160
+ Work (including but not limited to damages for loss of goodwill,
161
+ work stoppage, computer failure or malfunction, or any and all
162
+ other commercial damages or losses), even if such Contributor
163
+ has been advised of the possibility of such damages.
164
+
165
+ 9. Accepting Warranty or Additional Liability. While redistributing
166
+ the Work or Derivative Works thereof, You may choose to offer,
167
+ and charge a fee for, acceptance of support, warranty, indemnity,
168
+ or other liability obligations and/or rights consistent with this
169
+ License. However, in accepting such obligations, You may act only
170
+ on Your own behalf and on Your sole responsibility, not on behalf
171
+ of any other Contributor, and only if You agree to indemnify,
172
+ defend, and hold each Contributor harmless for any liability
173
+ incurred by, or claims asserted against, such Contributor by reason
174
+ of your accepting any such warranty or additional liability.
175
+
176
+ END OF TERMS AND CONDITIONS
177
+
178
+ APPENDIX: How to apply the Apache License to your work.
179
+
180
+ To apply the Apache License to your work, attach the following
181
+ boilerplate notice, with the fields enclosed by brackets "[]"
182
+ replaced with your own identifying information. (Don't include
183
+ the brackets!) The text should be enclosed in the appropriate
184
+ comment syntax for the file format. We also recommend that a
185
+ file or class name and description of purpose be included on the
186
+ same "printed page" as the copyright notice for easier
187
+ identification within third-party archives.
188
+
189
+ Copyright [yyyy] [name of copyright owner]
190
+
191
+ Licensed under the Apache License, Version 2.0 (the "License");
192
+ you may not use this file except in compliance with the License.
193
+ You may obtain a copy of the License at
194
+
195
+ http://www.apache.org/licenses/LICENSE-2.0
196
+
197
+ Unless required by applicable law or agreed to in writing, software
198
+ distributed under the License is distributed on an "AS IS" BASIS,
199
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
200
+ See the License for the specific language governing permissions and
201
+ limitations under the License.
README.md CHANGED
@@ -1,3 +1,306 @@
1
- ---
2
- license: apache-2.0
3
- ---
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+
2
+ <div align="center">
3
+
4
+ # S1-DeepResearch:面向长程深度研究的端到端模型
5
+
6
+ [![License](https://img.shields.io/badge/License-Apache%202.0-blue.svg?style=for-the-badge)](./LICENSE)
7
+ [![HuggingFace](https://img.shields.io/badge/🤗%20HuggingFace-S1--DeepResearch--15k-0040A1?style=for-the-badge)](https://huggingface.co/datasets/ScienceOne-AI/S1-DeepResearch-15k)
8
+ [![HuggingFace](https://img.shields.io/badge/🤗%20HuggingFace-S1--DeepResearch--32B-ffd21e?style=for-the-badge)](https://huggingface.co/ScienceOne-AI/S1-DeepResearch-32B)
9
+ [![ModelScope](https://img.shields.io/badge/🤖%20ModelScope-S1--DeepResearch--32B-mediumpurple?style=for-the-badge)](https://modelscope.cn/models/ScienceOne-AI/S1-DeepResearch-32B)
10
+
11
+ [English](./README_en.md) | 中文
12
+
13
+ </div>
14
+
15
+ <hr>
16
+
17
+ ## 🔥 最新动态 (News & Updates)
18
+
19
+ - **[2026/04/04]** 🎉 发布 [**S1-DeepResearch-32B**](https://huggingface.co/ScienceOne-AI/S1-DeepResearch-32B):面向长程深度研究的端到端旗舰模型,更侧重**真实场景落地**——在**长链复杂推理**之外,重点强化**深度研究指令遵循**、**深度调研报告写作**、**文件理解与生成**、**技能调用**等能力。在 20 项智能体基准能力评测中,相对基座 **Qwen3-32B** 全方位显著领先,整体性能接近主流闭源旗舰模型(**GPT 5.2**、**Claude 4.6**、**GLM-5**)。推理代码及 [15K 智能体训练轨迹数据(开源版本)](https://huggingface.co/datasets/ScienceOne-AI/S1-DeepResearch-15k)同步发布。
20
+ - **[2025/12/31]** 我们开源了 [**S1-DeepResearch-8B-Preview**](https://huggingface.co/ScienceOne-AI/S1-DeepResearch-8B-Preview):聚焦**通用长链路复杂推理**,以轻量参数探索深度研究场景下的可用空间。
21
+
22
+ ## 📝 概述 (Overview)
23
+
24
+ **S1-DeepResearch-32B** 是磐石团队(ScienceOne AI)研发的面向 **长程深度研究(Long-Horizon Deep Research)** 的端到端模型,其核心能力可概括为 **五大维度**:
25
+
26
+ - **长链复杂推理**:支持多阶段、多跳任务中的持续推理与行动推进,突破单步问答范式。通过跨文档检索、证据聚合、状态记忆与策略迭代,实现复杂任务中的路径规划、信息整合与结果收敛,确保推理过程的稳定性与结论的可靠性。
27
+
28
+ - **深度研究指令遵循**:精准解析深度研究场景下的多约束复杂指令,构建围绕「任务定义—方法机理—工具执行—结果呈现」等深度研究全链路的指令理解范式;并在认知、产物、执行与环境四层上协同约束,让复杂任务可控、过程可预期、结果与意图一致。
29
+
30
+ - **深度调研报告写作**:在信息整合之上输出可论证、可引用的报告体例;支持多源材料组织与证据核对,兼顾论述结构、可读性与事实可追溯,直接服务科研写作与决策研判。
31
+
32
+ - **文件理解与生成**:覆盖 PDF、表格、网页等多形态输入的理解,以及结构化、可交付的输出生成。在多轮工具增强交互中尽量保持语义与执行一致,形成「解析—加工—生成」的闭环,减轻科研与数据密集型流程中的重复手工环节。
33
+
34
+ - **技能使用(Skills)**:将文献检索、数据分析、实验设计、计算建模、可视化与报告生成等以可调用模块形式组织,按任务目标进行动态装配与渐进式加载,支撑从数据获取到结果呈现的连续工作流。
35
+
36
+ ### ✨ 核心特性
37
+
38
+ - **超长上下文建模**:支持 128K 上下文窗口,单会话承载更长证据链与多轮交互历史,适配长程研究任务。
39
+ - **长程工具调用**:可稳定执行 **150+** 轮连续工具调用,构建基于推理驱动的工具编排与决策闭环,实现多阶段任务的持续规划、执行与自我校正。
40
+ - **原生工具体系**:内置 **9** 种常用工具(如搜索、网页浏览、代码执行、命令行等),开箱即用。
41
+
42
+ ## 🚀 模型下载 (Model Download)
43
+
44
+ <div align="center">
45
+
46
+ | 模型名称 | 参数量 | 上下文长度 | 下载链接 |
47
+ | :---: | :---: | :---: | :---: |
48
+ | **S1-DeepResearch-32B** | 32B | 128k | [🤗 HuggingFace](https://huggingface.co/ScienceOne-AI/S1-DeepResearch-32B) \| [🤖 ModelScope](https://modelscope.cn/models/ScienceOne-AI/S1-DeepResearch-32B) |
49
+ | **S1-DeepResearch-8B-Preview** | 8B | 128k | [🤗 HuggingFace](https://huggingface.co/ScienceOne-AI/S1-DeepResearch-8B-Preview) \| [🤖 ModelScope](https://modelscope.cn/models/ScienceOne-AI/S1-DeepResearch-8B-Preview) |
50
+
51
+ </div>
52
+
53
+ ## 📊 性能评估 (Evaluation)
54
+
55
+ 我们在与模型 **五大能力** 相对应的 **5 个维度、共 20 项智能体能力基准** 上对 **S1-DeepResearch-32B** 进行了系统评估,各维度与基准对应关系如下:
56
+
57
+ - **长链复杂推理**:文本模态包括 GAIA (text)、BrowseComp、BrowseComp-ZH、XBench-DeepSearch、HLE (text);图文模态包括 LiveVQA、MM-Search、BrowseComp-VL、RealX-Bench、HLE-VL、MM-BrowseComp。
58
+ **深度研究指令遵循**:ComplexBench、DeepResearchIF (in-house)。
59
+ - **深度调研报告写作**:DeepResearch Bench、DeepResearch Bench II、Research Rubrics。
60
+ - **文件理解与生成**:GAIA (file)、GTA、FileSys (in-house)。
61
+ - **技能调用**:SkillsUse (in-house)。
62
+
63
+ <div align="center">
64
+
65
+ <img src="./assets/benchmark_performance.png" alt="S1-DeepResearch-32B 与基座及闭源旗舰在 20 项智能体基准上的性能对比" width="800" />
66
+
67
+ </div>
68
+
69
+ **S1-DeepResearch-32B** 在所有榜单上相对基座 **Qwen3-32B** 及更大参数量模型 **Qwen3-235B** 均取得显著优势;在深度研究指令遵循、文件理解与生成、技能调用等维度的内部榜单中,亦超越 **Qwen3.5-397B**。整体性能接近主流闭源旗舰(**GPT 5.2**、**Claude 4.6**、**GLM-5**、**Kimi-K2.5**)。开放榜单与内部任务的结果相互印证,表明 S1-DeepResearch-32B 已具备面向真实业务场景部署与落地的能力。
70
+
71
+ ## 📂 任务样例 (Cases)
72
+
73
+ 以下展示 S1-DeepResearch-32B 在技能调用方面的案例,模型在进行材料建模的过程中,首先调用了科学技能`scientific-skills/pymatgen`补充专业知识,然后根据技能的指导,使用`pymatgen`完成建模,并输出cif文件。
74
+
75
+ <div align="center">
76
+
77
+ <img src="./cases/case_skills_science_en_01.png" alt="英文科学 Skills 协同任务样例节选" width="600" />
78
+
79
+ </div>
80
+
81
+ 更多案例将持续补充至 `cases/` 目录。
82
+
83
+ ## 🚀 快速开始
84
+
85
+ ### 环境配置
86
+
87
+ 1. **安装依赖**:
88
+
89
+ ```bash
90
+ pip install -r requirements.txt
91
+ ```
92
+
93
+ 2. **Docker 配置**:
94
+
95
+ 项目提供官方预构建 Docker 镜像,支持快速部署与运行。系统包含两个核心镜像:
96
+
97
+ - **toolkits-api**:工具服务主容器(对外提供 API 能力)
98
+ - **code-sandbox**:代码执行沙箱镜像(由服务按需创建,用于隔离执行任务)
99
+
100
+ 当前执行类工具(`execute_code`、`bash`)采用 **Docker-outside-of-Docker(DooD)** 模式:通过挂载宿主机 Docker socket,由工具容器直接调用宿主机 Docker daemon,按需创建隔离的沙箱容器执行任务。
101
+
102
+ **镜像地址:**
103
+
104
+ ```text
105
+ ghcr.io/wenge-research/toolkits-api:v2.0.260403
106
+ ghcr.io/wenge-research/code-sandbox:v1.0.260403
107
+ ```
108
+
109
+ **拉取镜像:**
110
+
111
+ ```text
112
+ docker pull ghcr.io/wenge-research/toolkits-api:v2.0.260403
113
+ docker pull ghcr.io/wenge-research/code-sandbox:v1.0.260403
114
+ ```
115
+
116
+ **运行容器:**
117
+
118
+ 运行容器时需要挂载配置文件 `src/config.yaml`、Docker socket(用于沙箱执行),以及日志和缓存目录(可选):
119
+
120
+ ```bash
121
+ docker run -d \
122
+ --name toolkits-api \
123
+ --network host \
124
+ -e API_PORT=8080 \
125
+ -e API_WORKERS=4 \
126
+ -e HOST_LOG_DIR=$(pwd)/logs \
127
+ -e SANDBOX_MODE=docker \
128
+ -e HTTP_PROXY=http://your-proxy:port \
129
+ -e HTTPS_PROXY=http://your-proxy:port \
130
+ -e PROXY_URL=http://your-proxy:port \
131
+ -v /etc/localtime:/etc/localtime:ro \
132
+ -v /etc/timezone:/etc/timezone:ro \
133
+ -v /var/run/docker.sock:/var/run/docker.sock \
134
+ -v $(pwd)/src/config.yaml:/app/src/config.yaml \
135
+ -v $(pwd)/logs:/app/logs \
136
+ -v $(pwd)/cache:/app/cache \
137
+ ghcr.io/wenge-research/toolkits-api:v2.0.260403
138
+ ```
139
+
140
+ **参数说明**:
141
+
142
+ | 参数 | 说明 |
143
+ |------|------|
144
+ | `-e API_PORT` | 服务监听端口,默认 8080 |
145
+ | `-e API_WORKERS` | worker 进程数,根据并发需求调整,默认 1 |
146
+ | `-e SANDBOX_MODE=docker` | 启用 Docker 沙箱模式(否则为 subprocess) |
147
+ | `-e HOST_LOG_DIR` | 当启用 Docker 沙箱模式时,需要传入宿主机日志目录,供沙箱容器挂载 |
148
+ | `-e HTTP_PROXY / HTTPS_PROXY / PROXY_URL` | 代理配置(可选) |
149
+ | `--network host` | 如果使用宿主机的代理端口,需要设置此参数(可选) |
150
+ | `-v /etc/localtime:/etc/localtime:ro` | 同步宿主机时区(只读) |
151
+ | `-v /etc/timezone:/etc/timezone:ro` | 同步宿主机时区文件(只读) |
152
+ | `-v /var/run/docker.sock` | 当启用 Docker 沙箱模式时,需要挂载宿主机 Docker socket,用于调度沙箱容器 |
153
+ | `-v config.yaml` | 挂载配置文件(API Key、模型配置、沙箱配置等) |
154
+ | `-v logs` | 挂载日志目录(可选) |
155
+ | `-v cache` | 挂载缓存目录,缓存数据形式参考容器内 /app/cache 中文件进行构造(可选) |
156
+
157
+
158
+ 3. **配置工具服务地址**
159
+
160
+ 推荐通过 JSON 配置文件或环境变量覆盖默认项。不建议直接编辑 `utils/configs.py`。
161
+
162
+ **方式一(推荐):本地 JSON 配置**
163
+
164
+ 从示例文件复制并生成本地配置:
165
+
166
+ ```bash
167
+ cp utils/config/config.example.json utils/config/config.local.json
168
+ ```
169
+
170
+ 在 `utils/config/config.local.json` 中设置工具服务基地址,例如:
171
+
172
+ ```json
173
+ {
174
+ "TOOLS_SERVER_BASE_ENDPOINT_URL": [
175
+ "http://127.0.0.1:8080"
176
+ ]
177
+ }
178
+ ```
179
+
180
+ **方式二:环境变量**
181
+
182
+ 指定配置文件路径,或对单项进行覆盖:
183
+
184
+ ```bash
185
+ export S1_DR_CONFIG_JSON="utils/config/config.local.json"
186
+ # 或仅覆盖 TOOLS_SERVER_BASE_ENDPOINT_URL
187
+ export TOOLS_SERVER_BASE_ENDPOINT_URL='["http://127.0.0.1:8080"]'
188
+ ```
189
+
190
+ 4. **配置 API 密钥**
191
+
192
+ 建议通过 `utils/config/config.local.json` 配置各服务商密钥,或覆盖同名环境变量:
193
+
194
+ ```json
195
+ {
196
+ "AIHUBMIX_KEY": "<your_aihubmix_key>",
197
+ "AZURE_KEY": "<your_azure_key>",
198
+ "VOLCANO_KEY": "<your_volcano_key>",
199
+ "ALIYUN_KEY": "<your_aliyun_key>"
200
+ }
201
+ ```
202
+
203
+ 环境变量示例:
204
+
205
+ ```bash
206
+ export AIHUBMIX_KEY="<your_aihubmix_key>"
207
+ export AZURE_KEY="<your_azure_key>"
208
+ export VOLCANO_KEY="<your_volcano_key>"
209
+ export ALIYUN_KEY="<your_aliyun_key>"
210
+ ```
211
+
212
+ ### 单条推理示例
213
+
214
+ ```python
215
+ import asyncio
216
+
217
+ from server.llm_api import LLMClient
218
+ from server.tool_api import return_all_tools
219
+ from inference.run_single_inference import run_one_query
220
+ from utils.prompts import DEEPRESEARCH_SYSTEM_PROMPT
221
+
222
+
223
+ async def main():
224
+ llm_client_urls = ["http://127.0.0.1:10777/v1/chat/completions"]
225
+ llm_client_models = ["S1-DeepResearch-32B"]
226
+ llm_client = LLMClient(llm_client_urls, llm_client_models)
227
+
228
+ all_tools = return_all_tools()
229
+
230
+ result = await run_one_query(
231
+ llm=llm_client,
232
+ user_query="阿里巴巴成立时,18位创始团队成员中,姓马、姓蔡、姓张的创始人的平均年龄,保留一位小数",
233
+ file_path=[],
234
+ system=DEEPRESEARCH_SYSTEM_PROMPT,
235
+ max_rounds=15,
236
+ temperature=0.4,
237
+ top_p=0.95,
238
+ extra_payload={},
239
+ debug=True,
240
+ all_tools=all_tools,
241
+ system_format="deep_research",
242
+ log_label="quick_start_single",
243
+ )
244
+
245
+ final_answer = result[-1]["final_answer"] if result else ""
246
+ print(final_answer)
247
+
248
+
249
+ if __name__ == "__main__":
250
+ asyncio.run(main())
251
+ ```
252
+
253
+ 说明:
254
+
255
+ - `file_path` 在当前实现中应传 `list`(如 `[]` 或 `['/path/a.pdf']`)。
256
+ - `system_format` 可选:`deep_research`、`azure`、`aihubmix`、`aihubmix_claude`、`aihubmix_glm`、`volcano`、`aliyun`。
257
+
258
+ ### 批量推理示例
259
+
260
+ 本地/vLLM:
261
+
262
+ ```bash
263
+ cd inference
264
+ cp run_batch_inference_demo.sh run_batch_local.sh
265
+ # 编辑 run_batch_local.sh 里的参数(LLM_CLIENT_URLS、LLM_CLIENT_MODELS、TEST_DATA_FILE 等)
266
+ bash run_batch_local.sh
267
+ ```
268
+
269
+ 在线平台:
270
+
271
+ ```bash
272
+ cd inference
273
+ cp run_batch_inference_online_demo.sh run_batch_online.sh
274
+ # 编辑 run_batch_online.sh 里的参数(LLM_CLIENT_URLS、LLM_CLIENT_MODELS、SYSTEM_FORMAT 等)
275
+ bash run_batch_online.sh
276
+ ```
277
+
278
+ 日志查看:
279
+
280
+ ```bash
281
+ tail -f run_logs/*.log
282
+ ```
283
+
284
+ 更多推理功能详见 📖 **[进阶使用方法](./inference/README.md)**。
285
+
286
+ ## 🔭 未来工作 (Future Work)
287
+
288
+ - **S1-DeepResearch 论文:** 预计两周内发布S1-DeepResearch论文,详细介绍支撑 S1-DeepResearch 五大能力特性的数据合成策略、模型训练与推理机制设计,以及推理时扩展等关键评测结论与实践经验。
289
+ - **S1-DeepResearch-VL 版本:** 2026年上半年,将推出支持视觉理解与跨模态推理的 S1-DeepResearch-VL 模型,以覆盖更丰富的研究型任务场景。
290
+
291
+ ## 📜 协议 (License)
292
+
293
+ 本项目采用 **[Apache License 2.0](./LICENSE)** 开源协议。
294
+
295
+ ## 引用 (Citation)
296
+
297
+ 如果您觉得 S1-DeepResearch 对您的工作有帮助,请考虑引用我们的工作:
298
+
299
+ ```bibtex
300
+ @software{s1deepresearch2026,
301
+ title={S1-DeepResearch: End-to-End Deep Research Models},
302
+ author={ScienceOne Team},
303
+ year={2026},
304
+ url={https://github.com/ScienceOne-AI/S1-DeepResearch},
305
+ }
306
+ ```
README_en.md ADDED
@@ -0,0 +1,306 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+
2
+ <div align="center">
3
+
4
+ # S1-DeepResearch: End-to-End Models for Long-Horizon Deep Research
5
+
6
+ [![License](https://img.shields.io/badge/License-Apache%202.0-blue.svg?style=for-the-badge)](./LICENSE)
7
+ [![HuggingFace](https://img.shields.io/badge/🤗%20HuggingFace-S1--DeepResearch--15k-0040A1?style=for-the-badge)](https://huggingface.co/datasets/ScienceOne-AI/S1-DeepResearch-15k)
8
+ [![HuggingFace](https://img.shields.io/badge/🤗%20HuggingFace-S1--DeepResearch--32B-ffd21e?style=for-the-badge)](https://huggingface.co/ScienceOne-AI/S1-DeepResearch-32B)
9
+ [![ModelScope](https://img.shields.io/badge/🤖%20ModelScope-S1--DeepResearch--32B-mediumpurple?style=for-the-badge)](https://modelscope.cn/models/ScienceOne-AI/S1-DeepResearch-32B)
10
+
11
+ English | [中文](./README.md)
12
+
13
+ </div>
14
+
15
+ <hr>
16
+
17
+ ## 🔥 News & Updates
18
+
19
+ - **[2026/04/04]** 🎉 We release [**S1-DeepResearch-32B**](https://huggingface.co/ScienceOne-AI/S1-DeepResearch-32B), an end-to-end agentic model for long-horizon deep research, with stronger emphasis on **real-world deployment**—beyond **long-chain complex reasoning**, it focuses on **deep-research instruction following**, **deep research report writing**, **file understanding and generation**, and **skills using**. On **20 agentic capability benchmarks**, it **outperforms the base model Qwen3-32B by a clear margin across the board**, and overall performance is close to mainstream closed-source flagship models (**GPT 5.2**, **Claude 4.6**, **GLM-5**). Inference code and the [**15K agent training trajectory dataset**](https://huggingface.co/datasets/ScienceOne-AI/S1-DeepResearch-15k) (a subset of the full training data) are released together.
20
+ - **[2025/12/31]** We open-sourced [**S1-DeepResearch-8B-Preview**](https://huggingface.co/ScienceOne-AI/S1-DeepResearch-8B-Preview), focusing on **general long-chain complex reasoning** and exploring what is feasible in deep research at a smaller parameter scale.
21
+
22
+ ## 📝 Overview
23
+
24
+ **S1-DeepResearch-32B** is an end-to-end model developed by the ScienceOne AI team for **long-horizon deep research**. Its core capabilities span **five dimensions**:
25
+
26
+ - **Long-chain complex reasoning**: Supports sustained reasoning and action across multi-stage, multi-hop tasks, going beyond single-step Q&A. Through cross-document retrieval, evidence aggregation, state memory, and policy iteration, it plans paths, integrates information, and converges results in complex settings, keeping the reasoning process stable and conclusions reliable.
27
+
28
+ - **Deep research instruction following**: Parses multi-constraint instructions in deep research scenarios and builds an instruction-understanding paradigm along the full research chain—**task definition → mechanisms → tool execution → result presentation**—with coordinated constraints across cognition, artifacts, execution, and environment so complex tasks stay controllable, processes predictable, and outputs aligned with intent.
29
+
30
+ - **Deep research report writing**: Produces arguable, citable report-style outputs on top of information integration; organizes multi-source material and evidence checks while balancing structure, readability, and traceability—suited for scientific writing and decision support.
31
+
32
+ - **File understanding and generation**: Covers PDFs, tables, web pages, and other modalities for input understanding, plus structured, deliverable outputs. In multi-turn tool-augmented interaction, it keeps semantics and execution aligned, closing the loop **parse → process → generate** and reducing repetitive manual work in research and data-heavy workflows.
33
+
34
+ - **Skills Using**: Organizes literature search, data analysis, experiment design, computational modeling, visualization, report generation, and more as callable modules, dynamically assembled and progressively loaded toward task goals, supporting continuous workflows from data acquisition to presentation.
35
+
36
+ ### ✨ Key Features
37
+
38
+ - **Ultra-long context modeling**: A **128K** context window lets a single session hold longer evidence chains and multi-turn interaction history, suited to long-horizon research tasks.
39
+ - **Long-horizon tool calling**: Stably runs **150+** consecutive tool-call rounds, building reasoning-driven tool orchestration and a decision closed loop—enabling continuous planning, execution, and self-correction across multi-stage tasks.
40
+ - **Native tool ecosystem**: **9** built-in common tools (e.g., search, web browsing, code execution, command line) ready to use out of the box.
41
+
42
+ ## 🚀 Model Download
43
+
44
+ <div align="center">
45
+
46
+ | Model | Parameters | Context length | Download |
47
+ | :---: | :---: | :---: | :---: |
48
+ | **S1-DeepResearch-32B** | 32B | 128k | [🤗 HuggingFace](https://huggingface.co/ScienceOne-AI/S1-DeepResearch-32B) \| [🤖 ModelScope](https://modelscope.cn/models/ScienceOne-AI/S1-DeepResearch-32B) |
49
+ | **S1-DeepResearch-8B-Preview** | 8B | 128k | [🤗 HuggingFace](https://huggingface.co/ScienceOne-AI/S1-DeepResearch-8B-Preview) \| [🤖 ModelScope](https://modelscope.cn/models/ScienceOne-AI/S1-DeepResearch-8B-Preview) |
50
+
51
+ </div>
52
+
53
+ ## 📊 Evaluation
54
+
55
+ We systematically evaluated **S1-DeepResearch-32B** on **20 agentic capability benchmarks** grouped into **5 dimensions** aligned with the five capability areas:
56
+
57
+ - **Long-chain complex reasoning**: Text—GAIA (text), BrowseComp, BrowseComp-ZH, XBench-DeepSearch, HLE (text); vision-language—LiveVQA, MM-Search, BrowseComp-VL, RealX-Bench, HLE-VL, MM-BrowseComp.
58
+ - **Deep research instruction following**: ComplexBench, DeepResearchIF (in-house).
59
+ - **Deep research report writing**: DeepResearch Bench, DeepResearch Bench II, Research Rubrics.
60
+ - **File understanding and generation**: GAIA (file), GTA, FileSys (in-house).
61
+ - **Skills Using**: SkillsUse (in-house).
62
+
63
+ <div align="center">
64
+
65
+ <img src="./assets/benchmark_performance.png" alt="S1-DeepResearch-32B vs. base and closed-source flagships on 20 agentic benchmarks" width="800" />
66
+
67
+ </div>
68
+
69
+ **S1-DeepResearch-32B** gains a **clear advantage** over the base **Qwen3-32B** and the larger **Qwen3-235B** on all listed benchmarks; on in-house leaderboards for deep-research instruction following, file understanding and generation, and skill invocation, it also **surpasses Qwen3.5-397B**. Overall performance is close to mainstream closed-source flagships (**GPT 5.2**, **Claude 4.6**, **GLM-5**, **Kimi-K2.5**). Public benchmarks and internal tasks are mutually consistent, indicating that S1-DeepResearch-32B is **ready for real business deployment**.
70
+
71
+ ## 📂 Example Cases
72
+
73
+ Below is an example of **S1-DeepResearch-32B** using skills: during materials modeling, the model first invokes the scientific skill `scientific-skills/pymatgen` for domain knowledge, then follows the skill guidance to run modeling with `pymatgen` and outputs a CIF file.
74
+
75
+ <div align="center">
76
+
77
+ <img src="./cases/case_skills_science_en_01.png" alt="English scientific skills collaboration example" width="600" />
78
+
79
+ </div>
80
+
81
+ More cases will be added under the `cases/` directory.
82
+
83
+ ## 🚀 Quick Start
84
+
85
+ ### Environment setup
86
+
87
+ 1. **Install dependencies**:
88
+
89
+ ```bash
90
+ pip install -r requirements.txt
91
+ ```
92
+
93
+ 2. **Docker setup**
94
+
95
+ The project provides official pre-built Docker images for fast deployment. There are two core images:
96
+
97
+ - **toolkits-api**: Main tool-service container (exposes API capabilities)
98
+ - **code-sandbox**: Code-execution sandbox image (created on demand by the service for isolated runs)
99
+
100
+ Execution-oriented tools (`execute_code`, `bash`) use **Docker-outside-of-Docker (DooD)**: by mounting the host Docker socket, the tool container talks to the host Docker daemon and creates isolated sandbox containers as needed.
101
+
102
+ **Image tags:**
103
+
104
+ ```text
105
+ ghcr.io/wenge-research/toolkits-api:v2.0.260403
106
+ ghcr.io/wenge-research/code-sandbox:v1.0.260403
107
+ ```
108
+
109
+ **Pull images:**
110
+
111
+ ```text
112
+ docker pull ghcr.io/wenge-research/toolkits-api:v2.0.260403
113
+ docker pull ghcr.io/wenge-research/code-sandbox:v1.0.260403
114
+ ```
115
+
116
+ **Run the container**
117
+
118
+ Mount `src/config.yaml`, the Docker socket (for sandbox execution), and optionally log and cache directories:
119
+
120
+ ```bash
121
+ docker run -d \
122
+ --name toolkits-api \
123
+ --network host \
124
+ -e API_PORT=8080 \
125
+ -e API_WORKERS=4 \
126
+ -e HOST_LOG_DIR=$(pwd)/logs \
127
+ -e SANDBOX_MODE=docker \
128
+ -e HTTP_PROXY=http://your-proxy:port \
129
+ -e HTTPS_PROXY=http://your-proxy:port \
130
+ -e PROXY_URL=http://your-proxy:port \
131
+ -v /etc/localtime:/etc/localtime:ro \
132
+ -v /etc/timezone:/etc/timezone:ro \
133
+ -v /var/run/docker.sock:/var/run/docker.sock \
134
+ -v $(pwd)/src/config.yaml:/app/src/config.yaml \
135
+ -v $(pwd)/logs:/app/logs \
136
+ -v $(pwd)/cache:/app/cache \
137
+ ghcr.io/wenge-research/toolkits-api:v2.0.260403
138
+ ```
139
+
140
+ **Parameter reference**
141
+
142
+ | Flag / env | Description |
143
+ |------|------|
144
+ | `-e API_PORT` | Listen port, default 8080 |
145
+ | `-e API_WORKERS` | Number of worker processes; tune for concurrency, default 1 |
146
+ | `-e SANDBOX_MODE=docker` | Enable Docker sandbox mode (otherwise subprocess) |
147
+ | `-e HOST_LOG_DIR` | Host log directory for sandbox mounts when Docker sandbox is enabled |
148
+ | `-e HTTP_PROXY / HTTPS_PROXY / PROXY_URL` | Proxy settings (optional) |
149
+ | `--network host` | Use if you rely on a proxy bound on the host (optional) |
150
+ | `-v /etc/localtime:/etc/localtime:ro` | Sync host timezone (read-only) |
151
+ | `-v /etc/timezone:/etc/timezone:ro` | Sync host timezone file (read-only) |
152
+ | `-v /var/run/docker.sock` | Required for Docker sandbox mode to schedule sandbox containers |
153
+ | `-v config.yaml` | Mount config (API keys, model and sandbox settings) |
154
+ | `-v logs` | Mount log directory (optional) |
155
+ | `-v cache` | Mount cache directory; structure mirrors `/app/cache` inside the container (optional) |
156
+
157
+
158
+ 3. **Configure the tool service URL**
159
+
160
+ Prefer JSON config or environment variables to override defaults. Avoid editing `utils/configs.py` directly.
161
+
162
+ **Option A (recommended): local JSON**
163
+
164
+ Copy from the example and edit locally:
165
+
166
+ ```bash
167
+ cp utils/config/config.example.json utils/config/config.local.json
168
+ ```
169
+
170
+ Set the tool service base URL in `utils/config/config.local.json`, for example:
171
+
172
+ ```json
173
+ {
174
+ "TOOLS_SERVER_BASE_ENDPOINT_URL": [
175
+ "http://127.0.0.1:8080"
176
+ ]
177
+ }
178
+ ```
179
+
180
+ **Option B: environment variables**
181
+
182
+ Point to a config file or override individual keys:
183
+
184
+ ```bash
185
+ export S1_DR_CONFIG_JSON="utils/config/config.local.json"
186
+ # or override TOOLS_SERVER_BASE_ENDPOINT_URL only
187
+ export TOOLS_SERVER_BASE_ENDPOINT_URL='["http://127.0.0.1:8080"]'
188
+ ```
189
+
190
+ 4. **API keys**
191
+
192
+ Prefer `utils/config/config.local.json` for provider keys, or mirror the same names with environment variables:
193
+
194
+ ```json
195
+ {
196
+ "AIHUBMIX_KEY": "<your_aihubmix_key>",
197
+ "AZURE_KEY": "<your_azure_key>",
198
+ "VOLCANO_KEY": "<your_volcano_key>",
199
+ "ALIYUN_KEY": "<your_aliyun_key>"
200
+ }
201
+ ```
202
+
203
+ Environment variables:
204
+
205
+ ```bash
206
+ export AIHUBMIX_KEY="<your_aihubmix_key>"
207
+ export AZURE_KEY="<your_azure_key>"
208
+ export VOLCANO_KEY="<your_volcano_key>"
209
+ export ALIYUN_KEY="<your_aliyun_key>"
210
+ ```
211
+
212
+ ### Single-query inference
213
+
214
+ ```python
215
+ import asyncio
216
+
217
+ from server.llm_api import LLMClient
218
+ from server.tool_api import return_all_tools
219
+ from inference.run_single_inference import run_one_query
220
+ from utils.prompts import DEEPRESEARCH_SYSTEM_PROMPT
221
+
222
+
223
+ async def main():
224
+ llm_client_urls = ["http://127.0.0.1:10777/v1/chat/completions"]
225
+ llm_client_models = ["S1-DeepResearch-32B"]
226
+ llm_client = LLMClient(llm_client_urls, llm_client_models)
227
+
228
+ all_tools = return_all_tools()
229
+
230
+ result = await run_one_query(
231
+ llm=llm_client,
232
+ user_query="阿里巴巴成立时,18位创始团队成员中,姓马、姓蔡、姓张的创始人的平均年龄,保留一位小数",
233
+ file_path=[],
234
+ system=DEEPRESEARCH_SYSTEM_PROMPT,
235
+ max_rounds=15,
236
+ temperature=0.4,
237
+ top_p=0.95,
238
+ extra_payload={},
239
+ debug=True,
240
+ all_tools=all_tools,
241
+ system_format="deep_research",
242
+ log_label="quick_start_single",
243
+ )
244
+
245
+ final_answer = result[-1]["final_answer"] if result else ""
246
+ print(final_answer)
247
+
248
+
249
+ if __name__ == "__main__":
250
+ asyncio.run(main())
251
+ ```
252
+
253
+ Notes:
254
+
255
+ - `file_path` must be a `list` in the current implementation (e.g. `[]` or `['/path/a.pdf']`).
256
+ - `system_format` options: `deep_research`, `azure`, `aihubmix`, `aihubmix_claude`, `aihubmix_glm`, `volcano`, `aliyun`.
257
+
258
+ ### Batch inference
259
+
260
+ Local / vLLM:
261
+
262
+ ```bash
263
+ cd inference
264
+ cp run_batch_inference_demo.sh run_batch_local.sh
265
+ # Edit run_batch_local.sh (LLM_CLIENT_URLS, LLM_CLIENT_MODELS, TEST_DATA_FILE, etc.)
266
+ bash run_batch_local.sh
267
+ ```
268
+
269
+ Hosted APIs:
270
+
271
+ ```bash
272
+ cd inference
273
+ cp run_batch_inference_online_demo.sh run_batch_online.sh
274
+ # Edit run_batch_online.sh (LLM_CLIENT_URLS, LLM_CLIENT_MODELS, SYSTEM_FORMAT, etc.)
275
+ bash run_batch_online.sh
276
+ ```
277
+
278
+ Logs:
279
+
280
+ ```bash
281
+ tail -f run_logs/*.log
282
+ ```
283
+
284
+ 📖 **[Advanced usage](./inference/README.md)**.
285
+
286
+ ## 🔭 Future Work
287
+
288
+ - **S1-DeepResearch Paper:** We expect to release the paper within about two weeks, covering data synthesis for the five capability areas, training and inference design, test-time scaling, and key evaluation takeaways.
289
+ - **S1-DeepResearch-VL:** In the first half of 2026, we plan to release **S1-DeepResearch-VL** with vision understanding and cross-modal reasoning for richer research-style tasks.
290
+
291
+ ## 📜 License
292
+
293
+ This project is licensed under the **[Apache License 2.0](./LICENSE)**.
294
+
295
+ ## Citation
296
+
297
+ If S1-DeepResearch is useful to your work, please consider citing:
298
+
299
+ ```bibtex
300
+ @software{s1deepresearch2026,
301
+ title={S1-DeepResearch: End-to-End Deep Research Models},
302
+ author={ScienceOne Team},
303
+ year={2026},
304
+ url={https://github.com/ScienceOne-AI/S1-DeepResearch},
305
+ }
306
+ ```
assets/benchmark_performance.png ADDED

Git LFS Details

  • SHA256: e321dc036928d58c2d0491fc86d3b368b903da2d8c9f04e2aaaae1e1873d4e09
  • Pointer size: 132 Bytes
  • Size of remote file: 1.36 MB
cases/case_DeepResearchIF_general_en_01.png ADDED

Git LFS Details

  • SHA256: 3464e28b961abc897e18b5936ff64205c3788db7e063880273e061c6c81b5e56
  • Pointer size: 132 Bytes
  • Size of remote file: 2.16 MB
cases/case_DeepResearchIF_general_zh_01.png ADDED

Git LFS Details

  • SHA256: 99b2338a3b7897b89f65a4351250ab795fbdc0652e839e92544009e6436b78c8
  • Pointer size: 132 Bytes
  • Size of remote file: 2.08 MB
cases/case_DeepResearchIF_science_en_01.png ADDED

Git LFS Details

  • SHA256: ad1fad3f60ba985fb39e0f11d7ad8aa1c3a9484b249ecbd09c6b0347ce696ebc
  • Pointer size: 132 Bytes
  • Size of remote file: 2.18 MB
cases/case_DeepResearchIF_science_zh_01.png ADDED

Git LFS Details

  • SHA256: 99b55c00fbf5ff3ec9ba4273e51299b0b215c5b5ac2db1a9d899e735390b1c54
  • Pointer size: 132 Bytes
  • Size of remote file: 2.4 MB
cases/case_deepresearch-report-writing_general_zh_01.png ADDED

Git LFS Details

  • SHA256: ca784be83c6940f8a3cf445e071938c37c6ec0d58c932d15aee6d8eca41f5c11
  • Pointer size: 132 Bytes
  • Size of remote file: 2.63 MB
cases/case_deepresearch-report-writing_science_en_01.png ADDED

Git LFS Details

  • SHA256: 29f2e2dfb62696329af323cac80c4c4b734ba5facc7372d5e4e17ddde3440da2
  • Pointer size: 132 Bytes
  • Size of remote file: 2.35 MB
cases/case_deepresearch-report-writing_science_zh_01.png ADDED

Git LFS Details

  • SHA256: c71665a8ef51a5ca3a5871e35dff6ac554e5dcdf0136a58947e8adb09e542a75
  • Pointer size: 132 Bytes
  • Size of remote file: 2.62 MB
cases/case_file-understanding-generation_general_en_01.png ADDED

Git LFS Details

  • SHA256: cea3755b7d5c696cf547b9212a3ad5665398a8318ac97d183381a4d2069b9bca
  • Pointer size: 132 Bytes
  • Size of remote file: 1.84 MB
cases/case_file-understanding-generation_science_zh_01.png ADDED

Git LFS Details

  • SHA256: 76845d578b3cbdd7be5334a22dfedca6d44180ef22d67b63ceaf97c7de766322
  • Pointer size: 132 Bytes
  • Size of remote file: 2.4 MB
cases/case_long-horizon-reasoning_general_en_01.png ADDED

Git LFS Details

  • SHA256: 18c574807d8a8551303aff231ccd173fe5cfcc48569cc1a3562abbbf448b3514
  • Pointer size: 132 Bytes
  • Size of remote file: 1.95 MB
cases/case_long-horizon-reasoning_general_en_02.png ADDED

Git LFS Details

  • SHA256: e35aea9449dc126867159be6fe7ba4bf86e9e30c0c59c006a8402fa0213ee67b
  • Pointer size: 132 Bytes
  • Size of remote file: 2.08 MB
cases/case_long-horizon-reasoning_general_zh_01.png ADDED

Git LFS Details

  • SHA256: de7da110e0ff422e5e781491ca0d9b11495ae3a8fde0537ff8271c82dd3f6b2f
  • Pointer size: 132 Bytes
  • Size of remote file: 2.19 MB
cases/case_long-horizon-reasoning_general_zh_02.png ADDED

Git LFS Details

  • SHA256: 9818e91488abb2567d3b39de9f2cd2a947128b7652b72b68b8cf749fa224ad5b
  • Pointer size: 132 Bytes
  • Size of remote file: 2.07 MB
cases/case_skills_science_en_01.png ADDED

Git LFS Details

  • SHA256: 31b8e4bf2026f880100bb80bd999b2e4320ccc6b162f9eca2c6565391f954520
  • Pointer size: 132 Bytes
  • Size of remote file: 2.05 MB
cases/case_skills_science_zh_01.png ADDED

Git LFS Details

  • SHA256: b2310377245f09ca0fd8912cafb0556347a705812cb7a22fed968f5ea73c7e98
  • Pointer size: 132 Bytes
  • Size of remote file: 2.24 MB
inference/README.md ADDED
@@ -0,0 +1,224 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ 中文 | [English](./README_en.md)
2
+
3
+ # S1-deepresearch 推理框架
4
+
5
+ ## 核心特性
6
+
7
+ - **多 LLM 客户端**: 支持 vLLM、Azure OpenAI、AIHubMix 等多种 LLM 服务
8
+ - **丰富的工具集**: 提供 9 种工具,涵盖搜索、网页访问、文件解析、代码执行、多模态问答、bash 等
9
+ - **批量推理**: 支持并发批量推理,自动断点续传,定期保存结果
10
+ - **单条推理**: 支持单条查询的详细调试和测试
11
+ - **负载均衡**: 支持多 LLM 节点的负载均衡和一致性调度
12
+ - **详细日志**: 为每个查询生成独立的日志文件,便于问题追踪和分析
13
+
14
+ ## 项目结构(当前)
15
+
16
+ ```text
17
+ ./
18
+ ├── run_batch_inference_demo.sh # 本地/vLLM 脚本模板
19
+ ├── run_batch_inference_online_demo.sh # 在线平台脚本模板
20
+ ├── inference/
21
+ │ ├── run_batch_inference.py
22
+ │ └── run_single_inference.py
23
+ ├── server/
24
+ ├── tool_kits/
25
+ ├── utils/
26
+ │ └── config/
27
+ │ ├── config.example.json
28
+ │ └── README.md
29
+ ├── models/tokenizer/
30
+ └── test_all_tools.py
31
+ ```
32
+
33
+ ## 快速开始
34
+
35
+ ### 1. 安装依赖
36
+
37
+ ```bash
38
+ pip install -r requirements.txt
39
+ ```
40
+
41
+ ### 2. 配置(推荐 JSON 或环境变量)
42
+
43
+ 配置优先级:`自定义 JSON > 环境变量 > utils/config.py 默认值`。
44
+
45
+ 常用做法:
46
+
47
+ ```bash
48
+ cp utils/config/config.example.json utils/config/config.local.json
49
+ ```
50
+
51
+ 然后按需修改 `config.local.json`,例如:
52
+
53
+ - `TOOLS_SERVER_BASE_ENDPOINT_URL`
54
+ - `AIHUBMIX_KEY` / `AZURE_KEY` / `VOLCANO_KEY` / `ALIYUN_KEY`
55
+ - `CLIENT_TIMEOUT`
56
+
57
+ 也可以通过环境变量覆盖,例如:
58
+
59
+ ```bash
60
+ export S1_DR_CONFIG_JSON="utils/config/config.local.json"
61
+ ```
62
+
63
+ ### 3. 准备输入 JSONL
64
+
65
+ 输入文件每行一个 JSON。最少建议包含 `question`,通常同时包含 `id` 与 `file_path`。
66
+
67
+ #### 3.1 jsonl 示例(涉及文件输入)
68
+
69
+ ```json
70
+ {"id":"query_001","question":"阿里巴巴成立时,18位创始团队成员中,姓马、姓蔡、姓张的创始人的平均年龄,保留一位小数","file_path":[]}
71
+ {"id":"query_002","question":"阅读当前说明书,大疆发布的起飞重量最大的AIR系列无人机飞完半程马拉松,电池还剩多少毫安时的电能?(注1:假设水平无风,最低耗能的情况为最大航速的60%飞行;注2:耗电可以按最长飞行时间换算)","file_path":["/path/to/file.pdf"]}
72
+ ```
73
+
74
+ #### 3.2 jsonl 示例(涉及 Skill 使用)
75
+
76
+ ```json
77
+ {"id":"query_003","question":"Use pymatgen to build a simple TiO2 surface slab. Please generate a common low-index surface, report the Miller index, slab thickness, and vacuum size, and briefly describe the resulting surface structure.","skills":[{"name": "skill_name1", "description": "description1", "skill_path": "skill_path1"}, {"name": "skill_name2", "description": "description2", "skill_path": "skill_path2"}]}
78
+ ```
79
+
80
+ ## 推荐启动方式:复制脚本后运行
81
+
82
+ ### A. 本地 / vLLM(`run_batch_inference_demo.sh`)
83
+
84
+ ```bash
85
+ cp run_batch_inference_demo.sh run_batch_local.sh
86
+ mkdir -p run_logs
87
+ # 编辑 run_batch_local.sh 中的参数
88
+ bash run_batch_local.sh
89
+ ```
90
+
91
+ 说明:
92
+
93
+ - 脚本内部已使用 `nohup ... &` 启动 Python,会打印后台 PID。
94
+ - 查看日志:`tail -f run_logs/run.log`
95
+
96
+ ### B. 在线平台(`run_batch_inference_online_demo.sh`)
97
+
98
+ ```bash
99
+ cp run_batch_inference_online_demo.sh run_batch_online.sh
100
+ mkdir -p run_logs
101
+ # 编辑 run_batch_online.sh 中的参数
102
+ bash run_batch_online.sh
103
+ ```
104
+
105
+ 说明:
106
+
107
+ - 重点修改:`LLM_CLIENT_URLS`、`LLM_CLIENT_MODELS`、`SYSTEM_FORMAT`
108
+ - 查看日志:`tail -f run_logs/run_batch_*.log`
109
+
110
+ ## 脚本参数说明
111
+
112
+ ### 基础参数
113
+
114
+ - `LLM_CLIENT_URLS`:模型服务地址,多个地址用空格分隔(与模型列表一一对应)
115
+ - `LLM_CLIENT_MODELS`:模型名列表,多个模型用空格分隔
116
+ - `TEST_DATA_FILE`:输入 JSONL 路径
117
+ - `OUTPUT_FILE`:`ROLLOUT_NUM=1` 时的输出文件
118
+ - `OUTPUT_DIR`:`ROLLOUT_NUM>1` 时输出目录(生成 `rollout_01.jsonl` 等)
119
+ - `ROLLOUT_NUM`:每条样本重复推理次数
120
+ - `RESUME_FROM_FILE`:断点续跑文件(可空)
121
+ - `AVAILABLE_TOOLS`:启用工具列表(空格分隔)
122
+ - `TASK_TYPE`:是否按“输入仅文本”场景处理,默认 `input_only`
123
+
124
+ ### 推理控制参数
125
+
126
+ - `MAX_ROUNDS`:单 query 最大轮次
127
+ - `CONCURRENCY_WORKERS`:并发 worker 数
128
+ - `SAVE_BATCH_SIZE`:每处理多少条就自动落盘一次
129
+ - `TEMPERATURE`:采样温度
130
+ - `TOP_P`:top-p(`run_batch_inference_demo.sh` 已包含)
131
+ - `EXTRA_PAYLOAD`:额外模型 payload(JSON 字符串,`run_batch_inference_demo.sh` 已包含)
132
+ - `TIMEOUT_FOR_ONE_QUERY`:单 query 超时时间(秒)
133
+ - `LLM_API_RETRY_TIMES`:LLM 请求失败后的重试次数(不含首次)
134
+ - `SYSTEM_PROMPT`:自定义 system prompt;留空时使用内置默认 prompt
135
+ - `SYSTEM_FORMAT`:平台格式(主要在 `run_batch_inference_online_demo.sh`)
136
+
137
+ ### 上下文截断相关参数
138
+
139
+ - `DISCARD_ALL_MODE`:是否启用 discard-all(`true/false`)
140
+ - `MODEL_MAX_CONTEXT_TOKENS`:模型最大上下文长度
141
+ - `DISCARD_RATIO`:触发 discard 的比例阈值
142
+ - `TOKENIZER_PATH`:token 统计所用 tokenizer 路径
143
+
144
+ ### 日志参数
145
+
146
+ - `LOG_LABEL`:日志标签,目录形如 `logs/YYYY_MM_DD_<LOG_LABEL>/`
147
+ - `LOG_FILE`:脚本启动日志文件(`run_logs/*.log`)
148
+ - `LOGGING_ROOT`:日志根路径(`run_batch_inference_demo.sh` 已包含,可空)
149
+
150
+ ## `SYSTEM_FORMAT` 可选值
151
+
152
+ `SYSTEM_FORMAT` 将对应不同的平台处理逻辑,根据该关键词进入不同的处理分支。
153
+
154
+ - `deep_research`:本地 deep research 格式(vLLM 部署)
155
+ - `azure`:Azure OpenAI
156
+ - `aihubmix`:AIHubMix(OpenAI 兼容)
157
+ - `aihubmix_claude`:AIHubMix Claude 格式
158
+ - `aihubmix_glm`:AIHubMix GLM 格式
159
+ - `volcano`:火山引擎
160
+ - `aliyun`:阿里云百炼平台格式
161
+
162
+ ## 当前默认可用工具(9 个)
163
+
164
+ - `wide_search`:基于 Serp 进行通用网页搜索,支持一轮提交多个 query
165
+ - `scholar_search`:基于 Google Scholar 进行学术检索(+ web 结果)
166
+ - `image_search`:图片检索,支持多 query。
167
+ - `wide_visit`:访问网页并按目标 `goal` 产出摘要
168
+ - `file_wide_parse`:解析本地/在线文件(PDF、DOCX、MD、CSV等)
169
+ - `execute_code`:执行 Python 代码
170
+ - `ask_question_about_image`:图像理解与问答
171
+ - `ask_question_about_video`:视频理解与问答
172
+ - `bash`:执行 shell 脚本
173
+
174
+ 各工具对应的 schema 定义详见 utils/prompts.py 下的 `DEEPRESEARCH_SYSTEM_PROMPT`
175
+
176
+ ## 输出与日志
177
+
178
+ ### 输出 JSONL(字段详解)
179
+
180
+ `run_batch_inference.py` 写出的每行字段如下:
181
+
182
+ - `time_stamp`:该行结果写入时的时间戳(`YYYY-MM-DD HH:MM:SS`)。
183
+ - `query_id`:批处理层生成的 query 标识(基于 `question` 哈希)。
184
+ - `query`:本条输入的 `question` 文本。
185
+ - `result`:单个 segment 的详细结果对象(来自 `run_single_inference.py`)。
186
+ - `status`:任务状态,`success` / `timeout` / `error`。
187
+ - `discard_segments`:被 `discard-all` 截断并做 summary 的段数(不含最终段)。
188
+ - `elapsed_sec`:该 query 本次 rollout 的总耗时(秒)。
189
+ - `rollout_idx`:第几次 rollout(从 1 开始)。
190
+ - `src`:原始输入行完整内容(通常含 `id`、`question`、`file_path`、skills 等)。
191
+ - `segment_idx`:当前是第几个 segment(从 1 开始)。
192
+ - `segment_total`:该 query 共拆成多少个 segment。若无有效 `result`,会写成 `0`。
193
+
194
+ 其中 `result` 常见字段(`run_single_inference.py`):
195
+
196
+ - `query_id`:单次运行实例 ID(含时间后缀)。
197
+ - `tools`:本次启用的 tools schema(字符串形式)。
198
+ - `messages`:用于模型推理与工具交互的日志消息。
199
+ - `final_answer`:当前 segment 的回答文本。
200
+ - `transcript`:更完整的对话轨迹(含工具回填)。
201
+ - `rounds`:该 segment 执行到的轮数。
202
+ - `stopped_reason`:停止原因(如 `no_tool_calls`、`discard_all_01`、`discard_all_final`、`max_rounds_exceeded`)。
203
+ - `error`:仅在异常时可能出现。
204
+
205
+ ### 日志目录
206
+
207
+ 默认日志结构如下(`LOGGING_ROOT` 为空时):
208
+
209
+ ```text
210
+ logs/
211
+ └── YYYY_MM_DD_<LOG_LABEL>/
212
+ ├── collect.log
213
+ └── <query_id>/
214
+ ├── run.log
215
+ └── result.json
216
+ ```
217
+
218
+ ## 工具测试
219
+
220
+ 运行工具测试脚本:
221
+ ```bash
222
+ python test_all_tools.py
223
+ ```
224
+ 该脚本会测试所有注册的工具,验证其基本功能是否正常。
inference/README_en.md ADDED
@@ -0,0 +1,226 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ [中文](./README.md) | English
2
+
3
+ # S1-DeepResearch Inference Framework
4
+
5
+ ## Key Features
6
+
7
+ - **Multiple LLM clients**: Supports vLLM, Azure OpenAI, AIHubMix, and other LLM services
8
+ - **Rich toolset**: Nine tools covering search, web browsing, file parsing, code execution, multimodal Q&A, bash, and more
9
+ - **Batch inference**: Concurrent batch inference with resume-from-checkpoint and periodic result saving
10
+ - **Single-query inference**: Detailed debugging and testing for individual queries
11
+ - **Load balancing**: Multi-node LLM load balancing and consistent scheduling
12
+ - **Detailed logging**: Per-query log files for easier troubleshooting and analysis
13
+
14
+ ## Project Layout (current)
15
+
16
+ ```text
17
+ ./
18
+ ├── run_batch_inference_demo.sh # Local / vLLM script template
19
+ ├── run_batch_inference_online_demo.sh # Online platform script template
20
+ ├── inference/
21
+ │ ├── run_batch_inference.py
22
+ │ └── run_single_inference.py
23
+ ├── server/
24
+ ├── tool_kits/
25
+ ├── utils/
26
+ │ └── config/
27
+ │ ├── config.example.json
28
+ │ └── README.md
29
+ ├── models/tokenizer/
30
+ └── test_all_tools.py
31
+ ```
32
+
33
+ ## Quick Start
34
+
35
+ ### 1. Install dependencies
36
+
37
+ ```bash
38
+ pip install -r requirements.txt
39
+ ```
40
+
41
+ ### 2. Configuration (JSON or environment variables recommended)
42
+
43
+ Precedence: **custom JSON > environment variables > defaults in `utils/config.py`**.
44
+
45
+ Typical workflow:
46
+
47
+ ```bash
48
+ cp utils/config/config.example.json utils/config/config.local.json
49
+ ```
50
+
51
+ Edit `config.local.json` as needed, for example:
52
+
53
+ - `TOOLS_SERVER_BASE_ENDPOINT_URL`
54
+ - `AIHUBMIX_KEY` / `AZURE_KEY` / `VOLCANO_KEY` / `ALIYUN_KEY`
55
+ - `CLIENT_TIMEOUT`
56
+
57
+ You can also override via environment variables, for example:
58
+
59
+ ```bash
60
+ export S1_DR_CONFIG_JSON="utils/config/config.local.json"
61
+ ```
62
+
63
+ ### 3. Prepare input JSONL
64
+
65
+ Each line is one JSON object. At minimum include `question`; usually also `id` and `file_path`.
66
+
67
+ #### 3.1 JSONL example (file inputs)
68
+
69
+ ```json
70
+ {"id":"query_001","question":"When Alibaba was founded, what was the average age of the founders whose surnames are Ma, Cai, or Zhang among the 18 co-founders? Round to one decimal place.","file_path":[]}
71
+ {"id":"query_002","question":"According to the manual, for DJI's heaviest AIR-series drone by takeoff weight, how many mAh of battery energy remain after flying half a marathon? (Note 1: assume calm air; minimum energy use is flying at 60% of max speed. Note 2: power draw can be converted from max flight time.)","file_path":["/path/to/file.pdf"]}
72
+ ```
73
+
74
+ #### 3.2 JSONL example (using Skills)
75
+
76
+ ```json
77
+ {"id":"query_003","question":"Use pymatgen to build a simple TiO2 surface slab. Please generate a common low-index surface, report the Miller index, slab thickness, and vacuum size, and briefly describe the resulting surface structure.","skills":[{"name": "skill_name1", "description": "description1", "skill_path": "skill_path1"}, {"name": "skill_name2", "description": "description2", "skill_path": "skill_path2"}]}
78
+ ```
79
+
80
+ ## Recommended workflow: copy a script, then run
81
+
82
+ ### A. Local / vLLM (`run_batch_inference_demo.sh`)
83
+
84
+ ```bash
85
+ cp run_batch_inference_demo.sh run_batch_local.sh
86
+ mkdir -p run_logs
87
+ # Edit parameters inside run_batch_local.sh
88
+ bash run_batch_local.sh
89
+ ```
90
+
91
+ Notes:
92
+
93
+ - The script starts Python with `nohup ... &` and prints the background PID.
94
+ - Tail logs: `tail -f run_logs/run.log`
95
+
96
+ ### B. Online platform (`run_batch_inference_online_demo.sh`)
97
+
98
+ ```bash
99
+ cp run_batch_inference_online_demo.sh run_batch_online.sh
100
+ mkdir -p run_logs
101
+ # Edit parameters inside run_batch_online.sh
102
+ bash run_batch_online.sh
103
+ ```
104
+
105
+ Notes:
106
+
107
+ - Focus on: `LLM_CLIENT_URLS`, `LLM_CLIENT_MODELS`, `SYSTEM_FORMAT`
108
+ - Tail logs: `tail -f run_logs/run_batch_*.log`
109
+
110
+ ## Script parameters
111
+
112
+ ### Basic
113
+
114
+ - `LLM_CLIENT_URLS`: Model service URLs, space-separated (paired with the model list)
115
+ - `LLM_CLIENT_MODELS`: Model names, space-separated
116
+ - `TEST_DATA_FILE`: Input JSONL path
117
+ - `OUTPUT_FILE`: Output file when `ROLLOUT_NUM=1`
118
+ - `OUTPUT_DIR`: Output directory when `ROLLOUT_NUM>1` (e.g. `rollout_01.jsonl`, …)
119
+ - `ROLLOUT_NUM`: Number of rollouts per sample
120
+ - `RESUME_FROM_FILE`: Resume checkpoint file (may be empty)
121
+ - `AVAILABLE_TOOLS`: Enabled tools, space-separated
122
+ - `TASK_TYPE`: Whether to treat input as text-only; default `input_only`
123
+
124
+ ### Inference control
125
+
126
+ - `MAX_ROUNDS`: Max rounds per query
127
+ - `CONCURRENCY_WORKERS`: Number of concurrent workers
128
+ - `SAVE_BATCH_SIZE`: Flush results to disk every N samples
129
+ - `TEMPERATURE`: Sampling temperature
130
+ - `TOP_P`: Top-p (included in `run_batch_inference_demo.sh`)
131
+ - `EXTRA_PAYLOAD`: Extra model payload (JSON string; included in `run_batch_inference_demo.sh`)
132
+ - `TIMEOUT_FOR_ONE_QUERY`: Per-query timeout (seconds)
133
+ - `LLM_API_RETRY_TIMES`: Retries after LLM failure (not counting the first attempt)
134
+ - `SYSTEM_PROMPT`: Custom system prompt; empty uses the built-in default
135
+ - `SYSTEM_FORMAT`: Platform format (mainly in `run_batch_inference_online_demo.sh`)
136
+
137
+ ### Context truncation
138
+
139
+ - `DISCARD_ALL_MODE`: Enable discard-all (`true`/`false`)
140
+ - `MODEL_MAX_CONTEXT_TOKENS`: Model max context length
141
+ - `DISCARD_RATIO`: Threshold ratio to trigger discard
142
+ - `TOKENIZER_PATH`: Path to tokenizer used for token counting
143
+
144
+ ### Logging
145
+
146
+ - `LOG_LABEL`: Log label; directory shape `logs/YYYY_MM_DD_<LOG_LABEL>/`
147
+ - `LOG_FILE`: Script log file under `run_logs/*.log`
148
+ - `LOGGING_ROOT`: Log root (set in `run_batch_inference_demo.sh`; may be empty)
149
+
150
+ ## `SYSTEM_FORMAT` values
151
+
152
+ `SYSTEM_FORMAT` selects platform-specific handling via keyword branches.
153
+
154
+ - `deep_research`: Local deep-research format (vLLM deployment)
155
+ - `azure`: Azure OpenAI
156
+ - `aihubmix`: AIHubMix (OpenAI-compatible)
157
+ - `aihubmix_claude`: AIHubMix Claude format
158
+ - `aihubmix_glm`: AIHubMix GLM format
159
+ - `volcano`: Volcano Engine
160
+ - `aliyun`: Alibaba Cloud Bailian format
161
+
162
+ ## Currently available tools (9)
163
+
164
+ - `wide_search`: General web search via Serp; multiple queries in one round
165
+ - `scholar_search`: Google Scholar academic search (+ web results)
166
+ - `image_search`: Image search; multiple queries supported
167
+ - `wide_visit`: Visit pages and summarize toward a `goal`
168
+ - `file_wide_parse`: Parse local/remote files (PDF, DOCX, MD, CSV, etc.)
169
+ - `execute_code`: Run Python code
170
+ - `ask_question_about_image`: Image understanding and Q&A
171
+ - `ask_question_about_video`: Video understanding and Q&A
172
+ - `bash`: Run shell commands
173
+
174
+ Tool schemas are defined in `DEEPRESEARCH_SYSTEM_PROMPT` in `utils/prompts.py`.
175
+
176
+ ## Outputs and logs
177
+
178
+ ### Output JSONL fields
179
+
180
+ Each line written by `run_batch_inference.py` contains:
181
+
182
+ - `time_stamp`: Write time for that row (`YYYY-MM-DD HH:MM:SS`).
183
+ - `query_id`: Batch-level query id (hash of `question`).
184
+ - `query`: This row’s `question` text.
185
+ - `result`: Detailed result object for one segment (from `run_single_inference.py`).
186
+ - `status`: `success` / `timeout` / `error`.
187
+ - `discard_segments`: Segments truncated by discard-all and summarized (excluding the final segment).
188
+ - `elapsed_sec`: Total seconds for this rollout of the query.
189
+ - `rollout_idx`: Rollout index (1-based).
190
+ - `src`: Full original input line (often includes `id`, `question`, `file_path`, skills, etc.).
191
+ - `segment_idx`: Current segment index (1-based).
192
+ - `segment_total`: Total segments for this query; `0` if there is no valid `result`.
193
+
194
+ Common fields inside `result` (`run_single_inference.py`):
195
+
196
+ - `query_id`: Single-run instance id (includes a time suffix).
197
+ - `tools`: Enabled tool schemas (string form).
198
+ - `messages`: Messages for model reasoning and tool interaction.
199
+ - `final_answer`: Answer text for this segment.
200
+ - `transcript`: Fuller trajectory (including tool returns).
201
+ - `rounds`: Rounds executed in this segment.
202
+ - `stopped_reason`: Why it stopped (e.g. `no_tool_calls`, `discard_all_01`, `discard_all_final`, `max_rounds_exceeded`).
203
+ - `error`: Present only on failure.
204
+
205
+ ### Log directories
206
+
207
+ Default layout when `LOGGING_ROOT` is empty:
208
+
209
+ ```text
210
+ logs/
211
+ └── YYYY_MM_DD_<LOG_LABEL>/
212
+ ├── collect.log
213
+ └── <query_id>/
214
+ ├── run.log
215
+ └── result.json
216
+ ```
217
+
218
+ ## Tool tests
219
+
220
+ Run the tool test script:
221
+
222
+ ```bash
223
+ python test_all_tools.py
224
+ ```
225
+
226
+ This exercises all registered tools and checks that basic behavior works.
inference/inference/run_batch_inference.py ADDED
@@ -0,0 +1,373 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import argparse
2
+ import asyncio
3
+ import datetime
4
+ import json
5
+ import os
6
+ import sys
7
+ import time
8
+
9
+ from anyio import Path
10
+ from numpy._core.numerictypes import str_
11
+
12
+
13
+ # 获取项目根目录路径,并加入 sys.path
14
+ project_root = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))
15
+ sys.path.append(project_root)
16
+
17
+ from inference.run_single_inference import run_one_query
18
+ from server.llm_api import LLMClient
19
+ from server.tool_api import return_all_tools
20
+ from utils.configs import LITERATURE_SEED_DATA_DIR
21
+
22
+ from tqdm import tqdm # pyright: ignore[reportMissingModuleSource]
23
+ from typing import Any, Dict
24
+ from utils.common import _to_bool, get_query_uuid, load_jsonl
25
+ from utils.logger import setup_collect_logger
26
+ from utils.prompts import DEEPRESEARCH_SYSTEM_PROMPT
27
+ from utils.skill_prompt import build_skills_system_text, extract_skills_from_row, resolve_skill_source_dirs
28
+ from utils.build_prompt import build_openai_schema
29
+
30
def parse_args():
    """Parse command-line arguments for the batch inference script.

    Returns:
        argparse.Namespace holding every batch-inference setting: LLM endpoints
        and model names, data paths, tool selection, concurrency, sampling
        parameters, discard-all (context-compaction) options and logging paths.
    """
    parser = argparse.ArgumentParser(description="批量推理脚本")
    parser.add_argument("--llm_client_urls", type=str, nargs='+', default=["http://10.20.4.18:10777/vllm_generate"], help="vllm 远程挂载的模型 URL (可传多个,用空格分隔)")
    parser.add_argument("--llm_client_models", type=str, nargs='+', default=["LLM_CLIENT_NAME"], help="vllm 远程挂载的模型名称 (可传多个,用空格分隔)")
    parser.add_argument("--test_data_file", type=str, default="test_files/test.jsonl", help="测试需要生成答案的文件(.jsonl)")
    parser.add_argument("--available_tools", type=str, nargs="+", default=["web_search", "visit_url", "execute_code"], help="可用的tool名称(列表)")
    parser.add_argument("--resume_from_file", type=str, default="test_files/test_result_20251112.jsonl", help="已完成结果的本地文件(可选),自动跳过已完成样本")
    parser.add_argument("--concurrency_workers", type=int, default=10, help="并发进程数量")
    parser.add_argument("--save_batch_size", type=int, default=1, help="每得到多少条数据结果就存储一次")
    parser.add_argument("--rollout_num", type=int, default=1, help="每条数据的推理次数,每次推理结果以rollout_xx.jsonl命名保存到output_dir中")
    parser.add_argument("--max_rounds", type=int, default=100, help="与模型交互的最大轮数")
    parser.add_argument("--temperature", type=float, default=0.7, help="采样温度")
    parser.add_argument("--top_p", type=float, default=0.95, help="nucleus sampling 的 top_p 参数")
    parser.add_argument("--extra_payload", type=str, default="{}", help="额外的 payload 参数(JSON 字符串),如 '{\"presence_penalty\": 1.1}'")
    parser.add_argument("--timeout_for_one_query", type=int, default=7200, help="单个query最大执行时长(秒)")
    parser.add_argument("--llm_api_retry_times", type=int, default=2, help="LLM API 请求失败后的重试次数,不含首次请求")
    parser.add_argument("--output_file", type=str, default="test_files/test_result_today.jsonl", help="结果输出文件路径")
    parser.add_argument("--output_dir", type=str, default="test_files/output", help="结果输出目录路径(每个rollout结果以rollout_xx.jsonl保存)")
    parser.add_argument('--system_format', type=str, default="deep_research", help="采用什么模型的prompt拼接方式(默认用 deep_research 的)")
    parser.add_argument('--log_label', type=str, default="", help=f"log 路径加入自定义文本标记,同时也是附件类数据暂存附件的存储路径 {LITERATURE_SEED_DATA_DIR}/{{log_label}}")
    parser.add_argument('--system_prompt', type=str, default=None, help="自定义全局system_prompt的文件路径或字符串(默认用DEEPRESEARCH)")
    # BUG FIX: the original used action='store_true' together with default=True,
    # which made the flag a no-op (verbose was always True and could never be
    # disabled). BooleanOptionalAction keeps `--verbose` working exactly as
    # before and additionally accepts `--no-verbose` to turn it off.
    parser.add_argument('--verbose', action=argparse.BooleanOptionalAction, default=True, help="是否输出debug日志")
    parser.add_argument('--clean_files_copy_dir', action='store_true', default=False, help="执行完后是否删除files_copy_dir临时文件夹")
    parser.add_argument("--discard_all_mode", type=str, default="false", help="是否开启 discard-all 模式(true/false)")
    parser.add_argument("--model_max_context_tokens", type=int, default=128000, help="模型最大上下文长度")
    parser.add_argument("--discard_ratio", type=float, default=0.8, help="触发 discard 的上下文比例阈值")
    parser.add_argument("--tokenizer_path", type=str, default="models/tokenizer", help="用于 token 统计的 tokenizer 路径")
    parser.add_argument("--logging_root", type=str, default=None, help="用于自定义 log 存储路径")
    return parser.parse_args()
59
+
60
+ # Tool registry built once at module-import time (global) so each query avoids the lookup latency.
61
+ ALL_TOOLS = return_all_tools()
62
+
63
async def main_async(args):
    """Async entry point: run batch inference over a JSONL dataset.

    For every rollout this function:
      1. resumes from previously finished results (skipping completed queries),
      2. runs the remaining queries concurrently through ``run_one_query``
         with a per-query timeout,
      3. expands each query's multi-segment result into one output row per
         segment, and
      4. periodically persists progress to the rollout's JSONL output file.

    Args:
        args: argparse.Namespace produced by ``parse_args()``.
    """
    # --- logging
    logging_root = args.logging_root if args.logging_root else project_root
    logger, log_path = setup_collect_logger(logging_root, args.log_label)
    logger.info(f"[Collector] Script Start. Log file: {log_path}")

    # --------- argument handling & initialization ---------
    def abs_path_if_needed(path):
        # Resolve relative paths: "./x" or "../x" resolve against the CWD,
        # any other relative path is treated as project-root-relative.
        if not path:
            return path
        if not os.path.isabs(path):
            if path.startswith("./") or path.startswith("../"):
                return os.path.abspath(path)
            else:
                return os.path.join(project_root, path)
        return path

    llm_client_urls = args.llm_client_urls
    llm_client_models = args.llm_client_models
    test_data_file = abs_path_if_needed(args.test_data_file)
    available_tools = args.available_tools
    resume_from_file = abs_path_if_needed(args.resume_from_file)
    concurrency_workers = args.concurrency_workers
    save_batch_size = args.save_batch_size
    max_rounds = args.max_rounds
    temperature = args.temperature
    top_p = args.top_p
    _extra_raw = (args.extra_payload or "").strip()
    extra_payload = json.loads(_extra_raw if _extra_raw else "{}")
    system_format = args.system_format
    timeout_for_one_query = args.timeout_for_one_query
    llm_api_retry_times = max(0, args.llm_api_retry_times)
    output_file = abs_path_if_needed(args.output_file)
    output_dir = abs_path_if_needed(args.output_dir)
    rollout_num = args.rollout_num
    discard_all_mode = _to_bool(args.discard_all_mode)
    model_max_context_tokens = args.model_max_context_tokens
    discard_ratio = args.discard_ratio
    tokenizer_path = abs_path_if_needed(args.tokenizer_path)
    # Create the parent directory of output_file (and output_dir) if missing.
    output_dir_from_file = os.path.dirname(output_file)
    if output_dir_from_file and not os.path.exists(output_dir_from_file):
        logger.warning(f"[Save Dir created] Make the dir {output_dir_from_file}")
        os.makedirs(output_dir_from_file, exist_ok=True)
    if output_dir and not os.path.exists(output_dir):
        logger.warning(f"[Save Dir created] Make the dir {output_dir}")
        os.makedirs(output_dir, exist_ok=True)
    verbose = args.verbose
    # System-prompt precedence: CLI value (file path or literal) > default constant.
    if args.system_prompt:
        if os.path.isfile(args.system_prompt):
            with open(args.system_prompt, encoding="utf-8") as f:
                system_prompt = f.read()
        else:
            system_prompt = args.system_prompt
    else:
        system_prompt = DEEPRESEARCH_SYSTEM_PROMPT

    # Keep only the tools that were explicitly enabled on the CLI.
    selected_tools = {name: spec for name, spec in ALL_TOOLS.items() if name in available_tools}

    logger.info(f"[Selected_tools] {build_openai_schema(selected_tools)}")

    llm_client = LLMClient(
        llm_client_urls,
        llm_client_models,
        max_retries=llm_api_retry_times,
    )
    data_list = load_jsonl(test_data_file)  # load every pending inference row

    logger.info(f"Number of rollouts per query: {rollout_num}")
    logger.info(f"LLM API retry times: {llm_api_retry_times}")

    # Prepare one output file path per rollout.
    rollout_output_files = {}
    if rollout_num > 1:
        for rollout_idx in range(1, rollout_num + 1):
            rollout_output_file = os.path.join(output_dir, f"rollout_{rollout_idx:02d}.jsonl")
            rollout_output_files[rollout_idx] = rollout_output_file
            logger.info(f"Rollout {rollout_idx}: output_file={rollout_output_file}")
    else:
        rollout_output_files[1] = output_file

    # Directory holding per-run attachment copies; optionally removed at the end.
    files_copy_dir = None
    if args.log_label:
        files_copy_dir = f"data/{args.log_label}"

    # Process each rollout.
    for rollout_idx in range(1, rollout_num + 1):
        logger.info(f"{'='*50}")
        logger.info(f"Starting Rollout {rollout_idx}/{rollout_num}")
        logger.info(f"{'='*50}")

        rollout_output_file = rollout_output_files.get(rollout_idx, output_file)

        results = []
        finished_keys = set()

        def _get_finish_key_from_item(item: Dict[str, Any]) -> str:
            """Key used to decide whether a query has already been completed.

            Combines `id` and `question` so that datasets with dirty or
            duplicated ids are still deduplicated correctly.
            """
            if not isinstance(item, dict):
                return ""
            now_id = ""
            _id = item.get("id", None)
            if _id is not None and str(_id) != "":
                now_id += str(_id)
            q = item.get("question", None)
            if q is not None and str(q) != "":
                now_id += "__" + str(q)
            return now_id

        # --------- resume previously finished data ---------
        # Prefer args.resume_from_file; otherwise fall back to this rollout's
        # own output file (so an interrupted run can be restarted in place).
        resume_path = None
        if resume_from_file and os.path.isfile(resume_from_file):
            resume_path = resume_from_file
        elif os.path.isfile(rollout_output_file):
            resume_path = rollout_output_file

        if resume_path:
            logger.info(f"[Resume Rollout {rollout_idx}] Loading finished IDs from: {resume_path}")
            with open(resume_path, "r", encoding="utf-8") as f:
                for line in f:
                    try:
                        obj = json.loads(line)
                        src = obj.get("src") or {}
                        # Backward compatibility: older outputs may lack `id`
                        # inside src; fall back to obj["query"].
                        key = _get_finish_key_from_item(src)
                        if not key:
                            q = obj.get("query", None)
                            key = "" if q is None else str(q)
                        # Keep only rows that succeeded AND carry a final answer.
                        if key and obj.get("status", "") == "success" and obj.get("result", {}).get("final_answer"):
                            finished_keys.add(key)
                            results.append(obj)
                    except Exception:
                        continue
        # Keep only unfinished items.
        original_num = len(data_list)
        data_list_for_rollout = [item for item in data_list if _get_finish_key_from_item(item) not in finished_keys]
        logger.info(f"[Resume Rollout {rollout_idx}] Skipped {len(finished_keys)} finished items, {len(data_list_for_rollout)} remaining (total={original_num}).")

        if not data_list_for_rollout:
            logger.info(f"[Rollout {rollout_idx}] All queries already processed, skipping.")
            continue

        # --------- concurrency control ---------
        sem = asyncio.Semaphore(concurrency_workers)
        save_every = save_batch_size

        async def _worker(idx: int, item: Dict[str, Any], rollout_idx: int):
            """Run one query (one rollout) under the concurrency semaphore."""
            async with sem:
                query = item.get("question")
                # Renamed from `id` — the original shadowed the builtin; the
                # logged value is unchanged.
                item_id = item.get("id")
                query_id = get_query_uuid(str(query))
                file_path = item.get("file_path", "")

                # Normalize file_path into a list (accepts str, list or None).
                file_paths = []
                if isinstance(file_path, list):
                    file_paths = file_path
                elif isinstance(file_path, str) and file_path:
                    file_paths = [file_path]

                start = time.time()
                progress = {}
                # Extract the row's skill metadata.
                row_skills = extract_skills_from_row(item)
                # Build the "# Skill" section text for the system prompt.
                system_skill_text = build_skills_system_text(row_skills) if row_skills else None
                # Absolute paths of every referenced skill directory.
                skill_source_dirs = resolve_skill_source_dirs(row_skills, project_root) if row_skills else []
                try:
                    result = await asyncio.wait_for(
                        run_one_query(
                            llm=llm_client,
                            user_query=str(query),
                            file_path=file_paths,  # real paths; copied internally
                            system=system_prompt,
                            max_rounds=max_rounds,
                            temperature=temperature,
                            top_p=top_p,
                            extra_payload=extra_payload,
                            debug=verbose,
                            progress=progress,
                            all_tools=selected_tools,
                            system_format=system_format,
                            log_label=args.log_label,
                            # Set internally once query_id (the tools'
                            # conversation_id) is known.
                            file_prefix="",
                            discard_all_mode=discard_all_mode,
                            model_max_context_tokens=model_max_context_tokens,
                            discard_ratio=discard_ratio,
                            tokenizer_path=tokenizer_path,
                            logging_root=logging_root,
                            skill_source_dirs=skill_source_dirs,
                            system_skill_text=system_skill_text,
                        ),
                        timeout=timeout_for_one_query
                    )
                    status = "success"
                except asyncio.TimeoutError:
                    status = "timeout"
                    result = progress.get('result', [])
                    logger.error(f"[Timeout] id={item_id}, query_id={query_id}, elapsed={round(time.time() - start, 3)}s")
                except Exception as e:
                    status = "error"
                    result = progress.get('result', [])
                    if isinstance(result, list):
                        if not result:
                            result = [{"error": str(e)}]
                        else:
                            result[-1]["error"] = str(e)
                    logger.error(f"[Error] id={item_id}, query_id={query_id}, err={e}")
                elapsed = time.time() - start
                llm_client.pop_query_id(query_id)  # release this query from the client's load tracking
                logger.info(f"[Finish] id={item_id}, query_id={query_id}, status={status}, elapsed={round(elapsed,3)}s")
                discard_segments = sum(
                    1
                    for rr in (result or [])
                    if isinstance(rr, dict) and str(rr.get("stopped_reason", "")).startswith("discard_all_")
                    and rr.get("stopped_reason") != "discard_all_final"
                )
                return {
                    "time_stamp": datetime.datetime.now().strftime('%Y-%m-%d %H:%M:%S'),
                    "query_id": query_id,
                    "query": query,
                    "result": result,
                    "status": status,
                    # Count of summary fragments: segments that were force-truncated
                    # (no <answer>..</answer>), excluding the final segment.
                    "discard_segments": discard_segments,
                    "elapsed_sec": round(elapsed, 3),
                    "rollout_idx": rollout_idx,
                    "src": item,
                }

        # --------- dispatch tasks & tqdm progress ---------
        tasks = [asyncio.create_task(_worker(i, item, rollout_idx)) for i, item in enumerate(data_list_for_rollout)]
        pbar = tqdm(total=len(tasks), desc=f"Rollout {rollout_idx}/{rollout_num}", ncols=80)
        finished = 0

        # --------- consume results & periodic saving ---------
        for coro in asyncio.as_completed(tasks):
            r = await coro
            seg_results = r.get("result", [])
            if isinstance(seg_results, dict):
                seg_results = [seg_results]
            if not isinstance(seg_results, list):
                seg_results = []

            if not seg_results:
                row = dict(r)
                row["result"] = {}
                row["segment_idx"] = 1
                row["segment_total"] = 0
                results.append(row)
            else:
                total = len(seg_results)
                for seg_idx, seg in enumerate(seg_results, start=1):
                    row = dict(r)
                    row["result"] = seg
                    row["segment_idx"] = seg_idx  # 1-based segment index
                    row["segment_total"] = total  # total segments (incl. the final <answer> one)
                    results.append(row)

            finished += 1
            pbar.update(1)
            # Periodic save (rewrites the whole file so the output stays consistent).
            if rollout_output_file and save_every and finished % save_every == 0:
                with open(rollout_output_file, "w", encoding="utf-8") as f:
                    for rr in results:
                        f.write(json.dumps(rr, ensure_ascii=False) + "\n")
                logger.info(f"[AutoSave Rollout {rollout_idx}] Progress saved to: {rollout_output_file} ({finished}/{len(tasks)})")
        pbar.close()

        # --------- final save ---------
        if rollout_output_file:
            with open(rollout_output_file, "w", encoding="utf-8") as f:
                for r in results:
                    f.write(json.dumps(r, ensure_ascii=False) + "\n")
            logger.info(f"[Rollout {rollout_idx}] Wrote results to: {rollout_output_file}")

    logger.info(f"{'='*50}")
    logger.info(f"All {rollout_num} rollouts completed!")
    logger.info(f"{'='*50}")
    logger.info("[Collector] Script finished.")

    # ===== Final clean-up: remove the directory of copied attachment files =====
    if args.clean_files_copy_dir:  # only when explicitly enabled
        if files_copy_dir and os.path.exists(files_copy_dir):
            try:
                import shutil
                shutil.rmtree(files_copy_dir)
                logger.info(f"[Cleanup] Removed copied files directory: {files_copy_dir}")
            except Exception as e:
                logger.error(f"[Cleanup] Failed to remove directory {files_copy_dir}: {e}")
363
+
364
+
365
def main():
    """Synchronous CLI entry point: parse arguments and drive the async runner."""
    parsed_args = parse_args()
    try:
        asyncio.run(main_async(parsed_args))
    except KeyboardInterrupt:
        # Graceful exit on Ctrl-C instead of a traceback.
        print("Interrupted by user.")


if __name__ == "__main__":
    main()
inference/inference/run_single_inference.py ADDED
@@ -0,0 +1,354 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import asyncio
2
+ import copy
3
+ import logging
4
+ import sys
5
+ import os
6
+ import time
7
+ import datetime
8
+ import json
9
+
10
+
11
+
12
+ # 获取项目根目录路径,并加入 sys.path
13
+ PROJECT_ROOT = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))
14
+ sys.path.append(PROJECT_ROOT)
15
+
16
+ from typing import Any, Dict, List, Tuple
17
+ from anyio import Path
18
+
19
+ from server.llm_api import LLMClient
20
+ from server.tool_api import return_all_tools
21
+ from server.tool_execution import execute_tool_call
22
+ from utils.configs import LITERATURE_SEED_DATA_DIR, ONLINE_PLATFORM
23
+
24
+ from utils.build_prompt import (
25
+ _build_summary_message,
26
+ build_initial_messages,
27
+ build_openai_schema,
28
+ build_tongyi_schema,
29
+ build_user_payload,
30
+ get_tools_json,
31
+ wrap_tool_responses_into_user_message,
32
+ )
33
+ from utils.common import _estimate_message_tokens, count_tokens, get_query_uuid
34
+ from utils.extract_schemas_nlp import extract_nlp_tool_calls # 更新工具读取逻辑
35
+ from utils.extract_schemas_online import extract_aihubmix_tool_calls # 新增 aihubmix 的工具读取逻辑
36
+ from utils.logger import save_result_to_log_dir, setup_logger_for_query # 新增:用于结果的json序列化
37
+ from utils.skill_prompt import normalize_skill_dir_path
38
+
39
# Executes one QA row from the input JSONL: a full multi-round tool-calling session.
async def run_one_query(
    llm: LLMClient,
    user_query: str,
    file_path: List,
    system: str,
    max_rounds: int,
    temperature: float,
    top_p: float = 0.95,
    extra_payload: dict = {},
    debug: bool = False,
    args=None,
    progress: dict = None,  # BUG FIX: was a mutable default `{}` that this function mutates, leaking state across calls
    all_tools: Dict = {},
    system_format: str = "deep_research",
    log_label: str = "",
    file_prefix: str = "",
    discard_all_mode: bool = False,
    model_max_context_tokens: int = 128000,
    discard_ratio: float = 0.8,
    tokenizer_path: str = "models/tokenizer",
    logging_root=None,
    skill_source_dirs: List[str] | None = None,
    system_skill_text: str | None = None,
    # Default system_format is "deep_research"; other models use system + tool-list-schema concatenation.
) -> List[Dict[str, Any]]:
    """
    Run a complete multi-step tool-calling session until the model stops calling tools
    or max_rounds is reached.

    Returns:
        A list of result dicts, one per "segment". When discard-all mode triggers,
        each truncated context becomes its own segment (stopped_reason
        ``discard_all_NN``); the final segment carries ``discard_all_final``,
        ``no_tool_calls`` or ``max_rounds_exceeded``. Each dict holds the
        query_id, tool schemas, messages, final_answer, transcript and rounds.
    """
    query_id = get_query_uuid(user_query) + "-" + datetime.datetime.now().strftime("%Y%m%d%H%M%S")

    project_root = logging_root if logging_root else PROJECT_ROOT

    logger, log_file_path = setup_logger_for_query(query_id, project_root, log_label)
    if debug:
        print(f"[run_single_inference.py] Logging configured. Query log file: {log_file_path}")

    def copy_file_to_docker_path(file_paths: List) -> List:
        """
        Check attachment existence and copy them into the docker-mounted path,
        returning only the bare file names.
        e.g.
          process: /test/test_file.jsonl -> /LITERATURE_SEED_DATA_DIR/log_label/query_id/test_file.jsonl
          returns: [test_file.jsonl]

        Skill directories are handled the same way: any existing skills/skill_name
        directory is copied under data/{log_label}/{query_id}/skills/.
        """
        import shutil  # hoisted: used by both the file copy and the skill-dir copy below

        file_names = []
        dest_dir = f"data/{log_label}/{query_id}"
        os.makedirs(dest_dir, exist_ok=True)
        for fp in file_paths:
            if fp:
                if os.path.isfile(fp):
                    # BUG FIX: the original messages interpolated `{id}` with no such
                    # local in scope, so the *builtin* function `id` was logged.
                    logger.info(f"[File load] Get file `{fp}` for query_id={query_id}")
                    dest_path = os.path.join(dest_dir, Path(fp).name)
                    try:
                        shutil.copy2(fp, dest_path)
                        logger.info(f"[File copy] Copied file to `{dest_path}`")
                    except Exception as e:
                        logger.error(f"[File copy error] Failed to copy file `{fp}` to `{dest_path}`: {e}")
                    file_names.append(Path(fp).name)
                else:
                    logger.warning(f"File not found for query_id={query_id}: expected {fp}")

        copied_skill_dirs = set()
        for skill_dir in (skill_source_dirs or []):
            if not skill_dir:
                continue
            abs_skill_dir = skill_dir if os.path.isabs(skill_dir) else os.path.join(PROJECT_ROOT, skill_dir)
            # Accept a direct SKILL.md path by stepping up to its directory.
            if os.path.isfile(abs_skill_dir) and os.path.basename(abs_skill_dir) == "SKILL.md":
                abs_skill_dir = os.path.dirname(abs_skill_dir)
            if not os.path.isdir(abs_skill_dir):
                logger.warning(f"[Skill copy] Skill abs_skill_dir not found: {abs_skill_dir}")
                continue
            normalized_skill_dir = normalize_skill_dir_path(abs_skill_dir)
            rel_after_skills = normalized_skill_dir
            if rel_after_skills.startswith("skills/"):
                rel_after_skills = rel_after_skills[len("skills/"):]
            if not rel_after_skills:
                logger.warning(f"[Skill copy] Skill rel_after_skills not found: {rel_after_skills}")
                # The skill does not exist, so skip it.
                continue
            dest_skill_dir = os.path.join(dest_dir, "skills", rel_after_skills)
            if dest_skill_dir in copied_skill_dirs:
                continue
            copied_skill_dirs.add(dest_skill_dir)
            try:
                shutil.copytree(abs_skill_dir, dest_skill_dir, dirs_exist_ok=True)
                logger.info(f"[Skill copy] Copied skill dir `{abs_skill_dir}` -> `{dest_skill_dir}`")
            except Exception as e:
                logger.error(f"[Skill copy error] Failed to copy skill dir `{abs_skill_dir}` -> `{dest_skill_dir}`: {e}")
        # Return [] when no files were copied, otherwise the list of file names.
        send_file_path = file_names if file_names else []

        return send_file_path

    # Copy attachments into the docker-mounted directory.
    file_path = copy_file_to_docker_path(file_path)
    # Prefix used by tools (ask_xxx, parse_file, ...) to locate the attachments.
    file_prefix = os.path.join(LITERATURE_SEED_DATA_DIR, log_label, query_id)

    messages = build_initial_messages(
        user_query,
        file_path,
        system=system,
        system_format=system_format,
        tool_mapping=all_tools,
        system_skill_text=system_skill_text,
    )
    processed_system_start = copy.deepcopy(messages[0])
    # First user-role message in messages (fallback: the raw query).
    processed_user_start = next((copy.deepcopy(msg) for msg in messages if msg.get('role') == 'user'), None)
    if processed_user_start is None:
        processed_user_start = {"role": "user", "content": user_query}
    transcript: List[Dict[str, Any]] = list(messages)  # shallow copy
    result_objs: List[Dict[str, Any]] = []
    discard_count = 0
    discard_threshold = int(model_max_context_tokens * discard_ratio)

    # For the JSON log: the undecorated system prompt plus the user payload.
    log_messages: List[Dict[str, Any]] = []
    log_messages.append({"role": "system", "content": system})  # raw system prompt, without dynamic additions
    log_messages.append({"role": "user", "content": build_user_payload(user_query, file_path, system_format)})

    round_idx = 1
    while round_idx <= max_rounds:
        # PERF FIX: the original tokenized log_messages twice per round; compute once.
        tmp_token_numbers = _estimate_message_tokens(log_messages, tokenizer_path)
        if discard_all_mode and tmp_token_numbers >= discard_threshold:
            print(f"当前 token 数量:{tmp_token_numbers} > 阈值:{discard_threshold}...")
            discard_count += 1
            summary_dict = await _build_summary_message(llm, messages, temperature, logger, query_id, system_format)
            # NOTE(review): the arguments payload below contains Python-literal
            # `True`, not JSON `true` — confirm downstream consumers never
            # json-parse this marker before changing it.
            discard_marker = {
                "role": "assistant",
                "content": '<tool_call>{"name": "new_context_tool", "arguments": {"begin_new_context": True}}</tool_call>',
            }
            discard_tool_result = {"role": "tool", "content": summary_dict['content'], "usage": summary_dict['usage']}
            discard_tool_result_for_transcript = {"role": "user", "content": f"{summary_dict['content']}", "usage": summary_dict['usage']}
            discard_follow_up = {"role": "assistant", "content": "Start new conversation to continue the task..."}
            discard_log_messages = copy.deepcopy(log_messages) + [discard_marker, discard_tool_result, discard_follow_up]
            # In the transcript the `tool` role is replaced by `user`.
            discard_transcript = copy.deepcopy(transcript) + [discard_marker, discard_tool_result_for_transcript, discard_follow_up]
            discard_result = {
                "query_id": query_id,
                "tools": get_tools_json(all_tools) if all_tools is not None else "[]",
                "messages": discard_log_messages,
                "final_answer": discard_follow_up["content"],
                "transcript": discard_transcript,
                "rounds": round_idx,
                "stopped_reason": f"discard_all_{discard_count:02d}",
            }
            result_objs.append(discard_result)
            if progress is not None:
                progress["result"] = copy.deepcopy(result_objs)
            # Fallback: the summary itself may have blown the context (empty
            # content) — restart from the original user query instead.
            summary_start = {"role": "user", "content": summary_dict['content'] if summary_dict['content'] else processed_user_start['content']}
            if not summary_dict['content']:
                logger.info(f"Summary failed due to exceeding max context length, fallback to user query: {processed_user_start['content']}")
            log_system_start = {"role": "system", "content": system}
            transcript = copy.deepcopy([processed_system_start, summary_start] if processed_system_start['role'] == 'system' else [summary_start])
            log_messages = copy.deepcopy([log_system_start, summary_start])
            # fix: messages must be reset too, otherwise its length keeps growing
            messages = copy.deepcopy([processed_system_start, summary_start] if processed_system_start['role'] == 'system' else [summary_start])
            # round_idx also restarts for the fresh context
            round_idx = 1
            continue

        llm_start = time.time()
        logger.info(f"[round {round_idx}] Round {round_idx} starting...")
        tool_call_ids = []  # tool-call id bookkeeping for online platforms
        # Different backends use different chat calls: tongyi-style needs no
        # tool_list in the message; aihubmix-style goes through aihubmix_chat.
        response = {}
        if system_format == "deep_research":
            response = await llm.chat(messages, temperature=temperature, top_p=top_p, extra_payload=extra_payload, logger=logger, query_id=query_id)
            assistant_text = response['content']
            usage = response['usage']
            # NOTE(review): this path indexes response['error'] directly while the
            # online path uses `'error' in response` — assumes llm.chat always
            # returns an 'error' key; confirm in LLMClient.
            if debug and response['error']:
                logger.info(f"[round {round_idx}] llm.chat error: {response['error']}")
            llm_elapsed_time = time.time() - llm_start
            # Some models pre-seed `<think>` server-side; restore it when missing.
            assistant_fix_prefix_think_text = assistant_text if assistant_text.lstrip().startswith("<think>") else "<think>\n" + assistant_text.lstrip()
            transcript.append({"role": "assistant", "content": assistant_fix_prefix_think_text, "elapsed_time": llm_elapsed_time, "usage": usage})
            log_messages.append({"role": "assistant", "content": assistant_text, "elapsed_time": llm_elapsed_time, "usage": usage})
            tool_calls = extract_nlp_tool_calls(assistant_text, file_prefix=file_prefix, prefix_mode="benchmark")
        elif system_format in ONLINE_PLATFORM:
            if system_format == "azure":
                response = await llm.azure_chat(messages, temperature=temperature, tool_list=build_openai_schema(all_tools), logger=logger, query_id=query_id)
            elif system_format in ["aihubmix", "aihubmix_claude"]:
                response = await llm.aihubmix_chat(messages, temperature=temperature, tool_list=build_openai_schema(all_tools), logger=logger, query_id=query_id)
            elif system_format in ["aihubmix_glm"]:
                response = await llm.aihubmix_chat(messages, temperature=temperature, tool_list=build_tongyi_schema(all_tools), logger=logger, query_id=query_id)
            elif system_format == "volcano":
                response = await llm.volcano_chat(messages, temperature=temperature, tool_list=build_tongyi_schema(all_tools), logger=logger, query_id=query_id)
            elif system_format == "aliyun":
                response = await llm.aliyun_chat(messages, temperature=temperature, tool_list=build_tongyi_schema(all_tools), logger=logger, query_id=query_id)

            usage = response['usage']
            messages = response['next_messages']  # OpenAI-style next messages sent back to the online platform
            now_log_messages = response['log_messages']  # local record incl. timings etc. (persisted to disk)
            tool_call_ids = response['tool_call_ids']
            meta_data = response['meta_data']  # manually assembled {"role": "...", "content": "..."}
            llm_elapsed_time = time.time() - llm_start

            if debug and 'error' in response:
                logger.info(f"[round {round_idx}] llm.chat error: {response['error']}")

            transcript.append({**meta_data, "elapsed_time": llm_elapsed_time, "usage": usage})
            log_messages.extend(now_log_messages)
            tool_calls = extract_aihubmix_tool_calls(meta_data['content'], all_tools, file_prefix=file_prefix, prefix_mode="benchmark")
            assistant_text = meta_data['content']
        else:
            raise ValueError(f"[system_format={system_format} failed] Please define a function to extract calls like `utils -> extract_schemas -> extract_nlp_tool_calls`")

        if debug:
            logger.info(f"[round {round_idx}] tool_calls: {tool_calls}")

        result_obj = {
            "query_id": query_id,
            "tools": get_tools_json(all_tools) if all_tools is not None else "[]",
            "messages": copy.deepcopy(log_messages),
            "final_answer": assistant_text,
            "transcript": copy.deepcopy(transcript),
            "rounds": round_idx,
            "stopped_reason": "no_tool_calls" if not tool_calls else None
        }

        # Update progress['result'] every round so callers can observe partial results.
        if progress is not None:
            progress['result'] = copy.deepcopy(result_objs + [result_obj])

        # No tool calls means the session is finished.
        if not tool_calls:
            logger.info("[run_one_query] Stopping: no tool calls in round %d", round_idx)
            result_obj["stopped_reason"] = "discard_all_final" if discard_count > 0 else "no_tool_calls"
            result_objs.append(result_obj)
            # Persist the final result to the logs directory.
            save_result_to_log_dir(query_id, result_objs, project_root, log_label)
            return result_objs

        # Execute all tool calls sequentially for this assistant turn (the model may emit multiple).
        responses: List[Tuple[str, str]] = []
        tool_total_time = 0.0
        for idx, call in enumerate(tool_calls):
            name = call.get("name")
            # Renamed from `args` — the original shadowed the function parameter.
            tool_args = call.get("arguments", {})
            tool_start = time.time()
            # conversation id matches the skill-upload path so tools can find the files
            resp = await execute_tool_call(name, tool_args, all_tools, logger, None, f"{log_label}/{query_id}")
            tool_elapsed = time.time() - tool_start
            tool_total_time += tool_elapsed
            responses.append(resp)
            # Log each tool response as a message.
            if system_format in ONLINE_PLATFORM:
                tool_response = {}
                if system_format in ["azure", "aihubmix"]:
                    tool_response = {
                        "type": "function_call_output",
                        "call_id": tool_call_ids[idx],  # online platforms bind output to the tool-call id
                        "output": resp[1],  # tool execution result
                    }
                elif system_format in ["aihubmix_claude"]:
                    tool_response = {
                        "role": "user",
                        "content": [{
                            "type": "tool_result",
                            "tool_use_id": tool_call_ids[idx],  # online platforms bind output to the tool-call id
                            "content": resp[1],  # tool execution result
                        }]
                    }
                elif system_format in ["volcano", "aihubmix_glm"]:
                    tool_response = {
                        "role": "tool",
                        "tool_call_id": tool_call_ids[idx],  # online platforms bind output to the tool-call id
                        "content": resp[1],  # tool execution result
                    }
                elif system_format in ["aliyun"]:
                    tool_response = {
                        "role": "tool",
                        "tool_call_id": tool_call_ids[idx],  # online platforms bind output to the tool-call id
                        "content": resp[1],  # tool execution result
                    }
                messages.append(copy.deepcopy(tool_response))
                tool_response["elapsed_time"] = tool_elapsed  # local-only field; must not be sent to the platform
                log_messages.append(tool_response)
            else:
                log_messages.append({"role": "tool", "content": resp[1], "elapsed_time": tool_elapsed})
        # Feed tool responses back as a single 'user' message (matching the template behavior).
        tool_user_msg = wrap_tool_responses_into_user_message(responses)
        if system_format not in ONLINE_PLATFORM:
            messages.extend([{"role": "assistant", "content": assistant_text}, tool_user_msg])

        transcript.extend([tool_user_msg])
        round_idx += 1

    # If we get here, we hit the max rounds without a clean finish.
    logger.info("[run_one_query] Max rounds (%d) exceeded for query: %s", max_rounds, user_query)

    result_obj = {
        "query_id": query_id,
        "tools": get_tools_json(all_tools) if all_tools is not None else "[]",
        "messages": copy.deepcopy(log_messages),
        "final_answer": transcript[-1]["content"] if transcript else "",
        "transcript": copy.deepcopy(transcript),
        "rounds": max_rounds,
        "stopped_reason": "discard_all_final" if discard_count > 0 else "max_rounds_exceeded"
    }
    if progress is not None:
        progress['result'] = copy.deepcopy(result_objs + [result_obj])
    result_objs.append(result_obj)
    # Persist the final result to the logs directory.
    save_result_to_log_dir(query_id, result_objs, project_root, log_label)
    return result_objs
inference/models/tokenizer/added_tokens.json ADDED
@@ -0,0 +1,28 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "</think>": 151668,
3
+ "</tool_call>": 151658,
4
+ "</tool_response>": 151666,
5
+ "<think>": 151667,
6
+ "<tool_call>": 151657,
7
+ "<tool_response>": 151665,
8
+ "<|box_end|>": 151649,
9
+ "<|box_start|>": 151648,
10
+ "<|endoftext|>": 151643,
11
+ "<|file_sep|>": 151664,
12
+ "<|fim_middle|>": 151660,
13
+ "<|fim_pad|>": 151662,
14
+ "<|fim_prefix|>": 151659,
15
+ "<|fim_suffix|>": 151661,
16
+ "<|im_end|>": 151645,
17
+ "<|im_start|>": 151644,
18
+ "<|image_pad|>": 151655,
19
+ "<|object_ref_end|>": 151647,
20
+ "<|object_ref_start|>": 151646,
21
+ "<|quad_end|>": 151651,
22
+ "<|quad_start|>": 151650,
23
+ "<|repo_name|>": 151663,
24
+ "<|video_pad|>": 151656,
25
+ "<|vision_end|>": 151653,
26
+ "<|vision_pad|>": 151654,
27
+ "<|vision_start|>": 151652
28
+ }
inference/models/tokenizer/chat_template.jinja ADDED
@@ -0,0 +1,89 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
{#- ChatML / Qwen3-style chat template: renders an optional system prompt,
    tool signatures in <tools> tags, assistant <tool_call> blocks, tool
    responses wrapped as user turns, and <think> reasoning sections.
    All inserted comments use {#- -#} trims so rendered output is unchanged. -#}
{%- if tools %}
    {{- '<|im_start|>system\n' }}
    {%- if messages[0].role == 'system' %}
        {{- messages[0].content + '\n\n' }}
    {%- endif %}
    {{- "# Tools\n\nYou may call one or more functions to assist with the user query.\n\nYou are provided with function signatures within <tools></tools> XML tags:\n<tools>" }}
    {%- for tool in tools %}
        {{- "\n" }}
        {{- tool | tojson }}
    {%- endfor %}
    {{- "\n</tools>\n\nFor each function call, return a json object with function name and arguments within <tool_call></tool_call> XML tags:\n<tool_call>\n{\"name\": <function-name>, \"arguments\": <args-json-object>}\n</tool_call><|im_end|>\n" }}
{%- else %}
    {%- if messages[0].role == 'system' %}
        {{- '<|im_start|>system\n' + messages[0].content + '<|im_end|>\n' }}
    {%- endif %}
{%- endif %}
{#- Walk messages in reverse to find the index of the last "real" user turn,
    i.e. one that is not a wrapped <tool_response> payload. Reasoning content
    is only emitted for assistant turns after this index. -#}
{%- set ns = namespace(multi_step_tool=true, last_query_index=messages|length - 1) %}
{%- for message in messages[::-1] %}
    {%- set index = (messages|length - 1) - loop.index0 %}
    {%- if ns.multi_step_tool and message.role == "user" and message.content is string and not(message.content.startswith('<tool_response>') and message.content.endswith('</tool_response>')) %}
        {%- set ns.multi_step_tool = false %}
        {%- set ns.last_query_index = index %}
    {%- endif %}
{%- endfor %}
{#- Render each message; for assistant turns, reasoning may come from a
    dedicated reasoning_content field or be split out of inline <think> tags. -#}
{%- for message in messages %}
    {%- if message.content is string %}
        {%- set content = message.content %}
    {%- else %}
        {%- set content = '' %}
    {%- endif %}
    {%- if (message.role == "user") or (message.role == "system" and not loop.first) %}
        {{- '<|im_start|>' + message.role + '\n' + content + '<|im_end|>' + '\n' }}
    {%- elif message.role == "assistant" %}
        {%- set reasoning_content = '' %}
        {%- if message.reasoning_content is string %}
            {%- set reasoning_content = message.reasoning_content %}
        {%- else %}
            {%- if '</think>' in content %}
                {%- set reasoning_content = content.split('</think>')[0].rstrip('\n').split('<think>')[-1].lstrip('\n') %}
                {%- set content = content.split('</think>')[-1].lstrip('\n') %}
            {%- endif %}
        {%- endif %}
        {%- if loop.index0 > ns.last_query_index %}
            {%- if loop.last or (not loop.last and reasoning_content) %}
                {{- '<|im_start|>' + message.role + '\n<think>\n' + reasoning_content.strip('\n') + '\n</think>\n\n' + content.lstrip('\n') }}
            {%- else %}
                {{- '<|im_start|>' + message.role + '\n' + content }}
            {%- endif %}
        {%- else %}
            {{- '<|im_start|>' + message.role + '\n' + content }}
        {%- endif %}
        {%- if message.tool_calls %}
            {%- for tool_call in message.tool_calls %}
                {%- if (loop.first and content) or (not loop.first) %}
                    {{- '\n' }}
                {%- endif %}
                {%- if tool_call.function %}
                    {%- set tool_call = tool_call.function %}
                {%- endif %}
                {{- '<tool_call>\n{"name": "' }}
                {{- tool_call.name }}
                {{- '", "arguments": ' }}
                {%- if tool_call.arguments is string %}
                    {{- tool_call.arguments }}
                {%- else %}
                    {{- tool_call.arguments | tojson }}
                {%- endif %}
                {{- '}\n</tool_call>' }}
            {%- endfor %}
        {%- endif %}
        {{- '<|im_end|>\n' }}
    {%- elif message.role == "tool" %}
        {#- Consecutive tool messages are merged into a single user turn. -#}
        {%- if loop.first or (messages[loop.index0 - 1].role != "tool") %}
            {{- '<|im_start|>user' }}
        {%- endif %}
        {{- '\n<tool_response>\n' }}
        {{- content }}
        {{- '\n</tool_response>' }}
        {%- if loop.last or (messages[loop.index0 + 1].role != "tool") %}
            {{- '<|im_end|>\n' }}
        {%- endif %}
    {%- endif %}
{%- endfor %}
{#- Open the assistant turn; an explicit empty think block disables reasoning. -#}
{%- if add_generation_prompt %}
    {{- '<|im_start|>assistant\n' }}
    {%- if enable_thinking is defined and enable_thinking is false %}
        {{- '<think>\n\n</think>\n\n' }}
    {%- endif %}
{%- endif %}
inference/models/tokenizer/config.json ADDED
@@ -0,0 +1,41 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "architectures": [
3
+ "Qwen3MoeForCausalLM"
4
+ ],
5
+ "attention_bias": false,
6
+ "attention_dropout": 0.0,
7
+ "decoder_sparse_step": 1,
8
+ "dtype": "bfloat16",
9
+ "eos_token_id": 151645,
10
+ "head_dim": 128,
11
+ "hidden_act": "silu",
12
+ "hidden_size": 2048,
13
+ "initializer_range": 0.02,
14
+ "intermediate_size": 5472,
15
+ "max_position_embeddings": 131072,
16
+ "max_window_layers": 28,
17
+ "mlp_only_layers": [],
18
+ "model_type": "qwen3_moe",
19
+ "moe_intermediate_size": 768,
20
+ "norm_topk_prob": true,
21
+ "num_attention_heads": 32,
22
+ "num_experts": 128,
23
+ "num_experts_per_tok": 8,
24
+ "num_hidden_layers": 48,
25
+ "num_key_value_heads": 4,
26
+ "output_router_logits": false,
27
+ "pad_token_id": 151643,
28
+ "qkv_bias": false,
29
+ "rms_norm_eps": 1e-06,
30
+ "rope_scaling": null,
31
+ "rope_theta": 5000000,
32
+ "router_aux_loss_coef": 0.0,
33
+ "sliding_window": null,
34
+ "tie_word_embeddings": false,
35
+ "torch_dtype": "bfloat16",
36
+ "transformers_version": "4.51.3",
37
+ "use_cache": true,
38
+ "use_qk_norm": true,
39
+ "use_sliding_window": false,
40
+ "vocab_size": 151936
41
+ }
inference/models/tokenizer/merges.txt ADDED
The diff for this file is too large to render. See raw diff
 
inference/models/tokenizer/special_tokens_map.json ADDED
@@ -0,0 +1,31 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "additional_special_tokens": [
3
+ "<|im_start|>",
4
+ "<|im_end|>",
5
+ "<|object_ref_start|>",
6
+ "<|object_ref_end|>",
7
+ "<|box_start|>",
8
+ "<|box_end|>",
9
+ "<|quad_start|>",
10
+ "<|quad_end|>",
11
+ "<|vision_start|>",
12
+ "<|vision_end|>",
13
+ "<|vision_pad|>",
14
+ "<|image_pad|>",
15
+ "<|video_pad|>"
16
+ ],
17
+ "eos_token": {
18
+ "content": "<|im_end|>",
19
+ "lstrip": false,
20
+ "normalized": false,
21
+ "rstrip": false,
22
+ "single_word": false
23
+ },
24
+ "pad_token": {
25
+ "content": "<|endoftext|>",
26
+ "lstrip": false,
27
+ "normalized": false,
28
+ "rstrip": false,
29
+ "single_word": false
30
+ }
31
+ }
inference/models/tokenizer/tokenizer.json ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:aeb13307a71acd8fe81861d94ad54ab689df773318809eed3cbe794b4492dae4
3
+ size 11422654
inference/models/tokenizer/tokenizer_config.json ADDED
@@ -0,0 +1,239 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "add_bos_token": false,
3
+ "add_prefix_space": false,
4
+ "added_tokens_decoder": {
5
+ "151643": {
6
+ "content": "<|endoftext|>",
7
+ "lstrip": false,
8
+ "normalized": false,
9
+ "rstrip": false,
10
+ "single_word": false,
11
+ "special": true
12
+ },
13
+ "151644": {
14
+ "content": "<|im_start|>",
15
+ "lstrip": false,
16
+ "normalized": false,
17
+ "rstrip": false,
18
+ "single_word": false,
19
+ "special": true
20
+ },
21
+ "151645": {
22
+ "content": "<|im_end|>",
23
+ "lstrip": false,
24
+ "normalized": false,
25
+ "rstrip": false,
26
+ "single_word": false,
27
+ "special": true
28
+ },
29
+ "151646": {
30
+ "content": "<|object_ref_start|>",
31
+ "lstrip": false,
32
+ "normalized": false,
33
+ "rstrip": false,
34
+ "single_word": false,
35
+ "special": true
36
+ },
37
+ "151647": {
38
+ "content": "<|object_ref_end|>",
39
+ "lstrip": false,
40
+ "normalized": false,
41
+ "rstrip": false,
42
+ "single_word": false,
43
+ "special": true
44
+ },
45
+ "151648": {
46
+ "content": "<|box_start|>",
47
+ "lstrip": false,
48
+ "normalized": false,
49
+ "rstrip": false,
50
+ "single_word": false,
51
+ "special": true
52
+ },
53
+ "151649": {
54
+ "content": "<|box_end|>",
55
+ "lstrip": false,
56
+ "normalized": false,
57
+ "rstrip": false,
58
+ "single_word": false,
59
+ "special": true
60
+ },
61
+ "151650": {
62
+ "content": "<|quad_start|>",
63
+ "lstrip": false,
64
+ "normalized": false,
65
+ "rstrip": false,
66
+ "single_word": false,
67
+ "special": true
68
+ },
69
+ "151651": {
70
+ "content": "<|quad_end|>",
71
+ "lstrip": false,
72
+ "normalized": false,
73
+ "rstrip": false,
74
+ "single_word": false,
75
+ "special": true
76
+ },
77
+ "151652": {
78
+ "content": "<|vision_start|>",
79
+ "lstrip": false,
80
+ "normalized": false,
81
+ "rstrip": false,
82
+ "single_word": false,
83
+ "special": true
84
+ },
85
+ "151653": {
86
+ "content": "<|vision_end|>",
87
+ "lstrip": false,
88
+ "normalized": false,
89
+ "rstrip": false,
90
+ "single_word": false,
91
+ "special": true
92
+ },
93
+ "151654": {
94
+ "content": "<|vision_pad|>",
95
+ "lstrip": false,
96
+ "normalized": false,
97
+ "rstrip": false,
98
+ "single_word": false,
99
+ "special": true
100
+ },
101
+ "151655": {
102
+ "content": "<|image_pad|>",
103
+ "lstrip": false,
104
+ "normalized": false,
105
+ "rstrip": false,
106
+ "single_word": false,
107
+ "special": true
108
+ },
109
+ "151656": {
110
+ "content": "<|video_pad|>",
111
+ "lstrip": false,
112
+ "normalized": false,
113
+ "rstrip": false,
114
+ "single_word": false,
115
+ "special": true
116
+ },
117
+ "151657": {
118
+ "content": "<tool_call>",
119
+ "lstrip": false,
120
+ "normalized": false,
121
+ "rstrip": false,
122
+ "single_word": false,
123
+ "special": false
124
+ },
125
+ "151658": {
126
+ "content": "</tool_call>",
127
+ "lstrip": false,
128
+ "normalized": false,
129
+ "rstrip": false,
130
+ "single_word": false,
131
+ "special": false
132
+ },
133
+ "151659": {
134
+ "content": "<|fim_prefix|>",
135
+ "lstrip": false,
136
+ "normalized": false,
137
+ "rstrip": false,
138
+ "single_word": false,
139
+ "special": false
140
+ },
141
+ "151660": {
142
+ "content": "<|fim_middle|>",
143
+ "lstrip": false,
144
+ "normalized": false,
145
+ "rstrip": false,
146
+ "single_word": false,
147
+ "special": false
148
+ },
149
+ "151661": {
150
+ "content": "<|fim_suffix|>",
151
+ "lstrip": false,
152
+ "normalized": false,
153
+ "rstrip": false,
154
+ "single_word": false,
155
+ "special": false
156
+ },
157
+ "151662": {
158
+ "content": "<|fim_pad|>",
159
+ "lstrip": false,
160
+ "normalized": false,
161
+ "rstrip": false,
162
+ "single_word": false,
163
+ "special": false
164
+ },
165
+ "151663": {
166
+ "content": "<|repo_name|>",
167
+ "lstrip": false,
168
+ "normalized": false,
169
+ "rstrip": false,
170
+ "single_word": false,
171
+ "special": false
172
+ },
173
+ "151664": {
174
+ "content": "<|file_sep|>",
175
+ "lstrip": false,
176
+ "normalized": false,
177
+ "rstrip": false,
178
+ "single_word": false,
179
+ "special": false
180
+ },
181
+ "151665": {
182
+ "content": "<tool_response>",
183
+ "lstrip": false,
184
+ "normalized": false,
185
+ "rstrip": false,
186
+ "single_word": false,
187
+ "special": false
188
+ },
189
+ "151666": {
190
+ "content": "</tool_response>",
191
+ "lstrip": false,
192
+ "normalized": false,
193
+ "rstrip": false,
194
+ "single_word": false,
195
+ "special": false
196
+ },
197
+ "151667": {
198
+ "content": "<think>",
199
+ "lstrip": false,
200
+ "normalized": false,
201
+ "rstrip": false,
202
+ "single_word": false,
203
+ "special": false
204
+ },
205
+ "151668": {
206
+ "content": "</think>",
207
+ "lstrip": false,
208
+ "normalized": false,
209
+ "rstrip": false,
210
+ "single_word": false,
211
+ "special": false
212
+ }
213
+ },
214
+ "additional_special_tokens": [
215
+ "<|im_start|>",
216
+ "<|im_end|>",
217
+ "<|object_ref_start|>",
218
+ "<|object_ref_end|>",
219
+ "<|box_start|>",
220
+ "<|box_end|>",
221
+ "<|quad_start|>",
222
+ "<|quad_end|>",
223
+ "<|vision_start|>",
224
+ "<|vision_end|>",
225
+ "<|vision_pad|>",
226
+ "<|image_pad|>",
227
+ "<|video_pad|>"
228
+ ],
229
+ "bos_token": null,
230
+ "clean_up_tokenization_spaces": false,
231
+ "eos_token": "<|im_end|>",
232
+ "errors": "replace",
233
+ "extra_special_tokens": {},
234
+ "model_max_length": 131072,
235
+ "pad_token": "<|endoftext|>",
236
+ "split_special_tokens": false,
237
+ "tokenizer_class": "Qwen2Tokenizer",
238
+ "unk_token": null
239
+ }
inference/models/tokenizer/vocab.json ADDED
The diff for this file is too large to render. See raw diff
 
inference/requirements.txt ADDED
@@ -0,0 +1,176 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ aiofiles==24.1.0
2
+ aiohappyeyeballs==2.6.1
3
+ aiohttp==3.13.3
4
+ aiosignal==1.4.0
5
+ annotated-doc==0.0.4
6
+ annotated-types==0.7.0
7
+ anthropic==0.75.0
8
+ anyio==4.12.1
9
+ async-timeout==5.0.1
10
+ asyncer==0.0.12
11
+ asyncpg==0.31.0
12
+ attrs==25.4.0
13
+ azure-core==1.38.0
14
+ azure-storage-blob==12.28.0
15
+ beautifulsoup4==4.14.3
16
+ bidict==0.23.1
17
+ blinker==1.9.0
18
+ boto3==1.42.27
19
+ botocore==1.42.27
20
+ certifi==2026.1.4
21
+ cffi==2.0.0
22
+ chainlit==2.9.5
23
+ charset-normalizer==3.4.4
24
+ chevron==0.14.0
25
+ click==8.3.1
26
+ colorama==0.4.6
27
+ cryptography==46.0.3
28
+ cuid==0.4
29
+ dataclasses-json==0.6.7
30
+ Deprecated==1.3.1
31
+ distro==1.9.0
32
+ docstring_parser==0.17.0
33
+ fastapi==0.128.0
34
+ filelock==3.20.3
35
+ filetype==1.2.0
36
+ Flask==3.1.2
37
+ frozenlist==1.8.0
38
+ fsspec==2026.1.0
39
+ google==3.0.0
40
+ google-ai-generativelanguage==0.6.15
41
+ google-api-core==2.29.0
42
+ google-api-python-client==2.188.0
43
+ google-auth==2.47.0
44
+ google-auth-httplib2==0.3.0
45
+ google-cloud-core==2.5.0
46
+ google-cloud-storage==3.8.0
47
+ google-crc32c==1.8.0
48
+ google-generativeai==0.8.6
49
+ google-resumable-media==2.8.0
50
+ googleapis-common-protos==1.72.0
51
+ grpcio==1.76.0
52
+ grpcio-status==1.71.2
53
+ h11==0.16.0
54
+ hf-xet==1.2.0
55
+ httpcore==1.0.9
56
+ httplib2==0.31.2
57
+ httpx==0.28.1
58
+ httpx-sse==0.4.3
59
+ huggingface-hub==0.36.0
60
+ idna==3.11
61
+ importlib_metadata==8.7.1
62
+ inflection==0.5.1
63
+ isodate==0.7.2
64
+ itsdangerous==2.2.0
65
+ Jinja2==3.1.6
66
+ jiter==0.12.0
67
+ jmespath==1.0.1
68
+ jsonschema==4.26.0
69
+ jsonschema-specifications==2025.9.1
70
+ Lazify==0.4.0
71
+ literalai==0.1.201
72
+ MarkupSafe==3.0.3
73
+ marshmallow==3.26.2
74
+ mcp==1.25.0
75
+ multidict==6.7.0
76
+ mypy_extensions==1.1.0
77
+ numpy==2.2.6
78
+ openai==2.15.0
79
+ opentelemetry-api==1.39.1
80
+ opentelemetry-exporter-otlp-proto-common==1.39.1
81
+ opentelemetry-exporter-otlp-proto-grpc==1.39.1
82
+ opentelemetry-exporter-otlp-proto-http==1.39.1
83
+ opentelemetry-instrumentation==0.60b1
84
+ opentelemetry-instrumentation-agno==0.50.1
85
+ opentelemetry-instrumentation-alephalpha==0.50.1
86
+ opentelemetry-instrumentation-anthropic==0.50.1
87
+ opentelemetry-instrumentation-bedrock==0.50.1
88
+ opentelemetry-instrumentation-chromadb==0.50.1
89
+ opentelemetry-instrumentation-cohere==0.50.1
90
+ opentelemetry-instrumentation-crewai==0.50.1
91
+ opentelemetry-instrumentation-google-generativeai==0.50.1
92
+ opentelemetry-instrumentation-groq==0.50.1
93
+ opentelemetry-instrumentation-haystack==0.50.1
94
+ opentelemetry-instrumentation-lancedb==0.50.1
95
+ opentelemetry-instrumentation-langchain==0.50.1
96
+ opentelemetry-instrumentation-llamaindex==0.50.1
97
+ opentelemetry-instrumentation-logging==0.60b1
98
+ opentelemetry-instrumentation-marqo==0.50.1
99
+ opentelemetry-instrumentation-mcp==0.50.1
100
+ opentelemetry-instrumentation-milvus==0.50.1
101
+ opentelemetry-instrumentation-mistralai==0.50.1
102
+ opentelemetry-instrumentation-ollama==0.50.1
103
+ opentelemetry-instrumentation-openai==0.50.1
104
+ opentelemetry-instrumentation-openai-agents==0.50.1
105
+ opentelemetry-instrumentation-pinecone==0.50.1
106
+ opentelemetry-instrumentation-qdrant==0.50.1
107
+ opentelemetry-instrumentation-redis==0.60b1
108
+ opentelemetry-instrumentation-replicate==0.50.1
109
+ opentelemetry-instrumentation-requests==0.60b1
110
+ opentelemetry-instrumentation-sagemaker==0.50.1
111
+ opentelemetry-instrumentation-sqlalchemy==0.60b1
112
+ opentelemetry-instrumentation-threading==0.60b1
113
+ opentelemetry-instrumentation-together==0.50.1
114
+ opentelemetry-instrumentation-transformers==0.50.1
115
+ opentelemetry-instrumentation-urllib3==0.60b1
116
+ opentelemetry-instrumentation-vertexai==0.50.1
117
+ opentelemetry-instrumentation-watsonx==0.50.1
118
+ opentelemetry-instrumentation-weaviate==0.50.1
119
+ opentelemetry-instrumentation-writer==0.50.1
120
+ opentelemetry-proto==1.39.1
121
+ opentelemetry-sdk==1.39.1
122
+ opentelemetry-semantic-conventions==0.60b1
123
+ opentelemetry-semantic-conventions-ai==0.4.13
124
+ opentelemetry-util-http==0.60b1
125
+ pandas==2.3.3
126
+ pillow==12.1.0
127
+ propcache==0.4.1
128
+ proto-plus==1.27.0
129
+ protobuf==5.29.5
130
+ pyasn1==0.6.1
131
+ pyasn1_modules==0.4.2
132
+ pycparser==2.23
133
+ pydantic==2.12.5
134
+ pydantic-settings==2.12.0
135
+ pydantic_core==2.41.5
136
+ PyJWT==2.10.1
137
+ pyparsing==3.3.2
138
+ python-dotenv==1.2.1
139
+ python-engineio==4.13.0
140
+ python-multipart==0.0.21
141
+ python-socketio==5.16.0
142
+ pytz==2025.2
143
+ PyYAML==6.0.3
144
+ referencing==0.37.0
145
+ regex==2025.11.3
146
+ requests==2.32.5
147
+ rpds-py==0.30.0
148
+ rsa==4.9.1
149
+ s3transfer==0.16.0
150
+ safetensors==0.7.0
151
+ shellingham==1.5.4
152
+ simple-websocket==1.1.0
153
+ sniffio==1.3.1
154
+ soupsieve==2.8.3
155
+ sse-starlette==3.1.2
156
+ starlette==0.50.0
157
+ syncer==2.0.3
158
+ tenacity==9.1.2
159
+ tokenizers==0.22.2
160
+ tomli==2.4.0
161
+ tqdm==4.67.1
162
+ traceloop-sdk==0.50.1
163
+ transformers==4.57.3
164
+ typer-slim==0.21.1
165
+ typing-inspect==0.9.0
166
+ typing-inspection==0.4.2
167
+ tzdata==2025.3
168
+ uritemplate==4.2.0
169
+ urllib3==2.6.3
170
+ uvicorn==0.40.0
171
+ watchfiles==1.1.1
172
+ Werkzeug==3.1.5
173
+ wrapt==1.17.3
174
+ wsproto==1.3.2
175
+ yarl==1.22.0
176
+ zipp==3.23.0
inference/run_batch_inference_demo.sh ADDED
@@ -0,0 +1,150 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
#!/bin/bash
# Batch-inference launcher (local vLLM endpoints).
# Prints the run parameters, then starts inference/run_batch_inference.py
# in the background via nohup with stdout/stderr appended to LOG_FILE.

# Optional: override settings from `utils.config` via JSON / environment variables.
# Example 1: point at a JSON config file (highest priority)
# export S1_DR_CONFIG_JSON="utils/config/config.example.json"
#
# Example 2: override directly with environment variables
# export CLIENT_TIMEOUT=7200
# export USE_NLP_FORMAT_RETURN=true
# export MY_NEW_FLAG="demo_value"

# System prompt parameter
SYSTEM_PROMPT="" # empty -> the standard 9-tool system prompt

LLM_CLIENT_URLS="http://url:port/v1/chat/completions http://url:port/v1/chat/completions"
LLM_CLIENT_MODELS="model_name1 model_name1"

TEST_DATA_FILE="test.jsonl"
OUTPUT_FILE="test_results.jsonl"

# OUTPUT_DIR only takes effect for Pass@K (multi-rollout) runs.
# When ROLLOUT_NUM=1, OUTPUT_DIR is ignored; the single-run result goes to the jsonl path in OUTPUT_FILE.
# When ROLLOUT_NUM!=1, OUTPUT_FILE is ignored; each rollout is saved under OUTPUT_DIR as xxx_01.jsonl, xxx_02.jsonl, ...
OUTPUT_DIR=""
# Enable all 9 tools
AVAILABLE_TOOLS="wide_search scholar_search file_wide_parse execute_code wide_visit ask_question_about_image ask_question_about_video image_search bash"
ROLLOUT_NUM=1
RESUME_FROM_FILE=""

# LOGGING_ROOT: log root path; a `logs` subdirectory is created under it.
# E.g. when set to "/app", the log directory is "/app/logs".
LOGGING_ROOT=""
# LOG_LABEL: log tag; run logs go to a `logs/YYYY_MM_DD_<LOG_LABEL>/` subdirectory
# (combined with LOGGING_ROOT, this sits under the logs path above).
LOG_LABEL="test"
LOG_FILE="run_logs/run.log"

# TASK_TYPE only needs adjusting for attachment-related tasks; other inference
# tasks can keep the default and need not modify this parameter.
TASK_TYPE="input_only"
MAX_ROUNDS=100
CONCURRENCY_WORKERS=16
SAVE_BATCH_SIZE=10
TEMPERATURE=0.7
TOP_P=0.95
# Extra payload parameters (JSON string) passed through to the model API; add/remove fields as needed.
# Use an empty JSON object when no extras are needed: EXTRA_PAYLOAD='{}'
EXTRA_PAYLOAD='{"presence_penalty": 0.0}'
TIMEOUT_FOR_ONE_QUERY=3600
LLM_API_RETRY_TIMES=2
# discard-all mode: "false" here means the mode is disabled
DISCARD_ALL_MODE="false"
MODEL_MAX_CONTEXT_TOKENS=128000
DISCARD_RATIO=0.8
TOKENIZER_PATH="models/tokenizer"

# Ensure the log directory exists before the first redirect below; otherwise
# `> "$LOG_FILE"` fails when run_logs/ has not been created yet.
mkdir -p "$(dirname "$LOG_FILE")"

PARAM_INFO=$(
cat <<EOF
========== Run Parameters ==========
Start Time: $(date)
LLM_CLIENT_URLS: $LLM_CLIENT_URLS
LLM_CLIENT_MODELS: $LLM_CLIENT_MODELS
TEST_DATA_FILE: $TEST_DATA_FILE
OUTPUT_FILE: $OUTPUT_FILE
OUTPUT_DIR: $OUTPUT_DIR
AVAILABLE_TOOLS: $AVAILABLE_TOOLS
CONCURRENCY_WORKERS: $CONCURRENCY_WORKERS
SAVE_BATCH_SIZE: $SAVE_BATCH_SIZE
ROLLOUT_NUM: $ROLLOUT_NUM
MAX_ROUNDS: $MAX_ROUNDS
TEMPERATURE: $TEMPERATURE
TOP_P: $TOP_P
EXTRA_PAYLOAD: $EXTRA_PAYLOAD
TIMEOUT_FOR_ONE_QUERY: $TIMEOUT_FOR_ONE_QUERY
LLM_API_RETRY_TIMES: $LLM_API_RETRY_TIMES
DISCARD_ALL_MODE: $DISCARD_ALL_MODE
MODEL_MAX_CONTEXT_TOKENS: $MODEL_MAX_CONTEXT_TOKENS
DISCARD_RATIO: $DISCARD_RATIO
TOKENIZER_PATH: $TOKENIZER_PATH
RESUME_FROM_FILE: $RESUME_FROM_FILE
LOG_LABEL: $LOG_LABEL
TASK_TYPE: $TASK_TYPE
LOGGING_ROOT: $LOGGING_ROOT
SYSTEM_PROMPT: $SYSTEM_PROMPT
Shell PID: $$
====================================
EOF
)
echo "$PARAM_INFO"
echo "$PARAM_INFO" > "$LOG_FILE"

# Start Python in the background with nohup: stdout and stderr are appended to
# LOG_FILE; the process PID is then printed to the terminal and appended to LOG_FILE.
# When TASK_TYPE is input_only, --clean_files_copy_dir must be added to the command line.
if [ "$TASK_TYPE" = "input_only" ]; then
nohup python inference/run_batch_inference.py \
    --llm_client_urls $LLM_CLIENT_URLS \
    --llm_client_models $LLM_CLIENT_MODELS \
    --test_data_file "$TEST_DATA_FILE" \
    --output_file "$OUTPUT_FILE" \
    --output_dir "$OUTPUT_DIR" \
    --available_tools $AVAILABLE_TOOLS \
    --concurrency_workers $CONCURRENCY_WORKERS \
    --save_batch_size $SAVE_BATCH_SIZE \
    --rollout_num $ROLLOUT_NUM \
    --max_rounds $MAX_ROUNDS \
    --temperature $TEMPERATURE \
    --top_p $TOP_P \
    --extra_payload "$EXTRA_PAYLOAD" \
    --timeout_for_one_query $TIMEOUT_FOR_ONE_QUERY \
    --llm_api_retry_times $LLM_API_RETRY_TIMES \
    --discard_all_mode "$DISCARD_ALL_MODE" \
    --model_max_context_tokens $MODEL_MAX_CONTEXT_TOKENS \
    --discard_ratio $DISCARD_RATIO \
    --tokenizer_path "$TOKENIZER_PATH" \
    --resume_from_file "$RESUME_FROM_FILE" \
    --log_label "$LOG_LABEL" \
    --logging_root "$LOGGING_ROOT" \
    --system_prompt "$SYSTEM_PROMPT" \
    --verbose \
    --clean_files_copy_dir \
    >> "$LOG_FILE" 2>&1 &
else
nohup python inference/run_batch_inference.py \
    --llm_client_urls $LLM_CLIENT_URLS \
    --llm_client_models $LLM_CLIENT_MODELS \
    --test_data_file "$TEST_DATA_FILE" \
    --output_file "$OUTPUT_FILE" \
    --output_dir "$OUTPUT_DIR" \
    --available_tools $AVAILABLE_TOOLS \
    --concurrency_workers $CONCURRENCY_WORKERS \
    --save_batch_size $SAVE_BATCH_SIZE \
    --rollout_num $ROLLOUT_NUM \
    --max_rounds $MAX_ROUNDS \
    --temperature $TEMPERATURE \
    --top_p $TOP_P \
    --extra_payload "$EXTRA_PAYLOAD" \
    --timeout_for_one_query $TIMEOUT_FOR_ONE_QUERY \
    --llm_api_retry_times $LLM_API_RETRY_TIMES \
    --discard_all_mode "$DISCARD_ALL_MODE" \
    --model_max_context_tokens $MODEL_MAX_CONTEXT_TOKENS \
    --discard_ratio $DISCARD_RATIO \
    --tokenizer_path "$TOKENIZER_PATH" \
    --resume_from_file "$RESUME_FROM_FILE" \
    --log_label "$LOG_LABEL" \
    --logging_root "$LOGGING_ROOT" \
    --system_prompt "$SYSTEM_PROMPT" \
    --verbose \
    >> "$LOG_FILE" 2>&1 &
fi

PY_PID=$!
echo "Python running as PID: $PY_PID"
echo "Python running as PID: $PY_PID" >> "$LOG_FILE"
inference/run_batch_inference_online_demo.sh ADDED
@@ -0,0 +1,183 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
#!/bin/bash
# Batch-inference launcher for online (hosted) model platforms.
# Adds the missing shebang present in the sibling demo script, selects a
# provider via LLM_CLIENT_URLS / SYSTEM_FORMAT, and launches
# inference/run_batch_inference.py in the background via nohup.

# Optional: override settings from `utils.config` via JSON / environment variables.
# Example 1: point at a JSON config file (highest priority)
# export S1_DR_CONFIG_JSON="utils/config/config.example.json"
#
# Example 2: override directly with environment variables
# export CLIENT_TIMEOUT=7200
# export USE_NLP_FORMAT_RETURN=true
# export MY_NEW_FLAG="demo_value"

# GLM5
LLM_CLIENT_URLS="https://aihubmix.com/v1/chat/completions"
LLM_CLIENT_MODELS="glm-5"
SYSTEM_FORMAT="aihubmix_glm"

# # Azure GPT family
# LLM_CLIENT_URLS="https://<your_special_id>.openai.azure.com/openai/v1/"
# LLM_CLIENT_MODELS="gpt-5"
# SYSTEM_FORMAT="azure"

# # aihubmix GPT family
# LLM_CLIENT_URLS="https://aihubmix.com/v1"
# LLM_CLIENT_MODELS="gpt-5"
# SYSTEM_FORMAT="aihubmix"

# # aihubmix Claude family
# LLM_CLIENT_URLS="https://aihubmix.com/v1"
# LLM_CLIENT_MODELS="claude-3.5-sonnet"
# SYSTEM_FORMAT="aihubmix_claude"

# # Volcano Engine (Ark)
# LLM_CLIENT_URLS="https://ark.cn-beijing.volces.com/api/v3"
# LLM_CLIENT_MODELS="ep-xxx"
# SYSTEM_FORMAT="volcano"

# # Aliyun Bailian (DashScope compatible-mode) endpoint
# LLM_CLIENT_URLS="https://dashscope.aliyuncs.com/compatible-mode/v1"
# LLM_CLIENT_MODELS="qwen-plus"
# SYSTEM_FORMAT="aliyun"


TEST_DATA_FILE="test_files/test_one_query.jsonl"
OUTPUT_FILE="test_files/test_one_query_results.jsonl"

# OUTPUT_DIR only takes effect for Pass@K (multi-rollout) runs.
# When ROLLOUT_NUM=1, OUTPUT_DIR is ignored; the single-run result goes to the jsonl path in OUTPUT_FILE.
# When ROLLOUT_NUM!=1, OUTPUT_FILE is ignored; each rollout is saved under OUTPUT_DIR as xxx_01.jsonl, xxx_02.jsonl, ...
OUTPUT_DIR="run_logs/GAIA_0126/rollouts"
# Enable all 9 tools
AVAILABLE_TOOLS="wide_search scholar_search file_wide_parse execute_code wide_visit ask_question_about_image ask_question_about_video image_search bash"
ROLLOUT_NUM=1
RESUME_FROM_FILE=""
LOG_LABEL="glm-5"
LOG_FILE="run_logs/run_batch_glm-5.log"

SYSTEM_PROMPT="You are a deep research assistant. Your core function is to conduct thorough, multi-source investigations into any topic. You must handle both broad, open-domain inquiries and queries within specialized academic fields. For every request, synthesize information from credible, diverse sources to deliver a comprehensive, accurate, and objective response. When you have gathered sufficient information and are ready to provide the definitive response, you must enclose the entire final answer within <answer></answer> tags.

# Note

## General Rules

- The current working directory (cwd) is \`.\`. Treat the cwd as the project root.
- You are authorized to read, edit, or create files within this directory. **You must use relative paths** for all operations; absolute paths are strictly forbidden.

## Citation & Reference Policy

- User instructions always override this policy.
- If the response does not use external sources, do not include citations or references.
- External sources include web searches, user-uploaded files, or explicitly cited webpages.
- If external sources are used:
- For lightweight factual or real-time information (e.g., weather, simple lookups), include in-text citation only.
- For research, analysis, or document-based tasks
(e.g., using multiple external sources or any user-uploaded file),
include both in-text citations and a reference list.
- Reference lists are for source traceability only; do not introduce new information.
- For citation-only cases, keep responses concise and avoid research-style structuring.

Current date: $(date +"%Y-%m-%d")"

# TASK_TYPE only needs adjusting for attachment-related tasks; other inference
# tasks can keep the default and need not modify this parameter.
TASK_TYPE="input_only"
MAX_ROUNDS=100
CONCURRENCY_WORKERS=16
SAVE_BATCH_SIZE=10
TEMPERATURE=0.85
TIMEOUT_FOR_ONE_QUERY=3600
LLM_API_RETRY_TIMES=2
# discard-all mode: "false" here means the mode is disabled
DISCARD_ALL_MODE="false"
MODEL_MAX_CONTEXT_TOKENS=131072
DISCARD_RATIO=0.8
TOKENIZER_PATH="models/tokenizer"

# Ensure the log directory exists before the first redirect below; otherwise
# `> "$LOG_FILE"` fails when run_logs/ has not been created yet.
mkdir -p "$(dirname "$LOG_FILE")"

PARAM_INFO=$(
cat <<EOF
========== Run Parameters ==========
Start Time: $(date)
LLM_CLIENT_URLS: $LLM_CLIENT_URLS
LLM_CLIENT_MODELS: $LLM_CLIENT_MODELS
TEST_DATA_FILE: $TEST_DATA_FILE
OUTPUT_FILE: $OUTPUT_FILE
OUTPUT_DIR: $OUTPUT_DIR
AVAILABLE_TOOLS: $AVAILABLE_TOOLS
CONCURRENCY_WORKERS: $CONCURRENCY_WORKERS
SAVE_BATCH_SIZE: $SAVE_BATCH_SIZE
ROLLOUT_NUM: $ROLLOUT_NUM
MAX_ROUNDS: $MAX_ROUNDS
TEMPERATURE: $TEMPERATURE
TIMEOUT_FOR_ONE_QUERY: $TIMEOUT_FOR_ONE_QUERY
LLM_API_RETRY_TIMES: $LLM_API_RETRY_TIMES
DISCARD_ALL_MODE: $DISCARD_ALL_MODE
MODEL_MAX_CONTEXT_TOKENS: $MODEL_MAX_CONTEXT_TOKENS
DISCARD_RATIO: $DISCARD_RATIO
TOKENIZER_PATH: $TOKENIZER_PATH
RESUME_FROM_FILE: $RESUME_FROM_FILE
TASK_TYPE: $TASK_TYPE
LOG_LABEL: $LOG_LABEL
SYSTEM_FORMAT: $SYSTEM_FORMAT
Shell PID: $$
====================================
EOF
)
echo "$PARAM_INFO"
echo "$PARAM_INFO" > "$LOG_FILE"

# Start Python in the background with nohup: stdout and stderr are appended to
# LOG_FILE; the process PID is then printed to the terminal and appended to LOG_FILE.
# When TASK_TYPE is input_only, --clean_files_copy_dir must be added to the command line.
if [ "$TASK_TYPE" = "input_only" ]; then
nohup python inference/run_batch_inference.py \
    --llm_client_urls $LLM_CLIENT_URLS \
    --llm_client_models $LLM_CLIENT_MODELS \
    --test_data_file "$TEST_DATA_FILE" \
    --output_file "$OUTPUT_FILE" \
    --output_dir "$OUTPUT_DIR" \
    --available_tools $AVAILABLE_TOOLS \
    --concurrency_workers $CONCURRENCY_WORKERS \
    --save_batch_size $SAVE_BATCH_SIZE \
    --rollout_num $ROLLOUT_NUM \
    --max_rounds $MAX_ROUNDS \
    --temperature $TEMPERATURE \
    --timeout_for_one_query $TIMEOUT_FOR_ONE_QUERY \
    --llm_api_retry_times $LLM_API_RETRY_TIMES \
    --discard_all_mode "$DISCARD_ALL_MODE" \
    --model_max_context_tokens $MODEL_MAX_CONTEXT_TOKENS \
    --discard_ratio $DISCARD_RATIO \
    --tokenizer_path "$TOKENIZER_PATH" \
    --resume_from_file "$RESUME_FROM_FILE" \
    --log_label "$LOG_LABEL" \
    --system_format "$SYSTEM_FORMAT" \
    --system_prompt "$SYSTEM_PROMPT" \
    --verbose \
    --clean_files_copy_dir \
    >> "$LOG_FILE" 2>&1 &
else
nohup python inference/run_batch_inference.py \
    --llm_client_urls $LLM_CLIENT_URLS \
    --llm_client_models $LLM_CLIENT_MODELS \
    --test_data_file "$TEST_DATA_FILE" \
    --output_file "$OUTPUT_FILE" \
    --output_dir "$OUTPUT_DIR" \
    --available_tools $AVAILABLE_TOOLS \
    --concurrency_workers $CONCURRENCY_WORKERS \
    --save_batch_size $SAVE_BATCH_SIZE \
    --rollout_num $ROLLOUT_NUM \
    --max_rounds $MAX_ROUNDS \
    --temperature $TEMPERATURE \
    --timeout_for_one_query $TIMEOUT_FOR_ONE_QUERY \
    --llm_api_retry_times $LLM_API_RETRY_TIMES \
    --discard_all_mode "$DISCARD_ALL_MODE" \
    --model_max_context_tokens $MODEL_MAX_CONTEXT_TOKENS \
    --discard_ratio $DISCARD_RATIO \
    --tokenizer_path "$TOKENIZER_PATH" \
    --resume_from_file "$RESUME_FROM_FILE" \
    --log_label "$LOG_LABEL" \
    --system_format "$SYSTEM_FORMAT" \
    --system_prompt "$SYSTEM_PROMPT" \
    --verbose \
    >> "$LOG_FILE" 2>&1 &
fi

PY_PID=$!
echo "Python running as PID: $PY_PID"
echo "Python running as PID: $PY_PID" >> "$LOG_FILE"
inference/server/llm_api.py ADDED
@@ -0,0 +1,665 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import asyncio
2
+ from copy import deepcopy
3
+ import json
4
+ import copy
5
+ from typing import Awaitable, Callable, Dict, List
6
+ import requests
7
+ import aiohttp
8
+ from openai import AsyncAzureOpenAI, AsyncOpenAI, OpenAI
9
+ import random
10
+ from collections import defaultdict
11
+
12
+ from utils.common import reorder_keys
13
+ from utils.configs import AIHUBMIX_KEY, ALIYUN_KEY, AZURE_KEY, CLIENT_TIMEOUT, VOLCANO_KEY
14
+
15
+ class LLMClient:
16
+ """
17
+ 调用远端启动的 vllm 接口
18
+ """
19
+
20
+ def __init__(
21
+ self,
22
+ url: List,
23
+ model_names: List,
24
+ client_timeout: int | float | None = None,
25
+ api_keys: dict | None = None,
26
+ max_retries: int = 0,
27
+ ):
28
+ self.base_urls = url
29
+ self.model_names = model_names
30
+ self.client_timeout = client_timeout or CLIENT_TIMEOUT
31
+ self.max_retries = max(0, int(max_retries))
32
+ self.retry_backoff_seconds = 30.0
33
+ self.api_keys = api_keys or {
34
+ "aihubmix": AIHUBMIX_KEY,
35
+ "azure": AZURE_KEY,
36
+ "volcano": VOLCANO_KEY,
37
+ "aliyun": ALIYUN_KEY,
38
+ }
39
+ # 优化的路由分配结构:
40
+ # 用一个 dict 记录 query_id => url(一一绑定,方便直接查找 query_id 所属 url,而不用遍历所有列表)
41
+ self.queryid_to_url: Dict[str, str] = {}
42
+ # 统计每个 url 当前负载(每个 url 被分配了多少个 query_id),直接用 defaultdict(int)
43
+ self.url_load: Dict[str, int] = defaultdict(int)
44
+ for u in self.base_urls:
45
+ self.url_load[u] = 0
46
+
47
+ def pop_query_id(self, query_id: str):
48
+ """
49
+ 将 query 弹出 url 记录表
50
+ """
51
+ url = self.queryid_to_url.pop(query_id, None)
52
+ if url is not None:
53
+ if url in self.url_load and self.url_load[url] > 0:
54
+ self.url_load[url] -= 1
55
+
56
+ def allocate_url_by_query_id(self, query_id: str, logger = None) -> str:
57
+ # 已有绑定
58
+ if query_id in self.queryid_to_url:
59
+ return self.queryid_to_url[query_id]
60
+ # 分配给当前负载最小的 url
61
+
62
+ min_load_url = min(self.url_load.items(), key=lambda x: x[1])[0]
63
+ self.queryid_to_url[query_id] = min_load_url
64
+ self.url_load[min_load_url] += 1
65
+ if logger:
66
+ logger.info(f"[vllm allocate] {query_id} allocated to {min_load_url}, Running: {self.url_load[min_load_url]} reqs")
67
+ return min_load_url
68
+
69
+ async def _run_with_retry(
70
+ self,
71
+ request_name: str,
72
+ request_coro_factory: Callable[[], Awaitable[dict]],
73
+ logger = None,
74
+ query_id: str = "",
75
+ ) -> dict:
76
+ total_attempts = self.max_retries + 1
77
+ last_error: Exception | None = None
78
+ query_suffix = f", query_id={query_id}" if query_id else ""
79
+
80
+ for attempt in range(1, total_attempts + 1):
81
+ if logger is not None and attempt > 1:
82
+ logger.info(
83
+ "[llm retry] %s retry attempt %d/%d started%s",
84
+ request_name,
85
+ attempt,
86
+ total_attempts,
87
+ query_suffix,
88
+ )
89
+ try:
90
+ result = await request_coro_factory()
91
+ if isinstance(result, dict) and result.get("error"):
92
+ raise RuntimeError(str(result["error"]))
93
+ if logger is not None and attempt > 1:
94
+ logger.info(
95
+ "[llm retry] %s attempt %d/%d succeeded%s",
96
+ request_name,
97
+ attempt,
98
+ total_attempts,
99
+ query_suffix,
100
+ )
101
+ return result
102
+ except Exception as exc:
103
+ last_error = exc
104
+ if logger is not None:
105
+ logger.warning(
106
+ "[llm retry] %s attempt %d/%d failed%s: %s",
107
+ request_name,
108
+ attempt,
109
+ total_attempts,
110
+ query_suffix,
111
+ exc,
112
+ )
113
+ if attempt >= total_attempts:
114
+ break
115
+ retry_delay = self.retry_backoff_seconds * attempt
116
+ if logger is not None:
117
+ logger.info(
118
+ "[llm retry] %s will retry in %.1fs%s",
119
+ request_name,
120
+ retry_delay,
121
+ query_suffix,
122
+ )
123
+ await asyncio.sleep(retry_delay)
124
+
125
+ if last_error is None:
126
+ raise RuntimeError(f"{request_name} failed without an explicit error{query_suffix}")
127
+ raise last_error
128
+
129
    async def chat(self, messages: List[Dict[str, str]], tool_list = [], temperature=0.7, top_p=0.95, extra_payload: dict = {}, logger= None, query_id = "") -> dict:
        """
        Call a remote vllm-style /chat/completions endpoint asynchronously.

        aiohttp is used instead of requests so the coroutine does not block
        the event loop while waiting on the network (a synchronous
        ``requests.post`` inside ``async def`` would stall every sibling
        coroutine). ``extra_payload`` may carry any additional payload fields
        (e.g. ``presence_penalty``) and overrides the defaults above.

        Returns a dict with ``content``, ``usage`` and ``error`` — ``error``
        is an empty string on success; on failure ``content`` is empty and the
        usage token counts are -1 sentinels.
        """
        payload = {
            "messages": messages,
            "temperature": temperature,
            "top_p": top_p,
        }
        payload.update(extra_payload)
        if len(tool_list) > 0:
            payload['tools'] = tool_list

        # Choose a URL: sticky, load-balanced routing per query_id when one is
        # given, otherwise a random endpoint.
        if query_id:
            chosen_url = self.allocate_url_by_query_id(query_id, logger)
        else:
            chosen_url = random.choice(self.base_urls)

        chosen_idx = self.base_urls.index(chosen_url)
        choose_model = self.model_names[chosen_idx]

        payload['model'] = choose_model  # vllm compatibility: model name must travel in the payload

        resp_json = None

        async def _request_once() -> dict:
            # Single HTTP attempt; retried by _run_with_retry below.
            nonlocal resp_json
            async with aiohttp.ClientSession() as session:
                async with session.post(chosen_url, json=payload, timeout=self.client_timeout) as resp:
                    resp.raise_for_status()
                    resp_json = await resp.json()
                    return {
                        "content": resp_json['choices'][0]['message']['content'],
                        "usage": resp_json['usage'],
                        "error": ""
                    }

        try:
            return await self._run_with_retry(
                request_name=f"chat url={chosen_url} model={choose_model}",
                request_coro_factory=_request_once,
                logger=logger,
                query_id=query_id,
            )
        except Exception as e:
            # Best-effort: log whatever the last response body was, if any.
            try:
                if logger is not None:
                    logger.info("[vllm response] %s", resp_json)
            except:
                pass
            # Sentinel usage (-1 token counts) marks a failed call for callers.
            return {
                "content": "",
                "usage": {
                    'completion_tokens': -1,
                    'prompt_tokens': -1,
                    'prompt_tokens_details': None,
                    'total_tokens': -1
                },
                "error": str(e)
            }
193
+
194
    async def _call_openai_chat(self,
                            raw_messages: List[Dict[str, str]],
                            tool_list = [],
                            temperature=0.7,
                            top_p=0.95,
                            logger = None,
                            api_key=None,
                            query_id: str = "") -> dict:
        """
        Call an OpenAI Responses-API compatible endpoint and normalize the
        result into this project's message format.

        Reasoning summaries, assistant text and tool calls are flattened into
        a single assistant message wrapped in <think>...</think> and
        <tool_call>...</tool_call> markers. Returns a dict with
        next_messages / log_messages / meta_data / tool_call_ids / usage,
        plus an "error" field on failure.

        NOTE(review): temperature/top_p are accepted but never forwarded to
        client.responses.create — confirm whether that is intentional.
        """
        idx = random.randrange(len(self.base_urls))
        chosen_url = self.base_urls[idx]
        chosen_model = self.model_names[idx]

        if 'claude' in chosen_model or 'glm' in chosen_model:
            # Route to the raw-HTTP call path (Volcano-engine compatible);
            # idx is passed through so the same model/URL pair stays selected.
            return await self._call_request_chat(raw_messages, tool_list, temperature, top_p, logger, api_key, idx, query_id)

        client = OpenAI(
            base_url = chosen_url,
            api_key = api_key,
        )

        meta_data = {
            "role": "assistant",
            "content": ""
        }
        tool_call_ids = []
        response_json = None
        messages = copy.deepcopy(raw_messages)

        # The Responses API expects user text blocks typed as 'input_text'.
        for msg in messages:
            if isinstance(msg, dict) and msg.get('role') == 'user' and isinstance(msg.get('content'), list):
                for item in msg['content']:
                    if isinstance(item, dict) and item.get('type') == 'text':
                        item['type'] = 'input_text'

        async def _request_once() -> dict:
            nonlocal response_json, meta_data, tool_call_ids

            # Reset per-attempt state so a retry does not accumulate stale data.
            tool_call_ids = []
            meta_data = {
                "role": "assistant",
                "content": ""
            }
            loop = asyncio.get_event_loop()
            if chosen_model in ["gpt-4.1", "gpt-4o"]:
                func = lambda: client.responses.create(
                    input=messages,
                    model=chosen_model,
                    tools=tool_list
                )
            else:
                # Reasoning-capable models also request a detailed summary.
                func = lambda: client.responses.create(
                    input=messages,
                    model=chosen_model,
                    tools=tool_list,
                    reasoning={'effort': 'medium', 'summary': 'detailed'}
                )
            try:
                # The OpenAI SDK call is synchronous; run it in a worker thread
                # and bound it with the client timeout.
                response = await asyncio.wait_for(
                    loop.run_in_executor(None, func),
                    timeout=self.client_timeout
                )
            except Exception as run_executor_exc:
                print(f"[client error] {run_executor_exc}")
                raise

            response_json = response.model_dump()

            next_messages = messages + response.output

            summary_list = []
            answer_content_list = []

            tool_calls = ""
            for msg in response_json['output']:
                if msg['type'] == 'reasoning':
                    summary_items = msg.get("summary", [])
                    summary_list.extend(s for s in summary_items if s.get("type") == "summary_text")
                elif msg['type'] == 'function_call':
                    now_tool_call = {
                        "name": msg['name'],
                        "arguments": json.loads(msg['arguments'])
                    }
                    tool_call_ids.append(msg['call_id'])
                    tool_calls += "<tool_call>\n" + json.dumps(now_tool_call, ensure_ascii=False) + "\n</tool_call>\n"
                elif msg['type'] == 'message':
                    for block in msg.get("content", []):
                        if block.get("type") == "output_text":
                            answer_content_list.append(block.get("text", "").strip())

            reasoning_content = "\n".join([i.get('text', "") for i in summary_list if i.get("text", "")]).strip()
            content = "\n".join(answer_content_list).strip()
            tool_calls = tool_calls.strip()
            # Flatten reasoning + answer + tool calls into one tagged string.
            meta_data_content = ""
            meta_data_content += "<think>\n"
            meta_data_content += f"{reasoning_content}\n</think>" if reasoning_content else "</think>"
            meta_data_content += f"\n{content}"
            meta_data_content += f"\n" if content else ""
            meta_data_content += f"{tool_calls}" if tool_calls else ""

            meta_data['content'] = meta_data_content

            return {
                "next_messages": next_messages,
                "log_messages": [reorder_keys(rep) for rep in response_json['output']],
                "meta_data": meta_data,
                "tool_call_ids": tool_call_ids,
                "usage": response_json['usage'],
            }

        try:
            return await self._run_with_retry(
                request_name=f"openai_chat url={chosen_url} model={chosen_model}",
                request_coro_factory=_request_once,
                logger=logger,
                query_id=query_id,
            )
        except Exception as e:
            # Best-effort: log the last raw response before reporting failure.
            try:
                if logger is not None:
                    logger.info("[vllm response] %s", response_json)
            except:
                pass

            return {
                "next_messages": messages,
                "log_messages": [],
                "meta_data": meta_data,
                "tool_call_ids": tool_call_ids,
                "usage": response_json['usage'] if response_json is not None and 'usage' in response_json else None,
                "error": str(e)
            }
327
+
328
    async def _call_request_chat(self,
                            raw_messages: List[Dict[str, str]],
                            tool_list = [],
                            temperature=0.7,
                            top_p=0.95,
                            logger = None,
                            api_key=None,
                            idx = None,
                            query_id: str = "") -> dict:
        """
        Call a provider over raw HTTP (Claude / GLM / Doubao / Volcano-style)
        and normalize the result into this project's message format.

        When *idx* is given the caller has already chosen the model/URL pair
        (e.g. when routed here from _call_openai_chat); otherwise one is
        picked at random. Returns a dict with next_messages / log_messages /
        meta_data / tool_call_ids / usage, plus "error" on failure.

        NOTE(review): temperature/top_p are accepted but not placed in the
        request payload, and max_tokens/budget_tokens are hard-coded —
        confirm intended.
        """
        idx = random.randrange(len(self.base_urls)) if idx is None else idx
        chosen_url = self.base_urls[idx]
        chosen_model = self.model_names[idx]

        messages = copy.deepcopy(raw_messages)

        if "claude" in chosen_model:
            # NOTE(review): Anthropic's documented header is 'x-api-key: <key>'
            # with no 'Bearer ' prefix — verify this works against the gateway in use.
            headers={
                "X-Api-Key": f"Bearer {api_key}",
                "Content-Type": "application/json",
            }
            # Claude's tool schema differs from OpenAI's: each tool is typed
            # 'custom' and 'parameters' is renamed to 'input_schema'.
            # NOTE(review): this mutates the caller's tool_list dicts in place —
            # repeated calls keep the converted schema; confirm intended.
            for tool in tool_list:
                if isinstance(tool, dict):
                    tool['type'] = 'custom'
                    if 'parameters' in tool:
                        tool['input_schema'] = tool.pop('parameters')
        elif any(x in chosen_model for x in ["glm", "doubao"]):
            headers = {
                "Authorization": f"Bearer {api_key}",
                "Content-Type": "application/json",
            }
        else:
            # Volcano-engine style default: skip content moderation.
            headers = {
                'Authorization': f'Bearer {api_key}',
                'x-ark-moderation-scene': 'skip-ark-moderation'
            }

        # All providers here expect user text blocks typed as 'text'
        # (undo any Responses-API 'input_text' typing).
        for msg in messages:
            if isinstance(msg, dict) and msg.get('role') == 'user' and isinstance(msg.get('content'), list):
                for item in msg['content']:
                    if isinstance(item, dict) and item.get('type') == 'input_text':
                        item['type'] = 'text'

        data=json.dumps({
            "model": chosen_model,  # provider-side model id
            "messages": messages,
            "max_tokens": 128000,
            "thinking" :{
                "type": "enabled",
                "budget_tokens": 15000
            },
            "tools": tool_list,
        })

        response_json = {}


        tool_call_ids = []
        meta_data = {
            "role": "assistant",
            "content": ""
        }
        answer_content_list = []
        summary_list = []
        log_messages = []

        async def _request_once() -> dict:
            nonlocal response_json, tool_call_ids, meta_data, answer_content_list, summary_list, log_messages
            # Reset per-attempt state so a retry does not accumulate stale data.
            tool_call_ids = []
            meta_data = {
                "role": "assistant",
                "content": ""
            }
            answer_content_list = []
            summary_list = []
            log_messages = []

            timeout = aiohttp.ClientTimeout(total=self.client_timeout)
            connector = aiohttp.TCPConnector(ssl=False)
            # aiohttp's ClientSession.post does not accept a verify=False
            # argument; certificate checking is disabled on the TCPConnector,
            # which must be passed when the ClientSession is constructed.
            async with aiohttp.ClientSession(timeout=timeout, connector=connector) as session:
                async with session.post(chosen_url, data=data, headers=headers) as resp:
                    resp.raise_for_status()
                    response_json = await resp.json()

            tool_calls = ""

            if "content" in response_json:
                # Claude-shaped response: 'content' is a list of typed blocks
                # that can be appended back to the conversation directly.
                log_messages = [{"role": "assistant", "content": response_json['content']}]
                next_messages = messages + log_messages
                for msg in response_json['content']:
                    if msg['type'] == "tool_use":
                        tool_call_ids.append(msg['id'])
                        now_tool_call = {
                            "name": msg['name'],
                            "arguments": msg['input']
                        }
                        tool_calls += "<tool_call>\n" + json.dumps(now_tool_call, ensure_ascii=False) + "\n</tool_call>\n"
                    elif msg['type'] == "text":
                        answer_content_list.append(msg['text'])
                    elif msg['type'] == 'thinking':
                        summary_list.append(msg['thinking'])
            elif "choices" in response_json and len(response_json['choices']):
                # OpenAI-chat-shaped response: single assistant message with
                # optional reasoning_content and tool_calls fields.
                tmp_messages = response_json['choices'][0]['message']
                log_messages = [tmp_messages]
                next_messages = messages + [tmp_messages]
                msg = tmp_messages
                if "reasoning_content" in msg:
                    summary_list.append(msg['reasoning_content'])
                if "content" in msg:
                    answer_content_list.append(msg['content'])
                if "tool_calls" in msg and msg['tool_calls']:
                    for tool_call in msg['tool_calls']:
                        tool_call_ids.append(tool_call['id'])
                        now_tool_call = {
                            "name": tool_call['function']['name'],
                            "arguments": json.loads(tool_call['function']['arguments'])
                        }
                        # json.dumps guarantees double quotes in the serialized call
                        tool_calls += "<tool_call>\n" + json.dumps(now_tool_call, ensure_ascii=False) + "\n</tool_call>\n"
            else:
                raise RuntimeError(f"Unexpected response payload: {response_json}")

            reasoning_content = "\n".join(summary_list).strip()
            content = "\n".join(answer_content_list).strip()
            tool_calls = tool_calls.strip()
            # Flatten reasoning + answer + tool calls into one tagged string.
            meta_data_content = ""
            meta_data_content += "<think>\n"
            meta_data_content += f"{reasoning_content}\n</think>" if reasoning_content else "</think>"
            meta_data_content += f"\n{content}"
            meta_data_content += f"\n" if content else ""
            meta_data_content += f"{tool_calls}" if tool_calls else ""

            meta_data['content'] = meta_data_content

            return {
                "next_messages": next_messages,
                "log_messages": log_messages,
                "meta_data": meta_data,
                "tool_call_ids": tool_call_ids,
                "usage": response_json['usage'],
            }

        try:
            return await self._run_with_retry(
                request_name=f"request_chat url={chosen_url} model={chosen_model}",
                request_coro_factory=_request_once,
                logger=logger,
                query_id=query_id,
            )
        except Exception as e:
            # Best-effort: log the last raw response before reporting failure.
            try:
                if logger is not None:
                    logger.info("[vllm response] %s", response_json)
            except:
                pass

            return {
                "next_messages": messages,
                "log_messages": [],
                "meta_data": meta_data,
                "tool_call_ids": tool_call_ids,
                "usage": response_json['usage'] if response_json is not None and 'usage' in response_json else None,
                "error": str(e)
            }
496
+
497
    async def _call_aliyun_chat(self,
                            raw_messages: List[Dict[str, str]],
                            tool_list = [],
                            temperature=0.7,
                            top_p=0.95,
                            logger = None,
                            api_key=None,
                            query_id: str = "") -> dict:
        """
        Call an Aliyun (DashScope) OpenAI-compatible endpoint via the OpenAI
        SDK and normalize the result into this project's message format.

        Thinking mode is enabled through ``extra_body``. Returns a dict with
        next_messages / log_messages / meta_data / tool_call_ids / usage,
        plus "error" on failure.
        """
        idx = random.randrange(len(self.base_urls))
        chosen_url = self.base_urls[idx]
        chosen_model = self.model_names[idx]
        # The SDK appends /chat/completions itself; strip it if the configured
        # URL already carries the suffix.
        if chosen_url.rstrip("/").endswith("/chat/completions"):
            chosen_url = chosen_url.rstrip("/")[: -len("/chat/completions")]

        client = OpenAI(
            api_key=api_key,
            base_url=chosen_url,
        )

        messages = copy.deepcopy(raw_messages)
        response_json = None
        answer_content_list = []
        summary_list = []
        log_messages = []

        tool_call_ids = []
        tool_calls = ""

        meta_data = {
            "role": "assistant",
            "content": ""
        }

        async def _request_once() -> dict:
            nonlocal response_json, answer_content_list, summary_list, log_messages, tool_call_ids, tool_calls, meta_data
            # Reset per-attempt state so a retry does not accumulate stale data.
            response_json = None
            answer_content_list = []
            summary_list = []
            log_messages = []
            tool_call_ids = []
            tool_calls = ""
            meta_data = {
                "role": "assistant",
                "content": ""
            }

            loop = asyncio.get_event_loop()
            request_kwargs = {
                "model": chosen_model,
                "messages": messages,
                "temperature": temperature,
                "top_p": top_p,
                "extra_body": {"enable_thinking": True},
            }
            if tool_list:
                request_kwargs["tools"] = tool_list
            # The SDK call is synchronous; run it in a worker thread bounded
            # by the client timeout.
            func = lambda: client.chat.completions.create(**request_kwargs)
            completion = await asyncio.wait_for(
                loop.run_in_executor(None, func),
                timeout=self.client_timeout
            )
            response_json = completion.model_dump()
            tmp_messages = response_json['choices'][0]['message']
            log_messages = [tmp_messages]
            next_messages = messages + [tmp_messages]
            msg = tmp_messages
            if "reasoning_content" in msg:
                summary_list.append(msg['reasoning_content'])
            if "content" in msg:
                # NOTE(review): content may be None on pure tool-call turns,
                # which would make the join below raise — confirm upstream.
                answer_content_list.append(msg['content'])
            if "tool_calls" in msg and msg['tool_calls']:
                for tool_call in msg['tool_calls']:
                    tool_call_ids.append(tool_call['id'])
                    arguments_raw = tool_call['function']['arguments']
                    # Tolerate non-JSON argument strings by passing them through raw.
                    try:
                        arguments_obj = json.loads(arguments_raw)
                    except Exception:
                        arguments_obj = arguments_raw
                    now_tool_call = {
                        "name": tool_call['function']['name'],
                        "arguments": arguments_obj
                    }
                    tool_calls += "<tool_call>\n" + json.dumps(now_tool_call, ensure_ascii=False) + "\n</tool_call>\n"
            reasoning_content = "\n".join(summary_list).strip()
            content = "\n".join(answer_content_list).strip()
            tool_calls = tool_calls.strip()
            # Flatten reasoning + answer + tool calls into one tagged string.
            meta_data_content = ""
            meta_data_content += "<think>\n"
            meta_data_content += f"{reasoning_content}\n</think>" if reasoning_content else "</think>"
            meta_data_content += f"\n{content}"
            meta_data_content += f"\n" if content else ""
            meta_data_content += f"{tool_calls}" if tool_calls else ""

            meta_data['content'] = meta_data_content

            return {
                "next_messages": next_messages,
                "log_messages": log_messages,
                "meta_data": meta_data,
                "tool_call_ids": tool_call_ids,
                "usage": response_json['usage'],
            }

        try:
            return await self._run_with_retry(
                request_name=f"aliyun_chat url={chosen_url} model={chosen_model}",
                request_coro_factory=_request_once,
                logger=logger,
                query_id=query_id,
            )
        except Exception as e:
            # Best-effort: log the last raw response before reporting failure.
            try:
                if logger is not None:
                    logger.info("[aliyun response] %s", response_json)
            except:
                pass

            return {
                "next_messages": messages,
                "log_messages": [],
                "meta_data": meta_data,
                "tool_call_ids": tool_call_ids,
                "usage": response_json['usage'] if response_json is not None and 'usage' in response_json else None,
                "error": str(e)
            }
622
+
623
+ async def aihubmix_chat(self, raw_messages: List[Dict[str, str]], tool_list = [], temperature=0.7, top_p=0.95, logger = None, query_id: str = "") -> dict:
624
+ return await self._call_openai_chat(
625
+ raw_messages=raw_messages,
626
+ tool_list=tool_list,
627
+ temperature=temperature,
628
+ top_p=top_p,
629
+ logger=logger,
630
+ api_key=self.api_keys.get("aihubmix"),
631
+ query_id=query_id,
632
+ )
633
+
634
+ async def azure_chat(self, raw_messages: List[Dict[str, str]], tool_list = [], temperature=0.7, top_p=0.95, logger = None, query_id: str = "") -> dict:
635
+ return await self._call_openai_chat(
636
+ raw_messages=raw_messages,
637
+ tool_list=tool_list,
638
+ temperature=temperature,
639
+ top_p=top_p,
640
+ logger=logger,
641
+ api_key=self.api_keys.get("azure"),
642
+ query_id=query_id,
643
+ )
644
+
645
+ async def volcano_chat(self, raw_messages: List[Dict[str, str]], tool_list = [], temperature=0.7, top_p=0.95, logger = None, query_id: str = "") -> dict:
646
+ return await self._call_request_chat(
647
+ raw_messages=raw_messages,
648
+ tool_list=tool_list,
649
+ temperature=temperature,
650
+ top_p=top_p,
651
+ logger=logger,
652
+ api_key=self.api_keys.get("volcano"),
653
+ query_id=query_id,
654
+ )
655
+
656
+ async def aliyun_chat(self, raw_messages: List[Dict[str, str]], tool_list = [], temperature=0.7, top_p=0.95, logger = None, query_id: str = "") -> dict:
657
+ return await self._call_aliyun_chat(
658
+ raw_messages=raw_messages,
659
+ tool_list=tool_list,
660
+ temperature=temperature,
661
+ top_p=top_p,
662
+ logger=logger,
663
+ api_key=self.api_keys.get("aliyun"),
664
+ query_id=query_id,
665
+ )
inference/server/tool_api.py ADDED
@@ -0,0 +1,59 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ import sys
3
+
4
+ from tool_kits import (
5
+ AskQuestionAboutImageToolkit,
6
+ AskQuestionAboutVideoToolkit,
7
+ ExecuteCodeToolkit,
8
+ WideSearchToolkit,
9
+ ImageSearchToolkit,
10
+ ScholarSearchToolkit,
11
+ FileWideParseToolkit,
12
+ WideVisitToolkit,
13
+ BashToolkit,
14
+ )
15
+
16
+ from urllib.parse import urljoin
17
+ from typing import Callable, Dict, Any
18
+
19
# All docker-backed tools.
def return_all_tools(tool_urls=None, use_cache=None, use_tongyi_format=None):
    """
    Build the tool registry: public tool name -> metadata plus the executable
    callable, in the legacy entry layout the dispatcher expects.

    Only explicitly provided options are forwarded; they go exclusively to
    the toolkits that accept deep-research-format configuration.
    """
    shared_kwargs = {}
    for option, value in (
        ("server_url", tool_urls),
        ("use_cache", use_cache),
        ("is_tongyi_format", use_tongyi_format),
    ):
        if value is not None:
            shared_kwargs[option] = value

    # Keys are the public tool names used later for dispatch; only the
    # deep-research-format toolkits receive **shared_kwargs.
    toolkits = {
        "ask_question_about_image": AskQuestionAboutImageToolkit(),
        "ask_question_about_video": AskQuestionAboutVideoToolkit(),
        "execute_code": ExecuteCodeToolkit(),
        "wide_search": WideSearchToolkit(**shared_kwargs),
        "image_search": ImageSearchToolkit(**shared_kwargs),
        "scholar_search": ScholarSearchToolkit(**shared_kwargs),
        "file_wide_parse": FileWideParseToolkit(**shared_kwargs),
        "wide_visit": WideVisitToolkit(**shared_kwargs),
        "bash": BashToolkit(),
    }

    registry: Dict[str, Dict[str, Any]] = {}

    # Legacy-compatible entry layout expected by the old dispatcher code.
    for key, toolkit in toolkits.items():
        registry[key] = {
            "name": toolkit.name,
            "description": toolkit.description,
            "strict": True,
            "parameters": toolkit.params,
            "function": toolkit.forward,  # executable tool function
            "schema_json": toolkit.json
        }

    return registry
59
+
inference/server/tool_execution.py ADDED
@@ -0,0 +1,73 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from typing import Any, Tuple
2
+ import json
3
+ import asyncio
4
+ import os
5
+ import uuid
6
+ import importlib.util
7
+ from pathlib import Path
8
+ from utils.configs import USE_NLP_FORMAT_RETURN
9
+
10
+
11
+ async def execute_tool_call(name: Any, arguments: Any, all_tools, logger=None, USE_NLP_FORMAT_RETURN: bool | None = None, query_id = "") -> Tuple[str, str]:
12
+ """
13
+ Execute a single tool call locally. Returns (tool_name, tool_response_json_str).
14
+ """
15
+ if USE_NLP_FORMAT_RETURN is None:
16
+ USE_NLP_FORMAT_RETURN = USE_NLP_FORMAT_RETURN
17
+ if name == "parse_error_tool_call":
18
+ if USE_NLP_FORMAT_RETURN:
19
+ result = f"Error: Tool call is not a valid JSON. Tool call must contain a valid \"name\" and \"arguments\" field. Parse error: {arguments.get('parse_error', '')}"
20
+ else:
21
+ result = json.dumps({"error": f"Parse error: {arguments.get('parse_error', '')}", "raw": arguments.get('raw', '')}, ensure_ascii=False)
22
+ if logger:
23
+ logger.error(result)
24
+ return name or "parse_error_tool_call", result
25
+
26
+ tool = all_tools.get(name)
27
+ if tool is None:
28
+ result = json.dumps({"error": f"Unknown tool: {name}"}, ensure_ascii=False)
29
+ if logger:
30
+ logger.error(result)
31
+ return name or "unknown", result
32
+
33
+ # Ensure arguments is a dict
34
+ if not isinstance(arguments, dict):
35
+ arguments = {"_": arguments}
36
+
37
+ import functools
38
+ loop = asyncio.get_running_loop()
39
+
40
+ arguments["conversation_id"] = query_id
41
+
42
+ func = functools.partial(tool['function'], **arguments)
43
+ # 根据工具名称选择不同的超时时间
44
+ if name == "browse_url":
45
+ timeout = 5400 # browse 是 1.5 小时
46
+ else:
47
+ timeout = 1800 # 其他工具就是 30 分钟
48
+ try:
49
+ out = await asyncio.wait_for(loop.run_in_executor(None, func), timeout=timeout)
50
+ result = out if isinstance(out, str) else json.dumps(out, ensure_ascii=False) # 返回结果一定是字符串
51
+ except asyncio.TimeoutError:
52
+ if USE_NLP_FORMAT_RETURN:
53
+ result = f"The tool call timed out: execution exceeded {timeout} seconds for tool '{name}'."
54
+ else:
55
+ result = json.dumps({"error": f"Tool call timeout: exceeded {timeout}s", "tool": name, "arguments": arguments}, ensure_ascii=False)
56
+ if logger:
57
+ logger.error(result)
58
+ except TypeError as te:
59
+ if USE_NLP_FORMAT_RETURN:
60
+ result = f"Tool '{name}' failed due to argument mismatch: {str(te)}. Input arguments: {arguments}."
61
+ else:
62
+ result = json.dumps({"error": f"Argument mismatch for tool '{name}': {str(te)}", "received": arguments}, ensure_ascii=False)
63
+ if logger:
64
+ logger.error(result)
65
+ except Exception as e:
66
+ if USE_NLP_FORMAT_RETURN:
67
+ result = f"Tool '{name}' encountered an error: {str(e)}."
68
+ else:
69
+ result = json.dumps({"error": f"Tool '{name}' raised an exception: {str(e)}"}, ensure_ascii=False)
70
+ if logger:
71
+ logger.error(result)
72
+
73
+ return name, result
inference/test_all_tools.py ADDED
@@ -0,0 +1,123 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from tool_kits import (
2
+ AskQuestionAboutImageToolkit,
3
+ AskQuestionAboutVideoToolkit,
4
+ ExecuteCodeToolkit,
5
+ WideSearchToolkit,
6
+ ImageSearchToolkit,
7
+ ScholarSearchToolkit,
8
+ FileWideParseToolkit,
9
+ WideVisitToolkit,
10
+ BashToolkit
11
+ )
12
+
13
+ from urllib.parse import urljoin
14
+ from typing import Callable, Dict, Any
15
+
16
# initialize all tools (alphabetically sorted)
# Keys are the public tool names used for dispatch; values are toolkit
# instances constructed with their default configuration.
tools = {
    "ask_question_about_image": AskQuestionAboutImageToolkit(),
    "ask_question_about_video": AskQuestionAboutVideoToolkit(),
    "execute_code": ExecuteCodeToolkit(),
    "wide_search": WideSearchToolkit(),
    "image_search": ImageSearchToolkit(),
    "scholar_search": ScholarSearchToolkit(),
    "file_wide_parse": FileWideParseToolkit(),
    "wide_visit": WideVisitToolkit(),
    "bash": BashToolkit(),
}

# register all tools

# Registry filled below (and by @register_tool): tool name -> metadata + callable.
ALL_TOOLS: Dict[str, Dict[str, Any]] = {}
32
+
33
def register_tool(name: str, description: str, parameters: Dict[str, Any]):
    """
    Decorator factory: register the wrapped callable in ALL_TOOLS under *name*.
    The function itself is returned unchanged so it stays directly callable.
    """
    def decorator(func: Callable):
        entry = dict(
            name=name,
            description=description,
            strict=True,
            parameters=parameters,
            function=func,
        )
        ALL_TOOLS[name] = entry
        return func
    return decorator
47
+
48
# For compatibility with the old code: mirror every toolkit into the legacy
# ALL_TOOLS entry layout.
for legacy_name, toolkit in tools.items():
    ALL_TOOLS[legacy_name] = {
        "name": toolkit.name,
        "description": toolkit.description,
        "strict": True,
        "parameters": toolkit.params,
        "function": toolkit.forward,  # executable tool function
        "schema_json": toolkit.json
    }
58
+
59
def test_tools():
    """
    Smoke-test each registered tool with one simple invocation.

    Prints each tool's metadata and outcome, and returns a dict mapping
    tool_name -> {"success": True, "result": ...} or
    {"success": False, "error": ...}.
    """
    import time

    results = {}
    # One test case per tool.
    # Bug fix: the original dict listed the "execute_code" key twice;
    # duplicate dict keys silently overwrite each other, so only a single
    # deterministic case is kept.
    test_cases = {
        "ask_question_about_image": {"image_path": [
            "http://img.daimg.com/uploads/allimg/240712/3-240G2112F6.jpg"
        ], "question": "What is in this image?"},
        "ask_question_about_video": {"video_path": "https://www.bilibili.com/video/BV11p81zFEJT/?spm_id_from=333.337.search-card.all.click", "question": "描述这个视频的内容和主要场景。"},
        "bash": {"command": "echo 'hello world'"},
        "execute_code": {"code": "print('Hello World')"},
        "wide_search": {"query": ['伊莎贝尔·于佩尔 包法利夫人 苦的砒霜', 'Isabelle Huppert insisted poison taste bitter']},
        "image_search": {"query": ["咖喱", "肉骨茶", "印尼九层塔"]},
        "scholar_search": {"query": ["spa", "烟花", "attention"]},
        "file_wide_parse": {
            "files": [
                "http://img.daimg.com/uploads/allimg/240712/3-240G2112F6.jpg"
            ],
        },
        "wide_visit": {"url": "https://www.sohu.com/a/960662276_163491", "goal": "疯狂动物城有哪些周边"},
    }

    for tool_name, test_case in test_cases.items():
        if tool_name not in ALL_TOOLS:
            print(f"Tool {tool_name} not found in registered tools.")
            continue
        tool_info = ALL_TOOLS[tool_name]
        print(f"\nTool: {tool_name}")
        print(f"Description: {tool_info['description']}")
        print(f"Parameters: {tool_info['parameters']}")
        params = test_case
        # Every tool expects a conversation id; derive one from the clock.
        params['conversation_id'] = f"test_{time.strftime('%Y%m%d%H%M%S', time.localtime())}"
        print(f"Testing with parameters: {params}")

        try:
            # browse_url is async; run it on a fresh event loop.
            if tool_name == "browse_url":
                import asyncio
                result = asyncio.run(tool_info["function"](**params))
            else:
                result = tool_info["function"](**params)
            print(f"\n✅ Test result: {str(result)}")
            results[tool_name] = {"success": True, "result": result}
        except Exception as e:
            print(f"\n❌ Test failed: {str(e)}")
            results[tool_name] = {"success": False, "error": str(e)}
        print("\n" + "🏃..🎈 " * 20 + "\n")

    print("\n" + "==" * 20 + "END" + "==" * 20 + "\n")

    # Bug fix: results were collected but never returned; expose them so
    # callers can inspect the outcomes programmatically.
    return results
112
+
113
+
114
if __name__ == "__main__":

    # Dump every registered tool schema, then run the smoke tests.
    for entry in ALL_TOOLS.values():
        print(entry['schema_json'])

    print("=" * 100)

    test_tools()
122
+
123
+
inference/tool_kits/__init__.py ADDED
@@ -0,0 +1,48 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from .ask_question_about_image_toolkit import AskQuestionAboutImageToolkit
2
+ from .ask_question_about_video_toolkit import AskQuestionAboutVideoToolkit
3
+ from .base import BaseToolkit
4
+ from .browser_toolkit import BrowserToolkit
5
+ from .execute_code_toolkit import ExecuteCodeToolkit
6
+ from .extract_doc_content_toolkit import ExtractDocumentContentToolkit
7
+ from .extract_csv_content_toolkit import ExtractCSVContentToolkit
8
+ from .extract_pdf_content_toolkit import ExtractPDFContentToolkit
9
+ from .fetch_web_page_toolkit import FetchWebPageToolkit
10
+ from .image_to_text_toolkit import ImageToTextToolkit
11
+ from .visit_toolkit import VisitToolkit
12
+ from .web_search_toolkit import WebSearchToolkit
13
+ from .write_to_file_toolkit import WriteToFileToolkit
14
+ # 新增
15
+ from .wide_search_toolkit import WideSearchToolkit
16
+ from .image_search_toolkit import ImageSearchToolkit
17
+ from .file_wide_parse_toolkit import FileWideParseToolkit
18
+ from .scholar_search_toolkit import ScholarSearchToolkit
19
+ from .wide_visit_toolkit import WideVisitToolkit
20
+ # 04.03 新增
21
+ from .bash_toolkit import BashToolkit
22
+
23
+
24
+
25
+
26
# Public API of the tool_kits package; keep in sync with the imports above.
# (Fix: uniform quoting -- the original mixed single and double quotes.)
__all__ = [
    'AskQuestionAboutImageToolkit',
    'AskQuestionAboutVideoToolkit',
    'BaseToolkit',
    'BrowserToolkit',
    'ExecuteCodeToolkit',
    'ExtractCSVContentToolkit',
    'ExtractDocumentContentToolkit',
    'ExtractPDFContentToolkit',
    'FetchWebPageToolkit',
    'ImageToTextToolkit',
    'VisitToolkit',
    'WebSearchToolkit',
    'WriteToFileToolkit',
    # newly added
    'WideSearchToolkit',
    'ImageSearchToolkit',
    'ScholarSearchToolkit',
    'FileWideParseToolkit',
    'WideVisitToolkit',
    # added 04.03
    'BashToolkit',
]
inference/tool_kits/ask_question_about_image_toolkit.py ADDED
@@ -0,0 +1,32 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import yaml
2
+ import os
3
+ import sys
4
+ from urllib.parse import urljoin
5
+ from typing import Callable, Dict, Any
6
+ from utils.configs import TOOLS_SERVER_BASE_ENDPOINT_URL, WEB_BASED_TOOLS_USE_CACHE
7
+ from tool_kits.base import BaseToolkit
8
+
9
+
10
class AskQuestionAboutImageToolkit(BaseToolkit):
    """Answer natural-language questions about one or more images.

    Declarative configuration only: execution is delegated to
    ``BaseToolkit.forward()``, which POSTs to the remote tools server.
    """

    NAME = "ask_question_about_image"
    TOOLS_SERVER_BASE_ENDPOINT = TOOLS_SERVER_BASE_ENDPOINT_URL
    ENTRY_POINT = "ask_question_about_image"
    # Plain literal: the original f-string had no placeholders (needless f-prefix).
    DESCRIPTION = "Identify image content and answer questions about one or more images."
    TIMEOUT = 600  # seconds; image QA on large inputs can be slow
    TOOL_PARAMS = {
        "image_path": {
            "type": "array",
            "items": {
                "type": "string",
                "description": "Local path or URL to an image file.",
            },
            "minItems": 1,
            "description": "Array of local paths or URLs to image files.",
        },
        "question": {
            "type": "string",
            "description": "Query about the image content.",
        },
    }
    TOOL_PARAMS_REQUIRED = ["image_path", "question"]
    USE_CACHE = WEB_BASED_TOOLS_USE_CACHE
inference/tool_kits/ask_question_about_video_toolkit.py ADDED
@@ -0,0 +1,32 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import yaml
2
+ import os
3
+ import sys
4
+ from urllib.parse import urljoin
5
+ from typing import Callable, Dict, Any
6
+ from utils.configs import TOOLS_SERVER_BASE_ENDPOINT_URL, WEB_BASED_TOOLS_USE_CACHE
7
+ from tool_kits.base import BaseToolkit
8
+
9
+
10
class AskQuestionAboutVideoToolkit(BaseToolkit):
    """Answer natural-language questions about one or more videos.

    Declarative configuration only: execution is delegated to
    ``BaseToolkit.forward()``, which POSTs to the remote tools server.
    """

    NAME = "ask_question_about_video"
    TOOLS_SERVER_BASE_ENDPOINT = TOOLS_SERVER_BASE_ENDPOINT_URL
    ENTRY_POINT = "ask_question_about_video"
    # Plain literal: the original f-string had no placeholders (needless f-prefix).
    DESCRIPTION = "Ask a question about one or more videos."
    TIMEOUT = 600  # seconds; video QA can be slow
    TOOL_PARAMS = {
        "video_path": {
            "type": "array",
            "items": {
                "type": "string",
                "description": "Local path or URL to the video file.",
            },
            "minItems": 1,
            "description": "Array of local paths or URLs to video files.",
        },
        "question": {
            "type": "string",
            "description": "The question to ask about the video.",
        },
    }
    TOOL_PARAMS_REQUIRED = ["video_path", "question"]
    USE_CACHE = WEB_BASED_TOOLS_USE_CACHE
inference/tool_kits/base.py ADDED
@@ -0,0 +1,183 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ import random
3
+ import json
4
+ import threading
5
+ import queue
6
+ import asyncio
7
+ from typing import Any
8
+ from urllib.parse import urljoin
9
+
10
+ import httpx
11
+
12
+ import time
13
+ import uuid
14
+
15
+ class BaseToolkit():
16
+ """A tool for assisting test tasks."""
17
+
18
+ NAME = "tool_base"
19
+ DESCRIPTION = f"The tool backbone of all real tools."
20
+ TIMEOUT = 60
21
+ TOOLS_SERVER_BASE_ENDPOINT = []
22
+ ENTRY_POINT = ""
23
+ TOOL_PARAMS = {}
24
+ TOOL_PARAMS_REQUIRED = []
25
+ USE_CACHE = False
26
+
27
+ def __init__(
28
+ self,
29
+ name: str = "",
30
+ description: str = "",
31
+ params: dict = {},
32
+ required_params: list[str] = [],
33
+ server_url: str | list[str] = [],
34
+ entry_point: str = "",
35
+ timeout: float | None = None,
36
+ request_id: str = "",
37
+ use_cache: bool | None = None,
38
+ is_tongyi_format: bool | None = None,
39
+ **kwargs,
40
+ ):
41
+ # 这行代码的意思是:获取当前实例(self)的类(即class),并把它赋值给变量cls。
42
+ # 这样可以在初始化方法中使用cls来访问类属性,比如cls.NAME等,无论是否通过继承生成子类。
43
+ cls = type(self)
44
+ self.name = name or getattr(cls, "NAME", "")
45
+ self.description = description or getattr(cls, "DESCRIPTION", "")
46
+ self.params = params or getattr(cls, "TOOL_PARAMS", {})
47
+ self.required_params = required_params or getattr(cls, "TOOL_PARAMS_REQUIRED", [])
48
+ self.server_url = server_url or getattr(cls, "TOOLS_SERVER_BASE_ENDPOINT", "")
49
+ self.entry_point = entry_point or getattr(cls, "ENTRY_POINT", "") or getattr(cls, "NAME", "")
50
+
51
+ if timeout is not None:
52
+ self.timeout = timeout
53
+ else:
54
+ self.timeout = getattr(cls, "TIMEOUT", 600)
55
+
56
+ if use_cache is not None:
57
+ self.use_cache = use_cache
58
+ else:
59
+ self.use_cache = getattr(cls, "USE_CACHE", False)
60
+
61
+ if is_tongyi_format is not None:
62
+ self.is_tongyi_format = is_tongyi_format
63
+ else:
64
+ self.is_tongyi_format = getattr(cls, "USE_TONGYI_FORMAT", None)
65
+
66
+ self.set_request_id(request_id=request_id)
67
+
68
+ self._init_client()
69
+
70
+ def _init_client(self):
71
+ """
72
+ Initialize the HTTP client for making requests.
73
+ """
74
+ # httpx 是一个用于发送 HTTP 请求的库,这里用它来创建一个客户端对象,方便后续发送 HTTP 请求到工具服务器。
75
+ self.client = httpx.Client()
76
+
77
+ @property
78
+ def json(self):
79
+ return {
80
+ "type": "function",
81
+ "function": {
82
+ "name": self.name,
83
+ "description": self.description,
84
+ "parameters": {
85
+ "type": "object",
86
+ "additionalProperties": False,
87
+ "properties": self.params,
88
+ "required": self.required_params,
89
+ },
90
+ },
91
+ }
92
+
93
+ def _post(self, pload: dict[str, Any]) -> Any:
94
+ """
95
+ Post request to the tool server and return the response.
96
+ """
97
+
98
+ # support multiple server urls for load balancing
99
+ server_url = random.choice(self.server_url) if isinstance(self.server_url, list) else self.server_url
100
+ tool_endpoint = urljoin(server_url, self.entry_point) # url + 访问接口
101
+
102
+ # with httpx.Client() as client:
103
+ try:
104
+ resp = self.client.post(tool_endpoint, json=pload, timeout=self.timeout)
105
+ if not resp.is_success:
106
+ return f"{resp.status_code} {resp.text}"
107
+ data = resp.json()
108
+ return data.get("result", "")
109
+
110
+ except Exception as e:
111
+ raise e
112
+
113
+ # **kwargs 是 Python 中的一种语法,用于将所有额外的关键字参数以字典形式收集起来
114
+ # 例如: forward(a=1, b=2) 时,kwargs={'a': 1, 'b': 2}
115
+ def forward(self, **kwargs):
116
+ """
117
+ Execute this tool.
118
+
119
+ Args:
120
+ keyword arguments: Arguments to be submitted to this tool.
121
+
122
+ Returns:
123
+ ToolOutput: An object containing either the tool's results or an error message.
124
+ """
125
+
126
+ # request_id, use_cache, and real params for the tool
127
+ try:
128
+ payload = {}
129
+
130
+ # Ensure request_id and use_cache is present
131
+ # timestamp = time.strftime("%Y%m%d%H%M%S", time.localtime())
132
+ timestamp = time.strftime("%Y%m%d%", time.localtime())
133
+ if self.request_id:
134
+ payload["request_id"] = f"{self.request_id}_{self.name}_{timestamp}"
135
+ else:
136
+ # payload["request_id"] = f"{self.name}_{timestamp}"
137
+ payload["request_id"] = self.name
138
+
139
+ payload["use_cache"] = self.use_cache
140
+ # Ensure is_tongyi_format is inside params
141
+ if self.is_tongyi_format is not None:
142
+ kwargs['is_tongyi_format'] = self.is_tongyi_format
143
+ payload['params'] = kwargs
144
+
145
+ conversation_id = kwargs.pop('conversation_id', None)
146
+ if conversation_id is not None:
147
+ payload['conversation_id'] = conversation_id
148
+
149
+ # print("payload:", payload)
150
+ raw = self._post(payload)
151
+ return raw
152
+
153
+ except Exception as e:
154
+ raise e
155
+
156
+
157
+
158
+ def set_request_id(self, request_id: str):
159
+ """
160
+ Set the request ID for this tool.
161
+ """
162
+ self.request_id = request_id
163
+
164
+ def set_use_cache(self, use_cache: bool):
165
+ """
166
+ Set whether to use cache for this tool.
167
+ """
168
+ self.use_cache = use_cache
169
+
170
+ def set_timeout(self, timeout: float):
171
+ """
172
+ Set the timeout for this tool.
173
+ """
174
+ self.timeout = timeout
175
+
176
+
177
+ def __del__(self):
178
+ # try:
179
+ # if getattr(self, "client", None):
180
+ # self.client.close()
181
+ # except Exception:
182
+ # pass
183
+ pass
inference/tool_kits/bash_toolkit.py ADDED
@@ -0,0 +1,30 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import yaml
2
+ import os
3
+ import sys
4
+ from urllib.parse import urljoin
5
+ from typing import Callable, Dict, Any
6
+ from utils.configs import TOOLS_SERVER_BASE_ENDPOINT_URL, WEB_BASED_TOOLS_USE_CACHE
7
+ from tool_kits.base import BaseToolkit
8
+
9
+
10
class BashToolkit(BaseToolkit):
    """Run a shell script on the remote execution backend.

    Declarative configuration only; the actual request is issued by
    ``BaseToolkit.forward()``.
    """

    NAME = "bash"
    TOOLS_SERVER_BASE_ENDPOINT = TOOLS_SERVER_BASE_ENDPOINT_URL
    ENTRY_POINT = "bash"
    DESCRIPTION = "Execute a shell script in the current working directory. Use this tool to run one or more shell commands as a single script or execute script files (e.g. `python script.py`)."
    TIMEOUT = 900  # seconds; shell scripts may run long
    TOOL_PARAMS = {
        "command": {
            "type": "string",
            "description": "A shell script to execute. Multiple commands are allowed and will be executed sequentially in the same shell session. Use relative paths by default.",
        },
    }
    TOOL_PARAMS_REQUIRED = ["command"]
    USE_CACHE = WEB_BASED_TOOLS_USE_CACHE
inference/tool_kits/execute_code_toolkit.py ADDED
@@ -0,0 +1,23 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import yaml
2
+ import os
3
+ import sys
4
+ from urllib.parse import urljoin
5
+ from typing import Callable, Dict, Any
6
+ from utils.configs import TOOLS_SERVER_BASE_ENDPOINT_URL, WEB_BASED_TOOLS_USE_CACHE
7
+ from tool_kits.base import BaseToolkit
8
+
9
+
10
class ExecuteCodeToolkit(BaseToolkit):
    """Run a code snippet on the remote code interpreter.

    Declarative configuration only: execution is delegated to
    ``BaseToolkit.forward()``, which POSTs to the remote tools server.
    """

    NAME = "execute_code"
    TOOLS_SERVER_BASE_ENDPOINT = TOOLS_SERVER_BASE_ENDPOINT_URL
    ENTRY_POINT = "execute_code"
    # Plain literal: the original f-string had no placeholders (needless f-prefix).
    DESCRIPTION = "Execute a given code snippet for data processing, model training, analysis, or workflow automation, including writing or modifying files as needed."
    TIMEOUT = 900  # seconds; code execution can be long-running
    TOOL_PARAMS = {
        "code": {
            "type": "string",
            "description": "The input code to the Code Interpreter tool call.",
        },
    }
    TOOL_PARAMS_REQUIRED = ["code"]
    USE_CACHE = WEB_BASED_TOOLS_USE_CACHE
inference/tool_kits/file_wide_parse_toolkit.py ADDED
@@ -0,0 +1,27 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import yaml
2
+ import os
3
+ import sys
4
+ from urllib.parse import urljoin
5
+ from typing import Callable, Dict, Any
6
+ from utils.configs import TOOLS_SERVER_BASE_ENDPOINT_URL, USE_NLP_FORMAT_RETURN, WEB_BASED_TOOLS_USE_CACHE
7
+ from tool_kits.base import BaseToolkit
8
+
9
+
10
class FileWideParseToolkit(BaseToolkit):
    """Parse multiple local or online files (PDF, DOCX, PPTX, CSV, ZIP, ...).

    Declarative configuration only: execution is delegated to
    ``BaseToolkit.forward()``, which POSTs to the remote tools server.
    """

    NAME = "parse_file"
    TOOLS_SERVER_BASE_ENDPOINT = TOOLS_SERVER_BASE_ENDPOINT_URL
    ENTRY_POINT = "file_wide_parse"
    # Plain literal: the original f-string had no placeholders (needless f-prefix).
    DESCRIPTION = "This is a tool that can be used to parse multiple user uploaded local files or online files such as PDF, DOCX, PPTX, TXT, CSV, XLSX, DOC, ZIP, MP4, MP3."
    TIMEOUT = 600  # seconds; parsing large files can be slow
    TOOL_PARAMS = {
        "files": {
            "type": "array",
            "items": {
                "type": "string"
            },
            "description": "The online file's URLs or the user uploaded local file paths to be parsed."
        },
    }
    TOOL_PARAMS_REQUIRED = ["files"]
    USE_CACHE = WEB_BASED_TOOLS_USE_CACHE
    USE_TONGYI_FORMAT = USE_NLP_FORMAT_RETURN
inference/tool_kits/image_search_toolkit.py ADDED
@@ -0,0 +1,30 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import yaml
2
+ import os
3
+ import sys
4
+ from urllib.parse import urljoin
5
+ from typing import Callable, Dict, Any
6
+ from utils.configs import TOOLS_SERVER_BASE_ENDPOINT_URL, USE_NLP_FORMAT_RETURN, WEB_BASED_TOOLS_USE_CACHE
7
+ from tool_kits.base import BaseToolkit
8
+
9
+
10
class ImageSearchToolkit(BaseToolkit):
    """Search for images by one or more text queries.

    Declarative configuration only: execution is delegated to
    ``BaseToolkit.forward()``, which POSTs to the remote tools server.
    """

    NAME = "image_search"
    TOOLS_SERVER_BASE_ENDPOINT = TOOLS_SERVER_BASE_ENDPOINT_URL
    ENTRY_POINT = "image_search"
    # Plain literal: the original f-string had no placeholders (needless f-prefix).
    DESCRIPTION = "Search images by query and return a list of related images. Accepts multiple complementary search queries in a single call."
    TIMEOUT = 600  # seconds
    TOOL_PARAMS = {
        "query": {
            "type": "array",
            "items": {
                "type": "string",
                "description": "A single image search query string.",
            },
            "minItems": 1,
            "description": "Array of query strings. Multiple complementary search queries can be provided in one request for image search.",
        },
    }
    TOOL_PARAMS_REQUIRED = ["query"]
    USE_CACHE = WEB_BASED_TOOLS_USE_CACHE
    USE_TONGYI_FORMAT = USE_NLP_FORMAT_RETURN
30
+
inference/tool_kits/scholar_search_toolkit.py ADDED
@@ -0,0 +1,29 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import yaml
2
+ import os
3
+ import sys
4
+ from urllib.parse import urljoin
5
+ from typing import Callable, Dict, Any
6
+ from utils.configs import TOOLS_SERVER_BASE_ENDPOINT_URL, USE_NLP_FORMAT_RETURN, WEB_BASED_TOOLS_USE_CACHE
7
+ from tool_kits.base import BaseToolkit
8
+
9
+
10
class ScholarSearchToolkit(BaseToolkit):
    """Query Google Scholar (plus regular Google results) for publications.

    Declarative configuration only: execution is delegated to
    ``BaseToolkit.forward()``, which POSTs to the remote tools server.
    """

    NAME = "google_scholar"
    TOOLS_SERVER_BASE_ENDPOINT = TOOLS_SERVER_BASE_ENDPOINT_URL
    ENTRY_POINT = "scholar_search"
    # Plain literal: the original f-string had no placeholders (needless f-prefix).
    DESCRIPTION = "Leverage Google Scholar to retrieve relevant information from academic publications. Accepts multiple queries. This tool will also return results from google search"
    TIMEOUT = 600  # seconds
    TOOL_PARAMS = {
        "query": {
            "type": "array",
            "items": {
                "type": "string",
                "description": "The search query.",
            },
            "minItems": 1,
            "description": "The list of search queries for Google Scholar.",
        },
    }
    TOOL_PARAMS_REQUIRED = ["query"]
    USE_CACHE = WEB_BASED_TOOLS_USE_CACHE
    USE_TONGYI_FORMAT = USE_NLP_FORMAT_RETURN
inference/tool_kits/wide_search_toolkit.py ADDED
@@ -0,0 +1,29 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import yaml
2
+ import os
3
+ import sys
4
+ from urllib.parse import urljoin
5
+ from typing import Callable, Dict, Any
6
+ from utils.configs import TOOLS_SERVER_BASE_ENDPOINT_URL, USE_NLP_FORMAT_RETURN, WEB_BASED_TOOLS_USE_CACHE
7
+ from tool_kits.base import BaseToolkit
8
+
9
+
10
class WideSearchToolkit(BaseToolkit):
    """Run one or more Google web searches and return the top results.

    Declarative configuration only: execution is delegated to
    ``BaseToolkit.forward()``, which POSTs to the remote tools server.
    """

    NAME = "search"
    TOOLS_SERVER_BASE_ENDPOINT = TOOLS_SERVER_BASE_ENDPOINT_URL
    ENTRY_POINT = "wide_search"
    # Plain literal: the original f-string had no placeholders (needless f-prefix).
    DESCRIPTION = "Perform Google web searches then returns a string of the top search results. Accepts multiple queries."
    TIMEOUT = 600  # seconds
    TOOL_PARAMS = {
        "query": {
            "type": "array",
            "items": {
                "type": "string",
                "description": "The search query.",
            },
            "minItems": 1,
            "description": "The list of search queries.",
        },
    }
    TOOL_PARAMS_REQUIRED = ["query"]
    USE_CACHE = WEB_BASED_TOOLS_USE_CACHE
    USE_TONGYI_FORMAT = USE_NLP_FORMAT_RETURN
inference/tool_kits/wide_visit_toolkit.py ADDED
@@ -0,0 +1,32 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import yaml
2
+ import os
3
+ import sys
4
+ from urllib.parse import urljoin
5
+ from typing import Callable, Dict, Any
6
+ from utils.configs import TOOLS_SERVER_BASE_ENDPOINT_URL, USE_NLP_FORMAT_RETURN, WEB_BASED_TOOLS_USE_CACHE
7
+ from tool_kits.base import BaseToolkit
8
+
9
+
10
class WideVisitToolkit(BaseToolkit):
    """Visit one or more webpages and summarize them toward a stated goal.

    Declarative configuration only; the actual request is issued by
    ``BaseToolkit.forward()``.
    """

    NAME = "visit"
    TOOLS_SERVER_BASE_ENDPOINT = TOOLS_SERVER_BASE_ENDPOINT_URL
    ENTRY_POINT = "wide_visit"
    DESCRIPTION = "Visit webpage(s) and return the summary of the content."
    TIMEOUT = 600  # seconds
    TOOL_PARAMS = {
        "url": {
            "type": "array",
            "items": {"type": "string"},
            "minItems": 1,
            "description": "The URL(s) of the webpage(s) to visit. Can be a single URL or an array of URLs.",
        },
        "goal": {
            "type": "string",
            "description": "The specific information goal for visiting webpage(s).",
        },
    }
    TOOL_PARAMS_REQUIRED = ["url", "goal"]
    USE_CACHE = WEB_BASED_TOOLS_USE_CACHE
    USE_TONGYI_FORMAT = USE_NLP_FORMAT_RETURN