| |
| |
| |
| |
| |
| |
| |
| |
|
|
| |
# LLM endpoint configuration. Space-separated lists are permitted here; they
# are deliberately word-split when handed to the inference script.
LLM_CLIENT_URLS='https://aihubmix.com/v1/chat/completions'
LLM_CLIENT_MODELS='glm-5'
SYSTEM_FORMAT='aihubmix_glm'
|
|
| |
| |
| |
| |
|
|
| |
| |
| |
| |
|
|
| |
| |
| |
| |
|
|
| |
| |
| |
| |
|
|
| |
| |
| |
| |
|
|
|
|
# Input/output locations for this run (relative to the project root).
TEST_DATA_FILE='test_files/test_one_query.jsonl'
OUTPUT_FILE='test_files/test_one_query_results.jsonl'
OUTPUT_DIR='run_logs/GAIA_0126/rollouts'

# Tool set exposed to the agent (space-separated; word-split on purpose).
AVAILABLE_TOOLS='wide_search scholar_search file_wide_parse execute_code wide_visit ask_question_about_image ask_question_about_video image_search bash'

# Rollout / resume / logging settings. An empty RESUME_FROM_FILE means
# start from scratch.
ROLLOUT_NUM=1
RESUME_FROM_FILE=''
LOG_LABEL='glm-5'
LOG_FILE='run_logs/run_batch_glm-5.log'
|
|
# System prompt sent with every query.
# This is a double-quoted assignment, so $(date ...) at the bottom expands
# once, at script start (the run date is baked into the prompt).
# BUG FIX: backticks are now escaped (\`). Inside a double-quoted string an
# unescaped `...` is command substitution — bash executed `.` (source with no
# argument), printed an error, and silently stripped the backticks and the
# dot from the prompt text the model received.
SYSTEM_PROMPT="You are a deep research assistant. Your core function is to conduct thorough, multi-source investigations into any topic. You must handle both broad, open-domain inquiries and queries within specialized academic fields. For every request, synthesize information from credible, diverse sources to deliver a comprehensive, accurate, and objective response. When you have gathered sufficient information and are ready to provide the definitive response, you must enclose the entire final answer within <answer></answer> tags.

# Note

## General Rules

- The current working directory (cwd) is \`.\`. Treat the cwd as the project root.
- You are authorized to read, edit, or create files within this directory. **You must use relative paths** for all operations; absolute paths are strictly forbidden.

## Citation & Reference Policy

- User instructions always override this policy.
- If the response does not use external sources, do not include citations or references.
- External sources include web searches, user-uploaded files, or explicitly cited webpages.
- If external sources are used:
  - For lightweight factual or real-time information (e.g., weather, simple lookups), include in-text citation only.
  - For research, analysis, or document-based tasks
    (e.g., using multiple external sources or any user-uploaded file),
    include both in-text citations and a reference list.
- Reference lists are for source traceability only; do not introduce new information.
- For citation-only cases, keep responses concise and avoid research-style structuring.

Current date: $(date +"%Y-%m-%d")"
|
|
| |
# Execution parameters.
TASK_TYPE='input_only'
MAX_ROUNDS=100
CONCURRENCY_WORKERS=16
SAVE_BATCH_SIZE=10
TEMPERATURE=0.85
TIMEOUT_FOR_ONE_QUERY=3600   # seconds allowed per query
LLM_API_RETRY_TIMES=2

# Context-window management (history discarding past the token budget).
DISCARD_ALL_MODE='false'
MODEL_MAX_CONTEXT_TOKENS=131072
DISCARD_RATIO=0.8
TOKENIZER_PATH='models/tokenizer'
|
|
|
|
# Render a human-readable summary of every run parameter, show it on the
# console, and seed the log file with it (truncating any previous log).
PARAM_INFO=$(
cat <<EOF
========== Run Parameters ==========
Start Time: $(date)
LLM_CLIENT_URLS: $LLM_CLIENT_URLS
LLM_CLIENT_MODELS: $LLM_CLIENT_MODELS
TEST_DATA_FILE: $TEST_DATA_FILE
OUTPUT_FILE: $OUTPUT_FILE
OUTPUT_DIR: $OUTPUT_DIR
AVAILABLE_TOOLS: $AVAILABLE_TOOLS
CONCURRENCY_WORKERS: $CONCURRENCY_WORKERS
SAVE_BATCH_SIZE: $SAVE_BATCH_SIZE
ROLLOUT_NUM: $ROLLOUT_NUM
MAX_ROUNDS: $MAX_ROUNDS
TEMPERATURE: $TEMPERATURE
TIMEOUT_FOR_ONE_QUERY: $TIMEOUT_FOR_ONE_QUERY
LLM_API_RETRY_TIMES: $LLM_API_RETRY_TIMES
DISCARD_ALL_MODE: $DISCARD_ALL_MODE
MODEL_MAX_CONTEXT_TOKENS: $MODEL_MAX_CONTEXT_TOKENS
DISCARD_RATIO: $DISCARD_RATIO
TOKENIZER_PATH: $TOKENIZER_PATH
RESUME_FROM_FILE: $RESUME_FROM_FILE
TASK_TYPE: $TASK_TYPE
LOG_LABEL: $LOG_LABEL
SYSTEM_FORMAT: $SYSTEM_FORMAT
Shell PID: $$
====================================
EOF
)
printf '%s\n' "$PARAM_INFO"
printf '%s\n' "$PARAM_INFO" > "$LOG_FILE"
|
|
| |
| |
# Launch the batch-inference driver in the background, detached from the
# terminal via nohup; stdout/stderr are appended to $LOG_FILE.
#
# Refactor: the original duplicated the entire 25-line command in both
# branches, differing only by --clean_files_copy_dir. The command is now
# built once in an array and the extra flag is appended conditionally.
#
# NOTE: $LLM_CLIENT_URLS, $LLM_CLIENT_MODELS and $AVAILABLE_TOOLS are left
# unquoted on purpose so space-separated lists expand into multiple
# arguments; all scalar values are quoted.
cmd_args=(
  --llm_client_urls $LLM_CLIENT_URLS
  --llm_client_models $LLM_CLIENT_MODELS
  --test_data_file "$TEST_DATA_FILE"
  --output_file "$OUTPUT_FILE"
  --output_dir "$OUTPUT_DIR"
  --available_tools $AVAILABLE_TOOLS
  --concurrency_workers "$CONCURRENCY_WORKERS"
  --save_batch_size "$SAVE_BATCH_SIZE"
  --rollout_num "$ROLLOUT_NUM"
  --max_rounds "$MAX_ROUNDS"
  --temperature "$TEMPERATURE"
  --timeout_for_one_query "$TIMEOUT_FOR_ONE_QUERY"
  --llm_api_retry_times "$LLM_API_RETRY_TIMES"
  --discard_all_mode "$DISCARD_ALL_MODE"
  --model_max_context_tokens "$MODEL_MAX_CONTEXT_TOKENS"
  --discard_ratio "$DISCARD_RATIO"
  --tokenizer_path "$TOKENIZER_PATH"
  --resume_from_file "$RESUME_FROM_FILE"
  --log_label "$LOG_LABEL"
  --system_format "$SYSTEM_FORMAT"
  --system_prompt "$SYSTEM_PROMPT"
  --verbose
)
# input_only runs additionally clean the copied-files directory.
if [ "$TASK_TYPE" = "input_only" ]; then
  cmd_args+=(--clean_files_copy_dir)
fi
nohup python inference/run_batch_inference.py "${cmd_args[@]}" >> "$LOG_FILE" 2>&1 &
|
|
# Remember the backgrounded worker's PID and record it both on the console
# and in the log for later monitoring / kill.
PY_PID=$!
printf 'Python running as PID: %s\n' "$PY_PID"
printf 'Python running as PID: %s\n' "$PY_PID" >> "$LOG_FILE"
|
|