| |
| |
| |
| |
| |
| |
| |
| |
|
|
| |
# LLM endpoint configuration. Space-separated lists are permitted here; they
# are deliberately word-split when handed to the inference script.
LLM_CLIENT_URLS='https://aihubmix.com/v1/chat/completions'
LLM_CLIENT_MODELS='glm-5'
SYSTEM_FORMAT='aihubmix_glm'
|
|
| |
| |
| |
| |
|
|
| |
| |
| |
| |
|
|
| |
| |
| |
| |
|
|
| |
| |
| |
| |
|
|
| |
| |
| |
| |
|
|
|
|
# Input/output locations for this run (relative to the project root).
TEST_DATA_FILE='test_files/test_one_query.jsonl'
OUTPUT_FILE='test_files/test_one_query_results.jsonl'
OUTPUT_DIR='run_logs/GAIA_0126/rollouts'

# Tool set exposed to the agent (space-separated; word-split on purpose).
AVAILABLE_TOOLS='wide_search scholar_search file_wide_parse execute_code wide_visit ask_question_about_image ask_question_about_video image_search bash'

# Rollout / resume / logging settings. An empty RESUME_FROM_FILE means
# start from scratch.
ROLLOUT_NUM=1
RESUME_FROM_FILE=''
LOG_LABEL='glm-5'
LOG_FILE='run_logs/run_batch_glm-5.log'
|
|
# System prompt sent with every query.
# This is a double-quoted assignment, so $(date ...) at the bottom expands
# once, at script start (the run date is baked into the prompt).
# BUG FIX: backticks are now escaped (\`). Inside a double-quoted string an
# unescaped `...` is command substitution — bash executed `.` (source with no
# argument), printed an error, and silently stripped the backticks and the
# dot from the prompt text the model received.
SYSTEM_PROMPT="You are a deep research assistant. Your core function is to conduct thorough, multi-source investigations into any topic. You must handle both broad, open-domain inquiries and queries within specialized academic fields. For every request, synthesize information from credible, diverse sources to deliver a comprehensive, accurate, and objective response. When you have gathered sufficient information and are ready to provide the definitive response, you must enclose the entire final answer within <answer></answer> tags.

# Note

## General Rules

- The current working directory (cwd) is \`.\`. Treat the cwd as the project root.
- You are authorized to read, edit, or create files within this directory. **You must use relative paths** for all operations; absolute paths are strictly forbidden.

## Citation & Reference Policy

- User instructions always override this policy.
- If the response does not use external sources, do not include citations or references.
- External sources include web searches, user-uploaded files, or explicitly cited webpages.
- If external sources are used:
  - For lightweight factual or real-time information (e.g., weather, simple lookups), include in-text citation only.
  - For research, analysis, or document-based tasks
    (e.g., using multiple external sources or any user-uploaded file),
    include both in-text citations and a reference list.
- Reference lists are for source traceability only; do not introduce new information.
- For citation-only cases, keep responses concise and avoid research-style structuring.

Current date: $(date +"%Y-%m-%d")"
|
|
| |
# Execution parameters.
TASK_TYPE='input_only'
MAX_ROUNDS=100
CONCURRENCY_WORKERS=16
SAVE_BATCH_SIZE=10
TEMPERATURE=0.85
TIMEOUT_FOR_ONE_QUERY=3600   # seconds allowed per query
LLM_API_RETRY_TIMES=2

# Context-window management (history discarding past the token budget).
DISCARD_ALL_MODE='false'
MODEL_MAX_CONTEXT_TOKENS=131072
DISCARD_RATIO=0.8
TOKENIZER_PATH='models/tokenizer'
|
|
|
|
# Render a human-readable summary of every run parameter, show it on the
# console, and seed the log file with it (truncating any previous log).
PARAM_INFO=$(
cat <<EOF
========== Run Parameters ==========
Start Time: $(date)
LLM_CLIENT_URLS: $LLM_CLIENT_URLS
LLM_CLIENT_MODELS: $LLM_CLIENT_MODELS
TEST_DATA_FILE: $TEST_DATA_FILE
OUTPUT_FILE: $OUTPUT_FILE
OUTPUT_DIR: $OUTPUT_DIR
AVAILABLE_TOOLS: $AVAILABLE_TOOLS
CONCURRENCY_WORKERS: $CONCURRENCY_WORKERS
SAVE_BATCH_SIZE: $SAVE_BATCH_SIZE
ROLLOUT_NUM: $ROLLOUT_NUM
MAX_ROUNDS: $MAX_ROUNDS
TEMPERATURE: $TEMPERATURE
TIMEOUT_FOR_ONE_QUERY: $TIMEOUT_FOR_ONE_QUERY
LLM_API_RETRY_TIMES: $LLM_API_RETRY_TIMES
DISCARD_ALL_MODE: $DISCARD_ALL_MODE
MODEL_MAX_CONTEXT_TOKENS: $MODEL_MAX_CONTEXT_TOKENS
DISCARD_RATIO: $DISCARD_RATIO
TOKENIZER_PATH: $TOKENIZER_PATH
RESUME_FROM_FILE: $RESUME_FROM_FILE
TASK_TYPE: $TASK_TYPE
LOG_LABEL: $LOG_LABEL
SYSTEM_FORMAT: $SYSTEM_FORMAT
Shell PID: $$
====================================
EOF
)
printf '%s\n' "$PARAM_INFO"
printf '%s\n' "$PARAM_INFO" > "$LOG_FILE"
|
|
| |
| |
# Launch the batch-inference driver in the background, detached from the
# terminal via nohup; stdout/stderr are appended to $LOG_FILE.
#
# Refactor: the original duplicated the entire 25-line command in both
# branches, differing only by --clean_files_copy_dir. The command is now
# built once in an array and the extra flag is appended conditionally.
#
# NOTE: $LLM_CLIENT_URLS, $LLM_CLIENT_MODELS and $AVAILABLE_TOOLS are left
# unquoted on purpose so space-separated lists expand into multiple
# arguments; all scalar values are quoted.
cmd_args=(
  --llm_client_urls $LLM_CLIENT_URLS
  --llm_client_models $LLM_CLIENT_MODELS
  --test_data_file "$TEST_DATA_FILE"
  --output_file "$OUTPUT_FILE"
  --output_dir "$OUTPUT_DIR"
  --available_tools $AVAILABLE_TOOLS
  --concurrency_workers "$CONCURRENCY_WORKERS"
  --save_batch_size "$SAVE_BATCH_SIZE"
  --rollout_num "$ROLLOUT_NUM"
  --max_rounds "$MAX_ROUNDS"
  --temperature "$TEMPERATURE"
  --timeout_for_one_query "$TIMEOUT_FOR_ONE_QUERY"
  --llm_api_retry_times "$LLM_API_RETRY_TIMES"
  --discard_all_mode "$DISCARD_ALL_MODE"
  --model_max_context_tokens "$MODEL_MAX_CONTEXT_TOKENS"
  --discard_ratio "$DISCARD_RATIO"
  --tokenizer_path "$TOKENIZER_PATH"
  --resume_from_file "$RESUME_FROM_FILE"
  --log_label "$LOG_LABEL"
  --system_format "$SYSTEM_FORMAT"
  --system_prompt "$SYSTEM_PROMPT"
  --verbose
)
# input_only runs additionally clean the copied-files directory.
if [ "$TASK_TYPE" = "input_only" ]; then
  cmd_args+=(--clean_files_copy_dir)
fi
nohup python inference/run_batch_inference.py "${cmd_args[@]}" >> "$LOG_FILE" 2>&1 &
|
|
# Remember the backgrounded worker's PID and record it both on the console
# and in the log for later monitoring / kill.
PY_PID=$!
printf 'Python running as PID: %s\n' "$PY_PID"
printf 'Python running as PID: %s\n' "$PY_PID" >> "$LOG_FILE"
|
|