AutoLLMAnnotation / scripts /pipeline_hico_test.sh
ayh015's picture
This is a quick test of the annotation pipeline for HICO.
c705aaa
#!/usr/bin/env bash
#
# Quick test of the HICO annotation pipeline.
#
# Three torchrun stages are launched in sequence by the rest of this script
# (tools/annotate_hico.py, tools/refine_hico.py, tools/examine_hico.py), and
# tools/merge_json_outputs.py is run on each stage's output directory.
#
# All paths are relative, so results depend on the directory the script is
# started from. NOTE(review): presumably the repository root -- confirm.
#
# Every setting below keeps its previous value as the default and can be
# overridden from the environment, for example:
#   LONG_GPU_IDS=0 LONG_NPROC=1 bash scripts/pipeline_hico_test.sh
set -euo pipefail

# Append the current directory to PYTHONPATH. The ${VAR:+...} form emits the
# ':' separator only when PYTHONPATH already has a value, so an unset or empty
# PYTHONPATH becomes "./" instead of ":./" (no empty leading entry).
export PYTHONPATH="${PYTHONPATH:+${PYTHONPATH}:}./"

# Dataset root, passed to every stage as --data-path.
DATA_PATH="${DATA_PATH:-../datasets/HICO-Det}"

# Model location per stage, passed as --model-path.
LONG_MODEL_PATH="${LONG_MODEL_PATH:-./model_weights/qwen3_30b_vl_instruct/models}"
REFINE_MODEL_PATH="${REFINE_MODEL_PATH:-./model_weights/qwen3_8b_vl_instruct}"
EXAMINE_MODEL_PATH="${EXAMINE_MODEL_PATH:-./model_weights/qwen3_30b_vl_instruct/models}"

# Comma-separated GPU ids per stage; used as that stage's CUDA_VISIBLE_DEVICES.
LONG_GPU_IDS="${LONG_GPU_IDS:-0,1}"
REFINE_GPU_IDS="${REFINE_GPU_IDS:-0,1}"
EXAMINE_GPU_IDS="${EXAMINE_GPU_IDS:-0,1}"

# Process count per stage; used as torchrun's --nproc_per_node.
LONG_NPROC="${LONG_NPROC:-2}"
REFINE_NPROC="${REFINE_NPROC:-2}"
EXAMINE_NPROC="${EXAMINE_NPROC:-2}"

# Directory each stage writes into (--output-dir).
LONG_OUT_DIR="${LONG_OUT_DIR:-outputs/pipeline/long}"
REFINE_OUT_DIR="${REFINE_OUT_DIR:-outputs/pipeline/refine}"
EXAMINE_OUT_DIR="${EXAMINE_OUT_DIR:-outputs/pipeline/examine}"

# Output path of the merge step that follows each stage. The long and refine
# files are also the --annotation-path input of the next stage.
MERGED_LONG_JSON="${MERGED_LONG_JSON:-outputs/pipeline/merged_long.json}"
MERGED_REFINE_JSON="${MERGED_REFINE_JSON:-outputs/pipeline/merged_refine.json}"
MERGED_EXAMINE_JSON="${MERGED_EXAMINE_JSON:-outputs/pipeline/merged_examine.json}"

# Create the stage output directories up front; -p makes this a no-op when
# they already exist.
mkdir -p -- "${LONG_OUT_DIR}" "${REFINE_OUT_DIR}" "${EXAMINE_OUT_DIR}"
# Stage 1 (long descriptions): run tools/annotate_hico.py under torchrun on a
# single node with ${LONG_NPROC} processes, rendezvous port 25011, and
# CUDA_VISIBLE_DEVICES taken from ${LONG_GPU_IDS}. --max-samples 5 is passed
# through to the tool (presumably a cap on processed samples -- confirm there).
# Expansions are quoted so a value can never be word-split or globbed.
CUDA_VISIBLE_DEVICES="${LONG_GPU_IDS}" OMP_NUM_THREADS=1 torchrun \
  --nnodes=1 \
  --nproc_per_node="${LONG_NPROC}" \
  --master_port=25011 \
  tools/annotate_hico.py \
  --model-path "${LONG_MODEL_PATH}" \
  --data-path "${DATA_PATH}" \
  --output-dir "${LONG_OUT_DIR}" \
  --max-samples 5

# Hand the stage's output directory to the merge tool, which writes
# ${MERGED_LONG_JSON}. The pattern is quoted so it reaches the tool verbatim
# instead of being expanded by the shell.
# NOTE(review): files left in the directory by earlier runs that match the
# pattern would presumably be merged as well -- confirm in the tool.
python3 tools/merge_json_outputs.py \
  --input-dir "${LONG_OUT_DIR}" \
  --pattern "labels_*.json" \
  --output-path "${MERGED_LONG_JSON}"
# Stage 2 (refined descriptions): run tools/refine_hico.py under torchrun on a
# single node with ${REFINE_NPROC} processes, rendezvous port 25012, and
# CUDA_VISIBLE_DEVICES taken from ${REFINE_GPU_IDS}. Its --annotation-path is
# ${MERGED_LONG_JSON}, the file written by the preceding merge step.
# --max-samples 5 is passed through to the tool (presumably a cap on processed
# samples -- confirm there).
# Expansions are quoted so a value can never be word-split or globbed.
CUDA_VISIBLE_DEVICES="${REFINE_GPU_IDS}" OMP_NUM_THREADS=1 torchrun \
  --nnodes=1 \
  --nproc_per_node="${REFINE_NPROC}" \
  --master_port=25012 \
  tools/refine_hico.py \
  --model-path "${REFINE_MODEL_PATH}" \
  --data-path "${DATA_PATH}" \
  --annotation-path "${MERGED_LONG_JSON}" \
  --output-dir "${REFINE_OUT_DIR}" \
  --max-samples 5

# Hand the stage's output directory to the merge tool, which writes
# ${MERGED_REFINE_JSON}. The pattern is quoted so it reaches the tool verbatim
# instead of being expanded by the shell.
# NOTE(review): files left in the directory by earlier runs that match the
# pattern would presumably be merged as well -- confirm in the tool.
python3 tools/merge_json_outputs.py \
  --input-dir "${REFINE_OUT_DIR}" \
  --pattern "refine_labels_*.json" \
  --output-path "${MERGED_REFINE_JSON}"
# Stage 3 (examiner results): run tools/examine_hico.py under torchrun on a
# single node with ${EXAMINE_NPROC} processes, rendezvous port 25013, and
# CUDA_VISIBLE_DEVICES taken from ${EXAMINE_GPU_IDS}. Its --annotation-path is
# ${MERGED_REFINE_JSON}, the file written by the preceding merge step.
# --max-samples 5 is passed through to the tool (presumably a cap on processed
# samples -- confirm there).
# Expansions are quoted so a value can never be word-split or globbed.
CUDA_VISIBLE_DEVICES="${EXAMINE_GPU_IDS}" OMP_NUM_THREADS=1 torchrun \
  --nnodes=1 \
  --nproc_per_node="${EXAMINE_NPROC}" \
  --master_port=25013 \
  tools/examine_hico.py \
  --model-path "${EXAMINE_MODEL_PATH}" \
  --data-path "${DATA_PATH}" \
  --annotation-path "${MERGED_REFINE_JSON}" \
  --output-dir "${EXAMINE_OUT_DIR}" \
  --max-samples 5

# Hand the stage's output directory to the merge tool, which writes
# ${MERGED_EXAMINE_JSON}. The pattern is quoted so it reaches the tool verbatim
# instead of being expanded by the shell.
# NOTE(review): files left in the directory by earlier runs that match the
# pattern would presumably be merged as well -- confirm in the tool.
python3 tools/merge_json_outputs.py \
  --input-dir "${EXAMINE_OUT_DIR}" \
  --pattern "examiner_labels_*.json" \
  --output-path "${MERGED_EXAMINE_JSON}"
# Final report: a completion line followed by the path of each merged JSON
# file. One printf call emits every argument on its own line.
printf '%s\n' \
  "Pipeline complete." \
  "Long descriptions: ${MERGED_LONG_JSON}" \
  "Refined descriptions: ${MERGED_REFINE_JSON}" \
  "Examiner results: ${MERGED_EXAMINE_JSON}"