#!/usr/bin/env bash
#
# Run the evaluation benchmarks for one model, data-parallel across GPUs.
#
# Every setting below can be overridden from the environment:
#   CUDA_VISIBLE_DEVICES  comma-separated GPU ids (default: 4,5,6,7)
#   MODEL                 model id handed to the eval scripts as --model-id
#   LABEL                 tag embedded in every result label
#   CONDA_ENV             conda environment activated before anything runs
#   NUM_SHARDS            number of data shards (default: number of GPU ids)
#   DATA_ROOT, SYNC_TEST_JSONL, VGG_TEST_JSONL, WORLDSENSE_DIR,
#   DAILY_OMNI_DIR, VIDEOMME_DIR, LVBENCH_DIR
#                         benchmark input locations
#   EVAL_ROOT             root directory that receives the results

# `-e` is not enabled, so a failing command does not abort the remaining
# benchmarks; unset variables and failing pipeline stages are still errors.
set -uo pipefail

# ---- Model / runtime --------------------------------------------------------
export CUDA_VISIBLE_DEVICES="${CUDA_VISIBLE_DEVICES:-4,5,6,7}"
MODEL="${MODEL:-openbmb/MiniCPM-o-4_5}"
LABEL="${LABEL:-minicpmo_4_5}"
# Helper scripts live in ./scripts next to this file, wherever it is run from.
SCRIPTS="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)/scripts"
CONDA_ENV="${CONDA_ENV:-minicpmo}"

# ---- Sharding ---------------------------------------------------------------
# Split the GPU list into an array; by default there is one shard per GPU id.
IFS=',' read -ra GPU_ARR <<< "$CUDA_VISIBLE_DEVICES"
NUM_SHARDS="${NUM_SHARDS:-${#GPU_ARR[@]}}"
# NUM_SHARDS is used as a loop bound and passed to every eval script, so
# reject anything that is not a positive integer before launching work.
if ! [[ "$NUM_SHARDS" =~ ^[1-9][0-9]*$ ]]; then
  printf 'error: NUM_SHARDS must be a positive integer, got %q\n' \
    "$NUM_SHARDS" >&2
  exit 2
fi

# ---- Benchmark inputs -------------------------------------------------------
DATA_ROOT="${DATA_ROOT:-/opt/dlami/nvme/video_source}"
SYNC_TEST_JSONL="${SYNC_TEST_JSONL:-/home/ubuntu/CleverHans-Evaluation/data/kto_training_data_v2_test.jsonl}"
VGG_TEST_JSONL="${VGG_TEST_JSONL:-/opt/dlami/nvme/vggsoundsync_test/test_3k.jsonl}"
WORLDSENSE_DIR="${WORLDSENSE_DIR:-/opt/dlami/nvme/worldsense}"
DAILY_OMNI_DIR="${DAILY_OMNI_DIR:-/opt/dlami/nvme/daily_omni}"
VIDEOMME_DIR="${VIDEOMME_DIR:-/opt/dlami/nvme/videomme/data/data}"
LVBENCH_DIR="${LVBENCH_DIR:-/opt/dlami/nvme/lvbench}"

# ---- Results ----------------------------------------------------------------
EVAL_ROOT="${EVAL_ROOT:-/home/ubuntu/eval_results}"

# ---- Conda ------------------------------------------------------------------
# Load conda's shell integration when it is installed in the default
# per-user location, then activate the requested environment.
conda_sh="${HOME:-}/anaconda3/etc/profile.d/conda.sh"
if [[ -f "$conda_sh" ]]; then
  # shellcheck source=/dev/null
  source "$conda_sh"
fi
# Activation failure is reported but not fatal: the caller may already be
# inside a suitable environment.
if ! conda activate "${CONDA_ENV}"; then
  printf 'warning: could not activate conda env %q; using python from PATH\n' \
    "${CONDA_ENV}" >&2
fi

echo "=== Model: $MODEL | Label: $LABEL"
echo "=== GPUs: ${GPU_ARR[*]} | Shards: $NUM_SHARDS"

#######################################
# Run one benchmark data-parallel: start NUM_SHARDS background copies of an
# eval script (one shard index each), wait for all of them, then merge the
# per-shard outputs with merge_shards.py.
#
# Globals:
#   GPU_ARR     (read) GPU ids; shard i runs with
#               CUDA_VISIBLE_DEVICES=GPU_ARR[i % number-of-ids]
#   NUM_SHARDS  (read) number of shards to launch
#   SCRIPTS     (read) directory containing merge_shards.py
# Arguments:
#   $1    bench name, forwarded to merge_shards.py as --bench
#   $2    eval script to run with python
#   $3    label, forwarded as --label; also the results sub-directory name
#   $4    output root, forwarded as --output-dir
#   $5..  extra arguments handed to the eval script unchanged
# Outputs:
#   Progress lines on stdout. Each shard's stdout and stderr go to
#   <out_root>/<label>/logs/shard<i>of<NUM_SHARDS>.log
# Returns:
#   0 if every shard and the merge succeeded, 1 if any of them failed or the
#   log directory could not be created, 2 on a usage error.
#######################################
run_bench_dp() {
  if (( $# < 4 )); then
    echo "usage: run_bench_dp BENCH SCRIPT LABEL OUT_ROOT [eval-script args...]" >&2
    return 2
  fi
  local bench="$1" script="$2" full_label="$3" out_root="$4"
  shift 4   # whatever remains in "$@" is passed through to the eval script

  local label_dir="${out_root}/${full_label}"
  local num_gpus="${#GPU_ARR[@]}"
  local status=0

  if (( num_gpus == 0 )); then
    echo " !! no GPU ids configured (GPU_ARR is empty)" >&2
    return 1
  fi
  if ! mkdir -p "${label_dir}/logs"; then
    echo " !! cannot create ${label_dir}/logs" >&2
    return 1
  fi

  echo ""
  echo "==== [$(date +%T)] Bench: $bench | Label: $full_label ===="

  # NOTE(review): the 'β' marker in the progress lines looks like a
  # mis-encoded arrow; it is kept as-is — confirm the intended character.
  local pids=()
  local shard gpu log
  for (( shard = 0; shard < NUM_SHARDS; shard++ )); do
    # Round-robin over the GPU ids so NUM_SHARDS may exceed the GPU count
    # (several shards then share a GPU) instead of hitting an unset index.
    gpu="${GPU_ARR[shard % num_gpus]}"
    log="${label_dir}/logs/shard${shard}of${NUM_SHARDS}.log"
    echo " β shard $shard on GPU $gpu (log: $log)"
    CUDA_VISIBLE_DEVICES="$gpu" python "$script" \
      "$@" \
      --output-dir "$out_root" \
      --label "$full_label" \
      --shard "$shard" --num-shards "$NUM_SHARDS" \
      > "$log" 2>&1 &
    pids+=("$!")
  done

  # Wait on each PID individually so every shard's exit status is checked.
  local pid failed=0
  for pid in ${pids[@]+"${pids[@]}"}; do
    wait "$pid" || failed=$((failed + 1))
  done
  if (( failed > 0 )); then
    echo " !! $failed shard(s) exited with error; check ${label_dir}/logs/"
    status=1
  fi

  # The merge is attempted even after shard failures, as before.
  echo " β merging shards ..."
  if ! python "$SCRIPTS/merge_shards.py" \
      --bench "$bench" \
      --label-dir "$label_dir"; then
    echo " !! merge failed"
    status=1
  fi

  return "$status"
}

# ---- Benchmarks -------------------------------------------------------------
# Each call passes: bench name, eval script, result label, output root, and
# then the script-specific flags, which are forwarded to the eval script.
# The benchmarks run one after another; each one is sharded internally.

# Sync benchmark (GPT-judged).
run_bench_dp dpo_sync "$SCRIPTS/eval_dpo_sync.py" \
  "sync_${LABEL}" "$EVAL_ROOT/sync" \
  --model-id "$MODEL" \
  --data-root "$DATA_ROOT" \
  --test-jsonl "$SYNC_TEST_JSONL" \
  --gpt-judge

# VGGSoundSync, free-text mode (GPT-judged).
run_bench_dp vggsoundsync "$SCRIPTS/eval_vggsoundsync.py" \
  "vggsync_freetext_${LABEL}_3k" "$EVAL_ROOT/vggsoundsync" \
  --model-id "$MODEL" \
  --test-jsonl "$VGG_TEST_JSONL" \
  --mode freetext --gpt-judge

# WorldSense.
run_bench_dp worldsense "$SCRIPTS/eval_worldsense.py" \
  "ws_${LABEL}" "$EVAL_ROOT/worldsense" \
  --model-id "$MODEL" \
  --data-dir "$WORLDSENSE_DIR" \
  --max-samples -1

# Daily-Omni.
run_bench_dp daily_omni "$SCRIPTS/eval_daily_omni.py" \
  "do_${LABEL}" "$EVAL_ROOT/daily_omni" \
  --model-id "$MODEL" \
  --data-dir "$DAILY_OMNI_DIR" \
  --max-samples -1

# Video-MME.
run_bench_dp videomme "$SCRIPTS/eval_videomme.py" \
  "vmme_${LABEL}" "$EVAL_ROOT/videomme" \
  --model-id "$MODEL" \
  --video-dir "$VIDEOMME_DIR" \
  --max-samples -1

# LVBench.
run_bench_dp lvbench "$SCRIPTS/eval_lvbench.py" \
  "lvb_${LABEL}" "$EVAL_ROOT/lvbench" \
  --model-id "$MODEL" \
  --video-dir "$LVBENCH_DIR" \
  --max-samples -1

# ---- Summary ----------------------------------------------------------------
# List the expected metrics file of every benchmark. A benchmark that failed
# leaves no file behind, so flag the missing ones instead of listing every
# path as if it had been produced.
echo ""
echo "=== All done: $LABEL ==="
for b_out in \
  "$EVAL_ROOT/sync/sync_${LABEL}" \
  "$EVAL_ROOT/vggsoundsync/vggsync_freetext_${LABEL}_3k" \
  "$EVAL_ROOT/worldsense/ws_${LABEL}" \
  "$EVAL_ROOT/daily_omni/do_${LABEL}" \
  "$EVAL_ROOT/videomme/vmme_${LABEL}" \
  "$EVAL_ROOT/lvbench/lvb_${LABEL}"; do
  if [[ -f "${b_out}/metrics.json" ]]; then
    echo " ${b_out}/metrics.json"
  else
    echo " ${b_out}/metrics.json (missing)"
  fi
done