Diffusers
Safetensors
EvalMDE / eval_scripts /eval_infinigen5_slurm.sh
zeyuren2002's picture
Add files using upload-large-folder tool
4165f20 verified
#!/bin/bash
#SBATCH --job-name=evalmde-infinigen5
#SBATCH --output=/home/ywan0794/EvalMDE/infinigen5_%j.log
#SBATCH --error=/home/ywan0794/EvalMDE/infinigen5_%j.log
#SBATCH --open-mode=append
#SBATCH --ntasks=1
#SBATCH --cpus-per-task=4
#SBATCH --gres=gpu:H100:1
#SBATCH --time=0-02:00:00
#SBATCH --mem=80G
#SBATCH --nodelist=erinyes
# Runs the 7 baseline models over the dataset at $DATA and writes results
# under $OUT.
# Stage 1: per-model inference (each model in its own conda env).
# Stage 2: metric aggregation in the evalmde env.
# Phase 0 contract: a failing model must not abort the remaining ones, so
# `set -e` is deliberately not used. Only an unusable workspace (checked
# explicitly below) ends the job early.
export PYTHONUNBUFFERED=1

# Later commands use paths relative to the repo root (scripts/, baselines/),
# so continuing after a failed cd would only produce misleading failures.
cd /home/ywan0794/EvalMDE || {
  echo "FATAL: cannot cd to /home/ywan0794/EvalMDE" >&2
  exit 1
}

# Later steps call `conda activate` / `conda deactivate`, which rely on this
# file having been sourced into the current shell.
source /home/ywan0794/miniconda3/etc/profile.d/conda.sh || {
  echo "FATAL: cannot source conda.sh" >&2
  exit 1
}

DATA=/home/ywan0794/EvalMDE/data/infinigen/test_scenes_release_cleaned_final
OUT=/home/ywan0794/EvalMDE/output/infinigen5
mkdir -p "$OUT" || {
  echo "FATAL: cannot create output directory $OUT" >&2
  exit 1
}

# One summary file per run; the timestamp keeps reruns from overwriting it.
TIMESTAMP=$(date +"%Y%m%d_%H%M%S")
SUMMARY=$OUT/_smoke_all_${TIMESTAMP}.summary.txt
: > "$SUMMARY"

echo "============================================"
echo "infinigen5 started at $(date)"
echo "Data: $DATA Output: $OUT"
echo "============================================"
nvidia-smi
#######################################
# Run one baseline's inference inside its own conda environment and record
# the outcome in the summary file. Failures are logged, never propagated, so
# the remaining models still run.
# Globals:
#   DATA        (read)    passed as --data-root
#   OUT         (read)    passed as --output-root
#   SUMMARY     (read)    file that status lines are appended to
#   PYTHONPATH  (written) EvalMDE and MoGe checkouts are prepended, exported
# Arguments:
#   $1    label - selects baselines/<label>.py and is passed as --model-name
#   $2    env   - conda environment to activate
#   $3..  extra arguments forwarded verbatim to scripts/run_inference.py
# Outputs:
#   A banner on stdout, then one "[INF-OK] <label>" or "[INF-FAIL ...] <label>"
#   line written to stdout and appended to $SUMMARY.
# Returns:
#   0 always.
#######################################
run_inf() {
  local label=$1 env=$2
  shift 2
  local extra_path=/home/ywan0794/EvalMDE:/home/ywan0794/MoGe
  local rc

  echo
  echo "============================================"
  echo "[$label inference] $(date) env=$env"
  echo "============================================"

  # Leave the previous model's env; errors are ignored because there may be
  # no active env to leave.
  conda deactivate 2>/dev/null || true

  # Without the requested env, python would run against whatever interpreter
  # is left on PATH, so record the failure and skip the model instead.
  if ! conda activate "$env"; then
    echo "[INF-FAIL env-activate=$env] $label" | tee -a "$SUMMARY"
    return 0
  fi

  # Prepend the checkouts only once; run_inf is called once per model and
  # would otherwise grow PYTHONPATH with duplicates on every call.
  case ":${PYTHONPATH:-}:" in
    ":${extra_path}:"*) ;;
    *) PYTHONPATH="${extra_path}${PYTHONPATH:+:${PYTHONPATH}}" ;;
  esac
  export PYTHONPATH

  if python scripts/run_inference.py \
    --baseline "baselines/${label}.py" \
    --data-root "$DATA" \
    --output-root "$OUT" \
    --model-name "$label" \
    "$@"; then
    echo "[INF-OK] $label" | tee -a "$SUMMARY"
  else
    # In the else branch $? is still the exit status of the python command.
    rc=$?
    echo "[INF-FAIL rc=$rc] $label" | tee -a "$SUMMARY"
  fi
  return 0
}
# Stage 1: one run_inf call per model, using the paper-canonical args
# (same as eval_all_slurm.sh).
# Call shape: run_inf <baseline label> <conda env> [args for run_inference.py]
EVAL=/home/ywan0794/EvalMDE

run_inf depth_pro depth-pro \
  --repo "$EVAL/ml-depth-pro" \
  --checkpoint "$EVAL/ml-depth-pro/checkpoints/depth_pro.pt" \
  --precision fp32

run_inf marigold marigold \
  --repo "$EVAL/Marigold" \
  --checkpoint prs-eth/marigold-depth-v1-1 \
  --denoise_steps 4 \
  --ensemble_size 1

run_inf lotus lotus \
  --repo "$EVAL/Lotus" \
  --pretrained jingheya/lotus-depth-g-v1-0 \
  --mode generation \
  --task_name depth \
  --timestep 999 \
  --fp16 \
  --seed 42

run_inf depthmaster depthmaster \
  --repo "$EVAL/DepthMaster" \
  --checkpoint "$EVAL/DepthMaster/ckpt/eval" \
  --processing_res 768

run_inf ppd ppd \
  --repo "$EVAL/Pixel-Perfect-Depth" \
  --semantics_model MoGe2 \
  --semantics_pth checkpoints/moge2.pt \
  --model_pth checkpoints/ppd_moge.pth \
  --sampling_steps 4

run_inf da3_mono da3 \
  --repo "$EVAL/Depth-Anything-3" \
  --hf_id depth-anything/DA3MONO-LARGE

run_inf fe2e fe2e \
  --repo "$EVAL/FE2E" \
  --model_path "$EVAL/FE2E/pretrain" \
  --lora_path "$EVAL/FE2E/lora/LDRN.safetensors" \
  --prompt_type empty \
  --single_denoise \
  --cfg_guidance 6.0 \
  --size_level 768
echo
echo "============================================"
echo "Stage 2: metric aggregation (evalmde env)"
echo "============================================"
# All metrics are computed in the evalmde env. If it cannot be activated,
# every model is marked as failed rather than being scored with whatever
# interpreter happens to be active.
conda deactivate 2>/dev/null || true
metric_env_ok=1
if ! conda activate evalmde; then
  metric_env_ok=0
fi

for m in depth_pro marigold lotus depthmaster ppd da3_mono fe2e; do
  echo "--- metric: $m ---"

  if (( ! metric_env_ok )); then
    echo "[METRIC-FAIL env-activate=evalmde] $m" | tee -a "$SUMMARY"
    continue
  fi

  # Inference output is expected at $OUT/<model>/<subdir>/pred_depth.npz.
  # When nothing matches, the glob stays literal (or expands to nothing under
  # nullglob), so testing the first element covers both a missing model
  # directory and an empty one.
  preds=("$OUT/$m"/*/pred_depth.npz)
  if [[ ! -e "${preds[0]}" ]]; then
    echo "[METRIC-SKIP no inference output] $m" | tee -a "$SUMMARY"
    continue
  fi

  if python scripts/compute_metrics.py \
    --gt-root "$DATA" \
    --pred-root "$OUT" \
    --model-name "$m" \
    --output "$OUT/${m}_metrics.json"; then
    echo "[METRIC-OK] $m" | tee -a "$SUMMARY"
  else
    echo "[METRIC-FAIL] $m" | tee -a "$SUMMARY"
  fi
done
echo
echo "============================================"
echo "infinigen5 finished at $(date)"
echo "=== Summary ==="
cat "$SUMMARY"
echo "=== Per-model means ==="
# Print the mean_raw / mean_aligned entries of each metrics file that exists.
for m in depth_pro marigold lotus depthmaster ppd da3_mono fe2e; do
  J=$OUT/${m}_metrics.json
  # Use `if` rather than `[ -f ] && ...`: a missing file for the last model
  # would otherwise leave a non-zero status as the loop's (and possibly the
  # script's) exit code.
  if [[ -f "$J" ]]; then
    # The path is passed as an argument instead of being spliced into the
    # Python source, so quotes or backslashes in it cannot break the snippet.
    means=$(python -c 'import json, sys; d = json.load(open(sys.argv[1])); print("RAW:", d.get("mean_raw", {})); print(" ALN:", d.get("mean_aligned", {}))' "$J")
    echo "$m: $means"
  fi
done