#!/bin/bash
#SBATCH --job-name=evalmde-infinigen5
#SBATCH --output=/home/ywan0794/EvalMDE/infinigen5_%j.log
#SBATCH --error=/home/ywan0794/EvalMDE/infinigen5_%j.log
#SBATCH --open-mode=append
#SBATCH --ntasks=1
#SBATCH --cpus-per-task=4
#SBATCH --gres=gpu:H100:1
#SBATCH --time=0-02:00:00
#SBATCH --mem=80G
#SBATCH --nodelist=erinyes

# Runs all 7 baseline models over the scenes under $DATA and aggregates metrics.
#   Stage 1: per-model inference, each model in its own conda env.
#   Stage 2: metric aggregation in the evalmde env.
# Phase 0 contract: a failing model does not abort the run; its status is
# appended to $SUMMARY and the script moves on. Only setup failures (missing
# repo directory, missing conda, unwritable output directory) stop the job,
# because nothing after them could succeed.
#
# NOTE: `set -e` is deliberately not used (see the contract above).

export PYTHONUNBUFFERED=1

# Print a message on stderr and stop the job; used for setup failures only.
die() {
  printf '%s\n' "$*" >&2
  exit 1
}

EVAL=/home/ywan0794/EvalMDE
DATA=/home/ywan0794/EvalMDE/data/infinigen/test_scenes_release_cleaned_final
OUT=/home/ywan0794/EvalMDE/output/infinigen5
MODELS=(depth_pro marigold lotus depthmaster ppd da3_mono fe2e)

# Relative paths below (scripts/..., baselines/...) are resolved from here.
cd "$EVAL" || die "cannot cd to $EVAL"
# shellcheck disable=SC1091
source /home/ywan0794/miniconda3/etc/profile.d/conda.sh \
  || die "cannot source conda.sh"

mkdir -p "$OUT" || die "cannot create $OUT"
TIMESTAMP=$(date +"%Y%m%d_%H%M%S")
SUMMARY=$OUT/_smoke_all_${TIMESTAMP}.summary.txt
: > "$SUMMARY"

echo "============================================"
echo "infinigen5 started at $(date)"
echo "Data: $DATA Output: $OUT"
echo "============================================"
nvidia-smi

#######################################
# Run inference for one model inside its own conda environment.
# Globals:
#   DATA, OUT, SUMMARY (read); PYTHONPATH (exported)
# Arguments:
#   $1  - model label; selects baselines/<label>.py and is passed as
#         --model-name
#   $2  - conda environment to activate
#   $3+ - extra arguments forwarded verbatim to scripts/run_inference.py
# Outputs:
#   Banner on stdout; appends "[INF-OK] <label>" or
#   "[INF-FAIL rc=<status>] <label>" to $SUMMARY.
# Returns:
#   Always 0, so one failing model never stops the run.
#######################################
run_inf() {
  local label=$1 env=$2
  shift 2
  local rc=0
  local py_prefix=/home/ywan0794/EvalMDE:/home/ywan0794/MoGe

  echo
  echo "============================================"
  echo "[$label inference] $(date) env=$env"
  echo "============================================"

  # Deactivating when nothing is active is not an error for our purposes.
  conda deactivate 2>/dev/null || true

  # If activation fails, running python anyway would use whatever environment
  # happens to be active; record the failure and skip this model instead.
  conda activate "$env" || rc=$?
  if ((rc != 0)); then
    echo "conda activate $env failed; skipping $label" >&2
    echo "[INF-FAIL rc=$rc] $label" | tee -a "$SUMMARY"
    return 0
  fi

  # Prepend the project paths once; without this guard every call would add
  # another duplicate pair of entries to PYTHONPATH.
  case "${PYTHONPATH:-}:" in
    "${py_prefix}:"*) ;;
    *) export PYTHONPATH="${py_prefix}${PYTHONPATH:+:${PYTHONPATH}}" ;;
  esac

  python scripts/run_inference.py \
    --baseline "baselines/${label}.py" \
    --data-root "$DATA" \
    --output-root "$OUT" \
    --model-name "$label" \
    "$@" || rc=$?

  if ((rc == 0)); then
    echo "[INF-OK] $label" | tee -a "$SUMMARY"
  else
    echo "[INF-FAIL rc=$rc] $label" | tee -a "$SUMMARY"
  fi
  return 0
}

# Paper-canonical args (same as eval_all_slurm.sh)
run_inf depth_pro depth-pro \
  --repo "$EVAL/ml-depth-pro" \
  --checkpoint "$EVAL/ml-depth-pro/checkpoints/depth_pro.pt" \
  --precision fp32
run_inf marigold marigold \
  --repo "$EVAL/Marigold" \
  --checkpoint prs-eth/marigold-depth-v1-1 \
  --denoise_steps 4 --ensemble_size 1
run_inf lotus lotus \
  --repo "$EVAL/Lotus" \
  --pretrained jingheya/lotus-depth-g-v1-0 \
  --mode generation --task_name depth --timestep 999 --fp16 --seed 42
run_inf depthmaster depthmaster \
  --repo "$EVAL/DepthMaster" \
  --checkpoint "$EVAL/DepthMaster/ckpt/eval" \
  --processing_res 768
run_inf ppd ppd \
  --repo "$EVAL/Pixel-Perfect-Depth" \
  --semantics_model MoGe2 \
  --semantics_pth checkpoints/moge2.pt \
  --model_pth checkpoints/ppd_moge.pth \
  --sampling_steps 4
run_inf da3_mono da3 \
  --repo "$EVAL/Depth-Anything-3" \
  --hf_id depth-anything/DA3MONO-LARGE
run_inf fe2e fe2e \
  --repo "$EVAL/FE2E" \
  --model_path "$EVAL/FE2E/pretrain" \
  --lora_path "$EVAL/FE2E/lora/LDRN.safetensors" \
  --prompt_type empty --single_denoise --cfg_guidance 6.0 --size_level 768

echo
echo "============================================"
echo "Stage 2: metric aggregation (evalmde env)"
echo "============================================"
conda deactivate
if ! conda activate evalmde; then
  # Keep going per the contract; the per-model METRIC-FAIL lines will show it.
  echo "WARNING: conda activate evalmde failed; metrics may fail" >&2
fi

for m in "${MODELS[@]}"; do
  echo "--- metric: $m ---"
  # compgen -G succeeds only if the glob matches at least one path, which
  # covers both "model directory missing" and "no predictions written".
  if compgen -G "$OUT/$m/*/pred_depth.npz" >/dev/null; then
    if python scripts/compute_metrics.py \
      --gt-root "$DATA" \
      --pred-root "$OUT" \
      --model-name "$m" \
      --output "$OUT/${m}_metrics.json"; then
      echo "[METRIC-OK] $m" | tee -a "$SUMMARY"
    else
      echo "[METRIC-FAIL] $m" | tee -a "$SUMMARY"
    fi
  else
    echo "[METRIC-SKIP no inference output] $m" | tee -a "$SUMMARY"
  fi
done

echo
echo "============================================"
echo "infinigen5 finished at $(date)"
echo "=== Summary ==="
cat "$SUMMARY"
echo "=== Per-model means ==="
for m in "${MODELS[@]}"; do
  metrics_json=$OUT/${m}_metrics.json
  # Use `if`, not `[ -f ... ] && ...`: with the short-circuit form a missing
  # file for the last model would become the exit status of the whole job.
  if [[ -f "$metrics_json" ]]; then
    # The path is passed as an argument rather than pasted into the Python
    # source, so quoting in the path cannot change the program.
    echo "$m: $(python -c '
import json, sys
with open(sys.argv[1]) as fh:
    d = json.load(fh)
print("RAW:", d.get("mean_raw", {}))
print(" ALN:", d.get("mean_aligned", {}))
' "$metrics_json")"
  fi
done