#!/usr/bin/env python
"""EvalMDE inference driver — produces per-scene pred .npz in EvalMDE format.

Strictly follows EvalMDE convention (see compute_metrics_example.py):
Output: <output_root>/<model_name>/<scene>/pred_depth.npz
  keys: depth (H,W) float32, intr (4,) [fx,fy,cx,cy] px, valid (H,W) bool

Calls one of the 7 model wrappers (baselines/<name>.py). Each wrapper is the
same MGEBaselineInterface; we just call .infer_for_evaluation(image, intrinsics).
"""
import sys, os
from pathlib import Path

import click, numpy as np, torch
from PIL import Image

EVALMDE_ROOT = Path(__file__).resolve().parent.parent
sys.path.insert(0, str(EVALMDE_ROOT))           # baselines/ + test/ visible
sys.path.insert(0, '/home/ywan0794/MoGe')       # baselines still import moge.test.baseline


def _load_baseline(baseline_code_path, extra_args):
    """Dynamically import a baseline wrapper file and instantiate its Baseline.

    `Baseline.load` is itself a click command; invoke it programmatically with
    the leftover CLI args (standalone_mode=False returns the result instead of
    exiting) so each wrapper parses its own model-specific options.
    """
    import importlib.util
    spec = importlib.util.spec_from_file_location(
        Path(baseline_code_path).stem, baseline_code_path)
    module = importlib.util.module_from_spec(spec)
    spec.loader.exec_module(module)
    baseline_cls = getattr(module, 'Baseline')
    return baseline_cls.load.main(extra_args, standalone_mode=False)


def _select_depth(pred):
    """Pick the most specific depth-like tensor from a wrapper's prediction dict.

    Priority: metric > scale_inv > affine > disparity (most-specific first).
    Returns a 2-D tensor; raises ValueError if no compatible key exists.
    """
    if 'depth_metric' in pred:
        d = pred['depth_metric']
    elif 'depth_scale_invariant' in pred:
        d = pred['depth_scale_invariant']
    elif 'depth_affine_invariant' in pred:
        d = pred['depth_affine_invariant']
    elif 'disparity_affine_invariant' in pred:
        # Invert disparity to (affine-invariant) depth; clamp avoids div-by-zero.
        d = 1.0 / pred['disparity_affine_invariant'].clamp(min=1e-6)
    else:
        raise ValueError(f'No depth-compatible key in {list(pred.keys())}')
    if d.ndim == 3:
        # assumes a (1, H, W) channel-first layout — TODO confirm all 7 wrappers
        d = d.squeeze(0)
    return d


def _pred_intrinsics(pred, gt_intr, W, H):
    """Return [fx, fy, cx, cy] in pixels: model-predicted if available, else GT.

    Wrapper-predicted intrinsics are a normalized 3x3 K (units of image size),
    so they are scaled back to pixels here. Only Depth Pro is expected to emit
    its own intrinsics; everything else falls through to gt_intr.
    """
    if 'intrinsics' not in pred:
        return gt_intr
    K = pred['intrinsics'].detach().cpu().numpy()
    if K.ndim == 3:  # drop a leading batch dim if present
        K = K[0]
    return np.array([K[0, 0] * W, K[1, 1] * H, K[0, 2] * W, K[1, 2] * H],
                    dtype=np.float32)


@click.command(context_settings={"allow_extra_args": True, "ignore_unknown_options": True})
@click.option('--baseline', 'baseline_code_path', required=True, type=click.Path())
@click.option('--data-root', required=True, type=click.Path())
@click.option('--output-root', required=True, type=click.Path())
@click.option('--model-name', required=True, type=str)
@click.pass_context
def main(ctx, baseline_code_path, data_root, output_root, model_name):
    """Run one baseline over every scene under data_root; save EvalMDE preds.

    A scene is any subdirectory of data_root containing rgb.png + gt_depth.npz.
    Extra CLI args (ctx.args) are forwarded to the baseline's own `load` command.
    """
    baseline = _load_baseline(baseline_code_path, ctx.args)

    data_root = Path(data_root)
    out_root = Path(output_root) / model_name
    out_root.mkdir(parents=True, exist_ok=True)

    scenes = sorted(
        d for d in data_root.iterdir()
        if d.is_dir() and (d / 'rgb.png').exists() and (d / 'gt_depth.npz').exists()
    )
    print(f'Found {len(scenes)} scenes')

    for i, scene in enumerate(scenes):
        rgb = np.array(Image.open(scene / 'rgb.png').convert('RGB'))  # (H, W, 3) uint8
        gt = np.load(scene / 'gt_depth.npz')
        gt_intr = gt['intr'].astype(np.float32)  # [fx, fy, cx, cy] pixels
        H, W = rgb.shape[:2]
        fx, fy, cx, cy = gt_intr.tolist()
        # Wrapper interface takes a normalized 3x3 K (focal/center in image units).
        K_norm = torch.tensor([
            [fx / W, 0.0, cx / W],
            [0.0, fy / H, cy / H],
            [0.0, 0.0, 1.0],
        ], dtype=torch.float32, device=baseline.device)
        img = (torch.from_numpy(rgb.astype(np.float32) / 255.0)
               .permute(2, 0, 1).to(baseline.device))

        with torch.inference_mode():
            # Some baselines (DA3-Mono / Marigold / Lotus / DepthMaster / PPD / FE2E)
            # do `assert intrinsics is None` because they don't consume intrinsics.
            # Only metric models (Depth Pro / Metric3D / MoGe) actually use them.
            # Try with intrinsics first, fall back without.
            try:
                pred = baseline.infer_for_evaluation(img, K_norm)
            except (AssertionError, TypeError):
                # NOTE(review): this also swallows a TypeError raised *inside*
                # an intrinsics-aware wrapper; acceptable as best-effort
                # fallback, but worth confirming per wrapper.
                pred = baseline.infer_for_evaluation(img)

        depth_np = _select_depth(pred).detach().cpu().numpy().astype(np.float32)
        if depth_np.shape != (H, W):
            # Non-fatal: EvalMDE convention expects pred depth at GT resolution.
            print(f'  WARNING {scene.name}: pred shape {depth_np.shape} != GT {(H, W)}')

        pred_intr = _pred_intrinsics(pred, gt_intr, W, H)
        valid = np.isfinite(depth_np) & (depth_np > 0)

        scene_out = out_root / scene.name
        scene_out.mkdir(parents=True, exist_ok=True)
        np.savez(scene_out / 'pred_depth.npz',
                 depth=depth_np, intr=pred_intr, valid=valid)
        if (i + 1) % 10 == 0 or i == 0:
            print(f'  [{i+1}/{len(scenes)}] {scene.name}: shape={depth_np.shape}')

    print(f'Saved {len(scenes)} predictions to {out_root}')


if __name__ == '__main__':
    main()