""" EvalMDE-native dataloader for MoGe eval_baseline.py. Reads Infinigen-style scene dirs (per EvalMDE convention): /rgb.png /gt_depth.npz keys: depth (H,W) float32, intr (4,)=[fx,fy,cx,cy] pixels, valid (H,W) bool Returns the same sample dict shape as MoGe's EvalDataLoaderPipeline. """ from pathlib import Path from typing import Optional import numpy as np import torch from PIL import Image class EvalMDELoaderPipeline: def __init__(self, path: str, split: Optional[str] = None, has_sharp_boundary: bool = False, include_segmentation: bool = False, depth_unit: float = 1.0, **_): root = Path(path) if split is not None and (root / split).exists(): names = [s.strip() for s in (root / split).read_text().splitlines() if s.strip()] self.scene_dirs = [root / n for n in names] else: self.scene_dirs = sorted([ d for d in root.iterdir() if d.is_dir() and (d / 'rgb.png').exists() and (d / 'gt_depth.npz').exists() ]) self.has_sharp_boundary = has_sharp_boundary self.depth_unit = depth_unit self._idx = 0 def __enter__(self): return self def __exit__(self, *a): pass def __len__(self): return len(self.scene_dirs) def get(self): scene = self.scene_dirs[self._idx] self._idx += 1 rgb = np.array(Image.open(scene / 'rgb.png').convert('RGB')) # (H, W, 3) uint8 H, W = rgb.shape[:2] gt = np.load(scene / 'gt_depth.npz') depth = gt['depth'].astype(np.float32) intr = gt['intr'].astype(np.float32) # [fx, fy, cx, cy] in pixels valid = gt['valid'].astype(bool) # EvalMDE convention (evalmde/utils/depth.py:load_data): replace invalid/NaN # with 1.0 so depth-derived quantities (pointmap, etc.) stay finite. depth = np.where(valid & np.isfinite(depth), depth, np.float32(1.0)) fx, fy, cx, cy = float(intr[0]), float(intr[1]), float(intr[2]), float(intr[3]) # MoGe convention: 3x3 normalized intrinsics (fx/W, fy/H, cx/W, cy/H) K = np.array([ [fx / W, 0.0, cx / W], [0.0, fy / H, cy / H], [0.0, 0.0, 1.0] ], dtype=np.float32) # Compute 3D pointmap (in camera frame, with native pixel intrinsics) u, v = np.meshgrid(np.arange(W), np.arange(H)) x = (u.astype(np.float32) - cx) / fx * depth y = (v.astype(np.float32) - cy) / fy * depth points = np.stack([x, y, depth], axis=-1).astype(np.float32) # Multiply depth by depth_unit if specified (Infinigen is metric meters by default → 1.0) depth = depth * self.depth_unit return { 'image': torch.from_numpy(rgb.astype(np.float32) / 255.0).permute(2, 0, 1), 'depth': torch.from_numpy(depth).float(), 'depth_mask': torch.from_numpy(valid).bool(), 'intrinsics': torch.from_numpy(K).float(), 'points': torch.from_numpy(points).float(), 'is_metric': True, 'has_sharp_boundary': self.has_sharp_boundary, 'filename': scene.name, # Carry raw pixel intrinsics for downstream EvalMDE metric usage '_intr_px': intr, }