Diffusers
Safetensors
EvalMDE / scripts /dataloader.py
zeyuren2002's picture
Add files using upload-large-folder tool
4165f20 verified
"""
EvalMDE-native dataloader for MoGe eval_baseline.py.
Reads Infinigen-style scene dirs (per EvalMDE convention):
<scene>/rgb.png
<scene>/gt_depth.npz keys: depth (H,W) float32, intr (4,)=[fx,fy,cx,cy] pixels, valid (H,W) bool
Returns the same sample dict shape as MoGe's EvalDataLoaderPipeline.
"""
from pathlib import Path
from typing import Optional
import numpy as np
import torch
from PIL import Image
class EvalMDELoaderPipeline:
    """Sequential loader over EvalMDE / Infinigen-style scene directories.

    Each scene directory is expected to hold ``rgb.png`` and ``gt_depth.npz``
    (keys ``depth``, ``intr`` = ``[fx, fy, cx, cy]`` in pixels, ``valid``).
    Scenes are served one at a time, in order, by :meth:`get`.

    Args:
        path: Dataset root containing one sub-directory per scene.
        split: Optional name of a text file under ``path`` listing one scene
            directory name per line (blank lines ignored). When it is ``None``
            or does not name a regular file, every sub-directory of ``path``
            that contains both ``rgb.png`` and ``gt_depth.npz`` is used, in
            sorted order.
        has_sharp_boundary: Copied verbatim into every returned sample.
        include_segmentation: Accepted for signature compatibility; unused.
        depth_unit: Factor the ground-truth depth is multiplied by.
        **_: Extra keyword arguments are accepted and ignored.
    """

    def __init__(self, path: str, split: Optional[str] = None,
                 has_sharp_boundary: bool = False,
                 include_segmentation: bool = False,
                 depth_unit: float = 1.0,
                 **_):
        root = Path(path)
        split_file = None if split is None else root / split
        # is_file() rather than exists(): a split such as "" resolves to the
        # root directory itself, which exists but cannot be read as text.
        if split_file is not None and split_file.is_file():
            names = [line.strip() for line in split_file.read_text().splitlines() if line.strip()]
            self.scene_dirs = [root / name for name in names]
        else:
            self.scene_dirs = sorted(
                d for d in root.iterdir()
                if d.is_dir() and (d / 'rgb.png').exists() and (d / 'gt_depth.npz').exists()
            )
        self.has_sharp_boundary = has_sharp_boundary
        self.depth_unit = depth_unit
        self._idx = 0  # position of the next scene returned by get()

    def __enter__(self):
        return self

    def __exit__(self, *a):
        # Nothing to release: files are opened and closed inside get().
        pass

    def __len__(self):
        return len(self.scene_dirs)

    def get(self):
        """Load the next scene and return its sample dict.

        Returns:
            dict with keys ``image`` (3, H, W) float in [0, 1], ``depth``
            (H, W), ``depth_mask`` (H, W) bool, ``intrinsics`` (3, 3)
            normalized by image size, ``points`` (H, W, 3), ``is_metric``,
            ``has_sharp_boundary``, ``filename`` and ``_intr_px`` (the raw
            pixel intrinsics as a NumPy array).

        Raises:
            IndexError: if called more than ``len(self)`` times.
            ValueError: if depth/valid do not match the RGB resolution.
        """
        scene = self.scene_dirs[self._idx]
        self._idx += 1
        rgb, depth, intr, valid = self._load_scene(scene)
        return self._build_sample(scene.name, rgb, depth, intr, valid)

    @staticmethod
    def _load_scene(scene):
        """Read the RGB image and ground-truth arrays of one scene from disk."""
        # Both Image.open and np.load (for .npz) keep the file open until
        # closed, so use them as context managers and copy the data out.
        with Image.open(scene / 'rgb.png') as img:
            rgb = np.array(img.convert('RGB'))  # (H, W, 3) uint8
        with np.load(scene / 'gt_depth.npz') as gt:
            depth = gt['depth'].astype(np.float32)
            intr = gt['intr'].astype(np.float32)  # [fx, fy, cx, cy] in pixels
            valid = gt['valid'].astype(bool)
        return rgb, depth, intr, valid

    def _build_sample(self, name, rgb, depth, intr, valid):
        """Turn the raw arrays of one scene into the returned sample dict."""
        H, W = rgb.shape[:2]
        if depth.shape != (H, W) or valid.shape != (H, W):
            raise ValueError(
                f"{name}: depth {depth.shape} / valid {valid.shape} do not match "
                f"image size {(H, W)}"
            )

        # A pixel is usable only if it is flagged valid AND its depth is finite.
        # Everything else gets the placeholder 1.0 (EvalMDE convention,
        # evalmde/utils/depth.py:load_data) so derived quantities stay finite,
        # and is excluded from the returned mask so the placeholder is never
        # scored as ground truth.
        usable = valid & np.isfinite(depth)
        depth = np.where(usable, depth, np.float32(1.0))
        # Apply the unit before unprojecting so points[..., 2] equals depth.
        depth = depth * self.depth_unit

        fx, fy, cx, cy = float(intr[0]), float(intr[1]), float(intr[2]), float(intr[3])
        # MoGe convention: 3x3 normalized intrinsics (fx/W, fy/H, cx/W, cy/H)
        K = np.array([
            [fx / W, 0.0, cx / W],
            [0.0, fy / H, cy / H],
            [0.0, 0.0, 1.0],
        ], dtype=np.float32)

        # Unproject every pixel into the camera frame using pixel intrinsics.
        u, v = np.meshgrid(np.arange(W), np.arange(H))
        x = (u.astype(np.float32) - cx) / fx * depth
        y = (v.astype(np.float32) - cy) / fy * depth
        points = np.stack([x, y, depth], axis=-1).astype(np.float32)

        return {
            'image': torch.from_numpy(rgb.astype(np.float32) / 255.0).permute(2, 0, 1),
            'depth': torch.from_numpy(depth).float(),
            'depth_mask': torch.from_numpy(usable).bool(),
            'intrinsics': torch.from_numpy(K).float(),
            'points': torch.from_numpy(points).float(),
            'is_metric': True,
            'has_sharp_boundary': self.has_sharp_boundary,
            'filename': name,
            # Carry raw pixel intrinsics for downstream EvalMDE metric usage
            '_intr_px': intr,
        }