zeyuren2002
/

EvalMDE

Model card Files Files and versions

EvalMDE / scripts /dataloader.py

zeyuren2002's picture

Add files using upload-large-folder tool

4165f20 verified about 22 hours ago

history blame contribute delete

3.41 kB

	"""
	EvalMDE-native dataloader for MoGe eval_baseline.py.

	Reads Infinigen-style scene directories (EvalMDE convention), one directory per scene:
	    <scene>/rgb.png       image, loaded as RGB
	    <scene>/gt_depth.npz  keys: depth (H,W) float32, intr (4,)=[fx,fy,cx,cy] in pixels, valid (H,W) bool
	Each get() call returns a sample dict with the same layout as MoGe's EvalDataLoaderPipeline.
	"""
	from pathlib import Path
	from typing import Optional
	import numpy as np
	import torch
	from PIL import Image


	class EvalMDELoaderPipeline:
	    """Sequential loader over scene directories holding ``rgb.png`` + ``gt_depth.npz``.

	    The object is a trivial context manager, supports ``len()``, and hands out
	    one sample dict per ``get()`` call, walking ``scene_dirs`` in order.
	    """

	    def __init__(self, path: str, split: Optional[str] = None,
	                 has_sharp_boundary: bool = False,
	                 include_segmentation: bool = False,
	                 depth_unit: float = 1.0,
	                 **_):
	        """Collect the scene directories to iterate over.

	        Args:
	            path: Root directory containing one sub-directory per scene.
	            split: Optional name of a text file under ``path`` listing one scene
	                name per line (blank lines ignored). When it is ``None`` or the
	                file does not exist, every sub-directory of ``path`` that holds
	                both ``rgb.png`` and ``gt_depth.npz`` is used, sorted by path.
	            has_sharp_boundary: Flag copied verbatim into every sample.
	            include_segmentation: Accepted but unused by this loader.
	            depth_unit: Factor the returned depth map is multiplied by.
	            **_: Any further keyword arguments are accepted and ignored.
	        """
	        root = Path(path)
	        split_file = None if split is None else root / split
	        if split_file is not None and split_file.exists():
	            # Scenes named in the split file keep the order given in the file.
	            names = [s.strip() for s in split_file.read_text().splitlines() if s.strip()]
	            self.scene_dirs = [root / n for n in names]
	        else:
	            self.scene_dirs = sorted(
	                d for d in root.iterdir()
	                if d.is_dir() and (d / 'rgb.png').exists() and (d / 'gt_depth.npz').exists()
	            )
	        self.has_sharp_boundary = has_sharp_boundary
	        self.depth_unit = depth_unit
	        self._idx = 0  # index of the scene the next get() call will load

	    def __enter__(self):
	        """Return the loader itself; nothing is acquired."""
	        return self

	    def __exit__(self, *a):
	        """Do nothing; exceptions raised inside the ``with`` body propagate."""
	        return None

	    def __len__(self):
	        """Return the number of scene directories."""
	        return len(self.scene_dirs)

	    @staticmethod
	    def _prepare_arrays(rgb, depth, intr, valid, depth_unit=1.0):
	        """Derive the mask, depth, normalized intrinsics and pointmap as NumPy arrays.

	        Args:
	            rgb: Image array; only its first two dimensions (H, W) are used.
	            depth: (H, W) float32 depth map, may contain NaN/inf.
	            intr: Sequence whose first four entries are [fx, fy, cx, cy] in pixels.
	            valid: (H, W) boolean validity flags.
	            depth_unit: Factor applied to the returned depth map.

	        Returns:
	            Tuple ``(depth, mask, K, points)``: the cleaned and scaled depth,
	            the boolean mask of usable pixels, the 3x3 intrinsics normalized by
	            image size, and the (H, W, 3) float32 pointmap.
	        """
	        H, W = rgb.shape[:2]

	        # A pixel is usable only if it is flagged valid AND its depth is finite.
	        # The same mask drives the placeholder fill below, so every pixel whose
	        # depth was replaced is also reported as invalid.
	        mask = valid & np.isfinite(depth)
	        # EvalMDE convention (evalmde/utils/depth.py:load_data): replace invalid/NaN
	        # with 1.0 so depth-derived quantities (pointmap, etc.) stay finite.
	        depth = np.where(mask, depth, np.float32(1.0))
	        fx, fy, cx, cy = map(float, intr[:4])

	        # MoGe convention: 3x3 normalized intrinsics (fx/W, fy/H, cx/W, cy/H)
	        K = np.array([
	            [fx / W, 0.0, cx / W],
	            [0.0, fy / H, cy / H],
	            [0.0, 0.0, 1.0],
	        ], dtype=np.float32)

	        # Back-project every pixel with the native pixel intrinsics (camera frame).
	        u, v = np.meshgrid(np.arange(W), np.arange(H))
	        x = (u.astype(np.float32) - cx) / fx * depth
	        y = (v.astype(np.float32) - cy) / fy * depth
	        points = np.stack([x, y, depth], axis=-1).astype(np.float32)

	        # NOTE(review): points are built from depth *before* depth_unit is applied,
	        # so with depth_unit != 1 the two are in different units -- confirm intended.
	        depth = depth * depth_unit
	        return depth, mask, K, points

	    def get(self):
	        """Load the next scene and return it as a sample dict.

	        Returns:
	            Dict with tensors ``image`` (3, H, W) in [0, 1], ``depth`` (H, W),
	            ``depth_mask`` (H, W) bool, ``intrinsics`` (3, 3), ``points``
	            (H, W, 3), plus ``is_metric`` (always True), ``has_sharp_boundary``,
	            ``filename`` (scene directory name) and ``_intr_px`` (the raw
	            float32 [fx, fy, cx, cy] array in pixels).

	        Raises:
	            IndexError: If called more than ``len(self)`` times.
	        """
	        scene = self.scene_dirs[self._idx]
	        self._idx += 1

	        # Context managers release both file handles as soon as the data is in memory.
	        with Image.open(scene / 'rgb.png') as im:
	            rgb = np.array(im.convert('RGB'))  # (H, W, 3) uint8
	        with np.load(scene / 'gt_depth.npz') as gt:
	            depth = gt['depth'].astype(np.float32)
	            intr = gt['intr'].astype(np.float32)  # [fx, fy, cx, cy] in pixels
	            valid = gt['valid'].astype(bool)

	        depth, mask, K, points = self._prepare_arrays(rgb, depth, intr, valid, self.depth_unit)

	        return {
	            'image': torch.from_numpy(rgb.astype(np.float32) / 255.0).permute(2, 0, 1),
	            'depth': torch.from_numpy(depth).float(),
	            'depth_mask': torch.from_numpy(mask).bool(),
	            'intrinsics': torch.from_numpy(K).float(),
	            'points': torch.from_numpy(points).float(),
	            'is_metric': True,
	            'has_sharp_boundary': self.has_sharp_boundary,
	            'filename': scene.name,
	            # Carry raw pixel intrinsics for downstream EvalMDE metric usage
	            '_intr_px': intr,
	        }