Instructions to use zeyuren2002/EvalMDE with libraries, inference providers, notebooks, and local apps. Follow these links to get started.
- Libraries
- Diffusers
How to use zeyuren2002/EvalMDE with Diffusers:
pip install -U diffusers transformers accelerate
How to use it in code:

  import torch
  from diffusers import DiffusionPipeline

  # switch to "mps" for Apple devices
  pipe = DiffusionPipeline.from_pretrained("zeyuren2002/EvalMDE", dtype=torch.bfloat16, device_map="cuda")
  prompt = "Astronaut in a jungle, cold color palette, muted colors, detailed, 8k"
  image = pipe(prompt).images[0]

- Notebooks
- Google Colab
- Kaggle
File size: 3,411 bytes
"""
EvalMDE-native dataloader for MoGe eval_baseline.py.
Reads Infinigen-style scene dirs (per EvalMDE convention):
<scene>/rgb.png
<scene>/gt_depth.npz keys: depth (H,W) float32, intr (4,)=[fx,fy,cx,cy] pixels, valid (H,W) bool
Returns the same sample dict shape as MoGe's EvalDataLoaderPipeline.
"""
from pathlib import Path
from typing import Optional
import numpy as np
import torch
from PIL import Image
class EvalMDELoaderPipeline:
    """Sequential loader over EvalMDE/Infinigen-style scene directories.

    Each scene directory contains:
        rgb.png       -- (H, W, 3) uint8 image
        gt_depth.npz  -- keys: depth (H, W) float32, intr (4,) = [fx, fy, cx, cy]
                         in pixels, valid (H, W) bool

    Mimics the interface of MoGe's ``EvalDataLoaderPipeline``: usable as a
    context manager, sized via ``len()``, and yielding one sample dict per
    ``get()`` call.
    """

    def __init__(self, path: str, split: Optional[str] = None,
                 has_sharp_boundary: bool = False,
                 include_segmentation: bool = False,
                 depth_unit: float = 1.0,
                 **_):
        """
        Args:
            path: Root directory containing scene subdirectories.
            split: Optional filename (relative to ``path``) of a text file
                listing scene names, one per line. If absent or not found,
                all valid scene dirs under ``path`` are used (sorted).
            has_sharp_boundary: Passed through to each sample dict.
            include_segmentation: Accepted for interface compatibility with
                MoGe's loader; segmentation is not loaded here.
            depth_unit: Multiplier converting stored depth to meters
                (Infinigen is already metric, so the default is 1.0).
            **_: Extra keyword arguments are ignored for forward compatibility.
        """
        root = Path(path)
        if split is not None and (root / split).exists():
            # Explicit split file: one scene name per non-blank line.
            names = [s.strip() for s in (root / split).read_text().splitlines() if s.strip()]
            self.scene_dirs = [root / n for n in names]
        else:
            # Fall back to scanning for complete scene directories.
            self.scene_dirs = sorted([
                d for d in root.iterdir()
                if d.is_dir() and (d / 'rgb.png').exists() and (d / 'gt_depth.npz').exists()
            ])
        self.has_sharp_boundary = has_sharp_boundary
        self.depth_unit = depth_unit
        self._idx = 0  # cursor into self.scene_dirs; advanced by get()

    def __enter__(self):
        return self

    def __exit__(self, *a):
        # Nothing to release: all file handles are scoped inside get().
        pass

    def __len__(self):
        return len(self.scene_dirs)

    def get(self):
        """Load the next scene and return a MoGe-style sample dict.

        Returns:
            dict with keys: image (3,H,W float in [0,1]), depth (H,W float,
            meters), depth_mask (H,W bool), intrinsics (3,3 normalized),
            points (H,W,3 camera-frame pointmap, meters), is_metric,
            has_sharp_boundary, filename, and _intr_px (raw pixel
            intrinsics for downstream EvalMDE metric usage).

        Raises:
            IndexError: when called more than ``len(self)`` times.
        """
        scene = self.scene_dirs[self._idx]
        self._idx += 1
        rgb = np.array(Image.open(scene / 'rgb.png').convert('RGB'))  # (H, W, 3) uint8
        H, W = rgb.shape[:2]
        gt = np.load(scene / 'gt_depth.npz')
        depth = gt['depth'].astype(np.float32)
        intr = gt['intr'].astype(np.float32)  # [fx, fy, cx, cy] in pixels
        valid = gt['valid'].astype(bool)
        # EvalMDE convention (evalmde/utils/depth.py:load_data): replace invalid/NaN
        # with 1.0 so depth-derived quantities (pointmap, etc.) stay finite.
        depth = np.where(valid & np.isfinite(depth), depth, np.float32(1.0))
        # Convert to meters BEFORE deriving the pointmap, so points[..., 2]
        # and the returned depth agree when depth_unit != 1.0. (Previously the
        # pointmap was built from unscaled depth, making the two inconsistent.)
        depth = depth * np.float32(self.depth_unit)
        fx, fy, cx, cy = float(intr[0]), float(intr[1]), float(intr[2]), float(intr[3])
        # MoGe convention: 3x3 normalized intrinsics (fx/W, fy/H, cx/W, cy/H)
        K = np.array([
            [fx / W, 0.0, cx / W],
            [0.0, fy / H, cy / H],
            [0.0, 0.0, 1.0]
        ], dtype=np.float32)
        # Compute 3D pointmap (camera frame) by unprojecting with the raw
        # pixel intrinsics: X = (u - cx)/fx * Z, Y = (v - cy)/fy * Z.
        u, v = np.meshgrid(np.arange(W), np.arange(H))
        x = (u.astype(np.float32) - cx) / fx * depth
        y = (v.astype(np.float32) - cy) / fy * depth
        points = np.stack([x, y, depth], axis=-1).astype(np.float32)
        return {
            'image': torch.from_numpy(rgb.astype(np.float32) / 255.0).permute(2, 0, 1),
            'depth': torch.from_numpy(depth).float(),
            'depth_mask': torch.from_numpy(valid).bool(),
            'intrinsics': torch.from_numpy(K).float(),
            'points': torch.from_numpy(points).float(),
            'is_metric': True,
            'has_sharp_boundary': self.has_sharp_boundary,
            'filename': scene.name,
            # Carry raw pixel intrinsics for downstream EvalMDE metric usage
            '_intr_px': intr,
        }