Instructions to use zeyuren2002/EvalMDE with libraries, inference providers, notebooks, and local apps. Follow these links to get started.
- Libraries
- Diffusers
How to use zeyuren2002/EvalMDE with Diffusers:
pip install -U diffusers transformers accelerate
import torch
from diffusers import DiffusionPipeline

# switch to "mps" for apple devices
pipe = DiffusionPipeline.from_pretrained("zeyuren2002/EvalMDE", dtype=torch.bfloat16, device_map="cuda")
prompt = "Astronaut in a jungle, cold color palette, muted colors, detailed, 8k"
image = pipe(prompt).images[0]
- Notebooks
- Google Colab
- Kaggle
| """ | |
| EvalMDE-native dataloader for MoGe eval_baseline.py. | |
| Reads Infinigen-style scene dirs (per EvalMDE convention): | |
| <scene>/rgb.png | |
| <scene>/gt_depth.npz keys: depth (H,W) float32, intr (4,)=[fx,fy,cx,cy] pixels, valid (H,W) bool | |
| Returns the same sample dict shape as MoGe's EvalDataLoaderPipeline. | |
| """ | |
| from pathlib import Path | |
| from typing import Optional | |
| import numpy as np | |
| import torch | |
| from PIL import Image | |
class EvalMDELoaderPipeline:
    """Sequential loader over EvalMDE/Infinigen-style scene directories.

    Mirrors the sample-dict contract of MoGe's EvalDataLoaderPipeline:
    each call to ``get()`` yields one scene as a dict of torch tensors.

    Args:
        path: Root directory containing one sub-directory per scene.
        split: Optional name of a text file under ``path`` listing scene
            names (one per non-empty line). When absent, every sub-directory
            holding both ``rgb.png`` and ``gt_depth.npz`` is auto-discovered.
        has_sharp_boundary: Propagated verbatim into each sample dict.
        include_segmentation: Accepted for signature compatibility with the
            MoGe loader; segmentation loading is not implemented here.
        depth_unit: Multiplier converting stored depth to metres
            (Infinigen is metric already, so the default is 1.0).
        **_: Ignored; tolerates extra config keys.
    """

    def __init__(self, path: str, split: Optional[str] = None,
                 has_sharp_boundary: bool = False,
                 include_segmentation: bool = False,
                 depth_unit: float = 1.0,
                 **_):
        root = Path(path)
        if split is not None and (root / split).exists():
            # Explicit split file: one scene name per non-empty line.
            names = [s.strip() for s in (root / split).read_text().splitlines() if s.strip()]
            self.scene_dirs = [root / n for n in names]
        else:
            # Auto-discover: any sub-directory containing both required files.
            self.scene_dirs = sorted(
                d for d in root.iterdir()
                if d.is_dir() and (d / 'rgb.png').exists() and (d / 'gt_depth.npz').exists()
            )
        self.has_sharp_boundary = has_sharp_boundary
        # Stored for introspection/parity even though segmentation is unsupported.
        self.include_segmentation = include_segmentation
        self.depth_unit = depth_unit
        self._idx = 0  # cursor for sequential get()

    def __enter__(self):
        return self

    def __exit__(self, *a):
        # Nothing to release; context-manager support exists only for API parity.
        pass

    def __len__(self):
        return len(self.scene_dirs)

    def get(self):
        """Load the next scene and return a MoGe-style sample dict.

        Returns:
            dict with keys ``image`` (3,H,W float in [0,1]), ``depth`` (H,W),
            ``depth_mask`` (H,W bool), ``intrinsics`` (3,3 normalized),
            ``points`` (H,W,3 camera-frame), plus metadata fields.

        Raises:
            IndexError: when all scenes have been consumed.
        """
        scene = self.scene_dirs[self._idx]
        self._idx += 1

        rgb = np.array(Image.open(scene / 'rgb.png').convert('RGB'))  # (H, W, 3) uint8
        H, W = rgb.shape[:2]

        gt = np.load(scene / 'gt_depth.npz')
        depth = gt['depth'].astype(np.float32)
        intr = gt['intr'].astype(np.float32)  # [fx, fy, cx, cy] in pixels
        valid = gt['valid'].astype(bool)

        # FIX: apply the unit conversion *before* deriving the pointmap, so the
        # returned 'depth' and the z-channel of 'points' agree. (Previously the
        # pointmap was built from unscaled depth while 'depth' alone was scaled,
        # making the two inconsistent for any depth_unit != 1.0.)
        depth = depth * self.depth_unit

        # EvalMDE convention (evalmde/utils/depth.py:load_data): replace invalid/NaN
        # with 1.0 so depth-derived quantities (pointmap, etc.) stay finite.
        depth = np.where(valid & np.isfinite(depth), depth, np.float32(1.0))

        fx, fy, cx, cy = (float(v) for v in intr[:4])

        # MoGe convention: 3x3 normalized intrinsics (fx/W, fy/H, cx/W, cy/H).
        K = np.array([
            [fx / W, 0.0, cx / W],
            [0.0, fy / H, cy / H],
            [0.0, 0.0, 1.0],
        ], dtype=np.float32)

        # Back-project to a camera-frame 3D pointmap using native pixel intrinsics.
        u, v = np.meshgrid(np.arange(W, dtype=np.float32), np.arange(H, dtype=np.float32))
        x = (u - cx) / fx * depth
        y = (v - cy) / fy * depth
        points = np.stack([x, y, depth], axis=-1).astype(np.float32)

        return {
            'image': torch.from_numpy(rgb.astype(np.float32) / 255.0).permute(2, 0, 1),
            'depth': torch.from_numpy(depth).float(),
            'depth_mask': torch.from_numpy(valid).bool(),
            'intrinsics': torch.from_numpy(K).float(),
            'points': torch.from_numpy(points).float(),
            'is_metric': True,
            'has_sharp_boundary': self.has_sharp_boundary,
            'filename': scene.name,
            # Carry raw pixel intrinsics for downstream EvalMDE metric usage
            '_intr_px': intr,
        }