Instructions to use zeyuren2002/EvalMDE with libraries, inference providers, notebooks, and local apps. Follow these links to get started.
- Libraries
- Diffusers
How to use zeyuren2002/EvalMDE with Diffusers:
pip install -U diffusers transformers accelerate
```python
import torch
from diffusers import DiffusionPipeline

# switch to "mps" for apple devices
pipe = DiffusionPipeline.from_pretrained("zeyuren2002/EvalMDE", dtype=torch.bfloat16, device_map="cuda")

prompt = "Astronaut in a jungle, cold color palette, muted colors, detailed, 8k"
image = pipe(prompt).images[0]
```
- Notebooks
- Google Colab
- Kaggle
# Reference: https://github.com/gangweiX/Pixel-Perfect-Depth
# Strictly follows official `run.py`:
#   from ppd.models.ppd import PixelPerfectDepth
#   model = PixelPerfectDepth(semantics_model='DA2', semantics_pth='checkpoints/depth_anything_v2_vitl.pth',
#                             sampling_steps=4)
#   model.load_state_dict(torch.load(model_pth, map_location='cpu'), strict=False)
#   model = model.to(DEVICE).eval()
#   image = cv2.imread(filename)  # BGR uint8 numpy
#   H, W = image.shape[:2]
#   depth, _ = model.infer_image(image)  # torch.Tensor, may be (1, 1, h, w)
#   depth = F.interpolate(depth, size=(H, W), mode='bilinear', align_corners=False)[0, 0]
import os
import sys
from pathlib import Path
# Explicit names instead of `from typing import *`: only these three are used,
# and the star import needlessly pollutes the module namespace.
from typing import Dict, Optional, Union

import click
import numpy as np
import torch
import torch.nn.functional as F

from moge.test.baseline import MGEBaselineInterface
class Baseline(MGEBaselineInterface):
    """Pixel-Perfect-Depth (PPD) baseline wrapper for the MoGe evaluation harness.

    Mirrors the official inference path of
    https://github.com/gangweiX/Pixel-Perfect-Depth (`run.py`); see the
    module-level reference comment for the exact upstream sequence.
    """

    def __init__(self, repo_path: str, semantics_model: str, semantics_pth: str,
                 model_pth: str, sampling_steps: int, device: Union[torch.device, str]):
        """Build the PPD model and load its checkpoint.

        Args:
            repo_path: Path to a local clone of the Pixel-Perfect-Depth repo.
            semantics_model: Semantics backbone identifier forwarded to PPD (e.g. 'DA2').
            semantics_pth: Semantics checkpoint path; a relative path is resolved
                against the repo root, mirroring run.py.
            model_pth: PPD checkpoint path; relative paths resolved likewise.
            sampling_steps: Number of diffusion sampling steps.
            device: Torch device used for inference.

        Raises:
            FileNotFoundError: If the repo or either checkpoint is missing.
        """
        repo_path = os.path.abspath(repo_path)
        if not Path(repo_path).exists():
            raise FileNotFoundError(
                f"Cannot find PPD repo at {repo_path}. Clone https://github.com/gangweiX/Pixel-Perfect-Depth."
            )
        # PPD is not an installable package: import it by prepending the clone
        # to sys.path, then import lazily so a missing repo fails above instead.
        if repo_path not in sys.path:
            sys.path.insert(0, repo_path)
        from ppd.models.ppd import PixelPerfectDepth
        from ppd.utils.set_seed import set_seed
        set_seed(666)  # mirror run.py

        # Allow relative paths against repo root (mirror run.py expectations).
        if not os.path.isabs(semantics_pth):
            semantics_pth = os.path.join(repo_path, semantics_pth)
        if not os.path.isabs(model_pth):
            model_pth = os.path.join(repo_path, model_pth)
        if not os.path.exists(semantics_pth):
            raise FileNotFoundError(f"Cannot find PPD semantics checkpoint at {semantics_pth}.")
        if not os.path.exists(model_pth):
            raise FileNotFoundError(f"Cannot find PPD model checkpoint at {model_pth}.")

        device = torch.device(device)
        model = PixelPerfectDepth(
            semantics_model=semantics_model,
            semantics_pth=semantics_pth,
            sampling_steps=sampling_steps,
        )
        # strict=False mirrors run.py: the checkpoint may omit weights that are
        # loaded separately (e.g. the semantics backbone).
        model.load_state_dict(torch.load(model_pth, map_location='cpu'), strict=False)
        model = model.to(device).eval()
        self.model = model
        self.device = device

    @staticmethod
    def load(repo_path: str, semantics_model: str, semantics_pth: str,
             model_pth: str, sampling_steps: int, device: str = 'cuda') -> 'Baseline':
        """Factory entry point used by the evaluation harness.

        FIX: the original definition lacked @staticmethod, so invoking it on an
        instance would have bound `self` to `repo_path` and shifted every
        argument; `Baseline.load(...)` keeps working identically.
        """
        return Baseline(repo_path, semantics_model, semantics_pth, model_pth, sampling_steps, device)

    def infer(self, image: torch.Tensor, intrinsics: Optional[torch.Tensor] = None) -> Dict[str, torch.Tensor]:
        """Predict affine-invariant depth for a single image.

        Args:
            image: Float RGB tensor in [0, 1], shaped (3, H, W) or (1, 3, H, W).
            intrinsics: Unused; PPD does not condition on camera intrinsics.

        Returns:
            {'depth_affine_invariant': depth} where depth is (H, W) if the input
            had no batch dimension, else (1, H, W).
        """
        omit_batch = image.ndim == 3
        if omit_batch:
            image = image.unsqueeze(0)
        assert image.shape[0] == 1, "PPD baseline only supports batch size 1"
        _, _, H, W = image.shape

        # run.py calls cv2.imread which returns BGR uint8 numpy (H, W, 3).
        rgb_uint8 = (image[0].cpu().permute(1, 2, 0).clamp(0, 1).numpy() * 255).astype(np.uint8)
        bgr_uint8 = rgb_uint8[..., ::-1].copy()  # BGR for cv2 parity
        depth, _ = self.model.infer_image(bgr_uint8)

        # run.py: depth = F.interpolate(depth, size=(H, W), ...)[0, 0]; normalize
        # whatever rank infer_image returned down to an (H, W) map.
        if depth.ndim == 4:
            depth = F.interpolate(depth, size=(H, W), mode='bilinear', align_corners=False)[0, 0]
        elif depth.ndim == 3:
            # FIX: (1, h, w) was previously unhandled and leaked a 3-D result;
            # add a batch axis, resize, then drop both leading axes.
            depth = F.interpolate(depth.unsqueeze(0), size=(H, W), mode='bilinear', align_corners=False)[0, 0]
        elif depth.ndim == 2 and depth.shape != (H, W):
            depth = F.interpolate(depth[None, None], size=(H, W), mode='bilinear', align_corners=False)[0, 0]
        depth = depth.to(self.device).float()

        # PPD predicts affine-invariant depth (Xu et al., 2025). Emit only this physical key.
        result = {'depth_affine_invariant': depth}
        if not omit_batch:
            result['depth_affine_invariant'] = result['depth_affine_invariant'].unsqueeze(0)
        return result