Instructions to use zeyuren2002/EvalMDE with libraries, inference providers, notebooks, and local apps. Follow these links to get started.
- Libraries
- Diffusers
How to use zeyuren2002/EvalMDE with Diffusers:
pip install -U diffusers transformers accelerate
```python
import torch
from diffusers import DiffusionPipeline

# switch to "mps" for apple devices
pipe = DiffusionPipeline.from_pretrained("zeyuren2002/EvalMDE", dtype=torch.bfloat16, device_map="cuda")

prompt = "Astronaut in a jungle, cold color palette, muted colors, detailed, 8k"
image = pipe(prompt).images[0]
```
- Notebooks
- Google Colab
- Kaggle
| # Reference: https://github.com/ByteDance-Seed/Depth-Anything-3 | |
| # Variant of `baselines/da3.py` that loads DA3's *monocular* preset(s). | |
| # DA3 README: "DA3 Monocular Series (DA3Mono-Large). A dedicated model for high-quality | |
| # relative monocular depth estimation. Unlike disparity-based models (e.g. Depth Anything 2), | |
| # it directly predicts depth, resulting in superior geometric accuracy." | |
| # | |
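| # Loads the model the same way as da3.py: | |
| # model = DepthAnything3.from_pretrained(<hf_id>) | |
| # da3.py's direct call `output = model(image)` (image shape [B, N, 3, H, W], | |
| # depth = output['depth'][:, 0] -> [B, H, W]) is NOT used here, because the direct | |
| # forward call crashes on the DA3-Mono backbone (see the comment in `infer`). | |
| # Instead the high-level API is used: | |
| # prediction = model.inference([np_image]) # np_image: HxWx3 uint8 | |
| # depth = prediction.depth[0] # [H, W] | |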
| # | |
| # NOTE on output key: DA3-Mono outputs depth directly (per README), not disparity. | |
| # We therefore return `depth_scale_invariant` instead of `disparity_affine_invariant`. | |
| import os | |
| import sys | |
| from typing import * | |
| from pathlib import Path | |
| import click | |
| import torch | |
| import torch.nn.functional as F | |
| import torchvision.transforms as T | |
| import torchvision.transforms.functional as TF | |
| from moge.test.baseline import MGEBaselineInterface | |
class Baseline(MGEBaselineInterface):
    """Baseline wrapper around Depth-Anything-3's monocular preset(s).

    The model is loaded from a local clone of the Depth-Anything-3 repository
    and queried through its high-level ``inference()`` API. The result is
    returned under the ``depth_scale_invariant`` key, since DA3-Mono predicts
    depth directly rather than disparity.
    """

    def __init__(self, repo_path: str, hf_id: str, num_tokens: Optional[int], device: Union[torch.device, str]):
        """Import DA3 from ``repo_path`` and load the ``hf_id`` checkpoint.

        Args:
            repo_path: Path to a local clone of the Depth-Anything-3 repo; its
                ``src`` directory is prepended to ``sys.path``.
            hf_id: Identifier passed to ``DepthAnything3.from_pretrained``.
            num_tokens: Stored on the instance; not used by ``infer``.
            device: Device the model is moved to and on which results are
                returned.

        Raises:
            FileNotFoundError: If ``repo_path`` does not exist.
        """
        repo_path = os.path.abspath(repo_path)
        if not Path(repo_path).exists():
            raise FileNotFoundError(
                f"Cannot find Depth-Anything-3 repo at {repo_path}. Clone "
                f"https://github.com/ByteDance-Seed/Depth-Anything-3."
            )

        src_path = os.path.join(repo_path, 'src')
        if src_path not in sys.path:
            sys.path.insert(0, src_path)

        # Silence DA3's verbose per-image INFO logs (DA3_LOG_LEVEL is read at logger init),
        # so this must be set before the package is imported below.
        os.environ.setdefault('DA3_LOG_LEVEL', 'WARN')
        from depth_anything_3.api import DepthAnything3

        device = torch.device(device)
        model = DepthAnything3.from_pretrained(hf_id)
        model.to(device).eval()

        self.model = model
        self.num_tokens = num_tokens
        self.device = device

    # NOTE(review): `click` is imported by this file but no click decorators are
    # visible on `load`; confirm whether the benchmark harness expects `load` to be
    # a click command before relying on this plain factory.
    @staticmethod
    def load(repo_path: str, hf_id: str, num_tokens: Optional[int], device: str = 'cuda'):
        """Build a ``Baseline``; takes no ``self`` so it works on the class or an instance."""
        return Baseline(repo_path, hf_id, num_tokens, device)

    def infer(self, image: torch.Tensor, intrinsics: Optional[torch.Tensor] = None) -> Dict[str, torch.Tensor]:
        """Predict depth for one image or a batch of images.

        Args:
            image: ``[3, H, W]`` or ``[B, 3, H, W]`` tensor. Values are clamped
                to ``[0, 1]`` and quantized to uint8 before being handed to DA3.
            intrinsics: Must be ``None``; DA3-Mono does not consume intrinsics.

        Returns:
            ``{'depth_scale_invariant': depth}`` where ``depth`` is a float32
            tensor on ``self.device`` of shape ``[H, W]`` (unbatched input) or
            ``[B, H, W]`` (batched input), at the input resolution.
        """
        # Same input pipeline as baselines/da3.py to keep apples-to-apples.
        assert intrinsics is None, "DA3-Mono does not consume intrinsics."

        import numpy as np

        omit_batch_dim = image.ndim == 3
        if omit_batch_dim:
            image = image.unsqueeze(0)
        original_height, original_width = image.shape[-2:]

        # Use DA3's high-level `model.inference()` API per README. Direct `model(x)`
        # goes through `forward(... export_feat_layers=None)` and the DA3-Mono backbone
        # (DINOv2 fork) crashes inside `_get_intermediate_layers_not_chunked` because it
        # tries `i in export_feat_layers` on None. `inference()` handles processing,
        # autocast, and post-processing correctly.
        #
        # Each batch element goes through its own `inference()` call: every image gets
        # a prediction (previously only `image[0]` did), and batch elements are never
        # handed to DA3 together as one multi-image input.
        depths = []
        for frame in image:
            np_img = (frame.detach().cpu().permute(1, 2, 0).clamp(0, 1).numpy() * 255).astype(np.uint8)
            prediction = self.model.inference([np_img])
            # prediction.depth: [N, H, W] float32
            depth_t = torch.as_tensor(prediction.depth[0], device=self.device, dtype=torch.float32)
            if depth_t.shape != (original_height, original_width):
                # Resample back to the caller's resolution when DA3 returns another size.
                depth_t = F.interpolate(depth_t[None, None], size=(original_height, original_width),
                                        mode='bilinear', align_corners=False)[0, 0]
            depths.append(depth_t)

        depth = torch.stack(depths, dim=0)
        if omit_batch_dim:
            depth = depth[0]
        return {'depth_scale_invariant': depth}