Instructions to use zeyuren2002/EvalMDE with libraries, inference providers, notebooks, and local apps. Follow these links to get started.
- Libraries
- Diffusers
How to use zeyuren2002/EvalMDE with Diffusers:
pip install -U diffusers transformers accelerate
```python
import torch
from diffusers import DiffusionPipeline

# switch to "mps" for apple devices
pipe = DiffusionPipeline.from_pretrained("zeyuren2002/EvalMDE", dtype=torch.bfloat16, device_map="cuda")
prompt = "Astronaut in a jungle, cold color palette, muted colors, detailed, 8k"
image = pipe(prompt).images[0]
```
- Notebooks
- Google Colab
- Kaggle
| # Reference: https://github.com/prs-eth/Marigold | |
| # Strictly follows official `script/depth/run.py`: | |
| # from marigold import MarigoldDepthPipeline | |
| # pipe = MarigoldDepthPipeline.from_pretrained(checkpoint, torch_dtype=dtype) | |
| # pipe_out = pipe(input_pil_image, denoising_steps, ensemble_size, processing_res, | |
| # match_input_res, batch_size, resample_method, ...) | |
| # depth_pred: np.ndarray = pipe_out.depth_np # normalized affine-invariant depth | |
| # | |
| # Marigold reports its outputs as affine-invariant depth (Marigold paper, CVPR 2024). | |
| # Returns key `depth_affine_invariant`. | |
| import os | |
| import sys | |
| from typing import * | |
| from pathlib import Path | |
| import click | |
| import torch | |
| import torch.nn.functional as F | |
| import numpy as np | |
| from PIL import Image | |
| from moge.test.baseline import MGEBaselineInterface | |
class Baseline(MGEBaselineInterface):
    """Marigold depth baseline wrapped in the MoGe baseline interface.

    Loads ``MarigoldDepthPipeline`` from a local clone of
    https://github.com/prs-eth/Marigold and exposes its prediction under the
    single key ``depth_affine_invariant``.
    """

    def __init__(self, repo_path: str, checkpoint: str, denoise_steps: Optional[int],
                 ensemble_size: int, processing_res: Optional[int], half_precision: bool,
                 device: Union[torch.device, str]):
        """Import Marigold from ``repo_path`` and build the depth pipeline.

        Args:
            repo_path: Path to a local clone of the Marigold repository; it is
                made absolute and prepended to ``sys.path`` so that
                ``marigold`` becomes importable.
            checkpoint: Checkpoint identifier forwarded to
                ``MarigoldDepthPipeline.from_pretrained``.
            denoise_steps: Number of denoising steps; ``None`` leaves the
                pipeline's own default in effect.
            ensemble_size: Ensemble size forwarded to the pipeline call.
            processing_res: Processing resolution; ``None`` leaves the
                pipeline's own default in effect.
            half_precision: Load the ``fp16`` variant with ``torch.float16``
                weights instead of ``torch.float32``.
            device: Device the pipeline is moved to.

        Raises:
            FileNotFoundError: If ``repo_path`` does not exist.
        """
        repo_path = os.path.abspath(repo_path)
        if not Path(repo_path).exists():
            raise FileNotFoundError(
                f"Cannot find Marigold repo at {repo_path}. Clone https://github.com/prs-eth/Marigold."
            )
        if repo_path not in sys.path:
            sys.path.insert(0, repo_path)
        # Deferred import: `marigold` only becomes importable after the
        # sys.path tweak above.
        from marigold import MarigoldDepthPipeline

        device = torch.device(device)
        dtype = torch.float16 if half_precision else torch.float32
        variant = "fp16" if half_precision else None
        pipe = MarigoldDepthPipeline.from_pretrained(checkpoint, variant=variant, torch_dtype=dtype)
        try:
            # Best-effort speed-up; silently continue without xformers when
            # the package cannot be imported.
            pipe.enable_xformers_memory_efficient_attention()
        except ImportError:
            pass
        pipe = pipe.to(device)
        pipe.set_progress_bar_config(disable=True)

        self.pipe = pipe
        self.device = device
        self.denoise_steps = denoise_steps
        self.ensemble_size = ensemble_size
        self.processing_res = processing_res

    @staticmethod
    def load(repo_path: str, checkpoint: str, denoise_steps: Optional[int],
             ensemble_size: int, processing_res: Optional[int], half_precision: bool,
             device: str = 'cuda'):
        """Construct a :class:`Baseline`; arguments mirror ``__init__``.

        Declared as a static method so the factory behaves the same whether
        it is reached through the class or through an instance.
        """
        return Baseline(repo_path, checkpoint, denoise_steps, ensemble_size,
                        processing_res, half_precision, device)

    def infer(self, image: torch.Tensor, intrinsics: Optional[torch.Tensor] = None) -> Dict[str, torch.Tensor]:
        """Predict affine-invariant depth for a single image.

        Args:
            image: Float tensor with values in [0, 1], laid out as
                ``(C, H, W)`` or ``(1, C, H, W)``.
            intrinsics: Accepted for interface compatibility only; Marigold
                does not consume intrinsics, so the value is ignored.

        Returns:
            ``{'depth_affine_invariant': depth}`` where ``depth`` is a float
            tensor on ``self.device`` shaped ``(H, W)`` for an unbatched
            input and ``(1, H, W)`` for a batched one.
        """
        omit_batch = image.ndim == 3
        if omit_batch:
            image = image.unsqueeze(0)
        assert image.shape[0] == 1, "Marigold baseline only supports batch size 1"
        _, _, H, W = image.shape

        # MoGe pipeline supplies image as float tensor in [0, 1]. Marigold pipe
        # takes PIL.Image (run.py uses PIL). Round before the uint8 cast so a
        # value such as 254.99998 maps to 255 instead of being truncated to
        # 254; detach/float keep the NumPy conversion valid for tensors that
        # require grad or use a dtype NumPy cannot represent.
        rgb = image[0].detach().float().cpu().permute(1, 2, 0).clamp(0, 1)
        arr = rgb.mul(255).round().numpy().astype(np.uint8)
        pil = Image.fromarray(arr)

        kwargs: Dict[str, Any] = dict(
            ensemble_size=self.ensemble_size,
            match_input_res=True,
            batch_size=0,
            resample_method='bilinear',
            show_progress_bar=False,
        )
        # Only forward the optional settings when given so the pipeline keeps
        # its own defaults otherwise.
        if self.denoise_steps is not None:
            kwargs['denoising_steps'] = self.denoise_steps  # pipeline kwarg is "denoising_steps"
        if self.processing_res is not None:
            kwargs['processing_res'] = self.processing_res

        out = self.pipe(pil, **kwargs)

        # MarigoldDepthOutput.depth_np: HxW np.float32 in [0, 1]. Marigold paper:
        # affine-invariant depth (linear monotone with true depth, scale+shift free).
        depth = torch.from_numpy(np.ascontiguousarray(out.depth_np)).to(self.device).float()

        # Resize back if pipeline yielded a different size (it shouldn't with match_input_res=True).
        if depth.shape[-2:] != (H, W):
            depth = F.interpolate(depth[None, None], size=(H, W), mode='bilinear', align_corners=False)[0, 0]

        # Restore the batch dimension when the caller supplied one.
        if not omit_batch:
            depth = depth.unsqueeze(0)

        # Marigold predicts affine-invariant depth (Marigold paper, CVPR 2024). Emit only
        # this physical key. MoGe compute_metrics reports `depth_affine_invariant` metric.
        return {'depth_affine_invariant': depth}