Instructions to use zeyuren2002/EvalMDE with libraries, inference providers, notebooks, and local apps. Follow these links to get started.
- Libraries
  - Diffusers
How to use zeyuren2002/EvalMDE with Diffusers:

```bash
pip install -U diffusers transformers accelerate
```

```python
import torch
from diffusers import DiffusionPipeline

# switch to "mps" for Apple devices
pipe = DiffusionPipeline.from_pretrained(
    "zeyuren2002/EvalMDE", dtype=torch.bfloat16, device_map="cuda"
)

prompt = "Astronaut in a jungle, cold color palette, muted colors, detailed, 8k"
image = pipe(prompt).images[0]
```

- Notebooks
  - Google Colab
  - Kaggle
```python
#!/usr/bin/env python
"""
EvalMDE inference driver — produces per-scene pred .npz in EvalMDE format.
Strictly follows EvalMDE convention (see compute_metrics_example.py):
Output: <output-root>/<model_name>/<scene>/pred_depth.npz
keys: depth (H,W) float32, intr (4,) [fx,fy,cx,cy]px, valid (H,W) bool
Calls one of the 7 model wrappers (baselines/<m>.py). Each wrapper is the same
MGEBaselineInterface; we just call .infer_for_evaluation(image, intrinsics).
"""
import sys, os
from pathlib import Path
import click, numpy as np, torch
from PIL import Image
EVALMDE_ROOT = Path(__file__).resolve().parent.parent
sys.path.insert(0, str(EVALMDE_ROOT)) # baselines/ + test/ visible
sys.path.insert(0, '/home/ywan0794/MoGe') # baselines still import moge.test.baseline
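# NOTE: the MoGe checkout path above is machine-specific; point it at your own
# clone (the baseline wrappers import `moge.test.baseline` from that checkout).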
@click.command(context_settings={"allow_extra_args": True, "ignore_unknown_options": True})
@click.option('--baseline', 'baseline_code_path', required=True, type=click.Path())
@click.option('--data-root', required=True, type=click.Path())
@click.option('--output-root', required=True, type=click.Path())
@click.option('--model-name', required=True, type=str)
@click.pass_context
def main(ctx, baseline_code_path, data_root, output_root, model_name):
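    # `Baseline.load` is itself a click command; calling .main(ctx.args,
    # standalone_mode=False) parses the pass-through args and returns the
    # constructed baseline object instead of exiting the process.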
    import importlib.util
    spec = importlib.util.spec_from_file_location(Path(baseline_code_path).stem, baseline_code_path)
    module = importlib.util.module_from_spec(spec)
    spec.loader.exec_module(module)
    baseline_cls = getattr(module, 'Baseline')
    baseline = baseline_cls.load.main(ctx.args, standalone_mode=False)
    data_root = Path(data_root)
    out_root = Path(output_root) / model_name
    out_root.mkdir(parents=True, exist_ok=True)
    scenes = sorted(d for d in data_root.iterdir()
                    if d.is_dir() and (d / 'rgb.png').exists() and (d / 'gt_depth.npz').exists())
    print(f'Found {len(scenes)} scenes')
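
    # Expected on-disk layout (implied by the existence checks above):
    #   <data-root>/<scene>/rgb.png
    #   <data-root>/<scene>/gt_depth.npz   (contains at least an 'intr' array)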
    for i, scene in enumerate(scenes):
        rgb = np.array(Image.open(scene / 'rgb.png').convert('RGB'))  # (H, W, 3) uint8
        gt = np.load(scene / 'gt_depth.npz')
        gt_intr = gt['intr'].astype(np.float32)  # [fx, fy, cx, cy] in pixels
        H, W = rgb.shape[:2]
        fx, fy, cx, cy = gt_intr.tolist()
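        # Normalized pinhole intrinsics: pixel-unit values divided by image width
        # (fx, cx) and height (fy, cy), i.e. unit-square UV coordinates. This is a
        # MoGe-style convention; the wrappers are assumed to expect this form.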
        K_norm = torch.tensor([
            [fx / W, 0.0, cx / W],
            [0.0, fy / H, cy / H],
            [0.0, 0.0, 1.0]
        ], dtype=torch.float32, device=baseline.device)
        img = torch.from_numpy(rgb.astype(np.float32) / 255.0).permute(2, 0, 1).to(baseline.device)
        with torch.inference_mode():
            # Some baselines (DA3-Mono / Marigold / Lotus / DepthMaster / PPD / FE2E)
            # do `assert intrinsics is None` because they don't consume intrinsics.
            # Only metric models (Depth Pro / Metric3D / MoGe) actually use them.
            # Try with intrinsics first, fall back without.
            try:
                pred = baseline.infer_for_evaluation(img, K_norm)
            except (AssertionError, TypeError):
                pred = baseline.infer_for_evaluation(img)
        # Priority: metric > scale_inv > affine > disparity (most-specific first)
        if 'depth_metric' in pred:
            d = pred['depth_metric']
        elif 'depth_scale_invariant' in pred:
            d = pred['depth_scale_invariant']
        elif 'depth_affine_invariant' in pred:
            d = pred['depth_affine_invariant']
        elif 'disparity_affine_invariant' in pred:
            d = 1.0 / pred['disparity_affine_invariant'].clamp(min=1e-6)
        else:
            raise ValueError(f'No depth-compatible key in {list(pred.keys())}')
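        # NOTE: inverting affine-invariant disparity yields depth only up to an
        # affine transform; downstream metrics are assumed to handle alignment.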
        if d.ndim == 3:
            d = d.squeeze(0)
        depth_np = d.detach().cpu().numpy().astype(np.float32)
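
        # `pred['intrinsics']` is assumed to follow the same normalized convention
        # as K_norm above, so scale fx/cx back by W and fy/cy by H to get pixels.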
        # pred_intr: use model-predicted intrinsics if available (Depth Pro), else GT
        if 'intrinsics' in pred:
            K = pred['intrinsics'].detach().cpu().numpy()
            if K.ndim == 3:
                K = K[0]
            pred_intr = np.array([K[0, 0] * W, K[1, 1] * H, K[0, 2] * W, K[1, 2] * H], dtype=np.float32)
        else:
            pred_intr = gt_intr
        valid = np.isfinite(depth_np) & (depth_np > 0)
        scene_out = out_root / scene.name
        scene_out.mkdir(parents=True, exist_ok=True)
        np.savez(scene_out / 'pred_depth.npz', depth=depth_np, intr=pred_intr, valid=valid)
        if (i + 1) % 10 == 0 or i == 0:
            print(f'  [{i+1}/{len(scenes)}] {scene.name}: shape={depth_np.shape}')
    print(f'Saved {len(scenes)} predictions to {out_root}')
if __name__ == '__main__':
    main()
```
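
For reference, here is a minimal sketch of consuming the files this driver writes. The prediction side follows the format stated in the script's docstring (`depth`, `intr`, `valid`); the GT key name `depth` and the median-scaling step are assumptions, since compute_metrics_example.py is not shown here:

```python
import numpy as np

def absrel_median_scaled(pred_path: str, gt_path: str) -> float:
    """Median-scaled absolute relative error for one scene (a sketch).

    Assumes the GT .npz stores its depth map under a 'depth' key; the pred
    .npz layout (depth / intr / valid) matches this driver's output.
    """
    pred, gt = np.load(pred_path), np.load(gt_path)
    d_pred, d_gt = pred['depth'], gt['depth']   # GT key name is an assumption
    mask = pred['valid'] & np.isfinite(d_gt) & (d_gt > 0)
    # Align scale via the ratio of medians (sensible for scale-invariant
    # predictions; affine-invariant outputs would need an affine fit instead).
    scale = np.median(d_gt[mask]) / np.median(d_pred[mask])
    return float(np.mean(np.abs(scale * d_pred[mask] - d_gt[mask]) / d_gt[mask]))

# e.g. absrel_median_scaled('preds/moge/scene_0001/pred_depth.npz',
#                           'data/scene_0001/gt_depth.npz')
```

Median scaling only makes sense for scale-invariant or metric outputs; for the affine-invariant and disparity branches above, a least-squares affine alignment would be the more appropriate choice.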