Diffusers
Safetensors
EvalMDE / baselines /depthmaster.py
zeyuren2002's picture
Add files using upload-large-folder tool
4165f20 verified
# Reference: https://github.com/indu1ge/DepthMaster
# Strictly follows official `run.py`:
#     from depthmaster import DepthMasterPipeline
#     from depthmaster.modules.unet_2d_condition_s2 import UNet2DConditionModel
#     pipe = DepthMasterPipeline.from_pretrained(checkpoint_path, variant=variant, torch_dtype=dtype)
#     unet = UNet2DConditionModel.from_pretrained(os.path.join(checkpoint_path, 'unet'))
#     pipe.unet = unet
#     pipe = pipe.to(device)
#     pipe_out = pipe(input_pil_image, processing_res=..., match_input_res=...,
#                     batch_size=..., color_map=..., show_progress_bar=..., resample_method=...)
#     depth_pred = pipe_out.depth_np  # H x W float, affine-invariant depth
import os
import sys
from typing import *
from pathlib import Path
import click
import torch
import torch.nn.functional as F
import numpy as np
from PIL import Image
from moge.test.baseline import MGEBaselineInterface
class Baseline(MGEBaselineInterface):
    """DepthMaster baseline exposed through ``MGEBaselineInterface``.

    Wraps ``DepthMasterPipeline`` from https://github.com/indu1ge/DepthMaster
    and returns its prediction under the ``'depth_affine_invariant'`` key.
    """

    def __init__(self, repo_path: str, checkpoint: str, processing_res: Optional[int],
                 half_precision: bool, device: Union[torch.device, str]):
        """Build the DepthMaster pipeline and move it to ``device``.

        Args:
            repo_path: Path to a local clone of the DepthMaster repository; it
                is prepended to ``sys.path`` so ``depthmaster`` can be imported.
            checkpoint: Checkpoint directory holding the pipeline files and a
                ``unet`` sub-directory.
            processing_res: Value forwarded as ``processing_res`` to the pipeline.
            half_precision: Load the weights as ``torch.float16`` (with the
                ``"fp16"`` variant) instead of ``torch.float32``.
            device: Device the pipeline is moved to and outputs are returned on.

        Raises:
            FileNotFoundError: If ``repo_path`` does not exist.
        """
        import warnings

        repo_path = os.path.abspath(repo_path)
        if not Path(repo_path).exists():
            raise FileNotFoundError(
                f"Cannot find DepthMaster repo at {repo_path}. Clone https://github.com/indu1ge/DepthMaster."
            )
        if repo_path not in sys.path:
            sys.path.insert(0, repo_path)

        # Imported lazily: the package only becomes importable after the
        # sys.path manipulation above.
        from depthmaster import DepthMasterPipeline
        from depthmaster.modules.unet_2d_condition_s2 import UNet2DConditionModel

        device = torch.device(device)
        dtype = torch.float16 if half_precision else torch.float32
        variant = "fp16" if half_precision else None

        pipe = DepthMasterPipeline.from_pretrained(checkpoint, variant=variant, torch_dtype=dtype)
        # Load the replacement UNet in the same dtype as the rest of the
        # pipeline; otherwise a half-precision run mixes float16 and float32
        # modules.
        # NOTE(review): assumes the custom UNet accepts the standard diffusers
        # `torch_dtype` keyword in `from_pretrained` - confirm.
        pipe.unet = UNet2DConditionModel.from_pretrained(
            os.path.join(checkpoint, "unet"), torch_dtype=dtype
        )

        # xformers attention is an optional optimisation: only attempt it for
        # CUDA devices, and never let a failure to enable it abort construction.
        if device.type == "cuda":
            try:
                pipe.enable_xformers_memory_efficient_attention()
            except Exception as exc:  # best-effort: fall back to default attention
                warnings.warn(
                    f"xformers memory-efficient attention unavailable ({exc}); using default attention."
                )

        self.pipe = pipe.to(device)
        self.device = device
        self.processing_res = processing_res

    @click.command()
    @click.option('--repo', 'repo_path', type=click.Path(), default='../DepthMaster',
                  help='Path to the indu1ge/DepthMaster repository.')
    @click.option('--checkpoint', type=click.Path(), required=True,
                  help='Local checkpoint directory containing pipeline files + unet subdir (HF: zysong212/DepthMaster).')
    @click.option('--processing_res', type=int, default=768,
                  help='Pipeline processing resolution (run.py default 768).')
    @click.option('--fp16', 'half_precision', is_flag=True, help='Run in half precision.')
    @click.option('--device', type=str, default='cuda')
    @staticmethod
    def load(repo_path: str, checkpoint: str, processing_res: Optional[int],
             half_precision: bool, device: str = 'cuda'):
        """Click entry point that constructs a ``Baseline`` from command-line options."""
        return Baseline(repo_path, checkpoint, processing_res, half_precision, device)

    @torch.inference_mode()
    def infer(self, image: torch.Tensor, intrinsics: Optional[torch.Tensor] = None) -> Dict[str, torch.Tensor]:
        """Predict affine-invariant depth for a single image.

        Args:
            image: Tensor of shape ``(C, H, W)`` or ``(1, C, H, W)``; values are
                clamped to ``[0, 1]`` before conversion to 8-bit.
                NOTE(review): presumably RGB with ``C == 3`` - confirm with callers.
            intrinsics: Accepted for interface compatibility; not used.

        Returns:
            ``{'depth_affine_invariant': depth}`` where ``depth`` is a float32
            tensor on ``self.device`` of shape ``(H, W)`` for unbatched input
            or ``(1, H, W)`` for batched input.
        """
        omit_batch = image.ndim == 3
        if omit_batch:
            image = image.unsqueeze(0)
        assert image.shape[0] == 1, "DepthMaster baseline only supports batch size 1"
        _, _, H, W = image.shape

        # Pipeline takes a PIL.Image (per run.py).
        # Cast to float32 so half-precision inputs are handled on CPU, and round
        # to nearest instead of truncating so values such as 254.9999 do not
        # lose a whole intensity level.
        rgb = image[0].cpu().float().permute(1, 2, 0).clamp(0, 1).numpy()
        arr = np.rint(rgb * 255).astype(np.uint8)
        pil = Image.fromarray(arr)

        out = self.pipe(
            pil,
            processing_res=self.processing_res,
            match_input_res=True,
            batch_size=0,
            color_map='Spectral',
            show_progress_bar=False,
            resample_method='bilinear',
        )

        depth = torch.from_numpy(np.ascontiguousarray(out.depth_np)).to(self.device).float()
        # Safety net: resize if the pipeline output does not match the input size.
        if depth.shape != (H, W):
            depth = F.interpolate(depth[None, None], size=(H, W), mode='bilinear', align_corners=False)[0, 0]

        if not omit_batch:
            depth = depth.unsqueeze(0)
        # DepthMaster predicts affine-invariant depth (TCSVT 2026). Emit only this physical key.
        return {'depth_affine_invariant': depth}