zeyuren2002

Add files using upload-large-folder tool

4165f20 verified about 21 hours ago

4.49 kB

	# Reference: https://github.com/indu1ge/DepthMaster
	# Strictly follows official `run.py`:
	# from depthmaster import DepthMasterPipeline
	# from depthmaster.modules.unet_2d_condition_s2 import UNet2DConditionModel
	# pipe = DepthMasterPipeline.from_pretrained(checkpoint_path, variant=variant, torch_dtype=dtype)
	# unet = UNet2DConditionModel.from_pretrained(os.path.join(checkpoint_path, 'unet'))
	# pipe.unet = unet
	# pipe = pipe.to(device)
	# pipe_out = pipe(input_pil_image, processing_res=..., match_input_res=...,
	# batch_size=..., color_map=..., show_progress_bar=..., resample_method=...)
	# depth_pred = pipe_out.depth_np # H x W float, affine-invariant depth

	import os
	import sys
	from typing import *
	from pathlib import Path

	import click
	import torch
	import torch.nn.functional as F
	import numpy as np
	from PIL import Image

	from moge.test.baseline import MGEBaselineInterface


	class Baseline(MGEBaselineInterface):
	def __init__(self, repo_path: str, checkpoint: str, processing_res: Optional[int],
	half_precision: bool, device: Union[torch.device, str]):
	repo_path = os.path.abspath(repo_path)
	if not Path(repo_path).exists():
	raise FileNotFoundError(
	f"Cannot find DepthMaster repo at {repo_path}. Clone https://github.com/indu1ge/DepthMaster."
	)
	if repo_path not in sys.path:
	sys.path.insert(0, repo_path)

	from depthmaster import DepthMasterPipeline
	from depthmaster.modules.unet_2d_condition_s2 import UNet2DConditionModel

	device = torch.device(device)
	dtype = torch.float16 if half_precision else torch.float32
	variant = "fp16" if half_precision else None

	pipe = DepthMasterPipeline.from_pretrained(checkpoint, variant=variant, torch_dtype=dtype)
	unet_dir = os.path.join(checkpoint, "unet")
	unet = UNet2DConditionModel.from_pretrained(unet_dir)
	pipe.unet = unet
	try:
	pipe.enable_xformers_memory_efficient_attention()
	except ImportError:
	pass
	pipe = pipe.to(device)

	self.pipe = pipe
	self.device = device
	self.processing_res = processing_res

	@click.command()
	@click.option('--repo', 'repo_path', type=click.Path(), default='../DepthMaster',
	help='Path to the indu1ge/DepthMaster repository.')
	@click.option('--checkpoint', type=click.Path(), required=True,
	help='Local checkpoint directory containing pipeline files + unet subdir (HF: zysong212/DepthMaster).')
	@click.option('--processing_res', type=int, default=768,
	help='Pipeline processing resolution (run.py default 768).')
	@click.option('--fp16', 'half_precision', is_flag=True, help='Run in half precision.')
	@click.option('--device', type=str, default='cuda')
	@staticmethod
	def load(repo_path: str, checkpoint: str, processing_res: Optional[int],
	half_precision: bool, device: str = 'cuda'):
	return Baseline(repo_path, checkpoint, processing_res, half_precision, device)

	@torch.inference_mode()
	def infer(self, image: torch.Tensor, intrinsics: Optional[torch.Tensor] = None) -> Dict[str, torch.Tensor]:
	omit_batch = image.ndim == 3
	if omit_batch:
	image = image.unsqueeze(0)
	assert image.shape[0] == 1, "DepthMaster baseline only supports batch size 1"
	_, _, H, W = image.shape

	# Pipeline takes a PIL.Image (per run.py).
	arr = (image[0].cpu().permute(1, 2, 0).clamp(0, 1).numpy() * 255).astype(np.uint8)
	pil = Image.fromarray(arr)

	out = self.pipe(
	pil,
	processing_res=self.processing_res,
	match_input_res=True,
	batch_size=0,
	color_map='Spectral',
	show_progress_bar=False,
	resample_method='bilinear',
	)

	depth_np = out.depth_np
	depth = torch.from_numpy(np.ascontiguousarray(depth_np)).to(self.device).float()
	if depth.shape != (H, W):
	depth = F.interpolate(depth[None, None], size=(H, W), mode='bilinear', align_corners=False)[0, 0]

	# DepthMaster predicts affine-invariant depth (TCSVT 2026). Emit only this physical key.
	result = {'depth_affine_invariant': depth}
	if not omit_batch:
	result['depth_affine_invariant'] = result['depth_affine_invariant'].unsqueeze(0)
	return result