zeyuren2002

Add files using upload-large-folder tool

4165f20 verified 6 days ago

5.47 kB

	# Reference: https://github.com/prs-eth/Marigold
	# Strictly follows official `script/depth/run.py`:
	# from marigold import MarigoldDepthPipeline
	# pipe = MarigoldDepthPipeline.from_pretrained(checkpoint, torch_dtype=dtype)
	# pipe_out = pipe(input_pil_image, denoising_steps=..., ensemble_size=..., processing_res=...,
	#                 match_input_res=..., batch_size=..., resample_method=..., ...)
	# depth_pred: np.ndarray = pipe_out.depth_np # normalized affine-invariant depth
	#
	# Marigold reports its outputs as affine-invariant depth (Marigold paper, CVPR 2024).
	# Returns key `depth_affine_invariant`.

	import os
	import sys
	from typing import *
	from pathlib import Path

	import click
	import torch
	import torch.nn.functional as F
	import numpy as np
	from PIL import Image

	from moge.test.baseline import MGEBaselineInterface


	class Baseline(MGEBaselineInterface):
	    """MoGe evaluation baseline that wraps the Marigold depth pipeline.

	    The pipeline is loaded once in ``__init__``; ``infer`` converts a single
	    image tensor to a PIL image, runs the pipeline on it and returns the
	    prediction under the key ``depth_affine_invariant``.
	    """

	    def __init__(self, repo_path: str, checkpoint: str, denoise_steps: Optional[int],
	                 ensemble_size: int, processing_res: Optional[int], half_precision: bool,
	                 device: Union[torch.device, str]):
	        """Load ``MarigoldDepthPipeline`` from a local clone of the Marigold repo.

	        Args:
	            repo_path: Path to the cloned prs-eth/Marigold repository; it is
	                prepended to ``sys.path`` so that ``marigold`` can be imported.
	            checkpoint: Checkpoint name or local directory passed to
	                ``MarigoldDepthPipeline.from_pretrained``.
	            denoise_steps: Number of denoising steps, or ``None`` to leave the
	                argument out of the pipeline call.
	            ensemble_size: Ensemble size forwarded to the pipeline.
	            processing_res: Processing resolution, or ``None`` to leave the
	                argument out of the pipeline call.
	            half_precision: Load the ``fp16`` variant with ``torch.float16``
	                weights instead of ``torch.float32``.
	            device: Device the pipeline is moved to and on which results are
	                returned.

	        Raises:
	            FileNotFoundError: If ``repo_path`` does not exist.
	        """
	        repo_path = os.path.abspath(repo_path)
	        if not Path(repo_path).exists():
	            raise FileNotFoundError(
	                f"Cannot find Marigold repo at {repo_path}. Clone https://github.com/prs-eth/Marigold."
	            )
	        if repo_path not in sys.path:
	            sys.path.insert(0, repo_path)

	        # Imported lazily: the package only becomes importable after the
	        # sys.path tweak above.
	        from marigold import MarigoldDepthPipeline

	        device = torch.device(device)
	        dtype = torch.float16 if half_precision else torch.float32
	        variant = "fp16" if half_precision else None

	        pipe = MarigoldDepthPipeline.from_pretrained(checkpoint, variant=variant, torch_dtype=dtype)
	        try:
	            pipe.enable_xformers_memory_efficient_attention()
	        except ImportError:
	            # Deliberate best effort: run without xformers when it is not installed.
	            pass
	        pipe = pipe.to(device)
	        pipe.set_progress_bar_config(disable=True)

	        self.pipe = pipe
	        self.device = device
	        self.denoise_steps = denoise_steps
	        self.ensemble_size = ensemble_size
	        self.processing_res = processing_res

	    @click.command()
	    @click.option('--repo', 'repo_path', type=click.Path(), default='../Marigold',
	                  help='Path to the prs-eth/Marigold repository.')
	    @click.option('--checkpoint', type=str, default='prs-eth/marigold-depth-v1-1',
	                  help='HuggingFace ckpt name or local dir (run.py default).')
	    @click.option('--denoise_steps', type=int, default=None,
	                  help='Diffusion denoising steps. None -> default in ckpt.')
	    @click.option('--ensemble_size', type=int, default=1,
	                  help='Ensemble size. run.py default = 1.')
	    @click.option('--processing_res', type=int, default=None,
	                  help='Processing resolution. None -> default in ckpt.')
	    @click.option('--fp16', 'half_precision', is_flag=True, help='Run in half precision.')
	    @click.option('--device', type=str, default='cuda')
	    @staticmethod
	    def load(repo_path: str, checkpoint: str, denoise_steps: Optional[int],
	             ensemble_size: int, processing_res: Optional[int], half_precision: bool,
	             device: str = 'cuda'):
	        """Build a ``Baseline`` from command-line style options (click entry point)."""
	        return Baseline(repo_path, checkpoint, denoise_steps, ensemble_size,
	                        processing_res, half_precision, device)

	    @torch.inference_mode()
	    def infer(self, image: torch.Tensor, intrinsics: Optional[torch.Tensor] = None) -> Dict[str, torch.Tensor]:
	        """Predict depth for a single image.

	        Args:
	            image: Tensor of shape ``(3, H, W)`` or ``(1, 3, H, W)``; values are
	                clamped to ``[0, 1]`` before conversion to 8-bit.
	            intrinsics: Accepted for interface compatibility and ignored;
	                Marigold does not consume intrinsics.

	        Returns:
	            ``{'depth_affine_invariant': depth}`` where ``depth`` is a float
	            tensor on ``self.device`` of shape ``(H, W)`` for an unbatched
	            input, or ``(1, H, W)`` for a batched one.

	        Raises:
	            ValueError: If a batched input holds more than one image.
	        """
	        omit_batch = image.ndim == 3
	        if omit_batch:
	            image = image.unsqueeze(0)
	        # Explicit check instead of `assert`, which is stripped under `python -O`
	        # and would let `image[0]` silently drop the remaining batch items.
	        if image.shape[0] != 1:
	            raise ValueError("Marigold baseline only supports batch size 1")
	        _, _, H, W = image.shape

	        # MoGe pipeline supplies image as float tensor in [0, 1]. Marigold pipe takes
	        # PIL.Image (run.py uses PIL). Round to nearest before the uint8 cast: a plain
	        # cast truncates, and a float32 value of k/255 can multiply back to just under
	        # k, which would make the pixel one level darker than the source image.
	        pixels = image[0].detach().float().cpu().permute(1, 2, 0).clamp(0, 1)
	        arr = pixels.mul(255).round().to(torch.uint8).contiguous().numpy()
	        pil = Image.fromarray(arr)

	        kwargs: Dict[str, Any] = dict(
	            ensemble_size=self.ensemble_size,
	            match_input_res=True,
	            batch_size=0,
	            resample_method='bilinear',
	            show_progress_bar=False,
	        )
	        # Optional settings are passed only when set, so the pipeline's own
	        # defaults apply otherwise.
	        if self.denoise_steps is not None:
	            kwargs['denoising_steps'] = self.denoise_steps  # pipeline kwarg is "denoising_steps"
	        if self.processing_res is not None:
	            kwargs['processing_res'] = self.processing_res

	        out = self.pipe(pil, **kwargs)

	        # MarigoldDepthOutput.depth_np: HxW np.float32 in [0, 1]. Marigold paper:
	        # affine-invariant depth (linear monotone with true depth, scale+shift free).
	        depth_np = out.depth_np
	        depth = torch.from_numpy(np.ascontiguousarray(depth_np)).to(self.device).float()

	        # Resize back if pipeline yielded a different size (it shouldn't with match_input_res=True).
	        if depth.shape[-2:] != (H, W):
	            depth = F.interpolate(depth[None, None], size=(H, W), mode='bilinear', align_corners=False)[0, 0]

	        # Marigold predicts affine-invariant depth (Marigold paper, CVPR 2024). Emit only
	        # this physical key. MoGe compute_metrics reports `depth_affine_invariant` metric.
	        if not omit_batch:
	            depth = depth.unsqueeze(0)
	        return {'depth_affine_invariant': depth}