# Hugging Face upload-page residue (kept as a comment so this file parses as Python):
# Diffusers / Safetensors — EvalMDE/baselines/ppd.py — uploaded by zeyuren2002
# via the upload-large-folder tool (commit 4165f20, verified)
# Reference: https://github.com/gangweiX/Pixel-Perfect-Depth
# Strictly follows official `run.py`:
# from ppd.models.ppd import PixelPerfectDepth
# model = PixelPerfectDepth(semantics_model='DA2', semantics_pth='checkpoints/depth_anything_v2_vitl.pth',
# sampling_steps=4)
# model.load_state_dict(torch.load(model_pth, map_location='cpu'), strict=False)
# model = model.to(DEVICE).eval()
# image = cv2.imread(filename) # BGR uint8 numpy
# H, W = image.shape[:2]
# depth, _ = model.infer_image(image) # torch.Tensor, may be (1, 1, h, w)
# depth = F.interpolate(depth, size=(H, W), mode='bilinear', align_corners=False)[0, 0]
import os
import sys
from typing import *
from pathlib import Path
import click
import torch
import torch.nn.functional as F
import numpy as np
from moge.test.baseline import MGEBaselineInterface
class Baseline(MGEBaselineInterface):
    """Pixel-Perfect-Depth (PPD) wrapped as an MGE baseline.

    Strictly mirrors the official ``run.py``: build ``PixelPerfectDepth``, load
    the checkpoint with ``strict=False``, feed a BGR uint8 numpy image to
    ``infer_image``, and bilinearly resize the predicted depth back to the
    input resolution.
    """

    def __init__(self, repo_path: str, semantics_model: str, semantics_pth: str,
                 model_pth: str, sampling_steps: int, device: Union[torch.device, str]):
        """Import the PPD repo, resolve checkpoint paths, and build the model.

        Args:
            repo_path: Path to a local clone of gangweiX/Pixel-Perfect-Depth.
            semantics_model: Semantics encoder selector ('DA2' or 'MoGe2').
            semantics_pth: Semantics encoder checkpoint; resolved against
                ``repo_path`` when not absolute.
            model_pth: PPD model checkpoint; resolved against ``repo_path``
                when not absolute.
            sampling_steps: Number of DiT sampling steps (run.py default 4).
            device: Torch device (or device string) for inference.

        Raises:
            FileNotFoundError: If the repo or either checkpoint is missing.
        """
        repo_path = os.path.abspath(repo_path)
        if not Path(repo_path).exists():
            raise FileNotFoundError(
                f"Cannot find PPD repo at {repo_path}. Clone https://github.com/gangweiX/Pixel-Perfect-Depth."
            )
        # Make the cloned repo importable as the `ppd` package.
        if repo_path not in sys.path:
            sys.path.insert(0, repo_path)
        from ppd.models.ppd import PixelPerfectDepth
        from ppd.utils.set_seed import set_seed
        set_seed(666)  # mirror run.py

        # Allow relative paths against repo root (mirror run.py expectations).
        if not os.path.isabs(semantics_pth):
            semantics_pth = os.path.join(repo_path, semantics_pth)
        if not os.path.isabs(model_pth):
            model_pth = os.path.join(repo_path, model_pth)
        if not os.path.exists(semantics_pth):
            raise FileNotFoundError(f"Cannot find PPD semantics checkpoint at {semantics_pth}.")
        if not os.path.exists(model_pth):
            raise FileNotFoundError(f"Cannot find PPD model checkpoint at {model_pth}.")

        device = torch.device(device)
        model = PixelPerfectDepth(
            semantics_model=semantics_model,
            semantics_pth=semantics_pth,
            sampling_steps=sampling_steps,
        )
        # strict=False mirrors the official loader in run.py.
        model.load_state_dict(torch.load(model_pth, map_location='cpu'), strict=False)
        model = model.to(device).eval()

        self.model = model
        self.device = device

    @click.command()
    @click.option('--repo', 'repo_path', type=click.Path(), default='../Pixel-Perfect-Depth',
                  help='Path to the gangweiX/Pixel-Perfect-Depth repository.')
    @click.option('--semantics_model', type=click.Choice(['DA2', 'MoGe2']), default='DA2',
                  help='Semantics encoder used by PPD (run.py default DA2).')
    @click.option('--semantics_pth', type=click.Path(),
                  default='checkpoints/depth_anything_v2_vitl.pth',
                  help='Semantics encoder ckpt path (relative to --repo if not absolute).')
    @click.option('--model_pth', type=click.Path(), default='checkpoints/ppd.pth',
                  help='PPD model ckpt path (relative to --repo if not absolute).')
    @click.option('--sampling_steps', type=int, default=4,
                  help='Number of DiT sampling steps (run.py default 4).')
    @click.option('--device', type=str, default='cuda')
    @staticmethod
    def load(repo_path: str, semantics_model: str, semantics_pth: str,
             model_pth: str, sampling_steps: int, device: str = 'cuda'):
        """CLI entry point constructing a :class:`Baseline`; defaults mirror run.py."""
        return Baseline(repo_path, semantics_model, semantics_pth, model_pth, sampling_steps, device)

    @torch.inference_mode()
    def infer(self, image: torch.Tensor, intrinsics: Optional[torch.Tensor] = None) -> Dict[str, torch.Tensor]:
        """Predict affine-invariant depth for a single image.

        Args:
            image: Float RGB tensor, values assumed in [0, 1], shape (3, H, W)
                or (1, 3, H, W).
            intrinsics: Unused — PPD does not consume camera intrinsics.

        Returns:
            Dict with key ``'depth_affine_invariant'``: an (H, W) tensor, or
            (1, H, W) when the input carried a batch dimension.

        Raises:
            ValueError: If the batch size is not 1.
        """
        omit_batch = image.ndim == 3
        if omit_batch:
            image = image.unsqueeze(0)
        # Raise (not assert) so the check survives `python -O`.
        if image.shape[0] != 1:
            raise ValueError("PPD baseline only supports batch size 1")
        _, _, H, W = image.shape

        # run.py calls cv2.imread which returns BGR uint8 numpy (H, W, 3).
        rgb_uint8 = (image[0].cpu().permute(1, 2, 0).clamp(0, 1).numpy() * 255).astype(np.uint8)
        bgr_uint8 = rgb_uint8[..., ::-1].copy()  # BGR for cv2 parity
        depth, _ = self.model.infer_image(bgr_uint8)

        # Normalize any of (h, w), (1, h, w), (1, 1, h, w) to 4D, then resize to
        # the input resolution (mirrors run.py's bilinear interpolate). Resizing
        # is skipped when the size already matches — numerically equivalent.
        if depth.ndim == 2:
            depth = depth[None, None]
        elif depth.ndim == 3:
            depth = depth[None]
        if depth.shape[-2:] != (H, W):
            depth = F.interpolate(depth, size=(H, W), mode='bilinear', align_corners=False)
        depth = depth[0, 0].to(self.device).float()

        # PPD predicts affine-invariant depth (Xu et al., 2025). Emit only this physical key.
        result = {'depth_affine_invariant': depth if omit_batch else depth.unsqueeze(0)}
        return result