Diffusers
Safetensors
File size: 4,562 Bytes
4165f20
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
#!/usr/bin/env python
"""
EvalMDE inference driver — produces per-scene pred .npz in EvalMDE format.

Strictly follows EvalMDE convention (see compute_metrics_example.py):
  Output:  <output-root>/<model_name>/<scene>/pred_depth.npz
           keys: depth (H,W) float32, intr (4,) [fx,fy,cx,cy]px, valid (H,W) bool

Calls one of the 7 model wrappers (baselines/<m>.py). Each wrapper is the same
MGEBaselineInterface; we just call .infer_for_evaluation(image, intrinsics).
"""
import sys, os
from pathlib import Path
import click, numpy as np, torch
from PIL import Image

EVALMDE_ROOT = Path(__file__).resolve().parent.parent
sys.path.insert(0, str(EVALMDE_ROOT))            # baselines/ + test/ visible
# MoGe checkout location (baselines still import moge.test.baseline).
# Overridable via MOGE_ROOT so the script is not tied to one user's home dir;
# the default preserves the original hard-coded path.
sys.path.insert(0, os.environ.get('MOGE_ROOT', '/home/ywan0794/MoGe'))


@click.command(context_settings={"allow_extra_args": True, "ignore_unknown_options": True})
@click.option('--baseline', 'baseline_code_path', required=True, type=click.Path())
@click.option('--data-root',   required=True, type=click.Path())
@click.option('--output-root', required=True, type=click.Path())
@click.option('--model-name',  required=True, type=str)
@click.pass_context
def main(ctx, baseline_code_path, data_root, output_root, model_name):
    """Run one baseline wrapper over every scene and save EvalMDE-format preds.

    For each scene directory under ``data_root`` containing ``rgb.png`` and
    ``gt_depth.npz``, writes ``<output_root>/<model_name>/<scene>/pred_depth.npz``
    with keys: depth (H,W) float32, intr (4,) [fx,fy,cx,cy] in pixels,
    valid (H,W) bool.

    Extra CLI args (``ctx.args``) are forwarded verbatim to the wrapper's
    ``Baseline.load`` click command, so each model keeps its own options.
    """
    import importlib.util

    # Load the wrapper module from an arbitrary file path.
    spec = importlib.util.spec_from_file_location(Path(baseline_code_path).stem, baseline_code_path)
    if spec is None or spec.loader is None:
        raise ImportError(f'Cannot create an import spec for {baseline_code_path}')
    module = importlib.util.module_from_spec(spec)
    # Register BEFORE exec_module so code inside the wrapper that looks itself
    # up by module name (dataclasses, pickling, relative lookups) resolves
    # (per the importlib documentation's loading recipe).
    sys.modules[spec.name] = module
    spec.loader.exec_module(module)
    baseline = module.Baseline.load.main(ctx.args, standalone_mode=False)

    data_root = Path(data_root)
    out_root = Path(output_root) / model_name
    out_root.mkdir(parents=True, exist_ok=True)

    # A "scene" is any subdir that has both the RGB image and the GT depth file.
    scenes = sorted(d for d in data_root.iterdir()
                    if d.is_dir() and (d / 'rgb.png').exists() and (d / 'gt_depth.npz').exists())
    print(f'Found {len(scenes)} scenes')

    for i, scene in enumerate(scenes):
        rgb = np.array(Image.open(scene / 'rgb.png').convert('RGB'))     # (H, W, 3) uint8
        gt  = np.load(scene / 'gt_depth.npz')
        gt_intr = gt['intr'].astype(np.float32)                          # [fx, fy, cx, cy] pixels
        H, W = rgb.shape[:2]
        fx, fy, cx, cy = gt_intr.tolist()

        # Normalized pinhole intrinsics (divide by image size), as the
        # wrappers' infer_for_evaluation expects.
        K_norm = torch.tensor([
            [fx / W, 0.0,    cx / W],
            [0.0,    fy / H, cy / H],
            [0.0,    0.0,    1.0]
        ], dtype=torch.float32, device=baseline.device)

        # (3, H, W) float32 in [0, 1] on the baseline's device.
        img = torch.from_numpy(rgb.astype(np.float32) / 255.0).permute(2, 0, 1).to(baseline.device)

        with torch.inference_mode():
            # Some baselines (DA3-Mono / Marigold / Lotus / DepthMaster / PPD / FE2E)
            # do `assert intrinsics is None` because they don't consume intrinsics.
            # Only metric models (Depth Pro / Metric3D / MoGe) actually use them.
            # Try with intrinsics first, fall back without.
            try:
                pred = baseline.infer_for_evaluation(img, K_norm)
            except (AssertionError, TypeError):
                pred = baseline.infer_for_evaluation(img)

        # Priority: metric > scale_inv > affine > disparity (most-specific first)
        if   'depth_metric' in pred:               d = pred['depth_metric']
        elif 'depth_scale_invariant' in pred:      d = pred['depth_scale_invariant']
        elif 'depth_affine_invariant' in pred:     d = pred['depth_affine_invariant']
        elif 'disparity_affine_invariant' in pred:
            # NOTE(review): reciprocal of *affine-invariant* disparity is only
            # depth up to an affine-in-disparity transform — acceptable for
            # affine-invariant metrics downstream, verify against the evaluator.
            d = 1.0 / pred['disparity_affine_invariant'].clamp(min=1e-6)
        else:
            raise ValueError(f'No depth-compatible key in {list(pred.keys())}')

        if d.ndim == 3: d = d.squeeze(0)  # drop leading channel/batch dim if present
        depth_np = d.detach().cpu().numpy().astype(np.float32)

        # pred_intr: use model-predicted intrinsics if available (Depth Pro), else GT.
        # Predicted K is normalized; scale back to pixels.
        if 'intrinsics' in pred:
            K = pred['intrinsics'].detach().cpu().numpy()
            if K.ndim == 3: K = K[0]
            pred_intr = np.array([K[0,0]*W, K[1,1]*H, K[0,2]*W, K[1,2]*H], dtype=np.float32)
        else:
            pred_intr = gt_intr

        # Valid where the prediction is a finite positive depth.
        valid = np.isfinite(depth_np) & (depth_np > 0)

        scene_out = out_root / scene.name
        scene_out.mkdir(parents=True, exist_ok=True)
        np.savez(scene_out / 'pred_depth.npz', depth=depth_np, intr=pred_intr, valid=valid)

        if (i+1) % 10 == 0 or i == 0:
            print(f'  [{i+1}/{len(scenes)}] {scene.name}: shape={depth_np.shape}')

    print(f'Saved {len(scenes)} predictions to {out_root}')


# Script entry point: click parses sys.argv and dispatches to main().
if __name__ == '__main__':
    main()