#!/usr/bin/env python
"""
Batch wrapper around EvalMDE's metrics.

Dual-track output per scene:
  RAW     — pred fed verbatim into SAWA-H components (EvalMDE official
            protocol; biased against affine-invariant models in
            boundary_F1 / rel_normal since those metrics are NOT
            shift-invariant)
  ALIGNED — pred affine-aligned to GT (LSQ fit on valid mask) before
            feeding, gives fair cross-model comparison

Both tracks emit the 5 SAWA-H components + the weighted sum + raw rel_normal.
"""
import json

from pathlib import Path

import click
import numpy as np
import torch

from evalmde.utils.depth import load_data, align
from evalmde.utils.proj import depth_to_xyz
from evalmde.utils.torch import reformat_as_torch_tensor
from evalmde.metrics.standard import rel_depth, delta0125
from evalmde.metrics.boundary import boundary_f1
from evalmde.metrics.rel_normal import rel_normal as rel_normal_fn

# SAWA-H weights (verbatim from evalmde/metrics/sawa_h.py)
W_WKDR, W_DDISP_AF, W_DDEPTH_AF, W_BOUND_F1, W_RELNORM = 3.65, 0.18, 0.01, 0.20, 1.94

# Single source of truth for the metric keys produced by compute_components
# and averaged in main() — previously duplicated in two places.
METRIC_KEYS = [
    'wkdr_no_align',
    'delta0125_disparity_affine_err',
    'delta0125_depth_affine_err',
    'boundary_f1_err',
    'rel_normal',
    'sawa_h',
]


def compute_components(pr_depth, pr_intr, pr_valid, gt_depth, gt_intr, gt_valid):
    """Compute the 5 SAWA-H components plus their weighted sum.

    Mirrors sawa_h.compute_sawa_h verbatim so that RAW-track numbers match
    the official EvalMDE protocol exactly — do not "fix" metric semantics
    here.

    Returns a dict of Python floats keyed by METRIC_KEYS.
    """
    # Ordinal depth-relation error; no alignment is applied, so this term
    # is scale/shift sensitive by design.
    wkdr = 1.0 - rel_depth(pr_depth, gt_depth, gt_valid)[1]

    # delta<1.25 error after affine alignment in DISPARITY (1/depth) space.
    # NOTE(review): 1/pr_depth assumes strictly positive predicted depth —
    # zeros would produce inf; presumably guaranteed upstream, verify.
    d_disp = 1.0 - delta0125(
        align(1 / reformat_as_torch_tensor(pr_depth),
              reformat_as_torch_tensor(gt_depth),
              reformat_as_torch_tensor(gt_valid),
              'disparity_affine_clip_by_0'),
        gt_depth, gt_valid)[1]

    # delta<1.25 error after affine alignment in DEPTH space.
    d_depth = 1.0 - delta0125(
        align(reformat_as_torch_tensor(pr_depth),
              reformat_as_torch_tensor(gt_depth),
              reformat_as_torch_tensor(gt_valid),
              'depth_affine_lst_sq_clip_by_0'),
        gt_depth, gt_valid)[1]

    # Boundary F1 on the un-aligned prediction (NOT shift-invariant; see
    # module docstring). Taken as-is from boundary_f1's second return value,
    # matching the official sawa_h body.
    bound = boundary_f1(reformat_as_torch_tensor(pr_depth),
                        reformat_as_torch_tensor(gt_depth),
                        reformat_as_torch_tensor(gt_valid))[1]

    # Relative-normal error computed from back-projected point clouds of
    # GT and prediction (each with its own intrinsics and validity mask).
    relnorm = rel_normal_fn(
        depth_to_xyz(gt_intr, gt_depth), gt_valid,
        depth_to_xyz(pr_intr, pr_depth), pr_valid,
    )

    sawa = (W_WKDR * wkdr + W_DDISP_AF * d_disp + W_DDEPTH_AF * d_depth
            + W_BOUND_F1 * bound + W_RELNORM * relnorm)
    return {
        'wkdr_no_align': float(wkdr),
        'delta0125_disparity_affine_err': float(d_disp),
        'delta0125_depth_affine_err': float(d_depth),
        'boundary_f1_err': float(bound),
        'rel_normal': float(relnorm),
        'sawa_h': float(sawa),
    }


def affine_align_to_gt(pr_d, gt_d, valid):
    """Least-squares fit of (a, b) so that a*pr_d + b ≈ gt_d on valid pixels.

    Args:
        pr_d: predicted depth map (ndarray).
        gt_d: ground-truth depth map, same shape.
        valid: boolean mask of pixels eligible for the fit.

    Returns:
        (aligned_depth_float32, a, b). Falls back to the identity transform
        (a=1, b=0) when fewer than 100 usable pixels remain after masking.
    """
    # Exclude non-finite values and absurdly large predictions that would
    # dominate the least-squares solution.
    m = valid & np.isfinite(pr_d) & np.isfinite(gt_d) & (np.abs(pr_d) < 1e6)
    if m.sum() < 100:
        # Too few reliable pixels for a meaningful fit — return pred as-is.
        return pr_d.astype(np.float32), 1.0, 0.0
    p = pr_d[m].astype(np.float64)
    g = gt_d[m].astype(np.float64)
    # Design matrix [p | 1] solves g ≈ a*p + b in one lstsq call.
    A = np.stack([p, np.ones_like(p)], axis=-1)
    a, b = np.linalg.lstsq(A, g, rcond=None)[0]
    # Clip to a small positive floor so downstream 1/depth stays finite.
    aligned = (pr_d.astype(np.float64) * a + b).clip(1e-4, None)
    return aligned.astype(np.float32), float(a), float(b)


@click.command()
@click.option('--gt-root', required=True, type=click.Path())
@click.option('--pred-root', required=True, type=click.Path())
@click.option('--model-name', required=True, type=str)
@click.option('--output', required=True, type=click.Path())
def main(gt_root, pred_root, model_name, output):
    """Evaluate every scene under pred-root/model-name, write JSON summary."""
    gt_root, pred_root = Path(gt_root), Path(pred_root) / model_name
    # A scene counts only if its prediction file actually exists.
    scenes = sorted(d.name for d in pred_root.iterdir()
                    if (d / 'pred_depth.npz').exists())
    print(f'Found {len(scenes)} scenes for {model_name}')

    rows = []
    for sc in scenes:
        gt_d, gt_i, gt_v = load_data(str(gt_root / sc / 'gt_depth.npz'))
        pr_d, pr_i, pr_v = load_data(str(pred_root / sc / 'pred_depth.npz'))

        raw = compute_components(pr_d, pr_i, pr_v, gt_d, gt_i, gt_v)

        pr_d_aln, scale, shift = affine_align_to_gt(pr_d, gt_d, pr_v & gt_v)
        # Aligned pred uses gt intrinsics (we don't realign focal — only depth values)
        aln = compute_components(pr_d_aln, gt_i, pr_v, gt_d, gt_i, gt_v)
        aln['_align_scale'] = scale
        aln['_align_shift'] = shift

        rows.append({'scene': sc, 'raw': raw, 'aligned': aln})
        print(f' {sc}: '
              f'sawa_h raw={raw["sawa_h"]:.3f} aln={aln["sawa_h"]:.3f} | '
              f'relnorm raw={raw["rel_normal"]:.3f} aln={aln["rel_normal"]:.3f} | '
              f'boundF1_err raw={raw["boundary_f1_err"]:.3f} '
              f'aln={aln["boundary_f1_err"]:.3f}')

    n = max(len(rows), 1)  # avoid ZeroDivisionError when no scenes matched
    mean_raw = {k: sum(r['raw'][k] for r in rows) / n for k in METRIC_KEYS}
    mean_aln = {k: sum(r['aligned'][k] for r in rows) / n for k in METRIC_KEYS}

    summary = {
        'model': model_name,
        'n_scenes': len(rows),
        'sawa_h_weights': {
            'wkdr_no_align': W_WKDR,
            'delta0125_disparity_affine_err': W_DDISP_AF,
            'delta0125_depth_affine_err': W_DDEPTH_AF,
            'boundary_f1_err': W_BOUND_F1,
            'rel_normal': W_RELNORM,
        },
        'per_scene': rows,
        'mean_raw': mean_raw,
        'mean_aligned': mean_aln,
    }

    out_path = Path(output)
    out_path.parent.mkdir(parents=True, exist_ok=True)
    # BUGFIX: original used json.dump(summary, open(output, 'w'), ...) which
    # never closed the file handle; a context manager guarantees flush+close.
    with out_path.open('w') as f:
        json.dump(summary, f, indent=2)

    print(f'\nMean RAW : {mean_raw}')
    print(f'Mean ALIGNED: {mean_aln}')
    print(f'Saved → {output}')


if __name__ == '__main__':
    main()