Instructions to use zeyuren2002/EvalMDE with libraries, inference providers, notebooks, and local apps. Follow these links to get started.
- Libraries
- Diffusers
How to use zeyuren2002/EvalMDE with Diffusers:
pip install -U diffusers transformers accelerate
    import torch
    from diffusers import DiffusionPipeline

    # switch to "mps" for apple devices
    pipe = DiffusionPipeline.from_pretrained("zeyuren2002/EvalMDE", dtype=torch.bfloat16, device_map="cuda")

    prompt = "Astronaut in a jungle, cold color palette, muted colors, detailed, 8k"
    image = pipe(prompt).images[0]
- Notebooks
- Google Colab
- Kaggle
| #!/usr/bin/env python | |
| """ | |
| Batch wrapper around EvalMDE's metrics. | |
| Dual-track output per scene: | |
| RAW — pred fed verbatim into SAWA-H components (EvalMDE official protocol; | |
| biased against affine-invariant models in boundary_F1 / rel_normal | |
| since those metrics are NOT shift-invariant) | |
| ALIGNED — pred affine-aligned to GT (LSQ fit on valid mask) before feeding, | |
| gives fair cross-model comparison | |
| Both tracks emit the 5 SAWA-H components + the weighted sum + raw rel_normal. | |
| """ | |
| import json, click | |
| from pathlib import Path | |
| import numpy as np | |
| import torch | |
| from evalmde.utils.depth import load_data, align | |
| from evalmde.utils.proj import depth_to_xyz | |
| from evalmde.utils.torch import reformat_as_torch_tensor | |
| from evalmde.metrics.standard import rel_depth, delta0125 | |
| from evalmde.metrics.boundary import boundary_f1 | |
| from evalmde.metrics.rel_normal import rel_normal as rel_normal_fn | |
# SAWA-H weights (verbatim from evalmde/metrics/sawa_h.py).
# Order matters and must match compute_components: they scale, respectively,
# wkdr_no_align, delta0125_disparity_affine_err, delta0125_depth_affine_err,
# boundary_f1_err, rel_normal.
W_WKDR, W_DDISP_AF, W_DDEPTH_AF, W_BOUND_F1, W_RELNORM = 3.65, 0.18, 0.01, 0.20, 1.94
def compute_components(pr_depth, pr_intr, pr_valid, gt_depth, gt_intr, gt_valid):
    """Verbatim of sawa_h.compute_sawa_h body, returns 5 comps + weighted sum.

    All five components are errors (lower is better); `sawa_h` is their
    fixed-weight linear combination using the module-level W_* constants.
    """
    # Shorthand only — each call still produces a fresh tensor, exactly as
    # the original inline calls did.
    as_t = reformat_as_torch_tensor

    # Ordinal-depth error, no alignment applied.
    wkdr = 1.0 - rel_depth(pr_depth, gt_depth, gt_valid)[1]

    # delta<1.25 error after affine alignment in DISPARITY space.
    disp_aligned = align(1 / as_t(pr_depth), as_t(gt_depth), as_t(gt_valid),
                         'disparity_affine_clip_by_0')
    d_disp = 1.0 - delta0125(disp_aligned, gt_depth, gt_valid)[1]

    # delta<1.25 error after least-squares affine alignment in DEPTH space.
    depth_aligned = align(as_t(pr_depth), as_t(gt_depth), as_t(gt_valid),
                          'depth_affine_lst_sq_clip_by_0')
    d_depth = 1.0 - delta0125(depth_aligned, gt_depth, gt_valid)[1]

    # Boundary F1 (component [1] of the metric's return).
    bound = boundary_f1(as_t(pr_depth), as_t(gt_depth), as_t(gt_valid))[1]

    # Relative-normal error needs point clouds, hence the intrinsics.
    relnorm = rel_normal_fn(depth_to_xyz(gt_intr, gt_depth), gt_valid,
                            depth_to_xyz(pr_intr, pr_depth), pr_valid)

    # Weighted sum computed on the raw values (same precision as before
    # any float() conversion).
    sawa = (W_WKDR * wkdr + W_DDISP_AF * d_disp + W_DDEPTH_AF * d_depth
            + W_BOUND_F1 * bound + W_RELNORM * relnorm)

    names = ('wkdr_no_align', 'delta0125_disparity_affine_err',
             'delta0125_depth_affine_err', 'boundary_f1_err', 'rel_normal')
    values = (wkdr, d_disp, d_depth, bound, relnorm)
    out = {name: float(val) for name, val in zip(names, values)}
    out['sawa_h'] = float(sawa)
    return out
def affine_align_to_gt(pr_d, gt_d, valid, min_valid=100):
    """LSQ-fit (a, b) so that a*pr + b ≈ gt on valid pixels.

    Parameters
    ----------
    pr_d, gt_d : np.ndarray
        Predicted / ground-truth depth maps, same shape.
    valid : np.ndarray of bool
        Pixels eligible for the fit (further restricted to finite values
        and |pred| < 1e6 to keep the normal equations well-conditioned).
    min_valid : int, optional
        Minimum number of usable pixels for a trustworthy fit (default 100,
        the previously hard-coded threshold). Below it the prediction is
        returned unchanged with the identity transform (a=1, b=0).

    Returns
    -------
    (aligned, a, b) : (np.float32 ndarray, float, float)
        Aligned depth, clipped to >= 1e-4 so downstream 1/depth and
        log-depth operations stay finite, plus the fitted scale and shift.
    """
    usable = valid & np.isfinite(pr_d) & np.isfinite(gt_d) & (np.abs(pr_d) < 1e6)
    if usable.sum() < min_valid:
        # Too few pixels to trust a fit: fall back to the raw prediction.
        return pr_d.astype(np.float32), 1.0, 0.0
    p = pr_d[usable].astype(np.float64)
    g = gt_d[usable].astype(np.float64)
    A = np.stack([p, np.ones_like(p)], axis=-1)
    (a, b), *_ = np.linalg.lstsq(A, g, rcond=None)
    aligned = (pr_d.astype(np.float64) * a + b).clip(1e-4, None)
    return aligned.astype(np.float32), float(a), float(b)
def main(gt_root, pred_root, model_name, output):
    """Evaluate one model over every scene under pred_root/model_name.

    For each scene the SAWA-H components are computed twice:
      raw     — prediction fed verbatim (official EvalMDE protocol)
      aligned — prediction affine-aligned to GT first (fair cross-model track)
    Per-scene rows, the weight table, and per-track means are written to
    `output` as JSON.

    Parameters
    ----------
    gt_root : str or Path
        Directory containing <scene>/gt_depth.npz.
    pred_root : str or Path
        Directory containing <model_name>/<scene>/pred_depth.npz.
    model_name : str
        Subdirectory of pred_root to evaluate.
    output : str or Path
        Destination JSON path; parent directories are created as needed.
    """
    gt_root, pred_root = Path(gt_root), Path(pred_root) / model_name
    scenes = sorted(d.name for d in pred_root.iterdir() if (d / 'pred_depth.npz').exists())
    print(f'Found {len(scenes)} scenes for {model_name}')
    rows = []
    for sc in scenes:
        gt_d, gt_i, gt_v = load_data(str(gt_root / sc / 'gt_depth.npz'))
        pr_d, pr_i, pr_v = load_data(str(pred_root / sc / 'pred_depth.npz'))
        raw = compute_components(pr_d, pr_i, pr_v, gt_d, gt_i, gt_v)
        # Fit on pixels valid in BOTH maps.
        pr_d_aln, scale, shift = affine_align_to_gt(pr_d, gt_d, pr_v & gt_v)
        # Aligned pred uses gt intrinsics (we don't realign focal — only depth values)
        aln = compute_components(pr_d_aln, gt_i, pr_v, gt_d, gt_i, gt_v)
        aln['_align_scale'] = scale
        aln['_align_shift'] = shift
        rows.append({'scene': sc, 'raw': raw, 'aligned': aln})
        print(f' {sc}: '
              f'sawa_h raw={raw["sawa_h"]:.3f} aln={aln["sawa_h"]:.3f} | '
              f'relnorm raw={raw["rel_normal"]:.3f} aln={aln["rel_normal"]:.3f} | '
              f'boundF1_err raw={raw["boundary_f1_err"]:.3f} aln={aln["boundary_f1_err"]:.3f}')
    n = max(len(rows), 1)  # guard the means against an empty scene list
    keys = ['wkdr_no_align', 'delta0125_disparity_affine_err', 'delta0125_depth_affine_err',
            'boundary_f1_err', 'rel_normal', 'sawa_h']
    mean_raw = {k: sum(r['raw'][k] for r in rows) / n for k in keys}
    mean_aln = {k: sum(r['aligned'][k] for r in rows) / n for k in keys}
    summary = {
        'model': model_name,
        'n_scenes': len(rows),
        'sawa_h_weights': {
            'wkdr_no_align': W_WKDR, 'delta0125_disparity_affine_err': W_DDISP_AF,
            'delta0125_depth_affine_err': W_DDEPTH_AF,
            'boundary_f1_err': W_BOUND_F1, 'rel_normal': W_RELNORM,
        },
        'per_scene': rows,
        'mean_raw': mean_raw,
        'mean_aligned': mean_aln,
    }
    out_path = Path(output)
    out_path.parent.mkdir(parents=True, exist_ok=True)
    # BUG FIX: the original passed a bare open() to json.dump and never
    # closed the handle — use a context manager so the file is flushed/closed.
    with out_path.open('w') as fh:
        json.dump(summary, fh, indent=2)
    print(f'\nMean RAW : {mean_raw}')
    print(f'Mean ALIGNED: {mean_aln}')
    print(f'Saved → {output}')
if __name__ == '__main__':
    # BUG FIX: the original called main() with no arguments even though
    # main() requires four — the script crashed with a TypeError on launch
    # (the click decorators its import implies were evidently lost).
    # Use a stdlib argparse front-end instead.
    import argparse

    parser = argparse.ArgumentParser(
        description='Dual-track (raw / affine-aligned) SAWA-H evaluation.')
    parser.add_argument('--gt-root', required=True,
                        help='Directory containing <scene>/gt_depth.npz')
    parser.add_argument('--pred-root', required=True,
                        help='Directory containing <model>/<scene>/pred_depth.npz')
    parser.add_argument('--model-name', required=True,
                        help='Subdirectory of pred-root to evaluate')
    parser.add_argument('--output', required=True,
                        help='Destination JSON file')
    args = parser.parse_args()
    main(args.gt_root, args.pred_root, args.model_name, args.output)