Other
PyTorch
3d-reconstruction
wireframe
building
point-cloud
s23dr
cvpr-2026
jacklangerman's picture
prep release
4946666
#!/usr/bin/env python3
"""Cache compact scenes from HoHo22k shards to training-ready .pt files.
Streams samples from the public `usm3d/hoho22k_2026_trainval` dataset, runs
`build_compact_scene` (see point_fusion.py), precomputes priority group_id
and semantic class_id, and saves one .pt per scene.
Stage 1 of the dataset pipeline. See make_sampled_cache.py for stage 2.
Usage:
python -m s23dr_2026_example.cache_scenes --out-dir cache/full --split train
python -m s23dr_2026_example.cache_scenes --out-dir cache/full_val --split validation
Cache format per .pt file:
xyz: float32 [P, 3] all points in world space
source: uint8 [P] 0=colmap, 1=depth
group_id: int8 [P] priority tier 0-4, -1=excluded
class_id: uint8 [P] one-hot class index (0-12)
behind_gest_id: int16 [P] behind-gestalt id (-1 if none)
visible_src: uint8 [P] 1=gestalt, 2=ade
visible_id: int16 [P] class id within space
n_views_voted: uint8 [P] number of views that voted
vote_frac: float32 [P] fraction of votes
center: float32 [3] smart normalization center
scale: float32 scalar smart normalization scale
gt_vertices: float32 [V, 3] ground truth wireframe vertices
gt_edges: int32 [E, 2] ground truth wireframe edge indices
"""
from __future__ import annotations
import argparse
import time
from pathlib import Path
import numpy as np
import torch
from .point_fusion import (
FuserConfig, build_compact_scene,
GEST_ID_TO_NAME, ADE_ID_TO_NAME, NUM_GEST,
)
# ---------------------------------------------------------------------------
# Semantic class encoding: 11 structural + 1 other_house + 1 non_house = 13
# ---------------------------------------------------------------------------
# Each structural gestalt class gets its own one-hot bit.
STRUCTURAL_CLASSES = (
"apex", "eave_end_point", "flashing_end_point", # point classes (tier 0)
"rake", "ridge", "eave", "hip", "valley", # roof edges (tier 1)
"flashing", "step_flashing",
"roof", # roof face (tier 2)
)
# Index 11 = other house part (door, window, siding, etc.)
# Index 12 = non-house / ADE / unlabeled
NUM_SEMANTIC_CLASSES = len(STRUCTURAL_CLASSES) + 2 # 13
# Priority tiers (same as tokenizer.py)
_GEST_NAME_TO_ID = {n: i for i, n in enumerate(GEST_ID_TO_NAME)}
_POINT_IDS = {_GEST_NAME_TO_ID[n] for n in ("apex", "eave_end_point", "flashing_end_point") if n in _GEST_NAME_TO_ID}
_EDGE_IDS = {_GEST_NAME_TO_ID[n] for n in ("rake", "ridge", "eave", "hip", "valley", "flashing", "step_flashing") if n in _GEST_NAME_TO_ID}
_FACE_IDS = {_GEST_NAME_TO_ID[n] for n in ("roof",) if n in _GEST_NAME_TO_ID}
_HOUSE_IDS = {_GEST_NAME_TO_ID[n] for n in (
"apex", "eave_end_point", "flashing_end_point",
"rake", "ridge", "eave", "hip", "valley", "flashing", "step_flashing",
"roof", "door", "garage", "window", "shutter", "fascia", "soffit",
"horizontal_siding", "vertical_siding", "brick", "concrete",
"other_wall", "trim", "post", "ground_line",
) if n in _GEST_NAME_TO_ID}
_ADE_NAME_TO_ID = {n.lower(): i for i, n in enumerate(ADE_ID_TO_NAME)}
_ADE_HOUSE_IDS = {_ADE_NAME_TO_ID[n] for n in ("building;edifice", "house", "wall", "windowpane;window", "door;double;door") if n in _ADE_NAME_TO_ID}
_UNCLS_ID = _GEST_NAME_TO_ID.get("unclassified", -1)
# Map structural gestalt names to one-hot index
_STRUCTURAL_ONEHOT = {}
for idx, name in enumerate(STRUCTURAL_CLASSES):
gid = _GEST_NAME_TO_ID.get(name)
if gid is not None:
_STRUCTURAL_ONEHOT[gid] = idx
def _compute_group_and_class(visible_src, visible_id, behind_id, source):
"""Compute priority group_id and semantic class_id per point (vectorized).
Args:
visible_src: uint8 [P] -- 0=unlabeled, 1=gestalt, 2=ade
visible_id: int16 [P] -- class id within gestalt or ade space
behind_id: int16 [P] -- behind-gestalt id (-1 if none)
source: uint8 [P] -- 0=colmap, 1=depth
Returns:
group_id: int8 [P] -- priority tier 0-4, -1 for excluded (unclassified)
class_id: uint8 [P] -- one-hot class index 0-12
"""
P = len(visible_src)
vsrc = visible_src.astype(np.int32)
vid = visible_id.astype(np.int32)
bid = behind_id.astype(np.int32)
# Effective gestalt id: prefer visible gestalt, fall back to behind
gest_id = np.full(P, -1, dtype=np.int32)
has_vis_gest = (vsrc == 1) & (vid >= 0)
has_behind = (bid >= 0) & ~has_vis_gest
gest_id[has_vis_gest] = vid[has_vis_gest]
gest_id[has_behind] = bid[has_behind]
# Exclude unclassified points
if _UNCLS_ID >= 0:
is_uncls = ((vsrc == 1) & (vid == _UNCLS_ID)) | (bid == _UNCLS_ID)
gest_id[is_uncls] = -1 # force excluded
# Build lookup arrays for gestalt id -> group and gestalt id -> class
max_gid = NUM_GEST
gid_to_group = np.full(max_gid, 4, dtype=np.int8) # default: tier 4
gid_to_class = np.full(max_gid, NUM_SEMANTIC_CLASSES - 1, dtype=np.uint8) # default: non-house
for gid in _POINT_IDS:
gid_to_group[gid] = 0
for gid in _EDGE_IDS:
gid_to_group[gid] = 1
for gid in _FACE_IDS:
gid_to_group[gid] = 2
for gid in _HOUSE_IDS - _POINT_IDS - _EDGE_IDS - _FACE_IDS:
gid_to_group[gid] = 3
for gid, onehot_idx in _STRUCTURAL_ONEHOT.items():
gid_to_class[gid] = onehot_idx
for gid in _HOUSE_IDS - set(_STRUCTURAL_ONEHOT.keys()):
gid_to_class[gid] = len(STRUCTURAL_CLASSES) # other_house
# Apply lookup for points with valid gestalt ids
has_gest = gest_id >= 0
group_id = np.full(P, 4, dtype=np.int8) # default: tier 4
class_id = np.full(P, NUM_SEMANTIC_CLASSES - 1, dtype=np.uint8) # default: non-house
group_id[has_gest] = gid_to_group[gest_id[has_gest]]
class_id[has_gest] = gid_to_class[gest_id[has_gest]]
# ADE house points (no gestalt) get tier 3 + class_id = other_house
ade_house_arr = np.array(sorted(_ADE_HOUSE_IDS), dtype=np.int32)
is_ade_house = ~has_gest & (vsrc == 2) & (vid >= 0) & np.isin(vid, ade_house_arr)
group_id[is_ade_house] = 3
class_id[is_ade_house] = len(STRUCTURAL_CLASSES) # other_house (index 11)
# Mark excluded points (unclassified) as -1
if _UNCLS_ID >= 0:
group_id[is_uncls] = -1
class_id[is_uncls] = NUM_SEMANTIC_CLASSES - 1
return group_id, class_id
def _compute_smart_center_scale(xyz, source, mad_k=2.5, percentile=95.0,
max_points=8000):
"""Compute normalization center and scale from depth points with MAD filter."""
depth_mask = source == 1
ref = xyz[depth_mask] if depth_mask.any() else xyz
if ref.shape[0] == 0:
center = xyz.mean(axis=0)
scale = max(np.linalg.norm(xyz - center, axis=1).max(), 1e-6)
return center.astype(np.float32), np.float32(scale)
if ref.shape[0] > max_points:
idx = np.random.choice(ref.shape[0], max_points, replace=False)
ref = ref[idx]
center0 = np.median(ref, axis=0)
dist = np.linalg.norm(ref - center0, axis=1)
med = np.median(dist)
mad = max(np.median(np.abs(dist - med)), 1e-6)
inliers = dist <= (med + mad_k * mad)
if inliers.any():
ref = ref[inliers]
# Percentile bounding box
lo_f = (100.0 - percentile) * 0.5 / 100.0
sorted_v = np.sort(ref, axis=0)
n = sorted_v.shape[0]
lo_idx = max(0, min(n - 1, int(lo_f * (n - 1))))
hi_idx = max(0, min(n - 1, int((1.0 - lo_f) * (n - 1))))
low = sorted_v[lo_idx]
high = sorted_v[hi_idx]
center = 0.5 * (low + high)
scale = max(np.sqrt(((high - low) ** 2).sum()), 1e-6)
return center.astype(np.float32), np.float32(scale)
# ---------------------------------------------------------------------------
# Dataset pipeline stage 1: raw HF sample -> cached .pt
# ---------------------------------------------------------------------------
def _process_one(sample, cfg):
"""Fuse a single HF sample into a cache dict. Returns (order_id, dict) or None."""
rng = np.random.RandomState()
n_edges = len(sample.get("wf_edges", []))
if n_edges == 0 or n_edges > 64:
return None
scene = build_compact_scene(sample, cfg, rng=rng)
if scene is None:
return None
gt_v = scene.get("gt_vertices")
gt_e = scene.get("gt_edges")
if gt_v is None or gt_e is None or len(gt_e) == 0:
return None
xyz = scene["xyz"]
source = scene["source"]
group_id, class_id = _compute_group_and_class(
scene["visible_src"], scene["visible_id"], scene["behind_gest_id"], source)
center, scale = _compute_smart_center_scale(xyz, source)
gt_edge_classes = np.asarray(sample["wf_classifications"], dtype=np.int64)
return sample["order_id"], {
"xyz": xyz.astype(np.float32),
"source": source.astype(np.uint8),
"group_id": group_id,
"class_id": class_id,
"behind_gest_id": scene["behind_gest_id"].astype(np.int16),
"visible_src": scene["visible_src"].astype(np.uint8),
"visible_id": scene["visible_id"].astype(np.int16),
"n_views_voted": scene["n_views_voted"],
"vote_frac": scene["vote_frac"],
"center": center,
"scale": scale,
"gt_vertices": gt_v.astype(np.float32),
"gt_edges": gt_e.astype(np.int32),
"gt_edge_classes": gt_edge_classes,
}
def main():
p = argparse.ArgumentParser(description="Stage 1: HoHo22k -> cached .pt files")
p.add_argument("--out-dir", required=True, help="Output directory for .pt files")
p.add_argument("--split", default="train", choices=["train", "validation"])
p.add_argument("--limit", type=int, default=0, help="Stop after N samples (0 = all)")
p.add_argument("--depth-per-view", type=int, default=8000)
p.add_argument("--skip-existing", action="store_true")
args = p.parse_args()
out_dir = Path(args.out_dir)
out_dir.mkdir(parents=True, exist_ok=True)
existing = {p.stem for p in out_dir.glob("*.pt")} if args.skip_existing else set()
from datasets import load_dataset
print(f"Streaming usm3d/hoho22k_2026_trainval split={args.split}...")
ds = load_dataset("usm3d/hoho22k_2026_trainval",
streaming=True, trust_remote_code=True, split=args.split)
cfg = FuserConfig(depth_points_per_view=args.depth_per_view)
saved, skipped = 0, 0
t0 = time.perf_counter()
for i, sample in enumerate(ds):
if args.limit > 0 and i >= args.limit:
break
oid = sample["order_id"]
if oid in existing:
skipped += 1
continue
result = _process_one(sample, cfg)
if result is None:
skipped += 1
continue
order_id, data = result
torch.save(data, out_dir / f"{order_id}.pt")
saved += 1
if saved % 100 == 0:
rate = saved / (time.perf_counter() - t0)
print(f" saved {saved} (skipped {skipped}) [{rate:.1f}/s]")
elapsed = time.perf_counter() - t0
print(f"Done. Saved {saved}, skipped {skipped} in {elapsed:.0f}s.")
if __name__ == "__main__":
main()