bdck
/

learn_region_grow

Model card Files Files and versions

xet

Community

bdck committed 1 day ago

Commit

e3423d1

verified ·

1 Parent(s): 6f54288

Upload learn_region_grow/stage_data.py

Browse files

Files changed (1) hide show

learn_region_grow/stage_data.py +175 -0

learn_region_grow/stage_data.py ADDED Viewed

	@@ -0,0 +1,175 @@

+"""
+Training data generator.
+Simulates the region growing process on ground-truth labeled point clouds to create
+supervised training examples: (inlier_points, neighbor_points, add_labels, remove_labels).
+The key trick in the paper is **controlled noise injection**:
+    - add_mistake_prob : probability of including an outlier in the inlier set
+    - remove_mistake_prob : probability of removing a true inlier
+This forces the network to learn to *recover from errors*, making the growing
+process robust to early mistakes (e.g. a bad seed or an initial over-growth).
+"""
+import numpy as np
+from typing import Tuple, Optional
+from pathlib import Path
+import h5py
+from .preprocess import voxel_equalize, compute_normals_and_curvature, build_feature_vector
+from .utils import sample_or_pad, center_features
def stage_labeled_cloud(xyz: np.ndarray, rgb: Optional[np.ndarray],
                        labels: np.ndarray,
                        add_mistake_prob: float = 0.2,
                        remove_mistake_prob: float = 0.2,
                        resolution: float = 0.1,
                        num_inlier: int = 512,
                        num_neighbor: int = 512,
                        seeds_per_instance: int = 5,
                        max_steps_per_seed: int = 20,
                        seed: Optional[int] = None) -> Tuple[np.ndarray, np.ndarray, np.ndarray, np.ndarray]:
    """
    Generate supervised training tuples from a labeled point cloud.

    For each instance label >= 0 that has at least 5 points after voxel
    equalization, pick up to `seeds_per_instance` random seed points inside the
    instance and simulate up to `max_steps_per_seed` steps of noisy region
    growing. Every simulated step emits one training example.

    The emitted labels are the ground truth for the current state
    (add = neighbor belongs to the instance, remove = region point does not).
    Noise is injected only into the simulated add/remove decisions that update
    the region, so later examples contain wrongly added or wrongly removed
    points together with the labels that correct them.

    Parameters
    ----------
    xyz : np.ndarray, shape (N, 3)
    rgb : np.ndarray, shape (N, 3), uint8 or None
    labels : np.ndarray, shape (N,), int
        Instance IDs. Points with a label < 0 are never used as an instance
        to grow.
    add_mistake_prob : float
        Per-neighbor probability that the simulated add decision is inverted.
    remove_mistake_prob : float
        Per-region-point probability that the simulated remove decision is
        inverted.
    resolution : float
        Voxel grid resolution.
    num_inlier / num_neighbor : int
        Network input sizes.
    seeds_per_instance : int
    max_steps_per_seed : int
    seed : int or None
        Passed to ``np.random.default_rng`` to make seed selection and noise
        injection reproducible. ``None`` (default) uses fresh OS entropy.

    Returns
    -------
    inlier_batches  : np.ndarray, shape (M, num_inlier, 13)
    neighbor_batches: np.ndarray, shape (M, num_neighbor, 13)
    add_labels      : np.ndarray, shape (M, num_neighbor), int64
    remove_labels   : np.ndarray, shape (M, num_inlier), int64
    """
    # Preprocess: equalize density, then compute per-point features.
    eq_xyz, eq_idx, voxel_map = voxel_equalize(xyz, resolution)
    eq_labels = labels[eq_idx]
    normals, curvature = compute_normals_and_curvature(eq_xyz, resolution)
    eq_rgb = rgb[eq_idx] if rgb is not None else None
    features = build_feature_vector(eq_xyz, eq_rgb, normals, curvature)

    # One generator for the whole call so a given `seed` reproduces the output.
    rng = np.random.default_rng(seed)

    all_inliers = []
    all_neighbors = []
    all_add = []
    all_remove = []

    for inst in np.unique(eq_labels[eq_labels >= 0]):
        inst_indices = np.where(eq_labels == inst)[0]
        if len(inst_indices) < 5:
            continue
        n_seeds = min(seeds_per_instance, len(inst_indices))
        for seed_idx in rng.choice(inst_indices, n_seeds, replace=False):
            region = {int(seed_idx)}
            for _ in range(max_steps_per_seed):
                if not region:
                    # Noisy removals emptied the region; nothing left to grow.
                    break

                # Snapshot the region ONCE per step. Labels, features and the
                # removal update below must all refer to the same ordering.
                region_idx = np.array(sorted(region), dtype=np.int64)
                # Explicit int64 keeps an empty neighbor list usable as an index.
                neighbor_idx = np.array(
                    _boundary_neighbors(region, eq_xyz, voxel_map, resolution),
                    dtype=np.int64)

                # Ground-truth targets for the current (possibly corrupted) state.
                add_target = eq_labels[neighbor_idx] == inst
                remove_target = eq_labels[region_idx] != inst

                # Simulated decisions: ground truth with random mistakes. These
                # only drive the state update; they are not used as targets.
                add_decision = _flip_mask(add_target, add_mistake_prob, rng)
                remove_decision = _flip_mask(remove_target, remove_mistake_prob, rng)

                # Build input tensors
                inlier_pts = features[region_idx]
                if len(neighbor_idx):
                    neighbor_pts = features[neighbor_idx]
                else:
                    neighbor_pts = np.zeros((0, 13), dtype=np.float32)
                inlier_s = sample_or_pad(inlier_pts, num_inlier)
                neighbor_s = sample_or_pad(neighbor_pts, num_neighbor)
                inlier_c, neighbor_c = center_features(inlier_s, neighbor_s)

                # Pad labels to the fixed network sizes.
                # NOTE(review): writing labels for the first n_real / i_real rows
                # assumes sample_or_pad keeps input rows in order (truncating or
                # padding at the end) -- confirm against utils.sample_or_pad.
                n_real = min(len(neighbor_idx), num_neighbor)
                i_real = min(len(region_idx), num_inlier)
                add_label = np.zeros(num_neighbor, dtype=np.int64)
                remove_label = np.zeros(num_inlier, dtype=np.int64)
                add_label[:n_real] = add_target[:n_real]
                remove_label[:i_real] = remove_target[:i_real]

                all_inliers.append(inlier_c)
                all_neighbors.append(neighbor_c)
                all_add.append(add_label)
                all_remove.append(remove_label)

                # Advance the simulated state with the noisy decisions. Only
                # points that received a label above are eligible. Neighbors are
                # never members of `region`, so the two updates cannot conflict.
                region.update(neighbor_idx[:n_real][add_decision[:n_real]].tolist())
                region.difference_update(
                    region_idx[:i_real][remove_decision[:i_real]].tolist())

    if not all_inliers:
        # Return empty arrays with correct shape
        return (np.zeros((0, num_inlier, 13), dtype=np.float32),
                np.zeros((0, num_neighbor, 13), dtype=np.float32),
                np.zeros((0, num_neighbor), dtype=np.int64),
                np.zeros((0, num_inlier), dtype=np.int64))
    return (np.stack(all_inliers), np.stack(all_neighbors),
            np.stack(all_add), np.stack(all_remove))
def _flip_mask(mask: np.ndarray, prob: float, rng: np.random.Generator) -> np.ndarray:
    """Return a copy of `mask` in which each entry is inverted with probability `prob`.

    Every entry is flipped independently (True -> False and False -> True), so
    `prob` is a per-entry probability, not an exact fraction of the entries.
    The input is never modified.

    Parameters
    ----------
    mask : np.ndarray
        Boolean mask. Non-boolean input is interpreted by truthiness first, so
        that inversion is a logical NOT rather than a bitwise one.
    prob : float
        Flip probability. Values <= 0 flip nothing; values >= 1 flip everything.
    rng : np.random.Generator
        Source of randomness; one uniform sample is drawn per mask entry.

    Returns
    -------
    np.ndarray
        New boolean array with the same shape as `mask`.
    """
    mask = np.asarray(mask, dtype=bool)
    # Drawing with the mask's shape (not len) keeps this correct for any rank.
    flip = rng.random(mask.shape) < prob
    # XOR with the flip selector inverts exactly the selected entries.
    return mask ^ flip
def _boundary_neighbors(region: set, xyz: np.ndarray, voxel_map: dict, resolution: float):
    """Find points in voxels 6-adjacent to the region that are not in the region.

    Parameters
    ----------
    region : set of int
        Indices (into `xyz`) of the points currently in the region.
    xyz : np.ndarray, shape (N, 3)
        Point coordinates.
    voxel_map : dict
        Maps an integer voxel key ``(i, j, k)`` to a point index. Keys are
        looked up as ``round(xyz / resolution)``.
        NOTE(review): this assumes the map was built with the same rounding
        convention -- confirm against the function that produces it.
    resolution : float
        Voxel edge length used to convert coordinates to voxel keys.

    Returns
    -------
    list of int
        Sorted, duplicate-free indices of the adjacent points. Empty when the
        region is empty or has no occupied face-adjacent voxel outside itself.
    """
    if not region:
        return []

    # Face-sharing (6-connected) voxel offsets.
    offsets = ((-1, 0, 0), (1, 0, 0), (0, -1, 0), (0, 1, 0), (0, 0, -1), (0, 0, 1))

    # Compute all voxel keys in one vectorized pass, then de-duplicate.
    region_pts = np.array(list(region), dtype=np.int64)
    keys = np.round(xyz[region_pts] / resolution).astype(int)
    voxels = set(map(tuple, keys.tolist()))

    adjacent = set()
    for vx, vy, vz in voxels:
        for dx, dy, dz in offsets:
            point = voxel_map.get((vx + dx, vy + dy, vz + dz))
            if point is not None and point not in region:
                adjacent.add(int(point))
    # Sorted so the result does not depend on set insertion history.
    return sorted(adjacent)
def save_staged_h5(path: str, inliers, neighbors, add_labels, remove_labels):
    """Write the four staged training arrays into a single HDF5 file.

    Missing parent directories of `path` are created first. The file is opened
    in 'w' mode and receives one gzip-compressed dataset per array, named
    'inliers', 'neighbors', 'add_labels' and 'remove_labels'.
    """
    target = Path(path)
    target.parent.mkdir(parents=True, exist_ok=True)
    # (dataset name, array) pairs, written in this order.
    datasets = (
        ('inliers', inliers),
        ('neighbors', neighbors),
        ('add_labels', add_labels),
        ('remove_labels', remove_labels),
    )
    with h5py.File(target, 'w') as handle:
        for name, array in datasets:
            handle.create_dataset(name, data=array, compression='gzip')