jskvrna commited on
Commit
33113fd
·
1 Parent(s): 2affd35

Preparation of the files for the public release.

Browse files
This view is limited to 50 files because it contains too many changes. See the raw diff for the full change set.
Files changed (50) hide show
  1. color_visu.py +8 -0
  2. end_to_end.py +24 -0
  3. end_to_end_deeper.py +0 -946
  4. fast_pointnet.py +0 -520
  5. fast_pointnet_class.py +7 -0
  6. fast_pointnet_class_10d.py +0 -405
  7. fast_pointnet_class_10d_2048.py +0 -405
  8. fast_pointnet_class_10d_deeper.py +0 -438
  9. fast_pointnet_class_deeper.py +0 -527
  10. fast_pointnet_class_v2.py +0 -508
  11. fast_pointnet_v2.py +11 -1
  12. fast_pointnet_v3.py +0 -605
  13. fast_voxel.py +0 -591
  14. find_best_results.py +7 -0
  15. fully_deep.py +0 -1082
  16. generate_pcloud_dataset.py +12 -0
  17. hoho_cpu.batch +0 -17
  18. hoho_cpu_gpu_intel.batch +0 -19
  19. hoho_gpu.batch +0 -19
  20. hoho_gpu_class.batch +0 -19
  21. hoho_gpu_class_10d.batch +0 -19
  22. hoho_gpu_class_10d_2048.batch +0 -19
  23. hoho_gpu_class_10d_deeper.batch +0 -19
  24. hoho_gpu_h200.batch +0 -19
  25. hoho_gpu_voxel.batch +0 -19
  26. initial_epoch_100.pth +0 -3
  27. initial_epoch_100_class_v2.pth +0 -3
  28. initial_epoch_100_v2.pth +0 -3
  29. initial_epoch_100_v2_aug.pth +0 -3
  30. initial_epoch_60.pth +0 -3
  31. initial_epoch_60_v2.pth +0 -3
  32. iterate.batch +0 -50
  33. pnet.pth +2 -2
  34. predict.py +12 -0
  35. predict_end.py +0 -73
  36. script.py +1 -1
  37. train.py +19 -10
  38. train_end.py +0 -73
  39. train_pnet.py +0 -13
  40. train_pnet_class.py +13 -1
  41. train_pnet_class_cluster.py +0 -13
  42. train_pnet_class_cluster_10d.py +0 -13
  43. train_pnet_class_cluster_10d_2048.py +0 -13
  44. train_pnet_class_cluster_10d_deeper.py +0 -13
  45. train_pnet_cluster.py +0 -10
  46. train_pnet_cluster_class_v2.py +0 -10
  47. train_pnet_cluster_v3.py +0 -10
  48. train_pnet_cluster_v2.py → train_pnet_v2.py +2 -2
  49. train_voxel.py +0 -13
  50. train_voxel_cluster.py +0 -13
color_visu.py CHANGED
@@ -1,3 +1,11 @@
 
 
 
 
 
 
 
 
1
  import cv2
2
  import numpy as np
3
 
 
1
+ """
2
+ This file generates a color legend image for building component visualization.
3
+ It creates a PNG image showing color swatches and labels for two categories:
4
+ 1. Gestalt Colors - for various building components like roof, walls, windows, etc.
5
+ 2. Edge Colors - for architectural edges like ridges, eaves, hips, valleys, etc.
6
+ The legend helps visualize the color mappings used in building analysis and annotation.
7
+ """
8
+
9
  import cv2
10
  import numpy as np
11
 
end_to_end.py CHANGED
@@ -1,3 +1,27 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
  import os
2
  import pickle
3
  import torch
 
1
+ """
2
+ End-to-End Voxel-Based Vertex Detection Pipeline
3
+
4
+ This file implements a complete pipeline for detecting wireframe vertices from 3D point clouds using
5
+ a voxel-based deep learning approach. The pipeline includes:
6
+
7
+ 1. Data preprocessing: Converting 14D point clouds into 3D voxel grids with averaged features
8
+ 2. Ground truth generation: Creating binary vertex labels and refinement targets from wireframe vertices
9
+ 3. Model architecture: VoxelUNet with encoder-decoder structure and 1x1x1 bottleneck for vertex detection
10
+ 4. Training: Combined loss function with BCE, Dice loss, and MSE for offset regression
11
+ 5. Inference: Predicting vertex locations from new point clouds with visualization
12
+
13
+ Key components:
14
+ - Voxelization with configurable grid size and metric voxel size
15
+ - Per-voxel MLP before convolutional processing
16
+ - Gaussian smoothing of ground truth labels
17
+ - Refinement prediction for sub-voxel accuracy
18
+ - PyVista-based visualization for results analysis
19
+
20
+ Usage:
21
+ - Set inference=False to train a new model
22
+ - Set inference=True to run predictions on existing data
23
+ """
24
+
25
  import os
26
  import pickle
27
  import torch
end_to_end_deeper.py DELETED
@@ -1,946 +0,0 @@
1
- import os
2
- import pickle
3
- import torch
4
- import torch.nn as nn
5
- import torch.optim as optim
6
- import numpy as np
7
- from typing import Dict, Any, Tuple, List
8
- from torch.utils.data import Dataset, DataLoader
9
- import glob
10
- import pyvista as pv
11
- import torch
12
-
13
- # [Previous code from the existing document remains unchanged up to CombinedLoss class]
14
- # ... (save_data, load_data, get_data_files, voxelize_points, create_ground_truth, VoxelUNet, VoxelDataset) ...
15
-
16
def save_data(dict_to_save: Dict[str, Any], filename: str, data_folder: str = "data") -> None:
    """Serialize *dict_to_save* to ``<data_folder>/<filename>.pkl``.

    Creates *data_folder* if it does not already exist.

    Args:
        dict_to_save: Arbitrary picklable dictionary.
        filename: Base file name, without the ``.pkl`` extension.
        data_folder: Destination directory (created on demand).
    """
    os.makedirs(data_folder, exist_ok=True)
    # Bug fix: the path previously used a fixed literal instead of `filename`,
    # so every call wrote to the same file and clobbered earlier saves.
    filepath = os.path.join(data_folder, f"{filename}.pkl")
    with open(filepath, 'wb') as f:
        pickle.dump(dict_to_save, f)
    #print(f"Data saved to {filepath}")
23
-
24
def load_data(filepath: str) -> Dict[str, Any]:
    """Deserialize and return the dictionary stored in the pickle file at *filepath*."""
    with open(filepath, 'rb') as handle:
        return pickle.load(handle)
30
-
31
def get_data_files(data_folder: str = "data", pattern: str = "*.pkl") -> List[str]:
    """Return the paths of files in *data_folder* whose names match the glob *pattern*."""
    return glob.glob(os.path.join(data_folder, pattern))
37
-
38
def voxelize_points(points: np.ndarray,
                    grid_size_xy: int = 64,
                    voxel_size_metric: float = 0.25
                    ) -> Tuple[torch.Tensor, np.ndarray, Dict[str, Any]]:
    """
    Voxelize a 14D point cloud into a 3D grid with a fixed number of voxels and fixed metric voxel size.

    The grid is CUBIC: X, Y and Z all have grid_size_xy voxels (the code below
    sets dim_z = grid_size_xy; an earlier note about Z having grid_size_xy / 4
    voxels no longer matches the implementation).
    The point cloud is centered within this metric grid. Points outside are discarded.

    Per-voxel features are SUMMED, not averaged: channels 0-2 accumulate each
    point's offset from the voxel center (in grid units), channels 3+ accumulate
    the remaining 11 input features of every point assigned to that voxel.

    Args:
        points: (N, 14) array where first 3 dims are xyz (original coordinates).
        grid_size_xy: Number of voxels along X and Y dimensions (and Z — cubic grid).
        voxel_size_metric: The physical size of each voxel (e.g., 0.5 units).

    Returns:
        voxel_grid: (NUM_FEATURES, dim_z, dim_y, dim_x) tensor of summed features.
        voxel_indices_for_points: (N_points_in_grid, 3) integer voxel indices (z, y, x)
                                  for each input point that falls within the grid.
        scale_info: Dict with transformation parameters:
            'grid_origin_metric': Real-world metric coordinate of the corner of voxel [0,0,0] (x,y,z).
            'voxel_size_metric': The metric size of a voxel.
            'grid_dims_voxels': Tuple (dim_x, dim_y, dim_z) representing number of voxels.
            'pc_centroid_metric': Centroid of the input point cloud (x,y,z).
    """
    NUM_FEATURES = 14
    dim_x = grid_size_xy
    dim_y = grid_size_xy
    dim_z = grid_size_xy  # Cubic grid, matching create_ground_truth and VoxelDataset usage

    if dim_z == 0: dim_z = 1  # Ensure at least one voxel in Z

    # grid_dims_voxels stores (num_voxels_x, num_voxels_y, num_voxels_z)
    grid_dims_voxels = np.array([dim_x, dim_y, dim_z], dtype=int)

    def _get_empty_return(reason: str = ""):
        # Fallback for an empty input cloud: all-zero grid, no indices,
        # origin/centroid at zero. Voxel grid shape is fixed: (NUM_FEATURES, dim_z, dim_y, dim_x).
        voxel_grid_empty = torch.zeros(NUM_FEATURES, grid_dims_voxels[2], grid_dims_voxels[1], grid_dims_voxels[0], dtype=torch.float32)
        voxel_indices_empty = np.empty((0, 3), dtype=int)
        scale_info_empty = {
            'grid_origin_metric': np.zeros(3, dtype=float),
            'voxel_size_metric': voxel_size_metric,
            'grid_dims_voxels': tuple(grid_dims_voxels.tolist()),
            'pc_centroid_metric': np.zeros(3, dtype=float),
        }
        return voxel_grid_empty, voxel_indices_empty, scale_info_empty

    if points.shape[0] == 0:
        return _get_empty_return("Initial empty point cloud")

    xyz = points[:, :3]  # Metric coordinates of points
    features_other = points[:, 3:]  # Other features

    pc_centroid_metric = xyz.mean(axis=0)  # (cx, cy, cz)

    # Calculate the metric origin of the grid such that the point cloud centroid
    # aligns with the center of the metric grid.
    # grid_metric_span is (total_metric_width_x, total_metric_height_y, total_metric_depth_z)
    grid_metric_span = grid_dims_voxels * voxel_size_metric
    # grid_origin_metric is the real-world (x,y,z) coordinate of the corner of voxel (0,0,0)
    grid_origin_metric = pc_centroid_metric - (grid_metric_span / 2.0)

    # Initialize voxel_grid: PyTorch expects (C, D, H, W)
    # Here, D=dim_z, H=dim_y, W=dim_x
    voxel_grid = torch.zeros(NUM_FEATURES, grid_dims_voxels[2], grid_dims_voxels[1], grid_dims_voxels[0], dtype=torch.float32)

    # Convert point metric coordinates to continuous voxel coordinates (potentially fractional and outside [0, dim-1])
    # continuous_voxel_coords[i] = (px_i, py_i, pz_i) in continuous voxel grid space
    continuous_voxel_coords = (xyz - grid_origin_metric) / voxel_size_metric

    # To store (z_idx, y_idx, x_idx) for each point, for easier indexing into torch tensors
    voxel_indices_for_points_zyx_order = []

    for i in range(points.shape[0]):
        # current_point_continuous_coord_xyz is (x_coord, y_coord, z_coord) in continuous voxel space
        current_point_continuous_coord_xyz = continuous_voxel_coords[i]

        # Voxel integer indices (ix, iy, iz) by ROUNDING to the nearest index
        # (np.round, not floor) — the point is assigned to the voxel whose
        # integer index is closest, so offsets below can reach +/-0.5 grid units.
        voxel_idx_int_xyz = np.round(current_point_continuous_coord_xyz).astype(int)

        # Check if the point falls outside the grid boundaries
        # grid_dims_voxels is (dim_x, dim_y, dim_z)
        idx_x, idx_y, idx_z = voxel_idx_int_xyz[0], voxel_idx_int_xyz[1], voxel_idx_int_xyz[2]

        if not (0 <= idx_x < grid_dims_voxels[0] and \
                0 <= idx_y < grid_dims_voxels[1] and \
                0 <= idx_z < grid_dims_voxels[2]):
            # Point is outside the grid, skip it
            continue

        # At this point, idx_x, idx_y, idx_z are guaranteed to be within grid bounds.
        # No explicit clipping is needed here, but using them directly.

        voxel_indices_for_points_zyx_order.append([idx_z, idx_y, idx_x])

        # Calculate offset for the first 3 features:
        # Center of the assigned voxel in continuous grid index space (e.g., [0.5,0.5,0.5] for voxel [0,0,0])
        assigned_voxel_center_grid_idx_space = np.array([idx_x, idx_y, idx_z], dtype=float) + 0.5

        # Offset of the point from its assigned voxel center, in grid units.
        # NOTE(review): with round-based assignment above, these offsets are
        # measured against idx + 0.5, so they lie in [-1.0, 0.0) rather than
        # being centered at 0 — confirm this asymmetry is intended.
        offset_xyz_in_grid_units = current_point_continuous_coord_xyz - assigned_voxel_center_grid_idx_space

        # Accumulate features in voxel_grid (C, Z, Y, X)
        # Store dx, dy, dz (offsets in X, Y, Z dimensions)
        voxel_grid[0, idx_z, idx_y, idx_x] += offset_xyz_in_grid_units[0]  # dx
        voxel_grid[1, idx_z, idx_y, idx_x] += offset_xyz_in_grid_units[1]  # dy
        voxel_grid[2, idx_z, idx_y, idx_x] += offset_xyz_in_grid_units[2]  # dz

        # Accumulate other original features (from index 3 onwards)
        if NUM_FEATURES > 3:
            current_point_other_features = features_other[i]
            voxel_grid[3:, idx_z, idx_y, idx_x] += torch.tensor(current_point_other_features, dtype=torch.float32)

    final_voxel_indices_for_points_zyx = np.array(voxel_indices_for_points_zyx_order, dtype=int) if voxel_indices_for_points_zyx_order else np.empty((0,3), dtype=int)

    scale_info = {
        'grid_origin_metric': grid_origin_metric,
        'voxel_size_metric': voxel_size_metric,
        'grid_dims_voxels': tuple(grid_dims_voxels.tolist()),
        'pc_centroid_metric': pc_centroid_metric,
    }

    return voxel_grid, final_voxel_indices_for_points_zyx, scale_info
160
-
161
-
162
def create_ground_truth(vertices: np.ndarray,
                        scale_info: Dict[str, Any]
                        ) -> Tuple[torch.Tensor, torch.Tensor]:
    """
    Build ground-truth voxel labels and sub-voxel refinement targets.

    Grid geometry (origin, voxel size, dimensions) is read from *scale_info*
    as produced by voxelize_points.

    Args:
        vertices: (M, 3) vertex coordinates in original metric space.
        scale_info: Must contain 'grid_origin_metric', 'voxel_size_metric'
            and 'grid_dims_voxels'.

    Returns:
        vertex_labels: (dim_z, dim_y, dim_x) binary labels — 1.0 where a
            vertex falls inside (or is clipped into) the voxel.
        refinement_targets: (3, dim_z, dim_y, dim_x) offsets (dx, dy, dz)
            of the vertex from the assigned voxel's center, in grid units
            (approx. [-0.5, 0.5) for in-bounds vertices).
    """
    origin = scale_info['grid_origin_metric']
    vox_size = scale_info['voxel_size_metric']
    dims = np.array(scale_info['grid_dims_voxels'])  # (nx, ny, nz)
    nx, ny, nz = dims[0], dims[1], dims[2]

    labels = torch.zeros(nz, ny, nx, dtype=torch.float32)
    targets = torch.zeros(3, nz, ny, nx, dtype=torch.float32)

    if vertices.shape[0] == 0:
        return labels, targets

    # Metric coordinates -> continuous voxel-grid coordinates (may be fractional
    # and may fall outside [0, dim-1]).
    grid_coords = (vertices - origin) / vox_size

    for coord in grid_coords:
        # Floor gives the voxel cell the vertex lies in; clip keeps
        # out-of-bounds vertices on the grid border.
        cell = np.floor(coord).astype(int)
        ix = np.clip(cell[0], 0, nx - 1)
        iy = np.clip(cell[1], 0, ny - 1)
        iz = np.clip(cell[2], 0, nz - 1)

        # Tensors are indexed (z, y, x).
        labels[iz, iy, ix] = 1.0

        # Offset of the vertex from the *assigned* (possibly clipped) voxel
        # center, expressed in grid units.
        center = np.array([ix, iy, iz], dtype=float) + 0.5
        delta = coord - center
        targets[0, iz, iy, ix] = delta[0]  # dx
        targets[1, iz, iy, ix] = delta[1]  # dy
        targets[2, iz, iy, ix] = delta[2]  # dz

    return labels, targets
226
-
227
class VoxelUNet(nn.Module):
    """Enhanced U-Net for voxel-based vertex detection with increased capacity and advanced features.

    Input:  (B, in_channels, D, H, W) voxel feature grids.
    Output: tuple of
      - vertex_logits: (B, 1, D, H, W) per-voxel vertex-presence logits.
      - refinement:    (B, 3, D, H, W) per-voxel (dx, dy, dz) offsets, squashed
        to [-0.5, 0.5] via tanh * 0.5 (at most half a voxel per axis).

    NOTE(review): the bottleneck collapses the deepest feature map to 1x1x1 via
    adaptive average pooling, so the first decoder step interpolates a single
    voxel back up to the deepest encoder resolution.
    """

    def __init__(self, in_channels: int = 14, base_channels: int = 64, bottleneck_expansion: int = 4,
                 use_attention: bool = True, use_residual: bool = True, dropout_rate: float = 0.1):
        super(VoxelUNet, self).__init__()

        bc = base_channels
        self.use_attention = use_attention
        self.use_residual = use_residual

        # Encoder with increased depth and capacity
        self.enc1 = self._conv_block(in_channels, bc, use_residual=False)  # bc
        self.enc2 = self._conv_block(bc, bc * 2, dropout_rate)  # bc*2
        self.enc3 = self._conv_block(bc * 2, bc * 4, dropout_rate)  # bc*4
        self.enc4 = self._conv_block(bc * 4, bc * 8, dropout_rate)  # bc*8
        self.enc5 = self._conv_block(bc * 8, bc * 16, dropout_rate)  # bc*16
        self.enc6 = self._conv_block(bc * 16, bc * 32, dropout_rate)  # bc*32 (new layer)

        self.pool = nn.MaxPool3d(2)

        # Enhanced bottleneck with multiple processing paths
        self.adaptive_pool = nn.AdaptiveAvgPool3d((1, 1, 1))
        bottleneck_in_channels = bc * 32
        bottleneck_width = bottleneck_in_channels * bottleneck_expansion

        # Three stacked 1x1x1 convs act as an MLP over the globally pooled vector.
        self.bottleneck = nn.Sequential(
            nn.Conv3d(bottleneck_in_channels, bottleneck_width, kernel_size=1, padding=0, bias=True),
            nn.BatchNorm3d(bottleneck_width),
            nn.ReLU(inplace=True),
            nn.Dropout3d(dropout_rate),
            nn.Conv3d(bottleneck_width, bottleneck_width, kernel_size=1, padding=0, bias=True),
            nn.BatchNorm3d(bottleneck_width),
            nn.ReLU(inplace=True),
            nn.Dropout3d(dropout_rate),
            nn.Conv3d(bottleneck_width, bottleneck_width, kernel_size=1, padding=0, bias=True),
            nn.BatchNorm3d(bottleneck_width),
            nn.ReLU(inplace=True)
        )

        # Attention modules for skip connections (if enabled)
        if self.use_attention:
            self.att6 = self._attention_block(bottleneck_width, bc * 32, bc * 16)
            self.att5 = self._attention_block(bc * 32, bc * 16, bc * 8)
            self.att4 = self._attention_block(bc * 16, bc * 8, bc * 4)
            self.att3 = self._attention_block(bc * 8, bc * 4, bc * 2)
            self.att2 = self._attention_block(bc * 4, bc * 2, bc)

        # Enhanced decoder with more capacity
        self.dec6 = self._conv_block(bottleneck_width + bc * 32, bc * 32, dropout_rate)

        self.up5 = nn.Upsample(scale_factor=2, mode='trilinear', align_corners=True)
        self.dec5 = self._conv_block(bc * 32 + bc * 16, bc * 16, dropout_rate)

        self.up4 = nn.Upsample(scale_factor=2, mode='trilinear', align_corners=True)
        self.dec4 = self._conv_block(bc * 16 + bc * 8, bc * 8, dropout_rate)

        self.up3 = nn.Upsample(scale_factor=2, mode='trilinear', align_corners=True)
        self.dec3 = self._conv_block(bc * 8 + bc * 4, bc * 4, dropout_rate)

        self.up2 = nn.Upsample(scale_factor=2, mode='trilinear', align_corners=True)
        self.dec2 = self._conv_block(bc * 4 + bc * 2, bc * 2, dropout_rate)

        self.up1 = nn.Upsample(scale_factor=2, mode='trilinear', align_corners=True)
        self.dec1 = self._conv_block(bc * 2 + bc, bc, dropout_rate)

        # Enhanced output heads with intermediate processing
        self.vertex_intermediate = self._conv_block(bc, bc // 2, 0.0, use_residual=False)
        self.vertex_head = nn.Conv3d(bc // 2, 1, kernel_size=1)

        self.refinement_intermediate = self._conv_block(bc, bc // 2, 0.0, use_residual=False)
        self.refinement_head = nn.Conv3d(bc // 2, 3, kernel_size=1)

        self.tanh = nn.Tanh()

    def _conv_block(self, in_channels: int, out_channels: int, dropout_rate: float = 0.0,
                    use_residual: bool = None) -> nn.Sequential:
        """Enhanced convolutional block with optional residual connections and dropout.

        `use_residual=None` means "inherit self.use_residual"; an explicit
        True/False overrides the instance-wide setting for this block only.
        """
        if use_residual is None:
            use_residual = self.use_residual

        layers = []

        # First conv
        layers.extend([
            nn.Conv3d(in_channels, out_channels, kernel_size=3, padding=1, bias=False),
            nn.BatchNorm3d(out_channels),
            nn.ReLU(inplace=True)
        ])

        if dropout_rate > 0:
            layers.append(nn.Dropout3d(dropout_rate))

        # Second conv
        layers.extend([
            nn.Conv3d(out_channels, out_channels, kernel_size=3, padding=1, bias=False),
            nn.BatchNorm3d(out_channels)
        ])

        # Third conv for extra capacity (only for the wider blocks)
        if out_channels >= 128:
            layers.extend([
                nn.ReLU(inplace=True),
                nn.Conv3d(out_channels, out_channels, kernel_size=3, padding=1, bias=False),
                nn.BatchNorm3d(out_channels)
            ])

        # Add residual connection if channels match and residual is enabled;
        # otherwise finish the plain block with a trailing ReLU.
        block = nn.Sequential(*layers)

        if use_residual and in_channels == out_channels:
            return ResidualBlock(block)
        else:
            return nn.Sequential(block, nn.ReLU(inplace=True))

    def _attention_block(self, gate_channels: int, skip_channels: int, out_channels: int) -> nn.Module:
        """Attention gate for focusing on relevant features in skip connections."""
        return AttentionGate(gate_channels, skip_channels, out_channels)

    def forward(self, x: torch.Tensor) -> Tuple[torch.Tensor, torch.Tensor]:
        """Run encoder -> bottleneck -> decoder; return (vertex_logits, refinement)."""
        # Encoder path with increased depth
        e1 = self.enc1(x)  # bc
        p1 = self.pool(e1)

        e2 = self.enc2(p1)  # bc*2
        p2 = self.pool(e2)

        e3 = self.enc3(p2)  # bc*4
        p3 = self.pool(e3)

        e4 = self.enc4(p3)  # bc*8
        p4 = self.pool(e4)

        e5 = self.enc5(p4)  # bc*16
        p5 = self.pool(e5)

        e6 = self.enc6(p5)  # bc*32
        p6 = self.pool(e6)

        # Enhanced bottleneck: global context, pooled down to a single voxel
        b_pooled = self.adaptive_pool(p6)
        b = self.bottleneck(b_pooled)

        # Enhanced decoder path with attention; the bottleneck output is
        # interpolated from 1x1x1 up to the deepest encoder resolution.
        u6_from_b = nn.functional.interpolate(b, size=e6.shape[2:], mode='trilinear', align_corners=True)
        if self.use_attention:
            e6_att = self.att6(u6_from_b, e6)
            cat6 = torch.cat([u6_from_b, e6_att], dim=1)
        else:
            cat6 = torch.cat([u6_from_b, e6], dim=1)
        d6 = self.dec6(cat6)

        u5 = self.up5(d6)
        if self.use_attention:
            e5_att = self.att5(u5, e5)
            cat5 = torch.cat([u5, e5_att], dim=1)
        else:
            cat5 = torch.cat([u5, e5], dim=1)
        d5 = self.dec5(cat5)

        u4 = self.up4(d5)
        if self.use_attention:
            e4_att = self.att4(u4, e4)
            cat4 = torch.cat([u4, e4_att], dim=1)
        else:
            cat4 = torch.cat([u4, e4], dim=1)
        d4 = self.dec4(cat4)

        u3 = self.up3(d4)
        if self.use_attention:
            e3_att = self.att3(u3, e3)
            cat3 = torch.cat([u3, e3_att], dim=1)
        else:
            cat3 = torch.cat([u3, e3], dim=1)
        d3 = self.dec3(cat3)

        u2 = self.up2(d3)
        if self.use_attention:
            e2_att = self.att2(u2, e2)
            cat2 = torch.cat([u2, e2_att], dim=1)
        else:
            cat2 = torch.cat([u2, e2], dim=1)
        d2 = self.dec2(cat2)

        u1 = self.up1(d2)
        cat1 = torch.cat([u1, e1], dim=1)  # shallowest skip has no attention gate
        d1 = self.dec1(cat1)

        # Enhanced output heads
        vertex_features = self.vertex_intermediate(d1)
        vertex_logits = self.vertex_head(vertex_features)

        refinement_features = self.refinement_intermediate(d1)
        # tanh * 0.5 constrains each offset to at most half a voxel.
        refinement = self.tanh(self.refinement_head(refinement_features)) * 0.5

        return vertex_logits, refinement
423
-
424
-
425
class ResidualBlock(nn.Module):
    """Wraps a sub-module with an additive identity shortcut followed by ReLU."""

    def __init__(self, block):
        super().__init__()
        self.block = block

    def forward(self, x):
        # relu(F(x) + x) — standard residual formulation.
        residual = self.block(x)
        return torch.relu(residual + x)
433
-
434
-
435
class AttentionGate(nn.Module):
    """Additive attention gate for U-Net skip connections.

    Projects the gating signal and the skip features to a common width,
    combines them additively, and produces a per-voxel sigmoid weight that
    re-scales the skip features.
    """

    def __init__(self, gate_channels, skip_channels, out_channels):
        super().__init__()
        # Layer creation order is significant for parameter initialization;
        # attribute names match the original for state_dict compatibility.
        self.gate_conv = nn.Conv3d(gate_channels, out_channels, kernel_size=1, bias=True)
        self.skip_conv = nn.Conv3d(skip_channels, out_channels, kernel_size=1, bias=True)
        self.attention_conv = nn.Conv3d(out_channels, 1, kernel_size=1, bias=True)
        self.relu = nn.ReLU(inplace=True)
        self.sigmoid = nn.Sigmoid()

    def forward(self, gate, skip):
        g = self.gate_conv(gate)
        s = self.skip_conv(skip)

        # Bring the gate projection onto the skip's spatial grid if they differ.
        if g.shape[2:] != s.shape[2:]:
            g = nn.functional.interpolate(
                g, size=s.shape[2:],
                mode='trilinear', align_corners=True
            )

        weights = self.sigmoid(self.attention_conv(self.relu(g + s)))
        return skip * weights
460
-
461
class VoxelDataset(Dataset):
    """Dataset of pickled samples yielding (voxel_grid, vertex_labels, refinement_targets, scale_info).

    Each sample file must contain 'pcloud_14d' (N, 14 point cloud) and
    'wf_vertices' (wireframe vertex coordinates).
    """

    def __init__(self, data_files: List[str], voxel_size: float = 0.1, grid_size: int = 64):
        self.data_files = data_files
        self.voxel_size = voxel_size
        self.grid_size = grid_size

    def __len__(self):
        return len(self.data_files)

    def __getitem__(self, idx):
        sample = load_data(self.data_files[idx])

        # Voxelize the raw 14D cloud; the returned scale_info fixes the grid
        # geometry that the ground truth must share.
        grid, _, scale_info = voxelize_points(
            sample['pcloud_14d'], self.grid_size, self.voxel_size
        )

        vertices = np.array(sample['wf_vertices'])
        labels, targets = create_ground_truth(vertices, scale_info)

        return grid, labels, targets, scale_info
483
-
484
- import torch.nn as nn
485
- import torch.nn.functional as F
486
-
487
class CombinedLoss(nn.Module):
    """
    Combined loss for vertex classification and offset regression.

    Components:
    - Weighted BCE-with-logits against a Gaussian-blurred copy of the GT labels.
    - Dice loss against the blurred labels re-binarized at 0.5.
    - MSE loss on refinement offsets, computed only over voxels whose
      *original* (un-blurred) label is positive.

    NOTE(review): the Gaussian kernel is NOT normalized (peak value 1), so the
    blur inflates label mass; the clamp(0, 1) in forward() keeps targets valid.
    NOTE(review): self.bce_loss is constructed but never used — forward()
    builds its own BCEWithLogitsLoss(reduction='none') for per-voxel weighting.
    """
    def __init__(self,
                 vertex_weight: float = 1.0,
                 refinement_weight: float = 0.1,
                 dice_weight: float = 0.5,
                 blur_kernel_size: int = 5,
                 blur_sigma: float = 1.0,
                 eps: float = 1e-6):
        super().__init__()
        self.vertex_weight = vertex_weight          # scales the classification term
        self.refinement_weight = refinement_weight  # scales the offset-regression term
        self.dice_weight = dice_weight              # scales dice within the vertex loss
        self.eps = eps                              # numerical floor for the dice ratio

        # BCE with logits (unused — see class NOTE above)
        self.bce_loss = nn.BCEWithLogitsLoss()
        # MSE for offset regression
        self.mse_loss = nn.MSELoss()

        # build 3D gaussian kernel (unnormalized; value 1 at the center)
        k = blur_kernel_size
        coords = torch.arange(k, dtype=torch.float32) - (k - 1) / 2
        xx, yy, zz = torch.meshgrid(coords, coords, coords, indexing='ij')
        kernel = torch.exp(-(xx**2 + yy**2 + zz**2) / (2 * blur_sigma**2))
        # shape (1,1,k,k,k)
        kernel = kernel.view(1, 1, k, k, k)
        # registered as a buffer so .to(device) moves it with the module
        self.register_buffer('gaussian_kernel', kernel)
        self.pad = k // 2

    def forward(self,
                vertex_logits_pred: torch.Tensor,  # (B,1,D,H,W)
                refinement_pred: torch.Tensor,     # (B,3,D,H,W)
                vertex_gt: torch.Tensor,           # (B,D,H,W), 0/1
                refinement_gt: torch.Tensor        # (B,3,D,H,W)
                ) -> Tuple[torch.Tensor, torch.Tensor, torch.Tensor]:
        """Return (total_loss, vertex_loss, refinement_loss) for one batch."""

        # logits & gt
        logits = vertex_logits_pred.squeeze(1)  # (B,D,H,W)
        gt = vertex_gt.float()                  # (B,D,H,W)

        # apply gaussian blur on gt to soften hard 0/1 targets
        gt_unsq = gt.unsqueeze(1)  # (B,1,D,H,W)
        gt_blur = F.conv3d(gt_unsq, self.gaussian_kernel, padding=self.pad)  # (B,1,D,H,W)
        gt_blur = gt_blur.clamp(0, 1)  # ensure values are in [0, 1]
        gt_smooth = gt_blur.squeeze(1)  # (B,D,H,W)

        # 1) Weighted BCE loss - positive when gt_smooth > 1e-3
        pos_mask = gt_smooth > 1e-3
        neg_mask = ~pos_mask

        # Compute BCE separately for positive and negative samples so each
        # group contributes its own mean (balances the heavy class imbalance).
        bce_loss_fn = nn.BCEWithLogitsLoss(reduction='none')
        bce_all = bce_loss_fn(logits, gt_smooth)

        # Calculate weighted BCE (both weights currently 1.0 — kept as knobs)
        pos_weight = 1.0
        neg_weight = 1.0

        if pos_mask.sum() > 0 and neg_mask.sum() > 0:
            pos_loss = bce_all[pos_mask].mean()
            neg_loss = bce_all[neg_mask].mean()
            bce = pos_weight * pos_loss + neg_weight * neg_loss
        elif pos_mask.sum() > 0:
            bce = pos_weight * bce_all[pos_mask].mean()
        elif neg_mask.sum() > 0:
            bce = neg_weight * bce_all[neg_mask].mean()
        else:
            bce = torch.tensor(0.0, device=logits.device)

        # 2) Dice loss over the re-binarized smoothed labels
        prob = torch.sigmoid(logits)
        gt_smooth_round = (gt_smooth > 0.5).float()  # binary mask
        intersection = (prob * gt_smooth_round).sum(dim=[1,2,3])
        union = prob.sum(dim=[1,2,3]) + gt_smooth_round.sum(dim=[1,2,3])
        dice_score = (2 * intersection + self.eps) / (union + self.eps)
        dice_loss = 1 - dice_score.mean()

        vertex_loss = bce + self.dice_weight * dice_loss

        # 3) Refinement MSE (only where original gt==1)
        mask_pos = (gt > 0.5).unsqueeze(1)  # use hard mask for offsets
        if mask_pos.sum() > 0:
            pred_offsets = refinement_pred[mask_pos.expand_as(refinement_pred)] \
                               .view(-1, 3)
            gt_offsets = refinement_gt[mask_pos.expand_as(refinement_gt)] \
                             .view(-1, 3)
            refinement_loss = self.mse_loss(pred_offsets, gt_offsets)
        else:
            refinement_loss = torch.tensor(0., device=logits.device)

        # 4) Total loss
        total_loss = (self.vertex_weight * vertex_loss +
                      self.refinement_weight * refinement_loss)

        return total_loss, vertex_loss, refinement_loss
590
-
591
def train_epoch(model, dataloader, optimizer, criterion, device, current_epoch: int):
    """Run one training epoch and return averaged (total, vertex, refinement) losses.

    Side effects:
    - Prints per-batch loss values.
    - Saves a model checkpoint every 200 batches.
    - Contains an `if False:` PyVista debug-visualization block (flip to True
      to inspect the first sample of each batch interactively).
    """
    model.train()
    total_loss_epoch = 0.0
    vertex_loss_epoch = 0.0
    refinement_loss_epoch = 0.0

    for batch_idx, (voxel_grid_batch, vertex_labels_batch, refinement_targets_batch, _) in enumerate(dataloader):
        voxel_grid_batch = voxel_grid_batch.to(device)
        vertex_labels_batch = vertex_labels_batch.to(device)
        refinement_targets_batch = refinement_targets_batch.to(device)

        # Debug visualization — deliberately disabled; renders occupied voxels
        # and GT vertices of the first sample with PyVista when enabled.
        if False:
            print(f'Epoch {current_epoch+1}, Batch {batch_idx+1}/{len(dataloader)}')

            sample_voxel_features = voxel_grid_batch[0].cpu().numpy()
            sample_gt_labels = vertex_labels_batch[0].cpu().numpy()
            sample_gt_refinement = refinement_targets_batch[0].cpu().numpy()

            # A voxel is "occupied" if any of its summed xyz-offset channels is nonzero.
            summed_xyz_in_voxels = sample_voxel_features[:3]
            occupied_voxel_mask = np.any(summed_xyz_in_voxels != 0, axis=0)

            plotter = pv.Plotter(window_size=[800,600])
            plotter.background_color = 'white'

            if np.any(occupied_voxel_mask):
                occupied_voxel_indices = np.array(np.where(occupied_voxel_mask)).T
                input_points_display = pv.PolyData(occupied_voxel_indices + 0.5)
                plotter.add_mesh(input_points_display, color='cornflowerblue', point_size=5, render_points_as_spheres=True, label='Occupied Voxels (Centers)')

                gt_vertex_voxel_mask = sample_gt_labels > 0.5
                if np.any(gt_vertex_voxel_mask):
                    gt_vertex_indices_int = np.array(np.where(gt_vertex_voxel_mask)).T
                    gt_offsets = sample_gt_refinement[:, gt_vertex_voxel_mask].T
                    # GT positions = voxel center + stored sub-voxel offset, in grid space
                    gt_vertex_positions_grid_space = gt_vertex_indices_int.astype(float) + 0.5 + gt_offsets

                    target_vertices_display = pv.PolyData(gt_vertex_positions_grid_space)
                    plotter.add_mesh(target_vertices_display, color='crimson', point_size=10, render_points_as_spheres=True, label='Target Vertices (GT)')

                plotter.show(title=f"Debug Viz E{current_epoch+1} B{batch_idx+1}", auto_close=False)
            else:
                print(f"Epoch {current_epoch+1} Batch {batch_idx+1}: No data to visualize for the first sample.")

        optimizer.zero_grad()
        vertex_logits_pred, refinement_pred = model(voxel_grid_batch)

        loss, vertex_loss, refinement_loss = criterion(
            vertex_logits_pred, refinement_pred, vertex_labels_batch, refinement_targets_batch
        )

        print(f"Batch {batch_idx+1}/{len(dataloader)}: Loss={loss.item():.4f}, Vertex Loss={vertex_loss.item():.4f}, Refinement Loss={refinement_loss.item():.4f}")

        # Skip backprop on a (near-)zero loss to avoid a useless update step.
        if loss > 0.000001:
            loss.backward()
            optimizer.step()

        total_loss_epoch += loss.item()
        vertex_loss_epoch += vertex_loss.item()
        refinement_loss_epoch += refinement_loss.item()

        # Periodic intra-epoch checkpointing
        if (batch_idx + 1) % 200 == 0:
            checkpoint_path = f"model_epoch_{current_epoch+1}_batch_{batch_idx+1}_grid_128v7.pth" # Consider updating filename if grid size changes
            torch.save(model.state_dict(), checkpoint_path)
            print(f"Saved batch checkpoint: {checkpoint_path}")

    # Guard against an empty dataloader when averaging
    avg_total_loss = total_loss_epoch / len(dataloader) if len(dataloader) > 0 else 0
    avg_vertex_loss = vertex_loss_epoch / len(dataloader) if len(dataloader) > 0 else 0
    avg_refinement_loss = refinement_loss_epoch / len(dataloader) if len(dataloader) > 0 else 0

    return avg_total_loss, avg_vertex_loss, avg_refinement_loss
660
-
661
def train_model(data_folder: str = "data", num_epochs: int = 100, batch_size: int = 4, neg_pos_ratio_val: float = 1.0):
    """Train a VoxelUNet on pickled samples from *data_folder*, checkpointing every epoch.

    Args:
        data_folder: Directory containing *.pkl training samples.
        num_epochs: Number of passes over the dataset.
        batch_size: DataLoader batch size.
        neg_pos_ratio_val: Only used in checkpoint file names; it does NOT
            influence the loss (NOTE(review): likely a leftover from an
            earlier balanced-sampling scheme).
    """
    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
    print(f"Using device: {device}")

    data_files = get_data_files(data_folder)
    if not data_files:
        print(f"No data files found in {data_folder}. Exiting.")
        return

    # Grid configuration: 64^3 voxels, each 0.75 metric units wide.
    GRID_SIZE_CFG = 64
    VOXEL_SIZE_CFG = 0.75

    dataset = VoxelDataset(data_files, voxel_size=VOXEL_SIZE_CFG, grid_size=GRID_SIZE_CFG)
    dataloader = DataLoader(dataset, batch_size=batch_size, shuffle=True, num_workers=8)

    model = VoxelUNet().to(device)
    optimizer = optim.Adam(model.parameters(), lr=1e-3)

    # refinement_weight=0.0: offset regression is disabled for this run,
    # only the vertex-classification terms contribute to the gradient.
    criterion = CombinedLoss(
        vertex_weight=1.0,
        refinement_weight=0.0,
        dice_weight=0.2
    ).to(device)

    print(f"Starting training: {num_epochs} epochs, Batch size: {batch_size}, Grid size: {GRID_SIZE_CFG}, Voxel size: {VOXEL_SIZE_CFG}, Initial LR: {optimizer.param_groups[0]['lr']}")

    for epoch in range(num_epochs):
        print(f"\n--- Epoch {epoch+1}/{num_epochs} ---")

        avg_loss, avg_vertex_loss, avg_refinement_loss = train_epoch(
            model, dataloader, optimizer, criterion, device, epoch
        )

        print(f"Epoch {epoch+1} Summary: Avg Loss: {avg_loss:.4f}, "
              f"Avg Vertex Loss: {avg_vertex_loss:.4f}, "
              f"Avg Refinement Loss: {avg_refinement_loss:.4f}, "
              f"Current LR: {optimizer.param_groups[0]['lr']:.6f}")

        # Per-epoch checkpoint (file name encodes grid size and run tag)
        checkpoint_path = f"model_epoch_{epoch+1}_grid{GRID_SIZE_CFG}_smooth_bal{neg_pos_ratio_val}_v7.pth"
        torch.save(model.state_dict(), checkpoint_path)
        print(f"Saved checkpoint: {checkpoint_path}")

    final_model_path = f"final_model_grid{GRID_SIZE_CFG}_epochs{num_epochs}_smooth_bal{neg_pos_ratio_val}_v7.pth"
    torch.save(model.state_dict(), final_model_path)
    print(f"Training completed! Final model saved as {final_model_path}")
707
- def load_model_for_inference(model_path: str, device: torch.device,
708
- in_channels: int = 14, base_channels: int = 32) -> VoxelUNet:
709
- """Load a VoxelUNet model for inference."""
710
- model = VoxelUNet(in_channels=in_channels, base_channels=base_channels)
711
- model.load_state_dict(torch.load(model_path, map_location=device))
712
- model.to(device)
713
- model.eval()
714
- print(f"Model loaded from {model_path} and set to evaluation mode on {device}.")
715
- return model
716
-
717
- def predict_vertices(model: VoxelUNet,
718
- point_cloud_14d: np.ndarray,
719
- grid_size: int,
720
- device: torch.device,
721
- voxel_size_metric: float = 0.35, # Added for consistency, default matches voxelize_points
722
- vertex_threshold: float = 0.5) -> np.ndarray:
723
- """
724
- Predict vertices from a 14D point cloud.
725
-
726
- Args:
727
- model: The trained VoxelUNet model.
728
- point_cloud_14d: (N, 14) NumPy array of the input point cloud.
729
- grid_size: The size of the voxel grid along X and Y dimensions (must match training).
730
- device: PyTorch device ('cuda' or 'cpu').
731
- voxel_size_metric: The metric size of each voxel (must match training).
732
- vertex_threshold: Threshold for classifying a voxel as containing a vertex.
733
-
734
- Returns:
735
- predicted_vertices_original_space: (M, 3) NumPy array of predicted vertex
736
- coordinates in the original point cloud space (X, Y, Z order).
737
- Returns an empty array if no vertices are predicted
738
- or if the input point cloud results in an empty voxel grid.
739
- """
740
- voxel_grid_tensor, _, scale_info = voxelize_points(
741
- point_cloud_14d,
742
- grid_size_xy=grid_size,
743
- voxel_size_metric=voxel_size_metric
744
- )
745
-
746
- # Check if voxelization produced a valid grid (e.g., if input point cloud was empty)
747
- # voxelize_points returns a zero tensor for grid if input points are empty.
748
- # If voxel_grid_tensor is all zeros and no points were input, scale_info might be default.
749
- if voxel_grid_tensor.sum() == 0 and point_cloud_14d.shape[0] == 0:
750
- # This case implies empty input point cloud, voxelize_points handles this.
751
- # Predictions will naturally be empty if the grid is empty.
752
- pass # Continue, model will predict on zero grid.
753
-
754
- input_tensor = voxel_grid_tensor.unsqueeze(0).to(device)
755
-
756
- with torch.no_grad():
757
- vertex_logits_pred_tensor, refinement_pred_tensor = model(input_tensor)
758
-
759
- vertex_prob_pred_tensor = torch.sigmoid(vertex_logits_pred_tensor)
760
-
761
- vertex_prob_pred_np = vertex_prob_pred_tensor.squeeze(0).squeeze(0).cpu().numpy()
762
- refinement_pred_np = refinement_pred_tensor.squeeze(0).cpu().numpy() # Shape (3, D, H, W) -> (dx,dy,dz channels)
763
-
764
- print(f"Vertex Probabilities Stats: Min={np.min(vertex_prob_pred_np):.4f}, Max={np.max(vertex_prob_pred_np):.4f}, Mean={np.mean(vertex_prob_pred_np):.4f}, Median={np.median(vertex_prob_pred_np):.4f}")
765
- if refinement_pred_np.size > 0:
766
- print(f"Refinement Predictions Stats: Min={np.min(refinement_pred_np):.4f}, Max={np.max(refinement_pred_np):.4f}, Mean={np.mean(refinement_pred_np):.4f}, Median={np.median(refinement_pred_np):.4f}")
767
- for i in range(refinement_pred_np.shape[0]): # Iterate over dx, dy, dz components
768
- print(f" Refinement Dim {i} (dx,dy,dz order) Stats: Min={np.min(refinement_pred_np[i]):.4f}, Max={np.max(refinement_pred_np[i]):.4f}, Mean={np.mean(refinement_pred_np[i]):.4f}, Median={np.median(refinement_pred_np[i]):.4f}")
769
- else:
770
- print("Refinement Predictions Stats: Array is empty.")
771
-
772
- predicted_mask = vertex_prob_pred_np > vertex_threshold
773
- # predicted_voxel_indices are (N_preds, 3) with columns (idx_z, idx_y, idx_x)
774
- predicted_voxel_indices_zyx = np.argwhere(predicted_mask)
775
-
776
- if not predicted_voxel_indices_zyx.size:
777
- return np.empty((0, 3), dtype=np.float32)
778
-
779
- # Extract refinement offsets for the predicted voxels
780
- # offsets_channels_first will be (3, N_preds) where channels are (dx, dy, dz)
781
- offsets_channels_first = refinement_pred_np[:,
782
- predicted_voxel_indices_zyx[:, 0], # z_indices
783
- predicted_voxel_indices_zyx[:, 1], # y_indices
784
- predicted_voxel_indices_zyx[:, 2]] # x_indices
785
-
786
- # Transpose to (N_preds, 3) where columns are (dx, dy, dz)
787
- offsets_xyz_order = offsets_channels_first.T
788
-
789
- # Calculate refined coordinates in continuous voxel grid space (X, Y, Z order)
790
- # Voxel center is at index + 0.5
791
- # Refinement is added to this center.
792
- # predicted_voxel_indices_zyx[:, 2] is x_idx
793
- # predicted_voxel_indices_zyx[:, 1] is y_idx
794
- # predicted_voxel_indices_zyx[:, 0] is z_idx
795
-
796
- # offsets_xyz_order[:, 0] is dx
797
- # offsets_xyz_order[:, 1] is dy
798
- # offsets_xyz_order[:, 2] is dz
799
-
800
- refined_x_grid = predicted_voxel_indices_zyx[:, 2].astype(np.float32) + 0.5 #+ offsets_xyz_order[:, 0]
801
- refined_y_grid = predicted_voxel_indices_zyx[:, 1].astype(np.float32) + 0.5 #+ offsets_xyz_order[:, 1]
802
- refined_z_grid = predicted_voxel_indices_zyx[:, 0].astype(np.float32) + 0.5 #+ offsets_xyz_order[:, 2]
803
-
804
- # Stack to get (N_preds, 3) array in (X, Y, Z) order
805
- refined_grid_coords_xyz = np.stack((refined_x_grid, refined_y_grid, refined_z_grid), axis=-1)
806
-
807
- # Convert refined grid coordinates to original metric space
808
- grid_origin_metric = np.array(scale_info['grid_origin_metric']) # (ox, oy, oz)
809
- # Voxel_size_metric from scale_info should match the input voxel_size_metric parameter
810
- current_voxel_size_metric = scale_info['voxel_size_metric']
811
-
812
- # predicted_vertices_original_space are (N_preds, 3) in (X,Y,Z) order
813
- predicted_vertices_original_space = refined_grid_coords_xyz * current_voxel_size_metric + grid_origin_metric
814
-
815
- return predicted_vertices_original_space.astype(np.float32)
816
-
817
- # Simple inference script
818
- def run_inference(model_path: str,
819
- data_file_path: str,
820
- output_file: str = None,
821
- grid_size: int = 128,
822
- voxel_size: float = 0.5,
823
- vertex_threshold: float = 0.5):
824
- """
825
- Run inference on all data files in a directory, visualize with pyvista, and save results.
826
- """
827
- device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
828
- print(f"Using device: {device}")
829
-
830
- # Load model
831
- model = load_model_for_inference(model_path, device)
832
-
833
- # Get all data files from the directory
834
- data_files = get_data_files(data_file_path)
835
- if not data_files:
836
- print(f"No data files found in {data_file_path}")
837
- return
838
-
839
- print(f"Found {len(data_files)} data files to process")
840
-
841
- for i, file_path in enumerate(data_files):
842
- print(f"\n--- Processing file {i+1}/{len(data_files)}: {os.path.basename(file_path)} ---")
843
-
844
- # Load input data
845
- try:
846
- data = load_data(file_path)
847
- except Exception as e:
848
- print(f"Error loading {file_path}: {e}")
849
- continue
850
-
851
- if 'pcloud_14d' not in data:
852
- print(f"Error: File {file_path} does not contain 'pcloud_14d' key, skipping")
853
- continue
854
-
855
- # Extract original point cloud and ground-truth vertices
856
- pcloud = data['pcloud_14d'][:, :3] # (N,3)
857
- gt_vertices = np.array(data.get('wf_vertices', [])) # (M,3) or empty
858
-
859
- print(f"Input point cloud shape: {pcloud.shape}")
860
- if gt_vertices.size:
861
- print(f"GT vertices shape: {gt_vertices.shape}")
862
-
863
- # Run prediction
864
- print("Running inference...")
865
- try:
866
- predicted_vertices = predict_vertices(
867
- model=model,
868
- point_cloud_14d=data['pcloud_14d'],
869
- grid_size=grid_size,
870
- device=device,
871
- voxel_size_metric=voxel_size,
872
- vertex_threshold=vertex_threshold
873
- )
874
- except Exception as e:
875
- print(f"Error during prediction for {file_path}: {e}")
876
- continue
877
-
878
- print(f"Predicted {len(predicted_vertices)} vertices")
879
-
880
- # --- Visualization ---
881
- plotter = pv.Plotter(window_size=[800,600])
882
- plotter.background_color = 'white'
883
-
884
- # Original point cloud in light gray
885
- if pcloud.size:
886
- pc_cloud = pv.PolyData(pcloud)
887
- plotter.add_mesh(pc_cloud, color='lightgray', point_size=2, render_points_as_spheres=True, label='Input PC')
888
-
889
- # Ground-truth vertices in red
890
- if gt_vertices.size:
891
- gt_pd = pv.PolyData(gt_vertices)
892
- plotter.add_mesh(gt_pd, color='red', point_size=8, render_points_as_spheres=True, label='GT Vertices')
893
-
894
- # Predicted vertices in blue
895
- if predicted_vertices.size:
896
- pred_pd = pv.PolyData(predicted_vertices)
897
- plotter.add_mesh(pred_pd, color='blue', point_size=8, render_points_as_spheres=True, label='Predicted Vertices')
898
-
899
- plotter.add_legend()
900
- plotter.show(title=os.path.basename(file_path))
901
-
902
- # Prepare output data
903
- output_data = {
904
- 'predicted_vertices': predicted_vertices,
905
- 'input_file': file_path,
906
- 'model_used': model_path,
907
- 'grid_size': grid_size,
908
- 'voxel_size': voxel_size,
909
- 'vertex_threshold': vertex_threshold,
910
- 'original_data': data
911
- }
912
-
913
- # Save results
914
- base_name = os.path.splitext(os.path.basename(file_path))[0]
915
- output_filename = f"{base_name}_predictions"
916
- try:
917
- save_data(output_data, output_filename)
918
- print(f"Results saved to: {output_filename}.pkl")
919
- except Exception as e:
920
- print(f"Error saving results for {file_path}: {e}")
921
-
922
- print(f"\nCompleted processing {len(data_files)} files")
923
-
924
- if __name__ == "__main__":
925
- inference = False
926
-
927
- data_folder_train = '/mnt/personal/skvrnjan/hoho_end/'
928
- #data_folder_train = '/home/skvrnjan/personal/hoho_end'
929
- num_epochs_train = 100
930
- batch_size_train = 4
931
- # This parameter now controls the ratio of negative to positive samples for BCE loss
932
- negative_to_positive_bce_ratio = 1
933
-
934
- if inference:
935
- run_inference(model_path='/home/skvrnjan/personal/hoho/model_epoch_100_grid128_smooth_bal1.pth',
936
- data_file_path=data_folder_train,
937
- output_file=None,
938
- grid_size=128,
939
- voxel_size=0.5,
940
- vertex_threshold=0.5
941
- )
942
- else:
943
- train_model(data_folder=data_folder_train,
944
- num_epochs=num_epochs_train,
945
- batch_size=batch_size_train,
946
- neg_pos_ratio_val=negative_to_positive_bce_ratio)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
fast_pointnet.py DELETED
@@ -1,520 +0,0 @@
1
- import os
2
- import torch
3
- import torch.nn as nn
4
- import torch.nn.functional as F
5
- import numpy as np
6
- import pickle
7
- from torch.utils.data import Dataset, DataLoader
8
- from typing import List, Dict, Tuple, Optional
9
- import json
10
-
11
- class FastPointNet(nn.Module):
12
- """
13
- Fast PointNet implementation for 3D vertex prediction from point cloud patches.
14
- Takes 7D point clouds (x,y,z,r,g,b,filtered_flag) and predicts 3D vertex coordinates.
15
- Enhanced with deeper architecture and more parameters for better generalization.
16
- """
17
- def __init__(self, input_dim=7, output_dim=3, max_points=1024, predict_score=True, predict_class=True, num_classes=1):
18
- super(FastPointNet, self).__init__()
19
- self.max_points = max_points
20
- self.predict_score = predict_score
21
- self.predict_class = predict_class
22
- self.num_classes = num_classes
23
-
24
- # Enhanced point-wise MLPs with deeper architecture
25
- self.conv1 = nn.Conv1d(input_dim, 128, 1)
26
- self.conv2 = nn.Conv1d(128, 256, 1)
27
- self.conv3 = nn.Conv1d(256, 512, 1)
28
- self.conv4 = nn.Conv1d(512, 1024, 1)
29
-
30
- # Additional layers for better feature extraction
31
- self.conv5 = nn.Conv1d(1024, 1024, 1)
32
- self.conv6 = nn.Conv1d(1024, 2048, 1)
33
-
34
- # Larger shared features
35
- self.shared_fc1 = nn.Linear(2048, 1024)
36
- self.shared_fc2 = nn.Linear(1024, 512)
37
-
38
- # Enhanced position prediction head
39
- self.pos_fc1 = nn.Linear(512, 512)
40
- self.pos_fc2 = nn.Linear(512, 256)
41
- self.pos_fc3 = nn.Linear(256, 128)
42
- self.pos_fc4 = nn.Linear(128, output_dim)
43
-
44
- # Enhanced score prediction head
45
- if self.predict_score:
46
- self.score_fc1 = nn.Linear(512, 512)
47
- self.score_fc2 = nn.Linear(512, 256)
48
- self.score_fc3 = nn.Linear(256, 128)
49
- self.score_fc4 = nn.Linear(128, 64)
50
- self.score_fc5 = nn.Linear(64, 1)
51
-
52
- # Classification head
53
- if self.predict_class:
54
- self.class_fc1 = nn.Linear(512, 512)
55
- self.class_fc2 = nn.Linear(512, 256)
56
- self.class_fc3 = nn.Linear(256, 128)
57
- self.class_fc4 = nn.Linear(128, 64)
58
- self.class_fc5 = nn.Linear(64, num_classes)
59
-
60
- # Batch normalization layers
61
- self.bn1 = nn.BatchNorm1d(128)
62
- self.bn2 = nn.BatchNorm1d(256)
63
- self.bn3 = nn.BatchNorm1d(512)
64
- self.bn4 = nn.BatchNorm1d(1024)
65
- self.bn5 = nn.BatchNorm1d(1024)
66
- self.bn6 = nn.BatchNorm1d(2048)
67
-
68
- # Dropout with different rates
69
- self.dropout_light = nn.Dropout(0.2)
70
- self.dropout_medium = nn.Dropout(0.3)
71
- self.dropout_heavy = nn.Dropout(0.4)
72
-
73
- def forward(self, x):
74
- """
75
- Forward pass
76
- Args:
77
- x: (batch_size, input_dim, max_points) tensor
78
- Returns:
79
- Tuple containing predictions based on configuration:
80
- - position: (batch_size, output_dim) tensor of predicted 3D coordinates
81
- - score: (batch_size, 1) tensor of predicted distance to GT (if predict_score=True)
82
- - classification: (batch_size, num_classes) tensor of class logits (if predict_class=True)
83
- """
84
- batch_size = x.size(0)
85
-
86
- # Enhanced point-wise feature extraction with residual-like connections
87
- x1 = F.relu(self.bn1(self.conv1(x)))
88
- x2 = F.relu(self.bn2(self.conv2(x1)))
89
- x3 = F.relu(self.bn3(self.conv3(x2)))
90
- x4 = F.relu(self.bn4(self.conv4(x3)))
91
- x5 = F.relu(self.bn5(self.conv5(x4)))
92
- x6 = F.relu(self.bn6(self.conv6(x5)))
93
-
94
- # Global max pooling with additional global average pooling
95
- max_pool = torch.max(x6, 2)[0] # (batch_size, 2048)
96
- avg_pool = torch.mean(x6, 2) # (batch_size, 2048)
97
-
98
- # Combine max and average pooling for richer global features
99
- global_features = max_pool + avg_pool # (batch_size, 2048)
100
-
101
- # Enhanced shared features with residual connection
102
- shared1 = F.relu(self.shared_fc1(global_features))
103
- shared1 = self.dropout_light(shared1)
104
- shared2 = F.relu(self.shared_fc2(shared1))
105
- shared_features = self.dropout_medium(shared2)
106
-
107
- # Enhanced position prediction with skip connections
108
- pos1 = F.relu(self.pos_fc1(shared_features))
109
- pos1 = self.dropout_light(pos1)
110
- pos2 = F.relu(self.pos_fc2(pos1))
111
- pos2 = self.dropout_medium(pos2)
112
- pos3 = F.relu(self.pos_fc3(pos2))
113
- pos3 = self.dropout_light(pos3)
114
- position = self.pos_fc4(pos3)
115
-
116
- outputs = [position]
117
-
118
- if self.predict_score:
119
- # Enhanced score prediction
120
- score1 = F.relu(self.score_fc1(shared_features))
121
- score1 = self.dropout_light(score1)
122
- score2 = F.relu(self.score_fc2(score1))
123
- score2 = self.dropout_medium(score2)
124
- score3 = F.relu(self.score_fc3(score2))
125
- score3 = self.dropout_light(score3)
126
- score4 = F.relu(self.score_fc4(score3))
127
- score4 = self.dropout_light(score4)
128
- score = F.relu(self.score_fc5(score4)) # Ensure positive distance
129
- outputs.append(score)
130
-
131
- if self.predict_class:
132
- # Classification prediction
133
- class1 = F.relu(self.class_fc1(shared_features))
134
- class1 = self.dropout_light(class1)
135
- class2 = F.relu(self.class_fc2(class1))
136
- class2 = self.dropout_medium(class2)
137
- class3 = F.relu(self.class_fc3(class2))
138
- class3 = self.dropout_light(class3)
139
- class4 = F.relu(self.class_fc4(class3))
140
- class4 = self.dropout_light(class4)
141
- classification = self.class_fc5(class4) # Raw logits
142
- outputs.append(classification)
143
-
144
- # Return outputs based on configuration
145
- if len(outputs) == 1:
146
- return outputs[0] # Only position
147
- elif len(outputs) == 2:
148
- if self.predict_score:
149
- return outputs[0], outputs[1] # position, score
150
- else:
151
- return outputs[0], outputs[1] # position, classification
152
- else:
153
- return outputs[0], outputs[1], outputs[2] # position, score, classification
154
-
155
- class PatchDataset(Dataset):
156
- """
157
- Dataset class for loading saved patches for PointNet training.
158
- """
159
-
160
- def __init__(self, dataset_dir: str, max_points: int = 1024, augment: bool = True):
161
- self.dataset_dir = dataset_dir
162
- self.max_points = max_points
163
- self.augment = augment
164
-
165
- # Load patch files
166
- self.patch_files = []
167
- for file in os.listdir(dataset_dir):
168
- if file.endswith('.pkl'):
169
- self.patch_files.append(os.path.join(dataset_dir, file))
170
-
171
- print(f"Found {len(self.patch_files)} patch files in {dataset_dir}")
172
-
173
- def __len__(self):
174
- return len(self.patch_files)
175
-
176
- def __getitem__(self, idx):
177
- """
178
- Load and process a patch for training.
179
- Returns:
180
- patch_data: (7, max_points) tensor of point cloud data
181
- target: (3,) tensor of target 3D coordinates
182
- valid_mask: (max_points,) boolean tensor indicating valid points
183
- distance_to_gt: scalar tensor of distance from initial prediction to GT
184
- classification: scalar tensor for binary classification (1 if GT vertex present, 0 if not)
185
- """
186
- patch_file = self.patch_files[idx]
187
-
188
- with open(patch_file, 'rb') as f:
189
- patch_info = pickle.load(f)
190
-
191
- patch_7d = patch_info['patch_7d'] # (N, 7)
192
- target = patch_info.get('assigned_wf_vertex', None) # (3,) or None
193
- initial_pred = patch_info.get('cluster_center', None) # (3,) or None
194
-
195
- # Determine classification label based on GT vertex presence
196
- has_gt_vertex = 1.0 if target is not None else 0.0
197
-
198
- # Handle patches without ground truth
199
- if target is None:
200
- # Use a dummy target for consistency, but mark as invalid with classification
201
- target = np.zeros(3)
202
- else:
203
- target = np.array(target)
204
-
205
- # Pad or sample points to max_points
206
- num_points = patch_7d.shape[0]
207
-
208
- if num_points >= self.max_points:
209
- # Randomly sample max_points
210
- indices = np.random.choice(num_points, self.max_points, replace=False)
211
- patch_sampled = patch_7d[indices]
212
- valid_mask = np.ones(self.max_points, dtype=bool)
213
- else:
214
- # Pad with zeros
215
- patch_sampled = np.zeros((self.max_points, 7))
216
- patch_sampled[:num_points] = patch_7d
217
- valid_mask = np.zeros(self.max_points, dtype=bool)
218
- valid_mask[:num_points] = True
219
-
220
- # Data augmentation (only if GT vertex is present)
221
- if self.augment and has_gt_vertex > 0:
222
- patch_sampled, target = self._augment_patch(patch_sampled, valid_mask, target)
223
-
224
- # Convert to tensors and transpose for conv1d (channels first)
225
- patch_tensor = torch.from_numpy(patch_sampled.T).float() # (7, max_points)
226
- target_tensor = torch.from_numpy(target).float() # (3,)
227
- valid_mask_tensor = torch.from_numpy(valid_mask)
228
-
229
- # Handle initial_pred
230
- if initial_pred is not None:
231
- initial_pred_tensor = torch.from_numpy(initial_pred).float()
232
- else:
233
- initial_pred_tensor = torch.zeros(3).float()
234
-
235
- # Classification tensor
236
- classification_tensor = torch.tensor(has_gt_vertex).float()
237
-
238
- return patch_tensor, target_tensor, valid_mask_tensor, initial_pred_tensor, classification_tensor
239
-
240
- def save_patches_dataset(patches: List[Dict], dataset_dir: str, entry_id: str):
241
- """
242
- Save patches from prediction pipeline to create a training dataset.
243
-
244
- Args:
245
- patches: List of patch dictionaries from generate_patches()
246
- dataset_dir: Directory to save the dataset
247
- entry_id: Unique identifier for this entry/image
248
- """
249
- os.makedirs(dataset_dir, exist_ok=True)
250
-
251
- for i, patch in enumerate(patches):
252
- # Create unique filename
253
- filename = f"{entry_id}_patch_{i}.pkl"
254
- filepath = os.path.join(dataset_dir, filename)
255
-
256
- # Skip if file already exists
257
- if os.path.exists(filepath):
258
- continue
259
-
260
- # Save patch data
261
- with open(filepath, 'wb') as f:
262
- pickle.dump(patch, f)
263
-
264
- print(f"Saved {len(patches)} patches for entry {entry_id}")
265
-
266
- # Create dataloader with custom collate function to filter invalid samples
267
- def collate_fn(batch):
268
- valid_batch = []
269
- for patch_data, target, valid_mask, initial_pred, classification in batch:
270
- # Filter out invalid samples (no valid points)
271
- if valid_mask.sum() > 0:
272
- valid_batch.append((patch_data, target, valid_mask, initial_pred, classification))
273
-
274
- if len(valid_batch) == 0:
275
- return None
276
-
277
- # Stack valid samples
278
- patch_data = torch.stack([item[0] for item in valid_batch])
279
- targets = torch.stack([item[1] for item in valid_batch])
280
- valid_masks = torch.stack([item[2] for item in valid_batch])
281
- initial_preds = torch.stack([item[3] for item in valid_batch])
282
- classifications = torch.stack([item[4] for item in valid_batch])
283
-
284
- return patch_data, targets, valid_masks, initial_preds, classifications
285
-
286
- # Initialize weights using Xavier/Glorot initialization
287
- def init_weights(m):
288
- if isinstance(m, nn.Conv1d):
289
- nn.init.xavier_uniform_(m.weight)
290
- if m.bias is not None:
291
- nn.init.zeros_(m.bias)
292
- elif isinstance(m, nn.Linear):
293
- nn.init.xavier_uniform_(m.weight)
294
- if m.bias is not None:
295
- nn.init.zeros_(m.bias)
296
- elif isinstance(m, nn.BatchNorm1d):
297
- nn.init.ones_(m.weight)
298
- nn.init.zeros_(m.bias)
299
-
300
- def train_pointnet(dataset_dir: str, model_save_path: str, epochs: int = 100, batch_size: int = 32, lr: float = 0.001,
301
- score_weight: float = 0.1, class_weight: float = 0.5):
302
- """
303
- Train the FastPointNet model on saved patches.
304
-
305
- Args:
306
- dataset_dir: Directory containing saved patch files
307
- model_save_path: Path to save the trained model
308
- epochs: Number of training epochs
309
- batch_size: Training batch size
310
- lr: Learning rate
311
- score_weight: Weight for the distance prediction loss
312
- class_weight: Weight for the classification loss
313
- """
314
- device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
315
- print(f"Training on device: {device}")
316
-
317
- # Create dataset and dataloader
318
- dataset = PatchDataset(dataset_dir, max_points=1024, augment=False)
319
- print(f"Dataset loaded with {len(dataset)} samples")
320
-
321
- dataloader = DataLoader(dataset, batch_size=batch_size, shuffle=True, num_workers=8,
322
- collate_fn=collate_fn, drop_last=True)
323
-
324
- # Initialize model with score and classification prediction
325
- model = FastPointNet(input_dim=7, output_dim=3, max_points=1024, predict_score=True, predict_class=True, num_classes=1)
326
-
327
- model.apply(init_weights)
328
- model.to(device)
329
-
330
- # Loss functions
331
- position_criterion = nn.MSELoss()
332
- score_criterion = nn.MSELoss()
333
- classification_criterion = nn.BCEWithLogitsLoss()
334
-
335
- optimizer = torch.optim.Adam(model.parameters(), lr=lr, weight_decay=1e-4)
336
- scheduler = torch.optim.lr_scheduler.StepLR(optimizer, step_size=30, gamma=0.5)
337
-
338
- # Training loop
339
- model.train()
340
- for epoch in range(epochs):
341
- total_loss = 0.0
342
- total_pos_loss = 0.0
343
- total_score_loss = 0.0
344
- total_class_loss = 0.0
345
- num_batches = 0
346
-
347
- for batch_idx, batch_data in enumerate(dataloader):
348
- if batch_data is None: # Skip invalid batches
349
- continue
350
-
351
- patch_data, targets, valid_masks, initial_preds, classifications = batch_data
352
- patch_data = patch_data.to(device) # (batch_size, 7, max_points)
353
- targets = targets.to(device) # (batch_size, 3)
354
- classifications = classifications.to(device) # (batch_size,)
355
-
356
- # Forward pass
357
- optimizer.zero_grad()
358
- predictions, predicted_scores, predicted_classes = model(patch_data)
359
-
360
- # Compute actual distance from predictions to targets
361
- actual_distances = torch.norm(predictions - targets, dim=1, keepdim=True)
362
-
363
- # Only compute position and score losses for samples with GT vertices
364
- has_gt_mask = classifications > 0.5
365
-
366
- if has_gt_mask.sum() > 0:
367
- # Position loss only for samples with GT vertices
368
- pos_loss = position_criterion(predictions[has_gt_mask], targets[has_gt_mask])
369
- score_loss = score_criterion(predicted_scores[has_gt_mask], actual_distances[has_gt_mask])
370
- else:
371
- pos_loss = torch.tensor(0.0, device=device)
372
- score_loss = torch.tensor(0.0, device=device)
373
-
374
- # Classification loss for all samples
375
- class_loss = classification_criterion(predicted_classes.squeeze(), classifications)
376
-
377
- # Combined loss
378
- total_batch_loss = pos_loss + score_weight * score_loss + class_weight * class_loss
379
-
380
- # Backward pass
381
- total_batch_loss.backward()
382
- optimizer.step()
383
-
384
- total_loss += total_batch_loss.item()
385
- total_pos_loss += pos_loss.item()
386
- total_score_loss += score_loss.item()
387
- total_class_loss += class_loss.item()
388
- num_batches += 1
389
-
390
- if batch_idx % 50 == 0:
391
- print(f"Epoch {epoch+1}/{epochs}, Batch {batch_idx}, "
392
- f"Total Loss: {total_batch_loss.item():.6f}, "
393
- f"Pos Loss: {pos_loss.item():.6f}, "
394
- f"Score Loss: {score_loss.item():.6f}, "
395
- f"Class Loss: {class_loss.item():.6f}")
396
-
397
- avg_loss = total_loss / num_batches if num_batches > 0 else 0
398
- avg_pos_loss = total_pos_loss / num_batches if num_batches > 0 else 0
399
- avg_score_loss = total_score_loss / num_batches if num_batches > 0 else 0
400
- avg_class_loss = total_class_loss / num_batches if num_batches > 0 else 0
401
-
402
- print(f"Epoch {epoch+1}/{epochs} completed, "
403
- f"Avg Total Loss: {avg_loss:.6f}, "
404
- f"Avg Pos Loss: {avg_pos_loss:.6f}, "
405
- f"Avg Score Loss: {avg_score_loss:.6f}, "
406
- f"Avg Class Loss: {avg_class_loss:.6f}")
407
-
408
- scheduler.step()
409
-
410
- # Save model checkpoint every epoch
411
- checkpoint_path = model_save_path.replace('.pth', f'_epoch_{epoch+1}.pth')
412
- torch.save({
413
- 'model_state_dict': model.state_dict(),
414
- 'optimizer_state_dict': optimizer.state_dict(),
415
- 'epoch': epoch + 1,
416
- 'loss': avg_loss,
417
- }, checkpoint_path)
418
-
419
- # Save the trained model
420
- torch.save({
421
- 'model_state_dict': model.state_dict(),
422
- 'optimizer_state_dict': optimizer.state_dict(),
423
- 'epoch': epochs,
424
- }, model_save_path)
425
-
426
- print(f"Model saved to {model_save_path}")
427
- return model
428
-
429
- def load_pointnet_model(model_path: str, device: torch.device = None, predict_score: bool = True) -> FastPointNet:
430
- """
431
- Load a trained FastPointNet model.
432
-
433
- Args:
434
- model_path: Path to the saved model
435
- device: Device to load the model on
436
- predict_score: Whether the model predicts scores
437
-
438
- Returns:
439
- Loaded FastPointNet model
440
- """
441
- if device is None:
442
- device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
443
-
444
- model = FastPointNet(input_dim=7, output_dim=3, max_points=1024, predict_score=predict_score)
445
-
446
- checkpoint = torch.load(model_path, map_location=device)
447
- model.load_state_dict(checkpoint['model_state_dict'])
448
-
449
- model.to(device)
450
- model.eval()
451
-
452
- return model
453
-
454
- def predict_vertex_from_patch(model: FastPointNet, patch: np.ndarray, device: torch.device = None) -> Tuple[np.ndarray, float, float]:
455
- """
456
- Predict 3D vertex coordinates, confidence score, and classification from a patch using trained PointNet.
457
-
458
- Args:
459
- model: Trained FastPointNet model
460
- patch: Dictionary containing patch data with 'patch_7d' and 'offset' keys
461
- device: Device to run prediction on
462
-
463
- Returns:
464
- tuple of (predicted_coordinates, confidence_score, classification_score)
465
- predicted_coordinates: (3,) numpy array of predicted 3D coordinates
466
- confidence_score: float representing predicted distance to GT (lower is better)
467
- classification_score: float representing probability of GT vertex presence (0-1)
468
- """
469
- if device is None:
470
- device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
471
-
472
- patch_7d = patch['patch_7d'] # (N, 7)
473
-
474
- # Prepare input
475
- max_points = 1024
476
- num_points = patch_7d.shape[0]
477
-
478
- if num_points >= max_points:
479
- # Sample points
480
- indices = np.random.choice(num_points, max_points, replace=False)
481
- patch_sampled = patch_7d[indices]
482
- else:
483
- # Pad with zeros
484
- patch_sampled = np.zeros((max_points, 7))
485
- patch_sampled[:num_points] = patch_7d
486
-
487
- # Convert to tensor
488
- patch_tensor = torch.from_numpy(patch_sampled.T).float().unsqueeze(0) # (1, 7, max_points)
489
- patch_tensor = patch_tensor.to(device)
490
-
491
- # Predict
492
- with torch.no_grad():
493
- outputs = model(patch_tensor)
494
-
495
- if model.predict_score and model.predict_class:
496
- position, score, classification = outputs
497
- position = position.cpu().numpy().squeeze()
498
- score = score.cpu().numpy().squeeze()
499
- classification = torch.sigmoid(classification).cpu().numpy().squeeze() # Apply sigmoid for probability
500
- elif model.predict_score:
501
- position, score = outputs
502
- position = position.cpu().numpy().squeeze()
503
- score = score.cpu().numpy().squeeze()
504
- classification = None
505
- elif model.predict_class:
506
- position, classification = outputs
507
- position = position.cpu().numpy().squeeze()
508
- score = None
509
- classification = torch.sigmoid(classification).cpu().numpy().squeeze() # Apply sigmoid for probability
510
- else:
511
- position = outputs
512
- position = position.cpu().numpy().squeeze()
513
- score = None
514
- classification = None
515
-
516
- # Apply offset correction
517
- offset = patch['cluster_center']
518
- position += offset
519
-
520
- return position, score, classification
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
fast_pointnet_class.py CHANGED
@@ -1,3 +1,9 @@
 
 
 
 
 
 
1
  import os
2
  import torch
3
  import torch.nn as nn
@@ -403,3 +409,4 @@ def predict_class_from_patch(model: ClassificationPointNet, patch: Dict, device:
403
  predicted_class = int(probability > 0.5)
404
 
405
  return predicted_class, probability
 
 
1
+ # This file defines a PointNet-based model for binary classification of 6D point cloud patches.
2
+ # It includes the model architecture (ClassificationPointNet), a custom dataset class
3
+ # (PatchClassificationDataset) for loading and augmenting patches, functions for saving
4
+ # patches to create a dataset, a training loop (train_pointnet), a function to load
5
+ # a trained model (load_pointnet_model), and a function for predicting class labels
6
+ # from new patches (predict_class_from_patch).
7
  import os
8
  import torch
9
  import torch.nn as nn
 
409
  predicted_class = int(probability > 0.5)
410
 
411
  return predicted_class, probability
412
+
fast_pointnet_class_10d.py DELETED
@@ -1,405 +0,0 @@
1
- import os
2
- import torch
3
- import torch.nn as nn
4
- import torch.nn.functional as F
5
- import numpy as np
6
- import pickle
7
- from torch.utils.data import Dataset, DataLoader
8
- from typing import List, Dict, Tuple, Optional
9
- import json
10
-
11
- class ClassificationPointNet(nn.Module):
12
- """
13
- PointNet implementation for binary classification from 10D point cloud patches.
14
- Takes 10D point clouds and predicts binary classification (edge/not edge).
15
- """
16
- def __init__(self, input_dim=10, max_points=1024):
17
- super(ClassificationPointNet, self).__init__()
18
- self.max_points = max_points
19
-
20
- # Point-wise MLPs for feature extraction (deeper network)
21
- self.conv1 = nn.Conv1d(input_dim, 64, 1)
22
- self.conv2 = nn.Conv1d(64, 128, 1)
23
- self.conv3 = nn.Conv1d(128, 256, 1)
24
- self.conv4 = nn.Conv1d(256, 512, 1)
25
- self.conv5 = nn.Conv1d(512, 1024, 1)
26
- self.conv6 = nn.Conv1d(1024, 2048, 1) # Additional layer
27
-
28
- # Classification head (deeper with more capacity)
29
- self.fc1 = nn.Linear(2048, 1024)
30
- self.fc2 = nn.Linear(1024, 512)
31
- self.fc3 = nn.Linear(512, 256)
32
- self.fc4 = nn.Linear(256, 128)
33
- self.fc5 = nn.Linear(128, 64)
34
- self.fc6 = nn.Linear(64, 1) # Single output for binary classification
35
-
36
- # Batch normalization layers
37
- self.bn1 = nn.BatchNorm1d(64)
38
- self.bn2 = nn.BatchNorm1d(128)
39
- self.bn3 = nn.BatchNorm1d(256)
40
- self.bn4 = nn.BatchNorm1d(512)
41
- self.bn5 = nn.BatchNorm1d(1024)
42
- self.bn6 = nn.BatchNorm1d(2048)
43
-
44
- # Dropout layers
45
- self.dropout1 = nn.Dropout(0.3)
46
- self.dropout2 = nn.Dropout(0.4)
47
- self.dropout3 = nn.Dropout(0.5)
48
- self.dropout4 = nn.Dropout(0.4)
49
- self.dropout5 = nn.Dropout(0.3)
50
-
51
- def forward(self, x):
52
- """
53
- Forward pass
54
- Args:
55
- x: (batch_size, input_dim, max_points) tensor
56
- Returns:
57
- classification: (batch_size, 1) tensor of logits (sigmoid for probability)
58
- """
59
- batch_size = x.size(0)
60
-
61
- # Point-wise feature extraction
62
- x1 = F.relu(self.bn1(self.conv1(x)))
63
- x2 = F.relu(self.bn2(self.conv2(x1)))
64
- x3 = F.relu(self.bn3(self.conv3(x2)))
65
- x4 = F.relu(self.bn4(self.conv4(x3)))
66
- x5 = F.relu(self.bn5(self.conv5(x4)))
67
- x6 = F.relu(self.bn6(self.conv6(x5)))
68
-
69
- # Global max pooling
70
- global_features = torch.max(x6, 2)[0] # (batch_size, 2048)
71
-
72
- # Classification head
73
- x = F.relu(self.fc1(global_features))
74
- x = self.dropout1(x)
75
- x = F.relu(self.fc2(x))
76
- x = self.dropout2(x)
77
- x = F.relu(self.fc3(x))
78
- x = self.dropout3(x)
79
- x = F.relu(self.fc4(x))
80
- x = self.dropout4(x)
81
- x = F.relu(self.fc5(x))
82
- x = self.dropout5(x)
83
- classification = self.fc6(x) # (batch_size, 1)
84
-
85
- return classification
86
-
87
- class PatchClassificationDataset(Dataset):
88
- """
89
- Dataset class for loading saved patches for PointNet classification training.
90
- """
91
-
92
- def __init__(self, dataset_dir: str, max_points: int = 1024, augment: bool = True):
93
- self.dataset_dir = dataset_dir
94
- self.max_points = max_points
95
- self.augment = augment
96
-
97
- # Load patch files
98
- self.patch_files = []
99
- for file in os.listdir(dataset_dir):
100
- if file.endswith('.pkl'):
101
- self.patch_files.append(os.path.join(dataset_dir, file))
102
-
103
- print(f"Found {len(self.patch_files)} patch files in {dataset_dir}")
104
-
105
- def __len__(self):
106
- return len(self.patch_files)
107
-
108
- def __getitem__(self, idx):
109
- """
110
- Load and process a patch for training.
111
- Returns:
112
- patch_data: (10, max_points) tensor of point cloud data
113
- label: scalar tensor for binary classification (0 or 1)
114
- valid_mask: (max_points,) boolean tensor indicating valid points
115
- """
116
- patch_file = self.patch_files[idx]
117
-
118
- with open(patch_file, 'rb') as f:
119
- patch_info = pickle.load(f)
120
-
121
- patch_10d = patch_info['patch_10d'] # (N, 10)
122
- label = patch_info.get('label', 0) # Get binary classification label (0 or 1)
123
-
124
- # Pad or sample points to max_points
125
- num_points = patch_10d.shape[0]
126
-
127
- if num_points >= self.max_points:
128
- # Randomly sample max_points
129
- indices = np.random.choice(num_points, self.max_points, replace=False)
130
- patch_sampled = patch_10d[indices]
131
- valid_mask = np.ones(self.max_points, dtype=bool)
132
- else:
133
- # Pad with zeros
134
- patch_sampled = np.zeros((self.max_points, 10))
135
- patch_sampled[:num_points] = patch_10d
136
- valid_mask = np.zeros(self.max_points, dtype=bool)
137
- valid_mask[:num_points] = True
138
-
139
- # Data augmentation
140
- if self.augment:
141
- patch_sampled = self._augment_patch(patch_sampled, valid_mask)
142
-
143
- # Convert to tensors and transpose for conv1d (channels first)
144
- patch_tensor = torch.from_numpy(patch_sampled.T).float() # (10, max_points)
145
- label_tensor = torch.tensor(label, dtype=torch.float32) # Float for BCE loss
146
- valid_mask_tensor = torch.from_numpy(valid_mask)
147
-
148
- return patch_tensor, label_tensor, valid_mask_tensor
149
-
150
- def _augment_patch(self, patch, valid_mask):
151
- """
152
- Apply data augmentation to the patch.
153
- """
154
- valid_points = patch[valid_mask]
155
-
156
- if len(valid_points) == 0:
157
- return patch
158
-
159
- # Random rotation around z-axis (only for xyz coordinates, first 3 dimensions)
160
- angle = np.random.uniform(0, 2 * np.pi)
161
- cos_angle = np.cos(angle)
162
- sin_angle = np.sin(angle)
163
- rotation_matrix = np.array([
164
- [cos_angle, -sin_angle, 0],
165
- [sin_angle, cos_angle, 0],
166
- [0, 0, 1]
167
- ])
168
-
169
- # Apply rotation to xyz coordinates (first 3 dimensions)
170
- valid_points[:, :3] = valid_points[:, :3] @ rotation_matrix.T
171
-
172
- # Random jittering (only for xyz coordinates)
173
- noise = np.random.normal(0, 0.01, valid_points[:, :3].shape)
174
- valid_points[:, :3] += noise
175
-
176
- # Random scaling (only for xyz coordinates)
177
- scale = np.random.uniform(0.9, 1.1)
178
- valid_points[:, :3] *= scale
179
-
180
- patch[valid_mask] = valid_points
181
- return patch
182
-
183
- def save_patches_dataset(patches: List[Dict], dataset_dir: str, entry_id: str):
184
- """
185
- Save patches from prediction pipeline to create a training dataset.
186
-
187
- Args:
188
- patches: List of patch dictionaries from generate_patches()
189
- dataset_dir: Directory to save the dataset
190
- entry_id: Unique identifier for this entry/image
191
- """
192
- os.makedirs(dataset_dir, exist_ok=True)
193
-
194
- for i, patch in enumerate(patches):
195
- # Create unique filename
196
- filename = f"{entry_id}_patch_{i}.pkl"
197
- filepath = os.path.join(dataset_dir, filename)
198
-
199
- # Skip if file already exists
200
- if os.path.exists(filepath):
201
- continue
202
-
203
- # Save patch data
204
- with open(filepath, 'wb') as f:
205
- pickle.dump(patch, f)
206
-
207
- print(f"Saved {len(patches)} patches for entry {entry_id}")
208
-
209
- # Create dataloader with custom collate function to filter invalid samples
210
- def collate_fn(batch):
211
- valid_batch = []
212
- for patch_data, label, valid_mask in batch:
213
- # Filter out invalid samples (no valid points)
214
- if valid_mask.sum() > 0:
215
- valid_batch.append((patch_data, label, valid_mask))
216
-
217
- if len(valid_batch) == 0:
218
- return None
219
-
220
- # Stack valid samples
221
- patch_data = torch.stack([item[0] for item in valid_batch])
222
- labels = torch.stack([item[1] for item in valid_batch])
223
- valid_masks = torch.stack([item[2] for item in valid_batch])
224
-
225
- return patch_data, labels, valid_masks
226
-
227
- # Initialize weights using Xavier/Glorot initialization
228
- def init_weights(m):
229
- if isinstance(m, nn.Conv1d):
230
- nn.init.xavier_uniform_(m.weight)
231
- if m.bias is not None:
232
- nn.init.zeros_(m.bias)
233
- elif isinstance(m, nn.Linear):
234
- nn.init.xavier_uniform_(m.weight)
235
- if m.bias is not None:
236
- nn.init.zeros_(m.bias)
237
- elif isinstance(m, nn.BatchNorm1d):
238
- nn.init.ones_(m.weight)
239
- nn.init.zeros_(m.bias)
240
-
241
- def train_pointnet(dataset_dir: str, model_save_path: str, epochs: int = 100, batch_size: int = 32,
242
- lr: float = 0.001):
243
- """
244
- Train the ClassificationPointNet model on saved patches.
245
-
246
- Args:
247
- dataset_dir: Directory containing saved patch files
248
- model_save_path: Path to save the trained model
249
- epochs: Number of training epochs
250
- batch_size: Training batch size
251
- lr: Learning rate
252
- """
253
- device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
254
- print(f"Training on device: {device}")
255
-
256
- # Create dataset and dataloader
257
- dataset = PatchClassificationDataset(dataset_dir, max_points=1024, augment=True)
258
- print(f"Dataset loaded with {len(dataset)} samples")
259
-
260
- dataloader = DataLoader(dataset, batch_size=batch_size, shuffle=True, num_workers=8,
261
- collate_fn=collate_fn, drop_last=True)
262
-
263
- # Initialize model
264
- model = ClassificationPointNet(input_dim=10, max_points=1024)
265
- model.apply(init_weights)
266
- model.to(device)
267
-
268
- # Loss function and optimizer (BCE for binary classification)
269
- criterion = nn.BCEWithLogitsLoss()
270
- optimizer = torch.optim.Adam(model.parameters(), lr=lr, weight_decay=1e-4)
271
- scheduler = torch.optim.lr_scheduler.StepLR(optimizer, step_size=30, gamma=0.5)
272
-
273
- # Training loop
274
- model.train()
275
- for epoch in range(epochs):
276
- total_loss = 0.0
277
- correct = 0
278
- total = 0
279
- num_batches = 0
280
-
281
- for batch_idx, batch_data in enumerate(dataloader):
282
- if batch_data is None: # Skip invalid batches
283
- continue
284
-
285
- patch_data, labels, valid_masks = batch_data
286
- patch_data = patch_data.to(device) # (batch_size, 10, max_points)
287
- labels = labels.to(device).unsqueeze(1) # (batch_size, 1)
288
-
289
- # Forward pass
290
- optimizer.zero_grad()
291
- outputs = model(patch_data) # (batch_size, 1)
292
- loss = criterion(outputs, labels)
293
-
294
- # Backward pass
295
- loss.backward()
296
- optimizer.step()
297
-
298
- # Statistics
299
- total_loss += loss.item()
300
- predicted = (torch.sigmoid(outputs) > 0.5).float()
301
- total += labels.size(0)
302
- correct += (predicted == labels).sum().item()
303
- num_batches += 1
304
-
305
- if batch_idx % 50 == 0:
306
- print(f"Epoch {epoch+1}/{epochs}, Batch {batch_idx}, "
307
- f"Loss: {loss.item():.6f}, "
308
- f"Accuracy: {100 * correct / total:.2f}%")
309
-
310
- avg_loss = total_loss / num_batches if num_batches > 0 else 0
311
- accuracy = 100 * correct / total if total > 0 else 0
312
-
313
- print(f"Epoch {epoch+1}/{epochs} completed, "
314
- f"Avg Loss: {avg_loss:.6f}, "
315
- f"Accuracy: {accuracy:.2f}%")
316
-
317
- scheduler.step()
318
-
319
- # Save model checkpoint every epoch
320
- checkpoint_path = model_save_path.replace('.pth', f'_epoch_{epoch+1}.pth')
321
- torch.save({
322
- 'model_state_dict': model.state_dict(),
323
- 'optimizer_state_dict': optimizer.state_dict(),
324
- 'epoch': epoch + 1,
325
- 'loss': avg_loss,
326
- 'accuracy': accuracy,
327
- }, checkpoint_path)
328
-
329
- # Save the trained model
330
- torch.save({
331
- 'model_state_dict': model.state_dict(),
332
- 'optimizer_state_dict': optimizer.state_dict(),
333
- 'epoch': epochs,
334
- }, model_save_path)
335
-
336
- print(f"Model saved to {model_save_path}")
337
- return model
338
-
339
- def load_pointnet_model(model_path: str, device: torch.device = None) -> ClassificationPointNet:
340
- """
341
- Load a trained ClassificationPointNet model.
342
-
343
- Args:
344
- model_path: Path to the saved model
345
- device: Device to load the model on
346
-
347
- Returns:
348
- Loaded ClassificationPointNet model
349
- """
350
- if device is None:
351
- device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
352
-
353
- model = ClassificationPointNet(input_dim=10, max_points=1024)
354
-
355
- checkpoint = torch.load(model_path, map_location=device)
356
- model.load_state_dict(checkpoint['model_state_dict'])
357
-
358
- model.to(device)
359
- model.eval()
360
-
361
- return model
362
-
363
- def predict_class_from_patch(model: ClassificationPointNet, patch: Dict, device: torch.device = None) -> Tuple[int, float]:
364
- """
365
- Predict binary classification from a patch using trained PointNet.
366
-
367
- Args:
368
- model: Trained ClassificationPointNet model
369
- patch: Dictionary containing patch data with 'patch_10d' key
370
- device: Device to run prediction on
371
-
372
- Returns:
373
- tuple of (predicted_class, confidence)
374
- predicted_class: int (0 for not edge, 1 for edge)
375
- confidence: float representing confidence score (0-1)
376
- """
377
- if device is None:
378
- device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
379
-
380
- patch_10d = patch['patch_10d'] # (N, 10)
381
-
382
- # Prepare input
383
- max_points = 1024
384
- num_points = patch_10d.shape[0]
385
-
386
- if num_points >= max_points:
387
- # Sample points
388
- indices = np.random.choice(num_points, max_points, replace=False)
389
- patch_sampled = patch_10d[indices]
390
- else:
391
- # Pad with zeros
392
- patch_sampled = np.zeros((max_points, 10))
393
- patch_sampled[:num_points] = patch_10d
394
-
395
- # Convert to tensor
396
- patch_tensor = torch.from_numpy(patch_sampled.T).float().unsqueeze(0) # (1, 10, max_points)
397
- patch_tensor = patch_tensor.to(device)
398
-
399
- # Predict
400
- with torch.no_grad():
401
- outputs = model(patch_tensor) # (1, 1)
402
- probability = torch.sigmoid(outputs).item()
403
- predicted_class = int(probability > 0.5)
404
-
405
- return predicted_class, probability
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
fast_pointnet_class_10d_2048.py DELETED
@@ -1,405 +0,0 @@
1
- import os
2
- import torch
3
- import torch.nn as nn
4
- import torch.nn.functional as F
5
- import numpy as np
6
- import pickle
7
- from torch.utils.data import Dataset, DataLoader
8
- from typing import List, Dict, Tuple, Optional
9
- import json
10
-
11
- class ClassificationPointNet(nn.Module):
12
- """
13
- PointNet implementation for binary classification from 10D point cloud patches.
14
- Takes 10D point clouds and predicts binary classification (edge/not edge).
15
- """
16
- def __init__(self, input_dim=10, max_points=2048):
17
- super(ClassificationPointNet, self).__init__()
18
- self.max_points = max_points
19
-
20
- # Point-wise MLPs for feature extraction (deeper network)
21
- self.conv1 = nn.Conv1d(input_dim, 64, 1)
22
- self.conv2 = nn.Conv1d(64, 128, 1)
23
- self.conv3 = nn.Conv1d(128, 256, 1)
24
- self.conv4 = nn.Conv1d(256, 512, 1)
25
- self.conv5 = nn.Conv1d(512, 1024, 1)
26
- self.conv6 = nn.Conv1d(1024, 2048, 1) # Additional layer
27
-
28
- # Classification head (deeper with more capacity)
29
- self.fc1 = nn.Linear(2048, 1024)
30
- self.fc2 = nn.Linear(1024, 512)
31
- self.fc3 = nn.Linear(512, 256)
32
- self.fc4 = nn.Linear(256, 128)
33
- self.fc5 = nn.Linear(128, 64)
34
- self.fc6 = nn.Linear(64, 1) # Single output for binary classification
35
-
36
- # Batch normalization layers
37
- self.bn1 = nn.BatchNorm1d(64)
38
- self.bn2 = nn.BatchNorm1d(128)
39
- self.bn3 = nn.BatchNorm1d(256)
40
- self.bn4 = nn.BatchNorm1d(512)
41
- self.bn5 = nn.BatchNorm1d(1024)
42
- self.bn6 = nn.BatchNorm1d(2048)
43
-
44
- # Dropout layers
45
- self.dropout1 = nn.Dropout(0.3)
46
- self.dropout2 = nn.Dropout(0.4)
47
- self.dropout3 = nn.Dropout(0.5)
48
- self.dropout4 = nn.Dropout(0.4)
49
- self.dropout5 = nn.Dropout(0.3)
50
-
51
- def forward(self, x):
52
- """
53
- Forward pass
54
- Args:
55
- x: (batch_size, input_dim, max_points) tensor
56
- Returns:
57
- classification: (batch_size, 1) tensor of logits (sigmoid for probability)
58
- """
59
- batch_size = x.size(0)
60
-
61
- # Point-wise feature extraction
62
- x1 = F.relu(self.bn1(self.conv1(x)))
63
- x2 = F.relu(self.bn2(self.conv2(x1)))
64
- x3 = F.relu(self.bn3(self.conv3(x2)))
65
- x4 = F.relu(self.bn4(self.conv4(x3)))
66
- x5 = F.relu(self.bn5(self.conv5(x4)))
67
- x6 = F.relu(self.bn6(self.conv6(x5)))
68
-
69
- # Global max pooling
70
- global_features = torch.max(x6, 2)[0] # (batch_size, 2048)
71
-
72
- # Classification head
73
- x = F.relu(self.fc1(global_features))
74
- x = self.dropout1(x)
75
- x = F.relu(self.fc2(x))
76
- x = self.dropout2(x)
77
- x = F.relu(self.fc3(x))
78
- x = self.dropout3(x)
79
- x = F.relu(self.fc4(x))
80
- x = self.dropout4(x)
81
- x = F.relu(self.fc5(x))
82
- x = self.dropout5(x)
83
- classification = self.fc6(x) # (batch_size, 1)
84
-
85
- return classification
86
-
87
- class PatchClassificationDataset(Dataset):
88
- """
89
- Dataset class for loading saved patches for PointNet classification training.
90
- """
91
-
92
- def __init__(self, dataset_dir: str, max_points: int = 2048, augment: bool = True):
93
- self.dataset_dir = dataset_dir
94
- self.max_points = max_points
95
- self.augment = augment
96
-
97
- # Load patch files
98
- self.patch_files = []
99
- for file in os.listdir(dataset_dir):
100
- if file.endswith('.pkl'):
101
- self.patch_files.append(os.path.join(dataset_dir, file))
102
-
103
- print(f"Found {len(self.patch_files)} patch files in {dataset_dir}")
104
-
105
- def __len__(self):
106
- return len(self.patch_files)
107
-
108
- def __getitem__(self, idx):
109
- """
110
- Load and process a patch for training.
111
- Returns:
112
- patch_data: (10, max_points) tensor of point cloud data
113
- label: scalar tensor for binary classification (0 or 1)
114
- valid_mask: (max_points,) boolean tensor indicating valid points
115
- """
116
- patch_file = self.patch_files[idx]
117
-
118
- with open(patch_file, 'rb') as f:
119
- patch_info = pickle.load(f)
120
-
121
- patch_10d = patch_info['patch_10d'] # (N, 10)
122
- label = patch_info.get('label', 0) # Get binary classification label (0 or 1)
123
-
124
- # Pad or sample points to max_points
125
- num_points = patch_10d.shape[0]
126
-
127
- if num_points >= self.max_points:
128
- # Randomly sample max_points
129
- indices = np.random.choice(num_points, self.max_points, replace=False)
130
- patch_sampled = patch_10d[indices]
131
- valid_mask = np.ones(self.max_points, dtype=bool)
132
- else:
133
- # Pad with zeros
134
- patch_sampled = np.zeros((self.max_points, 10))
135
- patch_sampled[:num_points] = patch_10d
136
- valid_mask = np.zeros(self.max_points, dtype=bool)
137
- valid_mask[:num_points] = True
138
-
139
- # Data augmentation
140
- if self.augment:
141
- patch_sampled = self._augment_patch(patch_sampled, valid_mask)
142
-
143
- # Convert to tensors and transpose for conv1d (channels first)
144
- patch_tensor = torch.from_numpy(patch_sampled.T).float() # (10, max_points)
145
- label_tensor = torch.tensor(label, dtype=torch.float32) # Float for BCE loss
146
- valid_mask_tensor = torch.from_numpy(valid_mask)
147
-
148
- return patch_tensor, label_tensor, valid_mask_tensor
149
-
150
- def _augment_patch(self, patch, valid_mask):
151
- """
152
- Apply data augmentation to the patch.
153
- """
154
- valid_points = patch[valid_mask]
155
-
156
- if len(valid_points) == 0:
157
- return patch
158
-
159
- # Random rotation around z-axis (only for xyz coordinates, first 3 dimensions)
160
- angle = np.random.uniform(0, 2 * np.pi)
161
- cos_angle = np.cos(angle)
162
- sin_angle = np.sin(angle)
163
- rotation_matrix = np.array([
164
- [cos_angle, -sin_angle, 0],
165
- [sin_angle, cos_angle, 0],
166
- [0, 0, 1]
167
- ])
168
-
169
- # Apply rotation to xyz coordinates (first 3 dimensions)
170
- valid_points[:, :3] = valid_points[:, :3] @ rotation_matrix.T
171
-
172
- # Random jittering (only for xyz coordinates)
173
- noise = np.random.normal(0, 0.01, valid_points[:, :3].shape)
174
- valid_points[:, :3] += noise
175
-
176
- # Random scaling (only for xyz coordinates)
177
- scale = np.random.uniform(0.9, 1.1)
178
- valid_points[:, :3] *= scale
179
-
180
- patch[valid_mask] = valid_points
181
- return patch
182
-
183
- def save_patches_dataset(patches: List[Dict], dataset_dir: str, entry_id: str):
184
- """
185
- Save patches from prediction pipeline to create a training dataset.
186
-
187
- Args:
188
- patches: List of patch dictionaries from generate_patches()
189
- dataset_dir: Directory to save the dataset
190
- entry_id: Unique identifier for this entry/image
191
- """
192
- os.makedirs(dataset_dir, exist_ok=True)
193
-
194
- for i, patch in enumerate(patches):
195
- # Create unique filename
196
- filename = f"{entry_id}_patch_{i}.pkl"
197
- filepath = os.path.join(dataset_dir, filename)
198
-
199
- # Skip if file already exists
200
- if os.path.exists(filepath):
201
- continue
202
-
203
- # Save patch data
204
- with open(filepath, 'wb') as f:
205
- pickle.dump(patch, f)
206
-
207
- print(f"Saved {len(patches)} patches for entry {entry_id}")
208
-
209
- # Create dataloader with custom collate function to filter invalid samples
210
- def collate_fn(batch):
211
- valid_batch = []
212
- for patch_data, label, valid_mask in batch:
213
- # Filter out invalid samples (no valid points)
214
- if valid_mask.sum() > 0:
215
- valid_batch.append((patch_data, label, valid_mask))
216
-
217
- if len(valid_batch) == 0:
218
- return None
219
-
220
- # Stack valid samples
221
- patch_data = torch.stack([item[0] for item in valid_batch])
222
- labels = torch.stack([item[1] for item in valid_batch])
223
- valid_masks = torch.stack([item[2] for item in valid_batch])
224
-
225
- return patch_data, labels, valid_masks
226
-
227
- # Initialize weights using Xavier/Glorot initialization
228
- def init_weights(m):
229
- if isinstance(m, nn.Conv1d):
230
- nn.init.xavier_uniform_(m.weight)
231
- if m.bias is not None:
232
- nn.init.zeros_(m.bias)
233
- elif isinstance(m, nn.Linear):
234
- nn.init.xavier_uniform_(m.weight)
235
- if m.bias is not None:
236
- nn.init.zeros_(m.bias)
237
- elif isinstance(m, nn.BatchNorm1d):
238
- nn.init.ones_(m.weight)
239
- nn.init.zeros_(m.bias)
240
-
241
- def train_pointnet(dataset_dir: str, model_save_path: str, epochs: int = 100, batch_size: int = 32,
242
- lr: float = 0.001):
243
- """
244
- Train the ClassificationPointNet model on saved patches.
245
-
246
- Args:
247
- dataset_dir: Directory containing saved patch files
248
- model_save_path: Path to save the trained model
249
- epochs: Number of training epochs
250
- batch_size: Training batch size
251
- lr: Learning rate
252
- """
253
- device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
254
- print(f"Training on device: {device}")
255
-
256
- # Create dataset and dataloader
257
- dataset = PatchClassificationDataset(dataset_dir, max_points=2048, augment=True)
258
- print(f"Dataset loaded with {len(dataset)} samples")
259
-
260
- dataloader = DataLoader(dataset, batch_size=batch_size, shuffle=True, num_workers=8,
261
- collate_fn=collate_fn, drop_last=True)
262
-
263
- # Initialize model
264
- model = ClassificationPointNet(input_dim=10, max_points=2048)
265
- model.apply(init_weights)
266
- model.to(device)
267
-
268
- # Loss function and optimizer (BCE for binary classification)
269
- criterion = nn.BCEWithLogitsLoss()
270
- optimizer = torch.optim.Adam(model.parameters(), lr=lr, weight_decay=1e-4)
271
- scheduler = torch.optim.lr_scheduler.StepLR(optimizer, step_size=30, gamma=0.5)
272
-
273
- # Training loop
274
- model.train()
275
- for epoch in range(epochs):
276
- total_loss = 0.0
277
- correct = 0
278
- total = 0
279
- num_batches = 0
280
-
281
- for batch_idx, batch_data in enumerate(dataloader):
282
- if batch_data is None: # Skip invalid batches
283
- continue
284
-
285
- patch_data, labels, valid_masks = batch_data
286
- patch_data = patch_data.to(device) # (batch_size, 10, max_points)
287
- labels = labels.to(device).unsqueeze(1) # (batch_size, 1)
288
-
289
- # Forward pass
290
- optimizer.zero_grad()
291
- outputs = model(patch_data) # (batch_size, 1)
292
- loss = criterion(outputs, labels)
293
-
294
- # Backward pass
295
- loss.backward()
296
- optimizer.step()
297
-
298
- # Statistics
299
- total_loss += loss.item()
300
- predicted = (torch.sigmoid(outputs) > 0.5).float()
301
- total += labels.size(0)
302
- correct += (predicted == labels).sum().item()
303
- num_batches += 1
304
-
305
- if batch_idx % 50 == 0:
306
- print(f"Epoch {epoch+1}/{epochs}, Batch {batch_idx}, "
307
- f"Loss: {loss.item():.6f}, "
308
- f"Accuracy: {100 * correct / total:.2f}%")
309
-
310
- avg_loss = total_loss / num_batches if num_batches > 0 else 0
311
- accuracy = 100 * correct / total if total > 0 else 0
312
-
313
- print(f"Epoch {epoch+1}/{epochs} completed, "
314
- f"Avg Loss: {avg_loss:.6f}, "
315
- f"Accuracy: {accuracy:.2f}%")
316
-
317
- scheduler.step()
318
-
319
- # Save model checkpoint every epoch
320
- checkpoint_path = model_save_path.replace('.pth', f'_epoch_{epoch+1}.pth')
321
- torch.save({
322
- 'model_state_dict': model.state_dict(),
323
- 'optimizer_state_dict': optimizer.state_dict(),
324
- 'epoch': epoch + 1,
325
- 'loss': avg_loss,
326
- 'accuracy': accuracy,
327
- }, checkpoint_path)
328
-
329
- # Save the trained model
330
- torch.save({
331
- 'model_state_dict': model.state_dict(),
332
- 'optimizer_state_dict': optimizer.state_dict(),
333
- 'epoch': epochs,
334
- }, model_save_path)
335
-
336
- print(f"Model saved to {model_save_path}")
337
- return model
338
-
339
- def load_pointnet_model(model_path: str, device: torch.device = None) -> ClassificationPointNet:
340
- """
341
- Load a trained ClassificationPointNet model.
342
-
343
- Args:
344
- model_path: Path to the saved model
345
- device: Device to load the model on
346
-
347
- Returns:
348
- Loaded ClassificationPointNet model
349
- """
350
- if device is None:
351
- device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
352
-
353
- model = ClassificationPointNet(input_dim=10, max_points=2048)
354
-
355
- checkpoint = torch.load(model_path, map_location=device)
356
- model.load_state_dict(checkpoint['model_state_dict'])
357
-
358
- model.to(device)
359
- model.eval()
360
-
361
- return model
362
-
363
- def predict_class_from_patch(model: ClassificationPointNet, patch: Dict, device: torch.device = None) -> Tuple[int, float]:
364
- """
365
- Predict binary classification from a patch using trained PointNet.
366
-
367
- Args:
368
- model: Trained ClassificationPointNet model
369
- patch: Dictionary containing patch data with 'patch_10d' key
370
- device: Device to run prediction on
371
-
372
- Returns:
373
- tuple of (predicted_class, confidence)
374
- predicted_class: int (0 for not edge, 1 for edge)
375
- confidence: float representing confidence score (0-1)
376
- """
377
- if device is None:
378
- device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
379
-
380
- patch_10d = patch['patch_10d'] # (N, 10)
381
-
382
- # Prepare input
383
- max_points = 2048
384
- num_points = patch_10d.shape[0]
385
-
386
- if num_points >= max_points:
387
- # Sample points
388
- indices = np.random.choice(num_points, max_points, replace=False)
389
- patch_sampled = patch_10d[indices]
390
- else:
391
- # Pad with zeros
392
- patch_sampled = np.zeros((max_points, 10))
393
- patch_sampled[:num_points] = patch_10d
394
-
395
- # Convert to tensor
396
- patch_tensor = torch.from_numpy(patch_sampled.T).float().unsqueeze(0) # (1, 10, max_points)
397
- patch_tensor = patch_tensor.to(device)
398
-
399
- # Predict
400
- with torch.no_grad():
401
- outputs = model(patch_tensor) # (1, 1)
402
- probability = torch.sigmoid(outputs).item()
403
- predicted_class = int(probability > 0.5)
404
-
405
- return predicted_class, probability
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
fast_pointnet_class_10d_deeper.py DELETED
@@ -1,438 +0,0 @@
1
- import os
2
- import torch
3
- import torch.nn as nn
4
- import torch.nn.functional as F
5
- import numpy as np
6
- import pickle
7
- from torch.utils.data import Dataset, DataLoader
8
- from typing import List, Dict, Tuple, Optional
9
- import json
10
-
11
- class ClassificationPointNet(nn.Module):
12
- """
13
- PointNet implementation for binary classification from 10D point cloud patches.
14
- Takes 10D point clouds and predicts binary classification (edge/not edge).
15
- Enhanced with residual connections and attention mechanism.
16
- """
17
- def __init__(self, input_dim=10, max_points=1024):
18
- super(ClassificationPointNet, self).__init__()
19
- self.max_points = max_points
20
-
21
- # Point-wise MLPs for feature extraction (deeper with residual connections)
22
- self.conv1 = nn.Conv1d(input_dim, 64, 1)
23
- self.conv2 = nn.Conv1d(64, 128, 1)
24
- self.conv3 = nn.Conv1d(128, 256, 1)
25
- self.conv4 = nn.Conv1d(256, 512, 1)
26
- self.conv5 = nn.Conv1d(512, 1024, 1)
27
- self.conv6 = nn.Conv1d(1024, 2048, 1)
28
- self.conv7 = nn.Conv1d(2048, 2048, 1) # Additional layer
29
-
30
- # Residual connection layers
31
- self.residual1 = nn.Conv1d(128, 256, 1)
32
- self.residual2 = nn.Conv1d(512, 1024, 1)
33
-
34
- # Attention mechanism
35
- self.attention = nn.Conv1d(2048, 1, 1)
36
-
37
- # Classification head (deeper with more capacity)
38
- self.fc1 = nn.Linear(2048, 1536)
39
- self.fc2 = nn.Linear(1536, 1024)
40
- self.fc3 = nn.Linear(1024, 512)
41
- self.fc4 = nn.Linear(512, 256)
42
- self.fc5 = nn.Linear(256, 128)
43
- self.fc6 = nn.Linear(128, 64)
44
- self.fc7 = nn.Linear(64, 32)
45
- self.fc8 = nn.Linear(32, 1) # Single output for binary classification
46
-
47
- # Batch normalization layers
48
- self.bn1 = nn.BatchNorm1d(64)
49
- self.bn2 = nn.BatchNorm1d(128)
50
- self.bn3 = nn.BatchNorm1d(256)
51
- self.bn4 = nn.BatchNorm1d(512)
52
- self.bn5 = nn.BatchNorm1d(1024)
53
- self.bn6 = nn.BatchNorm1d(2048)
54
- self.bn7 = nn.BatchNorm1d(2048)
55
-
56
- # Dropout layers with varying rates
57
- self.dropout1 = nn.Dropout(0.2)
58
- self.dropout2 = nn.Dropout(0.3)
59
- self.dropout3 = nn.Dropout(0.4)
60
- self.dropout4 = nn.Dropout(0.5)
61
- self.dropout5 = nn.Dropout(0.4)
62
- self.dropout6 = nn.Dropout(0.3)
63
- self.dropout7 = nn.Dropout(0.2)
64
-
65
- def forward(self, x):
66
- """
67
- Forward pass with residual connections and attention
68
- Args:
69
- x: (batch_size, input_dim, max_points) tensor
70
- Returns:
71
- classification: (batch_size, 1) tensor of logits (sigmoid for probability)
72
- """
73
- batch_size = x.size(0)
74
-
75
- # Point-wise feature extraction with residual connections
76
- x1 = F.relu(self.bn1(self.conv1(x)))
77
- x2 = F.relu(self.bn2(self.conv2(x1)))
78
- x3 = F.relu(self.bn3(self.conv3(x2)))
79
-
80
- # First residual connection
81
- x3_res = x3 + self.residual1(x2)
82
-
83
- x4 = F.relu(self.bn4(self.conv4(x3_res)))
84
- x5 = F.relu(self.bn5(self.conv5(x4)))
85
-
86
- # Second residual connection
87
- x5_res = x5 + self.residual2(x4)
88
-
89
- x6 = F.relu(self.bn6(self.conv6(x5_res)))
90
- x7 = F.relu(self.bn7(self.conv7(x6)))
91
-
92
- # Attention mechanism
93
- attention_weights = F.softmax(self.attention(x7), dim=2) # (batch_size, 1, max_points)
94
- x7_weighted = x7 * attention_weights # Apply attention
95
-
96
- # Global max pooling combined with attention-weighted average pooling
97
- global_max = torch.max(x7, 2)[0] # (batch_size, 2048)
98
- global_avg = torch.sum(x7_weighted, 2) # (batch_size, 2048)
99
- global_features = global_max + global_avg # Combine features
100
-
101
- # Classification head with residual connections
102
- x = F.relu(self.fc1(global_features))
103
- x = self.dropout1(x)
104
- x = F.relu(self.fc2(x))
105
- x = self.dropout2(x)
106
- x_mid = F.relu(self.fc3(x))
107
- x = self.dropout3(x_mid)
108
- x = F.relu(self.fc4(x))
109
- x = self.dropout4(x)
110
- x = F.relu(self.fc5(x))
111
- x = self.dropout5(x)
112
- x = F.relu(self.fc6(x))
113
- x = self.dropout6(x)
114
- x = F.relu(self.fc7(x))
115
- x = self.dropout7(x)
116
- classification = self.fc8(x) # (batch_size, 1)
117
-
118
- return classification
119
-
120
- class PatchClassificationDataset(Dataset):
121
- """
122
- Dataset class for loading saved patches for PointNet classification training.
123
- """
124
-
125
- def __init__(self, dataset_dir: str, max_points: int = 1024, augment: bool = True):
126
- self.dataset_dir = dataset_dir
127
- self.max_points = max_points
128
- self.augment = augment
129
-
130
- # Load patch files
131
- self.patch_files = []
132
- for file in os.listdir(dataset_dir):
133
- if file.endswith('.pkl'):
134
- self.patch_files.append(os.path.join(dataset_dir, file))
135
-
136
- print(f"Found {len(self.patch_files)} patch files in {dataset_dir}")
137
-
138
- def __len__(self):
139
- return len(self.patch_files)
140
-
141
- def __getitem__(self, idx):
142
- """
143
- Load and process a patch for training.
144
- Returns:
145
- patch_data: (10, max_points) tensor of point cloud data
146
- label: scalar tensor for binary classification (0 or 1)
147
- valid_mask: (max_points,) boolean tensor indicating valid points
148
- """
149
- patch_file = self.patch_files[idx]
150
-
151
- with open(patch_file, 'rb') as f:
152
- patch_info = pickle.load(f)
153
-
154
- patch_10d = patch_info['patch_10d'] # (N, 10)
155
- label = patch_info.get('label', 0) # Get binary classification label (0 or 1)
156
-
157
- # Pad or sample points to max_points
158
- num_points = patch_10d.shape[0]
159
-
160
- if num_points >= self.max_points:
161
- # Randomly sample max_points
162
- indices = np.random.choice(num_points, self.max_points, replace=False)
163
- patch_sampled = patch_10d[indices]
164
- valid_mask = np.ones(self.max_points, dtype=bool)
165
- else:
166
- # Pad with zeros
167
- patch_sampled = np.zeros((self.max_points, 10))
168
- patch_sampled[:num_points] = patch_10d
169
- valid_mask = np.zeros(self.max_points, dtype=bool)
170
- valid_mask[:num_points] = True
171
-
172
- # Data augmentation
173
- if self.augment:
174
- patch_sampled = self._augment_patch(patch_sampled, valid_mask)
175
-
176
- # Convert to tensors and transpose for conv1d (channels first)
177
- patch_tensor = torch.from_numpy(patch_sampled.T).float() # (10, max_points)
178
- label_tensor = torch.tensor(label, dtype=torch.float32) # Float for BCE loss
179
- valid_mask_tensor = torch.from_numpy(valid_mask)
180
-
181
- return patch_tensor, label_tensor, valid_mask_tensor
182
-
183
- def _augment_patch(self, patch, valid_mask):
184
- """
185
- Apply data augmentation to the patch.
186
- """
187
- valid_points = patch[valid_mask]
188
-
189
- if len(valid_points) == 0:
190
- return patch
191
-
192
- # Random rotation around z-axis (only for xyz coordinates, first 3 dimensions)
193
- angle = np.random.uniform(0, 2 * np.pi)
194
- cos_angle = np.cos(angle)
195
- sin_angle = np.sin(angle)
196
- rotation_matrix = np.array([
197
- [cos_angle, -sin_angle, 0],
198
- [sin_angle, cos_angle, 0],
199
- [0, 0, 1]
200
- ])
201
-
202
- # Apply rotation to xyz coordinates (first 3 dimensions)
203
- valid_points[:, :3] = valid_points[:, :3] @ rotation_matrix.T
204
-
205
- # Random jittering (only for xyz coordinates)
206
- noise = np.random.normal(0, 0.01, valid_points[:, :3].shape)
207
- valid_points[:, :3] += noise
208
-
209
- # Random scaling (only for xyz coordinates)
210
- scale = np.random.uniform(0.9, 1.1)
211
- valid_points[:, :3] *= scale
212
-
213
- patch[valid_mask] = valid_points
214
- return patch
215
-
216
- def save_patches_dataset(patches: List[Dict], dataset_dir: str, entry_id: str):
217
- """
218
- Save patches from prediction pipeline to create a training dataset.
219
-
220
- Args:
221
- patches: List of patch dictionaries from generate_patches()
222
- dataset_dir: Directory to save the dataset
223
- entry_id: Unique identifier for this entry/image
224
- """
225
- os.makedirs(dataset_dir, exist_ok=True)
226
-
227
- for i, patch in enumerate(patches):
228
- # Create unique filename
229
- filename = f"{entry_id}_patch_{i}.pkl"
230
- filepath = os.path.join(dataset_dir, filename)
231
-
232
- # Skip if file already exists
233
- if os.path.exists(filepath):
234
- continue
235
-
236
- # Save patch data
237
- with open(filepath, 'wb') as f:
238
- pickle.dump(patch, f)
239
-
240
- print(f"Saved {len(patches)} patches for entry {entry_id}")
241
-
242
- # Create dataloader with custom collate function to filter invalid samples
243
- def collate_fn(batch):
244
- valid_batch = []
245
- for patch_data, label, valid_mask in batch:
246
- # Filter out invalid samples (no valid points)
247
- if valid_mask.sum() > 0:
248
- valid_batch.append((patch_data, label, valid_mask))
249
-
250
- if len(valid_batch) == 0:
251
- return None
252
-
253
- # Stack valid samples
254
- patch_data = torch.stack([item[0] for item in valid_batch])
255
- labels = torch.stack([item[1] for item in valid_batch])
256
- valid_masks = torch.stack([item[2] for item in valid_batch])
257
-
258
- return patch_data, labels, valid_masks
259
-
260
- # Initialize weights using Xavier/Glorot initialization
261
- def init_weights(m):
262
- if isinstance(m, nn.Conv1d):
263
- nn.init.xavier_uniform_(m.weight)
264
- if m.bias is not None:
265
- nn.init.zeros_(m.bias)
266
- elif isinstance(m, nn.Linear):
267
- nn.init.xavier_uniform_(m.weight)
268
- if m.bias is not None:
269
- nn.init.zeros_(m.bias)
270
- elif isinstance(m, nn.BatchNorm1d):
271
- nn.init.ones_(m.weight)
272
- nn.init.zeros_(m.bias)
273
-
274
- def train_pointnet(dataset_dir: str, model_save_path: str, epochs: int = 100, batch_size: int = 32,
275
- lr: float = 0.001):
276
- """
277
- Train the ClassificationPointNet model on saved patches.
278
-
279
- Args:
280
- dataset_dir: Directory containing saved patch files
281
- model_save_path: Path to save the trained model
282
- epochs: Number of training epochs
283
- batch_size: Training batch size
284
- lr: Learning rate
285
- """
286
- device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
287
- print(f"Training on device: {device}")
288
-
289
- # Create dataset and dataloader
290
- dataset = PatchClassificationDataset(dataset_dir, max_points=1024, augment=True)
291
- print(f"Dataset loaded with {len(dataset)} samples")
292
-
293
- dataloader = DataLoader(dataset, batch_size=batch_size, shuffle=True, num_workers=8,
294
- collate_fn=collate_fn, drop_last=True)
295
-
296
- # Initialize model
297
- model = ClassificationPointNet(input_dim=10, max_points=1024)
298
- model.apply(init_weights)
299
- model.to(device)
300
-
301
- # Loss function and optimizer (BCE for binary classification)
302
- criterion = nn.BCEWithLogitsLoss()
303
- optimizer = torch.optim.Adam(model.parameters(), lr=lr, weight_decay=1e-4)
304
- scheduler = torch.optim.lr_scheduler.StepLR(optimizer, step_size=30, gamma=0.5)
305
-
306
- # Training loop
307
- model.train()
308
- for epoch in range(epochs):
309
- total_loss = 0.0
310
- correct = 0
311
- total = 0
312
- num_batches = 0
313
-
314
- for batch_idx, batch_data in enumerate(dataloader):
315
- if batch_data is None: # Skip invalid batches
316
- continue
317
-
318
- patch_data, labels, valid_masks = batch_data
319
- patch_data = patch_data.to(device) # (batch_size, 10, max_points)
320
- labels = labels.to(device).unsqueeze(1) # (batch_size, 1)
321
-
322
- # Forward pass
323
- optimizer.zero_grad()
324
- outputs = model(patch_data) # (batch_size, 1)
325
- loss = criterion(outputs, labels)
326
-
327
- # Backward pass
328
- loss.backward()
329
- optimizer.step()
330
-
331
- # Statistics
332
- total_loss += loss.item()
333
- predicted = (torch.sigmoid(outputs) > 0.5).float()
334
- total += labels.size(0)
335
- correct += (predicted == labels).sum().item()
336
- num_batches += 1
337
-
338
- if batch_idx % 50 == 0:
339
- print(f"Epoch {epoch+1}/{epochs}, Batch {batch_idx}, "
340
- f"Loss: {loss.item():.6f}, "
341
- f"Accuracy: {100 * correct / total:.2f}%")
342
-
343
- avg_loss = total_loss / num_batches if num_batches > 0 else 0
344
- accuracy = 100 * correct / total if total > 0 else 0
345
-
346
- print(f"Epoch {epoch+1}/{epochs} completed, "
347
- f"Avg Loss: {avg_loss:.6f}, "
348
- f"Accuracy: {accuracy:.2f}%")
349
-
350
- scheduler.step()
351
-
352
- # Save model checkpoint every epoch
353
- checkpoint_path = model_save_path.replace('.pth', f'_epoch_{epoch+1}.pth')
354
- torch.save({
355
- 'model_state_dict': model.state_dict(),
356
- 'optimizer_state_dict': optimizer.state_dict(),
357
- 'epoch': epoch + 1,
358
- 'loss': avg_loss,
359
- 'accuracy': accuracy,
360
- }, checkpoint_path)
361
-
362
- # Save the trained model
363
- torch.save({
364
- 'model_state_dict': model.state_dict(),
365
- 'optimizer_state_dict': optimizer.state_dict(),
366
- 'epoch': epochs,
367
- }, model_save_path)
368
-
369
- print(f"Model saved to {model_save_path}")
370
- return model
371
-
372
- def load_pointnet_model(model_path: str, device: torch.device = None) -> ClassificationPointNet:
373
- """
374
- Load a trained ClassificationPointNet model.
375
-
376
- Args:
377
- model_path: Path to the saved model
378
- device: Device to load the model on
379
-
380
- Returns:
381
- Loaded ClassificationPointNet model
382
- """
383
- if device is None:
384
- device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
385
-
386
- model = ClassificationPointNet(input_dim=10, max_points=1024)
387
-
388
- checkpoint = torch.load(model_path, map_location=device)
389
- model.load_state_dict(checkpoint['model_state_dict'])
390
-
391
- model.to(device)
392
- model.eval()
393
-
394
- return model
395
-
396
- def predict_class_from_patch(model: ClassificationPointNet, patch: Dict, device: torch.device = None) -> Tuple[int, float]:
397
- """
398
- Predict binary classification from a patch using trained PointNet.
399
-
400
- Args:
401
- model: Trained ClassificationPointNet model
402
- patch: Dictionary containing patch data with 'patch_10d' key
403
- device: Device to run prediction on
404
-
405
- Returns:
406
- tuple of (predicted_class, confidence)
407
- predicted_class: int (0 for not edge, 1 for edge)
408
- confidence: float representing confidence score (0-1)
409
- """
410
- if device is None:
411
- device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
412
-
413
- patch_10d = patch['patch_10d'] # (N, 10)
414
-
415
- # Prepare input
416
- max_points = 1024
417
- num_points = patch_10d.shape[0]
418
-
419
- if num_points >= max_points:
420
- # Sample points
421
- indices = np.random.choice(num_points, max_points, replace=False)
422
- patch_sampled = patch_10d[indices]
423
- else:
424
- # Pad with zeros
425
- patch_sampled = np.zeros((max_points, 10))
426
- patch_sampled[:num_points] = patch_10d
427
-
428
- # Convert to tensor
429
- patch_tensor = torch.from_numpy(patch_sampled.T).float().unsqueeze(0) # (1, 10, max_points)
430
- patch_tensor = patch_tensor.to(device)
431
-
432
- # Predict
433
- with torch.no_grad():
434
- outputs = model(patch_tensor) # (1, 1)
435
- probability = torch.sigmoid(outputs).item()
436
- predicted_class = int(probability > 0.5)
437
-
438
- return predicted_class, probability
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
fast_pointnet_class_deeper.py DELETED
@@ -1,527 +0,0 @@
1
- import os
2
- import torch
3
- import torch.nn as nn
4
- import torch.nn.functional as F
5
- import numpy as np
6
- import pickle
7
- from torch.utils.data import Dataset, DataLoader
8
- from typing import List, Dict, Tuple, Optional
9
- import json
10
-
11
- class ClassificationPointNet(nn.Module):
12
- """
13
- Enhanced PointNet implementation for binary classification from 6D point cloud patches.
14
- Takes 6D point clouds (x,y,z,r,g,b) and predicts binary classification (edge/not edge).
15
- Features: Residual connections, attention mechanism, multi-scale features, deeper architecture.
16
- """
17
- def __init__(self, input_dim=6, max_points=1024):
18
- super(ClassificationPointNet, self).__init__()
19
- self.max_points = max_points
20
-
21
- # Point-wise MLPs with residual connections (much deeper)
22
- self.conv1 = nn.Conv1d(input_dim, 64, 1)
23
- self.conv2 = nn.Conv1d(64, 64, 1)
24
- self.conv3 = nn.Conv1d(64, 128, 1)
25
- self.conv4 = nn.Conv1d(128, 128, 1)
26
- self.conv5 = nn.Conv1d(128, 256, 1)
27
- self.conv6 = nn.Conv1d(256, 256, 1)
28
- self.conv7 = nn.Conv1d(256, 512, 1)
29
- self.conv8 = nn.Conv1d(512, 512, 1)
30
- self.conv9 = nn.Conv1d(512, 1024, 1)
31
- self.conv10 = nn.Conv1d(1024, 1024, 1)
32
- self.conv11 = nn.Conv1d(1024, 2048, 1)
33
-
34
- # Residual connection layers
35
- self.res_conv1 = nn.Conv1d(64, 128, 1)
36
- self.res_conv2 = nn.Conv1d(128, 256, 1)
37
- self.res_conv3 = nn.Conv1d(256, 512, 1)
38
- self.res_conv4 = nn.Conv1d(512, 1024, 1)
39
-
40
- # Self-attention mechanism
41
- self.attention = nn.MultiheadAttention(embed_dim=2048, num_heads=8, batch_first=True)
42
- self.attention_norm = nn.LayerNorm(2048)
43
-
44
- # Multi-scale feature aggregation
45
- self.scale_conv1 = nn.Conv1d(2048, 512, 1)
46
- self.scale_conv2 = nn.Conv1d(2048, 512, 1)
47
- self.scale_conv3 = nn.Conv1d(2048, 512, 1)
48
-
49
- # Enhanced classification head with residual connections
50
- self.fc1 = nn.Linear(7680, 2048) # Updated input size: 2048*3 + 512*3 = 7680
51
- self.fc2 = nn.Linear(2048, 2048)
52
- self.fc3 = nn.Linear(2048, 1024)
53
- self.fc4 = nn.Linear(1024, 1024)
54
- self.fc5 = nn.Linear(1024, 512)
55
- self.fc6 = nn.Linear(512, 512)
56
- self.fc7 = nn.Linear(512, 256)
57
- self.fc8 = nn.Linear(256, 128)
58
- self.fc9 = nn.Linear(128, 64)
59
- self.fc10 = nn.Linear(64, 1)
60
-
61
- # Residual connections for FC layers
62
- self.fc_res1 = nn.Linear(2048, 1024)
63
- self.fc_res2 = nn.Linear(1024, 512)
64
- self.fc_res3 = nn.Linear(512, 128)
65
-
66
- # Batch normalization layers
67
- self.bn1 = nn.BatchNorm1d(64)
68
- self.bn2 = nn.BatchNorm1d(64)
69
- self.bn3 = nn.BatchNorm1d(128)
70
- self.bn4 = nn.BatchNorm1d(128)
71
- self.bn5 = nn.BatchNorm1d(256)
72
- self.bn6 = nn.BatchNorm1d(256)
73
- self.bn7 = nn.BatchNorm1d(512)
74
- self.bn8 = nn.BatchNorm1d(512)
75
- self.bn9 = nn.BatchNorm1d(1024)
76
- self.bn10 = nn.BatchNorm1d(1024)
77
- self.bn11 = nn.BatchNorm1d(2048)
78
-
79
- # Scale batch norms
80
- self.scale_bn1 = nn.BatchNorm1d(512)
81
- self.scale_bn2 = nn.BatchNorm1d(512)
82
- self.scale_bn3 = nn.BatchNorm1d(512)
83
-
84
- # FC batch norms
85
- self.fc_bn1 = nn.BatchNorm1d(2048)
86
- self.fc_bn2 = nn.BatchNorm1d(2048)
87
- self.fc_bn3 = nn.BatchNorm1d(1024)
88
- self.fc_bn4 = nn.BatchNorm1d(1024)
89
- self.fc_bn5 = nn.BatchNorm1d(512)
90
- self.fc_bn6 = nn.BatchNorm1d(512)
91
- self.fc_bn7 = nn.BatchNorm1d(256)
92
- self.fc_bn8 = nn.BatchNorm1d(128)
93
-
94
- # Dropout layers with varying rates
95
- self.dropout1 = nn.Dropout(0.1)
96
- self.dropout2 = nn.Dropout(0.2)
97
- self.dropout3 = nn.Dropout(0.3)
98
- self.dropout4 = nn.Dropout(0.4)
99
- self.dropout5 = nn.Dropout(0.5)
100
- self.dropout6 = nn.Dropout(0.4)
101
- self.dropout7 = nn.Dropout(0.3)
102
- self.dropout8 = nn.Dropout(0.2)
103
-
104
- def forward(self, x):
105
- """
106
- Forward pass with residual connections and attention
107
- Args:
108
- x: (batch_size, input_dim, max_points) tensor
109
- Returns:
110
- classification: (batch_size, 1) tensor of logits
111
- """
112
- batch_size = x.size(0)
113
-
114
- # Deep point-wise feature extraction with residual connections
115
- x1 = F.relu(self.bn1(self.conv1(x)))
116
- x2 = F.relu(self.bn2(self.conv2(x1)))
117
- x2 = x2 + x1 # Residual connection
118
-
119
- x3 = F.relu(self.bn3(self.conv3(x2)))
120
- x4 = F.relu(self.bn4(self.conv4(x3)))
121
- res1 = self.res_conv1(x2)
122
- x4 = x4 + res1 # Residual connection
123
-
124
- x5 = F.relu(self.bn5(self.conv5(x4)))
125
- x6 = F.relu(self.bn6(self.conv6(x5)))
126
- res2 = self.res_conv2(x4)
127
- x6 = x6 + res2 # Residual connection
128
-
129
- x7 = F.relu(self.bn7(self.conv7(x6)))
130
- x8 = F.relu(self.bn8(self.conv8(x7)))
131
- res3 = self.res_conv3(x6)
132
- x8 = x8 + res3 # Residual connection
133
-
134
- x9 = F.relu(self.bn9(self.conv9(x8)))
135
- x10 = F.relu(self.bn10(self.conv10(x9)))
136
- res4 = self.res_conv4(x8)
137
- x10 = x10 + res4 # Residual connection
138
-
139
- x11 = F.relu(self.bn11(self.conv11(x10)))
140
-
141
- # Multi-scale global pooling
142
- # Max pooling
143
- global_max = torch.max(x11, 2)[0] # (batch_size, 2048)
144
-
145
- # Average pooling
146
- global_avg = torch.mean(x11, 2) # (batch_size, 2048)
147
-
148
- # Attention-based pooling
149
- x11_transposed = x11.transpose(1, 2) # (batch_size, max_points, 2048)
150
- attended, _ = self.attention(x11_transposed, x11_transposed, x11_transposed)
151
- attended = self.attention_norm(attended + x11_transposed)
152
- global_att = torch.mean(attended, 1) # (batch_size, 2048)
153
-
154
- # Multi-scale feature extraction
155
- scale1 = F.relu(self.scale_bn1(self.scale_conv1(x11)))
156
- scale1_pool = torch.max(scale1, 2)[0]
157
-
158
- scale2 = F.relu(self.scale_bn2(self.scale_conv2(x11)))
159
- scale2_pool = torch.mean(scale2, 2)
160
-
161
- scale3 = F.relu(self.scale_bn3(self.scale_conv3(x11)))
162
- scale3_pool = torch.std(scale3, 2)
163
-
164
- # Concatenate all global features
165
- global_features = torch.cat([
166
- global_max, global_avg, global_att,
167
- scale1_pool, scale2_pool, scale3_pool
168
- ], dim=1) # (batch_size, 4096)
169
-
170
- # Enhanced classification head with residual connections
171
- x = F.relu(self.fc_bn1(self.fc1(global_features)))
172
- x = self.dropout1(x)
173
-
174
- x = F.relu(self.fc_bn2(self.fc2(x)))
175
- identity1 = x
176
- x = self.dropout2(x)
177
-
178
- x = F.relu(self.fc_bn3(self.fc3(x)))
179
- x = self.dropout3(x)
180
-
181
- x = F.relu(self.fc_bn4(self.fc4(x)))
182
- res_fc1 = self.fc_res1(identity1)
183
- x = x + res_fc1 # Residual connection
184
- identity2 = x
185
- x = self.dropout4(x)
186
-
187
- x = F.relu(self.fc_bn5(self.fc5(x)))
188
- x = self.dropout5(x)
189
-
190
- x = F.relu(self.fc_bn6(self.fc6(x)))
191
- res_fc2 = self.fc_res2(identity2)
192
- x = x + res_fc2 # Residual connection
193
- identity3 = x
194
- x = self.dropout6(x)
195
-
196
- x = F.relu(self.fc_bn7(self.fc7(x)))
197
- x = self.dropout7(x)
198
-
199
- x = F.relu(self.fc_bn8(self.fc8(x)))
200
- res_fc3 = self.fc_res3(identity3)
201
- x = x + res_fc3 # Residual connection
202
- x = self.dropout8(x)
203
-
204
- x = F.relu(self.fc9(x))
205
- classification = self.fc10(x) # (batch_size, 1)
206
-
207
- return classification
208
-
209
- class PatchClassificationDataset(Dataset):
210
- """
211
- Dataset class for loading saved patches for PointNet classification training.
212
- """
213
-
214
- def __init__(self, dataset_dir: str, max_points: int = 1024, augment: bool = True):
215
- self.dataset_dir = dataset_dir
216
- self.max_points = max_points
217
- self.augment = augment
218
-
219
- # Load patch files
220
- self.patch_files = []
221
- for file in os.listdir(dataset_dir):
222
- if file.endswith('.pkl'):
223
- self.patch_files.append(os.path.join(dataset_dir, file))
224
-
225
- print(f"Found {len(self.patch_files)} patch files in {dataset_dir}")
226
-
227
- def __len__(self):
228
- return len(self.patch_files)
229
-
230
- def __getitem__(self, idx):
231
- """
232
- Load and process a patch for training.
233
- Returns:
234
- patch_data: (6, max_points) tensor of point cloud data
235
- label: scalar tensor for binary classification (0 or 1)
236
- valid_mask: (max_points,) boolean tensor indicating valid points
237
- """
238
- patch_file = self.patch_files[idx]
239
-
240
- with open(patch_file, 'rb') as f:
241
- patch_info = pickle.load(f)
242
-
243
- patch_6d = patch_info['patch_6d'] # (N, 6)
244
- label = patch_info.get('label', 0) # Get binary classification label (0 or 1)
245
-
246
- # Pad or sample points to max_points
247
- num_points = patch_6d.shape[0]
248
-
249
- if num_points >= self.max_points:
250
- # Randomly sample max_points
251
- indices = np.random.choice(num_points, self.max_points, replace=False)
252
- patch_sampled = patch_6d[indices]
253
- valid_mask = np.ones(self.max_points, dtype=bool)
254
- else:
255
- # Pad with zeros
256
- patch_sampled = np.zeros((self.max_points, 6))
257
- patch_sampled[:num_points] = patch_6d
258
- valid_mask = np.zeros(self.max_points, dtype=bool)
259
- valid_mask[:num_points] = True
260
-
261
- # Data augmentation
262
- if self.augment:
263
- patch_sampled = self._augment_patch(patch_sampled, valid_mask)
264
-
265
- # Convert to tensors and transpose for conv1d (channels first)
266
- patch_tensor = torch.from_numpy(patch_sampled.T).float() # (6, max_points)
267
- label_tensor = torch.tensor(label, dtype=torch.float32) # Float for BCE loss
268
- valid_mask_tensor = torch.from_numpy(valid_mask)
269
-
270
- return patch_tensor, label_tensor, valid_mask_tensor
271
-
272
- def _augment_patch(self, patch, valid_mask):
273
- """
274
- Apply data augmentation to the patch.
275
- """
276
- valid_points = patch[valid_mask]
277
-
278
- if len(valid_points) == 0:
279
- return patch
280
-
281
- # Random rotation around z-axis
282
- angle = np.random.uniform(0, 2 * np.pi)
283
- cos_angle = np.cos(angle)
284
- sin_angle = np.sin(angle)
285
- rotation_matrix = np.array([
286
- [cos_angle, -sin_angle, 0],
287
- [sin_angle, cos_angle, 0],
288
- [0, 0, 1]
289
- ])
290
-
291
- # Apply rotation to xyz coordinates
292
- valid_points[:, :3] = valid_points[:, :3] @ rotation_matrix.T
293
-
294
- # Random jittering
295
- noise = np.random.normal(0, 0.01, valid_points[:, :3].shape)
296
- valid_points[:, :3] += noise
297
-
298
- # Random scaling
299
- scale = np.random.uniform(0.9, 1.1)
300
- valid_points[:, :3] *= scale
301
-
302
- patch[valid_mask] = valid_points
303
- return patch
304
-
305
- def save_patches_dataset(patches: List[Dict], dataset_dir: str, entry_id: str):
306
- """
307
- Save patches from prediction pipeline to create a training dataset.
308
-
309
- Args:
310
- patches: List of patch dictionaries from generate_patches()
311
- dataset_dir: Directory to save the dataset
312
- entry_id: Unique identifier for this entry/image
313
- """
314
- os.makedirs(dataset_dir, exist_ok=True)
315
-
316
- for i, patch in enumerate(patches):
317
- # Create unique filename
318
- filename = f"{entry_id}_patch_{i}.pkl"
319
- filepath = os.path.join(dataset_dir, filename)
320
-
321
- # Skip if file already exists
322
- if os.path.exists(filepath):
323
- continue
324
-
325
- # Save patch data
326
- with open(filepath, 'wb') as f:
327
- pickle.dump(patch, f)
328
-
329
- print(f"Saved {len(patches)} patches for entry {entry_id}")
330
-
331
- # Create dataloader with custom collate function to filter invalid samples
332
- def collate_fn(batch):
333
- valid_batch = []
334
- for patch_data, label, valid_mask in batch:
335
- # Filter out invalid samples (no valid points)
336
- if valid_mask.sum() > 0:
337
- valid_batch.append((patch_data, label, valid_mask))
338
-
339
- if len(valid_batch) == 0:
340
- return None
341
-
342
- # Stack valid samples
343
- patch_data = torch.stack([item[0] for item in valid_batch])
344
- labels = torch.stack([item[1] for item in valid_batch])
345
- valid_masks = torch.stack([item[2] for item in valid_batch])
346
-
347
- return patch_data, labels, valid_masks
348
-
349
- # Initialize weights using Xavier/Glorot initialization
350
- def init_weights(m):
351
- if isinstance(m, nn.Conv1d):
352
- nn.init.xavier_uniform_(m.weight)
353
- if m.bias is not None:
354
- nn.init.zeros_(m.bias)
355
- elif isinstance(m, nn.Linear):
356
- nn.init.xavier_uniform_(m.weight)
357
- if m.bias is not None:
358
- nn.init.zeros_(m.bias)
359
- elif isinstance(m, nn.BatchNorm1d):
360
- nn.init.ones_(m.weight)
361
- nn.init.zeros_(m.bias)
362
-
363
- def train_pointnet(dataset_dir: str, model_save_path: str, epochs: int = 100, batch_size: int = 32,
364
- lr: float = 0.001):
365
- """
366
- Train the ClassificationPointNet model on saved patches.
367
-
368
- Args:
369
- dataset_dir: Directory containing saved patch files
370
- model_save_path: Path to save the trained model
371
- epochs: Number of training epochs
372
- batch_size: Training batch size
373
- lr: Learning rate
374
- """
375
- device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
376
- print(f"Training on device: {device}")
377
-
378
- # Create dataset and dataloader
379
- dataset = PatchClassificationDataset(dataset_dir, max_points=1024, augment=True)
380
- print(f"Dataset loaded with {len(dataset)} samples")
381
-
382
- dataloader = DataLoader(dataset, batch_size=batch_size, shuffle=True, num_workers=8,
383
- collate_fn=collate_fn, drop_last=True)
384
-
385
- # Initialize model
386
- model = ClassificationPointNet(input_dim=6, max_points=1024)
387
- model.apply(init_weights)
388
- model.to(device)
389
-
390
- # Loss function and optimizer (BCE for binary classification)
391
- criterion = nn.BCEWithLogitsLoss()
392
- optimizer = torch.optim.Adam(model.parameters(), lr=lr, weight_decay=1e-4)
393
- scheduler = torch.optim.lr_scheduler.StepLR(optimizer, step_size=30, gamma=0.5)
394
-
395
- # Training loop
396
- model.train()
397
- for epoch in range(epochs):
398
- total_loss = 0.0
399
- correct = 0
400
- total = 0
401
- num_batches = 0
402
-
403
- for batch_idx, batch_data in enumerate(dataloader):
404
- if batch_data is None: # Skip invalid batches
405
- continue
406
-
407
- patch_data, labels, valid_masks = batch_data
408
- patch_data = patch_data.to(device) # (batch_size, 6, max_points)
409
- labels = labels.to(device).unsqueeze(1) # (batch_size, 1)
410
-
411
- # Forward pass
412
- optimizer.zero_grad()
413
- outputs = model(patch_data) # (batch_size, 1)
414
- loss = criterion(outputs, labels)
415
-
416
- # Backward pass
417
- loss.backward()
418
- optimizer.step()
419
-
420
- # Statistics
421
- total_loss += loss.item()
422
- predicted = (torch.sigmoid(outputs) > 0.5).float()
423
- total += labels.size(0)
424
- correct += (predicted == labels).sum().item()
425
- num_batches += 1
426
-
427
- if batch_idx % 50 == 0:
428
- print(f"Epoch {epoch+1}/{epochs}, Batch {batch_idx}, "
429
- f"Loss: {loss.item():.6f}, "
430
- f"Accuracy: {100 * correct / total:.2f}%")
431
-
432
- avg_loss = total_loss / num_batches if num_batches > 0 else 0
433
- accuracy = 100 * correct / total if total > 0 else 0
434
-
435
- print(f"Epoch {epoch+1}/{epochs} completed, "
436
- f"Avg Loss: {avg_loss:.6f}, "
437
- f"Accuracy: {accuracy:.2f}%")
438
-
439
- scheduler.step()
440
-
441
- # Save model checkpoint every epoch
442
- checkpoint_path = model_save_path.replace('.pth', f'_epoch_{epoch+1}.pth')
443
- torch.save({
444
- 'model_state_dict': model.state_dict(),
445
- 'optimizer_state_dict': optimizer.state_dict(),
446
- 'epoch': epoch + 1,
447
- 'loss': avg_loss,
448
- 'accuracy': accuracy,
449
- }, checkpoint_path)
450
-
451
- # Save the trained model
452
- torch.save({
453
- 'model_state_dict': model.state_dict(),
454
- 'optimizer_state_dict': optimizer.state_dict(),
455
- 'epoch': epochs,
456
- }, model_save_path)
457
-
458
- print(f"Model saved to {model_save_path}")
459
- return model
460
-
461
- def load_pointnet_model(model_path: str, device: torch.device = None) -> ClassificationPointNet:
462
- """
463
- Load a trained ClassificationPointNet model.
464
-
465
- Args:
466
- model_path: Path to the saved model
467
- device: Device to load the model on
468
-
469
- Returns:
470
- Loaded ClassificationPointNet model
471
- """
472
- if device is None:
473
- device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
474
-
475
- model = ClassificationPointNet(input_dim=6, max_points=1024)
476
-
477
- checkpoint = torch.load(model_path, map_location=device)
478
- model.load_state_dict(checkpoint['model_state_dict'])
479
-
480
- model.to(device)
481
- model.eval()
482
-
483
- return model
484
-
485
- def predict_class_from_patch(model: ClassificationPointNet, patch: Dict, device: torch.device = None) -> Tuple[int, float]:
486
- """
487
- Predict binary classification from a patch using trained PointNet.
488
-
489
- Args:
490
- model: Trained ClassificationPointNet model
491
- patch: Dictionary containing patch data with 'patch_6d' key
492
- device: Device to run prediction on
493
-
494
- Returns:
495
- tuple of (predicted_class, confidence)
496
- predicted_class: int (0 for not edge, 1 for edge)
497
- confidence: float representing confidence score (0-1)
498
- """
499
- if device is None:
500
- device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
501
-
502
- patch_6d = patch['patch_6d'] # (N, 6)
503
-
504
- # Prepare input
505
- max_points = 1024
506
- num_points = patch_6d.shape[0]
507
-
508
- if num_points >= max_points:
509
- # Sample points
510
- indices = np.random.choice(num_points, max_points, replace=False)
511
- patch_sampled = patch_6d[indices]
512
- else:
513
- # Pad with zeros
514
- patch_sampled = np.zeros((max_points, 6))
515
- patch_sampled[:num_points] = patch_6d
516
-
517
- # Convert to tensor
518
- patch_tensor = torch.from_numpy(patch_sampled.T).float().unsqueeze(0) # (1, 6, max_points)
519
- patch_tensor = patch_tensor.to(device)
520
-
521
- # Predict
522
- with torch.no_grad():
523
- outputs = model(patch_tensor) # (1, 1)
524
- probability = torch.sigmoid(outputs).item()
525
- predicted_class = int(probability > 0.5)
526
-
527
- return predicted_class, probability
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
fast_pointnet_class_v2.py DELETED
@@ -1,508 +0,0 @@
1
- import os
2
- import torch
3
- import torch.nn as nn
4
- import torch.nn.functional as F
5
- import numpy as np
6
- import pickle
7
- from torch.utils.data import Dataset, DataLoader
8
- from typing import List, Dict, Tuple, Optional
9
- import json
10
-
11
- class ClassificationPointNet(nn.Module):
12
- """
13
- Fast PointNet-like implementation for binary classification from point cloud patches.
14
- Adapted from FastPointNet, focusing only on classification.
15
- Takes N-dimensional point clouds and predicts a binary class.
16
- """
17
- def __init__(self, input_dim: int = 10, max_points: int = 1024, num_classes: int = 1):
18
- super(ClassificationPointNet, self).__init__()
19
- self.max_points = max_points
20
- self.num_classes = num_classes
21
-
22
- # Enhanced point-wise MLPs with residual connections
23
- self.conv1 = nn.Conv1d(input_dim, 64, 1)
24
- self.conv2 = nn.Conv1d(64, 128, 1)
25
- self.conv3 = nn.Conv1d(128, 256, 1)
26
- self.conv4 = nn.Conv1d(256, 512, 1)
27
- self.conv5 = nn.Conv1d(512, 1024, 1)
28
- self.conv6 = nn.Conv1d(1024, 1024, 1) # Matches FastPointNet structure
29
- self.conv7 = nn.Conv1d(1024, 2048, 1) # Matches FastPointNet structure
30
-
31
- # Lightweight channel attention mechanism
32
- self.channel_attention = nn.Sequential(
33
- nn.AdaptiveAvgPool1d(1),
34
- nn.Conv1d(2048, 128, 1),
35
- nn.ReLU(inplace=True),
36
- nn.Conv1d(128, 2048, 1),
37
- nn.Sigmoid()
38
- )
39
-
40
- # Enhanced shared features with residual connections
41
- self.shared_fc1 = nn.Linear(2048, 1024)
42
- self.shared_fc2 = nn.Linear(1024, 512)
43
- self.shared_fc3 = nn.Linear(512, 512)
44
-
45
- # Classification head
46
- self.class_fc1 = nn.Linear(512, 512)
47
- self.class_fc2 = nn.Linear(512, 256)
48
- self.class_fc3 = nn.Linear(256, 128)
49
- self.class_fc4 = nn.Linear(128, 64)
50
- self.class_fc5 = nn.Linear(64, self.num_classes) # Output for classification
51
-
52
- # Batch normalization layers with momentum
53
- self.bn1 = nn.BatchNorm1d(64, momentum=0.1)
54
- self.bn2 = nn.BatchNorm1d(128, momentum=0.1)
55
- self.bn3 = nn.BatchNorm1d(256, momentum=0.1)
56
- self.bn4 = nn.BatchNorm1d(512, momentum=0.1)
57
- self.bn5 = nn.BatchNorm1d(1024, momentum=0.1)
58
- self.bn6 = nn.BatchNorm1d(1024, momentum=0.1)
59
- self.bn7 = nn.BatchNorm1d(2048, momentum=0.1)
60
-
61
- # Group normalization for shared layers
62
- self.gn1 = nn.GroupNorm(32, 1024) # Assuming 1024 channels, 32 groups
63
- self.gn2 = nn.GroupNorm(16, 512) # Assuming 512 channels, 16 groups
64
-
65
- # Dropout layers
66
- self.dropout_light = nn.Dropout(0.1)
67
- self.dropout_medium = nn.Dropout(0.2)
68
- # self.dropout_heavy = nn.Dropout(0.3) # Not used in the direct path to classification in this adaptation
69
-
70
- def forward(self, x: torch.Tensor) -> torch.Tensor:
71
- """
72
- Forward pass with residual connections and attention for classification.
73
- Args:
74
- x: (batch_size, input_dim, max_points) tensor
75
- Returns:
76
- classification: (batch_size, num_classes) tensor of logits
77
- """
78
- # Enhanced point-wise feature extraction
79
- x1 = F.leaky_relu(self.bn1(self.conv1(x)), negative_slope=0.01, inplace=True)
80
- x2 = F.leaky_relu(self.bn2(self.conv2(x1)), negative_slope=0.01, inplace=True)
81
- x3 = F.leaky_relu(self.bn3(self.conv3(x2)), negative_slope=0.01, inplace=True)
82
- x4 = F.leaky_relu(self.bn4(self.conv4(x3)), negative_slope=0.01, inplace=True)
83
- x5 = F.leaky_relu(self.bn5(self.conv5(x4)), negative_slope=0.01, inplace=True)
84
-
85
- # Residual connection for conv6
86
- x6_conv = self.bn6(self.conv6(x5))
87
- x6 = F.leaky_relu(x6_conv + x5, negative_slope=0.01, inplace=True) # Add residual before ReLU
88
-
89
- x7 = F.leaky_relu(self.bn7(self.conv7(x6)), negative_slope=0.01, inplace=True)
90
-
91
- # Apply channel attention
92
- attention_weights = self.channel_attention(x7)
93
- x7_attended = x7 * attention_weights
94
-
95
- # Multi-scale global pooling
96
- max_pool = torch.max(x7_attended, 2)[0]
97
- avg_pool = torch.mean(x7_attended, 2)
98
- global_features = 0.7 * max_pool + 0.3 * avg_pool
99
-
100
- # Enhanced shared features
101
- shared1_fc = self.shared_fc1(global_features)
102
- shared1 = F.leaky_relu(self.gn1(shared1_fc.unsqueeze(-1)).squeeze(-1), negative_slope=0.01, inplace=True)
103
- shared1 = self.dropout_light(shared1)
104
-
105
- shared2_fc = self.shared_fc2(shared1)
106
- shared2 = F.leaky_relu(self.gn2(shared2_fc.unsqueeze(-1)).squeeze(-1), negative_slope=0.01, inplace=True)
107
- shared2 = self.dropout_medium(shared2)
108
-
109
- shared3_fc = self.shared_fc3(shared2)
110
- # Residual connection for shared_fc3
111
- shared_features = F.leaky_relu(shared3_fc + shared2, negative_slope=0.01, inplace=True) # Add residual before ReLU
112
- shared_features = self.dropout_light(shared_features) # Apply dropout after residual and ReLU
113
-
114
- # Classification head
115
- class1 = F.leaky_relu(self.class_fc1(shared_features), negative_slope=0.01, inplace=True)
116
- class1 = self.dropout_light(class1)
117
-
118
- class2 = F.leaky_relu(self.class_fc2(class1), negative_slope=0.01, inplace=True)
119
- class2 = self.dropout_medium(class2)
120
-
121
- class3 = F.leaky_relu(self.class_fc3(class2), negative_slope=0.01, inplace=True)
122
- class3 = self.dropout_light(class3)
123
-
124
- class4 = F.leaky_relu(self.class_fc4(class3), negative_slope=0.01, inplace=True)
125
- # No dropout before the final layer typically
126
-
127
- classification = self.class_fc5(class4) # Raw logits
128
-
129
- return classification
130
-
131
- class PatchClassificationDataset(Dataset):
132
- """
133
- Dataset class for loading saved patches for PointNet classification training.
134
- """
135
-
136
- def __init__(self, dataset_dir: str, max_points: int = 1024, augment: bool = False, input_dim: int = 10): # Added input_dim
137
- self.dataset_dir = dataset_dir
138
- self.max_points = max_points
139
- self.augment = augment
140
- self.input_dim = input_dim # Store input_dim
141
-
142
- # Load patch files
143
- self.patch_files = []
144
- for file in os.listdir(dataset_dir):
145
- if file.endswith('.pkl'):
146
- self.patch_files.append(os.path.join(dataset_dir, file))
147
-
148
- print(f"Found {len(self.patch_files)} patch files in {dataset_dir}")
149
-
150
- def __len__(self):
151
- return len(self.patch_files)
152
-
153
- def __getitem__(self, idx):
154
- """
155
- Load and process a patch for training.
156
- Returns:
157
- patch_data: (input_dim, max_points) tensor of point cloud data
158
- label: scalar tensor for binary classification (0 or 1)
159
- valid_mask: (max_points,) boolean tensor indicating valid points
160
- """
161
- patch_file = self.patch_files[idx]
162
-
163
- with open(patch_file, 'rb') as f:
164
- patch_info = pickle.load(f)
165
-
166
- # Assuming the key in patch_info is now 'patch_10d' or similar, or that patch_info['patch_data'] is (N, 10)
167
- # For this example, let's assume the key is 'patch_data' and it holds the 10D data.
168
- # If your key is 'patch_10d', change 'patch_data' to 'patch_10d' below.
169
- patch_data_nd = patch_info.get('patch_data', patch_info.get('patch_10d', patch_info.get('patch_6d'))) # Try to get 10d, fallback to 6d for now
170
- if patch_data_nd.shape[1] != self.input_dim:
171
- # This is a fallback or error handling if the loaded data isn't 10D.
172
- # You might want to raise an error or handle this case specifically.
173
- # For now, if it's 6D, we'll pad it to 10D with zeros as a placeholder.
174
- # This part needs to be adjusted based on how your 10D data is actually stored.
175
- print(f"Warning: Patch {patch_file} has {patch_data_nd.shape[1]} dimensions, expected {self.input_dim}. Padding with zeros if necessary.")
176
- if patch_data_nd.shape[1] < self.input_dim:
177
- padding = np.zeros((patch_data_nd.shape[0], self.input_dim - patch_data_nd.shape[1]))
178
- patch_data_nd = np.concatenate((patch_data_nd, padding), axis=1)
179
- elif patch_data_nd.shape[1] > self.input_dim:
180
- patch_data_nd = patch_data_nd[:, :self.input_dim]
181
-
182
-
183
- label = patch_info.get('label', 0) # Get binary classification label (0 or 1)
184
-
185
- # Pad or sample points to max_points
186
- num_points = patch_data_nd.shape[0]
187
-
188
- if num_points >= self.max_points:
189
- # Randomly sample max_points
190
- indices = np.random.choice(num_points, self.max_points, replace=False)
191
- patch_sampled = patch_data_nd[indices]
192
- valid_mask = np.ones(self.max_points, dtype=bool)
193
- else:
194
- # Pad with zeros
195
- patch_sampled = np.zeros((self.max_points, self.input_dim)) # Changed to self.input_dim
196
- patch_sampled[:num_points] = patch_data_nd
197
- valid_mask = np.zeros(self.max_points, dtype=bool)
198
- valid_mask[:num_points] = True
199
-
200
- # Data augmentation
201
- if self.augment:
202
- # Note: _augment_patch currently only augments xyz (first 3 dims).
203
- # If other dimensions are geometric and need augmentation, this function needs an update.
204
- patch_sampled = self._augment_patch(patch_sampled, valid_mask)
205
-
206
- # Convert to tensors and transpose for conv1d (channels first)
207
- patch_tensor = torch.from_numpy(patch_sampled.T).float() # (input_dim, max_points)
208
- label_tensor = torch.tensor(label, dtype=torch.float32) # Float for BCE loss
209
- valid_mask_tensor = torch.from_numpy(valid_mask)
210
-
211
- return patch_tensor, label_tensor, valid_mask_tensor
212
-
213
- def _augment_patch(self, patch, valid_mask):
214
- """
215
- Apply data augmentation to the patch.
216
- Note: This implementation only augments the first 3 dimensions (assumed to be XYZ).
217
- If your 10D representation has other geometric features that need augmentation,
218
- this function should be updated accordingly.
219
- """
220
- valid_points_data = patch[valid_mask]
221
-
222
- if len(valid_points_data) == 0:
223
- return patch
224
-
225
- # Extract XYZ for augmentation (first 3 columns)
226
- valid_points_xyz = valid_points_data[:, :3].copy() # Operate on a copy
227
-
228
- # Random rotation around z-axis
229
- angle = np.random.uniform(0, 2 * np.pi)
230
- cos_angle = np.cos(angle)
231
- sin_angle = np.sin(angle)
232
- rotation_matrix = np.array([
233
- [cos_angle, -sin_angle, 0],
234
- [sin_angle, cos_angle, 0],
235
- [0, 0, 1]
236
- ])
237
-
238
- # Apply rotation to xyz coordinates
239
- valid_points_xyz = valid_points_xyz @ rotation_matrix.T
240
-
241
- # Random jittering
242
- noise = np.random.normal(0, 0.01, valid_points_xyz.shape)
243
- valid_points_xyz += noise
244
-
245
- # Random scaling
246
- scale = np.random.uniform(0.9, 1.1)
247
- valid_points_xyz *= scale
248
-
249
- # Update the original patch data
250
- augmented_patch = patch.copy()
251
- augmented_patch[valid_mask, :3] = valid_points_xyz
252
-
253
- return augmented_patch
254
-
255
- def save_patches_dataset(patches: List[Dict], dataset_dir: str, entry_id: str):
256
- """
257
- Save patches from prediction pipeline to create a training dataset.
258
- Ensure 'patch_data' (or 'patch_10d') in the patch dictionary contains the 10D data.
259
-
260
- Args:
261
- patches: List of patch dictionaries from generate_patches()
262
- dataset_dir: Directory to save the dataset
263
- entry_id: Unique identifier for this entry/image
264
- """
265
- os.makedirs(dataset_dir, exist_ok=True)
266
-
267
- for i, patch in enumerate(patches):
268
- # Create unique filename
269
- filename = f"{entry_id}_patch_{i}.pkl"
270
- filepath = os.path.join(dataset_dir, filename)
271
-
272
- # Skip if file already exists
273
- if os.path.exists(filepath):
274
- continue
275
-
276
- # Ensure the patch data being saved is 10D.
277
- # Example: patch_data_key = 'patch_10d' or 'patch_data'
278
- # if 'patch_data' not in patch or patch['patch_data'].shape[1] != 10:
279
- # print(f"Warning: Patch {i} for entry {entry_id} does not seem to be 10D. Skipping or error handling needed.")
280
- # continue
281
-
282
- with open(filepath, 'wb') as f:
283
- pickle.dump(patch, f)
284
-
285
- print(f"Saved {len(patches)} patches for entry {entry_id}")
286
-
287
- # Create dataloader with custom collate function to filter invalid samples
288
- def collate_fn(batch):
289
- valid_batch = []
290
- for patch_data, label, valid_mask in batch:
291
- # Filter out invalid samples (no valid points)
292
- if valid_mask.sum() > 0:
293
- valid_batch.append((patch_data, label, valid_mask))
294
-
295
- if len(valid_batch) == 0:
296
- return None
297
-
298
- # Stack valid samples
299
- patch_data = torch.stack([item[0] for item in valid_batch])
300
- labels = torch.stack([item[1] for item in valid_batch])
301
- valid_masks = torch.stack([item[2] for item in valid_batch])
302
-
303
- return patch_data, labels, valid_masks
304
-
305
- # Initialize weights using Xavier/Glorot initialization
306
- def init_weights(m):
307
- if isinstance(m, nn.Conv1d):
308
- nn.init.xavier_uniform_(m.weight)
309
- if m.bias is not None:
310
- nn.init.zeros_(m.bias)
311
- elif isinstance(m, nn.Linear):
312
- nn.init.xavier_uniform_(m.weight)
313
- if m.bias is not None:
314
- nn.init.zeros_(m.bias)
315
- elif isinstance(m, nn.BatchNorm1d):
316
- nn.init.ones_(m.weight)
317
- nn.init.zeros_(m.bias)
318
-
319
- def train_pointnet(dataset_dir: str, model_save_path: str, epochs: int = 100, batch_size: int = 32,
320
- lr: float = 0.001, input_dim: int = 10): # Added input_dim
321
- """
322
- Train the ClassificationPointNet model on saved patches.
323
-
324
- Args:
325
- dataset_dir: Directory containing saved patch files
326
- model_save_path: Path to save the trained model
327
- epochs: Number of training epochs
328
- batch_size: Training batch size
329
- lr: Learning rate
330
- input_dim: Dimensionality of the input points (e.g., 10 for 10D)
331
- """
332
- device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
333
- print(f"Training on device: {device}")
334
-
335
- # Create dataset and dataloader
336
- dataset = PatchClassificationDataset(dataset_dir, max_points=1024, augment=False, input_dim=input_dim) # Pass input_dim
337
- print(f"Dataset loaded with {len(dataset)} samples")
338
-
339
- dataloader = DataLoader(dataset, batch_size=batch_size, shuffle=True, num_workers=20,
340
- collate_fn=collate_fn, drop_last=True)
341
-
342
- # Initialize model
343
- model = ClassificationPointNet(input_dim=input_dim, max_points=1024) # Pass input_dim
344
- model.apply(init_weights)
345
- model.to(device)
346
-
347
- # Loss function and optimizer (BCE for binary classification)
348
- criterion = nn.BCEWithLogitsLoss()
349
- optimizer = torch.optim.Adam(model.parameters(), lr=lr, weight_decay=1e-4)
350
- scheduler = torch.optim.lr_scheduler.StepLR(optimizer, step_size=30, gamma=0.5)
351
-
352
- # Training loop
353
- model.train()
354
- for epoch in range(epochs):
355
- total_loss = 0.0
356
- correct = 0
357
- total = 0
358
- num_batches = 0
359
-
360
- for batch_idx, batch_data in enumerate(dataloader):
361
- if batch_data is None: # Skip invalid batches
362
- continue
363
-
364
- patch_data, labels, valid_masks = batch_data
365
- patch_data = patch_data.to(device) # (batch_size, input_dim, max_points)
366
- labels = labels.to(device).unsqueeze(1) # (batch_size, 1)
367
-
368
- # Forward pass
369
- optimizer.zero_grad()
370
- outputs = model(patch_data) # (batch_size, 1)
371
- loss = criterion(outputs, labels)
372
-
373
- # Backward pass
374
- loss.backward()
375
- optimizer.step()
376
-
377
- # Statistics
378
- total_loss += loss.item()
379
- predicted = (torch.sigmoid(outputs) > 0.5).float()
380
- total += labels.size(0)
381
- correct += (predicted == labels).sum().item()
382
- num_batches += 1
383
-
384
- if batch_idx % 50 == 0:
385
- print(f"Epoch {epoch+1}/{epochs}, Batch {batch_idx}, "
386
- f"Loss: {loss.item():.6f}, "
387
- f"Accuracy: {100 * correct / total:.2f}%")
388
-
389
- avg_loss = total_loss / num_batches if num_batches > 0 else 0
390
- accuracy = 100 * correct / total if total > 0 else 0
391
-
392
- print(f"Epoch {epoch+1}/{epochs} completed, "
393
- f"Avg Loss: {avg_loss:.6f}, "
394
- f"Accuracy: {accuracy:.2f}%")
395
-
396
- scheduler.step()
397
-
398
- # Save model checkpoint every epoch
399
- checkpoint_path = model_save_path.replace('.pth', f'_epoch_{epoch+1}.pth')
400
- torch.save({
401
- 'model_state_dict': model.state_dict(),
402
- 'optimizer_state_dict': optimizer.state_dict(),
403
- 'epoch': epoch + 1,
404
- 'loss': avg_loss,
405
- 'accuracy': accuracy,
406
- 'input_dim': input_dim, # Save input_dim with checkpoint
407
- }, checkpoint_path)
408
-
409
- # Save the trained model
410
- torch.save({
411
- 'model_state_dict': model.state_dict(),
412
- 'optimizer_state_dict': optimizer.state_dict(),
413
- 'epoch': epochs,
414
- 'input_dim': input_dim, # Save input_dim with final model
415
- }, model_save_path)
416
-
417
- print(f"Model saved to {model_save_path}")
418
- return model
419
-
420
- def load_pointnet_model(model_path: str, device: torch.device = None) -> ClassificationPointNet:
421
- """
422
- Load a trained ClassificationPointNet model.
423
-
424
- Args:
425
- model_path: Path to the saved model
426
- device: Device to load the model on
427
-
428
- Returns:
429
- Loaded ClassificationPointNet model
430
- """
431
- if device is None:
432
- device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
433
-
434
- checkpoint = torch.load(model_path, map_location=device)
435
-
436
- # Load input_dim from checkpoint if available, otherwise default to 10
437
- # For older models saved without input_dim, you might need to specify it or assume a default.
438
- input_dim = checkpoint.get('input_dim', 10)
439
-
440
- model = ClassificationPointNet(input_dim=input_dim, max_points=1024) # Use loaded or default input_dim
441
- model.load_state_dict(checkpoint['model_state_dict'])
442
-
443
- model.to(device)
444
- model.eval()
445
-
446
- return model
447
-
448
- def predict_class_from_patch(model: ClassificationPointNet, patch: Dict, device: torch.device = None) -> Tuple[int, float]:
449
- """
450
- Predict binary classification from a patch using trained PointNet.
451
- Assumes the model's input_dim matches the data.
452
-
453
- Args:
454
- model: Trained ClassificationPointNet model
455
- patch: Dictionary containing patch data. Expects a key like 'patch_data' or 'patch_10d' with (N, 10) shape.
456
- device: Device to run prediction on
457
-
458
- Returns:
459
- tuple of (predicted_class, confidence)
460
- predicted_class: int (0 for not edge, 1 for edge)
461
- confidence: float representing confidence score (0-1)
462
- """
463
- if device is None:
464
- device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
465
-
466
- # Determine input_dim from the model
467
- input_dim = model.conv1.in_channels
468
-
469
- # Assuming the key in patch_info is now 'patch_10d' or similar, or that patch_info['patch_data'] is (N, 10)
470
- # For this example, let's assume the key is 'patch_data' and it holds the 10D data.
471
- # If your key is 'patch_10d', change 'patch_data' to 'patch_10d' below.
472
- patch_data_nd = patch.get('patch_data', patch.get('patch_10d', patch.get('patch_6d'))) # Try to get 10d, fallback to 6d
473
-
474
- if patch_data_nd.shape[1] != input_dim:
475
- # Handle dimension mismatch, e.g., by padding or raising an error
476
- print(f"Warning: Input patch has {patch_data_nd.shape[1]} dimensions, but model expects {input_dim}. Adjusting...")
477
- if patch_data_nd.shape[1] < input_dim:
478
- padding = np.zeros((patch_data_nd.shape[0], input_dim - patch_data_nd.shape[1]))
479
- patch_data_nd = np.concatenate((patch_data_nd, padding), axis=1)
480
- elif patch_data_nd.shape[1] > input_dim:
481
- patch_data_nd = patch_data_nd[:, :input_dim]
482
-
483
- # Prepare input
484
- max_points = model.max_points # Use max_points from the model instance
485
- num_points = patch_data_nd.shape[0]
486
-
487
- if num_points >= max_points:
488
- # Sample points
489
- indices = np.random.choice(num_points, max_points, replace=False)
490
- patch_sampled = patch_data_nd[indices]
491
- else:
492
- # Pad with zeros
493
- patch_sampled = np.zeros((max_points, input_dim)) # Use model's input_dim
494
- patch_sampled[:num_points] = patch_data_nd
495
-
496
- # Convert to tensor
497
- patch_tensor = torch.from_numpy(patch_sampled.T).float().unsqueeze(0) # (1, input_dim, max_points)
498
- patch_tensor = patch_tensor.to(device)
499
-
500
- # Predict
501
- model.eval() # Ensure model is in eval mode
502
- with torch.no_grad():
503
- outputs = model(patch_tensor) # (1, 1)
504
- probability = torch.sigmoid(outputs).item()
505
- predicted_class = int(probability > 0.5)
506
-
507
- return predicted_class, probability
508
-
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
fast_pointnet_v2.py CHANGED
@@ -1,3 +1,13 @@
 
 
 
 
 
 
 
 
 
 
1
  import os
2
  import torch
3
  import torch.nn as nn
@@ -568,4 +578,4 @@ def predict_vertex_from_patch(model: FastPointNet, patch: np.ndarray, device: to
568
  offset = patch['cluster_center']
569
  position += offset
570
 
571
- return position, score, classification
 
1
+ # This file defines a FastPointNet model for 3D vertex prediction from point clouds.
2
+ # It includes:
3
+ # 1. `FastPointNet`: A deep neural network with enhancements like residual connections,
4
+ # channel attention, and multi-scale pooling. It predicts 3D coordinates,
5
+ # and optionally, confidence scores and classification labels.
6
+ # 2. `PatchDataset`: A PyTorch Dataset for loading, preprocessing, and augmenting
7
+ # 11-dimensional point cloud patches.
8
+ # 3. Utility functions for:
9
+ # - Training the model (`train_pointnet`) with custom loss and optimization.
10
+ # - Loading/saving models, and performing inference (`predict_vertex_from_patch`).
11
  import os
12
  import torch
13
  import torch.nn as nn
 
578
  offset = patch['cluster_center']
579
  position += offset
580
 
581
+ return position, score, classification
fast_pointnet_v3.py DELETED
@@ -1,605 +0,0 @@
1
- import os
2
- import torch
3
- import torch.nn as nn
4
- import torch.nn.functional as F
5
- import numpy as np
6
- import pickle
7
- from torch.utils.data import Dataset, DataLoader
8
- from typing import List, Dict, Tuple, Optional
9
- import json
10
-
11
- class FastPointNet(nn.Module):
12
- """
13
- Fast PointNet implementation for 3D vertex prediction from point cloud patches.
14
- Takes 11D point clouds and predicts 3D vertex coordinates.
15
- Enhanced with transformer attention, deeper architecture, and moderate capacity increase.
16
- """
17
- def __init__(self, input_dim=11, output_dim=3, max_points=1024, predict_score=True, predict_class=True, num_classes=1):
18
- super(FastPointNet, self).__init__()
19
- self.max_points = max_points
20
- self.predict_score = predict_score
21
- self.predict_class = predict_class
22
- self.num_classes = num_classes
23
-
24
- # Enhanced point-wise MLPs with moderate capacity increase
25
- self.conv1 = nn.Conv1d(input_dim, 96, 1) # 64 -> 96
26
- self.conv2 = nn.Conv1d(96, 192, 1) # 128 -> 192
27
- self.conv3 = nn.Conv1d(192, 384, 1) # 256 -> 384
28
- self.conv4 = nn.Conv1d(384, 768, 1) # 512 -> 768
29
- self.conv5 = nn.Conv1d(768, 1536, 1) # 1024 -> 1536
30
- self.conv6 = nn.Conv1d(1536, 1536, 1) # Keep same
31
- self.conv7 = nn.Conv1d(1536, 2048, 1) # Reduce from 1536 to 2048 for transformer
32
-
33
- # Lightweight Self-Attention Transformer Block
34
- self.transformer_dim = 2048
35
- self.num_heads = 8
36
- self.transformer_block = nn.MultiheadAttention(
37
- embed_dim=self.transformer_dim,
38
- num_heads=self.num_heads,
39
- dropout=0.1,
40
- batch_first=False
41
- )
42
- self.transformer_norm1 = nn.LayerNorm(self.transformer_dim)
43
- self.transformer_norm2 = nn.LayerNorm(self.transformer_dim)
44
-
45
- # Transformer FFN
46
- self.transformer_ffn = nn.Sequential(
47
- nn.Linear(self.transformer_dim, self.transformer_dim * 2),
48
- nn.GELU(),
49
- nn.Dropout(0.1),
50
- nn.Linear(self.transformer_dim * 2, self.transformer_dim),
51
- nn.Dropout(0.1)
52
- )
53
-
54
- # Enhanced channel attention mechanism
55
- self.channel_attention = nn.Sequential(
56
- nn.AdaptiveAvgPool1d(1),
57
- nn.Conv1d(2048, 192, 1), # 128 -> 192
58
- nn.GELU(),
59
- nn.Conv1d(192, 2048, 1),
60
- nn.Sigmoid()
61
- )
62
-
63
- # Enhanced shared features with moderate increase
64
- self.shared_fc1 = nn.Linear(2048, 1536) # 1024 -> 1536
65
- self.shared_fc2 = nn.Linear(1536, 768) # 512 -> 768
66
- self.shared_fc3 = nn.Linear(768, 768) # Additional layer
67
-
68
- # Enhanced position prediction head
69
- self.pos_fc1 = nn.Linear(768, 768) # 512 -> 768
70
- self.pos_fc2 = nn.Linear(768, 384) # 256 -> 384
71
- self.pos_fc3 = nn.Linear(384, 192) # 128 -> 192
72
- self.pos_fc4 = nn.Linear(192, 96) # 64 -> 96
73
- self.pos_fc5 = nn.Linear(96, output_dim)
74
-
75
- # Enhanced score prediction head
76
- if self.predict_score:
77
- self.score_fc1 = nn.Linear(768, 768)
78
- self.score_fc2 = nn.Linear(768, 384)
79
- self.score_fc3 = nn.Linear(384, 192)
80
- self.score_fc4 = nn.Linear(192, 96)
81
- self.score_fc5 = nn.Linear(96, 1)
82
-
83
- # Enhanced classification head
84
- if self.predict_class:
85
- self.class_fc1 = nn.Linear(768, 768)
86
- self.class_fc2 = nn.Linear(768, 384)
87
- self.class_fc3 = nn.Linear(384, 192)
88
- self.class_fc4 = nn.Linear(192, 96)
89
- self.class_fc5 = nn.Linear(96, num_classes)
90
-
91
- # Batch normalization layers
92
- self.bn1 = nn.BatchNorm1d(96, momentum=0.1)
93
- self.bn2 = nn.BatchNorm1d(192, momentum=0.1)
94
- self.bn3 = nn.BatchNorm1d(384, momentum=0.1)
95
- self.bn4 = nn.BatchNorm1d(768, momentum=0.1)
96
- self.bn5 = nn.BatchNorm1d(1536, momentum=0.1)
97
- self.bn6 = nn.BatchNorm1d(1536, momentum=0.1)
98
- self.bn7 = nn.BatchNorm1d(2048, momentum=0.1)
99
-
100
- # Group normalization for shared layers
101
- self.gn1 = nn.GroupNorm(48, 1536) # 32 -> 48 groups
102
- self.gn2 = nn.GroupNorm(24, 768) # 16 -> 24 groups
103
-
104
- # Dropout with different rates
105
- self.dropout_light = nn.Dropout(0.1)
106
- self.dropout_medium = nn.Dropout(0.2)
107
- self.dropout_heavy = nn.Dropout(0.3)
108
-
109
- def forward(self, x):
110
- """
111
- Forward pass with transformer attention and residual connections
112
- Args:
113
- x: (batch_size, input_dim, max_points) tensor
114
- Returns:
115
- Tuple containing predictions based on configuration
116
- """
117
- batch_size = x.size(0)
118
-
119
- # Enhanced point-wise feature extraction
120
- x1 = F.gelu(self.bn1(self.conv1(x)))
121
- x2 = F.gelu(self.bn2(self.conv2(x1)))
122
- x3 = F.gelu(self.bn3(self.conv3(x2)))
123
- x4 = F.gelu(self.bn4(self.conv4(x3)))
124
- x5 = F.gelu(self.bn5(self.conv5(x4)))
125
-
126
- # Residual connection
127
- x6 = F.gelu(self.bn6(self.conv6(x5)) + x5)
128
- x7 = F.gelu(self.bn7(self.conv7(x6)))
129
-
130
- # Apply transformer attention
131
- # Reshape for transformer: (seq_len, batch_size, embed_dim)
132
- x7_reshaped = x7.permute(2, 0, 1) # (max_points, batch_size, 2048)
133
-
134
- # Self-attention with residual connection
135
- attn_out, _ = self.transformer_block(x7_reshaped, x7_reshaped, x7_reshaped)
136
- x7_attn = self.transformer_norm1(x7_reshaped + attn_out)
137
-
138
- # Transformer FFN with residual connection
139
- ffn_out = self.transformer_ffn(x7_attn)
140
- x7_transformer = self.transformer_norm2(x7_attn + ffn_out)
141
-
142
- # Reshape back: (batch_size, embed_dim, seq_len)
143
- x7_transformer = x7_transformer.permute(1, 2, 0)
144
-
145
- # Apply channel attention
146
- attention_weights = self.channel_attention(x7_transformer)
147
- x7_attended = x7_transformer * attention_weights
148
-
149
- # Multi-scale global pooling
150
- max_pool = torch.max(x7_attended, 2)[0] # (batch_size, 2048)
151
- avg_pool = torch.mean(x7_attended, 2) # (batch_size, 2048)
152
-
153
- # Weighted combination of pooling operations
154
- global_features = 0.7 * max_pool + 0.3 * avg_pool
155
-
156
- # Enhanced shared features with residual connections
157
- shared1 = F.gelu(self.gn1(self.shared_fc1(global_features).unsqueeze(-1)).squeeze(-1))
158
- shared1 = self.dropout_light(shared1)
159
-
160
- shared2 = F.gelu(self.gn2(self.shared_fc2(shared1).unsqueeze(-1)).squeeze(-1))
161
- shared2 = self.dropout_medium(shared2)
162
-
163
- # Additional shared layer with residual connection
164
- shared3 = F.gelu(self.shared_fc3(shared2))
165
- shared_features = self.dropout_light(shared3) + shared2
166
-
167
- # Enhanced position prediction
168
- pos1 = F.gelu(self.pos_fc1(shared_features))
169
- pos1 = self.dropout_light(pos1)
170
-
171
- pos2 = F.gelu(self.pos_fc2(pos1))
172
- pos2 = self.dropout_medium(pos2)
173
-
174
- pos3 = F.gelu(self.pos_fc3(pos2))
175
- pos3 = self.dropout_light(pos3)
176
-
177
- pos4 = F.gelu(self.pos_fc4(pos3))
178
- position = self.pos_fc5(pos4)
179
-
180
- outputs = [position]
181
-
182
- if self.predict_score:
183
- # Enhanced score prediction
184
- score1 = F.gelu(self.score_fc1(shared_features))
185
- score1 = self.dropout_light(score1)
186
- score2 = F.gelu(self.score_fc2(score1))
187
- score2 = self.dropout_medium(score2)
188
- score3 = F.gelu(self.score_fc3(score2))
189
- score3 = self.dropout_light(score3)
190
- score4 = F.gelu(self.score_fc4(score3))
191
- score = F.softplus(self.score_fc5(score4))
192
- outputs.append(score)
193
-
194
- if self.predict_class:
195
- # Classification prediction
196
- class1 = F.gelu(self.class_fc1(shared_features))
197
- class1 = self.dropout_light(class1)
198
- class2 = F.gelu(self.class_fc2(class1))
199
- class2 = self.dropout_medium(class2)
200
- class3 = F.gelu(self.class_fc3(class2))
201
- class3 = self.dropout_light(class3)
202
- class4 = F.gelu(self.class_fc4(class3))
203
- classification = self.class_fc5(class4)
204
- outputs.append(classification)
205
-
206
- # Return outputs based on configuration
207
- if len(outputs) == 1:
208
- return outputs[0]
209
- elif len(outputs) == 2:
210
- if self.predict_score:
211
- return outputs[0], outputs[1]
212
- else:
213
- return outputs[0], outputs[1]
214
- else:
215
- return outputs[0], outputs[1], outputs[2]
216
-
217
- class PatchDataset(Dataset):
218
- """
219
- Dataset class for loading saved patches for PointNet training.
220
- Updated for 11D patches.
221
- """
222
-
223
- def __init__(self, dataset_dir: str, max_points: int = 1024, augment: bool = True):
224
- self.dataset_dir = dataset_dir
225
- self.max_points = max_points
226
- self.augment = augment
227
-
228
- # Load patch files
229
- self.patch_files = []
230
- for file in os.listdir(dataset_dir):
231
- if file.endswith('.pkl'):
232
- self.patch_files.append(os.path.join(dataset_dir, file))
233
-
234
- print(f"Found {len(self.patch_files)} patch files in {dataset_dir}")
235
-
236
- def __len__(self):
237
- return len(self.patch_files)
238
-
239
- def __getitem__(self, idx):
240
- """
241
- Load and process a patch for training.
242
- Returns:
243
- patch_data: (11, max_points) tensor of point cloud data
244
- target: (3,) tensor of target 3D coordinates
245
- valid_mask: (max_points,) boolean tensor indicating valid points
246
- distance_to_gt: scalar tensor of distance from initial prediction to GT
247
- classification: scalar tensor for binary classification (1 if GT vertex present, 0 if not)
248
- """
249
- patch_file = self.patch_files[idx]
250
-
251
- with open(patch_file, 'rb') as f:
252
- patch_info = pickle.load(f)
253
-
254
- patch_11d = patch_info['patch_11d'] # (N, 11) - Updated for 11D
255
- target = patch_info.get('assigned_wf_vertex', None) # (3,) or None
256
- initial_pred = patch_info.get('cluster_center', None) # (3,) or None
257
-
258
- # Determine classification label based on GT vertex presence
259
- has_gt_vertex = 1.0 if target is not None else 0.0
260
-
261
- # Handle patches without ground truth
262
- if target is None:
263
- # Use a dummy target for consistency, but mark as invalid with classification
264
- target = np.zeros(3)
265
- else:
266
- target = np.array(target)
267
-
268
- # Pad or sample points to max_points
269
- num_points = patch_11d.shape[0]
270
-
271
- if num_points >= self.max_points:
272
- # Randomly sample max_points
273
- indices = np.random.choice(num_points, self.max_points, replace=False)
274
- patch_sampled = patch_11d[indices]
275
- valid_mask = np.ones(self.max_points, dtype=bool)
276
- else:
277
- # Pad with zeros
278
- patch_sampled = np.zeros((self.max_points, 11)) # Updated for 11D
279
- patch_sampled[:num_points] = patch_11d
280
- valid_mask = np.zeros(self.max_points, dtype=bool)
281
- valid_mask[:num_points] = True
282
-
283
- # Data augmentation (only if GT vertex is present)
284
- if self.augment and has_gt_vertex > 0:
285
- patch_sampled, target = self._augment_patch(patch_sampled, valid_mask, target)
286
-
287
- # Convert to tensors and transpose for conv1d (channels first)
288
- patch_tensor = torch.from_numpy(patch_sampled.T).float() # (11, max_points)
289
- target_tensor = torch.from_numpy(target).float() # (3,)
290
- valid_mask_tensor = torch.from_numpy(valid_mask)
291
-
292
- # Handle initial_pred
293
- if initial_pred is not None:
294
- initial_pred_tensor = torch.from_numpy(initial_pred).float()
295
- else:
296
- initial_pred_tensor = torch.zeros(3).float()
297
-
298
- # Classification tensor
299
- classification_tensor = torch.tensor(has_gt_vertex).float()
300
-
301
- return patch_tensor, target_tensor, valid_mask_tensor, initial_pred_tensor, classification_tensor
302
-
303
- def _augment_patch(self, patch_sampled, valid_mask, target):
304
- """
305
- Apply data augmentation to patch and target.
306
- Only augment valid points and update target accordingly.
307
- """
308
- valid_points = patch_sampled[valid_mask]
309
-
310
- if len(valid_points) > 0:
311
- # Random rotation around Z-axis (small angle)
312
- angle = np.random.uniform(-np.pi/12, np.pi/12) # ±15 degrees
313
- cos_a, sin_a = np.cos(angle), np.sin(angle)
314
- rotation_matrix = np.array([[cos_a, -sin_a, 0],
315
- [sin_a, cos_a, 0],
316
- [0, 0, 1]])
317
-
318
- # Apply rotation to xyz coordinates
319
- valid_points[:, :3] = valid_points[:, :3] @ rotation_matrix.T
320
- target = target @ rotation_matrix.T
321
-
322
- # Small random translation
323
- translation = np.random.uniform(-0.05, 0.05, 3)
324
- valid_points[:, :3] += translation
325
- target += translation
326
-
327
- # Random scaling (small)
328
- scale = np.random.uniform(0.95, 1.05)
329
- valid_points[:, :3] *= scale
330
- target *= scale
331
-
332
- # Add small noise to features (not coordinates)
333
- if valid_points.shape[1] > 3:
334
- noise = np.random.normal(0, 0.01, valid_points[:, 3:].shape)
335
- valid_points[:, 3:] += noise
336
-
337
- # Update patch with augmented valid points
338
- patch_sampled[valid_mask] = valid_points
339
-
340
- return patch_sampled, target
341
-
342
- def save_patches_dataset(patches: List[Dict], dataset_dir: str, entry_id: str):
343
- """
344
- Save patches from prediction pipeline to create a training dataset.
345
-
346
- Args:
347
- patches: List of patch dictionaries from generate_patches()
348
- dataset_dir: Directory to save the dataset
349
- entry_id: Unique identifier for this entry/image
350
- """
351
- os.makedirs(dataset_dir, exist_ok=True)
352
-
353
- for i, patch in enumerate(patches):
354
- # Create unique filename
355
- filename = f"{entry_id}_patch_{i}.pkl"
356
- filepath = os.path.join(dataset_dir, filename)
357
-
358
- # Skip if file already exists
359
- if os.path.exists(filepath):
360
- continue
361
-
362
- # Save patch data
363
- with open(filepath, 'wb') as f:
364
- pickle.dump(patch, f)
365
-
366
- print(f"Saved {len(patches)} patches for entry {entry_id}")
367
-
368
- # Create dataloader with custom collate function to filter invalid samples
369
- def collate_fn(batch):
370
- valid_batch = []
371
- for patch_data, target, valid_mask, initial_pred, classification in batch:
372
- # Filter out invalid samples (no valid points)
373
- if valid_mask.sum() > 0:
374
- valid_batch.append((patch_data, target, valid_mask, initial_pred, classification))
375
-
376
- if len(valid_batch) == 0:
377
- return None
378
-
379
- # Stack valid samples
380
- patch_data = torch.stack([item[0] for item in valid_batch])
381
- targets = torch.stack([item[1] for item in valid_batch])
382
- valid_masks = torch.stack([item[2] for item in valid_batch])
383
- initial_preds = torch.stack([item[3] for item in valid_batch])
384
- classifications = torch.stack([item[4] for item in valid_batch])
385
-
386
- return patch_data, targets, valid_masks, initial_preds, classifications
387
-
388
- # Initialize weights using Kaiming initialization for LeakyReLU
389
- def init_weights(m):
390
- if isinstance(m, nn.Conv1d):
391
- nn.init.kaiming_uniform_(m.weight, a=0.01, mode='fan_in', nonlinearity='leaky_relu')
392
- if m.bias is not None:
393
- nn.init.zeros_(m.bias)
394
- elif isinstance(m, nn.Linear):
395
- nn.init.kaiming_uniform_(m.weight, a=0.01, mode='fan_in', nonlinearity='leaky_relu')
396
- if m.bias is not None:
397
- nn.init.zeros_(m.bias)
398
- elif isinstance(m, (nn.BatchNorm1d, nn.GroupNorm)):
399
- nn.init.ones_(m.weight)
400
- nn.init.zeros_(m.bias)
401
-
402
- def train_pointnet(dataset_dir: str, model_save_path: str, epochs: int = 100, batch_size: int = 32, lr: float = 0.001,
403
- score_weight: float = 0.1, class_weight: float = 0.5):
404
- """
405
- Train the FastPointNet model on saved patches.
406
- Updated for 11D input.
407
- """
408
- device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
409
- print(f"Training on device: {device}")
410
-
411
- # Create dataset and dataloader
412
- dataset = PatchDataset(dataset_dir, max_points=1024, augment=True) # Enable augmentation
413
- print(f"Dataset loaded with {len(dataset)} samples")
414
-
415
- dataloader = DataLoader(dataset, batch_size=batch_size, shuffle=True, num_workers=20,
416
- collate_fn=collate_fn, drop_last=True)
417
-
418
- # Initialize model with 11D input
419
- model = FastPointNet(input_dim=11, output_dim=3, max_points=1024, predict_score=True, predict_class=True, num_classes=1)
420
-
421
- model.apply(init_weights)
422
- model.to(device)
423
-
424
- # Loss functions with label smoothing for classification
425
- position_criterion = nn.SmoothL1Loss() # More robust than MSE
426
- score_criterion = nn.SmoothL1Loss()
427
- classification_criterion = nn.BCEWithLogitsLoss(pos_weight=torch.tensor(2.0)) # Weight positive class more
428
-
429
- # AdamW optimizer with weight decay
430
- optimizer = torch.optim.AdamW(model.parameters(), lr=lr, weight_decay=1e-4, betas=(0.9, 0.999))
431
-
432
- # Cosine annealing scheduler for better convergence
433
- scheduler = torch.optim.lr_scheduler.CosineAnnealingWarmRestarts(optimizer, T_0=20, T_mult=2)
434
-
435
- # Training loop
436
- model.train()
437
- for epoch in range(epochs):
438
- total_loss = 0.0
439
- total_pos_loss = 0.0
440
- total_score_loss = 0.0
441
- total_class_loss = 0.0
442
- num_batches = 0
443
-
444
- for batch_idx, batch_data in enumerate(dataloader):
445
- if batch_data is None: # Skip invalid batches
446
- continue
447
-
448
- patch_data, targets, valid_masks, initial_preds, classifications = batch_data
449
- patch_data = patch_data.to(device) # (batch_size, 11, max_points)
450
- targets = targets.to(device) # (batch_size, 3)
451
- classifications = classifications.to(device) # (batch_size,)
452
-
453
- # Forward pass
454
- optimizer.zero_grad()
455
- predictions, predicted_scores, predicted_classes = model(patch_data)
456
-
457
- # Compute actual distance from predictions to targets
458
- actual_distances = torch.norm(predictions - targets, dim=1, keepdim=True)
459
-
460
- # Only compute position and score losses for samples with GT vertices
461
- has_gt_mask = classifications > 0.5
462
-
463
- if has_gt_mask.sum() > 0:
464
- # Position loss only for samples with GT vertices
465
- pos_loss = position_criterion(predictions[has_gt_mask], targets[has_gt_mask])
466
- score_loss = score_criterion(predicted_scores[has_gt_mask], actual_distances[has_gt_mask])
467
- else:
468
- pos_loss = torch.tensor(0.0, device=device)
469
- score_loss = torch.tensor(0.0, device=device)
470
-
471
- # Classification loss for all samples
472
- class_loss = classification_criterion(predicted_classes.squeeze(), classifications)
473
-
474
- # Combined loss
475
- total_batch_loss = pos_loss + score_weight * score_loss + class_weight * class_loss
476
-
477
- # Backward pass
478
- total_batch_loss.backward()
479
-
480
- # Gradient clipping for stability
481
- torch.nn.utils.clip_grad_norm_(model.parameters(), max_norm=1.0)
482
-
483
- optimizer.step()
484
-
485
- total_loss += total_batch_loss.item()
486
- total_pos_loss += pos_loss.item()
487
- total_score_loss += score_loss.item()
488
- total_class_loss += class_loss.item()
489
- num_batches += 1
490
-
491
- if batch_idx % 50 == 0:
492
- print(f"Epoch {epoch+1}/{epochs}, Batch {batch_idx}, "
493
- f"Total Loss: {total_batch_loss.item():.6f}, "
494
- f"Pos Loss: {pos_loss.item():.6f}, "
495
- f"Score Loss: {score_loss.item():.6f}, "
496
- f"Class Loss: {class_loss.item():.6f}")
497
-
498
- avg_loss = total_loss / num_batches if num_batches > 0 else 0
499
- avg_pos_loss = total_pos_loss / num_batches if num_batches > 0 else 0
500
- avg_score_loss = total_score_loss / num_batches if num_batches > 0 else 0
501
- avg_class_loss = total_class_loss / num_batches if num_batches > 0 else 0
502
-
503
- print(f"Epoch {epoch+1}/{epochs} completed, "
504
- f"Avg Total Loss: {avg_loss:.6f}, "
505
- f"Avg Pos Loss: {avg_pos_loss:.6f}, "
506
- f"Avg Score Loss: {avg_score_loss:.6f}, "
507
- f"Avg Class Loss: {avg_class_loss:.6f}")
508
-
509
- scheduler.step()
510
-
511
- # Save model checkpoint every 10 epochs
512
- if (epoch + 1) % 10 == 0:
513
- checkpoint_path = model_save_path.replace('.pth', f'_epoch_{epoch+1}.pth')
514
- torch.save({
515
- 'model_state_dict': model.state_dict(),
516
- 'optimizer_state_dict': optimizer.state_dict(),
517
- 'epoch': epoch + 1,
518
- 'loss': avg_loss,
519
- }, checkpoint_path)
520
-
521
- # Save the trained model
522
- torch.save({
523
- 'model_state_dict': model.state_dict(),
524
- 'optimizer_state_dict': optimizer.state_dict(),
525
- 'epoch': epochs,
526
- }, model_save_path)
527
-
528
- print(f"Model saved to {model_save_path}")
529
- return model
530
-
531
- def load_pointnet_model(model_path: str, device: torch.device = None, predict_score: bool = True) -> FastPointNet:
532
- """
533
- Load a trained FastPointNet model.
534
- Updated for 11D input.
535
- """
536
- if device is None:
537
- device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
538
-
539
- model = FastPointNet(input_dim=11, output_dim=3, max_points=1024, predict_score=predict_score)
540
-
541
- checkpoint = torch.load(model_path, map_location=device)
542
- model.load_state_dict(checkpoint['model_state_dict'])
543
-
544
- model.to(device)
545
- model.eval()
546
-
547
- return model
548
-
549
- def predict_vertex_from_patch(model: FastPointNet, patch: np.ndarray, device: torch.device = None) -> Tuple[np.ndarray, float, float]:
550
- """
551
- Predict 3D vertex coordinates, confidence score, and classification from a patch using trained PointNet.
552
- Updated for 11D patches.
553
- """
554
- if device is None:
555
- device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
556
-
557
- patch_11d = patch['patch_11d'] # (N, 11) - Updated for 11D
558
-
559
- # Prepare input
560
- max_points = 1024
561
- num_points = patch_11d.shape[0]
562
-
563
- if num_points >= max_points:
564
- # Sample points
565
- indices = np.random.choice(num_points, max_points, replace=False)
566
- patch_sampled = patch_11d[indices]
567
- else:
568
- # Pad with zeros
569
- patch_sampled = np.zeros((max_points, 11)) # Updated for 11D
570
- patch_sampled[:num_points] = patch_11d
571
-
572
- # Convert to tensor
573
- patch_tensor = torch.from_numpy(patch_sampled.T).float().unsqueeze(0) # (1, 11, max_points)
574
- patch_tensor = patch_tensor.to(device)
575
-
576
- # Predict
577
- with torch.no_grad():
578
- outputs = model(patch_tensor)
579
-
580
- if model.predict_score and model.predict_class:
581
- position, score, classification = outputs
582
- position = position.cpu().numpy().squeeze()
583
- score = score.cpu().numpy().squeeze()
584
- classification = torch.sigmoid(classification).cpu().numpy().squeeze() # Apply sigmoid for probability
585
- elif model.predict_score:
586
- position, score = outputs
587
- position = position.cpu().numpy().squeeze()
588
- score = score.cpu().numpy().squeeze()
589
- classification = None
590
- elif model.predict_class:
591
- position, classification = outputs
592
- position = position.cpu().numpy().squeeze()
593
- score = None
594
- classification = torch.sigmoid(classification).cpu().numpy().squeeze() # Apply sigmoid for probability
595
- else:
596
- position = outputs
597
- position = position.cpu().numpy().squeeze()
598
- score = None
599
- classification = None
600
-
601
- # Apply offset correction
602
- offset = patch['cluster_center']
603
- position += offset
604
-
605
- return position, score, classification
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
fast_voxel.py DELETED
@@ -1,591 +0,0 @@
1
- import os
2
- import torch
3
- import torch.nn as nn
4
- import torch.nn.functional as F
5
- import numpy as np
6
- import pickle
7
- from torch.utils.data import Dataset, DataLoader
8
- from typing import List, Dict, Tuple, Optional
9
- import json
10
-
11
- class Fast3DCNN(nn.Module):
12
- """
13
- Fast 3D CNN implementation for 3D vertex prediction from voxelized point cloud patches.
14
- Takes 7D point clouds (x,y,z,r,g,b,filtered_flag) and predicts 3D vertex coordinates.
15
- Uses voxelization and 3D convolutions instead of PointNet architecture.
16
- """
17
- def __init__(self, input_channels=7, output_dim=3, voxel_size=32, predict_score=True, predict_class=True, num_classes=1):
18
- super(Fast3DCNN, self).__init__()
19
- self.voxel_size = voxel_size
20
- self.predict_score = predict_score
21
- self.predict_class = predict_class
22
- self.num_classes = num_classes
23
-
24
- # 3D Convolutional layers for feature extraction
25
- self.conv1 = nn.Conv3d(input_channels, 64, kernel_size=3, padding=1)
26
- self.conv2 = nn.Conv3d(64, 128, kernel_size=3, padding=1)
27
- self.conv3 = nn.Conv3d(128, 256, kernel_size=3, padding=1)
28
- self.conv4 = nn.Conv3d(256, 512, kernel_size=3, padding=1)
29
- self.conv5 = nn.Conv3d(512, 512, kernel_size=3, padding=1)
30
-
31
- # Additional convolutional layers for deeper feature extraction
32
- self.conv6 = nn.Conv3d(512, 1024, kernel_size=3, padding=1)
33
-
34
- # Batch normalization layers
35
- self.bn1 = nn.BatchNorm3d(64)
36
- self.bn2 = nn.BatchNorm3d(128)
37
- self.bn3 = nn.BatchNorm3d(256)
38
- self.bn4 = nn.BatchNorm3d(512)
39
- self.bn5 = nn.BatchNorm3d(512)
40
- self.bn6 = nn.BatchNorm3d(1024)
41
-
42
- # Max pooling layers
43
- self.pool = nn.MaxPool3d(kernel_size=2, stride=2)
44
-
45
- # Calculate the size after convolutions and pooling
46
- # Starting with voxel_size^3, after 3 pooling operations: voxel_size / 8
47
- final_size = voxel_size // 8
48
- flattened_size = 1024 * (final_size ** 3)
49
-
50
- # Adaptive pooling to handle variable sizes
51
- self.adaptive_pool = nn.AdaptiveAvgPool3d((4, 4, 4))
52
- flattened_size = 1024 * 4 * 4 * 4
53
-
54
- # Shared fully connected layers
55
- self.shared_fc1 = nn.Linear(flattened_size, 1024)
56
- self.shared_fc2 = nn.Linear(1024, 512)
57
-
58
- # Position prediction head
59
- self.pos_fc1 = nn.Linear(512, 512)
60
- self.pos_fc2 = nn.Linear(512, 256)
61
- self.pos_fc3 = nn.Linear(256, 128)
62
- self.pos_fc4 = nn.Linear(128, output_dim)
63
-
64
- # Score prediction head
65
- if self.predict_score:
66
- self.score_fc1 = nn.Linear(512, 512)
67
- self.score_fc2 = nn.Linear(512, 256)
68
- self.score_fc3 = nn.Linear(256, 128)
69
- self.score_fc4 = nn.Linear(128, 64)
70
- self.score_fc5 = nn.Linear(64, 1)
71
-
72
- # Classification head
73
- if self.predict_class:
74
- self.class_fc1 = nn.Linear(512, 512)
75
- self.class_fc2 = nn.Linear(512, 256)
76
- self.class_fc3 = nn.Linear(256, 128)
77
- self.class_fc4 = nn.Linear(128, 64)
78
- self.class_fc5 = nn.Linear(64, num_classes)
79
-
80
- # Dropout layers
81
- self.dropout_light = nn.Dropout(0.2)
82
- self.dropout_medium = nn.Dropout(0.3)
83
- self.dropout_heavy = nn.Dropout(0.4)
84
-
85
- def forward(self, x):
86
- """
87
- Forward pass
88
- Args:
89
- x: (batch_size, input_channels, voxel_size, voxel_size, voxel_size) tensor
90
- Returns:
91
- Tuple containing predictions based on configuration:
92
- - position: (batch_size, output_dim) tensor of predicted 3D coordinates
93
- - score: (batch_size, 1) tensor of predicted distance to GT (if predict_score=True)
94
- - classification: (batch_size, num_classes) tensor of class logits (if predict_class=True)
95
- """
96
- batch_size = x.size(0)
97
-
98
- # 3D Convolutional feature extraction
99
- x1 = F.relu(self.bn1(self.conv1(x)))
100
- x1 = self.pool(x1)
101
-
102
- x2 = F.relu(self.bn2(self.conv2(x1)))
103
- x2 = self.pool(x2)
104
-
105
- x3 = F.relu(self.bn3(self.conv3(x2)))
106
- x3 = self.pool(x3)
107
-
108
- x4 = F.relu(self.bn4(self.conv4(x3)))
109
- x5 = F.relu(self.bn5(self.conv5(x4)))
110
- x6 = F.relu(self.bn6(self.conv6(x5)))
111
-
112
- # Adaptive pooling to ensure consistent size
113
- x6 = self.adaptive_pool(x6)
114
-
115
- # Flatten for fully connected layers
116
- global_features = x6.view(batch_size, -1)
117
-
118
- # Shared features
119
- shared1 = F.relu(self.shared_fc1(global_features))
120
- shared1 = self.dropout_light(shared1)
121
- shared2 = F.relu(self.shared_fc2(shared1))
122
- shared_features = self.dropout_medium(shared2)
123
-
124
- # Position prediction
125
- pos1 = F.relu(self.pos_fc1(shared_features))
126
- pos1 = self.dropout_light(pos1)
127
- pos2 = F.relu(self.pos_fc2(pos1))
128
- pos2 = self.dropout_medium(pos2)
129
- pos3 = F.relu(self.pos_fc3(pos2))
130
- pos3 = self.dropout_light(pos3)
131
- position = self.pos_fc4(pos3)
132
-
133
- outputs = [position]
134
-
135
- if self.predict_score:
136
- # Score prediction
137
- score1 = F.relu(self.score_fc1(shared_features))
138
- score1 = self.dropout_light(score1)
139
- score2 = F.relu(self.score_fc2(score1))
140
- score2 = self.dropout_medium(score2)
141
- score3 = F.relu(self.score_fc3(score2))
142
- score3 = self.dropout_light(score3)
143
- score4 = F.relu(self.score_fc4(score3))
144
- score4 = self.dropout_light(score4)
145
- score = F.relu(self.score_fc5(score4))
146
- outputs.append(score)
147
-
148
- if self.predict_class:
149
- # Classification prediction
150
- class1 = F.relu(self.class_fc1(shared_features))
151
- class1 = self.dropout_light(class1)
152
- class2 = F.relu(self.class_fc2(class1))
153
- class2 = self.dropout_medium(class2)
154
- class3 = F.relu(self.class_fc3(class2))
155
- class3 = self.dropout_light(class3)
156
- class4 = F.relu(self.class_fc4(class3))
157
- class4 = self.dropout_light(class4)
158
- classification = self.class_fc5(class4)
159
- outputs.append(classification)
160
-
161
- # Return outputs based on configuration
162
- if len(outputs) == 1:
163
- return outputs[0]
164
- elif len(outputs) == 2:
165
- if self.predict_score:
166
- return outputs[0], outputs[1]
167
- else:
168
- return outputs[0], outputs[1]
169
- else:
170
- return outputs[0], outputs[1], outputs[2]
171
-
172
- def voxelize_patch(patch_7d: np.ndarray, voxel_size: int = 32, patch_size: float = 1.0) -> np.ndarray:
173
- """
174
- Convert point cloud patch to voxel grid.
175
-
176
- Args:
177
- patch_7d: (N, 7) array of points with [x, y, z, r, g, b, filtered_flag]
178
- voxel_size: Size of the voxel grid (voxel_size^3)
179
- patch_size: Physical size of the patch in world coordinates
180
-
181
- Returns:
182
- voxels: (7, voxel_size, voxel_size, voxel_size) array of voxelized features
183
- """
184
- if len(patch_7d) == 0:
185
- return np.zeros((7, voxel_size, voxel_size, voxel_size))
186
-
187
- # Extract coordinates and features
188
- coords = patch_7d[:, :3] # x, y, z
189
- features = patch_7d[:, 3:] # r, g, b, filtered_flag
190
-
191
- # Normalize coordinates to [0, voxel_size-1]
192
- coords_min = coords.min(axis=0)
193
- coords_max = coords.max(axis=0)
194
- coords_range = coords_max - coords_min
195
- coords_range[coords_range == 0] = 1 # Avoid division by zero
196
-
197
- normalized_coords = (coords - coords_min) / coords_range * (voxel_size - 1)
198
- voxel_indices = normalized_coords.astype(int)
199
-
200
- # Clip to valid range
201
- voxel_indices = np.clip(voxel_indices, 0, voxel_size - 1)
202
-
203
- # Initialize voxel grid
204
- voxels = np.zeros((7, voxel_size, voxel_size, voxel_size))
205
-
206
- # Fill voxels with features (average if multiple points fall in same voxel)
207
- counts = np.zeros((voxel_size, voxel_size, voxel_size))
208
-
209
- for i in range(len(patch_7d)):
210
- x, y, z = voxel_indices[i]
211
- # Store normalized coordinates in first 3 channels
212
- voxels[0, x, y, z] += normalized_coords[i, 0] / (voxel_size - 1) # normalized x
213
- voxels[1, x, y, z] += normalized_coords[i, 1] / (voxel_size - 1) # normalized y
214
- voxels[2, x, y, z] += normalized_coords[i, 2] / (voxel_size - 1) # normalized z
215
- # Store RGB and filtered_flag in remaining channels
216
- voxels[3:, x, y, z] += features[i]
217
- counts[x, y, z] += 1
218
-
219
- # Average features where multiple points exist
220
- mask = counts > 0
221
- for c in range(7):
222
- voxels[c][mask] /= counts[mask]
223
-
224
- return voxels
225
-
226
- class VoxelPatchDataset(Dataset):
227
- """
228
- Dataset class for loading saved patches and converting them to voxel grids for 3D CNN training.
229
- """
230
-
231
- def __init__(self, dataset_dir: str, voxel_size: int = 32, augment: bool = False):
232
- self.dataset_dir = dataset_dir
233
- self.voxel_size = voxel_size
234
- self.augment = augment
235
-
236
- # Load patch files
237
- self.patch_files = []
238
- for file in os.listdir(dataset_dir):
239
- if file.endswith('.pkl'):
240
- self.patch_files.append(os.path.join(dataset_dir, file))
241
-
242
- print(f"Found {len(self.patch_files)} patch files in {dataset_dir}")
243
-
244
- def __len__(self):
245
- return len(self.patch_files)
246
-
247
- def __getitem__(self, idx):
248
- """
249
- Load and process a patch for training.
250
- Returns:
251
- voxel_data: (7, voxel_size, voxel_size, voxel_size) tensor of voxelized data
252
- target: (3,) tensor of target 3D coordinates
253
- valid_mask: scalar tensor indicating if this is a valid sample
254
- distance_to_gt: scalar tensor of distance from initial prediction to GT
255
- classification: scalar tensor for binary classification (1 if GT vertex present, 0 if not)
256
- """
257
- patch_file = self.patch_files[idx]
258
-
259
- with open(patch_file, 'rb') as f:
260
- patch_info = pickle.load(f)
261
-
262
- patch_7d = patch_info['patch_7d'] # (N, 7)
263
- target = patch_info.get('assigned_wf_vertex', None) # (3,) or None
264
- initial_pred = patch_info.get('cluster_center', None) # (3,) or None
265
-
266
- # Determine classification label based on GT vertex presence
267
- has_gt_vertex = 1.0 if target is not None else 0.0
268
-
269
- # Handle patches without ground truth
270
- if target is None:
271
- target = np.zeros(3)
272
- else:
273
- target = np.array(target)
274
-
275
- # Voxelize the patch
276
- voxel_data = voxelize_patch(patch_7d, self.voxel_size)
277
-
278
- # Data augmentation (only if GT vertex is present)
279
- if self.augment and has_gt_vertex > 0:
280
- voxel_data, target = self._augment_voxels(voxel_data, target)
281
-
282
- # Convert to tensors (copy arrays to handle negative strides from augmentation)
283
- voxel_tensor = torch.from_numpy(voxel_data.copy()).float() # (7, voxel_size, voxel_size, voxel_size)
284
- target_tensor = torch.from_numpy(target.copy()).float() # (3,)
285
-
286
- # Valid mask (check if voxel grid has any non-zero values)
287
- valid_mask = torch.tensor(1.0 if voxel_data.sum() > 0 else 0.0)
288
-
289
- # Handle initial_pred
290
- if initial_pred is not None:
291
- initial_pred_tensor = torch.from_numpy(initial_pred).float()
292
- else:
293
- initial_pred_tensor = torch.zeros(3).float()
294
-
295
- # Classification tensor
296
- classification_tensor = torch.tensor(has_gt_vertex).float()
297
-
298
- return voxel_tensor, target_tensor, valid_mask, initial_pred_tensor, classification_tensor
299
-
300
- def _augment_voxels(self, voxel_data: np.ndarray, target: np.ndarray) -> Tuple[np.ndarray, np.ndarray]:
301
- """
302
- Apply data augmentation to voxel data.
303
- """
304
- # Random rotation around Z-axis
305
- if np.random.random() > 0.5:
306
- k = np.random.randint(1, 4) # 90, 180, or 270 degrees
307
- voxel_data = np.rot90(voxel_data, k, axes=(1, 2)) # Rotate around z-axis
308
-
309
- # Random flip
310
- if np.random.random() > 0.5:
311
- voxel_data = np.flip(voxel_data, axis=1) # Flip along x-axis
312
- if np.random.random() > 0.5:
313
- voxel_data = np.flip(voxel_data, axis=2) # Flip along y-axis
314
-
315
- return voxel_data, target
316
-
317
- def save_patches_dataset(patches: List[Dict], dataset_dir: str, entry_id: str):
318
- """
319
- Save patches from prediction pipeline to create a training dataset.
320
-
321
- Args:
322
- patches: List of patch dictionaries from generate_patches()
323
- dataset_dir: Directory to save the dataset
324
- entry_id: Unique identifier for this entry/image
325
- """
326
- os.makedirs(dataset_dir, exist_ok=True)
327
-
328
- for i, patch in enumerate(patches):
329
- # Create unique filename
330
- filename = f"{entry_id}_patch_{i}.pkl"
331
- filepath = os.path.join(dataset_dir, filename)
332
-
333
- # Skip if file already exists
334
- if os.path.exists(filepath):
335
- continue
336
-
337
- # Save patch data
338
- with open(filepath, 'wb') as f:
339
- pickle.dump(patch, f)
340
-
341
- print(f"Saved {len(patches)} patches for entry {entry_id}")
342
-
343
- # Create dataloader with custom collate function to filter invalid samples
344
- def collate_fn(batch):
345
- valid_batch = []
346
- for voxel_data, target, valid_mask, initial_pred, classification in batch:
347
- # Filter out invalid samples
348
- if valid_mask > 0:
349
- valid_batch.append((voxel_data, target, valid_mask, initial_pred, classification))
350
-
351
- if len(valid_batch) == 0:
352
- return None
353
-
354
- # Stack valid samples
355
- voxel_data = torch.stack([item[0] for item in valid_batch])
356
- targets = torch.stack([item[1] for item in valid_batch])
357
- valid_masks = torch.stack([item[2] for item in valid_batch])
358
- initial_preds = torch.stack([item[3] for item in valid_batch])
359
- classifications = torch.stack([item[4] for item in valid_batch])
360
-
361
- return voxel_data, targets, valid_masks, initial_preds, classifications
362
-
363
- # Initialize weights using Xavier/Glorot initialization
364
- def init_weights(m):
365
- if isinstance(m, (nn.Conv3d, nn.Conv1d)):
366
- nn.init.xavier_uniform_(m.weight)
367
- if m.bias is not None:
368
- nn.init.zeros_(m.bias)
369
- elif isinstance(m, nn.Linear):
370
- nn.init.xavier_uniform_(m.weight)
371
- if m.bias is not None:
372
- nn.init.zeros_(m.bias)
373
- elif isinstance(m, (nn.BatchNorm3d, nn.BatchNorm1d)):
374
- nn.init.ones_(m.weight)
375
- nn.init.zeros_(m.bias)
376
-
377
- def train_3dcnn(dataset_dir: str, model_save_path: str, epochs: int = 100, batch_size: int = 16, lr: float = 0.001,
378
- voxel_size: int = 32, score_weight: float = 0.1, class_weight: float = 0.5):
379
- """
380
- Train the Fast3DCNN model on saved patches.
381
-
382
- Args:
383
- dataset_dir: Directory containing saved patch files
384
- model_save_path: Path to save the trained model
385
- epochs: Number of training epochs
386
- batch_size: Training batch size (reduced due to memory requirements of 3D conv)
387
- lr: Learning rate
388
- voxel_size: Size of voxel grid
389
- score_weight: Weight for the distance prediction loss
390
- class_weight: Weight for the classification loss
391
- """
392
- device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
393
- print(f"Training on device: {device}")
394
-
395
- # Create dataset and dataloader
396
- dataset = VoxelPatchDataset(dataset_dir, voxel_size=voxel_size, augment=True)
397
- print(f"Dataset loaded with {len(dataset)} samples")
398
-
399
- dataloader = DataLoader(dataset, batch_size=batch_size, shuffle=True, num_workers=4,
400
- collate_fn=collate_fn, drop_last=True)
401
-
402
- # Initialize model with score and classification prediction
403
- model = Fast3DCNN(input_channels=7, output_dim=3, voxel_size=voxel_size,
404
- predict_score=True, predict_class=True, num_classes=1)
405
-
406
- model.apply(init_weights)
407
- model.to(device)
408
-
409
- # Loss functions
410
- position_criterion = nn.MSELoss()
411
- score_criterion = nn.MSELoss()
412
- classification_criterion = nn.BCEWithLogitsLoss()
413
-
414
- optimizer = torch.optim.Adam(model.parameters(), lr=lr, weight_decay=1e-4)
415
- scheduler = torch.optim.lr_scheduler.StepLR(optimizer, step_size=30, gamma=0.5)
416
-
417
- # Training loop
418
- model.train()
419
- for epoch in range(epochs):
420
- total_loss = 0.0
421
- total_pos_loss = 0.0
422
- total_score_loss = 0.0
423
- total_class_loss = 0.0
424
- num_batches = 0
425
-
426
- for batch_idx, batch_data in enumerate(dataloader):
427
- if batch_data is None: # Skip invalid batches
428
- continue
429
-
430
- voxel_data, targets, valid_masks, initial_preds, classifications = batch_data
431
- voxel_data = voxel_data.to(device) # (batch_size, 7, voxel_size, voxel_size, voxel_size)
432
- targets = targets.to(device) # (batch_size, 3)
433
- classifications = classifications.to(device) # (batch_size,)
434
-
435
- # Forward pass
436
- optimizer.zero_grad()
437
- predictions, predicted_scores, predicted_classes = model(voxel_data)
438
-
439
- # Compute actual distance from predictions to targets
440
- actual_distances = torch.norm(predictions - targets, dim=1, keepdim=True)
441
-
442
- # Only compute position and score losses for samples with GT vertices
443
- has_gt_mask = classifications > 0.5
444
-
445
- if has_gt_mask.sum() > 0:
446
- # Position loss only for samples with GT vertices
447
- pos_loss = position_criterion(predictions[has_gt_mask], targets[has_gt_mask])
448
- score_loss = score_criterion(predicted_scores[has_gt_mask], actual_distances[has_gt_mask])
449
- else:
450
- pos_loss = torch.tensor(0.0, device=device)
451
- score_loss = torch.tensor(0.0, device=device)
452
-
453
- # Classification loss for all samples
454
- class_loss = classification_criterion(predicted_classes.squeeze(), classifications)
455
-
456
- # Combined loss
457
- total_batch_loss = pos_loss + score_weight * score_loss + class_weight * class_loss
458
-
459
- # Backward pass
460
- total_batch_loss.backward()
461
- optimizer.step()
462
-
463
- total_loss += total_batch_loss.item()
464
- total_pos_loss += pos_loss.item()
465
- total_score_loss += score_loss.item()
466
- total_class_loss += class_loss.item()
467
- num_batches += 1
468
-
469
- if batch_idx % 50 == 0:
470
- print(f"Epoch {epoch+1}/{epochs}, Batch {batch_idx}, "
471
- f"Total Loss: {total_batch_loss.item():.6f}, "
472
- f"Pos Loss: {pos_loss.item():.6f}, "
473
- f"Score Loss: {score_loss.item():.6f}, "
474
- f"Class Loss: {class_loss.item():.6f}")
475
-
476
- avg_loss = total_loss / num_batches if num_batches > 0 else 0
477
- avg_pos_loss = total_pos_loss / num_batches if num_batches > 0 else 0
478
- avg_score_loss = total_score_loss / num_batches if num_batches > 0 else 0
479
- avg_class_loss = total_class_loss / num_batches if num_batches > 0 else 0
480
-
481
- print(f"Epoch {epoch+1}/{epochs} completed, "
482
- f"Avg Total Loss: {avg_loss:.6f}, "
483
- f"Avg Pos Loss: {avg_pos_loss:.6f}, "
484
- f"Avg Score Loss: {avg_score_loss:.6f}, "
485
- f"Avg Class Loss: {avg_class_loss:.6f}")
486
-
487
- scheduler.step()
488
-
489
- # Save model checkpoint every epoch
490
- checkpoint_path = model_save_path.replace('.pth', f'_epoch_{epoch+1}.pth')
491
- torch.save({
492
- 'model_state_dict': model.state_dict(),
493
- 'optimizer_state_dict': optimizer.state_dict(),
494
- 'epoch': epoch + 1,
495
- 'loss': avg_loss,
496
- }, checkpoint_path)
497
-
498
- # Save the trained model
499
- torch.save({
500
- 'model_state_dict': model.state_dict(),
501
- 'optimizer_state_dict': optimizer.state_dict(),
502
- 'epoch': epochs,
503
- }, model_save_path)
504
-
505
- print(f"Model saved to {model_save_path}")
506
- return model
507
-
508
- def load_3dcnn_model(model_path: str, device: torch.device = None, voxel_size: int = 32, predict_score: bool = True) -> Fast3DCNN:
509
- """
510
- Load a trained Fast3DCNN model.
511
-
512
- Args:
513
- model_path: Path to the saved model
514
- device: Device to load the model on
515
- voxel_size: Size of voxel grid
516
- predict_score: Whether the model predicts scores
517
-
518
- Returns:
519
- Loaded Fast3DCNN model
520
- """
521
- if device is None:
522
- device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
523
-
524
- model = Fast3DCNN(input_channels=7, output_dim=3, voxel_size=voxel_size, predict_score=predict_score)
525
-
526
- checkpoint = torch.load(model_path, map_location=device)
527
- model.load_state_dict(checkpoint['model_state_dict'])
528
-
529
- model.to(device)
530
- model.eval()
531
-
532
- return model
533
-
534
- def predict_vertex_from_patch_voxel(model: Fast3DCNN, patch: np.ndarray, device: torch.device = None, voxel_size: int = 32) -> Tuple[np.ndarray, float, float]:
535
- """
536
- Predict 3D vertex coordinates, confidence score, and classification from a patch using trained 3D CNN.
537
-
538
- Args:
539
- model: Trained Fast3DCNN model
540
- patch: Dictionary containing patch data with 'patch_7d' and 'cluster_center' keys
541
- device: Device to run prediction on
542
- voxel_size: Size of voxel grid
543
-
544
- Returns:
545
- tuple of (predicted_coordinates, confidence_score, classification_score)
546
- predicted_coordinates: (3,) numpy array of predicted 3D coordinates
547
- confidence_score: float representing predicted distance to GT (lower is better)
548
- classification_score: float representing probability of GT vertex presence (0-1)
549
- """
550
- if device is None:
551
- device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
552
-
553
- patch_7d = patch['patch_7d'] # (N, 7)
554
-
555
- # Voxelize the patch
556
- voxel_data = voxelize_patch(patch_7d, voxel_size)
557
-
558
- # Convert to tensor
559
- voxel_tensor = torch.from_numpy(voxel_data).float().unsqueeze(0) # (1, 7, voxel_size, voxel_size, voxel_size)
560
- voxel_tensor = voxel_tensor.to(device)
561
-
562
- # Predict
563
- with torch.no_grad():
564
- outputs = model(voxel_tensor)
565
-
566
- if model.predict_score and model.predict_class:
567
- position, score, classification = outputs
568
- position = position.cpu().numpy().squeeze()
569
- score = score.cpu().numpy().squeeze()
570
- classification = torch.sigmoid(classification).cpu().numpy().squeeze()
571
- elif model.predict_score:
572
- position, score = outputs
573
- position = position.cpu().numpy().squeeze()
574
- score = score.cpu().numpy().squeeze()
575
- classification = None
576
- elif model.predict_class:
577
- position, classification = outputs
578
- position = position.cpu().numpy().squeeze()
579
- score = None
580
- classification = torch.sigmoid(classification).cpu().numpy().squeeze()
581
- else:
582
- position = outputs
583
- position = position.cpu().numpy().squeeze()
584
- score = None
585
- classification = None
586
-
587
- # Apply offset correction
588
- offset = patch['cluster_center']
589
- position += offset
590
-
591
- return position, score, classification
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
find_best_results.py CHANGED
@@ -1,5 +1,12 @@
1
  #!/usr/bin/env python3
2
  # filepath: /home/skvrnjan/hoho/find_best_results.py
 
 
 
 
 
 
 
3
  import os
4
  import re
5
 
 
1
  #!/usr/bin/env python3
2
  # filepath: /home/skvrnjan/hoho/find_best_results.py
3
+ # This script scans a directory for result files (text files typically starting
4
+ # with "results_vt" within subdirectories matching a given prefix).
5
+ # It parses these files to extract metrics like Mean HSS, Mean F1, Mean IoU,
6
+ # Vertex Threshold, Edge Threshold, and Only Predicted Connections.
7
+ # The script then identifies and prints the top N results (default N=10)
8
+ # for Mean HSS, Mean F1, and Mean IoU, along with their associated configuration
9
+ # parameters.
10
  import os
11
  import re
12
 
fully_deep.py DELETED
@@ -1,1082 +0,0 @@
1
- import torch
2
- import os
3
- import pickle
4
- from torch.utils.data import Dataset, DataLoader
5
- import numpy as np
6
- from scipy.optimize import linear_sum_assignment
7
- import torch.nn as nn
8
- import torch.nn.functional as F
9
-
10
- # =============================================================================
11
- # CONFIGURATION PARAMETERS
12
- # =============================================================================
13
-
14
- # Dataset Configuration
15
- DATA_DIR = '/mnt/personal/skvrnjan/hoho_fully'
16
- SPLIT = 'train'
17
- MAX_POINTS = 8096
18
- BATCH_SIZE = 32
19
- NUM_WORKERS = 8
20
-
21
- # Model Architecture Parameters
22
- PC_INPUT_FEATURES = 3
23
- PC_ENCODER_OUTPUT_FEATURES = 128
24
- MAX_VERTICES = 50
25
- VERTEX_COORD_DIM = 3
26
- GNN_HIDDEN_DIM = 64
27
- NUM_GNN_LAYERS = 2
28
- HIDDEN_DIM = 256
29
- NUM_DECODER_LAYERS = 3
30
- NUM_HEADS = 8
31
-
32
- # PointNet2 Encoder Parameters
33
- SA1_NPOINT = 1024
34
- SA1_RADIUS = 0.2
35
- SA1_NSAMPLE = 32
36
- SA1_MLP = [64, 64, 128]
37
-
38
- SA2_NPOINT = 256
39
- SA2_RADIUS = 0.4
40
- SA2_NSAMPLE = 64
41
- SA2_MLP = [128, 128, 256]
42
-
43
- SA3_MLP = [256, 512, 1024] # Global pooling layer
44
-
45
- FP3_MLP = [256, 256]
46
- FP2_MLP = [256, 128]
47
- FP1_MLP = [128, 128] # Will add PC_ENCODER_OUTPUT_FEATURES at the end
48
-
49
- # Vertex Prediction Head Parameters
50
- VERTEX_TRANSFORMER_DROPOUT = 0.1
51
- VERTEX_TRANSFORMER_FFN_RATIO = 4
52
-
53
- # Edge Prediction Head Parameters
54
- EDGE_GNN_NUM_HEADS = 4
55
- EDGE_GNN_DROPOUT = 0.1
56
- EDGE_K_NEIGHBORS = 8
57
-
58
- # Training Configuration
59
- NUM_EPOCHS = 100
60
- LEARNING_RATE = 1e-4
61
- WEIGHT_DECAY = 1e-5
62
- GRADIENT_CLIP_MAX_NORM = 1.0
63
-
64
- # Loss Weights
65
- VERTEX_LOSS_WEIGHT = 1.0
66
- EDGE_LOSS_WEIGHT = 0.5
67
- CONFIDENCE_LOSS_WEIGHT = 0.3
68
-
69
- # Learning Rate Scheduler Parameters
70
- LR_SCHEDULER_FACTOR = 0.5
71
- LR_SCHEDULER_PATIENCE = 10
72
-
73
- # Checkpoint and Logging
74
- CHECKPOINT_SAVE_FREQUENCY = 1 # Save every N epochs
75
- LOG_FREQUENCY = 10 # Print progress every N batches
76
-
77
- # Device Configuration
78
- DEVICE = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
79
-
80
- # =============================================================================
81
- # MODEL IMPLEMENTATION
82
- # =============================================================================
83
-
84
- # You would likely need a library like torch_geometric for GNNs
85
- # from torch_geometric.nn import GATConv, EdgeConv # Example GNN layers
86
-
87
- # --- 1. Point Cloud Encoder Backbone (Placeholder) ---
88
- class PointNet2Encoder(nn.Module):
89
- def __init__(self, input_features, output_features):
90
- super().__init__()
91
- self.input_features = input_features
92
- self.output_features = output_features
93
-
94
- # Set Abstraction layers - adjusted for 8096 input points
95
- self.sa1 = SetAbstractionLayer(
96
- npoint=SA1_NPOINT, radius=SA1_RADIUS, nsample=SA1_NSAMPLE,
97
- in_channel=input_features + 3, mlp=SA1_MLP
98
- )
99
- self.sa2 = SetAbstractionLayer(
100
- npoint=SA2_NPOINT, radius=SA2_RADIUS, nsample=SA2_NSAMPLE,
101
- in_channel=SA1_MLP[-1] + 3, mlp=SA2_MLP
102
- )
103
- self.sa3 = SetAbstractionLayer(
104
- npoint=None, radius=None, nsample=None, # Global pooling
105
- in_channel=SA2_MLP[-1] + 3, mlp=SA3_MLP
106
- )
107
-
108
- # Feature Propagation layers for point-wise features
109
- self.fp3 = FeaturePropagationLayer(in_channel=SA3_MLP[-1] + SA2_MLP[-1], mlp=FP3_MLP)
110
- self.fp2 = FeaturePropagationLayer(in_channel=FP3_MLP[-1] + SA1_MLP[-1], mlp=FP2_MLP)
111
- self.fp1 = FeaturePropagationLayer(in_channel=FP2_MLP[-1] + input_features, mlp=FP1_MLP + [output_features])
112
-
113
- def forward(self, xyz):
114
- # xyz: (B, N, 3) where N = 8096
115
- B, N, _ = xyz.shape
116
-
117
- # Initial features (can be empty or coordinates)
118
- points = xyz if self.input_features == 3 else None
119
-
120
- # Set Abstraction
121
- l1_xyz, l1_points = self.sa1(xyz, points) # 8096 -> 1024 points
122
- l2_xyz, l2_points = self.sa2(l1_xyz, l1_points) # 1024 -> 256 points
123
- l3_xyz, l3_points = self.sa3(l2_xyz, l2_points) # 256 -> 1 point (global)
124
-
125
- # Feature Propagation
126
- l2_points = self.fp3(l2_xyz, l3_xyz, l2_points, l3_points)
127
- l1_points = self.fp2(l1_xyz, l2_xyz, l1_points, l2_points)
128
- l0_points = self.fp1(xyz, l1_xyz, points, l1_points)
129
-
130
- # Global feature from the most abstract level
131
- global_feature = l3_points.squeeze(-1) # (B, 1024)
132
-
133
- return l0_points, global_feature # (B, 8096, output_features), (B, 1024)
134
-
135
-
136
- class SetAbstractionLayer(nn.Module):
137
- def __init__(self, npoint, radius, nsample, in_channel, mlp, group_all=False):
138
- super().__init__()
139
- self.npoint = npoint
140
- self.radius = radius
141
- self.nsample = nsample
142
- self.group_all = group_all
143
-
144
- self.mlp_convs = nn.ModuleList()
145
- self.mlp_bns = nn.ModuleList()
146
- last_channel = in_channel
147
- for out_channel in mlp:
148
- self.mlp_convs.append(nn.Conv2d(last_channel, out_channel, 1))
149
- self.mlp_bns.append(nn.BatchNorm2d(out_channel))
150
- last_channel = out_channel
151
-
152
- def forward(self, xyz, points):
153
- # xyz: (B, N, 3)
154
- # points: (B, N, C) or None
155
- B, N, C = xyz.shape
156
-
157
- if self.group_all or self.npoint is None:
158
- # Global pooling
159
- new_xyz = xyz.mean(dim=1, keepdim=True) # (B, 1, 3)
160
- if points is not None:
161
- new_points = torch.cat([xyz, points], dim=-1) # (B, N, 3+C)
162
- new_points = new_points.transpose(1, 2).unsqueeze(-1) # (B, 3+C, N, 1)
163
- else:
164
- new_points = xyz.transpose(1, 2).unsqueeze(-1) # (B, 3, N, 1)
165
- else:
166
- # Farthest Point Sampling
167
- fps_idx = farthest_point_sample(xyz, self.npoint) # (B, npoint)
168
- new_xyz = index_points(xyz, fps_idx) # (B, npoint, 3)
169
-
170
- # Ball Query
171
- idx = ball_query(self.radius, self.nsample, xyz, new_xyz) # (B, npoint, nsample)
172
- grouped_xyz = index_points(xyz, idx) # (B, npoint, nsample, 3)
173
- grouped_xyz_norm = grouped_xyz - new_xyz.unsqueeze(2) # Relative positions
174
-
175
- if points is not None:
176
- grouped_points = index_points(points, idx) # (B, npoint, nsample, C)
177
- new_points = torch.cat([grouped_xyz_norm, grouped_points], dim=-1) # (B, npoint, nsample, 3+C)
178
- else:
179
- new_points = grouped_xyz_norm # (B, npoint, nsample, 3)
180
-
181
- new_points = new_points.permute(0, 3, 1, 2) # (B, 3+C, npoint, nsample)
182
-
183
- # MLP
184
- for i, conv in enumerate(self.mlp_convs):
185
- bn = self.mlp_bns[i]
186
- new_points = F.relu(bn(conv(new_points)))
187
-
188
- # Max pooling
189
- new_points = torch.max(new_points, dim=-1)[0] # (B, mlp[-1], npoint)
190
- new_points = new_points.transpose(1, 2) # (B, npoint, mlp[-1])
191
-
192
- return new_xyz, new_points
193
-
194
-
195
- class FeaturePropagationLayer(nn.Module):
196
- def __init__(self, in_channel, mlp):
197
- super().__init__()
198
- self.mlp_convs = nn.ModuleList()
199
- self.mlp_bns = nn.ModuleList()
200
- last_channel = in_channel
201
- for out_channel in mlp:
202
- self.mlp_convs.append(nn.Conv1d(last_channel, out_channel, 1))
203
- self.mlp_bns.append(nn.BatchNorm1d(out_channel))
204
- last_channel = out_channel
205
-
206
- def forward(self, xyz1, xyz2, points1, points2):
207
- # xyz1: (B, N1, 3) - target points
208
- # xyz2: (B, N2, 3) - source points
209
- # points1: (B, N1, C1) - target features
210
- # points2: (B, N2, C2) - source features
211
-
212
- # Interpolate features from xyz2 to xyz1
213
- if points2 is not None:
214
- interpolated_points = interpolate_features(xyz1, xyz2, points2) # (B, N1, C2)
215
- if points1 is not None:
216
- # Ensure both tensors have the same number of points (N1)
217
- assert points1.shape[1] == interpolated_points.shape[1], f"Point count mismatch: {points1.shape[1]} vs {interpolated_points.shape[1]}"
218
- new_points = torch.cat([points1, interpolated_points], dim=-1) # (B, N1, C1+C2)
219
- else:
220
- new_points = interpolated_points
221
- else:
222
- new_points = points1
223
-
224
- # Handle None case
225
- if new_points is None:
226
- return None
227
-
228
- # MLP
229
- new_points = new_points.transpose(1, 2) # (B, C, N1)
230
- for i, conv in enumerate(self.mlp_convs):
231
- bn = self.mlp_bns[i]
232
- new_points = F.relu(bn(conv(new_points)))
233
-
234
- return new_points.transpose(1, 2) # (B, N1, mlp[-1])
235
-
236
-
237
- def farthest_point_sample(xyz, npoint):
238
- """Farthest Point Sampling"""
239
- device = xyz.device
240
- B, N, C = xyz.shape
241
- centroids = torch.zeros(B, npoint, dtype=torch.long).to(device)
242
- distance = torch.ones(B, N).to(device) * 1e10
243
- farthest = torch.randint(0, N, (B,), dtype=torch.long).to(device)
244
-
245
- for i in range(npoint):
246
- centroids[:, i] = farthest
247
- centroid = xyz[torch.arange(B), farthest, :].view(B, 1, 3)
248
- dist = torch.sum((xyz - centroid) ** 2, -1)
249
- mask = dist < distance
250
- distance[mask] = dist[mask]
251
- farthest = torch.max(distance, -1)[1]
252
-
253
- return centroids
254
-
255
-
256
- def ball_query(radius, nsample, xyz, new_xyz):
257
- """Ball Query"""
258
- device = xyz.device
259
- B, N, C = xyz.shape
260
- _, S, _ = new_xyz.shape
261
-
262
- group_idx = torch.arange(N, dtype=torch.long).to(device).view(1, 1, N).repeat([B, S, 1])
263
- sqrdists = square_distance(new_xyz, xyz)
264
- group_idx[sqrdists > radius ** 2] = N
265
- group_idx = group_idx.sort(dim=-1)[0][:, :, :nsample]
266
- group_first = group_idx[:, :, 0].view(B, S, 1).repeat([1, 1, nsample])
267
- mask = group_idx == N
268
- group_idx[mask] = group_first[mask]
269
-
270
- # If group_first[mask] was N (i.e., no points in the ball for a centroid),
271
- # group_idx can still contain N. Clamp N to 0 to ensure valid indices.
272
- # N corresponds to xyz.shape[1], which is guaranteed to be > 0 by the dataloader logic.
273
- group_idx[group_idx == N] = 0
274
-
275
- return group_idx
276
-
277
-
278
- def square_distance(src, dst):
279
- """Calculate squared distance between each two points"""
280
- B, N, _ = src.shape
281
- _, M, _ = dst.shape
282
- dist = -2 * torch.matmul(src, dst.permute(0, 2, 1))
283
- dist += torch.sum(src ** 2, -1).view(B, N, 1)
284
- dist += torch.sum(dst ** 2, -1).view(B, 1, M)
285
- return dist
286
-
287
-
288
- def index_points(points, idx):
289
- """Index points using given indices"""
290
- device = points.device
291
- B = points.shape[0]
292
- view_shape = list(idx.shape)
293
- view_shape[1:] = [1] * (len(view_shape) - 1)
294
- repeat_shape = list(idx.shape)
295
- repeat_shape[0] = 1
296
- batch_indices = torch.arange(B, dtype=torch.long).to(device).view(view_shape).repeat(repeat_shape)
297
- new_points = points[batch_indices, idx, :]
298
- return new_points
299
-
300
-
301
- def interpolate_features(xyz1, xyz2, points2):
302
- """Interpolate features using inverse distance weighting"""
303
- B, N1, C = xyz1.shape
304
- _, N2, _ = xyz2.shape
305
-
306
- if N2 == 1:
307
- # If only one point, broadcast to all target points
308
- interpolated_points = points2.expand(B, N1, -1)
309
- else:
310
- # Find 3 nearest neighbors and interpolate
311
- dists = square_distance(xyz1, xyz2) # (B, N1, N2)
312
- dists, idx = dists.sort(dim=-1)
313
-
314
- # Use min(3, N2) neighbors to handle cases with fewer source points
315
- k = min(3, N2)
316
- dists, idx = dists[:, :, :k], idx[:, :, :k]
317
-
318
- # Inverse distance weighting
319
- dists[dists < 1e-10] = 1e-10
320
- weight = 1.0 / dists # (B, N1, k)
321
- weight = weight / torch.sum(weight, dim=-1, keepdim=True) # Normalize
322
-
323
- # Interpolate
324
- interpolated_points = torch.sum(
325
- index_points(points2, idx) * weight.view(B, N1, k, 1), dim=2
326
- )
327
-
328
- return interpolated_points
329
-
330
- # --- 2. Vertex Prediction Head (Transformer-based) ---
331
- class VertexPredictionHead(nn.Module):
332
- def __init__(self, point_feature_dim, global_feature_dim, max_vertices, vertex_coord_dim=3,
333
- hidden_dim=256, num_decoder_layers=3, num_heads=8):
334
- super().__init__()
335
- self.max_vertices = max_vertices
336
- self.vertex_coord_dim = vertex_coord_dim
337
- self.hidden_dim = hidden_dim
338
-
339
- # Learnable vertex queries (similar to DETR object queries)
340
- self.vertex_queries = nn.Parameter(torch.randn(max_vertices, hidden_dim))
341
-
342
- # Project global feature to hidden dimension
343
- self.global_proj = nn.Linear(global_feature_dim, 1)
344
-
345
- # Project point features to hidden dimension for cross-attention
346
- self.point_proj = nn.Linear(point_feature_dim, hidden_dim)
347
-
348
- # Transformer decoder layers
349
- decoder_layer = nn.TransformerDecoderLayer(
350
- d_model=hidden_dim,
351
- nhead=num_heads,
352
- dim_feedforward=hidden_dim * VERTEX_TRANSFORMER_FFN_RATIO,
353
- dropout=VERTEX_TRANSFORMER_DROPOUT,
354
- batch_first=True
355
- )
356
- self.transformer_decoder = nn.TransformerDecoder(decoder_layer, num_layers=num_decoder_layers)
357
-
358
- # Output heads
359
- self.vertex_coord_head = nn.Sequential(
360
- nn.Linear(hidden_dim, hidden_dim),
361
- nn.ReLU(),
362
- nn.Linear(hidden_dim, vertex_coord_dim)
363
- )
364
-
365
- # Confidence/existence head (predicts if vertex exists)
366
- self.vertex_conf_head = nn.Sequential(
367
- nn.Linear(hidden_dim, hidden_dim),
368
- nn.ReLU(),
369
- nn.Linear(hidden_dim, 1)
370
- )
371
-
372
- # Position encoding for point features
373
- self.pos_encoding = nn.Sequential(
374
- nn.Linear(3, hidden_dim // 2),
375
- nn.ReLU(),
376
- nn.Linear(hidden_dim // 2, hidden_dim)
377
- )
378
-
379
- def forward(self, point_features, global_feature, point_coords=None):
380
- # point_features: (B, N, point_feature_dim)
381
- # global_feature: (B, global_feature_dim)
382
- # point_coords: (B, N, 3) - optional point coordinates for positional encoding
383
-
384
- batch_size = point_features.shape[0]
385
-
386
- # Project features to hidden dimension
387
- point_features_proj = self.point_proj(point_features) # (B, N, hidden_dim)
388
-
389
- # Add positional encoding if coordinates are provided
390
- if point_coords is not None:
391
- pos_enc = self.pos_encoding(point_coords) # (B, N, hidden_dim)
392
- point_features_proj = point_features_proj + pos_enc
393
-
394
- # Prepare vertex queries
395
- vertex_queries = self.vertex_queries.unsqueeze(0).repeat(batch_size, 1, 1) # (B, max_vertices, hidden_dim)
396
-
397
- # Add global context to vertex queries
398
- global_proj = self.global_proj(global_feature).squeeze(-1).unsqueeze(1) # (B, 1, hidden_dim)
399
- vertex_queries = vertex_queries + global_proj # Broadcasting will handle (B, 1, hidden_dim) + (B, max_vertices, hidden_dim)
400
-
401
- # Transformer decoder: vertex queries attend to point features
402
- vertex_features = self.transformer_decoder(
403
- tgt=vertex_queries, # (B, max_vertices, hidden_dim)
404
- memory=point_features_proj # (B, N, hidden_dim)
405
- ) # (B, max_vertices, hidden_dim)
406
-
407
- # Predict vertex coordinates
408
- predicted_vertices = self.vertex_coord_head(vertex_features) # (B, max_vertices, 3)
409
-
410
- # Predict vertex confidence/existence
411
- vertex_confidence = self.vertex_conf_head(vertex_features).squeeze(-1) # (B, max_vertices)
412
-
413
- return predicted_vertices, vertex_confidence
414
-
415
- # --- 3. Edge Prediction Head (GNN-based) ---
416
- class EdgePredictionHeadGNN(nn.Module):
417
- def __init__(self, vertex_feature_dim, gnn_hidden_dim, num_gnn_layers):
418
- super().__init__()
419
- self.vertex_feature_dim = vertex_feature_dim
420
- self.gnn_hidden_dim = gnn_hidden_dim
421
- self.num_gnn_layers = num_gnn_layers
422
-
423
- # Initial vertex feature projection
424
- self.vertex_proj = nn.Linear(vertex_feature_dim, gnn_hidden_dim)
425
-
426
- # GNN layers using message passing
427
- self.gnn_layers = nn.ModuleList()
428
- for i in range(num_gnn_layers):
429
- self.gnn_layers.append(
430
- GraphAttentionLayer(
431
- in_features=gnn_hidden_dim,
432
- out_features=gnn_hidden_dim,
433
- num_heads=EDGE_GNN_NUM_HEADS,
434
- dropout=EDGE_GNN_DROPOUT
435
- )
436
- )
437
-
438
- # Edge classifier MLP
439
- self.edge_mlp = nn.Sequential(
440
- nn.Linear(gnn_hidden_dim * 2, gnn_hidden_dim),
441
- nn.ReLU(),
442
- nn.Dropout(EDGE_GNN_DROPOUT),
443
- nn.Linear(gnn_hidden_dim, gnn_hidden_dim // 2),
444
- nn.ReLU(),
445
- nn.Linear(gnn_hidden_dim // 2, 1)
446
- )
447
-
448
- # Learnable threshold for k-NN graph construction
449
- self.k_neighbors = EDGE_K_NEIGHBORS # Number of nearest neighbors for initial graph
450
-
451
- def forward(self, vertices):
452
- # vertices: (B, num_vertices, vertex_coord_dim)
453
- batch_size, num_vertices, _ = vertices.shape
454
-
455
- # Project vertex coordinates to hidden features
456
- vertex_features = self.vertex_proj(vertices) # (B, num_vertices, gnn_hidden_dim)
457
-
458
- # Construct initial graph based on spatial proximity (k-NN)
459
- adjacency_matrix = self.construct_knn_graph(vertices, k=self.k_neighbors) # (B, num_vertices, num_vertices)
460
-
461
- # Apply GNN layers
462
- for gnn_layer in self.gnn_layers:
463
- vertex_features = gnn_layer(vertex_features, adjacency_matrix) # (B, num_vertices, gnn_hidden_dim)
464
-
465
- # Generate all possible vertex pairs
466
- idx_pairs = torch.combinations(torch.arange(num_vertices), r=2).to(vertices.device) # (num_pairs, 2)
467
-
468
- # Gather features for all vertex pairs
469
- v1_features = vertex_features[:, idx_pairs[:, 0], :] # (B, num_pairs, gnn_hidden_dim)
470
- v2_features = vertex_features[:, idx_pairs[:, 1], :] # (B, num_pairs, gnn_hidden_dim)
471
-
472
- # Concatenate paired vertex features
473
- edge_features = torch.cat([v1_features, v2_features], dim=2) # (B, num_pairs, gnn_hidden_dim * 2)
474
-
475
- # Predict edge probabilities
476
- edge_logits = self.edge_mlp(edge_features).squeeze(-1) # (B, num_pairs)
477
-
478
- return edge_logits, idx_pairs
479
-
480
- def construct_knn_graph(self, vertices, k):
481
- # vertices: (B, num_vertices, 3)
482
- batch_size, num_vertices, _ = vertices.shape
483
-
484
- # Compute pairwise distances
485
- distances = torch.cdist(vertices, vertices, p=2) # (B, num_vertices, num_vertices)
486
-
487
- # Find k nearest neighbors for each vertex
488
- _, knn_indices = torch.topk(distances, k + 1, dim=-1, largest=False) # +1 to include self
489
- knn_indices = knn_indices[:, :, 1:] # Remove self-connection
490
-
491
- # Create adjacency matrix
492
- adjacency = torch.zeros(batch_size, num_vertices, num_vertices, device=vertices.device)
493
-
494
- # Fill adjacency matrix
495
- batch_idx = torch.arange(batch_size).view(-1, 1, 1).expand(-1, num_vertices, k)
496
- vertex_idx = torch.arange(num_vertices).view(1, -1, 1).expand(batch_size, -1, k)
497
-
498
- adjacency[batch_idx, vertex_idx, knn_indices] = 1.0
499
-
500
- # Make adjacency symmetric
501
- adjacency = torch.max(adjacency, adjacency.transpose(-1, -2))
502
-
503
- return adjacency
504
-
505
-
506
- class GraphAttentionLayer(nn.Module):
507
- def __init__(self, in_features, out_features, num_heads=1, dropout=0.1):
508
- super().__init__()
509
- self.in_features = in_features
510
- self.out_features = out_features
511
- self.num_heads = num_heads
512
- self.dropout = dropout
513
-
514
- assert out_features % num_heads == 0
515
- self.head_dim = out_features // num_heads
516
-
517
- # Linear transformations for queries, keys, values
518
- self.W_q = nn.Linear(in_features, out_features)
519
- self.W_k = nn.Linear(in_features, out_features)
520
- self.W_v = nn.Linear(in_features, out_features)
521
-
522
- # Output projection
523
- self.W_o = nn.Linear(out_features, out_features)
524
-
525
- # Attention mechanism
526
- self.attention = nn.MultiheadAttention(
527
- embed_dim=out_features,
528
- num_heads=num_heads,
529
- dropout=dropout,
530
- batch_first=True
531
- )
532
-
533
- # Layer normalization and residual connection
534
- self.layer_norm = nn.LayerNorm(out_features)
535
- self.ffn = nn.Sequential(
536
- nn.Linear(out_features, out_features * 2),
537
- nn.ReLU(),
538
- nn.Dropout(dropout),
539
- nn.Linear(out_features * 2, out_features)
540
- )
541
- self.layer_norm2 = nn.LayerNorm(out_features)
542
-
543
- def forward(self, x, adjacency_matrix):
544
- # x: (B, num_vertices, in_features)
545
- # adjacency_matrix: (B, num_vertices, num_vertices)
546
- batch_size, num_vertices, _ = x.shape
547
-
548
- # Project to query, key, value
549
- Q = self.W_q(x) # (B, num_vertices, out_features)
550
- K = self.W_k(x) # (B, num_vertices, out_features)
551
- V = self.W_v(x) # (B, num_vertices, out_features)
552
-
553
- # Create attention mask from adjacency matrix
554
- # Convert adjacency to attention mask (0 for allowed, -inf for masked)
555
- attention_mask = (1 - adjacency_matrix) * (-1e9) # (B, num_vertices, num_vertices)
556
-
557
- # Apply multi-head attention with adjacency-based masking
558
- attended_features = []
559
- for b in range(batch_size):
560
- q_b = Q[b:b+1] # (1, num_vertices, out_features)
561
- k_b = K[b:b+1] # (1, num_vertices, out_features)
562
- v_b = V[b:b+1] # (1, num_vertices, out_features)
563
- mask_b = attention_mask[b] # (num_vertices, num_vertices)
564
-
565
- # Apply attention
566
- attn_output, _ = self.attention(q_b, k_b, v_b, attn_mask=mask_b)
567
- attended_features.append(attn_output)
568
-
569
- attended_features = torch.cat(attended_features, dim=0) # (B, num_vertices, out_features)
570
-
571
- # Residual connection and layer norm
572
- x_residual = self.layer_norm(attended_features + Q)
573
-
574
- # Feed-forward network
575
- ffn_output = self.ffn(x_residual)
576
- output = self.layer_norm2(ffn_output + x_residual)
577
-
578
- return output
579
-
580
- # --- Main Model ---
581
- class PointCloudToWireframe(nn.Module):
582
- def __init__(self,
583
- pc_input_features=PC_INPUT_FEATURES,
584
- pc_encoder_output_features=PC_ENCODER_OUTPUT_FEATURES,
585
- max_vertices=MAX_VERTICES,
586
- vertex_coord_dim=VERTEX_COORD_DIM,
587
- gnn_hidden_dim=GNN_HIDDEN_DIM,
588
- num_gnn_layers=NUM_GNN_LAYERS,
589
- hidden_dim=HIDDEN_DIM,
590
- num_decoder_layers=NUM_DECODER_LAYERS,
591
- num_heads=NUM_HEADS):
592
- super().__init__()
593
-
594
- # Point cloud encoder using PointNet2-style architecture
595
- self.encoder = PointNet2Encoder(pc_input_features, pc_encoder_output_features)
596
-
597
- # Vertex prediction head using transformer decoder
598
- self.vertex_head = VertexPredictionHead(
599
- point_feature_dim=pc_encoder_output_features,
600
- global_feature_dim=SA3_MLP[-1], # From PointNet2Encoder global feature
601
- max_vertices=max_vertices,
602
- vertex_coord_dim=vertex_coord_dim,
603
- hidden_dim=hidden_dim,
604
- num_decoder_layers=num_decoder_layers,
605
- num_heads=num_heads
606
- )
607
-
608
- # Edge prediction head using GNN
609
- self.edge_head = EdgePredictionHeadGNN(
610
- vertex_feature_dim=vertex_coord_dim,
611
- gnn_hidden_dim=gnn_hidden_dim,
612
- num_gnn_layers=num_gnn_layers
613
- )
614
-
615
- def forward(self, point_cloud):
616
- # point_cloud: (B, N, 3)
617
- batch_size, num_points, _ = point_cloud.shape
618
-
619
- # Encode point cloud
620
- point_features, global_feature = self.encoder(point_cloud)
621
- # point_features: (B, N, pc_encoder_output_features)
622
- # global_feature: (B, 1024)
623
-
624
- # Predict vertices
625
- predicted_vertices, vertex_confidence = self.vertex_head(
626
- point_features, global_feature, point_coords=point_cloud
627
- )
628
- # predicted_vertices: (B, max_vertices, 3)
629
- # vertex_confidence: (B, max_vertices)
630
-
631
- # Predict edges using GNN (using vertex coordinates directly)
632
- edge_logits, edge_indices = self.edge_head(predicted_vertices)
633
- # edge_logits: (B, num_potential_edges)
634
- # edge_indices: (num_potential_edges, 2)
635
-
636
- return {
637
- 'vertices': predicted_vertices,
638
- 'vertex_confidence': vertex_confidence,
639
- 'edge_logits': edge_logits,
640
- 'edge_indices': edge_indices
641
- }
642
-
643
- class WireframeDataset(Dataset):
644
- def __init__(self, data_dir=DATA_DIR, split=SPLIT, transform=None, max_points=MAX_POINTS):
645
- """
646
- Dataset for point cloud to wireframe conversion.
647
-
648
- Args:
649
- data_dir: Directory containing the pickle files
650
- split: 'train', 'val', or 'test'
651
- transform: Optional transforms to apply to point clouds
652
- max_points: Maximum number of points in the point cloud (default: 8096)
653
- """
654
- self.data_dir = data_dir
655
- self.split = split
656
- self.transform = transform
657
- self.max_points = max_points
658
-
659
- # Get all pickle files in the directory
660
- self.data_files = []
661
- for file in os.listdir(data_dir):
662
- if file.endswith('.pkl'):
663
- self.data_files.append(os.path.join(data_dir, file))
664
-
665
- self.data_files.sort() # Ensure consistent ordering
666
-
667
- def __len__(self):
668
- return len(self.data_files)
669
-
670
- def __getitem__(self, idx):
671
- # Load the pickle file
672
- with open(self.data_files[idx], 'rb') as f:
673
- sample_data = pickle.load(f)
674
-
675
- # Extract data
676
- point_cloud = torch.tensor(sample_data['point_cloud'], dtype=torch.float32)
677
- point_colors = torch.tensor(sample_data['point_colors'], dtype=torch.float32)
678
- gt_vertices = torch.tensor(sample_data['gt_vertices'], dtype=torch.float32)
679
- gt_connections = sample_data['gt_connections'] # List of tuples
680
- sample_id = sample_data['sample_id']
681
-
682
- # Handle point cloud size to match max_points
683
- current_points = point_cloud.shape[0]
684
-
685
- if current_points > self.max_points:
686
- # Downsample using random sampling
687
- indices = torch.randperm(current_points)[:self.max_points]
688
- point_cloud = point_cloud[indices]
689
- point_colors = point_colors[indices]
690
- elif current_points < self.max_points:
691
- # Pad by repeating last point or duplicating random points
692
- pad_size = self.max_points - current_points
693
- if current_points > 0:
694
- # Randomly sample existing points to pad
695
- pad_indices = torch.randint(0, current_points, (pad_size,))
696
- pad_points = point_cloud[pad_indices]
697
- pad_colors = point_colors[pad_indices]
698
- point_cloud = torch.cat([point_cloud, pad_points], dim=0)
699
- point_colors = torch.cat([point_colors, pad_colors], dim=0)
700
- else:
701
- # Edge case: no points, pad with zeros
702
- point_cloud = torch.zeros(self.max_points, 3)
703
- point_colors = torch.zeros(self.max_points, 3)
704
-
705
- # Convert connections to edge format
706
- if len(gt_connections) > 0:
707
- edge_indices = torch.tensor(gt_connections, dtype=torch.long).t() # (2, num_edges)
708
- else:
709
- edge_indices = torch.zeros((2, 0), dtype=torch.long) # Empty edges
710
-
711
- # Apply transforms if any
712
- if self.transform:
713
- point_cloud = self.transform(point_cloud)
714
-
715
- return {
716
- 'point_cloud': point_cloud,
717
- 'point_colors': point_colors,
718
- 'gt_vertices': gt_vertices,
719
- 'edge_indices': edge_indices,
720
- 'sample_id': sample_id
721
- }
722
-
723
- def collate_fn(batch):
724
- """
725
- Custom collate function to handle variable number of vertices and edges.
726
- """
727
- point_clouds = []
728
- point_colors = []
729
- gt_vertices_list = []
730
- edge_indices_list = []
731
- sample_ids = []
732
-
733
- max_vertices = 0
734
-
735
- for sample in batch:
736
- point_clouds.append(sample['point_cloud'])
737
- point_colors.append(sample['point_colors'])
738
- gt_vertices_list.append(sample['gt_vertices'])
739
- edge_indices_list.append(sample['edge_indices'])
740
- sample_ids.append(sample['sample_id'])
741
-
742
- max_vertices = max(max_vertices, sample['gt_vertices'].shape[0])
743
-
744
- # Pad point clouds to same size if needed
745
- max_points = max(pc.shape[0] for pc in point_clouds)
746
- padded_point_clouds = []
747
- padded_point_colors = []
748
-
749
- for pc, colors in zip(point_clouds, point_colors):
750
- if pc.shape[0] < max_points:
751
- # Pad with zeros or repeat last point
752
- pad_size = max_points - pc.shape[0]
753
- pc_padded = torch.cat([pc, torch.zeros(pad_size, 3)], dim=0)
754
- colors_padded = torch.cat([colors, torch.zeros(pad_size, 3)], dim=0)
755
- else:
756
- pc_padded = pc
757
- colors_padded = colors
758
-
759
- padded_point_clouds.append(pc_padded)
760
- padded_point_colors.append(colors_padded)
761
-
762
- # Stack point clouds
763
- point_clouds_batch = torch.stack(padded_point_clouds)
764
- point_colors_batch = torch.stack(padded_point_colors)
765
-
766
- # Pad vertices to max_vertices
767
- padded_vertices = []
768
- vertex_masks = [] # To indicate which vertices are real vs padded
769
-
770
- for vertices in gt_vertices_list:
771
- num_vertices = vertices.shape[0]
772
- if num_vertices < max_vertices:
773
- # Pad with zeros
774
- pad_size = max_vertices - num_vertices
775
- vertices_padded = torch.cat([vertices, torch.zeros(pad_size, 3)], dim=0)
776
- mask = torch.cat([torch.ones(num_vertices), torch.zeros(pad_size)], dim=0).bool()
777
- else:
778
- vertices_padded = vertices
779
- mask = torch.ones(num_vertices).bool()
780
-
781
- padded_vertices.append(vertices_padded)
782
- vertex_masks.append(mask)
783
-
784
- gt_vertices_batch = torch.stack(padded_vertices)
785
- vertex_masks_batch = torch.stack(vertex_masks)
786
-
787
- # Create adjacency matrices for edges
788
- batch_size = len(batch)
789
- adjacency_matrices = torch.zeros(batch_size, max_vertices, max_vertices)
790
-
791
- for i, edge_indices in enumerate(edge_indices_list):
792
- if edge_indices.shape[1] > 0: # If there are edges
793
- src, dst = edge_indices[0], edge_indices[1]
794
- # Only add edges for valid vertices (within the actual vertex count)
795
- valid_edges = (src < gt_vertices_list[i].shape[0]) & (dst < gt_vertices_list[i].shape[0])
796
- src_valid = src[valid_edges]
797
- dst_valid = dst[valid_edges]
798
- adjacency_matrices[i, src_valid, dst_valid] = 1
799
- adjacency_matrices[i, dst_valid, src_valid] = 1 # Undirected graph
800
-
801
- return {
802
- 'point_cloud': point_clouds_batch,
803
- 'point_colors': point_colors_batch,
804
- 'gt_vertices': gt_vertices_batch,
805
- 'vertex_masks': vertex_masks_batch,
806
- 'adjacency_matrices': adjacency_matrices,
807
- 'edge_indices_list': edge_indices_list, # Keep original for loss computation
808
- 'sample_ids': sample_ids
809
- }
810
-
811
- # Loss functions
812
- def compute_vertex_loss(pred_vertices, gt_vertices, vertex_masks, vertex_confidence):
813
- """
814
- Compute vertex position loss using Hungarian matching
815
- """
816
- batch_size = pred_vertices.shape[0]
817
- total_loss = 0.0
818
- total_confidence_loss = 0.0
819
-
820
- for b in range(batch_size):
821
- # Get valid GT vertices for this sample
822
- valid_mask = vertex_masks[b]
823
- gt_verts = gt_vertices[b][valid_mask] # (num_valid_gt, 3)
824
- num_gt = gt_verts.shape[0]
825
-
826
- if num_gt == 0:
827
- # No GT vertices, penalize high confidence predictions
828
- confidence_target = torch.zeros_like(vertex_confidence[b])
829
- conf_loss = F.binary_cross_entropy_with_logits(vertex_confidence[b], confidence_target)
830
- total_confidence_loss += conf_loss
831
- continue
832
-
833
- pred_verts = pred_vertices[b] # (max_vertices, 3)
834
- pred_conf = vertex_confidence[b] # (max_vertices,)
835
-
836
- # Compute pairwise distances between predicted and GT vertices
837
- distances = torch.cdist(pred_verts, gt_verts) # (max_vertices, num_gt)
838
-
839
- # Hungarian matching to find optimal assignment
840
-
841
- # Convert to numpy for scipy
842
- cost_matrix = distances.detach().cpu().numpy()
843
-
844
- # Pad cost matrix if needed
845
- if distances.shape[0] < distances.shape[1]:
846
- # More GT vertices than predicted - pad with high cost
847
- padding = np.full((distances.shape[1] - distances.shape[0], distances.shape[1]), 1e6)
848
- cost_matrix = np.vstack([cost_matrix, padding])
849
- elif distances.shape[0] > distances.shape[1]:
850
- # More predicted vertices than GT - pad with high cost
851
- padding = np.full((distances.shape[0], distances.shape[0] - distances.shape[1]), 1e6)
852
- cost_matrix = np.hstack([cost_matrix, padding])
853
-
854
- # Solve assignment problem
855
- pred_indices, gt_indices = linear_sum_assignment(cost_matrix)
856
-
857
- # Filter out dummy assignments (high cost padding)
858
- # Ensure pred_indices are valid for pred_verts and gt_indices for gt_verts
859
- valid_assignments = (pred_indices < pred_verts.shape[0]) & (gt_indices < num_gt)
860
- pred_indices = pred_indices[valid_assignments]
861
- gt_indices = gt_indices[valid_assignments]
862
-
863
- if len(pred_indices) > 0:
864
- # Compute position loss for matched vertices
865
- matched_pred = pred_verts[pred_indices]
866
- matched_gt = gt_verts[gt_indices]
867
- position_loss = F.mse_loss(matched_pred, matched_gt)
868
- total_loss += position_loss
869
-
870
- # Confidence targets: 1 for matched vertices, 0 for unmatched
871
- confidence_target = torch.zeros_like(pred_conf)
872
- confidence_target[pred_indices] = 1.0
873
- conf_loss = F.binary_cross_entropy_with_logits(pred_conf, confidence_target)
874
- total_confidence_loss += conf_loss
875
- else:
876
- # No valid matches - penalize all predictions
877
- confidence_target = torch.zeros_like(pred_conf)
878
- conf_loss = F.binary_cross_entropy_with_logits(pred_conf, confidence_target)
879
- total_confidence_loss += conf_loss
880
-
881
- return total_loss / batch_size, total_confidence_loss / batch_size
882
-
883
- def compute_edge_loss(edge_logits, edge_indices, gt_adjacency_matrices):
884
- """
885
- Compute edge prediction loss
886
- """
887
- batch_size = gt_adjacency_matrices.shape[0]
888
-
889
- # Create edge targets from adjacency matrices
890
- edge_targets = []
891
- for b in range(batch_size):
892
- gt_adj_for_sample = gt_adjacency_matrices[b] # Shape: (batch_max_gt_verts, batch_max_gt_verts)
893
-
894
- # Create a target adjacency matrix of size (MAX_VERTICES, MAX_VERTICES)
895
- # as edge_indices are generated based on the global MAX_VERTICES.
896
- target_adj_full_size = torch.zeros(
897
- MAX_VERTICES,
898
- MAX_VERTICES,
899
- device=gt_adj_for_sample.device,
900
- dtype=gt_adj_for_sample.dtype
901
- )
902
-
903
- # Determine the actual dimension of the current sample's GT adjacency matrix (padded to batch max)
904
- current_gt_dim = gt_adj_for_sample.shape[0]
905
-
906
- # Copy the relevant part of gt_adj_for_sample into the full-sized target matrix.
907
- # The copy_dim is the minimum of MAX_VERTICES and the current GT dimension,
908
- # ensuring we don't read out of bounds from gt_adj_for_sample or write out of bounds to target_adj_full_size.
909
- copy_dim = min(MAX_VERTICES, current_gt_dim)
910
-
911
- target_adj_full_size[:copy_dim, :copy_dim] = gt_adj_for_sample[:copy_dim, :copy_dim]
912
-
913
- # Extract targets using edge_indices, which refer to pairs in a MAX_VERTICES graph.
914
- targets = target_adj_full_size[edge_indices[:, 0], edge_indices[:, 1]]
915
- edge_targets.append(targets)
916
-
917
- edge_targets = torch.stack(edge_targets) # Shape: (batch_size, num_potential_edges_in_MAX_VERTICES_graph)
918
- edge_targets = edge_targets.to(edge_logits.device)
919
-
920
- # Binary cross entropy loss
921
- edge_loss = F.binary_cross_entropy_with_logits(edge_logits, edge_targets)
922
-
923
- return edge_loss
924
-
925
- def compute_total_loss(model_output, batch):
926
- """
927
- Compute total loss combining vertex and edge losses
928
- """
929
- # Extract model outputs
930
- pred_vertices = model_output['vertices']
931
- vertex_confidence = model_output['vertex_confidence']
932
- edge_logits = model_output['edge_logits']
933
- edge_indices = model_output['edge_indices']
934
-
935
- # Extract ground truth
936
- gt_vertices = batch['gt_vertices'].to(DEVICE)
937
- vertex_masks = batch['vertex_masks'].to(DEVICE)
938
- gt_adjacency = batch['adjacency_matrices'].to(DEVICE)
939
-
940
- # Compute individual losses
941
- vertex_pos_loss, vertex_conf_loss = compute_vertex_loss(
942
- pred_vertices, gt_vertices, vertex_masks, vertex_confidence
943
- )
944
- edge_loss = compute_edge_loss(edge_logits, edge_indices, gt_adjacency)
945
-
946
- # Combine losses
947
- total_loss = (VERTEX_LOSS_WEIGHT * vertex_pos_loss +
948
- CONFIDENCE_LOSS_WEIGHT * vertex_conf_loss +
949
- EDGE_LOSS_WEIGHT * edge_loss)
950
-
951
- return {
952
- 'total_loss': total_loss,
953
- 'vertex_pos_loss': vertex_pos_loss,
954
- 'vertex_conf_loss': vertex_conf_loss,
955
- 'edge_loss': edge_loss
956
- }
957
-
958
- # =============================================================================
959
- # MAIN TRAINING SCRIPT
960
- # =============================================================================
961
-
962
- if __name__ == '__main__':
963
- # Create dataset and dataloader
964
- dataset = WireframeDataset(data_dir=DATA_DIR, split=SPLIT)
965
- dataloader = DataLoader(
966
- dataset,
967
- batch_size=BATCH_SIZE,
968
- shuffle=True,
969
- collate_fn=collate_fn,
970
- num_workers=NUM_WORKERS
971
- )
972
-
973
- # Initialize model
974
- model = PointCloudToWireframe()
975
-
976
- # Move model to device
977
- model = model.to(DEVICE)
978
- print(f"Model loaded on device: {DEVICE}")
979
-
980
- # Initialize optimizer and scheduler
981
- optimizer = torch.optim.Adam(model.parameters(), lr=LEARNING_RATE, weight_decay=WEIGHT_DECAY)
982
- scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(
983
- optimizer, mode='min', factor=LR_SCHEDULER_FACTOR, patience=LR_SCHEDULER_PATIENCE
984
- )
985
-
986
- # Training loop
987
- model.train()
988
- print("Starting training...")
989
-
990
- for epoch in range(NUM_EPOCHS):
991
- epoch_losses = {
992
- 'total_loss': 0.0,
993
- 'vertex_pos_loss': 0.0,
994
- 'vertex_conf_loss': 0.0,
995
- 'edge_loss': 0.0
996
- }
997
- num_batches = 0
998
-
999
- for batch_idx, batch in enumerate(dataloader):
1000
- # Move data to device
1001
- point_cloud = batch['point_cloud'].to(DEVICE)
1002
-
1003
- # Zero gradients
1004
- optimizer.zero_grad()
1005
-
1006
- # Forward pass
1007
- output = model(point_cloud)
1008
-
1009
- # Compute losses
1010
- losses = compute_total_loss(output, batch)
1011
-
1012
- # Backward pass
1013
- losses['total_loss'].backward()
1014
-
1015
- # Gradient clipping
1016
- torch.nn.utils.clip_grad_norm_(model.parameters(), max_norm=GRADIENT_CLIP_MAX_NORM)
1017
-
1018
- # Update weights
1019
- optimizer.step()
1020
-
1021
- # Accumulate losses
1022
- for key in epoch_losses:
1023
- epoch_losses[key] += losses[key].item()
1024
- num_batches += 1
1025
-
1026
- # Print progress
1027
- if batch_idx % LOG_FREQUENCY == 0:
1028
- print(f"Epoch {epoch+1}/{NUM_EPOCHS}, Batch {batch_idx}/{len(dataloader)}")
1029
- print(f" Total Loss: {losses['total_loss'].item():.4f}")
1030
- print(f" Vertex Pos Loss: {losses['vertex_pos_loss'].item():.4f}")
1031
- print(f" Vertex Conf Loss: {losses['vertex_conf_loss'].item():.4f}")
1032
- print(f" Edge Loss: {losses['edge_loss'].item():.4f}")
1033
-
1034
- # Average losses for the epoch
1035
- for key in epoch_losses:
1036
- epoch_losses[key] /= num_batches
1037
-
1038
- # Update learning rate scheduler
1039
- scheduler.step(epoch_losses['total_loss'])
1040
-
1041
- # Print epoch summary
1042
- print(f"\nEpoch {epoch+1} Summary:")
1043
- print(f" Avg Total Loss: {epoch_losses['total_loss']:.4f}")
1044
- print(f" Avg Vertex Pos Loss: {epoch_losses['vertex_pos_loss']:.4f}")
1045
- print(f" Avg Vertex Conf Loss: {epoch_losses['vertex_conf_loss']:.4f}")
1046
- print(f" Avg Edge Loss: {epoch_losses['edge_loss']:.4f}")
1047
- print(f" Learning Rate: {optimizer.param_groups[0]['lr']:.6f}")
1048
- print("-" * 50)
1049
-
1050
- # Save checkpoint every epoch
1051
- if (epoch + 1) % CHECKPOINT_SAVE_FREQUENCY == 0:
1052
- checkpoint = {
1053
- 'epoch': epoch + 1,
1054
- 'model_state_dict': model.state_dict(),
1055
- 'optimizer_state_dict': optimizer.state_dict(),
1056
- 'scheduler_state_dict': scheduler.state_dict(),
1057
- 'losses': epoch_losses,
1058
- 'config': {
1059
- 'pc_input_features': PC_INPUT_FEATURES,
1060
- 'pc_encoder_output_features': PC_ENCODER_OUTPUT_FEATURES,
1061
- 'max_vertices': MAX_VERTICES,
1062
- 'gnn_hidden_dim': GNN_HIDDEN_DIM,
1063
- 'num_gnn_layers': NUM_GNN_LAYERS
1064
- }
1065
- }
1066
- torch.save(checkpoint, f'checkpoint_epoch_{epoch+1}.pth')
1067
- print(f"Checkpoint saved: checkpoint_epoch_{epoch+1}.pth")
1068
-
1069
- # Save final model
1070
- torch.save({
1071
- 'model_state_dict': model.state_dict(),
1072
- 'model_config': {
1073
- 'pc_input_features': PC_INPUT_FEATURES,
1074
- 'pc_encoder_output_features': PC_ENCODER_OUTPUT_FEATURES,
1075
- 'max_vertices': MAX_VERTICES,
1076
- 'gnn_hidden_dim': GNN_HIDDEN_DIM,
1077
- 'num_gnn_layers': NUM_GNN_LAYERS
1078
- }
1079
- }, 'final_model.pth')
1080
-
1081
- print("Training completed!")
1082
- print(f"Dataset size: {len(dataset)}")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
generate_pcloud_dataset.py CHANGED
@@ -1,3 +1,14 @@
 
 
 
 
 
 
 
 
 
 
 
1
  from datasets import load_dataset
2
  from hoho2025.viz3d import *
3
  import os
@@ -55,3 +66,4 @@ for a in tqdm(ds['train'], desc="Processing dataset"):
55
 
56
  print(f"Generated {counter} samples in {output_dir}")
57
 
 
 
1
+ # This script processes the 'usm3d/hoho25k' dataset.
2
+ # For each sample in the dataset, it performs the following steps:
3
+ # 1. Reads COLMAP reconstruction data.
4
+ # 2. Extracts 3D point coordinates and their corresponding colors.
5
+ # 3. Retrieves ground truth wireframe vertices and edges.
6
+ # 4. Skips processing if the output file already exists or if no 3D points are found.
7
+ # 5. Saves the extracted point cloud, colors, ground truth data, and sample ID
8
+ # into a pickle file in a specified output directory.
9
+ # The script shuffles the dataset before processing and keeps track of
10
+ # the number of samples successfully processed and saved.
11
+ #
12
  from datasets import load_dataset
13
  from hoho2025.viz3d import *
14
  import os
 
66
 
67
  print(f"Generated {counter} samples in {output_dir}")
68
 
69
+
hoho_cpu.batch DELETED
@@ -1,17 +0,0 @@
1
- #!/bin/bash
2
- #SBATCH --nodes=1 # 1 node
3
- #SBATCH --ntasks-per-node=1 # 1 tasks per node
4
- #SBATCH --cpus-per-task=8 # 6 CPUS per task = 12 CPUS per node
5
- #SBATCH --mem-per-cpu=10G # 8GB per CPU = 96GB per node
6
- #SBATCH --time=24:00:00 # time limits: 1 hour
7
- #SBATCH --error=hoho_cpu.err # standard error file
8
- #SBATCH --output=hoho_cpu.out # standard output file
9
- #SBATCH --partition=amd # partition name
10
- #SBATCH --mail-user=skvrnjan@fel.cvut.cz # where send info about job
11
- #SBATCH --mail-type=ALL # what to send, valid type values are NONE, BEGIN, END, FAIL, REQUEUE, ALL
12
-
13
- cd /mnt/personal/skvrnjan/hoho/
14
- module purge
15
- module load Python/3.10.8-GCCcore-12.2.0
16
- source /mnt/personal/skvrnjan/venvs/hoho/bin/activate
17
- python train.py
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
hoho_cpu_gpu_intel.batch DELETED
@@ -1,19 +0,0 @@
1
- #!/bin/bash
2
- #SBATCH --nodes=1 # 1 node
3
- #SBATCH --ntasks-per-node=1 # 1 tasks per node
4
- #SBATCH --cpus-per-task=8 # 6 CPUS per task = 12 CPUS per node
5
- #SBATCH --mem-per-cpu=10G # 8GB per CPU = 96GB per node
6
- #SBATCH --time=24:00:00 # time limits: 1 hour
7
- #SBATCH --error=hoho_cpu.err # standard error file
8
- #SBATCH --output=hoho_cpu.out # standard output file
9
- #SBATCH --partition=gpu # partition name
10
- #SBATCH --mail-user=skvrnjan@fel.cvut.cz # where send info about job
11
- #SBATCH --mail-type=ALL # what to send, valid type values are NONE, BEGIN, END, FAIL, REQUEUE, ALL
12
- #SBATCH --gres=gpu:1
13
-
14
- cd /mnt/personal/skvrnjan/hoho/
15
- module purge
16
- module load Python/3.10.8-GCCcore-12.2.0
17
- module load CUDA/12.6.0
18
- source /mnt/personal/skvrnjan/venvs/hoho/bin/activate
19
- python train.py
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
hoho_gpu.batch DELETED
@@ -1,19 +0,0 @@
1
- #!/bin/bash
2
- #SBATCH --nodes=1 # 1 node
3
- #SBATCH --ntasks-per-node=1 # 1 tasks per node
4
- #SBATCH --cpus-per-task=16 # 6 CPUS per task = 12 CPUS per node
5
- #SBATCH --mem-per-cpu=10G # 8GB per CPU = 96GB per node
6
- #SBATCH --time=24:00:00 # time limits: 1 hour
7
- #SBATCH --error=hoho_gpu.err # standard error file
8
- #SBATCH --output=hoho_gpu.out # standard output file
9
- #SBATCH --partition=amdgpu # partition name
10
- #SBATCH --mail-user=skvrnjan@fel.cvut.cz # where send info about job
11
- #SBATCH --mail-type=ALL # what to send, valid type values are NONE, BEGIN, END, FAIL, REQUEUE, ALL
12
- #SBATCH --gres=gpu:1
13
-
14
- cd /mnt/personal/skvrnjan/hoho/
15
- module purge
16
- module load Python/3.10.8-GCCcore-12.2.0
17
- module load CUDA/12.6.0
18
- source /mnt/personal/skvrnjan/venvs/hoho/bin/activate
19
- python train_pnet_cluster.py
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
hoho_gpu_class.batch DELETED
@@ -1,19 +0,0 @@
1
- #!/bin/bash
2
- #SBATCH --nodes=1 # 1 node
3
- #SBATCH --ntasks-per-node=1 # 1 tasks per node
4
- #SBATCH --cpus-per-task=16 # 6 CPUS per task = 12 CPUS per node
5
- #SBATCH --mem-per-cpu=10G # 8GB per CPU = 96GB per node
6
- #SBATCH --time=24:00:00 # time limits: 1 hour
7
- #SBATCH --error=hoho_gpu_class2_v4.err # standard error file
8
- #SBATCH --output=hoho_gpu_class2_v4.out # standard output file
9
- #SBATCH --partition=amdgpu # partition name
10
- #SBATCH --mail-user=skvrnjan@fel.cvut.cz # where send info about job
11
- #SBATCH --mail-type=ALL # what to send, valid type values are NONE, BEGIN, END, FAIL, REQUEUE, ALL
12
- #SBATCH --gres=gpu:1
13
-
14
- cd /mnt/personal/skvrnjan/hoho/
15
- module purge
16
- module load Python/3.10.8-GCCcore-12.2.0
17
- module load CUDA/12.6.0
18
- source /mnt/personal/skvrnjan/venvs/hoho/bin/activate
19
- python train_pnet_class_cluster.py
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
hoho_gpu_class_10d.batch DELETED
@@ -1,19 +0,0 @@
1
- #!/bin/bash
2
- #SBATCH --nodes=1 # 1 node
3
- #SBATCH --ntasks-per-node=1 # 1 tasks per node
4
- #SBATCH --cpus-per-task=16 # 6 CPUS per task = 12 CPUS per node
5
- #SBATCH --mem-per-cpu=10G # 8GB per CPU = 96GB per node
6
- #SBATCH --time=24:00:00 # time limits: 1 hour
7
- #SBATCH --error=hoho_gpu_class_10d_v2.err # standard error file
8
- #SBATCH --output=hoho_gpu_class_10d_v2.out # standard output file
9
- #SBATCH --partition=amdgpu # partition name
10
- #SBATCH --mail-user=skvrnjan@fel.cvut.cz # where send info about job
11
- #SBATCH --mail-type=ALL # what to send, valid type values are NONE, BEGIN, END, FAIL, REQUEUE, ALL
12
- #SBATCH --gres=gpu:1
13
-
14
- cd /mnt/personal/skvrnjan/hoho/
15
- module purge
16
- module load Python/3.10.8-GCCcore-12.2.0
17
- module load CUDA/12.6.0
18
- source /mnt/personal/skvrnjan/venvs/hoho/bin/activate
19
- python train_pnet_class_cluster_10d.py
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
hoho_gpu_class_10d_2048.batch DELETED
@@ -1,19 +0,0 @@
1
- #!/bin/bash
2
- #SBATCH --nodes=1 # 1 node
3
- #SBATCH --ntasks-per-node=1 # 1 tasks per node
4
- #SBATCH --cpus-per-task=16 # 6 CPUS per task = 12 CPUS per node
5
- #SBATCH --mem-per-cpu=10G # 8GB per CPU = 96GB per node
6
- #SBATCH --time=24:00:00 # time limits: 1 hour
7
- #SBATCH --error=hoho_gpu_class_10d_2048.err # standard error file
8
- #SBATCH --output=hoho_gpu_class_10d_2048.out # standard output file
9
- #SBATCH --partition=amdgpu # partition name
10
- #SBATCH --mail-user=skvrnjan@fel.cvut.cz # where send info about job
11
- #SBATCH --mail-type=ALL # what to send, valid type values are NONE, BEGIN, END, FAIL, REQUEUE, ALL
12
- #SBATCH --gres=gpu:1
13
-
14
- cd /mnt/personal/skvrnjan/hoho/
15
- module purge
16
- module load Python/3.10.8-GCCcore-12.2.0
17
- module load CUDA/12.6.0
18
- source /mnt/personal/skvrnjan/venvs/hoho/bin/activate
19
- python train_pnet_class_cluster_10d_2048.py
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
hoho_gpu_class_10d_deeper.batch DELETED
@@ -1,19 +0,0 @@
1
- #!/bin/bash
2
- #SBATCH --nodes=1 # 1 node
3
- #SBATCH --ntasks-per-node=1 # 1 tasks per node
4
- #SBATCH --cpus-per-task=16 # 6 CPUS per task = 12 CPUS per node
5
- #SBATCH --mem-per-cpu=10G # 8GB per CPU = 96GB per node
6
- #SBATCH --time=24:00:00 # time limits: 1 hour
7
- #SBATCH --error=hoho_gpu_class_10d_v2_deeper_v2.err # standard error file
8
- #SBATCH --output=hoho_gpu_class_10d_v2_deeper_v2.out # standard output file
9
- #SBATCH --partition=amdgpu # partition name
10
- #SBATCH --mail-user=skvrnjan@fel.cvut.cz # where send info about job
11
- #SBATCH --mail-type=ALL # what to send, valid type values are NONE, BEGIN, END, FAIL, REQUEUE, ALL
12
- #SBATCH --gres=gpu:1
13
-
14
- cd /mnt/personal/skvrnjan/hoho/
15
- module purge
16
- module load Python/3.10.8-GCCcore-12.2.0
17
- module load CUDA/12.6.0
18
- source /mnt/personal/skvrnjan/venvs/hoho/bin/activate
19
- python train_pnet_class_cluster_10d_deeper.py
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
hoho_gpu_h200.batch DELETED
@@ -1,19 +0,0 @@
1
- #!/bin/bash
2
- #SBATCH --nodes=1 # 1 node
3
- #SBATCH --ntasks-per-node=1 # 1 tasks per node
4
- #SBATCH --cpus-per-task=20 # 6 CPUS per task = 12 CPUS per node
5
- #SBATCH --mem-per-cpu=10G # 8GB per CPU = 96GB per node
6
- #SBATCH --time=24:00:00 # time limits: 1 hour
7
- #SBATCH --error=hoho_gpu_h200_v2_class.err # standard error file
8
- #SBATCH --output=hoho_gpu_h200_v2_class.out # standard output file
9
- #SBATCH --partition=h200 # partition name
10
- #SBATCH --mail-user=skvrnjan@fel.cvut.cz # where send info about job
11
- #SBATCH --mail-type=ALL # what to send, valid type values are NONE, BEGIN, END, FAIL, REQUEUE, ALL
12
- #SBATCH --gres=gpu:1
13
-
14
- cd /mnt/personal/skvrnjan/hoho/
15
- module purge
16
- module load Python/3.12.3-GCCcore-13.3.0
17
- module load CUDA/12.6.0
18
- source /mnt/personal/skvrnjan/venvs/hoho/bin/activate
19
- python train_pnet_cluster_class_v2.py
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
hoho_gpu_voxel.batch DELETED
@@ -1,19 +0,0 @@
1
- #!/bin/bash
2
- #SBATCH --nodes=1 # 1 node
3
- #SBATCH --ntasks-per-node=1 # 1 tasks per node
4
- #SBATCH --cpus-per-task=16 # 6 CPUS per task = 12 CPUS per node
5
- #SBATCH --mem-per-cpu=10G # 8GB per CPU = 96GB per node
6
- #SBATCH --time=24:00:00 # time limits: 1 hour
7
- #SBATCH --error=hoho_gpu.err # standard error file
8
- #SBATCH --output=hoho_gpu.out # standard output file
9
- #SBATCH --partition=amdgpu # partition name
10
- #SBATCH --mail-user=skvrnjan@fel.cvut.cz # where send info about job
11
- #SBATCH --mail-type=ALL # what to send, valid type values are NONE, BEGIN, END, FAIL, REQUEUE, ALL
12
- #SBATCH --gres=gpu:1
13
-
14
- cd /mnt/personal/skvrnjan/hoho/
15
- module purge
16
- module load Python/3.10.8-GCCcore-12.2.0
17
- module load CUDA/12.6.0
18
- source /mnt/personal/skvrnjan/venvs/hoho/bin/activate
19
- python train_voxel_cluster.py
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
initial_epoch_100.pth DELETED
@@ -1,3 +0,0 @@
1
- version https://git-lfs.github.com/spec/v1
2
- oid sha256:8d9de42aafd8ca9f0e831c920a41d35f61f05ecf6f96c0227a46d16c34cd861c
3
- size 93364299
 
 
 
 
initial_epoch_100_class_v2.pth DELETED
@@ -1,3 +0,0 @@
1
- version https://git-lfs.github.com/spec/v1
2
- oid sha256:af653e5fe08dc57b2bb84996896c60d06a71e1c1a0197ad0e07ee2d03dc080e8
3
- size 92609251
 
 
 
 
initial_epoch_100_v2.pth DELETED
@@ -1,3 +0,0 @@
1
- version https://git-lfs.github.com/spec/v1
2
- oid sha256:be55556839f8d4fedad7a5cf7520b48859d7bd9a3fbb6b2efbae627ca8ca3ffc
3
- size 103080355
 
 
 
 
initial_epoch_100_v2_aug.pth DELETED
@@ -1,3 +0,0 @@
1
- version https://git-lfs.github.com/spec/v1
2
- oid sha256:66ba39359176a2d4d9f444912c58f24d5039b49c802dd8c2be45bcba10694054
3
- size 103080355
 
 
 
 
initial_epoch_60.pth DELETED
@@ -1,3 +0,0 @@
1
- version https://git-lfs.github.com/spec/v1
2
- oid sha256:ef3f7c9462b297447ee24b7994919d625b958d1941626215d1d41f27faf7dac1
3
- size 93364051
 
 
 
 
initial_epoch_60_v2.pth DELETED
@@ -1,3 +0,0 @@
1
- version https://git-lfs.github.com/spec/v1
2
- oid sha256:759a5e61e5934aa2417d587621b10a0b390c821c414380185c75fc1379e07f90
3
- size 103080040
 
 
 
 
iterate.batch DELETED
@@ -1,50 +0,0 @@
1
- #!/bin/bash
2
-
3
- # Define parameter ranges
4
- vertex_min=0.4
5
- vertex_max=0.9
6
- vertex_step=0.02
7
-
8
- edge_min=0.4
9
- edge_max=0.9
10
- edge_step=0.02
11
-
12
- # Define results directory path or use first argument
13
- results_dir=${1:-"/mnt/personal/skvrnjan/hoho/results"}
14
-
15
- # Create results directory if it doesn't exist
16
- mkdir -p $results_dir
17
-
18
- # Iterate over all combinations
19
- for vertex_thresh in $(seq $vertex_min $vertex_step $vertex_max); do
20
- for edge_thresh in $(seq $edge_min $edge_step $edge_max); do
21
- # Create job name
22
- job_name="v10_train_v${vertex_thresh}_e${edge_thresh}"
23
-
24
- # Create SLURM script
25
- cat > "${job_name}.slurm" << EOF
26
- #!/bin/bash
27
- #SBATCH --job-name=${job_name}
28
- #SBATCH --output=${job_name}_%j.out
29
- #SBATCH --error=${job_name}_%j.err
30
- #SBATCH --time=4:00:00
31
- #SBATCH --partition=amdfast # partition name
32
- #SBATCH --cpus-per-task=4 # 6 CPUS per task = 12 CPUS per node
33
- #SBATCH --mem-per-cpu=10G # 8GB per CPU = 96GB per node
34
- #SBATCH --nodes=1 # 1 node
35
- #SBATCH --ntasks-per-node=1 # 1 tasks per node
36
-
37
- # Run training with specific parameters
38
- cd /mnt/personal/skvrnjan/hoho/
39
- module purge
40
- module load Python/3.10.8-GCCcore-12.2.0
41
- source /mnt/personal/skvrnjan/venvs/hoho/bin/activate
42
- python train.py --vertex_threshold ${vertex_thresh} --edge_threshold ${edge_thresh} --results_dir $results_dir/${job_name} --max_samples 100
43
- EOF
44
-
45
- # Submit job
46
- sbatch "${job_name}.slurm"
47
-
48
- echo "Submitted job: ${job_name}"
49
- done
50
- done
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
pnet.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:5eef9dc9903751fd7fb2418d729e7021ba7f90e9133f3c64a9694c78c10b61f7
3
- size 93358155
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:be55556839f8d4fedad7a5cf7520b48859d7bd9a3fbb6b2efbae627ca8ca3ffc
3
+ size 103080355
predict.py CHANGED
@@ -1,3 +1,15 @@
 
 
 
 
 
 
 
 
 
 
 
 
1
  import numpy as np
2
  from typing import Tuple, List
3
  from hoho2025.example_solutions import empty_solution, read_colmap_rec, get_vertices_and_edges_from_segmentation, get_house_mask, fit_scale_robust_median, get_uv_depth, merge_vertices_3d, prune_not_connected, prune_too_far, point_to_segment_dist
 
1
+ # This script is designed for 3D wireframe reconstruction, primarily focusing on
2
+ # buildings, using multi-view imagery and associated 3D data.
3
+ # It leverages COLMAP reconstructions, depth maps, and semantic segmentations
4
+ # (ADE20k and Gestalt) to identify and predict structural elements.
5
+ # Core tasks include:
6
+ # - Processing and aligning 2D image data (segmentations, depth) with 3D COLMAP point clouds.
7
+ # - Extracting initial 2D/3D vertex candidates from segmentation maps.
8
+ # - Generating local point cloud patches around these candidates.
9
+ # - Employing machine learning models (e.g., PointNet variants) to refine vertex locations
10
+ # and classify potential edges between them.
11
+ # - Optionally, generating datasets of these patches for training ML models.
12
+ # - Merging information from multiple views to produce a final 3D wireframe.
13
  import numpy as np
14
  from typing import Tuple, List
15
  from hoho2025.example_solutions import empty_solution, read_colmap_rec, get_vertices_and_edges_from_segmentation, get_house_mask, fit_scale_robust_median, get_uv_depth, merge_vertices_3d, prune_not_connected, prune_too_far, point_to_segment_dist
predict_end.py DELETED
@@ -1,73 +0,0 @@
1
- import torch
2
- from typing import Tuple, List
3
- import numpy as np
4
-
5
- from hoho2025.color_mappings import ade20k_color_mapping, gestalt_color_mapping
6
- from predict import create_pcloud, convert_entry_to_human_readable, empty_solution
7
- from end_to_end import save_data
8
-
9
- data_folder = '/mnt/personal/skvrnjan/hoho_end/'
10
-
11
- def predict_wireframe(entry, config) -> Tuple[np.ndarray, List[int]]:
12
- """
13
- Predict 3D wireframe from a dataset entry.
14
- """
15
-
16
- device = 'cuda' if torch.cuda.is_available() else 'cpu'
17
-
18
- good_entry = convert_entry_to_human_readable(entry)
19
- colmap_rec = good_entry['colmap_binary']
20
-
21
- colmap_pcloud = create_pcloud(colmap_rec, good_entry)
22
-
23
- pcloud_14d = pcloud_7d_to_14d(colmap_pcloud)
24
-
25
- dict_to_save = {'pcloud_14d': pcloud_14d,
26
- 'wf_vertices': good_entry['wf_vertices'],
27
- 'wf_edges': good_entry['wf_edges']}
28
-
29
- save_data(dict_to_save, good_entry['order_id'], data_folder=data_folder)
30
-
31
- return empty_solution()
32
-
33
- def pcloud_7d_to_14d(pcloud_7d: np.ndarray) -> np.ndarray:
34
- """
35
- Convert 7D point cloud to higher dimensional by removing ID, then adding ADE and Gestalt segmentation
36
- with bin counting for edge classes.
37
-
38
- Args:
39
- pcloud_7d: Array of shape (N, 7) containing [x, y, z, r, g, b, confidence]
40
-
41
- Returns:
42
- Array of shape (N, 15) containing [x, y, z, r, g, b, ade_class, apex, eave_end_point,
43
- flashing_end_points, eave, ridge, rake, valley, gestalt_rgb]
44
- """
45
- edge_classes = ['apex', 'eave_end_point', 'flashing_end_points', 'eave', 'ridge', 'rake', 'valley']
46
-
47
- # Extract ADE and Gestalt data from colmap_pcloud
48
- ade_values = pcloud_7d['ade']
49
- gestalt_values = pcloud_7d['gestalt']
50
- point_cloud = pcloud_7d['points_7d']
51
-
52
- # Initialize output array (6D base + 1 ADE + 7 edge classes + 1 gestalt)
53
- pcloud_14d = np.zeros((point_cloud.shape[0], 14))
54
- pcloud_14d[:, :6] = point_cloud[:, :6] # Remove confidence/ID column
55
-
56
- # Process ADE segmentation
57
- pcloud_14d[:, 6] = ade_values
58
- pcloud_14d[:, 3:6] = pcloud_14d[:, 3:6] * 2 - 1
59
-
60
- # Process Gestalt segmentation with edge class bin counting
61
- for i, gestalt_list in enumerate(gestalt_values):
62
- if len(gestalt_list) > 0:
63
- gestalt_array = np.array(gestalt_list, dtype=np.uint32)
64
- if gestalt_array.ndim == 2:
65
- # Bin counting for edge classes (columns 7-13)
66
- for j, edge_class in enumerate(edge_classes):
67
- if edge_class in gestalt_color_mapping:
68
- target_color = np.array(gestalt_color_mapping[edge_class])
69
- # Count matches for this edge class
70
- matches = np.sum(np.all(gestalt_array == target_color, axis=1))
71
- pcloud_14d[i, 7 + j] = matches
72
-
73
- return pcloud_14d
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
script.py CHANGED
@@ -74,7 +74,7 @@ if __name__ == "__main__":
74
 
75
  device = "cuda" if torch.cuda.is_available() else "cpu"
76
 
77
- pnet_model = load_pointnet_model(model_path="initial_epoch_100_v2.pth", device=device, predict_score=True)
78
 
79
  pnet_class_model = load_pointnet_class_model(model_path="pnet_class.pth", device=device)
80
 
 
74
 
75
  device = "cuda" if torch.cuda.is_available() else "cpu"
76
 
77
+ pnet_model = load_pointnet_model(model_path="pnet.pth", device=device, predict_score=True)
78
 
79
  pnet_class_model = load_pointnet_class_model(model_path="pnet_class.pth", device=device)
80
 
train.py CHANGED
@@ -1,3 +1,15 @@
 
 
 
 
 
 
 
 
 
 
 
 
1
  from datasets import load_dataset
2
  from hoho2025.vis import plot_all_modalities
3
  from hoho2025.viz3d import *
@@ -16,11 +28,8 @@ from utils import read_colmap_rec, empty_solution
16
  from hoho2025.metric_helper import hss
17
  from predict import predict_wireframe, predict_wireframe_old
18
  from tqdm import tqdm
19
- #from fast_pointnet import load_pointnet_model
20
  from fast_pointnet_v2 import load_pointnet_model
21
- from fast_voxel import load_3dcnn_model
22
- from fast_pointnet_class_v2 import load_pointnet_model as load_pointnet_class_model
23
- from fast_pointnet_class_10d import load_pointnet_model as load_pointnet_class_model_10d
24
  import torch
25
  import time
26
 
@@ -47,15 +56,15 @@ print(f"Running with configuration: {config}")
47
  os.makedirs(args.results_dir, exist_ok=True)
48
 
49
 
50
- #ds = load_dataset("usm3d/hoho25k", cache_dir="/media/skvrnjan/sd/hoho25k/", trust_remote_code=True)
51
- ds = load_dataset("usm3d/hoho25k", cache_dir="/mnt/personal/skvrnjan/hoho25k/", trust_remote_code=True)
52
- ds = ds.shuffle()
53
 
54
  scores_hss = []
55
  scores_f1 = []
56
  scores_iou = []
57
 
58
- show_visu = False
59
 
60
  device = "cuda" if torch.cuda.is_available() else "cpu"
61
 
@@ -105,10 +114,10 @@ for a in tqdm(ds['train'], desc="Processing dataset"):
105
  colmap = read_colmap_rec(a['colmap_binary'])
106
  pcd, geometries = plot_reconstruction_local(None, colmap, points=True, cameras=True, crop_outliers=True)
107
  wireframe = plot_wireframe_local(None, a['wf_vertices'], a['wf_edges'], a['wf_classifications'])
108
- wireframe2 = plot_wireframe_local(None, pred_vertices, pred_edges, None, color='rgb(255, 0, 0)')
109
  bpo_cams = plot_bpo_cameras_from_entry_local(None, a)
110
 
111
- visu_all = [pcd] + geometries + wireframe + bpo_cams + wireframe2
112
  o3d.visualization.draw_geometries(visu_all, window_name=f"3D Reconstruction - HSS: {score.hss:.4f}, F1: {score.f1:.4f}, IoU: {score.iou:.4f}")
113
 
114
  idx += 1
 
1
+ """
2
+ Training and evaluation script for HoHo wireframe prediction model.
3
+ This script loads the HoHo25k dataset, processes samples through a wireframe prediction pipeline
4
+ using PointNet models, and evaluates performance using HSS, F1, and IoU metrics. It supports
5
+ configurable thresholds, visualization of results, and saves detailed performance metrics to files.
6
+ Key features:
7
+ - Command-line argument support for model configuration
8
+ - PointNet-based vertex and edge prediction
9
+ - Real-time performance monitoring and visualization
10
+ - Comprehensive metric evaluation and result logging
11
+ - Support for CUDA acceleration when available
12
+ """
13
  from datasets import load_dataset
14
  from hoho2025.vis import plot_all_modalities
15
  from hoho2025.viz3d import *
 
28
  from hoho2025.metric_helper import hss
29
  from predict import predict_wireframe, predict_wireframe_old
30
  from tqdm import tqdm
 
31
  from fast_pointnet_v2 import load_pointnet_model
32
+ from fast_pointnet_class import load_pointnet_model as load_pointnet_class_model
 
 
33
  import torch
34
  import time
35
 
 
56
  os.makedirs(args.results_dir, exist_ok=True)
57
 
58
 
59
+ ds = load_dataset("usm3d/hoho25k", cache_dir="/media/skvrnjan/sd/hoho25k/", trust_remote_code=True)
60
+ #ds = load_dataset("usm3d/hoho25k", cache_dir="/mnt/personal/skvrnjan/hoho25k/", trust_remote_code=True)
61
+ #ds = ds.shuffle()
62
 
63
  scores_hss = []
64
  scores_f1 = []
65
  scores_iou = []
66
 
67
+ show_visu = True
68
 
69
  device = "cuda" if torch.cuda.is_available() else "cpu"
70
 
 
114
  colmap = read_colmap_rec(a['colmap_binary'])
115
  pcd, geometries = plot_reconstruction_local(None, colmap, points=True, cameras=True, crop_outliers=True)
116
  wireframe = plot_wireframe_local(None, a['wf_vertices'], a['wf_edges'], a['wf_classifications'])
117
+ #wireframe2 = plot_wireframe_local(None, pred_vertices, pred_edges, None, color='rgb(255, 0, 0)')
118
  bpo_cams = plot_bpo_cameras_from_entry_local(None, a)
119
 
120
+ visu_all = [pcd] + geometries + wireframe + bpo_cams #+ wireframe2
121
  o3d.visualization.draw_geometries(visu_all, window_name=f"3D Reconstruction - HSS: {score.hss:.4f}, F1: {score.f1:.4f}, IoU: {score.iou:.4f}")
122
 
123
  idx += 1
train_end.py DELETED
@@ -1,73 +0,0 @@
1
- from datasets import load_dataset
2
- from hoho2025.vis import plot_all_modalities
3
- from hoho2025.viz3d import *
4
- import open3d as o3d
5
-
6
- from visu import plot_reconstruction_local, plot_wireframe_local, plot_bpo_cameras_from_entry_local, _plotly_rgb_to_normalized_o3d_color
7
- from utils import read_colmap_rec, empty_solution
8
-
9
- #from hoho2025.example_solutions import predict_wireframe
10
- from hoho2025.metric_helper import hss
11
- from predict import predict_wireframe_old
12
- from predict_end import predict_wireframe
13
- from tqdm import tqdm
14
- import torch
15
- import time
16
-
17
- #ds = load_dataset("usm3d/hoho25k", cache_dir="/media/skvrnjan/sd/hoho25k/", trust_remote_code=True)
18
- ds = load_dataset("usm3d/hoho25k", cache_dir="/mnt/personal/skvrnjan/hoho25k/", trust_remote_code=True)
19
- ds = ds.shuffle()
20
-
21
- scores_hss = []
22
- scores_f1 = []
23
- scores_iou = []
24
-
25
- show_visu = False
26
-
27
- device = "cuda" if torch.cuda.is_available() else "cpu"
28
-
29
- config = {'vertex_threshold': 0.4, 'edge_threshold': 0.6, 'only_predicted_connections': False}
30
-
31
- idx = 0
32
- prediction_times = []
33
- for a in tqdm(ds['train'], desc="Processing dataset"):
34
- #plot_all_modalities(a)
35
- #pred_vertices, pred_edges = predict_wireframe_old(a)
36
- #pred_vertices, pred_edges = predict_wireframe(a.copy(), config)
37
- try:
38
- start_time = time.time()
39
- pred_vertices, pred_edges = predict_wireframe(a.copy(), config)
40
- #pred_vertices, pred_edges = predict_wireframe_old(a)
41
- end_time = time.time()
42
- prediction_time = end_time - start_time
43
- prediction_times.append(prediction_time)
44
- mean_time = np.mean(prediction_times)
45
- print(f"Prediction time: {prediction_time:.4f} seconds, Mean time: {mean_time:.4f} seconds")
46
- except:
47
- pred_vertices, pred_edges = empty_solution()
48
-
49
- score = hss(pred_vertices, pred_edges, a['wf_vertices'], a['wf_edges'], vert_thresh=0.5, edge_thresh=0.5)
50
- print(f"Score: {score}")
51
- scores_hss.append(score.hss)
52
- scores_f1.append(score.f1)
53
- scores_iou.append(score.iou)
54
-
55
- if show_visu:
56
- colmap = read_colmap_rec(a['colmap_binary'])
57
- pcd, geometries = plot_reconstruction_local(None, colmap, points=True, cameras=True, crop_outliers=True)
58
- wireframe = plot_wireframe_local(None, a['wf_vertices'], a['wf_edges'], a['wf_classifications'])
59
- wireframe2 = plot_wireframe_local(None, pred_vertices, pred_edges, None, color='rgb(255, 0, 0)')
60
- bpo_cams = plot_bpo_cameras_from_entry_local(None, a)
61
-
62
- visu_all = [pcd] + geometries + wireframe + bpo_cams + wireframe2
63
- o3d.visualization.draw_geometries(visu_all, window_name=f"3D Reconstruction - HSS: {score.hss:.4f}, F1: {score.f1:.4f}, IoU: {score.iou:.4f}")
64
-
65
- idx += 1
66
-
67
- for i in range(10):
68
- print("END OF DATASET")
69
- print(f"Mean HSS: {np.mean(scores_hss):.4f}")
70
- print(f"Mean F1: {np.mean(scores_f1):.4f}")
71
- print(f"Mean IoU: {np.mean(scores_iou):.4f}")
72
- print(config)
73
-
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
train_pnet.py DELETED
@@ -1,13 +0,0 @@
1
- from fast_pointnet import train_pointnet
2
- import os
3
-
4
- if __name__ == "__main__":
5
-
6
- # Load the dataset
7
- dataset_path = "/home/skvrnjan/personal/hohocustom/"
8
- model_save_path = "/home/skvrnjan/personal/hoho_pnet/"
9
-
10
- os.makedirs(model_save_path, exist_ok=True)
11
-
12
- # Train the model
13
- train_pointnet(dataset_path, model_save_path)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
train_pnet_class.py CHANGED
@@ -1,3 +1,15 @@
 
 
 
 
 
 
 
 
 
 
 
 
1
  from fast_pointnet_class import train_pointnet
2
  import os
3
 
@@ -10,4 +22,4 @@ if __name__ == "__main__":
10
  os.makedirs(model_save_path, exist_ok=True)
11
 
12
  # Train the model
13
- train_pointnet(dataset_path, model_save_path)
 
1
+ # This script serves as the main entry point for training a PointNet-based
2
+ # classification model.
3
+ #
4
+ # It imports the necessary training function `train_pointnet` from the
5
+ # `fast_pointnet_class` module.
6
+ #
7
+ # The script defines file paths for the input dataset and the directory
8
+ # where the trained model will be saved. It ensures that the model saving
9
+ # directory exists before starting the training.
10
+ #
11
+ # Finally, it initiates the training process by calling the `train_pointnet`
12
+ # function with the specified dataset path, model save path, and a batch size.
13
  from fast_pointnet_class import train_pointnet
14
  import os
15
 
 
22
  os.makedirs(model_save_path, exist_ok=True)
23
 
24
  # Train the model
25
+ train_pointnet(dataset_path, model_save_path, batch_size=4)
train_pnet_class_cluster.py DELETED
@@ -1,13 +0,0 @@
1
- from fast_pointnet_class import train_pointnet
2
- import os
3
-
4
- if __name__ == "__main__":
5
-
6
- # Load the dataset
7
- dataset_path = "/mnt/personal/skvrnjan/hohocustom_edges/"
8
- model_save_path = "/mnt/personal/skvrnjan/hoho_pnet_edges_v4/initial.pth"
9
-
10
- os.makedirs(model_save_path, exist_ok=True)
11
-
12
- # Train the model
13
- train_pointnet(dataset_path, model_save_path, epochs=100, batch_size=128, lr=0.001)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
train_pnet_class_cluster_10d.py DELETED
@@ -1,13 +0,0 @@
1
- from fast_pointnet_class_10d import train_pointnet
2
- import os
3
-
4
- if __name__ == "__main__":
5
-
6
- # Load the dataset
7
- dataset_path = "/mnt/personal/skvrnjan/hohocustom_edges_10d/"
8
- model_save_path = "/mnt/personal/skvrnjan/hoho_pnet_edges_10d_v2/initial.pth"
9
-
10
- os.makedirs(model_save_path, exist_ok=True)
11
-
12
- # Train the model
13
- train_pointnet(dataset_path, model_save_path, epochs=100, batch_size=128, lr=0.001)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
train_pnet_class_cluster_10d_2048.py DELETED
@@ -1,13 +0,0 @@
1
- from fast_pointnet_class_10d_2048 import train_pointnet
2
- import os
3
-
4
- if __name__ == "__main__":
5
-
6
- # Load the dataset
7
- dataset_path = "/mnt/personal/skvrnjan/hohocustom_edges_10d_1m/"
8
- model_save_path = "/mnt/personal/skvrnjan/hoho_pnet_edges_10d_2048/initial.pth"
9
-
10
- os.makedirs(model_save_path, exist_ok=True)
11
-
12
- # Train the model
13
- train_pointnet(dataset_path, model_save_path, epochs=100, batch_size=128, lr=0.001)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
train_pnet_class_cluster_10d_deeper.py DELETED
@@ -1,13 +0,0 @@
1
- from fast_pointnet_class_10d_deeper import train_pointnet
2
- import os
3
-
4
- if __name__ == "__main__":
5
-
6
- # Load the dataset
7
- dataset_path = "/mnt/personal/skvrnjan/hohocustom_edges_10d/"
8
- model_save_path = "/mnt/personal/skvrnjan/hoho_pnet_edges_10d_deeper_v2/initial.pth"
9
-
10
- os.makedirs(model_save_path, exist_ok=True)
11
-
12
- # Train the model
13
- train_pointnet(dataset_path, model_save_path, epochs=100, batch_size=128, lr=0.001)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
train_pnet_cluster.py DELETED
@@ -1,10 +0,0 @@
1
- from fast_pointnet import train_pointnet
2
-
3
- if __name__ == "__main__":
4
-
5
- # Load the dataset
6
- dataset_path = "/mnt/personal/skvrnjan/hohocustom/"
7
- model_save_path = "/mnt/personal/skvrnjan/hoho_pnet/initial.pth"
8
-
9
- # Train the model
10
- train_pointnet(dataset_path, model_save_path, epochs=100, batch_size=128, lr=0.001, score_weight=1.0, class_weight=0.5)
 
 
 
 
 
 
 
 
 
 
 
train_pnet_cluster_class_v2.py DELETED
@@ -1,10 +0,0 @@
1
- from fast_pointnet_class_v2 import train_pointnet
2
-
3
- if __name__ == "__main__":
4
-
5
- # Load the dataset
6
- dataset_path = "/mnt/personal/skvrnjan/hohocustom_edges_10d_v5/"
7
- model_save_path = "/mnt/personal/skvrnjan/hoho_pnet_class_v2/initial.pth"
8
-
9
- # Train the model
10
- train_pointnet(dataset_path, model_save_path, epochs=100, batch_size=512, lr=0.001)
 
 
 
 
 
 
 
 
 
 
 
train_pnet_cluster_v3.py DELETED
@@ -1,10 +0,0 @@
1
- from fast_pointnet_v3 import train_pointnet
2
-
3
- if __name__ == "__main__":
4
-
5
- # Load the dataset
6
- dataset_path = "/mnt/personal/skvrnjan/hohocustom_v4/"
7
- model_save_path = "/mnt/personal/skvrnjan/hoho_pnet_v11/initial.pth"
8
-
9
- # Train the model
10
- train_pointnet(dataset_path, model_save_path, epochs=100, batch_size=256, lr=0.001, score_weight=0.25, class_weight=1.0)
 
 
 
 
 
 
 
 
 
 
 
train_pnet_cluster_v2.py → train_pnet_v2.py RENAMED
@@ -3,8 +3,8 @@ from fast_pointnet_v2 import train_pointnet
3
  if __name__ == "__main__":
4
 
5
  # Load the dataset
6
- dataset_path = "/mnt/personal/skvrnjan/hohocustom_v4/"
7
- model_save_path = "/mnt/personal/skvrnjan/hoho_pnet_v7/initial.pth"
8
 
9
  # Train the model
10
  train_pointnet(dataset_path, model_save_path, epochs=100, batch_size=512, lr=0.001, score_weight=0.25, class_weight=1.0)
 
3
  if __name__ == "__main__":
4
 
5
  # Load the dataset
6
+ dataset_path = "xx"
7
+ model_save_path = "xx.pth"
8
 
9
  # Train the model
10
  train_pointnet(dataset_path, model_save_path, epochs=100, batch_size=512, lr=0.001, score_weight=0.25, class_weight=1.0)
train_voxel.py DELETED
@@ -1,13 +0,0 @@
1
- from fast_voxel import train_3dcnn
2
- import os
3
-
4
- if __name__ == "__main__":
5
-
6
- # Load the dataset
7
- dataset_path = "/home/skvrnjan/personal/hohocustom/"
8
- model_save_path = "/home/skvrnjan/personal/hoho_voxel/"
9
-
10
- os.makedirs(model_save_path, exist_ok=True)
11
-
12
- # Train the model
13
- train_3dcnn(dataset_path, model_save_path, epochs=100, batch_size=16, lr=0.001, voxel_size=32, score_weight=0.5, class_weight=0.5)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
train_voxel_cluster.py DELETED
@@ -1,13 +0,0 @@
1
- from fast_voxel import train_3dcnn
2
- import os
3
-
4
- if __name__ == "__main__":
5
-
6
- # Load the dataset
7
- dataset_path = "/mnt/personal/skvrnjan/hohocustom/"
8
- model_save_path = "/mnt/personal/skvrnjan/hoho_voxel/initial.pth"
9
-
10
- os.makedirs(model_save_path, exist_ok=True)
11
-
12
- # Train the model
13
- train_3dcnn(dataset_path, model_save_path, epochs=100, batch_size=128, lr=0.001, voxel_size=32, score_weight=0.5, class_weight=0.5)