hoho / end_to_end.py

Final submission code

9518589 11 months ago

42 kB

	"""
	End-to-End Voxel-Based Vertex Detection Pipeline

	This file implements a complete pipeline for detecting wireframe vertices from 3D point clouds using
	a voxel-based deep learning approach. The pipeline includes:

	1. Data preprocessing: Converting 14D point clouds into 3D voxel grids with averaged features
	2. Ground truth generation: Creating binary vertex labels and refinement targets from wireframe vertices
	3. Model architecture: VoxelUNet with encoder-decoder structure and 1x1x1 bottleneck for vertex detection
	4. Training: Combined loss function with BCE, Dice loss, and MSE for offset regression
	5. Inference: Predicting vertex locations from new point clouds with visualization

	Key components:
	- Voxelization with configurable grid size and metric voxel size
	- Per-voxel MLP before convolutional processing
	- Gaussian smoothing of ground truth labels
	- Refinement prediction for sub-voxel accuracy
	- PyVista-based visualization for results analysis

	Usage:
	- Set inference=False to train a new model
	- Set inference=True to run predictions on existing data
	"""

	import os
	import pickle
	import torch
	import torch.nn as nn
	import torch.optim as optim
	import numpy as np
	from typing import Dict, Any, Tuple, List
	from torch.utils.data import Dataset, DataLoader
	import glob
	import pyvista as pv
	import torch

	# Helper and model definitions used by the loss, training and inference code further below:
	# save_data, load_data, get_data_files, voxelize_points, create_ground_truth, VoxelUNet, VoxelDataset.

	def save_data(dict_to_save: Dict[str, Any], filename: str, data_folder: str = "data") -> None:
	    """Pickle ``dict_to_save`` to ``<data_folder>/<filename>.pkl``.

	    The destination folder is created first if it does not exist yet.

	    Args:
	        dict_to_save: Dictionary to serialize.
	        filename: Target file name without the ``.pkl`` extension.
	        data_folder: Directory that receives the pickle file.
	    """
	    os.makedirs(data_folder, exist_ok=True)
	    target_path = os.path.join(data_folder, "{}.pkl".format(filename))
	    with open(target_path, 'wb') as handle:
	        pickle.dump(dict_to_save, handle)

	def load_data(filepath: str) -> Dict[str, Any]:
	    """Return the object stored in the pickle file at ``filepath``.

	    NOTE(review): ``pickle.load`` can execute arbitrary code while
	    deserializing, so this must only be used on files from a trusted source.
	    """
	    with open(filepath, 'rb') as handle:
	        return pickle.load(handle)

	def get_data_files(data_folder: str = "data", pattern: str = "*.pkl") -> List[str]:
	    """Return the paths in ``data_folder`` that match ``pattern``, sorted by name.

	    ``glob.glob`` yields matches in arbitrary, filesystem-dependent order;
	    sorting makes the file order reproducible between runs and machines.

	    Args:
	        data_folder: Directory to search.
	        pattern: Glob pattern applied inside ``data_folder``.

	    Returns:
	        Sorted list of matching paths (empty if nothing matches).
	    """
	    return sorted(glob.glob(os.path.join(data_folder, pattern)))

	def voxelize_points(points: np.ndarray,
	                    grid_size_xy: int = 64,
	                    voxel_size_metric: float = 0.25
	                    ) -> Tuple[torch.Tensor, np.ndarray, Dict[str, Any]]:
	    """
	    Voxelize a 14D point cloud into a cubic grid with a fixed voxel count and metric voxel size.

	    The grid has `grid_size_xy` voxels along X, Y and Z and is centered on the
	    centroid of the point cloud. Points whose voxel index falls outside the grid
	    are discarded. Per-point features that land in the same voxel are averaged.

	    Feature layout of the returned grid:
	        channels 0-2: mean offset (dx, dy, dz), in voxel units, of the points from
	                      `voxel index + 0.5` of the voxel they were assigned to.
	        channels 3-13: mean of the remaining 11 input features.

	    NOTE(review): voxel indices are obtained with `np.round`, while
	    `create_ground_truth` in this file uses `np.floor`. The two assignments
	    therefore differ by up to half a voxel. The rounding is kept as is because
	    changing it would change the inputs seen by already trained models.

	    Args:
	        points: (N, 14) array whose first 3 columns are xyz (original coordinates).
	        grid_size_xy: Number of voxels along X and Y (and Z).
	        voxel_size_metric: The physical edge length of each voxel.

	    Returns:
	        voxel_grid: (14, dim_z, dim_y, dim_x) float32 tensor with averaged features.
	        voxel_indices_for_points: (N_points_in_grid, 3) integer voxel indices (z, y, x)
	            for each input point that falls within the grid, in input order.
	        scale_info: Dict with transformation parameters:
	            'grid_origin_metric': Metric coordinate of the corner of voxel [0,0,0] (x,y,z).
	            'voxel_size_metric': The metric size of a voxel.
	            'grid_dims_voxels': Tuple (dim_x, dim_y, dim_z) with the number of voxels.
	            'pc_centroid_metric': Centroid of the input point cloud (x,y,z).
	        For an empty point cloud the grid is all zeros, the index array is empty
	        and both metric vectors in `scale_info` are zero.

	    Raises:
	        ValueError: If a non-empty `points` array is not of shape (N, 14).
	    """
	    NUM_FEATURES = 14
	    # Cubic grid; keep at least one voxel along Z.
	    dim_z_raw = grid_size_xy if grid_size_xy != 0 else 1
	    grid_dims_voxels = np.array([grid_size_xy, grid_size_xy, dim_z_raw], dtype=int)
	    grid_dims_tuple = tuple(grid_dims_voxels.tolist())
	    dim_x, dim_y, dim_z = grid_dims_tuple

	    if points.shape[0] == 0:
	        empty_grid = torch.zeros(NUM_FEATURES, dim_z, dim_y, dim_x, dtype=torch.float32)
	        empty_scale_info = {
	            'grid_origin_metric': np.zeros(3, dtype=float),
	            'voxel_size_metric': voxel_size_metric,
	            'grid_dims_voxels': grid_dims_tuple,
	            'pc_centroid_metric': np.zeros(3, dtype=float),
	        }
	        return empty_grid, np.empty((0, 3), dtype=int), empty_scale_info

	    # A narrower feature block could otherwise be broadcast silently across channels.
	    if points.ndim != 2 or points.shape[1] != NUM_FEATURES:
	        raise ValueError(
	            f"points must have shape (N, {NUM_FEATURES}), got {tuple(points.shape)}"
	        )

	    xyz = points[:, :3]
	    pc_centroid_metric = xyz.mean(axis=0)

	    # Place the grid so that its center coincides with the cloud centroid.
	    grid_metric_span = grid_dims_voxels * voxel_size_metric
	    grid_origin_metric = pc_centroid_metric - (grid_metric_span / 2.0)

	    continuous_voxel_coords = (xyz - grid_origin_metric) / voxel_size_metric

	    # Nearest-integer voxel assignment for all points at once (see NOTE in the docstring).
	    voxel_idx_xyz = np.round(continuous_voxel_coords).astype(int)
	    inside_grid = np.all((voxel_idx_xyz >= 0) & (voxel_idx_xyz < grid_dims_voxels), axis=1)
	    voxel_idx_xyz = voxel_idx_xyz[inside_grid]

	    # Per-point feature rows: offset from (index + 0.5) followed by the other features.
	    offsets_grid_units = continuous_voxel_coords[inside_grid] - (voxel_idx_xyz + 0.5)
	    per_point_features = np.concatenate(
	        [offsets_grid_units, points[inside_grid, 3:]], axis=1
	    ).astype(np.float32)

	    # Flat index matching a (dim_z, dim_y, dim_x) row-major layout.
	    flat_voxel_idx = (voxel_idx_xyz[:, 2] * dim_y + voxel_idx_xyz[:, 1]) * dim_x + voxel_idx_xyz[:, 0]
	    flat_voxel_idx_t = torch.from_numpy(flat_voxel_idx.astype(np.int64))

	    num_voxels = dim_x * dim_y * dim_z
	    voxel_feature_sum = torch.zeros(NUM_FEATURES, num_voxels, dtype=torch.float32)
	    voxel_feature_sum.index_add_(1, flat_voxel_idx_t, torch.from_numpy(per_point_features).t())
	    point_counts = torch.bincount(flat_voxel_idx_t, minlength=num_voxels)

	    # Empty voxels hold a zero sum; dividing by 1 instead of 0 keeps them at 0 (no NaN).
	    counts_for_division = point_counts.clamp(min=1).to(torch.float32).unsqueeze(0)
	    voxel_grid = voxel_feature_sum.div_(counts_for_division).view(NUM_FEATURES, dim_z, dim_y, dim_x)

	    # Reverse the columns from (x, y, z) to the documented (z, y, x) order.
	    final_voxel_indices_for_points_zyx = np.ascontiguousarray(voxel_idx_xyz[:, ::-1])

	    scale_info = {
	        'grid_origin_metric': grid_origin_metric,
	        'voxel_size_metric': voxel_size_metric,
	        'grid_dims_voxels': grid_dims_tuple,
	        'pc_centroid_metric': pc_centroid_metric,
	    }

	    return voxel_grid, final_voxel_indices_for_points_zyx, scale_info


	def create_ground_truth(vertices: np.ndarray,
	                        scale_info: Dict[str, Any]
	                        ) -> Tuple[torch.Tensor, torch.Tensor]:
	    """
	    Build per-voxel vertex labels and sub-voxel refinement targets.

	    Each vertex is mapped to a voxel by flooring its continuous grid coordinate;
	    indices outside the grid are clipped onto the nearest border voxel. When
	    several vertices share a voxel, the last one determines the stored offset.

	    Args:
	        vertices: (M, 3) vertex coordinates in original metric space.
	        scale_info: Dict from voxelize_points. Requires:
	            'grid_origin_metric', 'voxel_size_metric', 'grid_dims_voxels'.
	    Returns:
	        vertex_labels: (dim_z, dim_y, dim_x) float32 labels, 1.0 where a vertex was assigned.
	        refinement_targets: (3, dim_z, dim_y, dim_x) offsets (dx, dy, dz) of the vertex
	            from the center of its assigned voxel, in grid units. Within
	            [-0.5, 0.5) unless the vertex was clipped onto the border.
	    """
	    origin_xyz = scale_info['grid_origin_metric']
	    cell_size = scale_info['voxel_size_metric']
	    # Stored as (num_voxels_x, num_voxels_y, num_voxels_z).
	    dims_xyz = np.array(scale_info['grid_dims_voxels'])
	    dim_x, dim_y, dim_z = dims_xyz[0], dims_xyz[1], dims_xyz[2]

	    # Tensors are indexed in (z, y, x) order.
	    vertex_labels = torch.zeros(dim_z, dim_y, dim_x, dtype=torch.float32)
	    refinement_targets = torch.zeros(3, dim_z, dim_y, dim_x, dtype=torch.float32)

	    if vertices.shape[0] == 0:
	        return vertex_labels, refinement_targets

	    # Continuous grid coordinates; may be fractional and lie outside the grid.
	    grid_coords = (vertices - origin_xyz) / cell_size

	    # Floor to integer cells, then clip each axis into [0, dim - 1].
	    highest_index = np.array([dim_x - 1, dim_y - 1, dim_z - 1])
	    cell_index_xyz = np.clip(np.floor(grid_coords).astype(int), 0, highest_index)

	    # Offset of every vertex from the center (index + 0.5) of its assigned cell.
	    center_offsets = grid_coords - (cell_index_xyz.astype(float) + 0.5)

	    for (ix, iy, iz), offset_xyz in zip(cell_index_xyz.tolist(), center_offsets):
	        vertex_labels[iz, iy, ix] = 1.0
	        # Channels 0, 1, 2 hold dx, dy, dz respectively.
	        refinement_targets[:, iz, iy, ix] = torch.tensor(offset_xyz, dtype=torch.float32)

	    return vertex_labels, refinement_targets

	class VoxelUNet(nn.Module):
	    """Encoder-decoder 3D CNN with a 1x1x1 bottleneck for voxel-wise vertex detection.

	    A per-voxel MLP embeds the raw features before the first convolution. The
	    decoder receives no skip connections: it is driven only by the bottleneck
	    vector, upsampled back to the input resolution.
	    """

	    def __init__(self, in_channels: int = 14, base_channels: int = 32, bottleneck_expansion: int = 2, mlp_hidden_factor: int = 2):
	        super().__init__()

	        width = base_channels

	        # Per-voxel MLP: in_channels -> in_channels * mlp_hidden_factor -> base_channels.
	        hidden_features = in_channels * mlp_hidden_factor
	        self.voxel_mlp = nn.Sequential(
	            nn.Linear(in_channels, hidden_features),
	            nn.ReLU(inplace=True),
	            nn.Linear(hidden_features, width),
	        )

	        # Encoder: channel count doubles at every stage after the first.
	        self.enc1 = self._conv_block(width, width)
	        self.enc2 = self._conv_block(width, width * 2)
	        self.enc3 = self._conv_block(width * 2, width * 4)
	        self.enc4 = self._conv_block(width * 4, width * 8)
	        self.enc5 = self._conv_block(width * 8, width * 16)

	        self.pool = nn.MaxPool3d(2)

	        # Bottleneck: global average pooling followed by two 1x1x1 convolutions.
	        self.adaptive_pool = nn.AdaptiveAvgPool3d((1, 1, 1))
	        deepest_channels = width * 16
	        code_channels = deepest_channels * bottleneck_expansion
	        self.bottleneck = nn.Sequential(
	            nn.Conv3d(deepest_channels, code_channels, kernel_size=1, padding=0, bias=True),
	            nn.ReLU(inplace=True),
	            nn.Conv3d(code_channels, code_channels, kernel_size=1, padding=0, bias=True),
	            nn.ReLU(inplace=True),
	        )

	        # Decoder: each stage consumes only the previous decoder output.
	        self.dec5 = self._conv_block(code_channels, width * 16)

	        self.up4 = nn.Upsample(scale_factor=2, mode='trilinear', align_corners=True)
	        self.dec4 = self._conv_block(width * 16, width * 8)

	        self.up3 = nn.Upsample(scale_factor=2, mode='trilinear', align_corners=True)
	        self.dec3 = self._conv_block(width * 8, width * 4)

	        self.up2 = nn.Upsample(scale_factor=2, mode='trilinear', align_corners=True)
	        self.dec2 = self._conv_block(width * 4, width * 2)

	        self.up1 = nn.Upsample(scale_factor=2, mode='trilinear', align_corners=True)
	        self.dec1 = self._conv_block(width * 2, width)

	        # Output heads: a small 1x1x1 stack for vertex logits, a single 1x1x1 conv for offsets.
	        self.vertex_head = nn.Sequential(
	            nn.Conv3d(width, width // 2, kernel_size=1),
	            nn.ReLU(inplace=True),
	            nn.Conv3d(width // 2, width // 4, kernel_size=1),
	            nn.ReLU(inplace=True),
	            nn.Conv3d(width // 4, 1, kernel_size=1),
	        )
	        self.refinement_head = nn.Conv3d(width, 3, kernel_size=1)

	        self.tanh = nn.Tanh()  # bounds the refinement head output

	    def _conv_block(self, in_channels: int, out_channels: int) -> nn.Sequential:
	        """Return two 3x3x3 Conv3d -> BatchNorm3d -> ReLU stages (size preserving)."""
	        # The convolutions carry no bias because BatchNorm3d follows each of them.
	        layers = [
	            nn.Conv3d(in_channels, out_channels, kernel_size=3, padding=1, bias=False),
	            nn.BatchNorm3d(out_channels),
	            nn.ReLU(inplace=True),
	            nn.Conv3d(out_channels, out_channels, kernel_size=3, padding=1, bias=False),
	            nn.BatchNorm3d(out_channels),
	            nn.ReLU(inplace=True),
	        ]
	        return nn.Sequential(*layers)

	    def forward(self, x: torch.Tensor) -> Tuple[torch.Tensor, torch.Tensor]:
	        """Run the network on a batch of voxel grids.

	        Args:
	            x: Input of shape (B, C_in, D, H, W).

	        Returns:
	            vertex_logits: Output of the vertex head, one channel.
	            refinement: Three-channel offsets, tanh-bounded to [-0.5, 0.5].
	        """
	        batch, raw_channels, depth, height, breadth = x.shape

	        # Per-voxel MLP: move channels last, flatten to (B*D*H*W, C_in), apply, restore.
	        voxel_rows = x.permute(0, 2, 3, 4, 1).contiguous().view(-1, raw_channels)
	        embedded_rows = self.voxel_mlp(voxel_rows)
	        embedded_channels = embedded_rows.shape[-1]
	        features = (
	            embedded_rows.view(batch, depth, height, breadth, embedded_channels)
	            .permute(0, 4, 1, 2, 3)
	            .contiguous()
	        )

	        # Encoder: the first stage runs at full resolution, each later stage after a 2x max-pool.
	        features = self.enc1(features)
	        for encoder in (self.enc2, self.enc3, self.enc4, self.enc5):
	            features = encoder(self.pool(features))
	        deepest_size = features.shape[2:]

	        # Bottleneck: one more max-pool, then global average to 1x1x1 and the 1x1x1 convs.
	        code = self.bottleneck(self.adaptive_pool(self.pool(features)))

	        # Decoder: expand the code to the deepest encoder size, then upsample stage by stage.
	        features = self.dec5(
	            nn.functional.interpolate(code, size=deepest_size, mode='trilinear', align_corners=True)
	        )
	        decoder_stages = (
	            (self.up4, self.dec4),
	            (self.up3, self.dec3),
	            (self.up2, self.dec2),
	            (self.up1, self.dec1),
	        )
	        for upsample, decoder in decoder_stages:
	            features = decoder(upsample(features))

	        vertex_logits = self.vertex_head(features)
	        refinement = self.tanh(self.refinement_head(features)) * 0.5  # range [-0.5, 0.5]

	        return vertex_logits, refinement

	class VoxelDataset(Dataset):
	    """Dataset that turns each pickled sample file into voxel-grid training tensors."""

	    def __init__(self, data_files: List[str], voxel_size: float = 0.1, grid_size: int = 64):
	        # data_files: paths handed to load_data, one sample per file.
	        # voxel_size / grid_size: forwarded to voxelize_points for every sample.
	        self.data_files = data_files
	        self.voxel_size = voxel_size
	        self.grid_size = grid_size

	    def __len__(self):
	        """Number of sample files."""
	        return len(self.data_files)

	    def __getitem__(self, idx):
	        """Load sample ``idx`` and return (voxel_grid, vertex_labels, refinement_targets, scale_info)."""
	        sample = load_data(self.data_files[idx])

	        # The per-point voxel indices (second return value) are not needed here.
	        voxel_grid, _point_indices, scale_info = voxelize_points(
	            sample['pcloud_14d'], self.grid_size, self.voxel_size
	        )

	        # Ground truth is built on the same grid via scale_info.
	        wireframe_vertices = np.array(sample['wf_vertices'])
	        vertex_labels, refinement_targets = create_ground_truth(wireframe_vertices, scale_info)

	        return voxel_grid, vertex_labels, refinement_targets, scale_info

	import torch.nn as nn
	import torch.nn.functional as F
	from typing import Tuple # Added for Tuple type hint

	class CombinedLoss(nn.Module):
	    """
	    Combined loss for vertex classification and offset regression.

	    Uses:
	    - BCEWithLogitsLoss against Gaussian-blurred GT labels, averaged separately
	      over "positive" and "negative" voxels (configurable negative weight)
	    - Dice loss against the same blurred labels
	    - MSE loss on refinement offsets (only over voxels whose hard GT label is 1)
	    """
	    def __init__(self,
	                 vertex_weight: float = 1.0,
	                 refinement_weight: float = 0.0,
	                 dice_weight: float = 0.5,
	                 bce_neg_pos_ratio: float = 1.0,  # Ratio of negative to positive sample weight in BCE
	                 blur_kernel_size: int = 5,
	                 blur_sigma: float = 1.0,
	                 eps: float = 1e-6):
	        """
	        Args:
	            vertex_weight: Weight of the vertex (BCE + Dice) term in the total loss.
	            refinement_weight: Weight of the refinement MSE term in the total loss.
	            dice_weight: Weight of the Dice term inside the vertex loss.
	            bce_neg_pos_ratio: Weight of the mean negative-voxel BCE relative to
	                the mean positive-voxel BCE (which has weight 1).
	            blur_kernel_size: Edge length of the cubic Gaussian kernel; must be odd
	                so that padding of `k // 2` preserves the label shape.
	            blur_sigma: Standard deviation of the Gaussian, in voxels; must be > 0.
	            eps: Smoothing constant of the Dice score.

	        Raises:
	            ValueError: If `blur_kernel_size` is not a positive odd integer or
	                `blur_sigma` is not positive.
	        """
	        super().__init__()
	        if blur_kernel_size < 1 or blur_kernel_size % 2 == 0:
	            raise ValueError(f"blur_kernel_size must be a positive odd integer, got {blur_kernel_size}")
	        if blur_sigma <= 0:
	            raise ValueError(f"blur_sigma must be positive, got {blur_sigma}")

	        self.vertex_weight = vertex_weight
	        self.refinement_weight = refinement_weight
	        self.dice_weight = dice_weight
	        self.bce_neg_pos_ratio = bce_neg_pos_ratio
	        self.eps = eps

	        # BCE with logits (reduction='none' so the per-voxel losses can be masked)
	        self.bce_loss_fn = nn.BCEWithLogitsLoss(reduction='none')
	        # MSE for offset regression
	        self.mse_loss = nn.MSELoss()

	        # Build the 3D Gaussian kernel. It is deliberately left unnormalized so the
	        # center weight is 1 and a hard label of 1 stays at 1 after blur + clamp.
	        k = blur_kernel_size
	        coords = torch.arange(k, dtype=torch.float32) - (k - 1) / 2
	        xx, yy, zz = torch.meshgrid(coords, coords, coords, indexing='ij')
	        kernel = torch.exp(-(xx**2 + yy**2 + zz**2) / (2 * blur_sigma**2))
	        # shape (1,1,k,k,k) as expected by F.conv3d
	        kernel = kernel.view(1, 1, k, k, k)
	        self.register_buffer('gaussian_kernel', kernel)
	        self.pad = k // 2

	    def forward(self,
	                vertex_logits_pred: torch.Tensor,  # (B,1,D,H,W)
	                refinement_pred: torch.Tensor,     # (B,3,D,H,W)
	                vertex_gt: torch.Tensor,           # (B,D,H,W), 0/1
	                refinement_gt: torch.Tensor        # (B,3,D,H,W)
	                ) -> Tuple[torch.Tensor, torch.Tensor, torch.Tensor]:
	        """Return (total_loss, vertex_loss, refinement_loss) as scalar tensors."""
	        logits = vertex_logits_pred.squeeze(1)  # (B,D,H,W)
	        gt = vertex_gt.float()                  # (B,D,H,W)

	        # Gaussian blur of the hard labels, clamped back into [0, 1].
	        gt_blur = F.conv3d(gt.unsqueeze(1), self.gaussian_kernel, padding=self.pad)  # (B,1,D,H,W)
	        gt_smooth = gt_blur.clamp(0, 1).squeeze(1)                                   # (B,D,H,W)

	        # 1) BCE, averaged separately over voxels the blur reached (> 1e-3) and the rest,
	        #    so the far more numerous negatives cannot dominate the mean.
	        pos_mask = gt_smooth > 1e-3
	        neg_mask = ~pos_mask
	        bce_all = self.bce_loss_fn(logits, gt_smooth)

	        bce = torch.tensor(0.0, device=logits.device)
	        if pos_mask.any():
	            bce = bce + bce_all[pos_mask].mean()  # positive weight is fixed at 1
	        if neg_mask.any():
	            bce = bce + self.bce_neg_pos_ratio * bce_all[neg_mask].mean()

	        # 2) Soft Dice loss against the blurred (not binarized) labels, per sample.
	        prob = torch.sigmoid(logits)
	        intersection = (prob * gt_smooth).sum(dim=[1, 2, 3])
	        union = prob.sum(dim=[1, 2, 3]) + gt_smooth.sum(dim=[1, 2, 3])
	        dice_score = (2 * intersection + self.eps) / (union + self.eps)
	        dice_loss = 1 - dice_score.mean()

	        vertex_loss = bce + self.dice_weight * dice_loss

	        # 3) Refinement MSE, only where the original hard GT marks a vertex.
	        mask_pos_refinement = (gt > 0.5).unsqueeze(1)

	        refinement_loss = torch.tensor(0., device=logits.device)
	        if mask_pos_refinement.sum() > 0:
	            expanded_mask = mask_pos_refinement.expand_as(refinement_pred)
	            # Prediction and target are selected with the same mask, so the
	            # elements stay paired for the element-wise MSE.
	            pred_offsets = refinement_pred[expanded_mask].view(-1, 3)
	            gt_offsets = refinement_gt[expanded_mask].view(-1, 3)
	            if pred_offsets.numel() > 0:
	                refinement_loss = self.mse_loss(pred_offsets, gt_offsets)

	        # 4) Total loss
	        total_loss = (self.vertex_weight * vertex_loss +
	                      self.refinement_weight * refinement_loss)

	        return total_loss, vertex_loss, refinement_loss

	def train_epoch(model, dataloader, optimizer, criterion, device, current_epoch: int,
	                debug_visualize: bool = False):
	    """Train `model` for one pass over `dataloader`.

	    Args:
	        model: Network called as `model(voxel_grid_batch)`, returning
	            (vertex_logits, refinement).
	        dataloader: Iterable with `len()` that yields
	            (voxel_grid, vertex_labels, refinement_targets, scale_info) batches;
	            the fourth element is ignored.
	        optimizer: Optimizer for the model parameters.
	        criterion: Callable returning (total_loss, vertex_loss, refinement_loss).
	        device: Device the batch tensors are moved to.
	        current_epoch: Zero-based epoch index, used for logging and checkpoint names.
	        debug_visualize: If True, show the first sample of every batch in a
	            PyVista window before the optimization step. Off by default.

	    Returns:
	        Tuple (avg_total_loss, avg_vertex_loss, avg_refinement_loss), averaged over
	        the number of batches; all zeros for an empty dataloader.
	    """
	    model.train()
	    total_loss_epoch = 0.0
	    vertex_loss_epoch = 0.0
	    refinement_loss_epoch = 0.0

	    for batch_idx, (voxel_grid_batch, vertex_labels_batch, refinement_targets_batch, _) in enumerate(dataloader):
	        voxel_grid_batch = voxel_grid_batch.to(device)
	        vertex_labels_batch = vertex_labels_batch.to(device)
	        refinement_targets_batch = refinement_targets_batch.to(device)

	        if debug_visualize:
	            print(f'Epoch {current_epoch+1}, Batch {batch_idx+1}/{len(dataloader)}')
	            _show_debug_batch(voxel_grid_batch, vertex_labels_batch, refinement_targets_batch,
	                              current_epoch, batch_idx)

	        optimizer.zero_grad()
	        vertex_logits_pred, refinement_pred = model(voxel_grid_batch)

	        loss, vertex_loss, refinement_loss = criterion(
	            vertex_logits_pred, refinement_pred, vertex_labels_batch, refinement_targets_batch
	        )

	        print(f"Batch {batch_idx+1}/{len(dataloader)}: Loss={loss.item():.4f}, Vertex Loss={vertex_loss.item():.4f}, Refinement Loss={refinement_loss.item():.4f}")

	        # The parameter update is skipped when the loss is (near) zero or negative.
	        if loss > 0.000001:
	            loss.backward()
	            optimizer.step()

	        total_loss_epoch += loss.item()
	        vertex_loss_epoch += vertex_loss.item()
	        refinement_loss_epoch += refinement_loss.item()

	        # Intermediate checkpoint every 200 batches.
	        if (batch_idx + 1) % 200 == 0:
	            checkpoint_path = f"model_epoch_{current_epoch+1}_batch_{batch_idx+1}_grid_128v9.pth"  # Consider updating filename if grid size changes
	            torch.save(model.state_dict(), checkpoint_path)
	            print(f"Saved batch checkpoint: {checkpoint_path}")

	    num_batches = len(dataloader)
	    if num_batches == 0:
	        return 0, 0, 0

	    return (total_loss_epoch / num_batches,
	            vertex_loss_epoch / num_batches,
	            refinement_loss_epoch / num_batches)


	def _show_debug_batch(voxel_grid_batch, vertex_labels_batch, refinement_targets_batch,
	                      current_epoch: int, batch_idx: int) -> None:
	    """Show occupied voxels and GT vertices of the first sample of a batch with PyVista."""
	    sample_voxel_features = voxel_grid_batch[0].cpu().numpy()
	    sample_gt_labels = vertex_labels_batch[0].cpu().numpy()
	    sample_gt_refinement = refinement_targets_batch[0].cpu().numpy()

	    # A voxel counts as occupied when any of its first three feature channels is non-zero.
	    occupied_voxel_mask = np.any(sample_voxel_features[:3] != 0, axis=0)
	    if not np.any(occupied_voxel_mask):
	        print(f"Epoch {current_epoch+1} Batch {batch_idx+1}: No data to visualize for the first sample.")
	        return

	    plotter = pv.Plotter(window_size=[800, 600])
	    plotter.background_color = 'white'

	    # np.where yields (z, y, x) indices; reverse the columns so points are plotted as (x, y, z).
	    occupied_voxel_indices_xyz = np.array(np.where(occupied_voxel_mask)).T[:, ::-1]
	    input_points_display = pv.PolyData(occupied_voxel_indices_xyz + 0.5)
	    plotter.add_mesh(input_points_display, color='cornflowerblue', point_size=5, render_points_as_spheres=True, label='Occupied Voxels (Centers)')

	    gt_vertex_voxel_mask = sample_gt_labels > 0.5
	    if np.any(gt_vertex_voxel_mask):
	        gt_vertex_indices_xyz = np.array(np.where(gt_vertex_voxel_mask)).T[:, ::-1]
	        # Refinement channels are (dx, dy, dz), matching the reversed index columns.
	        gt_offsets_xyz = sample_gt_refinement[:, gt_vertex_voxel_mask].T
	        gt_vertex_positions_grid_space = gt_vertex_indices_xyz.astype(float) + 0.5 + gt_offsets_xyz

	        target_vertices_display = pv.PolyData(gt_vertex_positions_grid_space)
	        plotter.add_mesh(target_vertices_display, color='crimson', point_size=10, render_points_as_spheres=True, label='Target Vertices (GT)')

	    plotter.show(title=f"Debug Viz E{current_epoch+1} B{batch_idx+1}", auto_close=False)

	def train_model(data_folder: str = "data", num_epochs: int = 100, batch_size: int = 4, neg_pos_ratio_val: float = 1.0):
	    """Train a VoxelUNet on every data file in `data_folder` and save checkpoints.

	    A checkpoint is written to the working directory after every epoch, and a
	    final model file once training has finished.

	    Args:
	        data_folder: Directory searched with `get_data_files`.
	        num_epochs: Number of passes over the dataset.
	        batch_size: Samples per batch.
	        neg_pos_ratio_val: Weight of the negative-voxel BCE term relative to the
	            positive one. It is passed to `CombinedLoss` and also embedded in
	            the checkpoint file names.
	    """
	    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
	    print(f"Using device: {device}")

	    data_files = get_data_files(data_folder)
	    if not data_files:
	        print(f"No data files found in {data_folder}. Exiting.")
	        return

	    GRID_SIZE_CFG = 128
	    VOXEL_SIZE_CFG = 0.5

	    dataset = VoxelDataset(data_files, voxel_size=VOXEL_SIZE_CFG, grid_size=GRID_SIZE_CFG)
	    dataloader = DataLoader(dataset, batch_size=batch_size, shuffle=True, num_workers=8)

	    model = VoxelUNet(in_channels=14, base_channels=32, bottleneck_expansion=4, mlp_hidden_factor=10).to(device)
	    optimizer = optim.Adam(model.parameters(), lr=1e-3)

	    # Refinement and Dice terms are switched off; only the BCE term is trained.
	    # The ratio was previously used for file names only and never reached the loss.
	    criterion = CombinedLoss(
	        vertex_weight=10.0,
	        refinement_weight=0.0,
	        dice_weight=0.0,
	        bce_neg_pos_ratio=neg_pos_ratio_val
	    ).to(device)

	    print(f"Starting training: {num_epochs} epochs, Batch size: {batch_size}, Grid size: {GRID_SIZE_CFG}, Voxel size: {VOXEL_SIZE_CFG}, Initial LR: {optimizer.param_groups[0]['lr']}")

	    for epoch in range(num_epochs):
	        print(f"\n--- Epoch {epoch+1}/{num_epochs} ---")

	        avg_loss, avg_vertex_loss, avg_refinement_loss = train_epoch(
	            model, dataloader, optimizer, criterion, device, epoch
	        )

	        print(f"Epoch {epoch+1} Summary: Avg Loss: {avg_loss:.4f}, "
	              f"Avg Vertex Loss: {avg_vertex_loss:.4f}, "
	              f"Avg Refinement Loss: {avg_refinement_loss:.4f}, "
	              f"Current LR: {optimizer.param_groups[0]['lr']:.6f}")

	        checkpoint_path = f"model_epoch_{epoch+1}_grid{GRID_SIZE_CFG}_smooth_bal{neg_pos_ratio_val}_v9.pth"
	        torch.save(model.state_dict(), checkpoint_path)
	        print(f"Saved checkpoint: {checkpoint_path}")

	    final_model_path = f"final_model_grid{GRID_SIZE_CFG}_epochs{num_epochs}_smooth_bal{neg_pos_ratio_val}_v9.pth"
	    torch.save(model.state_dict(), final_model_path)
	    print(f"Training completed! Final model saved as {final_model_path}")

	def load_model_for_inference(model_path: str, device: torch.device,
	                             in_channels: int = 14, base_channels: int = 32) -> VoxelUNet:
	    """Load a VoxelUNet checkpoint and return the model in evaluation mode.

	    The network is built with `bottleneck_expansion=4` and
	    `mlp_hidden_factor=10`, the values `train_model` uses.

	    Args:
	        model_path: Path to a state dict saved with `torch.save(model.state_dict(), ...)`.
	        device: Device the weights are mapped to and the model is moved to.
	        in_channels: Number of input feature channels of the checkpointed model.
	        base_channels: Base channel width of the checkpointed model.

	    Returns:
	        The model on `device`, in eval mode.
	    """
	    # The arguments were previously ignored in favour of hard-coded 14 and 32.
	    model = VoxelUNet(in_channels=in_channels, base_channels=base_channels,
	                      bottleneck_expansion=4, mlp_hidden_factor=10)
	    model.load_state_dict(torch.load(model_path, map_location=device))
	    model.to(device)
	    model.eval()
	    print(f"Model loaded from {model_path} and set to evaluation mode on {device}.")
	    return model

	def predict_vertices(model: VoxelUNet,
	                     point_cloud_14d: np.ndarray,
	                     grid_size: int,
	                     device: torch.device,
	                     voxel_size_metric: float = 0.5,
	                     vertex_threshold: float = 0.5,
	                     apply_refinement: bool = False) -> np.ndarray:
	    """
	    Predict vertices from a 14D point cloud.

	    Args:
	        model: The trained VoxelUNet model.
	        point_cloud_14d: (N, 14) NumPy array of the input point cloud.
	        grid_size: The size of the voxel grid along each axis (must match training).
	        device: PyTorch device ('cuda' or 'cpu').
	        voxel_size_metric: The metric size of each voxel (must match training).
	        vertex_threshold: Probability above which a voxel is reported as a vertex.
	        apply_refinement: If True, add the predicted sub-voxel offsets to the
	            voxel centers. Defaults to False, which reports plain voxel centers.

	    Returns:
	        predicted_vertices_original_space: (M, 3) float32 NumPy array of predicted
	            vertex coordinates in the original point cloud space (X, Y, Z order).
	            An empty (0, 3) array is returned when the input point cloud is
	            empty or when no voxel exceeds `vertex_threshold`.
	    """
	    # The documented contract for empty input: nothing to predict. Running the model
	    # on an all-zero grid could otherwise report vertices around the origin.
	    if point_cloud_14d.shape[0] == 0:
	        return np.empty((0, 3), dtype=np.float32)

	    voxel_grid_tensor, _, scale_info = voxelize_points(
	        point_cloud_14d,
	        grid_size_xy=grid_size,
	        voxel_size_metric=voxel_size_metric
	    )

	    input_tensor = voxel_grid_tensor.unsqueeze(0).to(device)

	    with torch.no_grad():
	        vertex_logits_pred_tensor, refinement_pred_tensor = model(input_tensor)

	    vertex_prob_pred_tensor = torch.sigmoid(vertex_logits_pred_tensor)

	    vertex_prob_pred_np = vertex_prob_pred_tensor.squeeze(0).squeeze(0).cpu().numpy()
	    refinement_pred_np = refinement_pred_tensor.squeeze(0).cpu().numpy()  # (3, D, H, W): dx, dy, dz channels

	    print(f"Vertex Probabilities Stats: Min={np.min(vertex_prob_pred_np):.4f}, Max={np.max(vertex_prob_pred_np):.4f}, Mean={np.mean(vertex_prob_pred_np):.4f}, Median={np.median(vertex_prob_pred_np):.4f}")
	    if refinement_pred_np.size > 0:
	        print(f"Refinement Predictions Stats: Min={np.min(refinement_pred_np):.4f}, Max={np.max(refinement_pred_np):.4f}, Mean={np.mean(refinement_pred_np):.4f}, Median={np.median(refinement_pred_np):.4f}")
	        for i in range(refinement_pred_np.shape[0]):  # dx, dy, dz components
	            print(f" Refinement Dim {i} (dx,dy,dz order) Stats: Min={np.min(refinement_pred_np[i]):.4f}, Max={np.max(refinement_pred_np[i]):.4f}, Mean={np.mean(refinement_pred_np[i]):.4f}, Median={np.median(refinement_pred_np[i]):.4f}")
	    else:
	        print("Refinement Predictions Stats: Array is empty.")

	    predicted_mask = vertex_prob_pred_np > vertex_threshold
	    # (N_preds, 3) with columns (idx_z, idx_y, idx_x)
	    predicted_voxel_indices_zyx = np.argwhere(predicted_mask)

	    if not predicted_voxel_indices_zyx.size:
	        return np.empty((0, 3), dtype=np.float32)

	    # Voxel centers in continuous grid space, columns reordered to (X, Y, Z).
	    refined_grid_coords_xyz = predicted_voxel_indices_zyx[:, ::-1].astype(np.float32) + 0.5

	    if apply_refinement:
	        # (3, N_preds) -> (N_preds, 3) with columns (dx, dy, dz), in grid units.
	        offsets_xyz_order = refinement_pred_np[:,
	                                               predicted_voxel_indices_zyx[:, 0],
	                                               predicted_voxel_indices_zyx[:, 1],
	                                               predicted_voxel_indices_zyx[:, 2]].T
	        refined_grid_coords_xyz = refined_grid_coords_xyz + offsets_xyz_order

	    # Convert grid coordinates back to the original metric space.
	    grid_origin_metric = np.array(scale_info['grid_origin_metric'])  # (ox, oy, oz)
	    current_voxel_size_metric = scale_info['voxel_size_metric']
	    predicted_vertices_original_space = refined_grid_coords_xyz * current_voxel_size_metric + grid_origin_metric

	    return predicted_vertices_original_space.astype(np.float32)

	# Simple inference script
	def run_inference(model_path: str,
	                  data_file_path: str,
	                  output_file: str = None,
	                  grid_size: int = 128,
	                  voxel_size: float = 0.5,
	                  vertex_threshold: float = 0.5):
	    """
	    Batch inference driver: predict vertices for every data file, show them, save them.

	    For each file returned by ``get_data_files(data_file_path)`` the function
	    loads the pickle, runs ``predict_vertices`` on its ``'pcloud_14d'`` entry,
	    opens a PyVista window (input points, ground-truth vertices when the file
	    has a ``'wf_vertices'`` entry, predicted vertices) and then writes a
	    ``<input basename>_predictions`` pickle through ``save_data``.

	    Files that fail to load, lack the ``'pcloud_14d'`` key, or raise during
	    prediction are reported on stdout and skipped; a failed save is reported
	    but does not stop the batch.

	    Args:
	        model_path: Checkpoint path handed to ``load_model_for_inference``.
	        data_file_path: Location handed to ``get_data_files`` to list inputs.
	        output_file: Accepted for interface compatibility; this function never
	            reads it. Results always go through ``save_data`` with its default
	            folder.
	        grid_size: Forwarded to ``predict_vertices`` as ``grid_size``.
	        voxel_size: Forwarded to ``predict_vertices`` as ``voxel_size_metric``.
	        vertex_threshold: Forwarded to ``predict_vertices`` as ``vertex_threshold``.

	    Returns:
	        None. Results are reported on stdout, shown on screen and saved to disk.
	    """
	    # Prefer the GPU when one is available.
	    device_name = 'cuda' if torch.cuda.is_available() else 'cpu'
	    device = torch.device(device_name)
	    print(f"Using device: {device}")

	    model = load_model_for_inference(model_path, device)

	    data_files = get_data_files(data_file_path)
	    if not data_files:
	        print(f"No data files found in {data_file_path}")
	        return

	    n_files = len(data_files)
	    print(f"Found {n_files} data files to process")

	    for file_no, file_path in enumerate(data_files, start=1):
	        file_name = os.path.basename(file_path)
	        print(f"\n--- Processing file {file_no}/{n_files}: {file_name} ---")

	        # --- Load the sample; unreadable files are skipped, not fatal ---
	        try:
	            sample = load_data(file_path)
	        except Exception as err:
	            print(f"Error loading {file_path}: {err}")
	            continue

	        if 'pcloud_14d' not in sample:
	            print(f"Error: File {file_path} does not contain 'pcloud_14d' key, skipping")
	            continue

	        cloud_14d = sample['pcloud_14d']
	        # Only the first three columns are used for display.
	        cloud_xyz = cloud_14d[:, :3]
	        # Missing ground truth becomes an empty array so `.size` checks work.
	        gt_points = np.array(sample.get('wf_vertices', []))

	        print(f"Input point cloud shape: {cloud_xyz.shape}")
	        if gt_points.size:
	            print(f"GT vertices shape: {gt_points.shape}")

	        # --- Predict; a failing sample is reported and skipped ---
	        print("Running inference...")
	        try:
	            pred_points = predict_vertices(
	                model=model,
	                point_cloud_14d=cloud_14d,
	                grid_size=grid_size,
	                device=device,
	                voxel_size_metric=voxel_size,
	                vertex_threshold=vertex_threshold,
	            )
	        except Exception as err:
	            print(f"Error during prediction for {file_path}: {err}")
	            continue

	        print(f"Predicted {len(pred_points)} vertices")

	        # --- Visualization ---
	        plotter = pv.Plotter(window_size=[800, 600])
	        plotter.background_color = 'white'

	        # (points, color, point size, legend label), drawn in this order;
	        # empty layers are left out of the scene.
	        layers = (
	            (cloud_xyz, 'lightgray', 2, 'Input PC'),
	            (gt_points, 'red', 8, 'GT Vertices'),
	            (pred_points, 'blue', 8, 'Predicted Vertices'),
	        )
	        for points, color, point_size, label in layers:
	            if not points.size:
	                continue
	            plotter.add_mesh(pv.PolyData(points),
	                             color=color,
	                             point_size=point_size,
	                             render_points_as_spheres=True,
	                             label=label)

	        plotter.add_legend()
	        plotter.show(title=file_name)

	        # --- Persist the prediction together with its provenance ---
	        result_record = {
	            'predicted_vertices': pred_points,
	            'input_file': file_path,
	            'model_used': model_path,
	            'grid_size': grid_size,
	            'voxel_size': voxel_size,
	            'vertex_threshold': vertex_threshold,
	            'original_data': sample,
	        }

	        stem = os.path.splitext(file_name)[0]
	        output_filename = f"{stem}_predictions"
	        try:
	            # No folder argument: save_data falls back to its default folder.
	            save_data(result_record, output_filename)
	            print(f"Results saved to: data/{output_filename}.pkl")
	        except Exception as err:
	            print(f"Error saving results for {file_path}: {err}")

	    print(f"\nCompleted processing {n_files} files")

	if __name__ == "__main__":
	    # Mode switch: False trains a new model, True runs predictions on existing data.
	    inference = False

	    # --- Training configuration ---
	    # Replace with your actual data folder path,
	    # e.g. data_folder_train = '/path/to/your/training_data'
	    data_folder_train = 'YOUR_LOCAL_DATA_FOLDER_PATH'

	    num_epochs_train = 100
	    batch_size_train = 16
	    # Ratio of negative to positive samples used for the BCE loss.
	    negative_to_positive_bce_ratio = 1

	    if not inference:
	        train_model(
	            data_folder=data_folder_train,
	            num_epochs=num_epochs_train,
	            batch_size=batch_size_train,
	            neg_pos_ratio_val=negative_to_positive_bce_ratio,
	        )
	    else:
	        # Replace the placeholders with your actual model and data paths,
	        # e.g. '/path/to/your/model.pth' and '/path/to/your/inference_data'.
	        run_inference(
	            model_path='YOUR_MODEL_PATH.pth',
	            data_file_path='YOUR_INFERENCE_DATA_FOLDER_PATH',
	            output_file=None,  # Output will be saved in a 'data' subfolder relative to script
	            grid_size=128,
	            voxel_size=0.5,
	            vertex_threshold=0.5,
	        )