Spaces:
Running on Zero
| # Copyright (c) Meta Platforms, Inc. and affiliates. | |
| # All rights reserved. | |
| # | |
| # This source code is licensed under the license found in the | |
| # LICENSE file in the root directory of this source tree. | |
| """ | |
| Interactive 3D Point Cloud Viewer using Viser. | |
| This module provides the PointCloudViewer class for visualizing 3D reconstruction results, | |
| including point clouds, camera poses, and animated playback. | |
| """ | |
| import os | |
| import time | |
| import threading | |
| import subprocess | |
| import tempfile | |
| import shutil | |
| from typing import List, Optional, Dict, Any, Tuple | |
| import numpy as np | |
| import torch | |
| import cv2 | |
| import matplotlib.cm as cm | |
| from tqdm.auto import tqdm | |
| import viser | |
| import viser.transforms as tf | |
| from lingbot_map.utils.geometry import closed_form_inverse_se3, unproject_depth_map_to_point_map | |
| from lingbot_map.vis.utils import CameraState | |
| from lingbot_map.vis.sky_segmentation import apply_sky_segmentation | |
| class PointCloudViewer: | |
| """ | |
| Interactive 3D point cloud viewer with camera visualization. | |
| Features: | |
| - Point cloud visualization with confidence-based filtering | |
| - Camera frustum visualization with gradient colors | |
| - Frame-by-frame playback animation (3D/4D modes) | |
| - Range-based and recent-N-frames visualization modes | |
| - Video export with FFmpeg | |
| Args: | |
| model: Optional model for interactive inference | |
| state_args: Optional state arguments | |
| pc_list: List of point clouds per frame | |
| color_list: List of colors per frame | |
| conf_list: List of confidence scores per frame | |
| cam_dict: Camera dictionary with focal, pp, R, t | |
| image_mask: Optional image mask | |
| edge_color_list: Optional edge colors | |
| device: Device for computation | |
| port: Viser server port | |
| show_camera: Whether to show camera frustums | |
| vis_threshold: Visibility threshold for filtering | |
| size: Image size | |
| downsample_factor: Point cloud downsample factor | |
| point_size: Initial point size | |
| pred_dict: Prediction dictionary (alternative to pc_list/color_list/conf_list) | |
| init_conf_threshold: Initial confidence threshold percentage | |
| use_point_map: Use point map instead of depth-based points | |
| mask_sky: Apply sky segmentation | |
| image_folder: Path to image folder (for sky segmentation) | |
| """ | |
    def __init__(
        self,
        model=None,
        state_args=None,
        pc_list=None,
        color_list=None,
        conf_list=None,
        cam_dict=None,
        image_mask=None,
        edge_color_list=None,
        device: str = "cpu",
        port: int = 8080,
        show_camera: bool = True,
        vis_threshold: float = 1.0,
        size: int = 512,
        downsample_factor: int = 10,
        point_size: float = 0.00001,
        pred_dict: Optional[Dict] = None,
        init_conf_threshold: float = 50.0,
        use_point_map: bool = False,
        mask_sky: bool = False,
        image_folder: Optional[str] = None,
        sky_mask_dir: Optional[str] = None,
        sky_mask_visualization_dir: Optional[str] = None,
        depth_stride: int = 1,
    ):
        """Initialize viewer state, start the viser server, and build the GUI.

        See the class docstring for parameter descriptions.

        NOTE(review): ``downsample_factor`` and ``init_conf_threshold`` are
        accepted but not referenced anywhere in this constructor — confirm
        whether they should feed the corresponding GUI sliders.
        """
        self.model = model
        self.size = size
        self.state_args = state_args
        # Bind on all interfaces so the viewer is reachable from other hosts.
        self.server = viser.ViserServer(host="0.0.0.0", port=port)
        self.server.gui.configure_theme(titlebar_content=None, control_layout="collapsible")
        self.device = device
        self.conf_list = conf_list
        self.vis_threshold = vis_threshold
        self.point_size = point_size
        # Convenience: numpy array -> float torch tensor on the target device.
        self.tt = lambda x: torch.from_numpy(x).float().to(device)
        # Process the prediction dictionary to create pc_list, color_list, conf_list
        if pred_dict is not None:
            # _process_pred_dict also populates self.original_images.
            pc_list, color_list, conf_list, cam_dict = self._process_pred_dict(
                pred_dict, use_point_map, mask_sky, image_folder,
                sky_mask_dir=sky_mask_dir,
                sky_mask_visualization_dir=sky_mask_visualization_dir,
                depth_stride=depth_stride,
            )
        else:
            self.original_images = []
        self.pcs, self.all_steps = self.read_data(
            pc_list, color_list, conf_list, edge_color_list
        )
        self.cam_dict = cam_dict
        self.num_frames = len(self.all_steps)
        self.image_mask = image_mask
        self.show_camera = show_camera
        # Playback / replay state flags.
        self.on_replay = False
        self.vis_pts_list = []
        self.traj_list = []
        # First color entry of each non-empty frame, kept for video export.
        self.orig_img_list = [x[0] for x in color_list if len(x) > 0] if color_list else []
        self.via_points = []
        self._setup_gui()
        self.server.on_client_connect(self._connect_client)
    def _process_pred_dict(
        self,
        pred_dict: Dict,
        use_point_map: bool,
        mask_sky: bool,
        image_folder: Optional[str],
        sky_mask_dir: Optional[str] = None,
        sky_mask_visualization_dir: Optional[str] = None,
        depth_stride: int = 1,
    ) -> Tuple[List, List, List, Dict]:
        """Process prediction dictionary to extract visualization data.

        Also populates ``self.original_images`` with uint8 HWC frames used by
        the camera-frustum / GUI image display.

        Args:
            pred_dict: Model prediction dictionary.
            use_point_map: Use point map instead of depth-based projection.
            mask_sky: Apply sky segmentation to filter sky points.
            image_folder: Path to images for sky segmentation.
            sky_mask_dir: Directory for cached sky masks.
            sky_mask_visualization_dir: Directory for sky mask visualization images.
            depth_stride: Only project depth to point cloud every N frames.
                Frames not projected will have empty point clouds but still
                show camera frustums and images. 1 = every frame (default).

        Returns:
            Tuple ``(pc_list, color_list, conf_list, cam_dict)`` with one
            entry per frame.
        """
        images = pred_dict["images"]  # (S, 3, H, W)
        depth_map = pred_dict.get("depth")  # (S, H, W, 1)
        depth_conf = pred_dict.get("depth_conf")  # (S, H, W)
        extrinsics_cam = pred_dict["extrinsic"]  # (S, 3, 4)
        intrinsics_cam = pred_dict["intrinsic"]  # (S, 3, 3)
        # Compute world points from depth if not using the precomputed point map
        if not use_point_map:
            world_points = unproject_depth_map_to_point_map(depth_map, extrinsics_cam, intrinsics_cam)
            conf = depth_conf
        else:
            world_points = pred_dict["world_points"]  # (S, H, W, 3)
            conf = pred_dict.get("world_points_conf", depth_conf)  # (S, H, W)
        # Apply sky segmentation if enabled
        if mask_sky:
            # Sky pixels get their confidence adjusted so later filtering can drop them.
            conf = apply_sky_segmentation(
                conf, image_folder=image_folder, images=images,
                sky_mask_dir=sky_mask_dir,
                sky_mask_visualization_dir=sky_mask_visualization_dir,
            )
        # Convert images from (S, 3, H, W) to (S, H, W, 3)
        colors = images.transpose(0, 2, 3, 1)  # now (S, H, W, 3)
        S = world_points.shape[0]
        # Store original images for camera frustum display
        self.original_images = []
        for i in range(S):
            img = images[i]  # shape (3, H, W)
            # CHW -> HWC uint8; assumes pixel values are in [0, 1] — TODO confirm.
            img = (img.transpose(1, 2, 0) * 255).astype(np.uint8)
            self.original_images.append(img)
        # Create lists - apply depth_stride to skip frames for point projection
        # NOTE(review): H, W are computed but unused below.
        H, W = world_points.shape[1], world_points.shape[2]
        pc_list = []
        color_list = []
        conf_list = []
        skipped = 0
        for i in range(S):
            if depth_stride > 1 and i % depth_stride != 0:
                # Empty point cloud for skipped frames
                pc_list.append(np.zeros((0, 0, 3), dtype=np.float32))
                color_list.append(np.zeros((0, 0, 3), dtype=np.float32))
                conf_list.append(np.zeros((0, 0), dtype=np.float32))
                skipped += 1
            else:
                pc_list.append(world_points[i])
                color_list.append(colors[i])
                if conf is not None:
                    conf_list.append(conf[i])
                else:
                    # No confidence available anywhere: treat all points as fully confident.
                    conf_list.append(np.ones(world_points[i].shape[:2], dtype=np.float32))
        if depth_stride > 1:
            print(f' depth_stride={depth_stride}: projecting {S - skipped}/{S} frames, skipping {skipped}')
        # Create camera dictionary (all frames keep cameras)
        # Invert the world->camera extrinsics into camera->world poses.
        cam_to_world_mat = closed_form_inverse_se3(extrinsics_cam)
        cam_dict = {
            "focal": [intrinsics_cam[i, 0, 0] for i in range(S)],
            "pp": [(intrinsics_cam[i, 0, 2], intrinsics_cam[i, 1, 2]) for i in range(S)],
            "R": [cam_to_world_mat[i, :3, :3] for i in range(S)],
            "t": [cam_to_world_mat[i, :3, 3] for i in range(S)],
        }
        return pc_list, color_list, conf_list, cam_dict
| def _compute_scene_center_and_scale(self) -> Tuple[np.ndarray, float]: | |
| """Compute scene center and scale from camera positions and point clouds. | |
| Returns: | |
| Tuple of (center as 3D array, scale as float distance). | |
| """ | |
| # Use camera positions as primary reference (more reliable than noisy points) | |
| if self.cam_dict is not None and "t" in self.cam_dict: | |
| cam_positions = np.array([self.cam_dict["t"][s] for s in self.all_steps]) | |
| center = np.mean(cam_positions, axis=0) | |
| if len(cam_positions) > 1: | |
| extent = np.ptp(cam_positions, axis=0) # range per axis | |
| scale = np.linalg.norm(extent) | |
| else: | |
| scale = 1.0 | |
| else: | |
| # Fallback: use point cloud data | |
| all_pts = [] | |
| for step in self.all_steps: | |
| pc = self.pcs[step]["pc"].reshape(-1, 3) | |
| # subsample for speed | |
| if len(pc) > 1000: | |
| pc = pc[::len(pc) // 1000] | |
| all_pts.append(pc) | |
| all_pts = np.concatenate(all_pts, axis=0) | |
| center = np.median(all_pts, axis=0) | |
| extent = np.percentile(all_pts, 95, axis=0) - np.percentile(all_pts, 5, axis=0) | |
| scale = np.linalg.norm(extent) | |
| return center, max(scale, 0.1) | |
| def _reset_view_to_direction( | |
| self, | |
| direction: np.ndarray, | |
| up: np.ndarray = np.array([0.0, -1.0, 0.0]), | |
| distance_scale: float = 1.5, | |
| smooth: bool = True, | |
| ): | |
| """Reset the viewer camera to look at scene center from a given direction. | |
| Args: | |
| direction: Unit vector pointing FROM the scene center TO the camera. | |
| up: Up vector for the camera. | |
| distance_scale: Multiplier on scene scale for camera distance. | |
| smooth: Whether to smoothly transition. | |
| """ | |
| center, scale = self._compute_scene_center_and_scale() | |
| distance = scale * distance_scale | |
| position = center + direction * distance | |
| for client in self.server.get_clients().values(): | |
| if smooth: | |
| self._smooth_camera_transition( | |
| client, | |
| target_position=position, | |
| target_look_at=center, | |
| target_up=up, | |
| duration=0.4, | |
| ) | |
| else: | |
| client.camera.up_direction = tuple(up) | |
| client.camera.position = tuple(position) | |
| client.camera.look_at = tuple(center) | |
    def _setup_gui(self):
        """Setup GUI controls.

        Builds all viser GUI widgets: view-reset buttons, display sliders,
        screenshot / GLB-export / video-saving panels, and their handlers.
        """
        gui_reset_up = self.server.gui.add_button(
            "Reset up direction",
            hint="Set the camera control 'up' direction to the current camera's 'up'.",
        )

        # NOTE(review): every handler below is defined as `def _(...)` but is
        # never registered with the control it follows — the registration
        # decorators (e.g. `@gui_reset_up.on_click`, `@slider.on_update`)
        # appear to have been lost; confirm against the upstream source.
        def _(event: viser.GuiEvent) -> None:
            # Reset up direction: align control 'up' with the camera's -Y axis.
            client = event.client
            assert client is not None
            client.camera.up_direction = tf.SO3(client.camera.wxyz) @ np.array(
                [0.0, -1.0, 0.0]
            )

        # Video frame display controls — kept at top so the current frame is always visible
        with self.server.gui.add_folder("Video Display"):
            self.show_video_checkbox = self.server.gui.add_checkbox("Show Current Frame", initial_value=True)
            if hasattr(self, 'original_images') and len(self.original_images) > 0:
                self.current_frame_image = self.server.gui.add_image(
                    self.original_images[0], label="Current Frame"
                )
            else:
                # No source frames available (e.g. viewer built from raw pc_list).
                self.current_frame_image = None

        # Preset view direction buttons
        with self.server.gui.add_folder("Reset View Direction"):
            btn_look_at_center = self.server.gui.add_button(
                "Look At Scene Center",
                hint="Reset orbit center to the scene center (fixes orbit after dragging).",
            )
            btn_overview = self.server.gui.add_button(
                "Overview",
                hint="Reset to a 3/4 overview of the scene.",
            )
            btn_front = self.server.gui.add_button(
                "Front (+Z)",
                hint="View scene from the front.",
            )
            btn_back = self.server.gui.add_button(
                "Back (-Z)",
                hint="View scene from the back.",
            )
            btn_top = self.server.gui.add_button(
                "Top (-Y)",
                hint="View scene from above (bird's eye).",
            )
            btn_left = self.server.gui.add_button(
                "Left (-X)",
                hint="View scene from the left.",
            )
            btn_right = self.server.gui.add_button(
                "Right (+X)",
                hint="View scene from the right.",
            )
            btn_first_cam = self.server.gui.add_button(
                "First Camera",
                hint="Reset to the first camera's viewpoint.",
            )

        # Handlers for the preset-view buttons above (in declaration order).
        def _(_) -> None:
            # Look At Scene Center: re-center the orbit target only.
            center, _ = self._compute_scene_center_and_scale()
            for client in self.server.get_clients().values():
                client.camera.look_at = tuple(center)

        def _(_) -> None:
            # Overview: normalized 3/4 viewing direction.
            d = np.array([0.5, -0.6, 0.6])
            self._reset_view_to_direction(d / np.linalg.norm(d))

        def _(_) -> None:
            # Front (+Z)
            self._reset_view_to_direction(np.array([0.0, 0.0, 1.0]))

        def _(_) -> None:
            # Back (-Z)
            self._reset_view_to_direction(np.array([0.0, 0.0, -1.0]))

        def _(_) -> None:
            # Top (-Y): look straight down with +Z as the on-screen up.
            self._reset_view_to_direction(
                np.array([0.0, -1.0, 0.0]),
                up=np.array([0.0, 0.0, 1.0]),
            )

        def _(_) -> None:
            # Left (-X)
            self._reset_view_to_direction(np.array([-1.0, 0.0, 0.0]))

        def _(_) -> None:
            # Right (+X)
            self._reset_view_to_direction(np.array([1.0, 0.0, 0.0]))

        def _(_) -> None:
            # First Camera
            self._move_to_camera(0, smooth=True)

        button3 = self.server.gui.add_button("4D (Only Show Current Frame)")
        button4 = self.server.gui.add_button("3D (Show All Frames)")
        self.is_render = False
        # fourd: when True, playback shows only the current frame (see
        # update_frame_visibility).
        self.fourd = False

        def _(event: viser.GuiEvent) -> None:
            # 4D mode button
            self.fourd = True

        def _(event: viser.GuiEvent) -> None:
            # 3D mode button
            self.fourd = False

        self.focal_slider = self.server.gui.add_slider(
            "Focal Length", min=0.1, max=99999, step=1, initial_value=533
        )
        self.psize_slider = self.server.gui.add_slider(
            "Point Size", min=0.00001, max=0.1, step=0.00001, initial_value=self.point_size
        )
        self.camsize_slider = self.server.gui.add_slider(
            "Camera Size", min=0.01, max=0.5, step=0.01, initial_value=0.1
        )
        self.downsample_slider = self.server.gui.add_slider(
            "Downsample Factor", min=1, max=1000, step=1, initial_value=10
        )
        self.show_camera_checkbox = self.server.gui.add_checkbox(
            "Show Camera", initial_value=self.show_camera
        )
        self.vis_threshold_slider = self.server.gui.add_slider(
            "Visibility Threshold", min=1.0, max=5.0, step=0.01,
            initial_value=self.vis_threshold,
        )
        self.camera_downsample_slider = self.server.gui.add_slider(
            "Camera Downsample Factor", min=1, max=50, step=1, initial_value=1
        )

        # Screenshot controls
        with self.server.gui.add_folder("Screenshot"):
            self.screenshot_button = self.server.gui.add_button("Take Screenshot")
            self.screenshot_resolution = self.server.gui.add_dropdown(
                "Resolution",
                options=["1920x1080", "2560x1440", "3840x2160", "Current"],
                initial_value="1920x1080",
            )
            self.screenshot_path = self.server.gui.add_text(
                "Save Path", initial_value="screenshot.png"
            )
            self.screenshot_status = self.server.gui.add_text(
                "Status", initial_value="Ready"
            )

        def _(event: viser.GuiEvent) -> None:
            # Take Screenshot button
            self._take_screenshot(event.client)

        # GLB export controls
        with self.server.gui.add_folder("Export GLB"):
            self.glb_output_path = self.server.gui.add_text(
                "Output Path", initial_value="export.glb"
            )
            self.glb_show_cam_checkbox = self.server.gui.add_checkbox(
                "Include Cameras", initial_value=True,
            )
            self.glb_cam_scale_slider = self.server.gui.add_slider(
                "Camera Scale", min=0.01, max=5.0, step=0.01, initial_value=1.0,
                hint="Scale factor for camera size in GLB.",
            )
            self.glb_frustum_thickness_slider = self.server.gui.add_slider(
                "Frustum Thickness", min=1.0, max=10.0, step=0.5, initial_value=3.0,
                hint="Thickness multiplier for camera frustum edges.",
            )
            self.glb_trajectory_checkbox = self.server.gui.add_checkbox(
                "Show Trajectory", initial_value=True,
                hint="Connect cameras with a trajectory line.",
            )
            self.glb_trajectory_radius_slider = self.server.gui.add_slider(
                "Trajectory Radius", min=0.001, max=0.05, step=0.001, initial_value=0.005,
                hint="Radius of the trajectory tube.",
            )
            self.glb_mode_dropdown = self.server.gui.add_dropdown(
                "Export Mode",
                options=["Points", "Spheres"],
                initial_value="Points",
                hint="Points: raw (fast). Spheres: each point becomes a small sphere (prettier, slower).",
            )
            # Sphere-only controls start disabled; toggled by the mode handler below.
            self.glb_sphere_radius_slider = self.server.gui.add_slider(
                "Sphere Radius", min=0.001, max=0.1, step=0.001, initial_value=0.005,
                hint="Radius of each sphere in Spheres mode.",
                disabled=True,
            )
            self.glb_max_sphere_pts_slider = self.server.gui.add_slider(
                "Max Sphere Points", min=10000, max=500000, step=10000, initial_value=100000,
                hint="Cap point count for Spheres mode to keep file size manageable.",
                disabled=True,
            )
            self.glb_opacity_slider = self.server.gui.add_slider(
                "Opacity", min=0.0, max=1.0, step=0.05, initial_value=1.0,
                hint="Point/sphere opacity (alpha). <1.0 = semi-transparent.",
            )
            self.glb_saturation_slider = self.server.gui.add_slider(
                "Saturation Boost", min=0.0, max=2.0, step=0.1, initial_value=1.0,
                hint="Color saturation multiplier. >1 = more vivid, <1 = washed out.",
            )
            self.glb_brightness_slider = self.server.gui.add_slider(
                "Brightness Boost", min=0.5, max=2.0, step=0.1, initial_value=1.0,
                hint="Color brightness multiplier.",
            )
            self.glb_export_button = self.server.gui.add_button(
                "Export GLB",
                hint="Export current filtered point clouds and cameras as GLB.",
            )
            self.glb_status = self.server.gui.add_text("Status", initial_value="Ready")

        def _(_) -> None:
            # Export Mode dropdown: enable sphere-specific sliders only in Spheres mode.
            is_sphere = self.glb_mode_dropdown.value == "Spheres"
            self.glb_sphere_radius_slider.disabled = not is_sphere
            self.glb_max_sphere_pts_slider.disabled = not is_sphere

        def _(_) -> None:
            # Export GLB button
            self._export_glb()

        # Video saving controls
        with self.server.gui.add_folder("Video Saving"):
            self.save_video_button = self.server.gui.add_button("Save Video", disabled=False)
            self.video_output_path = self.server.gui.add_text("Output Path", initial_value="output_pointcloud.mp4")
            self.video_save_fps = self.server.gui.add_slider("Video FPS", min=10, max=60, step=1, initial_value=30)
            self.video_resolution = self.server.gui.add_dropdown(
                "Resolution", options=["1920x1080", "1280x720", "3840x2160"], initial_value="1920x1080"
            )
            self.save_original_video_checkbox = self.server.gui.add_checkbox("Also Save Original Video", initial_value=True)
            self.video_status = self.server.gui.add_text("Status", initial_value="Ready to save")

        def _(_) -> None:
            # Save Video button
            self.save_video(
                output_path=self.video_output_path.value,
                fps=self.video_save_fps.value,
                resolution=self.video_resolution.value,
                save_original_video=self.save_original_video_checkbox.value
            )

        def _(_) -> None:
            # Show Current Frame checkbox toggles the GUI image visibility.
            if self.current_frame_image is not None:
                self.current_frame_image.visible = self.show_video_checkbox.value

        # Scene handle registries, filled by the regeneration helpers.
        self.pc_handles = []
        self.cam_handles = []

        def _(_) -> None:
            # Point Size slider: update in place, no regeneration needed.
            for handle in self.pc_handles:
                handle.point_size = self.psize_slider.value

        def _(_) -> None:
            # Camera Size slider: keep edge thickness proportional to scale.
            for handle in self.cam_handles:
                handle.scale = self.camsize_slider.value
                handle.line_thickness = 0.03 * handle.scale

        def _(_) -> None:
            # Downsample Factor slider
            self._regenerate_point_clouds()

        def _(_) -> None:
            # Show Camera checkbox
            self.show_camera = self.show_camera_checkbox.value
            if self.show_camera:
                self._regenerate_cameras()
            else:
                for handle in self.cam_handles:
                    handle.visible = False

        def _(_) -> None:
            # Visibility Threshold slider
            self.vis_threshold = self.vis_threshold_slider.value
            self._regenerate_point_clouds()

        def _(_) -> None:
            # Camera Downsample Factor slider
            self._regenerate_cameras()
| def _regenerate_point_clouds(self): | |
| """Regenerate all point clouds with current settings.""" | |
| if not hasattr(self, 'frame_nodes'): | |
| return | |
| for handle in self.pc_handles: | |
| try: | |
| handle.remove() | |
| except (KeyError, AttributeError): | |
| pass | |
| self.pc_handles.clear() | |
| self.vis_pts_list.clear() | |
| for i, step in enumerate(self.all_steps): | |
| pc = self.pcs[step]["pc"] | |
| color = self.pcs[step]["color"] | |
| conf = self.pcs[step]["conf"] | |
| edge_color = self.pcs[step].get("edge_color", None) | |
| pred_pts, pc_color = self.parse_pc_data( | |
| pc, color, conf, edge_color, set_border_color=True, | |
| downsample_factor=self.downsample_slider.value | |
| ) | |
| self.vis_pts_list.append(pred_pts) | |
| handle = self.server.scene.add_point_cloud( | |
| name=f"/frames/{step}/pred_pts", | |
| points=pred_pts, | |
| colors=pc_color, | |
| point_size=self.psize_slider.value, | |
| ) | |
| self.pc_handles.append(handle) | |
| def _regenerate_cameras(self): | |
| """Regenerate camera visualizations with current settings.""" | |
| if not hasattr(self, 'frame_nodes'): | |
| return | |
| for handle in self.cam_handles: | |
| try: | |
| handle.remove() | |
| except (KeyError, AttributeError): | |
| pass | |
| self.cam_handles.clear() | |
| if self.show_camera: | |
| downsample_factor = int(self.camera_downsample_slider.value) | |
| for i, step in enumerate(self.all_steps): | |
| if i % downsample_factor == 0: | |
| self.add_camera(step) | |
    def _export_glb(self):
        """Export current filtered point clouds and cameras as a GLB file.

        Reads all export options from the GUI controls, writes the result to
        ``self.glb_output_path.value``, and reports progress via
        ``self.glb_status``.
        """
        try:
            import trimesh
        except ImportError:
            self.glb_status.value = "Error: pip install trimesh"
            return
        self.glb_status.value = "Collecting points..."
        print("Exporting GLB...")
        # Collect all currently visible, filtered points and colors
        all_points = []
        all_colors = []
        for step in self.all_steps:
            pc = self.pcs[step]["pc"]
            color = self.pcs[step]["color"]
            conf = self.pcs[step]["conf"]
            edge_color = self.pcs[step].get("edge_color", None)
            pts, cols = self.parse_pc_data(
                pc, color, conf, edge_color, set_border_color=False,
                downsample_factor=self.downsample_slider.value,
            )
            if len(pts) > 0:
                all_points.append(pts)
                # Normalize colors to uint8 RGB (trimesh expects 0-255).
                if cols.dtype != np.uint8:
                    cols = (np.clip(cols, 0, 1) * 255).astype(np.uint8)
                all_colors.append(cols)
        if not all_points:
            self.glb_status.value = "Error: no points to export"
            return
        vertices = np.concatenate(all_points, axis=0)
        colors_rgb = np.concatenate(all_colors, axis=0)
        # --- Color enhancement ---
        colors_float = colors_rgb.astype(np.float32) / 255.0
        sat_boost = self.glb_saturation_slider.value
        if sat_boost != 1.0:
            # Scale each channel's distance from its per-pixel gray value.
            gray = colors_float.mean(axis=1, keepdims=True)
            colors_float = gray + sat_boost * (colors_float - gray)
        bri_boost = self.glb_brightness_slider.value
        if bri_boost != 1.0:
            colors_float = colors_float * bri_boost
        colors_float = np.clip(colors_float, 0.0, 1.0)
        # --- Opacity ---
        # Simulate opacity by blending colors toward white (works in all viewers).
        # For Spheres mode, also set true alpha for viewers that support it.
        alpha = self.glb_opacity_slider.value
        if alpha < 1.0:
            bg = np.ones_like(colors_float)  # white background
            colors_float = colors_float * alpha + bg * (1.0 - alpha)
            colors_float = np.clip(colors_float, 0.0, 1.0)
        colors_u8 = (colors_float * 255).astype(np.uint8)
        colors_rgba = np.concatenate([
            colors_u8,
            np.full((len(colors_u8), 1), int(alpha * 255), dtype=np.uint8),
        ], axis=1)  # (N, 4)
        # Compute scene scale for camera sizing
        lo = np.percentile(vertices, 5, axis=0)
        hi = np.percentile(vertices, 95, axis=0)
        scene_scale = max(np.linalg.norm(hi - lo), 0.1)
        scene_3d = trimesh.Scene()
        # --- Export mode ---
        export_mode = self.glb_mode_dropdown.value
        if export_mode == "Spheres":
            self.glb_status.value = "Building spheres..."
            max_pts = int(self.glb_max_sphere_pts_slider.value)
            radius = self.glb_sphere_radius_slider.value
            # Subsample if too many points
            if len(vertices) > max_pts:
                idx = np.random.choice(len(vertices), max_pts, replace=False)
                idx.sort()
                vertices = vertices[idx]
                colors_rgba = colors_rgba[idx]
            # Instance one low-poly icosphere per point, merged into one mesh.
            sphere_template = trimesh.creation.icosphere(subdivisions=1, radius=radius)
            n_verts_per = len(sphere_template.vertices)
            n_faces_per = len(sphere_template.faces)
            all_verts = np.empty((len(vertices) * n_verts_per, 3), dtype=np.float32)
            all_faces = np.empty((len(vertices) * n_faces_per, 3), dtype=np.int64)
            all_face_colors = np.empty((len(vertices) * n_faces_per, 4), dtype=np.uint8)
            for i, (pt, rgba) in enumerate(zip(vertices, colors_rgba)):
                v_off = i * n_verts_per
                f_off = i * n_faces_per
                all_verts[v_off:v_off + n_verts_per] = sphere_template.vertices + pt
                # Face indices must be offset by this sphere's vertex base.
                all_faces[f_off:f_off + n_faces_per] = sphere_template.faces + v_off
                all_face_colors[f_off:f_off + n_faces_per] = rgba
            mesh = trimesh.Trimesh(vertices=all_verts, faces=all_faces)
            mesh.visual.face_colors = all_face_colors
            # Enable alpha blending in glTF material for true transparency
            if alpha < 1.0:
                mesh.visual.material.alphaMode = 'BLEND'
            scene_3d.add_geometry(mesh)
            print(f"Spheres mode: {len(vertices):,} spheres, {len(all_faces):,} faces")
        else:
            # Points mode (GLB viewers ignore alpha on points, so use blended RGB)
            scene_3d.add_geometry(trimesh.PointCloud(vertices=vertices, colors=colors_u8))
        # Add cameras and trajectory
        if self.glb_show_cam_checkbox.value and self.cam_dict is not None:
            from lingbot_map.vis.glb_export import integrate_camera_into_scene
            import matplotlib
            colormap = matplotlib.colormaps.get_cmap("gist_rainbow")
            num_cameras = len(self.all_steps)
            cam_positions = []
            frustum_thickness = self.glb_frustum_thickness_slider.value
            effective_cam_scale = scene_scale * self.glb_cam_scale_slider.value
            for i, step in enumerate(self.all_steps):
                R = self.cam_dict["R"][step] if "R" in self.cam_dict else np.eye(3)
                t = self.cam_dict["t"][step] if "t" in self.cam_dict else np.zeros(3)
                c2w = np.eye(4)
                c2w[:3, :3] = R
                c2w[:3, 3] = t
                cam_positions.append(np.array(t, dtype=np.float64))
                # Rainbow gradient over camera index, matching the trajectory tube.
                rgba_c = colormap(i / max(num_cameras - 1, 1))
                cam_color = tuple(int(255 * x) for x in rgba_c[:3])
                integrate_camera_into_scene(
                    scene_3d, c2w, cam_color,
                    effective_cam_scale,
                    frustum_thickness=frustum_thickness,
                )
            # Add trajectory line as a tube connecting camera positions
            if self.glb_trajectory_checkbox.value and len(cam_positions) >= 2:
                traj_pts = np.array(cam_positions)
                traj_radius = self.glb_trajectory_radius_slider.value * self.glb_cam_scale_slider.value
                traj_mesh = self._build_trajectory_tube(
                    traj_pts, traj_radius, colormap, num_cameras
                )
                if traj_mesh is not None:
                    scene_3d.add_geometry(traj_mesh)
        # Align scene using first camera extrinsic
        if self.cam_dict is not None and len(self.all_steps) > 0:
            from lingbot_map.vis.glb_export import apply_scene_alignment
            step0 = self.all_steps[0]
            R0 = self.cam_dict["R"][step0] if "R" in self.cam_dict else np.eye(3)
            t0 = self.cam_dict["t"][step0] if "t" in self.cam_dict else np.zeros(3)
            c2w_0 = np.eye(4)
            c2w_0[:3, :3] = R0
            c2w_0[:3, 3] = t0
            w2c_0 = np.linalg.inv(c2w_0)
            extrinsics = np.expand_dims(w2c_0, 0)
            scene_3d = apply_scene_alignment(scene_3d, extrinsics)
        output_path = self.glb_output_path.value
        scene_3d.export(output_path)
        n_pts = len(vertices)
        mode_str = f"spheres r={self.glb_sphere_radius_slider.value}" if export_mode == "Spheres" else "points"
        self.glb_status.value = f"Saved: {output_path} ({n_pts:,} {mode_str})"
        print(f"GLB exported to {output_path} ({n_pts:,} {mode_str})")
| def _build_trajectory_tube(positions, radius, colormap, num_cameras): | |
| """Build a tube mesh following camera trajectory with per-segment color. | |
| Args: | |
| positions: (N, 3) camera positions. | |
| radius: Tube radius. | |
| colormap: Matplotlib colormap for gradient coloring. | |
| num_cameras: Total number of cameras (for color normalization). | |
| Returns: | |
| trimesh.Trimesh or None. | |
| """ | |
| import trimesh | |
| segments = [] | |
| for i in range(len(positions) - 1): | |
| p0, p1 = positions[i], positions[i + 1] | |
| seg_len = np.linalg.norm(p1 - p0) | |
| if seg_len < 1e-8: | |
| continue | |
| # Create cylinder along Z, then transform | |
| cyl = trimesh.creation.cylinder(radius=radius, height=seg_len, sections=8) | |
| # Direction vector | |
| direction = (p1 - p0) / seg_len | |
| mid = (p0 + p1) / 2.0 | |
| # Build rotation: default cylinder is along Z | |
| z_axis = np.array([0.0, 0.0, 1.0]) | |
| v = np.cross(z_axis, direction) | |
| c = np.dot(z_axis, direction) | |
| if np.linalg.norm(v) < 1e-8: | |
| rot = np.eye(3) if c > 0 else np.diag([1, -1, -1]) | |
| else: | |
| vx = np.array([[0, -v[2], v[1]], | |
| [v[2], 0, -v[0]], | |
| [-v[1], v[0], 0]]) | |
| rot = np.eye(3) + vx + vx @ vx / (1.0 + c) | |
| transform = np.eye(4) | |
| transform[:3, :3] = rot | |
| transform[:3, 3] = mid | |
| cyl.apply_transform(transform) | |
| # Color: midpoint index | |
| t_color = (i + 0.5) / max(num_cameras - 1, 1) | |
| rgba = colormap(t_color) | |
| color_rgb = tuple(int(255 * x) for x in rgba[:3]) | |
| cyl.visual.face_colors[:, :3] = color_rgb | |
| segments.append(cyl) | |
| if not segments: | |
| return None | |
| return trimesh.util.concatenate(segments) | |
| def update_frame_visibility(self): | |
| """Show all frames up to the current timestep (or only the current one in 4D mode).""" | |
| if not hasattr(self, 'frame_nodes') or not hasattr(self, 'gui_timestep'): | |
| return | |
| current_timestep = self.gui_timestep.value | |
| for i, frame_node in enumerate(self.frame_nodes): | |
| frame_node.visible = ( | |
| i <= current_timestep if not self.fourd else i == current_timestep | |
| ) | |
| def _move_to_camera(self, frame_idx: int, smooth: bool = True): | |
| """Move viewer camera to match reconstructed camera at given frame.""" | |
| if self.cam_dict is None: | |
| return | |
| step = self.all_steps[frame_idx] if frame_idx < len(self.all_steps) else self.all_steps[-1] | |
| R = self.cam_dict["R"][step] if "R" in self.cam_dict else np.eye(3) | |
| t = self.cam_dict["t"][step] if "t" in self.cam_dict else np.zeros(3) | |
| focal = self.cam_dict["focal"][step] if "focal" in self.cam_dict else 1.0 | |
| pp = self.cam_dict["pp"][step] if "pp" in self.cam_dict else (1.0, 1.0) | |
| offset = 0.5 | |
| viewing_dir = R[:, 2] # camera Z axis in world frame | |
| position = t - viewing_dir * offset | |
| look_at = t + viewing_dir * 0.5 # look slightly ahead of camera | |
| fov = 2 * np.arctan(pp[0] / focal) | |
| up = -R[:, 1] # camera -Y axis in world frame | |
| for client in self.server.get_clients().values(): | |
| if smooth: | |
| self._smooth_camera_transition( | |
| client, | |
| target_position=position, | |
| target_look_at=look_at, | |
| target_up=up, | |
| target_fov=fov, | |
| duration=0.3, | |
| ) | |
| else: | |
| client.camera.up_direction = tuple(up) | |
| client.camera.position = tuple(position) | |
| client.camera.look_at = tuple(look_at) | |
| if fov is not None: | |
| client.camera.fov = fov | |
    def _smooth_camera_transition(
        self,
        client,
        target_position,
        target_look_at=None,
        target_up=None,
        target_fov=None,
        duration=0.3,
    ):
        """Smoothly transition camera to target pose using look_at based control.

        The interpolation runs on a daemon thread so the caller returns
        immediately; the transition is split into 15 fixed time steps.

        Args:
            client: Viser client handle.
            target_position: Target camera position (3,).
            target_look_at: Target look-at point (3,). If None, keeps current.
            target_up: Target up direction (3,). If None, keeps current.
            target_fov: Target FOV. If None, keeps current.
            duration: Transition duration in seconds.
        """
        def interpolate():
            num_steps = 15
            dt = duration / num_steps
            # Snapshot the starting pose once, before any updates are applied.
            start_position = np.array(client.camera.position, dtype=np.float64)
            start_look_at = np.array(client.camera.look_at, dtype=np.float64)
            start_fov = client.camera.fov
            end_position = np.asarray(target_position, dtype=np.float64)
            end_look_at = np.asarray(target_look_at, dtype=np.float64) if target_look_at is not None else start_look_at
            # Set up direction once at the start (not interpolated to avoid flicker)
            if target_up is not None:
                client.camera.up_direction = tuple(np.asarray(target_up, dtype=np.float64))
            for i in range(num_steps + 1):
                alpha = i / num_steps
                # Smooth ease-in-out (smoothstep: 3a^2 - 2a^3; ends with zero velocity)
                alpha_smooth = alpha * alpha * (3 - 2 * alpha)
                interp_pos = start_position + (end_position - start_position) * alpha_smooth
                interp_look = start_look_at + (end_look_at - start_look_at) * alpha_smooth
                # Set position first (this auto-moves look_at), then override look_at
                client.camera.position = tuple(interp_pos)
                client.camera.look_at = tuple(interp_look)
                if target_fov is not None:
                    interp_fov = start_fov + (target_fov - start_fov) * alpha_smooth
                    client.camera.fov = interp_fov
                time.sleep(dt)
        # Daemon thread: dies with the process; not joined, so overlapping
        # transitions on the same client may interleave updates.
        thread = threading.Thread(target=interpolate, daemon=True)
        thread.start()
| def _slerp(self, q1, q2, t): | |
| """Spherical linear interpolation between quaternions.""" | |
| dot = np.dot(q1, q2) | |
| if abs(dot) > 0.9995: | |
| result = q1 + t * (q2 - q1) | |
| return result / np.linalg.norm(result) | |
| dot = np.clip(dot, -1.0, 1.0) | |
| theta_0 = np.arccos(dot) | |
| theta = theta_0 * t | |
| q2_orthogonal = q2 - q1 * dot | |
| q2_orthogonal = q2_orthogonal / np.linalg.norm(q2_orthogonal) | |
| return q1 * np.cos(theta) + q2_orthogonal * np.sin(theta) | |
| def get_camera_state(self, client: viser.ClientHandle) -> CameraState: | |
| """Get current camera state from client.""" | |
| camera = client.camera | |
| c2w = np.concatenate([ | |
| np.concatenate([tf.SO3(camera.wxyz).as_matrix(), camera.position[:, None]], 1), | |
| [[0, 0, 0, 1]], | |
| ], 0) | |
| return CameraState(fov=camera.fov, aspect=camera.aspect, c2w=c2w) | |
| def generate_pseudo_intrinsics(h: int, w: int) -> np.ndarray: | |
| """Generate pseudo intrinsics from image size.""" | |
| focal = (h**2 + w**2) ** 0.5 | |
| return np.array([[focal, 0, w // 2], [0, focal, h // 2], [0, 0, 1]]).astype(np.float32) | |
| def _connect_client(self, client: viser.ClientHandle): | |
| """Setup client connection callbacks.""" | |
| wxyz_panel = client.gui.add_text("wxyz:", f"{client.camera.wxyz}") | |
| position_panel = client.gui.add_text("position:", f"{client.camera.position}") | |
| fov_panel = client.gui.add_text( | |
| "fov:", f"{2 * np.arctan(self.size/self.focal_slider.value) * 180 / np.pi}" | |
| ) | |
| aspect_panel = client.gui.add_text("aspect:", "1.0") | |
| def _(_: viser.CameraHandle): | |
| with self.server.atomic(): | |
| wxyz_panel.value = f"{client.camera.wxyz}" | |
| position_panel.value = f"{client.camera.position}" | |
| fov_panel.value = f"{2 * np.arctan(self.size/self.focal_slider.value) * 180 / np.pi}" | |
| aspect_panel.value = "1.0" | |
| def set_color_border(image, border_width=5, color=[1, 0, 0]): | |
| """Add colored border to image.""" | |
| image[:border_width, :, 0] = color[0] | |
| image[:border_width, :, 1] = color[1] | |
| image[:border_width, :, 2] = color[2] | |
| image[-border_width:, :, 0] = color[0] | |
| image[-border_width:, :, 1] = color[1] | |
| image[-border_width:, :, 2] = color[2] | |
| image[:, :border_width, 0] = color[0] | |
| image[:, :border_width, 1] = color[1] | |
| image[:, :border_width, 2] = color[2] | |
| image[:, -border_width:, 0] = color[0] | |
| image[:, -border_width:, 1] = color[1] | |
| image[:, -border_width:, 2] = color[2] | |
| return image | |
| def read_data(self, pc_list, color_list, conf_list, edge_color_list=None): | |
| """Read and organize point cloud data.""" | |
| pcs = {} | |
| step_list = [] | |
| for i, pc in enumerate(pc_list): | |
| step = i | |
| pcs.update({ | |
| step: { | |
| "pc": pc, | |
| "color": color_list[i], | |
| "conf": conf_list[i], | |
| "edge_color": ( | |
| None if edge_color_list is None or edge_color_list[i] is None | |
| else edge_color_list[i] | |
| ), | |
| } | |
| }) | |
| step_list.append(step) | |
| # Generate camera gradient colors | |
| num_cameras = len(pc_list) | |
| if num_cameras > 1: | |
| normalized_indices = np.array(list(range(num_cameras))) / (num_cameras - 1) | |
| else: | |
| normalized_indices = np.array([0.0]) | |
| cmap = cm.get_cmap('viridis') | |
| self.camera_colors = cmap(normalized_indices) | |
| return pcs, step_list | |
| def parse_pc_data( | |
| self, | |
| pc, | |
| color, | |
| conf=None, | |
| edge_color=[0.251, 0.702, 0.902], | |
| set_border_color=False, | |
| downsample_factor=1, | |
| ): | |
| """Parse and filter point cloud data.""" | |
| pred_pts = pc.reshape(-1, 3) | |
| if set_border_color and edge_color is not None: | |
| color = self.set_color_border(color[0], color=edge_color) | |
| if np.isnan(color).any(): | |
| color = np.zeros((pred_pts.shape[0], 3)) | |
| color[:, 2] = 1 | |
| else: | |
| color = color.reshape(-1, 3) | |
| # Remove NaN / Inf points | |
| valid = np.isfinite(pred_pts).all(axis=1) | |
| if not valid.all(): | |
| pred_pts = pred_pts[valid] | |
| color = color[valid] | |
| if conf is not None: | |
| conf = conf.reshape(-1)[valid] | |
| # Confidence threshold filter | |
| if conf is not None: | |
| conf_flat = conf.reshape(-1) if conf.ndim > 1 else conf | |
| mask = conf_flat > self.vis_threshold | |
| pred_pts = pred_pts[mask] | |
| color = color[mask] | |
| if len(pred_pts) == 0: | |
| return pred_pts, color | |
| # Downsample | |
| if downsample_factor > 1 and len(pred_pts) > 0: | |
| indices = np.arange(0, len(pred_pts), downsample_factor) | |
| pred_pts = pred_pts[indices] | |
| color = color[indices] | |
| return pred_pts, color | |
| def add_pc(self, step): | |
| """Add point cloud for a frame.""" | |
| pc = self.pcs[step]["pc"] | |
| color = self.pcs[step]["color"] | |
| conf = self.pcs[step]["conf"] | |
| edge_color = self.pcs[step].get("edge_color", None) | |
| pred_pts, color = self.parse_pc_data( | |
| pc, color, conf, edge_color, set_border_color=True, | |
| downsample_factor=self.downsample_slider.value | |
| ) | |
| self.vis_pts_list.append(pred_pts) | |
| self.pc_handles.append( | |
| self.server.scene.add_point_cloud( | |
| name=f"/frames/{step}/pred_pts", | |
| points=pred_pts, | |
| colors=color, | |
| point_size=self.psize_slider.value, | |
| ) | |
| ) | |
| def add_camera(self, step): | |
| """Add camera visualization for a frame.""" | |
| cam = self.cam_dict | |
| focal = cam["focal"][step] if cam and "focal" in cam else 1.0 | |
| pp = cam["pp"][step] if cam and "pp" in cam else (1.0, 1.0) | |
| R = cam["R"][step] if cam and "R" in cam else np.eye(3) | |
| t = cam["t"][step] if cam and "t" in cam else np.zeros(3) | |
| q = tf.SO3.from_matrix(R).wxyz | |
| fov = 2 * np.arctan(pp[0] / focal) | |
| aspect = pp[0] / pp[1] | |
| self.traj_list.append((q, t)) | |
| step_index = self.all_steps.index(step) if step in self.all_steps else 0 | |
| camera_color = self.camera_colors[step_index] | |
| camera_color_rgb = tuple((camera_color[:3] * 255).astype(int)) | |
| self.server.scene.add_frame( | |
| f"/frames/{step}/camera_frame", | |
| wxyz=q, | |
| position=t, | |
| axes_length=0.05, | |
| axes_radius=0.002, | |
| origin_radius=0.002, | |
| ) | |
| frustum_handle = self.server.scene.add_camera_frustum( | |
| name=f"/frames/{step}/camera", | |
| fov=fov, | |
| aspect=aspect, | |
| wxyz=q, | |
| position=t, | |
| scale=0.03, | |
| color=camera_color_rgb, | |
| ) | |
| def _(event) -> None: | |
| look_at_pt = t + R[:, 2] * 0.5 # look ahead along camera Z | |
| up_dir = -R[:, 1] | |
| for client in self.server.get_clients().values(): | |
| client.camera.up_direction = tuple(up_dir) | |
| client.camera.position = tuple(t) | |
| client.camera.look_at = tuple(look_at_pt) | |
| self.cam_handles.append(frustum_handle) | |
| def animate(self): | |
| """Setup and run animation controls.""" | |
| with self.server.gui.add_folder("Playback"): | |
| self.gui_timestep = self.server.gui.add_slider( | |
| "Train Step", min=0, max=self.num_frames - 1, step=1, initial_value=0, disabled=False | |
| ) | |
| gui_next_frame = self.server.gui.add_button("Next Step", disabled=False) | |
| gui_prev_frame = self.server.gui.add_button("Prev Step", disabled=False) | |
| gui_playing = self.server.gui.add_checkbox("Playing", True) | |
| gui_framerate = self.server.gui.add_slider("FPS", min=1, max=60, step=0.1, initial_value=20) | |
| gui_framerate_options = self.server.gui.add_button_group("FPS options", ("10", "20", "30", "60")) | |
| def _(_) -> None: | |
| self.gui_timestep.value = (self.gui_timestep.value + 1) % self.num_frames | |
| def _(_) -> None: | |
| self.gui_timestep.value = (self.gui_timestep.value - 1) % self.num_frames | |
| def _(_) -> None: | |
| self.gui_timestep.disabled = gui_playing.value | |
| gui_next_frame.disabled = gui_playing.value | |
| gui_prev_frame.disabled = gui_playing.value | |
| def _(_) -> None: | |
| gui_framerate.value = int(gui_framerate_options.value) | |
| prev_timestep = self.gui_timestep.value | |
| def _(_) -> None: | |
| nonlocal prev_timestep | |
| current_timestep = self.gui_timestep.value | |
| if self.current_frame_image is not None and hasattr(self, 'original_images'): | |
| if current_timestep < len(self.original_images): | |
| self.current_frame_image.image = self.original_images[current_timestep] | |
| with self.server.atomic(): | |
| self.frame_nodes[current_timestep].visible = True | |
| self.frame_nodes[prev_timestep].visible = False | |
| self.server.flush() | |
| prev_timestep = current_timestep | |
| self.server.scene.add_frame("/frames", show_axes=False) | |
| self.frame_nodes = [] | |
| for i in range(self.num_frames): | |
| step = self.all_steps[i] | |
| self.frame_nodes.append( | |
| self.server.scene.add_frame(f"/frames/{step}", show_axes=False) | |
| ) | |
| self.add_pc(step) | |
| if self.show_camera: | |
| downsample_factor = int(self.camera_downsample_slider.value) | |
| if i % downsample_factor == 0: | |
| self.add_camera(step) | |
| prev_timestep = self.gui_timestep.value | |
| while True: | |
| if self.on_replay: | |
| pass | |
| else: | |
| if gui_playing.value: | |
| self.gui_timestep.value = (self.gui_timestep.value + 1) % self.num_frames | |
| self.update_frame_visibility() | |
| time.sleep(1.0 / gui_framerate.value) | |
| def _take_screenshot(self, client: Optional[Any] = None): | |
| """Capture a screenshot from the current view and save to file. | |
| Args: | |
| client: The viser client that triggered the action. If None, | |
| uses the first connected client. | |
| """ | |
| output_path = self.screenshot_path.value | |
| res_str = self.screenshot_resolution.value | |
| # Resolve client | |
| if client is None: | |
| clients = list(self.server.get_clients().values()) | |
| if not clients: | |
| self.screenshot_status.value = "Error: no client connected" | |
| return | |
| client = clients[0] | |
| try: | |
| self.screenshot_status.value = "Capturing..." | |
| if res_str == "Current": | |
| # Use default render size | |
| width, height = 1920, 1080 | |
| else: | |
| width, height = map(int, res_str.split("x")) | |
| render = client.camera.get_render(height=height, width=width) | |
| if render is not None: | |
| frame = np.array(render) | |
| if frame.shape[2] == 4: | |
| frame = frame[:, :, :3] | |
| frame_bgr = cv2.cvtColor(frame, cv2.COLOR_RGB2BGR) | |
| cv2.imwrite(output_path, frame_bgr) | |
| self.screenshot_status.value = f"Saved: {output_path}" | |
| print(f"Screenshot saved to {output_path} ({width}x{height})") | |
| else: | |
| self.screenshot_status.value = "Error: render returned None" | |
| print("Screenshot failed: render returned None") | |
| except Exception as e: | |
| self.screenshot_status.value = f"Error: {e}" | |
| print(f"Screenshot error: {e}") | |
| def save_video( | |
| self, | |
| output_path: str = "output_pointcloud.mp4", | |
| fps: int = 30, | |
| resolution: str = "1920x1080", | |
| save_original_video: bool = True | |
| ): | |
| """Save point cloud animation as video.""" | |
| try: | |
| if hasattr(self, 'video_status'): | |
| self.video_status.value = "Saving video..." | |
| print(f"Saving video to {output_path}...") | |
| width, height = map(int, resolution.split('x')) | |
| temp_dir = tempfile.mkdtemp(prefix="viser_video_") | |
| print(f"Temporary directory: {temp_dir}") | |
| print("Waiting for client connection...") | |
| timeout = 10 | |
| start_time = time.time() | |
| while len(self.server.get_clients()) == 0: | |
| time.sleep(0.1) | |
| if time.time() - start_time > timeout: | |
| raise RuntimeError("No client connected. Please open the visualization in a browser first.") | |
| print("Client connected. Starting to render frames...") | |
| clients = list(self.server.get_clients().values()) | |
| client = clients[0] | |
| if not hasattr(self, 'gui_timestep'): | |
| raise RuntimeError("Animation not initialized. Please ensure animate() is called before save_video().") | |
| for i in tqdm(range(self.num_frames), desc="Rendering frames"): | |
| self.gui_timestep.value = i | |
| time.sleep(0.1) | |
| try: | |
| screenshot = client.camera.get_render(height=height, width=width) | |
| if screenshot is not None: | |
| frame = np.array(screenshot) | |
| if frame.shape[2] == 4: | |
| frame = frame[:, :, :3] | |
| frame = cv2.cvtColor(frame, cv2.COLOR_RGB2BGR) | |
| frame_path = os.path.join(temp_dir, f"frame_{i:06d}.png") | |
| cv2.imwrite(frame_path, frame) | |
| else: | |
| frame = self._render_frame_fallback(i, width, height) | |
| frame_path = os.path.join(temp_dir, f"frame_{i:06d}.png") | |
| cv2.imwrite(frame_path, frame) | |
| except Exception as e: | |
| print(f"Warning: Error capturing frame {i}: {e}, using fallback") | |
| frame = self._render_frame_fallback(i, width, height) | |
| frame_path = os.path.join(temp_dir, f"frame_{i:06d}.png") | |
| cv2.imwrite(frame_path, frame) | |
| print("Encoding video with ffmpeg...") | |
| ffmpeg_cmd = [ | |
| 'ffmpeg', '-y', '-framerate', str(fps), | |
| '-i', os.path.join(temp_dir, 'frame_%06d.png'), | |
| '-c:v', 'libx264', '-pix_fmt', 'yuv420p', '-crf', '18', | |
| output_path | |
| ] | |
| result = subprocess.run(ffmpeg_cmd, capture_output=True, text=True) | |
| if result.returncode == 0: | |
| print(f"Point cloud video saved successfully to {output_path}") | |
| if hasattr(self, 'video_status'): | |
| self.video_status.value = f"Saved to {output_path}" | |
| else: | |
| print(f"FFmpeg error: {result.stderr}") | |
| if hasattr(self, 'video_status'): | |
| self.video_status.value = "Error: FFmpeg failed" | |
| if save_original_video and hasattr(self, 'original_images') and len(self.original_images) > 0: | |
| self._save_original_video(output_path, fps, width, height) | |
| shutil.rmtree(temp_dir) | |
| print("Temporary files cleaned up") | |
| except Exception as e: | |
| print(f"Error saving video: {e}") | |
| import traceback | |
| traceback.print_exc() | |
| if hasattr(self, 'video_status'): | |
| self.video_status.value = f"Error: {str(e)}" | |
| def _save_original_video(self, pointcloud_video_path: str, fps: int, width: int, height: int): | |
| """Save original images as video.""" | |
| base_path = os.path.splitext(pointcloud_video_path)[0] | |
| original_video_path = f"{base_path}_original.mp4" | |
| print(f"Saving original images video to {original_video_path}...") | |
| try: | |
| temp_dir = tempfile.mkdtemp(prefix="original_video_") | |
| for i, img in enumerate(tqdm(self.original_images, desc="Saving original frames")): | |
| frame = cv2.resize(img, (width, height)) | |
| if len(frame.shape) == 3 and frame.shape[2] == 3: | |
| frame = cv2.cvtColor(frame, cv2.COLOR_RGB2BGR) | |
| frame_path = os.path.join(temp_dir, f"frame_{i:06d}.png") | |
| cv2.imwrite(frame_path, frame) | |
| print("Encoding original video with ffmpeg...") | |
| ffmpeg_cmd = [ | |
| 'ffmpeg', '-y', '-framerate', str(fps), | |
| '-i', os.path.join(temp_dir, 'frame_%06d.png'), | |
| '-c:v', 'libx264', '-pix_fmt', 'yuv420p', '-crf', '18', | |
| original_video_path | |
| ] | |
| result = subprocess.run(ffmpeg_cmd, capture_output=True, text=True) | |
| if result.returncode == 0: | |
| print(f"Original video saved successfully to {original_video_path}") | |
| else: | |
| print(f"FFmpeg error for original video: {result.stderr}") | |
| shutil.rmtree(temp_dir) | |
| except Exception as e: | |
| print(f"Error saving original video: {e}") | |
| import traceback | |
| traceback.print_exc() | |
| def _render_frame_fallback(self, frame_idx: int, width: int, height: int) -> np.ndarray: | |
| """Fallback rendering when screenshot capture fails.""" | |
| if hasattr(self, 'original_images') and frame_idx < len(self.original_images): | |
| frame = self.original_images[frame_idx].copy() | |
| frame = cv2.resize(frame, (width, height)) | |
| cv2.putText(frame, f"Frame {frame_idx}", (10, 30), | |
| cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 255, 0), 2) | |
| return frame | |
| else: | |
| frame = np.zeros((height, width, 3), dtype=np.uint8) | |
| cv2.putText(frame, f"Frame {frame_idx} - No render available", | |
| (width//4, height//2), | |
| cv2.FONT_HERSHEY_SIMPLEX, 1, (255, 255, 255), 2) | |
| return frame | |
| def run(self, background_mode: bool = False): | |
| """Run the viewer.""" | |
| self.animate() | |
| if background_mode: | |
| def server_loop(): | |
| while True: | |
| time.sleep(0.001) | |
| thread = threading.Thread(target=server_loop, daemon=True) | |
| thread.start() | |
| else: | |
| while True: | |
| time.sleep(10.0) | |