"""Generate GLB scenes with colored point clouds / textured meshes and 3D boxes.

Uses pygltflib for point cloud + wireframe boxes (GL_POINTS/GL_LINES),
and trimesh + utils3d for textured mesh generation.

Usage:
    from vis3d_glb import depth_to_pointcloud, create_scene_glb
    from vis3d_glb import create_mesh_scene_glb

    # Point cloud mode
    points, colors = depth_to_pointcloud(depth_map, image, intrinsics)
    create_scene_glb(points, colors, boxes3d_list, output_path)

    # Textured mesh mode (like MoGe2)
    create_mesh_scene_glb(depth_map, image, intrinsics, boxes3d_list, output_path)
"""

import numpy as np
import pygltflib


def depth_to_pointcloud(
    depth_map: np.ndarray,
    image: np.ndarray,
    intrinsics: np.ndarray,
    max_depth: float = 20.0,
    subsample: int = 4,
    padding: tuple[int, int, int, int] | None = None,
    remove_edge: bool = True,
    edge_rtol: float = 0.04,
    confidence_map: np.ndarray | None = None,
    confidence_threshold: float = 0.0,
) -> tuple[np.ndarray, np.ndarray]:
    """Convert depth map + RGB image to colored point cloud.

    Args:
        depth_map: (H, W) or (1, H, W) depth in meters.
        image: (H, W, 3) RGB image, uint8 [0-255].
        intrinsics: (3, 3) camera intrinsics matrix.
        max_depth: Discard points beyond this depth.
        subsample: Take every Nth pixel to reduce point count.
        padding: (left, right, top, bottom) CenterPad offsets to exclude.
        remove_edge: Remove points at depth discontinuity edges
            (like MoGe2). Uses utils3d.np.depth_map_edge.
        edge_rtol: Relative tolerance for edge detection. Larger
            values remove more aggressive edges.
        confidence_map: (H, W) or (1, H, W) per-pixel confidence in
            [0, 1]. Points below confidence_threshold are discarded.
        confidence_threshold: Minimum confidence to keep a point.

    Returns:
        points: (N, 3) float32 xyz in camera frame.
        colors: (N, 4) uint8 RGBA.
    """
    # Handle various depth_map shapes
    while depth_map.ndim > 2:
        depth_map = depth_map.squeeze(0)  # (1, 1, H, W) -> (H, W)

    H, W = depth_map.shape

    # Handle confidence_map shape
    if confidence_map is not None:
        while confidence_map.ndim > 2:
            confidence_map = confidence_map.squeeze(0)

    # Handle various image shapes: (1, H, W, 3), (1, 1, H, W) etc
    while image.ndim > 3:
        image = image.squeeze(0)
    # If image is (3, H, W), transpose to (H, W, 3)
    if image.ndim == 3 and image.shape[0] in (1, 3):
        image = np.transpose(image, (1, 2, 0))
    # If grayscale (H, W), repeat to (H, W, 3)
    if image.ndim == 2:
        image = np.stack([image] * 3, axis=-1)

    # Match image size to depth map
    if image.shape[0] != H or image.shape[1] != W:
        from PIL import Image as PILImage
        img_pil = PILImage.fromarray(image)
        img_pil = img_pil.resize((W, H), PILImage.BILINEAR)
        image = np.array(img_pil)

    # Build full-resolution valid mask before subsampling
    full_valid = (depth_map > 0.01) & (depth_map < max_depth) & np.isfinite(depth_map)

    # Exclude padding regions (full resolution)
    if padding is not None:
        pad_left, pad_right, pad_top, pad_bottom = padding
        pad_mask = np.ones((H, W), dtype=bool)
        pad_mask[:, :pad_left] = False
        pad_mask[:pad_top, :] = False
        if pad_right > 0:
            pad_mask[:, W - pad_right:] = False
        if pad_bottom > 0:
            pad_mask[H - pad_bottom:, :] = False
        full_valid &= pad_mask

    # Remove depth discontinuity edges (MoGe2 style)
    if remove_edge:
        import utils3d
        edge_mask = utils3d.np.depth_map_edge(depth_map, rtol=edge_rtol)
        full_valid &= ~edge_mask

    # Filter by confidence
    if confidence_map is not None and confidence_threshold > 0:
        full_valid &= (confidence_map >= confidence_threshold)

    # Subsample grid
    ys = np.arange(0, H, subsample)
    xs = np.arange(0, W, subsample)
    xx, yy = np.meshgrid(xs, ys)

    depth_sub = depth_map[yy, xx]
    rgb_sub = image[yy, xx]  # (h, w, 3)
    valid = full_valid[yy, xx]

    # Unproject to 3D
    fx, fy = intrinsics[0, 0], intrinsics[1, 1]
    cx, cy = intrinsics[0, 2], intrinsics[1, 2]

    x3d = (xx[valid] - cx) * depth_sub[valid] / fx
    y3d = (yy[valid] - cy) * depth_sub[valid] / fy
    z3d = depth_sub[valid]

    # OpenCV (x-right, y-down, z-away) to glTF (x, -y, -z)
    points = np.stack([x3d, -y3d, -z3d], axis=-1).astype(np.float32)

    # Colors
    rgb = rgb_sub[valid]  # (N, 3) uint8
    alpha = np.full((rgb.shape[0], 1), 255, dtype=np.uint8)
    colors = np.concatenate([rgb, alpha], axis=-1)  # (N, 4)

    return points, colors


def _quaternion_to_rotation_matrix(qw, qx, qy, qz):
    """Convert quaternion to 3x3 rotation matrix."""
    return np.array([
        [1 - 2*(qy*qy + qz*qz), 2*(qx*qy - qz*qw), 2*(qx*qz + qy*qw)],
        [2*(qx*qy + qz*qw), 1 - 2*(qx*qx + qz*qz), 2*(qy*qz - qx*qw)],
        [2*(qx*qz - qy*qw), 2*(qy*qz + qx*qw), 1 - 2*(qx*qx + qy*qy)],
    ], dtype=np.float32)


def boxes3d_to_corners(boxes3d: np.ndarray) -> list[np.ndarray]:
    """Convert 3D box params to 8 corner points in GLB coords.

    Args:
        boxes3d: (N, 10) boxes in OpenCV camera frame.
            Format: [cx, cy, cz, w, h, l, qw, qx, qy, qz]

    Returns:
        List of (8, 3) corner arrays in GLB/Three.js coords (y-up, z-backward).
    """
    corners_list = []
    # Same transform as point cloud:
    # OpenCV (x,y,z) -> glTF (x, -y, -z)
    T = np.diag([1.0, -1.0, -1.0]).astype(np.float32)

    for box in boxes3d:
        cx, cy, cz = box[0], box[1], box[2]
        # Omni3D format: [width, length, height] not [w, h, l]
        # width = x-extent, length = z-extent, height = y-extent
        bw, bl, bh = box[3], box[4], box[5]
        qw, qx, qy, qz = box[6], box[7], box[8], box[9]

        hw, hl, hh = bw / 2, bl / 2, bh / 2

        # 8 local corners: x=length, y=height, z=width
        local_corners = np.array([
            [-hl, -hh, -hw],
            [ hl, -hh, -hw],
            [ hl,  hh, -hw],
            [-hl,  hh, -hw],
            [-hl, -hh,  hw],
            [ hl, -hh,  hw],
            [ hl,  hh,  hw],
            [-hl,  hh,  hw],
        ], dtype=np.float32)

        # Rotate by quaternion and translate (in OpenCV coords)
        R_cv = _quaternion_to_rotation_matrix(qw, qx, qy, qz)
        corners_cv = (R_cv @ local_corners.T).T + np.array([cx, cy, cz])

        # Convert OpenCV -> glTF: (-z, -y, x)
        corners = (T @ corners_cv.T).T

        corners_list.append(corners.astype(np.float32))

    return corners_list


def _generate_box_colors(n_boxes: int) -> list[list[int]]:
    """Generate distinct colors for boxes."""
    base_colors = [
        [255, 0, 0, 255],    # red
        [0, 255, 0, 255],    # green
        [0, 100, 255, 255],  # blue
        [255, 255, 0, 255],  # yellow
        [255, 0, 255, 255],  # magenta
        [0, 255, 255, 255],  # cyan
        [255, 128, 0, 255],  # orange
        [128, 0, 255, 255],  # purple
    ]
    colors = []
    for i in range(n_boxes):
        colors.append(base_colors[i % len(base_colors)])
    return colors


def _pad_to_4(data: bytes) -> bytes:
    """Pad binary data to 4-byte alignment (glTF requirement)."""
    remainder = len(data) % 4
    if remainder:
        data += b"\x00" * (4 - remainder)
    return data


def create_scene_glb(
    points: np.ndarray,
    point_colors: np.ndarray,
    boxes3d_list: list[np.ndarray],
    output_path: str,
    max_points: int = 500000,
) -> str:
    """Create a GLB file with colored point cloud + wireframe 3D boxes.

    Args:
        points: (N, 3) float32 point cloud xyz.
        point_colors: (N, 4) uint8 RGBA colors.
        boxes3d_list: List of (M, 10) box arrays (one per image).
        output_path: Where to save the .glb file.
        max_points: Max number of points to include.

    Returns:
        output_path.
    """
    # Subsample points if too many
    if len(points) > max_points:
        idx = np.random.choice(len(points), max_points, replace=False)
        points = points[idx]
        point_colors = point_colors[idx]

    points = np.ascontiguousarray(points, dtype=np.float32)
    point_colors = np.ascontiguousarray(point_colors, dtype=np.uint8)
    n_points = len(points)

    # Build box geometry
    all_corners_list = []
    for boxes3d in boxes3d_list:
        if len(boxes3d) > 0:
            corners = boxes3d_to_corners(boxes3d)
            all_corners_list.extend(corners)

    n_boxes = len(all_corners_list)
    box_colors_rgba = _generate_box_colors(n_boxes)

    # Box vertices and indices
    all_box_verts = []
    all_box_colors = []
    all_box_indices = []
    vertex_offset = 0

    edge_pairs = [
        (0, 1), (1, 2), (2, 3), (3, 0),  # bottom face
        (4, 5), (5, 6), (6, 7), (7, 4),  # top face
        (0, 4), (1, 5), (2, 6), (3, 7),  # vertical edges
    ]

    for i, corners in enumerate(all_corners_list):
        all_box_verts.append(corners)
        color = box_colors_rgba[i]
        all_box_colors.append(
            np.tile(np.array(color, dtype=np.uint8), (8, 1))
        )
        indices = np.array(
            [(a + vertex_offset, b + vertex_offset) for a, b in edge_pairs],
            dtype=np.uint16,
        )
        all_box_indices.append(indices)
        vertex_offset += 8

    has_boxes = n_boxes > 0

    if has_boxes:
        box_verts = np.concatenate(all_box_verts, axis=0).astype(np.float32)
        box_vert_colors = np.concatenate(all_box_colors, axis=0).astype(np.uint8)
        box_indices = np.concatenate(all_box_indices, axis=0).flatten().astype(np.uint16)
    else:
        box_verts = np.zeros((0, 3), dtype=np.float32)
        box_vert_colors = np.zeros((0, 4), dtype=np.uint8)
        box_indices = np.zeros(0, dtype=np.uint16)

    # Build binary blob
    points_bin = _pad_to_4(points.tobytes())
    colors_bin = _pad_to_4(point_colors.tobytes())
    box_verts_bin = _pad_to_4(box_verts.tobytes())
    box_colors_bin = _pad_to_4(box_vert_colors.tobytes())
    box_indices_bin = _pad_to_4(box_indices.tobytes())

    blob = points_bin + colors_bin + box_verts_bin + box_colors_bin + box_indices_bin

    # Build glTF structure
    buffer_views = []
    accessors = []
    offset = 0

    # BV0: point positions
    buffer_views.append(pygltflib.BufferView(
        buffer=0, byteOffset=offset, byteLength=len(points_bin),
        target=pygltflib.ARRAY_BUFFER,
    ))
    accessors.append(pygltflib.Accessor(
        bufferView=0, componentType=pygltflib.FLOAT,
        count=n_points, type=pygltflib.VEC3,
        max=points.max(axis=0).tolist() if n_points > 0 else [0, 0, 0],
        min=points.min(axis=0).tolist() if n_points > 0 else [0, 0, 0],
    ))
    offset += len(points_bin)

    # BV1: point colors
    buffer_views.append(pygltflib.BufferView(
        buffer=0, byteOffset=offset, byteLength=len(colors_bin),
        target=pygltflib.ARRAY_BUFFER,
    ))
    accessors.append(pygltflib.Accessor(
        bufferView=1, componentType=pygltflib.UNSIGNED_BYTE,
        count=n_points, type=pygltflib.VEC4,
        normalized=True,
    ))
    offset += len(colors_bin)

    nodes = []
    meshes = []

    # Point cloud mesh (GL_POINTS = mode 0)
    meshes.append(pygltflib.Mesh(
        primitives=[pygltflib.Primitive(
            attributes=pygltflib.Attributes(POSITION=0, COLOR_0=1),
            mode=0,
        )]
    ))
    nodes.append(pygltflib.Node(mesh=0))

    if has_boxes:
        # BV2: box vertices
        buffer_views.append(pygltflib.BufferView(
            buffer=0, byteOffset=offset, byteLength=len(box_verts_bin),
            target=pygltflib.ARRAY_BUFFER,
        ))
        accessors.append(pygltflib.Accessor(
            bufferView=2, componentType=pygltflib.FLOAT,
            count=len(box_verts), type=pygltflib.VEC3,
            max=box_verts.max(axis=0).tolist(),
            min=box_verts.min(axis=0).tolist(),
        ))
        offset += len(box_verts_bin)

        # BV3: box colors
        buffer_views.append(pygltflib.BufferView(
            buffer=0, byteOffset=offset, byteLength=len(box_colors_bin),
            target=pygltflib.ARRAY_BUFFER,
        ))
        accessors.append(pygltflib.Accessor(
            bufferView=3, componentType=pygltflib.UNSIGNED_BYTE,
            count=len(box_vert_colors), type=pygltflib.VEC4,
            normalized=True,
        ))
        offset += len(box_colors_bin)

        # BV4: box indices
        buffer_views.append(pygltflib.BufferView(
            buffer=0, byteOffset=offset, byteLength=len(box_indices_bin),
            target=pygltflib.ELEMENT_ARRAY_BUFFER,
        ))
        accessors.append(pygltflib.Accessor(
            bufferView=4, componentType=pygltflib.UNSIGNED_SHORT,
            count=len(box_indices), type=pygltflib.SCALAR,
            max=[int(box_indices.max())],
            min=[int(box_indices.min())],
        ))
        offset += len(box_indices_bin)

        # Box wireframe mesh (GL_LINES = mode 1)
        meshes.append(pygltflib.Mesh(
            primitives=[pygltflib.Primitive(
                attributes=pygltflib.Attributes(POSITION=2, COLOR_0=3),
                indices=4,
                mode=1,
            )]
        ))
        nodes.append(pygltflib.Node(mesh=1))

    gltf = pygltflib.GLTF2(
        scene=0,
        scenes=[pygltflib.Scene(nodes=list(range(len(nodes))))],
        nodes=nodes,
        meshes=meshes,
        accessors=accessors,
        bufferViews=buffer_views,
        buffers=[pygltflib.Buffer(byteLength=len(blob))],
    )
    gltf.set_binary_blob(blob)
    gltf.save(output_path)

    return output_path


def _create_edge_cylinder(p1, p2, radius=0.01, sections=6):
    """Create a thin cylinder mesh between two 3D points.

    Args:
        p1, p2: (3,) endpoints.
        radius: cylinder radius.
        sections: number of radial segments.

    Returns:
        trimesh.Trimesh or None if edge is degenerate.
    """
    import trimesh

    segment = p2 - p1
    length = float(np.linalg.norm(segment))
    if length < 1e-6:
        return None

    cyl = trimesh.creation.cylinder(
        radius=radius, height=length, sections=sections
    )
    direction = segment / length

    # Align cylinder Z-axis to segment direction
    z_axis = np.array([0, 0, 1], dtype=np.float64)
    cross = np.cross(z_axis, direction)
    dot = np.dot(z_axis, direction)
    cross_len = np.linalg.norm(cross)

    if cross_len < 1e-6:
        R = np.eye(3) if dot > 0 else np.diag([1.0, -1.0, -1.0])
    else:
        cross_n = cross / cross_len
        angle = np.arccos(np.clip(dot, -1, 1))
        K = np.array([
            [0, -cross_n[2], cross_n[1]],
            [cross_n[2], 0, -cross_n[0]],
            [-cross_n[1], cross_n[0], 0],
        ])
        R = np.eye(3) + np.sin(angle) * K + (1 - np.cos(angle)) * (K @ K)

    T = np.eye(4)
    T[:3, :3] = R
    T[:3, 3] = (p1 + p2) / 2.0
    cyl.apply_transform(T)
    return cyl


def _create_wireframe_box_trimesh(corners, color_rgba, radius=0.015):
    """Create wireframe box as thin cylinders.

    Args:
        corners: (8, 3) corner positions in glTF coords.
        color_rgba: [R, G, B, A] uint8 color.
        radius: cylinder radius in meters.

    Returns:
        trimesh.Trimesh or None.
    """
    import trimesh

    edge_pairs = [
        (0, 1), (1, 2), (2, 3), (3, 0),
        (4, 5), (5, 6), (6, 7), (7, 4),
        (0, 4), (1, 5), (2, 6), (3, 7),
    ]
    parts = []
    for a, b in edge_pairs:
        cyl = _create_edge_cylinder(
            corners[a].astype(np.float64),
            corners[b].astype(np.float64),
            radius=radius,
            sections=6,
        )
        if cyl is not None:
            cyl.visual.face_colors = color_rgba
            parts.append(cyl)

    if parts:
        return trimesh.util.concatenate(parts)
    return None


def create_mesh_scene_glb(
    depth_map: np.ndarray,
    image: np.ndarray,
    intrinsics: np.ndarray,
    boxes3d_list: list[np.ndarray],
    output_path: str,
    max_depth: float = 20.0,
    padding: tuple[int, int, int, int] | None = None,
    remove_edge: bool = True,
    edge_rtol: float = 0.04,
) -> str:
    """Create GLB with textured mesh (MoGe2 style) + wireframe 3D boxes.

    Args:
        depth_map: (H, W) or (1, H, W) depth in meters.
        image: (H, W, 3) RGB uint8 [0-255].
        intrinsics: (3, 3) camera intrinsics.
        boxes3d_list: List of (M, 10) box arrays.
        output_path: Where to save .glb.
        max_depth: Max depth cutoff.
        padding: (left, right, top, bottom) to exclude.
        remove_edge: Remove depth discontinuity edges.
        edge_rtol: Edge detection tolerance.

    Returns:
        output_path.
    """
    import utils3d
    import trimesh
    from PIL import Image as PILImage

    # Prepare depth
    while depth_map.ndim > 2:
        depth_map = depth_map.squeeze(0)
    depth_map = depth_map.astype(np.float32)
    H, W = depth_map.shape

    # Prepare image
    while image.ndim > 3:
        image = image.squeeze(0)
    if image.ndim == 3 and image.shape[0] in (1, 3):
        image = np.transpose(image, (1, 2, 0))
    if image.ndim == 2:
        image = np.stack([image] * 3, axis=-1)
    if image.shape[0] != H or image.shape[1] != W:
        img_pil = PILImage.fromarray(image)
        img_pil = img_pil.resize((W, H), PILImage.BILINEAR)
        image = np.array(img_pil)

    # Build valid mask
    valid = (
        (depth_map > 0.01)
        & (depth_map < max_depth)
        & np.isfinite(depth_map)
    )

    if padding is not None:
        pad_left, pad_right, pad_top, pad_bottom = padding
        if pad_left > 0:
            valid[:, :pad_left] = False
        if pad_right > 0:
            valid[:, W - pad_right:] = False
        if pad_top > 0:
            valid[:pad_top, :] = False
        if pad_bottom > 0:
            valid[H - pad_bottom:, :] = False

    if remove_edge:
        edge = utils3d.np.depth_map_edge(depth_map, rtol=edge_rtol)
        valid &= ~edge

    # Unproject to 3D in OpenCV coords (x-right, y-down, z-forward)
    # Build mesh in OpenCV space first so triangle winding is correct,
    # then transform vertices to glTF coords afterwards.
    fx, fy = float(intrinsics[0, 0]), float(intrinsics[1, 1])
    cx, cy = float(intrinsics[0, 2]), float(intrinsics[1, 2])
    u, v = np.meshgrid(np.arange(W), np.arange(H))
    x3d = (u - cx) * depth_map / fx
    y3d = (v - cy) * depth_map / fy
    z3d = depth_map
    points_cv = np.stack([x3d, y3d, z3d], axis=-1).astype(np.float32)

    # UV map
    uv = np.stack(
        [u / max(W - 1, 1), v / max(H - 1, 1)], axis=-1
    ).astype(np.float32)

    # Colors normalized [0, 1]
    colors = image.astype(np.float32) / 255.0

    # Build triangulated mesh in OpenCV coords (preserves correct winding)
    faces, vertices, vertex_colors, vertex_uvs = (
        utils3d.np.build_mesh_from_map(
            points_cv, colors, uv, mask=valid, tri=True
        )
    )

    print(
        f"[Mesh] {vertices.shape[0]} vertices, "
        f"{faces.shape[0]} faces, "
        f"valid pixels: {valid.sum()}/{valid.size}"
    )

    if len(vertices) == 0:
        # Fallback to empty file
        scene = trimesh.Scene()
        scene.export(output_path)
        return output_path

    # Transform vertices: OpenCV (x, y, z) -> glTF (x, -y, -z)
    # This is a 180-degree rotation around x-axis (det=+1),
    # so it preserves triangle winding order.
    vertices = vertices * np.array([1.0, -1.0, -1.0], dtype=np.float32)

    # Trimesh flips UV v when exporting to GLB (OpenGL v=0 at bottom
    # vs glTF v=0 at top). Our UVs are already in image convention
    # (v=0 at top), so pre-flip to compensate for trimesh's flip.
    vertex_uvs = vertex_uvs.copy()
    vertex_uvs[:, 1] = 1.0 - vertex_uvs[:, 1]

    # Create textured mesh (process=False to avoid trimesh modifying geometry)
    texture_img = PILImage.fromarray(image)
    material = trimesh.visual.material.PBRMaterial(
        baseColorTexture=texture_img,
        metallicFactor=0.0,
        roughnessFactor=1.0,
    )
    visuals = trimesh.visual.TextureVisuals(
        uv=vertex_uvs, material=material
    )
    mesh = trimesh.Trimesh(
        vertices=vertices, faces=faces, visual=visuals,
        process=False,
    )

    scene = trimesh.Scene()
    scene.add_geometry(mesh, node_name="scene_mesh")

    # Add wireframe 3D boxes as thin cylinder geometry
    all_corners = []
    for boxes3d in boxes3d_list:
        if len(boxes3d) > 0:
            corners = boxes3d_to_corners(boxes3d)
            all_corners.extend(corners)

    box_colors = _generate_box_colors(len(all_corners))
    for i, corners in enumerate(all_corners):
        box_mesh = _create_wireframe_box_trimesh(
            corners, box_colors[i], radius=0.015
        )
        if box_mesh is not None:
            scene.add_geometry(
                box_mesh, node_name=f"box_{i}"
            )

    scene.export(output_path)
    return output_path