Asnly committed on
Commit
7591256
·
verified ·
1 Parent(s): e20c454

Deploy Space app files

Browse files
Files changed (3) hide show
  1. app.py +506 -0
  2. requirements.txt +5 -0
  3. utils.py +166 -0
app.py ADDED
@@ -0,0 +1,506 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ #!/usr/bin/env python3
2
+ """HF Space entry point for 3DReflecNet dataset preview.
3
+
4
+ Loads the hybrid Hugging Face release using the `datasets` library:
5
+ 1. data/metadata/train.parquet for filtering and GLB paths
6
+ 2. data/preview/preview.parquet for the small image preview subset
7
+ """
8
+ from __future__ import annotations
9
+
10
+ import atexit
11
+ import io
12
+ import os
13
+ import shutil
14
+ import tempfile
15
+ from pathlib import Path
16
+ from typing import Any
17
+
18
+ import gradio as gr
19
+ import pandas as pd
20
+ from datasets import load_dataset
21
+ from huggingface_hub import hf_hub_download
22
+ from PIL import Image
23
+
24
+ from utils import (
25
+ BOOL_FILTER_CHOICES,
26
+ FILTER_ALL,
27
+ aggregate_by_model,
28
+ filter_dataframe_advanced,
29
+ format_instance_choice,
30
+ format_model_choice,
31
+ get_distinct_text_choices,
32
+ logger,
33
+ parse_choice_index,
34
+ require_bool_columns,
35
+ require_columns,
36
+ require_text_columns,
37
+ setup_logging,
38
+ )
39
+
40
+ DATASET_REPO = os.environ.get("DATASET_REPO", "3DReflecNet/3DReflecNet")
41
+ HF_TOKEN = os.environ.get("HF_TOKEN", None)
42
+ MAX_RESULTS = 300
43
+ BOOL_COLUMNS = ["hasGlass", "isGenerated", "transparent", "near_light"]
44
+
45
+ _GLB_CACHE_DIR = Path(tempfile.mkdtemp(prefix="glb_cache_"))
46
+ atexit.register(shutil.rmtree, str(_GLB_CACHE_DIR), True)
47
+
48
+
49
+ # ---------------------------------------------------------------------------
50
+ # Data loading
51
+ # ---------------------------------------------------------------------------
52
+
53
def load_metadata() -> pd.DataFrame:
    """Load lightweight metadata columns (one row per instance).

    Streams data/metadata/train.parquet from the Hub, deduplicates rows by
    instance_id (verifying duplicates are exact copies), and validates the
    text/boolean column contracts before returning the frame.
    """
    METADATA_COLS = [
        "instance_id", "ply_path", "glb_path",
        "main_category", "sub_category", "model_name",
        "material_name", "env_name",
        "hasGlass", "isGenerated", "transparent", "near_light",
    ]
    stream = load_dataset(
        DATASET_REPO,
        data_files="data/metadata/train.parquet",
        split="train",
        streaming=True,
        token=HF_TOKEN,
    ).select_columns(METADATA_COLS)

    # Deduplicate by instance_id while ensuring duplicates agree exactly;
    # dict preserves insertion order, so row order matches the stream.
    unique_rows: dict[str, dict[str, Any]] = {}
    for record in stream:
        entry = {name: record[name] for name in METADATA_COLS}
        instance_id = entry["instance_id"]
        if not isinstance(instance_id, str) or not instance_id.strip():
            raise ValueError(f"Invalid instance_id in metadata row: {instance_id!r}")
        known = unique_rows.get(instance_id)
        if known is not None:
            if known != entry:
                raise ValueError(f"Inconsistent metadata rows for instance_id {instance_id!r}.")
            continue
        unique_rows[instance_id] = entry

    df = pd.DataFrame(list(unique_rows.values()), columns=METADATA_COLS)
    require_text_columns(
        df,
        [
            "instance_id", "ply_path", "glb_path", "main_category", "sub_category",
            "model_name", "material_name", "env_name",
        ],
        "metadata parquet",
    )
    require_bool_columns(df, BOOL_COLUMNS, "metadata parquet")
    return df
92
+
93
+
94
def load_preview_dataframe() -> pd.DataFrame:
    """Load the small preview Parquet fully into memory.

    Validates the expected schema: text/boolean metadata columns, an integer
    frame_id, and non-empty encoded image bytes for the four image columns.
    """
    PREVIEW_COLS = [
        "instance_id", "split", "frame_id", "rgb", "mask",
        "depth_preview", "normal_preview",
        "main_category", "sub_category", "model_name",
        "material_name", "env_name",
        "hasGlass", "isGenerated", "transparent", "near_light",
    ]
    dataset = load_dataset(
        DATASET_REPO,
        data_files="data/preview/preview.parquet",
        split="train",
        streaming=False,
        token=HF_TOKEN,
    ).select_columns(PREVIEW_COLS)
    df = pd.DataFrame(list(dataset))

    require_columns(df, PREVIEW_COLS, "preview parquet")
    require_text_columns(
        df,
        [
            "instance_id", "split", "main_category", "sub_category",
            "model_name", "material_name", "env_name",
        ],
        "preview parquet",
    )
    require_bool_columns(df, BOOL_COLUMNS, "preview parquet")

    frame_ids = df["frame_id"]
    if frame_ids.isna().any() or not pd.api.types.is_integer_dtype(frame_ids):
        raise TypeError(f"Expected integer dtype for column 'frame_id' in preview parquet, got {df['frame_id'].dtype}.")

    # Every image column must hold non-empty encoded bytes for every row.
    for column in ["rgb", "mask", "depth_preview", "normal_preview"]:
        bad = df[column].map(lambda payload: not isinstance(payload, (bytes, bytearray)) or len(payload) == 0)
        if bad.any():
            raise TypeError(f"Expected non-empty binary values for column {column!r} in preview parquet.")
    return df
128
+
129
+
130
def load_instance_frames(
    preview_df: pd.DataFrame, instance_id: str, split: str = "train", max_frames: int = 50,
) -> list[dict[str, Any]]:
    """Load decoded preview images for one instance from the preview frame.

    Frames are ordered by frame_id and capped at max_frames. When the
    requested split is "train" and yields nothing, any split is accepted
    as a fallback. Raises TypeError on empty/non-bytes image payloads.
    """
    iid = str(instance_id)
    subset = preview_df[
        (preview_df["instance_id"].astype(str) == iid)
        & (preview_df["split"].astype(str) == split)
    ].copy()
    records = subset.sort_values("frame_id").head(max_frames).to_dict(orient="records")

    # Fallback: if the default "train" split has no rows, take frames
    # from whatever split this instance appears in.
    if not records and split == "train":
        subset = preview_df[
            preview_df["instance_id"].astype(str) == iid
        ].copy()
        records = subset.sort_values(["split", "frame_id"]).head(max_frames).to_dict(orient="records")

    frames: list[dict[str, Any]] = []
    for record in records:
        fid = int(record["frame_id"])
        item: dict[str, Any] = {"frame_id": fid}
        for key in ("rgb", "mask", "depth_preview", "normal_preview"):
            payload = record[key]
            if not isinstance(payload, (bytes, bytearray)) or not payload:
                raise TypeError(f"Expected non-empty image bytes for {key} frame {fid}.")
            # Copy so the PIL file handle can be closed immediately.
            with Image.open(io.BytesIO(payload)) as img:
                item[key] = img.copy()
        frames.append(item)
    return frames
160
+
161
+
162
def render_frame_gallery(frame_items: list[dict[str, Any]], frame_index: float) -> list[tuple[Any, str]]:
    """Build the (image, caption) gallery for one 1-based frame index.

    The index is rounded and clamped into range; captions combine the
    modality label with the zero-padded frame id.
    """
    if not frame_items:
        return []

    # Slider values are 1-based floats; clamp into the list's valid range.
    position = int(round(frame_index)) - 1
    position = min(max(position, 0), len(frame_items) - 1)
    frame = frame_items[position]
    fid = int(frame["frame_id"])

    modalities = (
        ("rgb", "RGB"),
        ("mask", "Mask"),
        ("depth_preview", "Depth"),
        ("normal_preview", "Normal"),
    )
    return [(frame[key], f"{caption} frame_{fid:05d}") for key, caption in modalities]
181
+
182
+
183
+ # ---------------------------------------------------------------------------
184
+ # Helpers
185
+ # ---------------------------------------------------------------------------
186
+
187
def download_glb(glb_path: str) -> str:
    """Download a pre-converted GLB file from the HF dataset repo.

    Args:
        glb_path: Repo-relative path of the GLB file inside DATASET_REPO.

    Returns:
        Local filesystem path to the cached GLB file.

    Raises:
        ValueError: If glb_path is empty.
    """
    if not glb_path:
        raise ValueError("GLB path is required.")
    # BUG FIX: cache under the full repo-relative path, not just the basename.
    # Caching by Path(glb_path).name alone made distinct repo files that share
    # a file name collide, silently serving the wrong cached model.
    local = _GLB_CACHE_DIR / glb_path
    if local.exists():
        return str(local)
    downloaded = hf_hub_download(
        repo_id=DATASET_REPO,
        filename=glb_path,
        repo_type="dataset",
        token=HF_TOKEN,
    )
    local.parent.mkdir(parents=True, exist_ok=True)
    shutil.copy2(downloaded, str(local))
    logger.info("GLB ready: %s", local)
    return str(local)
203
+
204
+
205
def build_stats_markdown(df: pd.DataFrame) -> str:
    """Render a one-paragraph Markdown overview of the dataset.

    Reports instance/model/category counts plus the top-10 main-category
    distribution from the given metadata frame.
    """
    categories = df["main_category"].dropna().astype(str)
    top_counts = categories.value_counts().head(10)
    distribution = " | ".join(f"**{name}**: {count}" for name, count in top_counts.items())
    header = (
        f"**Dataset Overview** — "
        f"**{len(df)}** instances, "
        f"**{df['model_name'].nunique()}** models, "
        f"**{categories.nunique()}** main categories\n\n"
    )
    return header + f"Distribution: {distribution}"
219
+
220
+
221
+ # ---------------------------------------------------------------------------
222
+ # App builder
223
+ # ---------------------------------------------------------------------------
224
+
225
def build_app(df: pd.DataFrame, preview_df: pd.DataFrame) -> gr.Blocks:
    """Build the two-tab Gradio UI (3D Viewer + Image Viewer).

    Args:
        df: Per-instance metadata frame; includes ``glb_path`` needed by the
            3D tab's model aggregation.
        preview_df: Per-frame preview subset with embedded image bytes,
            consumed by ``load_instance_frames``.

    Returns:
        The assembled ``gr.Blocks`` application (not yet launched).
    """
    model_name_choices = get_distinct_text_choices(df, "model_name")
    material_name_choices = get_distinct_text_choices(df, "material_name")
    env_name_choices = get_distinct_text_choices(df, "env_name")

    model_display_cols = [
        "model_name",
        "material_name",
        "env_name",
        "hasGlass",
        "isGenerated",
        "transparent",
        "near_light",
        "instance_count",
        "instance_ids",
    ]
    instance_display_cols = [
        "instance_id",
        "model_name",
        "material_name",
        "env_name",
        "hasGlass",
        "isGenerated",
        "transparent",
        "near_light",
    ]
    model_extra_cols = [
        "material_name",
        "env_name",
        "hasGlass",
        "isGenerated",
        "transparent",
        "near_light",
        "glb_path",
    ]

    stats_md = build_stats_markdown(df)

    # ---- 3D Viewer callbacks ----

    def search_models(
        model_name: str,
        material_name: str,
        env_name: str,
        has_glass: str,
        is_generated: str,
        transparent: str,
        near_light: str,
    ):
        # BUG FIX: filter the metadata frame `df`, not `preview_df`.
        # aggregate_by_model is called with model_extra_cols containing
        # "glb_path", a column that exists only in the metadata parquet
        # (PREVIEW_COLS has no glb_path), so filtering preview_df raised
        # KeyError for every non-empty search.
        filtered = filter_dataframe_advanced(
            df,
            model_name=model_name,
            material_name=material_name,
            env_name=env_name,
            has_glass=has_glass,
            is_generated=is_generated,
            transparent=transparent,
            near_light=near_light,
        )
        aggregated = aggregate_by_model(filtered, extra_columns=model_extra_cols)
        shown = aggregated.head(MAX_RESULTS).copy()
        rows = shown.to_dict(orient="records")
        choices = [format_model_choice(i, r) for i, r in enumerate(rows)]
        selected = choices[0] if choices else None
        summary = (
            f"Matched **{len(aggregated)}** models, showing **{len(rows)}**. "
            f"Total instances: **{len(filtered)}**."
        )
        table = shown[model_display_cols] if not shown.empty else pd.DataFrame(columns=model_display_cols)
        meta = rows[0] if rows else {}
        return summary, table, gr.update(choices=choices, value=selected), rows, meta

    def on_model_select(choice: str, rows: list[dict[str, Any]]):
        # Show the metadata JSON for the picked model; {} when selection is stale.
        if not choice or not rows:
            return {}
        idx = parse_choice_index(choice, len(rows))
        if idx is None:
            return {}
        return rows[idx]

    def on_load_3d(rows: list[dict[str, Any]], choice: str):
        # Resolve the selected row's GLB path and download it for the viewer.
        if not choice or not rows:
            return None
        idx = parse_choice_index(choice, len(rows))
        if idx is None:
            return None
        glb = rows[idx]["glb_path"]
        logger.info("on_load_3d: glb_path=%r", glb)
        if not isinstance(glb, str) or not glb.strip():
            raise ValueError(f"Selected model row does not contain a GLB path: {rows[idx]!r}")
        return download_glb(glb)

    # ---- Image Viewer callbacks ----

    def search_instances(
        model_name: str,
        material_name: str,
        env_name: str,
        has_glass: str,
        is_generated: str,
        transparent: str,
        near_light: str,
    ):
        filtered = filter_dataframe_advanced(
            df,
            model_name=model_name,
            material_name=material_name,
            env_name=env_name,
            has_glass=has_glass,
            is_generated=is_generated,
            transparent=transparent,
            near_light=near_light,
        )
        shown = filtered.head(MAX_RESULTS).copy()
        rows = shown[instance_display_cols].to_dict(orient="records")
        choices = [format_instance_choice(i, r) for i, r in enumerate(rows)]
        selected = choices[0] if choices else None
        summary = f"Matched **{len(filtered)}** preview instances, showing **{len(rows)}**."
        table = shown[instance_display_cols] if not shown.empty else pd.DataFrame(columns=instance_display_cols)
        return summary, table, gr.update(choices=choices, value=selected), rows

    def on_load_images(rows: list[dict[str, Any]], choice: str):
        # Load up to 50 frames for the selected instance; disable the slider
        # when nothing can be shown.
        slider_empty = gr.update(minimum=1, maximum=1, step=1, value=1, interactive=False)
        if not choice or not rows:
            return [], slider_empty, []
        idx = parse_choice_index(choice, len(rows))
        if idx is None:
            return [], slider_empty, []
        instance_id = rows[idx]["instance_id"]
        if not isinstance(instance_id, str) or not instance_id.strip():
            raise ValueError(f"Selected instance row has invalid instance_id: {rows[idx]!r}")
        logger.info("Loading images for instance: %s", instance_id)
        frame_items = load_instance_frames(preview_df, instance_id, split="train", max_frames=50)
        if not frame_items:
            return [], slider_empty, []
        slider_ready = gr.update(
            minimum=1,
            maximum=len(frame_items),
            step=1,
            value=1,
            interactive=True,
        )
        return render_frame_gallery(frame_items, 1), slider_ready, frame_items

    def on_frame_change(frame_idx: float, frame_items: list[dict[str, Any]]):
        return render_frame_gallery(frame_items, frame_idx)

    # ---- UI ----

    with gr.Blocks(title="3DReflecNet Dataset Explorer") as demo:
        gr.Markdown("# 3DReflecNet Dataset Explorer")
        gr.Markdown(
            "Filter by model/material/environment dropdowns and boolean scene tags, then preview 3D assets or the sampled image subset."
        )
        gr.Markdown(stats_md)

        with gr.Tabs():
            # === Tab 1: 3D Viewer ===
            with gr.TabItem("3D Viewer"):
                with gr.Row():
                    m3d_model_name = gr.Dropdown(label="model_name", choices=model_name_choices, value=FILTER_ALL)
                    m3d_material_name = gr.Dropdown(label="material_name", choices=material_name_choices, value=FILTER_ALL)
                    m3d_env_name = gr.Dropdown(label="env_name", choices=env_name_choices, value=FILTER_ALL)
                with gr.Row():
                    m3d_has_glass = gr.Dropdown(label="hasGlass", choices=BOOL_FILTER_CHOICES, value=FILTER_ALL)
                    m3d_is_generated = gr.Dropdown(label="isGenerated", choices=BOOL_FILTER_CHOICES, value=FILTER_ALL)
                    m3d_transparent = gr.Dropdown(label="transparent", choices=BOOL_FILTER_CHOICES, value=FILTER_ALL)
                    m3d_near_light = gr.Dropdown(label="near_light", choices=BOOL_FILTER_CHOICES, value=FILTER_ALL)
                m3d_btn = gr.Button("Search", variant="primary")

                m3d_summary = gr.Markdown("Click **Search** to browse models.")
                m3d_table = gr.Dataframe(headers=model_display_cols, interactive=False, wrap=True)
                m3d_select = gr.Dropdown(label="Select model", choices=[], value=None)
                m3d_meta = gr.JSON(label="Model Metadata")

                m3d_load_btn = gr.Button("Load 3D Preview", variant="primary")
                m3d_viewer = gr.Model3D(
                    label="3D Preview (GLB)",
                    clear_color=(0.35, 0.35, 0.38, 1.0),
                    camera_position=(35, 70, 3.5),
                )

                # Holds the aggregated model rows backing the dropdown choices.
                m3d_state = gr.State([])

                m3d_btn.click(
                    fn=search_models,
                    inputs=[
                        m3d_model_name,
                        m3d_material_name,
                        m3d_env_name,
                        m3d_has_glass,
                        m3d_is_generated,
                        m3d_transparent,
                        m3d_near_light,
                    ],
                    outputs=[m3d_summary, m3d_table, m3d_select, m3d_state, m3d_meta],
                )
                m3d_select.change(
                    fn=on_model_select,
                    inputs=[m3d_select, m3d_state],
                    outputs=[m3d_meta],
                )
                m3d_load_btn.click(
                    fn=on_load_3d,
                    inputs=[m3d_state, m3d_select],
                    outputs=[m3d_viewer],
                )

            # === Tab 2: Image Viewer ===
            with gr.TabItem("Image Viewer"):
                with gr.Row():
                    img_model_name = gr.Dropdown(label="model_name", choices=model_name_choices, value=FILTER_ALL)
                    img_material_name = gr.Dropdown(label="material_name", choices=material_name_choices, value=FILTER_ALL)
                    img_env_name = gr.Dropdown(label="env_name", choices=env_name_choices, value=FILTER_ALL)
                with gr.Row():
                    img_has_glass = gr.Dropdown(label="hasGlass", choices=BOOL_FILTER_CHOICES, value=FILTER_ALL)
                    img_is_generated = gr.Dropdown(label="isGenerated", choices=BOOL_FILTER_CHOICES, value=FILTER_ALL)
                    img_transparent = gr.Dropdown(label="transparent", choices=BOOL_FILTER_CHOICES, value=FILTER_ALL)
                    img_near_light = gr.Dropdown(label="near_light", choices=BOOL_FILTER_CHOICES, value=FILTER_ALL)
                img_btn = gr.Button("Search", variant="primary")

                img_summary = gr.Markdown("Click **Search** to browse instances.")
                img_table = gr.Dataframe(headers=instance_display_cols, interactive=False, wrap=True)
                img_select = gr.Dropdown(label="Select instance", choices=[], value=None)

                img_load_btn = gr.Button("Load Instance Frames", variant="primary")
                img_gallery = gr.Gallery(label="Frame Images", columns=4, rows=1, object_fit="contain", height="auto")
                img_frame_slider = gr.Slider(
                    label="Frame",
                    minimum=1,
                    maximum=1,
                    step=1,
                    value=1,
                    interactive=False,
                )

                # img_state: selected instance rows; img_frame_state: loaded frames.
                img_state = gr.State([])
                img_frame_state = gr.State([])

                img_btn.click(
                    fn=search_instances,
                    inputs=[
                        img_model_name,
                        img_material_name,
                        img_env_name,
                        img_has_glass,
                        img_is_generated,
                        img_transparent,
                        img_near_light,
                    ],
                    outputs=[img_summary, img_table, img_select, img_state],
                )
                img_load_btn.click(
                    fn=on_load_images,
                    inputs=[img_state, img_select],
                    outputs=[img_gallery, img_frame_slider, img_frame_state],
                )
                img_frame_slider.change(
                    fn=on_frame_change,
                    inputs=[img_frame_slider, img_frame_state],
                    outputs=[img_gallery],
                )

    return demo
489
+
490
+
491
def main() -> None:
    """Entry point: configure logging, load both parquet tables, launch the UI."""
    setup_logging()
    logger.info("DATASET_REPO = %r", DATASET_REPO)
    token_length = len(HF_TOKEN) if HF_TOKEN else 0
    logger.info("HF_TOKEN set = %s, length = %d", HF_TOKEN is not None, token_length)

    logger.info("Loading dataset metadata from Hugging Face Hub...")
    metadata_df = load_metadata()
    logger.info("Loaded %d instances.", len(metadata_df))

    logger.info("Loading preview subset from Hugging Face Hub...")
    preview_df = load_preview_dataframe()
    logger.info("Loaded %d preview rows.", len(preview_df))

    build_app(metadata_df, preview_df).launch()
503
+
504
+
505
# Allow running `python app.py` directly (the Space runtime also imports this).
if __name__ == "__main__":
    main()
requirements.txt ADDED
@@ -0,0 +1,5 @@
 
 
 
 
 
 
1
+ gradio>=4.0.0
2
+ datasets>=2.14.0
3
+ huggingface_hub>=0.17.0
4
+ pandas>=1.5.0
5
+ Pillow>=9.0.0
utils.py ADDED
@@ -0,0 +1,166 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ #!/usr/bin/env python3
2
+ """Shared utilities for 3DReflecNet HF release apps."""
3
+ from __future__ import annotations
4
+
5
+ import logging
6
+ from typing import Any
7
+
8
+ import pandas as pd
9
+
10
+ logger = logging.getLogger("hf_release")
11
+
12
+ FILTER_ALL = "ALL"
13
+ BOOL_FILTER_CHOICES = [FILTER_ALL, "True", "False"]
14
+
15
+
16
def setup_logging(level: int = logging.INFO) -> None:
    """Configure root logging for hf_release modules (no-op if already configured)."""
    log_format = "%(asctime)s [%(levelname)s] %(name)s: %(message)s"
    date_format = "%Y-%m-%d %H:%M:%S"
    logging.basicConfig(level=level, format=log_format, datefmt=date_format)
23
+
24
+
25
def require_columns(df: pd.DataFrame, columns: list[str], context: str) -> None:
    """Raise KeyError if any of *columns* is absent from *df*."""
    absent = [name for name in columns if name not in df.columns]
    if absent:
        raise KeyError(f"Missing required column(s) in {context}: {', '.join(absent)}")
29
+
30
+
31
def require_bool_columns(df: pd.DataFrame, columns: list[str], context: str) -> None:
    """Validate that *columns* exist, contain no nulls, and have boolean dtype."""
    require_columns(df, columns, context)
    for name in columns:
        series = df[name]
        # Null check first: a nullable boolean column is a data error here.
        if series.isna().any():
            raise ValueError(f"Boolean column {name!r} contains null values in {context}.")
        if not pd.api.types.is_bool_dtype(series):
            raise TypeError(f"Expected boolean dtype for column {name!r} in {context}, got {series.dtype}.")
38
+
39
+
40
def require_text_columns(df: pd.DataFrame, columns: list[str], context: str) -> None:
    """Validate that *columns* exist, contain no nulls, and hold str values."""
    require_columns(df, columns, context)
    for name in columns:
        series = df[name]
        if series.isna().any():
            raise ValueError(f"Text column {name!r} contains null values in {context}.")
        non_str = series.map(lambda value: not isinstance(value, str))
        if non_str.any():
            # Report the concrete offending type to speed up debugging.
            offender = type(df.loc[non_str, name].iloc[0]).__name__
            raise TypeError(f"Expected string values for column {name!r} in {context}, got {offender}.")
49
+
50
+
51
def parse_bool_filter_value(selected_value: str) -> bool:
    """Map the dropdown strings "True"/"False" to booleans; reject anything else."""
    mapping = {"True": True, "False": False}
    try:
        return mapping[selected_value]
    except KeyError:
        raise ValueError(f"Unsupported boolean filter value: {selected_value!r}") from None
57
+
58
+
59
def apply_bool_filter(df: pd.DataFrame, column: str, selected_value: str) -> pd.DataFrame:
    """Apply a tri-state (ALL/True/False) boolean filter to one column."""
    if selected_value == FILTER_ALL:
        return df
    if column not in df.columns:
        raise KeyError(f"Missing required boolean filter column: {column}")
    series = df[column]
    if not pd.api.types.is_bool_dtype(series):
        raise TypeError(f"Expected boolean dtype for column {column!r}, got {series.dtype}.")
    wanted = parse_bool_filter_value(selected_value)
    return df[series == wanted]
69
+
70
+
71
def get_distinct_text_choices(df: pd.DataFrame, column: str, all_label: str = FILTER_ALL) -> list[str]:
    """Build sorted dropdown choices (ALL first) from distinct non-empty values."""
    if column not in df.columns:
        raise KeyError(f"Missing required text choice column: {column}")
    distinct = {
        stripped
        for raw in df[column].dropna().tolist()
        if (stripped := str(raw).strip())
    }
    if not distinct:
        raise ValueError(f"Column {column!r} has no non-empty values.")
    return [all_label, *sorted(distinct)]
83
+
84
+
85
def _apply_text_equals(df: pd.DataFrame, column: str, selected_value: str, all_label: str = FILTER_ALL) -> pd.DataFrame:
    """Keep rows whose stripped text equals the selection; ALL/empty is a no-op."""
    if column not in df.columns:
        raise KeyError(f"Missing required text filter column: {column}")
    wanted = (selected_value or "").strip()
    if not wanted or wanted == all_label:
        return df
    return df[df[column].astype(str).str.strip() == wanted]
92
+
93
+
94
def filter_dataframe_advanced(
    df: pd.DataFrame,
    model_name: str = FILTER_ALL,
    material_name: str = FILTER_ALL,
    env_name: str = FILTER_ALL,
    has_glass: str = FILTER_ALL,
    is_generated: str = FILTER_ALL,
    transparent: str = FILTER_ALL,
    near_light: str = FILTER_ALL,
) -> pd.DataFrame:
    """Filter by exact model/material/environment text plus four tri-state bools."""
    text_filters = (
        ("model_name", model_name),
        ("material_name", material_name),
        ("env_name", env_name),
    )
    bool_filters = (
        ("hasGlass", has_glass),
        ("isGenerated", is_generated),
        ("transparent", transparent),
        ("near_light", near_light),
    )
    result = df
    for column, value in text_filters:
        result = _apply_text_equals(result, column, value)
    for column, value in bool_filters:
        result = apply_bool_filter(result, column, value)
    return result.reset_index(drop=True)
114
+
115
+
116
+ def aggregate_by_model(
117
+ df: pd.DataFrame,
118
+ extra_columns: list[str] | None = None,
119
+ ) -> pd.DataFrame:
120
+ """Group instances by model_name, counting instances and collecting IDs."""
121
+ base_cols = ["model_name", "main_category", "sub_category", "instance_count", "instance_ids"]
122
+ extra = extra_columns or []
123
+ all_cols = base_cols + extra
124
+
125
+ if df.empty:
126
+ return pd.DataFrame(columns=all_cols)
127
+
128
+ require_columns(df, ["model_name", "main_category", "sub_category", "instance_id"] + extra, "model aggregation")
129
+
130
+ rows: list[dict[str, Any]] = []
131
+ for model_name, group in df.groupby("model_name", dropna=False, sort=True):
132
+ instance_ids = sorted({
133
+ str(v) for v in group["instance_id"].dropna().tolist() if str(v).strip()
134
+ })
135
+ row: dict[str, Any] = {
136
+ "model_name": str(model_name),
137
+ "main_category": str(group["main_category"].iloc[0]),
138
+ "sub_category": str(group["sub_category"].iloc[0]),
139
+ "instance_count": len(instance_ids),
140
+ "instance_ids": "\n".join(instance_ids),
141
+ }
142
+ for col in extra:
143
+ candidates = [str(v) for v in group[col].dropna().tolist() if str(v).strip()]
144
+ row[col] = candidates[0] if candidates else ""
145
+ rows.append(row)
146
+ return pd.DataFrame(rows)
147
+
148
+
149
def format_model_choice(index: int, row: dict[str, Any]) -> str:
    """Render an aggregated-model row as a dropdown label (zero-padded index)."""
    model = row["model_name"]
    count = row["instance_count"]
    return f"{index:04d} | {model} | instances {count}"
151
+
152
+
153
def format_instance_choice(index: int, row: dict[str, Any]) -> str:
    """Render an instance row as a dropdown label (zero-padded index)."""
    instance = row["instance_id"]
    model = row["model_name"]
    return f"{index:04d} | {instance} | {model}"
155
+
156
+
157
+ def parse_choice_index(choice: str, length: int) -> int | None:
158
+ """Extract the numeric index from a formatted choice string."""
159
+ index_str = choice.split("|", 1)[0].strip()
160
+ try:
161
+ idx = int(index_str)
162
+ except ValueError:
163
+ return None
164
+ if idx < 0 or idx >= length:
165
+ return None
166
+ return idx