#!/usr/bin/env python3 """Gradio Space styled like the Hugging Face dataset card viewer. Wide schema (one row per volume): volume_id, split, image_pa, image_ll, mask_labels_pa, masks_pa (list of binary masks for every anatomy class), mask_labels_ll, masks_ll. The masks_pa / masks_ll cells are rendered as horizontal scrollable thumb strips. Each thumbnail is the binary mask blended onto the corresponding chest projection (color = body system) so reviewers can recognise the anatomy at a glance. Tooltip shows the label. """ from __future__ import annotations import base64 import colorsys import io import json import math from functools import lru_cache from pathlib import Path import gradio as gr import numpy as np import pyarrow.parquet as pq from PIL import Image ROOT = Path(__file__).parent DATA = ROOT / "data" SYSTEMS_PATH = DATA / "label_systems.json" PAGE_SIZE = 20 # volumes per page (lazy mask thumbnails — only current page is decoded) THUMB_PX = 56 MASKS_PER_LINE = 10 # mask thumbnails per row inside a masks_pa / masks_ll cell MASK_THUMB_PX = 48 # rendered size of each mask thumbnail (must match CSS below) def _load_rows() -> list[dict]: out: list[dict] = [] for p in sorted(DATA.rglob("*.parquet")): out.extend(pq.read_table(p).to_pylist()) return out ROWS = _load_rows() LABEL_TO_SYSTEM: dict[str, str] = ( json.loads(SYSTEMS_PATH.read_text()) if SYSTEMS_PATH.is_file() else {} ) SYSTEMS = sorted(set(LABEL_TO_SYSTEM.values())) or ["Other"] SPLITS = sorted({r["split"] for r in ROWS}) ALL_LABELS = sorted({lab for r in ROWS for lab in r.get("mask_labels_pa", [])}) def _hue(idx: int, n: int) -> float: return (idx / max(n, 1)) % 1.0 SYSTEM_HUE = {s: _hue(i, len(SYSTEMS)) for i, s in enumerate(SYSTEMS)} @lru_cache(maxsize=2048) def color_for_system(system: str) -> tuple[int, int, int]: h = SYSTEM_HUE.get(system, 0.0) r, g, b = colorsys.hsv_to_rgb(h, 0.85, 1.0) return int(r * 255), int(g * 255), int(b * 255) def _decode_image(struct) -> Image.Image: return Image.open(io.BytesIO(struct["bytes"])).convert("RGB") def _decode_mask(struct) -> np.ndarray: m = np.array(Image.open(io.BytesIO(struct["bytes"])).convert("L")) return (m > 128).astype(np.uint8) def _b64_of_pil(img: Image.Image, quality: int = 80) -> str: buf = io.BytesIO() img.save(buf, format="JPEG", quality=quality, optimize=True) return base64.b64encode(buf.getvalue()).decode() def _mask_thumb(mask: np.ndarray, size: int) -> Image.Image: """Render a raw binary mask as a white-on-black thumbnail.""" img = Image.fromarray((mask * 255).astype(np.uint8), mode="L").convert("RGB") img.thumbnail((size, size), Image.LANCZOS) return img # ── Lightweight startup: only decode the two base images per volume. # Mask thumbnails are decoded lazily per page through get_thumbs(). print(f"[init] indexing {len(ROWS)} volumes (lazy mask thumbnails) ...") RENDERED: list[dict] = [] for idx, r in enumerate(ROWS): pa_img = _decode_image(r["image_pa"]) ll_img = _decode_image(r["image_ll"]) RENDERED.append({ "idx": idx, "volume_id": r["volume_id"], "split": r["split"], "image_pa_b64": _b64_of_pil(pa_img), "image_ll_b64": _b64_of_pil(ll_img), "n_pa": len(r.get("mask_labels_pa", [])), "n_ll": len(r.get("mask_labels_ll", [])), }) print(f"[init] indexed {len(RENDERED)} volumes; masks will be rendered on demand.") # ── Lazy mask thumbnail rendering ─────────────────────────────────────── # Each call decodes one volume + view's worth of binary masks once and # caches the result. LRU keeps memory bounded when the user navigates # through many pages. @lru_cache(maxsize=512) def get_thumbs(idx: int, view: str) -> tuple: r = ROWS[idx] labels = r[f"mask_labels_{view}"] masks = r[f"masks_{view}"] out = [] for lab, mstruct in zip(labels, masks): sysname = LABEL_TO_SYSTEM.get(lab, "Other") thumb = _mask_thumb(_decode_mask(mstruct), THUMB_PX) out.append((lab, sysname, _b64_of_pil(thumb, quality=72))) return tuple(out) def filter_rows(split: str, q: str) -> list[dict]: out = RENDERED if split != "All": out = [r for r in out if r["split"] == split] if q: ql = q.lower() out = [r for r in out if ql in r["volume_id"].lower()] return out CSS = """ /* ── Hugging Face Dataset Viewer mimic ──────────────────────────────── */ /* Reset host page so our own wrapper controls all spacing */ html, body { margin: 0 !important; padding: 0 !important; background: #ffffff !important; } .gradio-container { max-width: none !important; font-family: ui-sans-serif, system-ui, -apple-system, BlinkMacSystemFont, "Segoe UI", Roboto, "Helvetica Neue", Arial, sans-serif !important; color: #111827 !important; background: #ffffff !important; margin: 0 !important; padding: 0 !important; box-sizing: border-box !important; } /* Our own page wrapper carries every bit of the outer spacing. This bypasses whatever Gradio's main/contain/app/wrap wrappers do. */ .dc-page { max-width: 1200px !important; margin: 0 auto !important; padding: 56px 64px 64px !important; box-sizing: border-box !important; width: 100% !important; background: #ffffff !important; } .dc-page > * { width: 100% !important; } /* Title block — aligned with the card's left edge, extra top breathing room */ .dc-title { margin: 24px 0 32px 0 !important; padding: 0 !important; } .dc-title > * { margin-left: 0 !important; padding-left: 0 !important; } .dc-title h2 { font-size: 22px !important; font-weight: 600 !important; color: #111827 !important; margin: 0 !important; padding: 0 !important; letter-spacing: -0.015em !important; } .dc-title h2::after { content: "Dataset Viewer"; display: block; color: #6b7280; font-size: 11px; font-weight: 500; text-transform: uppercase; letter-spacing: 0.06em; margin-top: 6px; } .dc-title p { color: #6b7280 !important; font-size: 13px !important; margin: 6px 0 0 !important; line-height: 1.5 !important; } .dc-title code { background: #f3f4f6 !important; padding: 1px 6px !important; border-radius: 4px !important; font-size: 12px !important; color: #111827 !important; border: 1px solid #e5e7eb !important; font-family: ui-monospace, SFMono-Regular, Menlo, monospace !important; } /* Gradio control reset */ .gradio-container .form, .gradio-container .block { border: none !important; box-shadow: none !important; background: transparent !important; } .gradio-container label > span:first-child, .gradio-container .label-wrap > span { font-size: 12px !important; font-weight: 500 !important; color: #374151 !important; margin-bottom: 4px !important; } .gradio-container input, .gradio-container select, .gradio-container textarea, .gradio-container .wrap { border-radius: 8px !important; border: 1px solid #e5e7eb !important; background: #ffffff !important; font-size: 13px !important; color: #111827 !important; font-family: inherit !important; transition: border-color 0.12s ease, box-shadow 0.12s ease !important; } .gradio-container input:focus, .gradio-container textarea:focus, .gradio-container .wrap:focus-within { border-color: #111827 !important; box-shadow: 0 0 0 3px rgba(17, 24, 39, 0.12) !important; outline: none !important; } /* Buttons — HF black-on-white style */ .gradio-container button { background: #111827 !important; color: #ffffff !important; border-radius: 8px !important; font-size: 13px !important; font-weight: 500 !important; border: 1px solid #111827 !important; padding: 6px 14px !important; } .gradio-container button:hover { background: #374151 !important; border-color: #374151 !important; } /* Outer frame that unifies attr bar + all row cards into one table-like card */ .dc-table-frame { background: #ffffff; border: 1px solid #e5e7eb; border-radius: 8px; overflow: hidden; } /* Attribute bar — header strip inside the frame */ .dc-attr-bar { display: grid; grid-template-columns: 1fr 1fr 1fr; gap: 16px; padding: 14px 16px; background: #f9fafb; border-bottom: 1px solid #e5e7eb; } .dc-attr-cell { display: flex; flex-direction: column; gap: 6px; min-width: 0; } .dc-attr-name { font-size: 12px; font-weight: 600; color: #1f2937; display: flex; align-items: baseline; gap: 8px; } .dc-attr-name .dc-type { font-weight: 400; color: #9ca3af; font-size: 11px; font-family: ui-monospace, SFMono-Regular, Menlo, monospace; } /* Native {_opts(["All"] + SPLITS, split_filter)}
image_paimage
image_llimage
masks_pasequence(image)
masks_llsequence(image)
""" if not chunk: return (f'
{attr_bar}' f'
No rows match the current filters.
' f'
') cards = [] for i, r in enumerate(chunk, start=start): thumbs_pa = get_thumbs(r["idx"], "pa") thumbs_ll = get_thumbs(r["idx"], "ll") card = f"""
{i}
{r['split']}
image_pa
image_ll
{r['n_pa']} items
{_strip_html(thumbs_pa, system_filter, label_filter)}
{r['n_ll']} items
{_strip_html(thumbs_ll, system_filter, label_filter)}
""" cards.append(card) return (f'
{attr_bar}' f'
' + "".join(cards) + "
" f"
") def render_meta(rows: list[dict], page: int) -> str: n_pages = max(1, math.ceil(len(rows) / PAGE_SIZE)) page = max(1, min(page, n_pages)) start = (page - 1) * PAGE_SIZE end = min(len(rows), start + PAGE_SIZE) return (f"{len(rows)} rows · " f"showing {start}{end} of {len(rows)}") def update(split, system, label, q, page): rows = filter_rows(split, q) n_pages = max(1, math.ceil(len(rows) / PAGE_SIZE)) page = max(1, min(int(page), n_pages)) return (render_table_html(rows, page, split, system, label), render_meta(rows, page), page, f"{page} / {n_pages}") def go_prev(p): return max(1, int(p) - 1) def go_next(p): return int(p) + 1 FILTER_BIND_JS = r""" () => { // Wire native elements via JS. # Kept *visible* in the DOM (just CSS-hidden) so JS can write to and # dispatch input events on the actual