EvidenceAIResearch's picture
Upload RadGenome-Anatomy preview Space (data-card UI)
9525eb2 verified
#!/usr/bin/env python3
"""Gradio Space styled like the Hugging Face dataset card viewer.
Wide schema (one row per volume):
volume_id, split, image_pa, image_ll,
mask_labels_pa, masks_pa (list of binary masks for every anatomy class),
mask_labels_ll, masks_ll.
The masks_pa / masks_ll cells are rendered as wrapping thumbnail grids that
scroll vertically when a volume has many masks. Each thumbnail is the raw
binary mask drawn white-on-black; the hover tooltip shows the label and its
body system, and the column filters dim the thumbnails that do not match.
"""
from __future__ import annotations

import base64
import colorsys
import html
import io
import json
import math
from functools import lru_cache
from pathlib import Path

import gradio as gr
import numpy as np
import pyarrow.parquet as pq
from PIL import Image
# Filesystem layout: everything is resolved relative to this script.
ROOT = Path(__file__).parent
DATA = ROOT / "data"  # searched recursively for *.parquet shards by _load_rows()
SYSTEMS_PATH = DATA / "label_systems.json"  # optional label -> body-system map
PAGE_SIZE = 20 # volumes per page (lazy mask thumbnails — only current page is decoded)
THUMB_PX = 56 # max side length (px) of the JPEG produced for each mask in get_thumbs()
MASKS_PER_LINE = 10 # mask thumbnails per row inside a masks_pa / masks_ll cell
MASK_THUMB_PX = 48 # on-screen size of each mask thumbnail; injected into the CSS below
def _load_rows() -> list[dict]:
    """Read every ``*.parquet`` file found under ``DATA`` into one list of row dicts.

    Files are visited in sorted path order and each file's rows are appended
    in the order ``to_pylist()`` yields them.
    """
    rows: list[dict] = []
    for parquet_path in sorted(DATA.rglob("*.parquet")):
        table = pq.read_table(parquet_path)
        rows += table.to_pylist()
    return rows
# Every dataset row, loaded once at import time.
ROWS = _load_rows()

# label -> body-system name. Falls back to an empty map when the JSON file is
# absent, in which case every label is reported under the "Other" system.
LABEL_TO_SYSTEM: dict[str, str] = (
    json.loads(SYSTEMS_PATH.read_text(encoding="utf-8"))
    if SYSTEMS_PATH.is_file()
    else {}
)

# Choices offered by the "system" and "split" filters.
SYSTEMS = sorted(set(LABEL_TO_SYSTEM.values())) or ["Other"]
SPLITS = sorted({r["split"] for r in ROWS})

# Choices offered by the "label" filter. Labels are gathered from BOTH views so
# that a class present only in the lateral projection can still be selected;
# `or ()` tolerates a missing or null label list.
ALL_LABELS = sorted({
    lab
    for r in ROWS
    for key in ("mask_labels_pa", "mask_labels_ll")
    for lab in (r.get(key) or ())
})
def _hue(idx: int, n: int) -> float:
return (idx / max(n, 1)) % 1.0
# One hue per body system, spaced evenly in the order SYSTEMS lists them.
SYSTEM_HUE = {
    name: _hue(position, len(SYSTEMS))
    for position, name in enumerate(SYSTEMS)
}


@lru_cache(maxsize=2048)
def color_for_system(system: str) -> tuple[int, int, int]:
    """Return the 8-bit ``(r, g, b)`` colour assigned to ``system``.

    The hue comes from ``SYSTEM_HUE`` (0.0 for unknown systems); saturation
    and value are fixed at 0.85 and 1.0. Channels are truncated, not rounded.
    """
    hue = SYSTEM_HUE.get(system, 0.0)
    channels = colorsys.hsv_to_rgb(hue, 0.85, 1.0)
    red, green, blue = (int(channel * 255) for channel in channels)
    return red, green, blue
def _decode_image(struct) -> Image.Image:
    """Decode the encoded image held in ``struct["bytes"]`` and return it in RGB mode."""
    raw = io.BytesIO(struct["bytes"])
    decoded = Image.open(raw)
    return decoded.convert("RGB")
def _decode_mask(struct) -> np.ndarray:
    """Decode ``struct["bytes"]`` into a 0/1 ``uint8`` array.

    The image is converted to 8-bit greyscale and thresholded: pixels
    strictly greater than 128 become 1, everything else 0.
    """
    grey = Image.open(io.BytesIO(struct["bytes"])).convert("L")
    pixels = np.array(grey)
    foreground = pixels > 128
    return foreground.astype(np.uint8)
def _b64_of_pil(img: Image.Image, quality: int = 80) -> str:
    """Encode ``img`` as an optimised JPEG and return the bytes as base64 text.

    ``quality`` is forwarded to the JPEG encoder unchanged.
    """
    with io.BytesIO() as sink:
        img.save(sink, format="JPEG", quality=quality, optimize=True)
        jpeg_bytes = sink.getvalue()
    return base64.b64encode(jpeg_bytes).decode()
def _mask_thumb(mask: np.ndarray, size: int) -> Image.Image:
    """Turn a 0/1 mask into a white-on-black RGB thumbnail.

    The result is shrunk in place so that neither side exceeds ``size`` px.
    """
    scaled = (mask * 255).astype(np.uint8)  # 1 -> 255 (white), 0 stays black
    thumb = Image.fromarray(scaled, mode="L").convert("RGB")
    thumb.thumbnail((size, size), Image.LANCZOS)
    return thumb
# ── Lightweight startup: only the two base projections of each volume are
# decoded and re-encoded here. Mask thumbnails are produced later, page by
# page, through get_thumbs().
print(f"[init] indexing {len(ROWS)} volumes (lazy mask thumbnails) ...")
RENDERED: list[dict] = []
for idx, r in enumerate(ROWS):
    pa_img = _decode_image(r["image_pa"])
    ll_img = _decode_image(r["image_ll"])
    pa_count = len(r.get("mask_labels_pa", []))
    ll_count = len(r.get("mask_labels_ll", []))
    RENDERED.append(
        {
            "idx": idx,  # position in ROWS, used as the get_thumbs() key
            "volume_id": r["volume_id"],
            "split": r["split"],
            "image_pa_b64": _b64_of_pil(pa_img),
            "image_ll_b64": _b64_of_pil(ll_img),
            "n_pa": pa_count,
            "n_ll": ll_count,
        }
    )
print(f"[init] indexed {len(RENDERED)} volumes; masks will be rendered on demand.")
# ── Lazy mask thumbnail rendering ───────────────────────────────────────
# A call decodes all binary masks of one (volume, view) pair and the result
# is memoised. The LRU bound keeps memory in check while the user pages
# through many volumes.
@lru_cache(maxsize=512)
def get_thumbs(idx: int, view: str) -> tuple:
    """Return ``(label, system, jpeg_base64)`` triples for one volume and view.

    ``idx`` indexes ``ROWS``; ``view`` selects the ``mask_labels_<view>`` /
    ``masks_<view>`` columns (the callers in this file pass "pa" or "ll").
    Labels missing from ``LABEL_TO_SYSTEM`` are reported under "Other".
    """
    row = ROWS[idx]
    pairs = zip(row[f"mask_labels_{view}"], row[f"masks_{view}"])
    return tuple(
        (
            label,
            LABEL_TO_SYSTEM.get(label, "Other"),
            _b64_of_pil(_mask_thumb(_decode_mask(encoded), THUMB_PX), quality=72),
        )
        for label, encoded in pairs
    )
def filter_rows(split: str, q: str) -> list[dict]:
    """Select the entries of ``RENDERED`` matching a split and a volume-id query.

    ``split == "All"`` disables the split filter and a falsy ``q`` disables
    the search. The search is a case-insensitive substring match on
    ``volume_id``. With both filters off, ``RENDERED`` itself is returned.
    """
    wants_split = split != "All"
    needle = q.lower() if q else None
    if not wants_split and needle is None:
        return RENDERED
    return [
        row
        for row in RENDERED
        if (not wants_split or row["split"] == split)
        and (needle is None or needle in row["volume_id"].lower())
    ]
# Stylesheet handed to gr.Blocks(css=...). The double-underscore tokens
# (__MPL__, __MTPX__, __MGAP__, __MSH__) are placeholders that are substituted
# right after the literal, so the mask-strip geometry is derived from the
# Python constants instead of being duplicated by hand.
CSS = """
/* ── Hugging Face Dataset Viewer mimic ──────────────────────────────── */
/* Reset host page so our own wrapper controls all spacing */
html, body { margin: 0 !important; padding: 0 !important; background: #ffffff !important; }
.gradio-container {
max-width: none !important;
font-family: ui-sans-serif, system-ui, -apple-system, BlinkMacSystemFont,
"Segoe UI", Roboto, "Helvetica Neue", Arial, sans-serif !important;
color: #111827 !important;
background: #ffffff !important;
margin: 0 !important;
padding: 0 !important;
box-sizing: border-box !important;
}
/* Our own page wrapper carries every bit of the outer spacing.
This bypasses whatever Gradio's main/contain/app/wrap wrappers do. */
.dc-page {
max-width: 1200px !important;
margin: 0 auto !important;
padding: 56px 64px 64px !important;
box-sizing: border-box !important;
width: 100% !important;
background: #ffffff !important;
}
.dc-page > * { width: 100% !important; }
/* Title block — aligned with the card's left edge, extra top breathing room */
.dc-title {
margin: 24px 0 32px 0 !important;
padding: 0 !important;
}
.dc-title > * {
margin-left: 0 !important;
padding-left: 0 !important;
}
.dc-title h2 {
font-size: 22px !important;
font-weight: 600 !important;
color: #111827 !important;
margin: 0 !important;
padding: 0 !important;
letter-spacing: -0.015em !important;
}
.dc-title h2::after {
content: "Dataset Viewer";
display: block;
color: #6b7280;
font-size: 11px;
font-weight: 500;
text-transform: uppercase;
letter-spacing: 0.06em;
margin-top: 6px;
}
.dc-title p {
color: #6b7280 !important;
font-size: 13px !important;
margin: 6px 0 0 !important;
line-height: 1.5 !important;
}
.dc-title code {
background: #f3f4f6 !important;
padding: 1px 6px !important;
border-radius: 4px !important;
font-size: 12px !important;
color: #111827 !important;
border: 1px solid #e5e7eb !important;
font-family: ui-monospace, SFMono-Regular, Menlo, monospace !important;
}
/* Gradio control reset */
.gradio-container .form, .gradio-container .block {
border: none !important; box-shadow: none !important; background: transparent !important;
}
.gradio-container label > span:first-child,
.gradio-container .label-wrap > span {
font-size: 12px !important;
font-weight: 500 !important;
color: #374151 !important;
margin-bottom: 4px !important;
}
.gradio-container input,
.gradio-container select,
.gradio-container textarea,
.gradio-container .wrap {
border-radius: 8px !important;
border: 1px solid #e5e7eb !important;
background: #ffffff !important;
font-size: 13px !important;
color: #111827 !important;
font-family: inherit !important;
transition: border-color 0.12s ease, box-shadow 0.12s ease !important;
}
.gradio-container input:focus,
.gradio-container textarea:focus,
.gradio-container .wrap:focus-within {
border-color: #111827 !important;
box-shadow: 0 0 0 3px rgba(17, 24, 39, 0.12) !important;
outline: none !important;
}
/* Buttons — HF black-on-white style */
.gradio-container button {
background: #111827 !important;
color: #ffffff !important;
border-radius: 8px !important;
font-size: 13px !important;
font-weight: 500 !important;
border: 1px solid #111827 !important;
padding: 6px 14px !important;
}
.gradio-container button:hover { background: #374151 !important; border-color: #374151 !important; }
/* Outer frame that unifies attr bar + all row cards into one table-like card */
.dc-table-frame {
background: #ffffff;
border: 1px solid #e5e7eb;
border-radius: 8px;
overflow: hidden;
}
/* Attribute bar — header strip inside the frame */
.dc-attr-bar {
display: grid;
grid-template-columns: 1fr 1fr 1fr;
gap: 16px;
padding: 14px 16px;
background: #f9fafb;
border-bottom: 1px solid #e5e7eb;
}
.dc-attr-cell {
display: flex;
flex-direction: column;
gap: 6px;
min-width: 0;
}
.dc-attr-name {
font-size: 12px;
font-weight: 600;
color: #1f2937;
display: flex;
align-items: baseline;
gap: 8px;
}
.dc-attr-name .dc-type {
font-weight: 400;
color: #9ca3af;
font-size: 11px;
font-family: ui-monospace, SFMono-Regular, Menlo, monospace;
}
/* Native <select> for the attribute filters */
.dc-attr-select {
width: 100%;
height: 28px;
border: 1px solid #d1d5db;
border-radius: 6px;
background: #ffffff;
font-size: 12px;
font-family: ui-monospace, SFMono-Regular, Menlo, monospace;
color: #111827;
padding: 2px 8px;
cursor: pointer;
box-sizing: border-box;
appearance: none;
-webkit-appearance: none;
background-image: url("data:image/svg+xml;utf8,<svg xmlns='http://www.w3.org/2000/svg' width='10' height='6' viewBox='0 0 10 6'><path d='M1 1l4 4 4-4' stroke='%236b7280' stroke-width='1.5' fill='none' stroke-linecap='round' stroke-linejoin='round'/></svg>");
background-repeat: no-repeat;
background-position: right 8px center;
padding-right: 26px;
}
.dc-attr-select:hover { border-color: #9ca3af; }
.dc-attr-select:focus {
outline: none;
border-color: #111827;
box-shadow: 0 0 0 2px rgba(17, 24, 39, 0.12);
}
/* Container for the row cards */
.dc-cards {
display: flex;
flex-direction: column;
}
/* Each row rendered as a card row inside the unified frame */
.dc-row-card {
background: #ffffff;
border-bottom: 1px solid #f3f4f6;
transition: background 0.12s ease;
}
.dc-row-card:last-child { border-bottom: none; }
.dc-row-card:hover { background: #fafafa; }
.dc-row-body {
display: grid;
grid-template-columns: 56px 110px auto auto 1fr 1fr;
column-gap: 20px;
row-gap: 12px;
padding: 14px;
align-items: start;
}
.dc-row-num {
color: #111827;
font-family: ui-monospace, SFMono-Regular, Menlo, monospace;
font-size: 13px;
font-variant-numeric: tabular-nums;
font-weight: 500;
}
.dc-row-split {
font-family: ui-monospace, SFMono-Regular, Menlo, monospace;
font-size: 13px;
color: #111827;
}
.dc-row-split::before, .dc-row-split::after {
content: '"'; color: #9ca3af;
}
.dc-field { min-width: 0; }
.dc-field-name {
font-size: 11px;
font-weight: 600;
color: #1f2937;
margin-bottom: 6px;
display: flex;
align-items: baseline;
gap: 8px;
}
.dc-field-name .dc-type {
font-weight: 400;
color: #9ca3af;
font-size: 11px;
font-family: ui-monospace, SFMono-Regular, Menlo, monospace;
}
.dc-field-img {
width: 96px; height: 96px;
object-fit: cover;
border-radius: 4px;
border: 1px solid #e5e7eb;
background: #f3f4f6;
display: block;
}
/* Mask strips (sequence(image)) — wrap to multiple lines, no horizontal scroll */
.dc-mask-count {
font-size: 11px;
color: #6b7280;
margin-bottom: 8px;
font-family: ui-monospace, SFMono-Regular, Menlo, monospace;
}
.dc-mask-count b {
color: #111827; font-weight: 600;
font-variant-numeric: tabular-nums;
}
.dc-mask-strip {
display: grid;
/* MASKS_PER_LINE columns of MASK_THUMB_PX each (injected from Python) */
grid-template-columns: repeat(__MPL__, __MTPX__px);
gap: __MGAP__px;
justify-content: start;
/* 3 rows of __MTPX__px thumbs + 2 row gaps + a little breathing room */
max-height: __MSH__px;
overflow-y: auto;
padding-right: 4px;
}
.dc-mask-strip::-webkit-scrollbar { width: 6px; }
.dc-mask-strip::-webkit-scrollbar-track { background: #f9fafb; }
.dc-mask-strip::-webkit-scrollbar-thumb { background: #d1d5db; border-radius: 3px; }
.dc-mask-strip::-webkit-scrollbar-thumb:hover { background: #9ca3af; }
.dc-mask-item { position: relative; flex: 0 0 auto; }
.dc-mask-item img {
width: __MTPX__px; height: __MTPX__px;
object-fit: cover;
border-radius: 3px;
border: 1px solid #e5e7eb;
background: #000;
display: block;
transition: border-color 0.1s ease;
}
.dc-mask-item:hover img { border-color: #111827; }
.dc-mask-item .dc-mask-tip {
position: absolute; bottom: calc(100% + 6px); left: 50%;
transform: translateX(-50%);
background: #111827;
color: #ffffff;
font-size: 11px;
padding: 4px 8px;
border-radius: 4px;
white-space: nowrap;
opacity: 0; pointer-events: none;
transition: opacity 0.1s;
z-index: 10;
font-family: ui-monospace, SFMono-Regular, Menlo, monospace;
box-shadow: 0 2px 6px rgba(0, 0, 0, 0.15);
}
.dc-mask-item:hover .dc-mask-tip { opacity: 1; }
.dc-mask-item.dim img { opacity: 0.2; }
.dc-empty {
padding: 48px;
text-align: center;
color: #9ca3af;
font-size: 14px;
background: #ffffff;
}
/* Visually-hidden state holders (kept in the DOM so JS can read/write them) */
.dc-hidden-input {
position: absolute !important;
left: -9999px !important;
top: 0 !important;
width: 1px !important;
height: 1px !important;
opacity: 0 !important;
pointer-events: none !important;
overflow: hidden !important;
margin: 0 !important;
padding: 0 !important;
}
/* Bottom pagination bar */
.dc-pager {
margin-top: 16px !important;
padding: 0 !important;
display: flex !important;
justify-content: center !important;
align-items: center !important;
gap: 8px !important;
}
.dc-pager > * { margin: 0 !important; padding: 0 !important; }
.dc-pager .dc-pager-meta {
flex: 1 1 0;
color: #6b7280; font-size: 12px;
font-family: ui-monospace, SFMono-Regular, Menlo, monospace;
}
.dc-pager-label {
font-family: ui-monospace, SFMono-Regular, Menlo, monospace;
font-size: 12px; color: #111827;
padding: 0 8px !important;
min-width: 64px; text-align: center;
}
.dc-pager-num {
flex: 0 0 64px !important;
max-width: 64px !important;
}
.dc-pager-num input {
height: 28px !important; min-height: 28px !important;
text-align: center !important;
font-family: ui-monospace, SFMono-Regular, Menlo, monospace !important;
}
.dc-pager-btn button {
background: #ffffff !important;
color: #111827 !important;
border: 1px solid #e5e7eb !important;
padding: 4px 12px !important;
font-size: 12px !important;
border-radius: 6px !important;
height: 28px !important;
min-height: 28px !important;
}
.dc-pager-btn button:hover {
background: #f9fafb !important;
border-color: #d1d5db !important;
}
"""
# Mask-strip geometry. The strip shows three rows of thumbnails before it
# starts scrolling: three thumbs, the two gaps between them, plus one extra
# gap of breathing room. With MASK_THUMB_PX = 48 this is 156px, the value
# that used to be hard-coded in the stylesheet.
_MASK_GAP_PX = 4
_MASK_VISIBLE_ROWS = 3
_MASK_STRIP_MAX_H = (
    _MASK_VISIBLE_ROWS * MASK_THUMB_PX
    + (_MASK_VISIBLE_ROWS - 1) * _MASK_GAP_PX
    + _MASK_GAP_PX
)
# Inject the configurable sizes into the CSS once at import time.
CSS = (
    CSS.replace("__MPL__", str(MASKS_PER_LINE))
    .replace("__MTPX__", str(MASK_THUMB_PX))
    .replace("__MGAP__", str(_MASK_GAP_PX))
    .replace("__MSH__", str(_MASK_STRIP_MAX_H))
)
def _opts(values: list[str], current: str) -> str:
return "".join(
f'<option value="{v}"{" selected" if v == current else ""}>{v}</option>'
for v in values
)
def _strip_html(thumbs, system_filter: str, label_filter: str) -> str:
items = []
for lab, sysname, b64 in thumbs:
dim = ((system_filter != "All" and sysname != system_filter) or
(label_filter != "All" and lab != label_filter))
cls = "dc-mask-item dim" if dim else "dc-mask-item"
items.append(
f"<div class='{cls}' data-label='{lab}'>"
f"<img src='data:image/jpeg;base64,{b64}' loading='lazy' alt='{lab}'/>"
f"<div class='dc-mask-tip'>{lab} · {sysname}</div>"
f"</div>"
)
return f"<div class='dc-mask-strip'>{''.join(items)}</div>"
def _attr_bar_html(split_filter: str, system_filter: str, label_filter: str) -> str:
    """Render the header strip: column names, types and the three filter selects."""
    return f"""<div class="dc-attr-bar" style="grid-template-columns: 56px 110px auto auto 1fr 1fr;">
<div class="dc-attr-cell">
<span class="dc-attr-name">#<span class="dc-type">int32</span></span>
</div>
<div class="dc-attr-cell">
<span class="dc-attr-name">split<span class="dc-type">string</span></span>
<select id="filter-split" class="dc-attr-select">{_opts(["All"] + SPLITS, split_filter)}</select>
</div>
<div class="dc-attr-cell">
<span class="dc-attr-name">image_pa<span class="dc-type">image</span></span>
</div>
<div class="dc-attr-cell">
<span class="dc-attr-name">image_ll<span class="dc-type">image</span></span>
</div>
<div class="dc-attr-cell">
<span class="dc-attr-name">masks_pa<span class="dc-type">sequence(image)</span></span>
<select id="filter-system" class="dc-attr-select">{_opts(["All"] + SYSTEMS, system_filter)}</select>
</div>
<div class="dc-attr-cell">
<span class="dc-attr-name">masks_ll<span class="dc-type">sequence(image)</span></span>
<select id="filter-label" class="dc-attr-select">{_opts(["All"] + ALL_LABELS, label_filter)}</select>
</div>
</div>"""


def _row_card_html(row_num: int, r: dict, system_filter: str, label_filter: str) -> str:
    """Render one volume (an entry of ``RENDERED``) as a row card."""
    thumbs_pa = get_thumbs(r["idx"], "pa")
    thumbs_ll = get_thumbs(r["idx"], "ll")
    # Dataset text goes into element content and a double-quoted attribute,
    # so escape it before interpolation.
    split = html.escape(str(r["split"]), quote=True)
    volume_id = html.escape(str(r["volume_id"]), quote=True)
    return f"""<div class="dc-row-card">
<div class="dc-row-body">
<div class="dc-field">
<span class="dc-row-num">{row_num}</span>
</div>
<div class="dc-field">
<span class="dc-row-split">{split}</span>
</div>
<div class="dc-field">
<img class="dc-field-img" src="data:image/jpeg;base64,{r['image_pa_b64']}" alt="image_pa" title="{volume_id}"/>
</div>
<div class="dc-field">
<img class="dc-field-img" src="data:image/jpeg;base64,{r['image_ll_b64']}" alt="image_ll" title="{volume_id}"/>
</div>
<div class="dc-field">
<div class="dc-field-name"><span class="dc-type">{r['n_pa']} items</span></div>
{_strip_html(thumbs_pa, system_filter, label_filter)}
</div>
<div class="dc-field">
<div class="dc-field-name"><span class="dc-type">{r['n_ll']} items</span></div>
{_strip_html(thumbs_ll, system_filter, label_filter)}
</div>
</div>
</div>"""


def render_table_html(rows: list[dict], page: int,
                      split_filter: str, system_filter: str, label_filter: str) -> str:
    """Render one page of ``rows`` as the table-like HTML frame.

    Args:
        rows: Already-filtered entries of ``RENDERED``.
        page: 1-based page number; clamped into the valid range.
        split_filter: Current split selection, pre-selected in the header.
        system_filter: Current system selection; non-matching masks are dimmed.
        label_filter: Current label selection; non-matching masks are dimmed.

    Returns:
        The header strip followed by one card per row on the page, or by an
        empty-state message when the page has no rows. The number shown in
        the ``#`` column is the row's 0-based position within ``rows``.

    Only the rows of the requested page have their mask thumbnails decoded.
    """
    n_pages = max(1, math.ceil(len(rows) / PAGE_SIZE))
    page = max(1, min(page, n_pages))
    start = (page - 1) * PAGE_SIZE
    chunk = rows[start:start + PAGE_SIZE]
    attr_bar = _attr_bar_html(split_filter, system_filter, label_filter)
    if not chunk:
        return (f'<div class="dc-table-frame">{attr_bar}'
                f'<div class="dc-empty">No rows match the current filters.</div>'
                f'</div>')
    cards = "".join(
        _row_card_html(i, r, system_filter, label_filter)
        for i, r in enumerate(chunk, start=start)
    )
    return (f'<div class="dc-table-frame">{attr_bar}'
            f'<div class="dc-cards">{cards}</div>'
            f'</div>')
def render_meta(rows: list[dict], page: int) -> str:
    """Return the pager caption, e.g. ``45 rows · showing 1–20 of 45``.

    Args:
        rows: Already-filtered rows being paged through.
        page: 1-based page number; clamped into the valid range.

    Returns:
        An HTML snippet with the total row count and the inclusive, 1-based
        range of rows on the page (``0–0`` when there are no rows).
    """
    total = len(rows)
    n_pages = max(1, math.ceil(total / PAGE_SIZE))
    page = max(1, min(page, n_pages))
    start = (page - 1) * PAGE_SIZE
    end = min(total, start + PAGE_SIZE)
    # `start` is a 0-based offset while `end` is a count; showing them
    # together read as "0–20" for a 20-row page. Present an inclusive range.
    first = start + 1 if total else 0
    return (f"<b>{total}</b> rows · "
            f"showing <b>{first}</b>–<b>{end}</b> of <b>{total}</b>")
def update(split, system, label, q, page):
    """Recompute everything the UI shows for the current filter and page state.

    Args:
        split: Split filter value ("All" disables it).
        system: Body-system filter value ("All" disables it).
        label: Label filter value ("All" disables it).
        q: Volume-id search text (may be empty).
        page: Requested 1-based page. May be ``None`` when the number field
            has been cleared; anything that cannot be read as an integer is
            treated as page 1.

    Returns:
        ``(table_html, meta_html, page, page_label)`` where ``page`` is the
        clamped page number and ``page_label`` reads ``"<page> / <n_pages>"``.
    """
    rows = filter_rows(split, q)
    n_pages = max(1, math.ceil(len(rows) / PAGE_SIZE))
    try:
        requested = int(page)
    except (TypeError, ValueError, OverflowError):
        # None (empty field), NaN or infinity: fall back to the first page
        # instead of failing the whole render.
        requested = 1
    page = max(1, min(requested, n_pages))
    return (render_table_html(rows, page, split, system, label),
            render_meta(rows, page),
            page,
            f"{page} / {n_pages}")
def go_prev(p):
    """Return the page before ``p``, never going below 1.

    ``p`` may be ``None`` (cleared number field) or otherwise unreadable as
    an integer; in that case the first page is returned.
    """
    try:
        current = int(p)
    except (TypeError, ValueError, OverflowError):
        return 1
    return max(1, current - 1)
def go_next(p):
    """Return the page after ``p``.

    No upper bound is applied here. ``p`` may be ``None`` (cleared number
    field) or otherwise unreadable as an integer; it is then treated as
    page 1, so the result is 2.
    """
    try:
        current = int(p)
    except (TypeError, ValueError, OverflowError):
        current = 1
    return current + 1
# JavaScript passed as the `js=` argument of gr.Blocks below. It installs a
# single document-level `change` listener (guarded by window.__dc_filter_bound
# so it is bound only once) that copies the value of the in-table <select>
# elements (filter-split / filter-system / filter-label) into the hidden
# Textboxes with elem_id state-split / state-system / state-label.
FILTER_BIND_JS = r"""
() => {
// Wire native <select> elements rendered inside the table to the hidden
// Gradio Textboxes (state-split / state-system / state-label) so changing
// a filter triggers a re-render.
//
// Gradio's Svelte components ignore plain `el.value = X` because Svelte's
// store is bound via the property's native descriptor. We must use the
// native setter, then dispatch an `input` event so Svelte sees the change.
if (window.__dc_filter_bound) return;
window.__dc_filter_bound = true;
const map = {
'filter-split': 'state-split',
'filter-system': 'state-system',
'filter-label': 'state-label',
};
function setReactiveValue(el, val) {
const proto = el.tagName === 'TEXTAREA'
? window.HTMLTextAreaElement.prototype
: window.HTMLInputElement.prototype;
const setter = Object.getOwnPropertyDescriptor(proto, 'value').set;
setter.call(el, val);
el.dispatchEvent(new Event('input', { bubbles: true }));
el.dispatchEvent(new Event('change', { bubbles: true }));
}
function findInput(stateId) {
// The elem_id may be on the wrapper or directly on the input.
const direct = document.getElementById(stateId);
if (!direct) return null;
if (direct.tagName === 'TEXTAREA' || direct.tagName === 'INPUT') return direct;
return direct.querySelector('textarea, input');
}
document.addEventListener('change', (e) => {
const t = e.target;
if (!t || !t.id || !(t.id in map)) return;
const inp = findInput(map[t.id]);
if (!inp) return;
setReactiveValue(inp, t.value);
});
}
"""
with gr.Blocks(
    title="RadGenome-Anatomy Preview",
    css=CSS,
    js=FILTER_BIND_JS,
    theme=gr.themes.Soft(
        primary_hue=gr.themes.colors.blue,
        neutral_hue=gr.themes.colors.slate,
        font=("-apple-system", "BlinkMacSystemFont", "SF Pro Text",
              "Helvetica Neue", "Arial", "sans-serif"),
    ),
) as demo:
    with gr.Column(elem_classes=["dc-page"]):
        gr.Markdown("## RadGenome-Anatomy", elem_classes=["dc-title"])

        # Filter state lives in three Textboxes that the page JS fills from
        # the in-table <select> elements. They stay *visible* to Gradio and
        # are hidden with CSS only, so the real <textarea> exists in the DOM
        # for the JS to write to and dispatch input events on.
        split_dd, sys_dd, lab_dd = [
            gr.Textbox(
                value="All",
                elem_id=state_id,
                elem_classes=["dc-hidden-input"],
                show_label=False,
                container=False,
                interactive=True,
            )
            for state_id in ("state-split", "state-system", "state-label")
        ]
        search = gr.Textbox(value="", visible=False)

        table = gr.HTML()

        with gr.Row(elem_classes=["dc-pager"]):
            meta_md = gr.HTML(elem_classes=["dc-pager-meta"])
            prev_btn = gr.Button("‹ Prev", elem_classes=["dc-pager-btn"])
            page_label = gr.Markdown("1 / 1", elem_classes=["dc-pager-label"])
            next_btn = gr.Button("Next ›", elem_classes=["dc-pager-btn"])
            page_num = gr.Number(
                value=1,
                show_label=False,
                container=False,
                precision=0,
                minimum=1,
                elem_classes=["dc-pager-num"],
            )

    inputs = [split_dd, sys_dd, lab_dd, search, page_num]
    outputs = [table, meta_md, page_num, page_label]

    # Any change to a filter, the search text or the page number re-renders.
    for source in (split_dd, sys_dd, lab_dd, search, page_num):
        source.change(update, inputs, outputs)

    # Pager buttons first move the page number, then re-render.
    for button, step in ((prev_btn, go_prev), (next_btn, go_next)):
        button.click(step, page_num, page_num).then(update, inputs, outputs)

    # Initial render when the page loads.
    demo.load(update, inputs, outputs)
# Script entry point: enable the request queue (at most 16 waiting) and serve.
if __name__ == "__main__":
    queued_app = demo.queue(max_size=16)
    queued_app.launch()