# NOTE(review): removed "Spaces: / Runtime error / Runtime error" lines — they
# are Hugging Face Space status-page residue from a copy-paste, not app source.
| """ | |
| Cross-Agent Review Queue Explorer | |
| ================================= | |
| Browse, filter, and analyze 37 anonymized cross-agent code-review checkpoints | |
| from the Neo Genesis monorepo (Codex <-> Claude, 2026-04-08 ~ 2026-04-14). | |
| Data source: ``neogenesislab/cross-agent-review-queue-2026`` (config=transcripts) | |
| """ | |
| from __future__ import annotations | |
| import collections | |
| import re | |
| from typing import Any | |
| import gradio as gr | |
| import pandas as pd | |
| import plotly.express as px | |
| from datasets import load_dataset | |
| DATASET_ID = "neogenesislab/cross-agent-review-queue-2026" | |
| DATASET_CONFIG = "transcripts" | |
| # --------------------------------------------------------------------------- | |
| # Cold-start data load | |
| # --------------------------------------------------------------------------- | |
| ds = load_dataset(DATASET_ID, DATASET_CONFIG, split="train") | |
| ROWS: list[dict[str, Any]] = list(ds) | |
| def _year_month(checkpoint_id: str) -> str: | |
| """Parse ``ccr-20260408-121555`` -> ``2026-04``.""" | |
| m = re.search(r"(\d{4})(\d{2})\d{2}", checkpoint_id or "") | |
| if m: | |
| return f"{m.group(1)}-{m.group(2)}" | |
| return "unknown" | |
| def _word_count(text: str | None) -> int: | |
| if not text: | |
| return 0 | |
| return len(str(text).split()) | |
# Pre-compute derived columns once, so filtering and stats never re-parse
# ids or re-count words at request time.
for r in ROWS:
    r["year_month"] = _year_month(r.get("id", ""))
    r["prompt_words"] = _word_count(r.get("prompt"))
    r["response_words"] = _word_count(r.get("response"))
# Dropdown vocabularies for the Browse tab (distinct values, sorted).
REVIEW_LENSES = sorted({r.get("review_lens", "") or "" for r in ROWS})
TARGET_AGENTS = sorted({r.get("target", "") or "" for r in ROWS})
RESULTS = sorted({r.get("result", "") or "" for r in ROWS})
YEAR_MONTHS = sorted({r["year_month"] for r in ROWS})
# Sentinel dropdown value meaning "do not filter on this field".
ALL_FILTER = "all"
# ---------------------------------------------------------------------------
# Tab 1: Browse
# ---------------------------------------------------------------------------
def filter_rows(
    review_lens: str,
    target_agent: str,
    result: str,
    year_month: str,
) -> pd.DataFrame:
    """Build the Browse-tab table from ROWS under the four dropdown filters.

    Each argument is either ALL_FILTER ("all"), which disables that filter,
    or an exact value to match. Long titles and review_lens strings are
    truncated for display.
    """
    def keep(row) -> bool:
        # (selected value, row value) pairs for each filterable field.
        checks = (
            (review_lens, row.get("review_lens") or ""),
            (target_agent, row.get("target") or ""),
            (result, row.get("result") or ""),
            (year_month, row["year_month"]),
        )
        return all(want == ALL_FILTER or want == have for want, have in checks)

    records = []
    for row in ROWS:
        if not keep(row):
            continue
        title = (row.get("title") or "").strip()
        if len(title) > 80:
            title = title[:77] + "..."
        records.append({
            "id": row.get("id"),
            "year_month": row["year_month"],
            "target": row.get("target"),
            "model": row.get("model"),
            "review_lens": (row.get("review_lens") or "")[:50],
            "result": row.get("result"),
            "title": title,
            "response_words": row.get("response_words"),
        })
    return pd.DataFrame(records)
| # --------------------------------------------------------------------------- | |
| # Tab 2: Detail | |
| # --------------------------------------------------------------------------- | |
| def review_detail(checkpoint_id: str) -> str: | |
| if not checkpoint_id: | |
| return "_Pick a checkpoint id (e.g. `ccr-20260408-121555`)._" | |
| cid = checkpoint_id.strip() | |
| for r in ROWS: | |
| if r.get("id") == cid: | |
| parts = [] | |
| parts.append(f"## {r.get('id')}") | |
| parts.append("") | |
| parts.append(f"**created_at**: `{r.get('created_at')}` ") | |
| parts.append(f"**requester** -> **target**: `{r.get('requester')}` -> `{r.get('target')}` ") | |
| parts.append(f"**mode**: `{r.get('mode')}` | **model**: `{r.get('model')}` | **scope**: `{r.get('scope')}` ") | |
| parts.append(f"**review_lens**: `{r.get('review_lens')}` | **result**: `{r.get('result')}` ") | |
| parts.append("") | |
| if r.get("title"): | |
| parts.append(f"### Title") | |
| parts.append(f"> {r.get('title')}") | |
| parts.append("") | |
| if r.get("owner_goal"): | |
| parts.append(f"### Owner goal") | |
| parts.append(f"> {r.get('owner_goal')}") | |
| parts.append("") | |
| if r.get("owner_intent"): | |
| parts.append(f"### Owner intent") | |
| parts.append(f"> {r.get('owner_intent')}") | |
| parts.append("") | |
| if r.get("constraints"): | |
| parts.append(f"### Constraints") | |
| parts.append(f"> {r.get('constraints')}") | |
| parts.append("") | |
| if r.get("success_criteria"): | |
| parts.append(f"### Success criteria") | |
| parts.append(f"> {r.get('success_criteria')}") | |
| parts.append("") | |
| if r.get("ask"): | |
| parts.append(f"### Ask") | |
| parts.append(f"> {r.get('ask')}") | |
| parts.append("") | |
| if r.get("prompt"): | |
| parts.append(f"### Prompt ({r.get('prompt_words')} words)") | |
| parts.append("```") | |
| parts.append(str(r.get("prompt"))) | |
| parts.append("```") | |
| parts.append("") | |
| if r.get("response"): | |
| parts.append(f"### Response ({r.get('response_words')} words)") | |
| parts.append("") | |
| parts.append(str(r.get("response"))) | |
| parts.append("") | |
| return "\n".join(parts) | |
| return f"_Checkpoint `{cid}` not found in the {len(ROWS)}-row dataset._" | |
# ---------------------------------------------------------------------------
# Tab 3: Statistics
# ---------------------------------------------------------------------------
def _bar_chart(counter: collections.Counter, title: str, x_label: str):
    """Turn a Counter into a labelled Plotly bar figure (None when empty).

    Bars are ordered most-common-first; counts are printed above each bar.
    """
    if not counter:
        return None
    frame = pd.DataFrame(counter.most_common(), columns=[x_label, "count"])
    figure = px.bar(frame, x=x_label, y="count", title=title, text="count")
    figure.update_traces(textposition="outside")
    figure.update_layout(margin=dict(l=20, r=20, t=50, b=20), height=380)
    return figure
def _field_counter(key: str, empty_label: str = "(empty)") -> collections.Counter:
    """Count ROWS by one field, folding missing/empty values into *empty_label*.

    Shared by all stats_* chart builders below (previously five near-identical
    Counter comprehensions).
    """
    return collections.Counter((r.get(key) or empty_label) for r in ROWS)


def stats_review_lens():
    """Bar chart: review count per review_lens."""
    return _bar_chart(_field_counter("review_lens"), "Reviews by review_lens", "review_lens")


def stats_target_agent():
    """Bar chart: review count per target agent."""
    return _bar_chart(_field_counter("target"), "Reviews by target agent", "target")


def stats_result():
    """Bar chart: outcome (result) distribution."""
    return _bar_chart(_field_counter("result"), "Outcome distribution", "result")


def stats_model():
    """Bar chart: review count per Claude model."""
    return _bar_chart(_field_counter("model"), "Reviews by Claude model", "model")


def stats_year_month():
    """Bar chart: review count per month.

    ``year_month`` is always populated non-empty at load time (falls back to
    "unknown"), so the empty_label fold never triggers here.
    """
    return _bar_chart(_field_counter("year_month"), "Reviews by month", "year_month")
| def stats_summary_md() -> str: | |
| n = len(ROWS) | |
| avg_resp = sum(r["response_words"] for r in ROWS) / n if n else 0 | |
| avg_prompt = sum(r["prompt_words"] for r in ROWS) / n if n else 0 | |
| new_signal = sum(1 for r in ROWS if r.get("result") == "new_signal") | |
| no_new_signal = sum(1 for r in ROWS if r.get("result") == "no_new_signal") | |
| failed = sum(1 for r in ROWS if r.get("result") == "failed") | |
| opus = sum(1 for r in ROWS if r.get("model") == "opus") | |
| sonnet = sum(1 for r in ROWS if r.get("model") == "sonnet") | |
| return ( | |
| f"### Quick stats\n\n" | |
| f"| metric | value |\n" | |
| f"|---|---|\n" | |
| f"| Total reviews | **{n}** |\n" | |
| f"| Avg prompt length | {avg_prompt:.1f} words |\n" | |
| f"| Avg response length | {avg_resp:.1f} words |\n" | |
| f"| Result: new_signal | {new_signal} ({new_signal/n*100:.1f}%) |\n" | |
| f"| Result: no_new_signal | {no_new_signal} ({no_new_signal/n*100:.1f}%) |\n" | |
| f"| Result: failed | {failed} ({failed/n*100:.1f}%) |\n" | |
| f"| Model: opus | {opus} |\n" | |
| f"| Model: sonnet | {sonnet} |\n" | |
| ) | |
# ---------------------------------------------------------------------------
# Gradio app
# ---------------------------------------------------------------------------
# Canonical dataset URL, reused twice in the intro copy below.
_DATASET_URL = f"https://huggingface.co/datasets/{DATASET_ID}"

# Landing-page copy. Previously this was an f-string containing an escaped
# "{{}}" placeholder that was then filled via str.format() — a fragile hybrid
# of the two formatting mechanisms. A single plain f-string produces the
# identical text.
INTRO_MD = f"""
# Cross-Agent Review Queue Explorer
Browse, filter, and inspect **{len(ROWS)} anonymized cross-agent code-review checkpoints**
from the [Neo Genesis](https://neogenesis.app) monorepo, captured between
`2026-04-08` and `2026-04-14` (Codex requesting reviews from Claude
`neo-reviewer` / `neo-architect` agents).
Each row is a real bounded review request with:
- explicit **owner_goal** + **owner_intent** + **constraints** + **success_criteria**
- a single **review_lens** (risk, regression, goal-fit, etc.)
- a Claude **model** (sonnet / opus) and **mode** (review / architecture)
- the resulting **outcome** (`new_signal` / `no_new_signal` / `failed`)
This is the first publicly released dataset of bounded multi-agent code-review
transcripts. Read the full schema in the
[dataset card]({_DATASET_URL}). Use this Explorer to navigate and aggregate.
- **Dataset**: [`{DATASET_ID}`]({_DATASET_URL})
- **License**: CC-BY-4.0 (data) | MIT (this Space's app code)
- **Operator**: Yesol Heo / Neo Genesis
"""
| with gr.Blocks(title="Cross-Agent Review Queue Explorer", theme=gr.themes.Soft()) as demo: | |
| gr.Markdown(INTRO_MD) | |
| with gr.Tab("Browse"): | |
| gr.Markdown( | |
| "Filter the queue by review lens, target agent, outcome, or month. " | |
| "Click any row's `id` (e.g. `ccr-20260408-121555`) and paste it into " | |
| "the **Detail** tab to see the full transcript." | |
| ) | |
| with gr.Row(): | |
| lens_dd = gr.Dropdown( | |
| choices=[ALL_FILTER] + REVIEW_LENSES, | |
| value=ALL_FILTER, | |
| label="review_lens", | |
| ) | |
| target_dd = gr.Dropdown( | |
| choices=[ALL_FILTER] + TARGET_AGENTS, | |
| value=ALL_FILTER, | |
| label="target agent", | |
| ) | |
| result_dd = gr.Dropdown( | |
| choices=[ALL_FILTER] + RESULTS, | |
| value=ALL_FILTER, | |
| label="result", | |
| ) | |
| ym_dd = gr.Dropdown( | |
| choices=[ALL_FILTER] + YEAR_MONTHS, | |
| value=ALL_FILTER, | |
| label="year-month", | |
| ) | |
| table = gr.DataFrame( | |
| value=filter_rows(ALL_FILTER, ALL_FILTER, ALL_FILTER, ALL_FILTER), | |
| label=f"{len(ROWS)} reviews", | |
| wrap=True, | |
| interactive=False, | |
| ) | |
| for c in (lens_dd, target_dd, result_dd, ym_dd): | |
| c.change( | |
| filter_rows, | |
| inputs=[lens_dd, target_dd, result_dd, ym_dd], | |
| outputs=table, | |
| ) | |
| with gr.Tab("Detail"): | |
| gr.Markdown( | |
| "Paste a checkpoint id (e.g. `ccr-20260408-121555`) to see the full " | |
| "anonymized transcript: owner goal, constraints, prompt, and Claude's response." | |
| ) | |
| with gr.Row(): | |
| cid_in = gr.Textbox( | |
| label="Checkpoint id", | |
| placeholder="ccr-20260408-121555", | |
| value=ROWS[0]["id"] if ROWS else "", | |
| scale=4, | |
| ) | |
| view_btn = gr.Button("Show review", variant="primary", scale=1) | |
| detail_md = gr.Markdown( | |
| review_detail(ROWS[0]["id"]) if ROWS else "_dataset is empty_" | |
| ) | |
| view_btn.click(review_detail, inputs=cid_in, outputs=detail_md) | |
| cid_in.submit(review_detail, inputs=cid_in, outputs=detail_md) | |
| with gr.Tab("Statistics"): | |
| gr.Markdown(stats_summary_md()) | |
| with gr.Row(): | |
| gr.Plot(value=stats_review_lens()) | |
| gr.Plot(value=stats_target_agent()) | |
| with gr.Row(): | |
| gr.Plot(value=stats_result()) | |
| gr.Plot(value=stats_model()) | |
| gr.Plot(value=stats_year_month()) | |
| with gr.Tab("About"): | |
| gr.Markdown( | |
| f""" | |
| ### What is this? | |
| A frozen, anonymized snapshot of {len(ROWS)} cross-agent code-review checkpoints | |
| from the live SSOT (`.agent/shared-brain/cross-agent-review.md`) of | |
| [Neo Genesis](https://neogenesis.app) β a 1-person AI-native operator running | |
| **11 production AI business units**. | |
| ### Anonymization (6-tier) | |
| The published dataset replaces: | |
| - absolute file paths -> repo-relative paths | |
| - internal hostnames / IPs -> tier names | |
| - live API keys / tokens -> `[REDACTED]` | |
| - personal contact info -> tier role names | |
| - internal Telegram chat ids / Supabase project ids -> stable hashes | |
| - secret-bearing scopes -> `[redacted-scope]` | |
| while preserving the **structure of bounded reviews**: every transcript still | |
| shows the owner_goal, the constraints, the review_lens, and the actual | |
| prompt / response pair so you can study *how* the bounded-review protocol works. | |
| ### Why publish this? | |
| Most multi-agent papers report aggregate metrics. The actual *transcripts* of | |
| real bounded reviews β with explicit owner goals and review lenses β are rarely | |
| public. This dataset is meant to be a working example for: | |
| - agent-orchestration researchers studying handoff prompts | |
| - code-review automation builders calibrating their own review schemas | |
| - AI-governance teams evaluating bounded-review protocols against ad-hoc chats | |
| ### Resources | |
| - **Dataset**: <https://huggingface.co/datasets/{DATASET_ID}> | |
| - **Neo Genesis homepage**: <https://neogenesis.app> | |
| - **Operator**: <https://huggingface.co/neogenesislab> | |
| - **Wikidata**: [Q139569680](https://www.wikidata.org/wiki/Q139569680) | |
| ### Cite | |
| ```bibtex | |
| @misc{{neogenesis_cross_agent_review_queue_2026, | |
| title = {{Cross-Agent Code Review Queue: 37 anonymized Codex<->Claude bounded review checkpoints}}, | |
| author = {{Heo, Yesol}}, | |
| year = {{2026}}, | |
| url = {{https://huggingface.co/datasets/{DATASET_ID}}} | |
| }} | |
| ``` | |
| """ | |
| ) | |
| if __name__ == "__main__": | |
| demo.queue().launch() | |