""" Cross-Agent Review Queue Explorer ================================= Browse, filter, and analyze 37 anonymized cross-agent code-review checkpoints from the Neo Genesis monorepo (Codex <-> Claude, 2026-04-08 ~ 2026-04-14). Data source: ``neogenesislab/cross-agent-review-queue-2026`` (config=transcripts) """ from __future__ import annotations import collections import re from typing import Any import gradio as gr import pandas as pd import plotly.express as px from datasets import load_dataset DATASET_ID = "neogenesislab/cross-agent-review-queue-2026" DATASET_CONFIG = "transcripts" # --------------------------------------------------------------------------- # Cold-start data load # --------------------------------------------------------------------------- ds = load_dataset(DATASET_ID, DATASET_CONFIG, split="train") ROWS: list[dict[str, Any]] = list(ds) def _year_month(checkpoint_id: str) -> str: """Parse ``ccr-20260408-121555`` -> ``2026-04``.""" m = re.search(r"(\d{4})(\d{2})\d{2}", checkpoint_id or "") if m: return f"{m.group(1)}-{m.group(2)}" return "unknown" def _word_count(text: str | None) -> int: if not text: return 0 return len(str(text).split()) # Pre-compute derived columns once. 
for r in ROWS:
    r["year_month"] = _year_month(r.get("id", ""))
    r["prompt_words"] = _word_count(r.get("prompt"))
    r["response_words"] = _word_count(r.get("response"))

# Distinct values offered by the Browse dropdowns (missing values become "").
REVIEW_LENSES = sorted({r.get("review_lens", "") or "" for r in ROWS})
TARGET_AGENTS = sorted({r.get("target", "") or "" for r in ROWS})
RESULTS = sorted({r.get("result", "") or "" for r in ROWS})
YEAR_MONTHS = sorted({r["year_month"] for r in ROWS})

# Sentinel dropdown value meaning "do not filter on this field".
ALL_FILTER = "all"

# Column order of the Browse table.  Passing it explicitly to DataFrame keeps
# the headers visible even when a filter combination matches no rows.
_BROWSE_COLUMNS = [
    "id",
    "year_month",
    "target",
    "model",
    "review_lens",
    "result",
    "title",
    "response_words",
]

# (row key, heading) pairs rendered as block-quoted sections in the Detail tab.
_DETAIL_SECTIONS = (
    ("title", "Title"),
    ("owner_goal", "Owner goal"),
    ("owner_intent", "Owner intent"),
    ("constraints", "Constraints"),
    ("success_criteria", "Success criteria"),
    ("ask", "Ask"),
)


# ---------------------------------------------------------------------------
# Tab 1: Browse
# ---------------------------------------------------------------------------

def filter_rows(
    review_lens: str,
    target_agent: str,
    result: str,
    year_month: str,
) -> pd.DataFrame:
    """Return the Browse table for the selected dropdown values.

    Each argument is either ``ALL_FILTER`` (no constraint) or a value that
    the corresponding row field must equal exactly; a missing/``None`` field
    is compared as ``""``.

    Titles longer than 80 characters are cut to 77 characters plus ``...``
    and ``review_lens`` is cut to 50 characters for display.  The returned
    frame always has the ``_BROWSE_COLUMNS`` columns, even with zero rows.
    """
    selections = (
        ("review_lens", review_lens),
        ("target", target_agent),
        ("result", result),
        ("year_month", year_month),
    )
    # Only the fields the user actually constrained take part in matching.
    active = [(field, value) for field, value in selections if value != ALL_FILTER]

    records = []
    for row in ROWS:
        if any((row.get(field) or "") != value for field, value in active):
            continue
        title = (row.get("title") or "").strip()
        if len(title) > 80:
            title = title[:77] + "..."
        records.append({
            "id": row.get("id"),
            "year_month": row["year_month"],
            "target": row.get("target"),
            "model": row.get("model"),
            "review_lens": (row.get("review_lens") or "")[:50],
            "result": row.get("result"),
            "title": title,
            "response_words": row.get("response_words"),
        })
    return pd.DataFrame(records, columns=_BROWSE_COLUMNS)


# ---------------------------------------------------------------------------
# Tab 2: Detail
# ---------------------------------------------------------------------------

def review_detail(checkpoint_id: str) -> str:
    """Render one checkpoint as Markdown for the Detail tab.

    Args:
        checkpoint_id: Id typed or pasted by the user; surrounding whitespace
            is ignored.

    Returns:
        A hint when the id is empty or blank, a "not found" message when no
        row has that id, otherwise a Markdown document with the metadata
        header, the non-empty owner sections, the prompt (fenced) and the
        response.
    """
    cid = (checkpoint_id or "").strip()
    if not cid:
        return "_Pick a checkpoint id (e.g. `ccr-20260408-121555`)._"

    row = next((r for r in ROWS if r.get("id") == cid), None)
    if row is None:
        return f"_Checkpoint `{cid}` not found in the {len(ROWS)}-row dataset._"

    # Markdown needs two trailing spaces for a hard line break; with a single
    # space the metadata lines collapse into one paragraph.
    hard_break = "  "
    parts = [
        f"## {row.get('id')}",
        "",
        f"**created_at**: `{row.get('created_at')}`{hard_break}",
        (
            f"**requester** -> **target**: `{row.get('requester')}` -> "
            f"`{row.get('target')}`{hard_break}"
        ),
        (
            f"**mode**: `{row.get('mode')}` | **model**: `{row.get('model')}` | "
            f"**scope**: `{row.get('scope')}`{hard_break}"
        ),
        (
            f"**review_lens**: `{row.get('review_lens')}` | "
            f"**result**: `{row.get('result')}`{hard_break}"
        ),
        "",
    ]

    for key, heading in _DETAIL_SECTIONS:
        value = row.get(key)
        if value:
            parts += [f"### {heading}", f"> {value}", ""]

    if row.get("prompt"):
        parts += [
            f"### Prompt ({row.get('prompt_words')} words)",
            "```",
            str(row.get("prompt")),
            "```",
            "",
        ]
    if row.get("response"):
        parts += [
            f"### Response ({row.get('response_words')} words)",
            "",
            str(row.get("response")),
            "",
        ]
    return "\n".join(parts)


# ---------------------------------------------------------------------------
# Tab 3: Statistics
# ---------------------------------------------------------------------------

def _bar_chart(counter: collections.Counter, title: str, x_label: str):
    """Build a Plotly bar chart from *counter*, most frequent value first.

    Returns ``None`` when *counter* is empty.
    """
    if not counter:
        return None
    df = pd.DataFrame(counter.most_common(), columns=[x_label, "count"])
    fig = px.bar(df, x=x_label, y="count", title=title, text="count")
    fig.update_traces(textposition="outside")
    fig.update_layout(
        margin=dict(l=20, r=20, t=50, b=20),
        height=380,
    )
    return fig


def _count_field(field: str) -> collections.Counter:
    """Tally ROWS by *field*, bucketing missing/empty values as ``(empty)``."""
    return collections.Counter(r.get(field) or "(empty)" for r in ROWS)


def stats_review_lens():
    """Bar chart of review counts per ``review_lens``."""
    return _bar_chart(_count_field("review_lens"), "Reviews by review_lens", "review_lens")


def stats_target_agent():
    """Bar chart of review counts per ``target`` agent."""
    return _bar_chart(_count_field("target"), "Reviews by target agent", "target")


def stats_result():
    """Bar chart of review counts per ``result`` value."""
    return _bar_chart(_count_field("result"), "Outcome distribution", "result")


def stats_model():
    """Bar chart of review counts per ``model``."""
    return _bar_chart(_count_field("model"), "Reviews by Claude model", "model")


def stats_year_month():
    """Bar chart of review counts per derived ``year_month`` bucket."""
    return _bar_chart(
        collections.Counter(r["year_month"] for r in ROWS),
        "Reviews by month",
        "year_month",
    )


def stats_summary_md() -> str:
    """Return the "Quick stats" Markdown table shown above the charts.

    Averages and percentages are reported as 0 when the dataset is empty
    rather than dividing by zero.
    """
    n = len(ROWS)

    def mean(field: str) -> float:
        return sum(r[field] for r in ROWS) / n if n else 0

    def tally(field: str, value: str) -> int:
        return sum(1 for r in ROWS if r.get(field) == value)

    def share(count: int) -> str:
        pct = count / n * 100 if n else 0.0
        return f"{count} ({pct:.1f}%)"

    avg_prompt = mean("prompt_words")
    avg_resp = mean("response_words")
    new_signal = tally("result", "new_signal")
    no_new_signal = tally("result", "no_new_signal")
    failed = tally("result", "failed")
    opus = tally("model", "opus")
    sonnet = tally("model", "sonnet")

    return (
        "### Quick stats\n\n"
        "| metric | value |\n"
        "|---|---|\n"
        f"| Total reviews | **{n}** |\n"
        f"| Avg prompt length | {avg_prompt:.1f} words |\n"
        f"| Avg response length | {avg_resp:.1f} words |\n"
        f"| Result: new_signal | {share(new_signal)} |\n"
        f"| Result: no_new_signal | {share(no_new_signal)} |\n"
        f"| Result: failed | {share(failed)} |\n"
        f"| Model: opus | {opus} |\n"
        f"| Model: sonnet | {sonnet} |\n"
    )


# ---------------------------------------------------------------------------
# Gradio app
# ---------------------------------------------------------------------------

# Single source for the dataset link so the intro and About texts cannot drift.
_DATASET_URL = f"https://huggingface.co/datasets/{DATASET_ID}"

# Plain f-string: every value is interpolated in one pass, so no second
# ``str.format`` pass can trip over braces in the interpolated text.
INTRO_MD = f"""
# Cross-Agent Review Queue Explorer

Browse, filter, and inspect **{len(ROWS)} anonymized cross-agent code-review checkpoints**
from the [Neo Genesis](https://neogenesis.app) monorepo, captured between
`2026-04-08` and `2026-04-14` (Codex requesting reviews from Claude
`neo-reviewer` / `neo-architect` agents).

Each row is a real bounded review request with:

- explicit **owner_goal** + **owner_intent** + **constraints** + **success_criteria**
- a single **review_lens** (risk, regression, goal-fit, etc.)
- a Claude **model** (sonnet / opus) and **mode** (review / architecture)
- the resulting **outcome** (`new_signal` / `no_new_signal` / `failed`)

This is the first publicly released dataset of bounded multi-agent code-review
transcripts. Read the full schema in the [dataset card]({_DATASET_URL}). Use this
Explorer to navigate and aggregate.

- **Dataset**: [`{DATASET_ID}`]({_DATASET_URL})
- **License**: CC-BY-4.0 (data) | MIT (this Space's app code)
- **Operator**: Yesol Heo / Neo Genesis
"""

# Body of the About tab.  Kept at column 0 so no Markdown line is indented
# into a code block; doubled braces are literal braces for the BibTeX entry.
_ABOUT_MD = f"""
### What is this?

A frozen, anonymized snapshot of {len(ROWS)} cross-agent code-review checkpoints
from the live SSOT (`.agent/shared-brain/cross-agent-review.md`) of
[Neo Genesis](https://neogenesis.app) — a 1-person AI-native operator running
**11 production AI business units**.

### Anonymization (6-tier)

The published dataset replaces:

- absolute file paths -> repo-relative paths
- internal hostnames / IPs -> tier names
- live API keys / tokens -> `[REDACTED]`
- personal contact info -> tier role names
- internal Telegram chat ids / Supabase project ids -> stable hashes
- secret-bearing scopes -> `[redacted-scope]`

while preserving the **structure of bounded reviews**: every transcript still
shows the owner_goal, the constraints, the review_lens, and the actual
prompt / response pair so you can study *how* the bounded-review protocol works.

### Why publish this?

Most multi-agent papers report aggregate metrics. The actual *transcripts* of
real bounded reviews — with explicit owner goals and review lenses — are rarely
public. This dataset is meant to be a working example for:

- agent-orchestration researchers studying handoff prompts
- code-review automation builders calibrating their own review schemas
- AI-governance teams evaluating bounded-review protocols against ad-hoc chats

### Resources

- **Dataset**: [`{DATASET_ID}`]({_DATASET_URL})
- **Neo Genesis homepage**: [neogenesis.app](https://neogenesis.app)
- **Operator**: Yesol Heo
- **Wikidata**: [Q139569680](https://www.wikidata.org/wiki/Q139569680)

### Cite

```bibtex
@misc{{neogenesis_cross_agent_review_queue_2026,
  title  = {{Cross-Agent Code Review Queue: 37 anonymized Codex<->Claude bounded review checkpoints}},
  author = {{Heo, Yesol}},
  year   = {{2026}},
  url    = {{https://huggingface.co/datasets/{DATASET_ID}}}
}}
```
"""

with gr.Blocks(title="Cross-Agent Review Queue Explorer", theme=gr.themes.Soft()) as demo:
    gr.Markdown(INTRO_MD)

    with gr.Tab("Browse"):
        gr.Markdown(
            "Filter the queue by review lens, target agent, outcome, or month. "
            "Click any row's `id` (e.g. `ccr-20260408-121555`) and paste it into "
            "the **Detail** tab to see the full transcript."
        )
        with gr.Row():
            lens_dd = gr.Dropdown(
                choices=[ALL_FILTER] + REVIEW_LENSES,
                value=ALL_FILTER,
                label="review_lens",
            )
            target_dd = gr.Dropdown(
                choices=[ALL_FILTER] + TARGET_AGENTS,
                value=ALL_FILTER,
                label="target agent",
            )
            result_dd = gr.Dropdown(
                choices=[ALL_FILTER] + RESULTS,
                value=ALL_FILTER,
                label="result",
            )
            ym_dd = gr.Dropdown(
                choices=[ALL_FILTER] + YEAR_MONTHS,
                value=ALL_FILTER,
                label="year-month",
            )
        table = gr.DataFrame(
            value=filter_rows(ALL_FILTER, ALL_FILTER, ALL_FILTER, ALL_FILTER),
            label=f"{len(ROWS)} reviews",
            wrap=True,
            interactive=False,
        )
        # Changing any one dropdown re-filters using the state of all four.
        for c in (lens_dd, target_dd, result_dd, ym_dd):
            c.change(
                filter_rows,
                inputs=[lens_dd, target_dd, result_dd, ym_dd],
                outputs=table,
            )

    with gr.Tab("Detail"):
        gr.Markdown(
            "Paste a checkpoint id (e.g. `ccr-20260408-121555`) to see the full "
            "anonymized transcript: owner goal, constraints, prompt, and Claude's response."
        )
        with gr.Row():
            cid_in = gr.Textbox(
                label="Checkpoint id",
                placeholder="ccr-20260408-121555",
                value=ROWS[0]["id"] if ROWS else "",
                scale=4,
            )
            view_btn = gr.Button("Show review", variant="primary", scale=1)
        # Pre-render the first checkpoint so the tab is not blank on load.
        detail_md = gr.Markdown(
            review_detail(ROWS[0]["id"]) if ROWS else "_dataset is empty_"
        )
        view_btn.click(review_detail, inputs=cid_in, outputs=detail_md)
        cid_in.submit(review_detail, inputs=cid_in, outputs=detail_md)

    with gr.Tab("Statistics"):
        # Charts are computed once at build time from the in-memory ROWS.
        gr.Markdown(stats_summary_md())
        with gr.Row():
            gr.Plot(value=stats_review_lens())
            gr.Plot(value=stats_target_agent())
        with gr.Row():
            gr.Plot(value=stats_result())
            gr.Plot(value=stats_model())
        gr.Plot(value=stats_year_month())

    with gr.Tab("About"):
        gr.Markdown(_ABOUT_MD)


if __name__ == "__main__":
    demo.queue().launch()