neogenesislab committed on
Commit
f405b00
Β·
verified Β·
1 Parent(s): 5e71de7

add app.py

Browse files
Files changed (1) hide show
  1. app.py +391 -0
app.py ADDED
@@ -0,0 +1,391 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ Cross-Agent Review Queue Explorer
3
+ =================================
4
+
5
+ Browse, filter, and analyze 37 anonymized cross-agent code-review checkpoints
6
+ from the Neo Genesis monorepo (Codex <-> Claude, 2026-04-08 ~ 2026-04-14).
7
+
8
+ Data source: ``neogenesislab/cross-agent-review-queue-2026`` (config=transcripts)
9
+ """
10
+ from __future__ import annotations
11
+
12
+ import collections
13
+ import re
14
+ from typing import Any
15
+
16
+ import gradio as gr
17
+ import pandas as pd
18
+ import plotly.express as px
19
+ from datasets import load_dataset
20
+
21
+ DATASET_ID = "neogenesislab/cross-agent-review-queue-2026"
22
+ DATASET_CONFIG = "transcripts"
23
+
24
+ # ---------------------------------------------------------------------------
25
+ # Cold-start data load
26
+ # ---------------------------------------------------------------------------
27
+ ds = load_dataset(DATASET_ID, DATASET_CONFIG, split="train")
28
+ ROWS: list[dict[str, Any]] = list(ds)
29
+
30
+
31
+ def _year_month(checkpoint_id: str) -> str:
32
+ """Parse ``ccr-20260408-121555`` -> ``2026-04``."""
33
+ m = re.search(r"(\d{4})(\d{2})\d{2}", checkpoint_id or "")
34
+ if m:
35
+ return f"{m.group(1)}-{m.group(2)}"
36
+ return "unknown"
37
+
38
+
39
+ def _word_count(text: str | None) -> int:
40
+ if not text:
41
+ return 0
42
+ return len(str(text).split())
43
+
44
+
45
# Derive per-row helper columns once at startup so the filter and stats
# code never re-parses ids or re-counts words.
for row in ROWS:
    row["year_month"] = _year_month(row.get("id", ""))
    row["prompt_words"] = _word_count(row.get("prompt"))
    row["response_words"] = _word_count(row.get("response"))
50
+
51
+
52
def _options(field: str) -> list[str]:
    """Sorted distinct values of *field* across ROWS (None mapped to "")."""
    return sorted({(r.get(field) or "") for r in ROWS})


REVIEW_LENSES = _options("review_lens")
TARGET_AGENTS = _options("target")
RESULTS = _options("result")
YEAR_MONTHS = sorted({r["year_month"] for r in ROWS})

# Dropdown sentinel meaning "do not filter on this field".
ALL_FILTER = "all"
58
+
59
+
60
+ # ---------------------------------------------------------------------------
61
+ # Tab 1: Browse
62
+ # ---------------------------------------------------------------------------
63
+
64
def filter_rows(
    review_lens: str,
    target_agent: str,
    result: str,
    year_month: str,
) -> pd.DataFrame:
    """Return a summary DataFrame of the checkpoints matching all filters.

    Each argument is a dropdown value; ``ALL_FILTER`` disables that
    particular filter.  Long titles are truncated to 80 characters and
    long review_lens values to 50 for display.
    """

    def _keep(row: dict[str, Any]) -> bool:
        # A row survives only if every active filter matches it.
        return (
            (review_lens == ALL_FILTER or (row.get("review_lens") or "") == review_lens)
            and (target_agent == ALL_FILTER or (row.get("target") or "") == target_agent)
            and (result == ALL_FILTER or (row.get("result") or "") == result)
            and (year_month == ALL_FILTER or row["year_month"] == year_month)
        )

    def _summary(row: dict[str, Any]) -> dict[str, Any]:
        headline = (row.get("title") or "").strip()
        if len(headline) > 80:
            headline = headline[:77] + "..."
        return {
            "id": row.get("id"),
            "year_month": row["year_month"],
            "target": row.get("target"),
            "model": row.get("model"),
            "review_lens": (row.get("review_lens") or "")[:50],
            "result": row.get("result"),
            "title": headline,
            "response_words": row.get("response_words"),
        }

    return pd.DataFrame([_summary(r) for r in ROWS if _keep(r)])
94
+
95
+
96
+ # ---------------------------------------------------------------------------
97
+ # Tab 2: Detail
98
+ # ---------------------------------------------------------------------------
99
+
100
def review_detail(checkpoint_id: str) -> str:
    """Render one checkpoint's full anonymized transcript as Markdown.

    Args:
        checkpoint_id: id such as ``ccr-20260408-121555``; surrounding
            whitespace is tolerated.

    Returns:
        A Markdown document for the matching row, or an italicized hint
        when the id is empty, or an error line when it is not in ``ROWS``.
    """
    if not checkpoint_id:
        return "_Pick a checkpoint id (e.g. `ccr-20260408-121555`)._"
    cid = checkpoint_id.strip()
    # (field, heading) pairs rendered identically as "### heading" plus a
    # blockquote — keeps the six near-duplicate if-blocks of the original
    # in one place, in the same order.
    quote_sections = (
        ("title", "Title"),
        ("owner_goal", "Owner goal"),
        ("owner_intent", "Owner intent"),
        ("constraints", "Constraints"),
        ("success_criteria", "Success criteria"),
        ("ask", "Ask"),
    )
    for r in ROWS:
        if r.get("id") != cid:
            continue
        # Header: metadata lines end with a trailing space (Markdown hard
        # line break).
        parts = [
            f"## {r.get('id')}",
            "",
            f"**created_at**: `{r.get('created_at')}` ",
            f"**requester** -> **target**: `{r.get('requester')}` -> `{r.get('target')}` ",
            f"**mode**: `{r.get('mode')}` | **model**: `{r.get('model')}` | **scope**: `{r.get('scope')}` ",
            f"**review_lens**: `{r.get('review_lens')}` | **result**: `{r.get('result')}` ",
            "",
        ]
        for key, heading in quote_sections:
            if r.get(key):
                parts.append(f"### {heading}")
                parts.append(f"> {r.get(key)}")
                parts.append("")
        # Prompt is fenced as a code block; response is left as Markdown.
        if r.get("prompt"):
            parts.append(f"### Prompt ({r.get('prompt_words')} words)")
            parts.append("```")
            parts.append(str(r.get("prompt")))
            parts.append("```")
            parts.append("")
        if r.get("response"):
            parts.append(f"### Response ({r.get('response_words')} words)")
            parts.append("")
            parts.append(str(r.get("response")))
            parts.append("")
        return "\n".join(parts)
    return f"_Checkpoint `{cid}` not found in the {len(ROWS)}-row dataset._"
151
+
152
+
153
+ # ---------------------------------------------------------------------------
154
+ # Tab 3: Statistics
155
+ # ---------------------------------------------------------------------------
156
+
157
def _bar_chart(counter: collections.Counter, title: str, x_label: str):
    """Build a labeled Plotly bar chart from *counter*.

    Returns None for an empty counter so callers can pass the result
    straight to ``gr.Plot``.
    """
    if not counter:
        return None
    # most_common() sorts bars by descending count.
    frame = pd.DataFrame(counter.most_common(), columns=[x_label, "count"])
    fig = px.bar(frame, x=x_label, y="count", title=title, text="count")
    fig.update_traces(textposition="outside")
    fig.update_layout(margin=dict(l=20, r=20, t=50, b=20), height=380)
    return fig
169
+
170
+
171
def stats_review_lens():
    """Bar chart of review counts per review_lens value."""
    counts = collections.Counter((r.get("review_lens") or "(empty)") for r in ROWS)
    return _bar_chart(counts, "Reviews by review_lens", "review_lens")
177
+
178
+
179
def stats_target_agent():
    """Bar chart of review counts per target agent."""
    counts = collections.Counter((r.get("target") or "(empty)") for r in ROWS)
    return _bar_chart(counts, "Reviews by target agent", "target")
185
+
186
+
187
def stats_result():
    """Bar chart of outcome (result) counts."""
    counts = collections.Counter((r.get("result") or "(empty)") for r in ROWS)
    return _bar_chart(counts, "Outcome distribution", "result")
193
+
194
+
195
def stats_model():
    """Bar chart of review counts per Claude model."""
    counts = collections.Counter((r.get("model") or "(empty)") for r in ROWS)
    return _bar_chart(counts, "Reviews by Claude model", "model")
201
+
202
+
203
def stats_year_month():
    """Bar chart of review counts per calendar month."""
    counts = collections.Counter(r["year_month"] for r in ROWS)
    return _bar_chart(counts, "Reviews by month", "year_month")
209
+
210
+
211
def stats_summary_md() -> str:
    """Markdown table of headline metrics over the whole dataset.

    Fixed vs. the original: the percentage columns divided by ``n``
    unconditionally, so an empty dataset raised ZeroDivisionError even
    though the averages were already guarded.  All divisions now fall
    back to 0 when there are no rows.
    """
    n = len(ROWS)
    avg_resp = sum(r["response_words"] for r in ROWS) / n if n else 0
    avg_prompt = sum(r["prompt_words"] for r in ROWS) / n if n else 0
    # One pass each instead of five generator scans.
    results = collections.Counter(r.get("result") for r in ROWS)
    models = collections.Counter(r.get("model") for r in ROWS)
    new_signal = results["new_signal"]
    no_new_signal = results["no_new_signal"]
    failed = results["failed"]

    def pct(count: int) -> float:
        # Guard the n == 0 case (the original divided unconditionally).
        return count / n * 100 if n else 0.0

    return (
        f"### Quick stats\n\n"
        f"| metric | value |\n"
        f"|---|---|\n"
        f"| Total reviews | **{n}** |\n"
        f"| Avg prompt length | {avg_prompt:.1f} words |\n"
        f"| Avg response length | {avg_resp:.1f} words |\n"
        f"| Result: new_signal | {new_signal} ({pct(new_signal):.1f}%) |\n"
        f"| Result: no_new_signal | {no_new_signal} ({pct(no_new_signal):.1f}%) |\n"
        f"| Result: failed | {failed} ({pct(failed):.1f}%) |\n"
        f"| Model: opus | {models['opus']} |\n"
        f"| Model: sonnet | {models['sonnet']} |\n"
    )
233
+
234
+
235
+ # ---------------------------------------------------------------------------
236
+ # Gradio app
237
+ # ---------------------------------------------------------------------------
238
+
239
# Intro copy shown at the top of the app.  The original mixed an f-string
# with a trailing ``.format(...)`` call (an escaped ``{{}}`` placeholder
# filled afterwards) — fragile, since any other literal braces in the text
# would break the format call.  The dataset URL is now interpolated
# directly; the rendered Markdown is unchanged.
INTRO_MD = f"""
# Cross-Agent Review Queue Explorer

Browse, filter, and inspect **{len(ROWS)} anonymized cross-agent code-review checkpoints**
from the [Neo Genesis](https://neogenesis.app) monorepo, captured between
`2026-04-08` and `2026-04-14` (Codex requesting reviews from Claude
`neo-reviewer` / `neo-architect` agents).

Each row is a real bounded review request with:
- explicit **owner_goal** + **owner_intent** + **constraints** + **success_criteria**
- a single **review_lens** (risk, regression, goal-fit, etc.)
- a Claude **model** (sonnet / opus) and **mode** (review / architecture)
- the resulting **outcome** (`new_signal` / `no_new_signal` / `failed`)

This is the first publicly released dataset of bounded multi-agent code-review
transcripts. Read the full schema in the
[dataset card](https://huggingface.co/datasets/{DATASET_ID}). Use this Explorer to navigate and aggregate.

- **Dataset**: [`{DATASET_ID}`](https://huggingface.co/datasets/{DATASET_ID})
- **License**: CC-BY-4.0 (data) | MIT (this Space's app code)
- **Operator**: Yesol Heo / Neo Genesis
"""
261
+
262
+
263
# Assemble the four-tab Gradio UI.  All data shown at load time (table,
# detail pane, stats) is computed eagerly here, at import time.
with gr.Blocks(title="Cross-Agent Review Queue Explorer", theme=gr.themes.Soft()) as demo:
    gr.Markdown(INTRO_MD)

    # --- Tab 1: filterable summary table -------------------------------
    with gr.Tab("Browse"):
        gr.Markdown(
            "Filter the queue by review lens, target agent, outcome, or month. "
            "Click any row's `id` (e.g. `ccr-20260408-121555`) and paste it into "
            "the **Detail** tab to see the full transcript."
        )
        with gr.Row():
            # Every dropdown gets an extra "all" choice that disables it.
            lens_dd = gr.Dropdown(
                choices=[ALL_FILTER] + REVIEW_LENSES,
                value=ALL_FILTER,
                label="review_lens",
            )
            target_dd = gr.Dropdown(
                choices=[ALL_FILTER] + TARGET_AGENTS,
                value=ALL_FILTER,
                label="target agent",
            )
            result_dd = gr.Dropdown(
                choices=[ALL_FILTER] + RESULTS,
                value=ALL_FILTER,
                label="result",
            )
            ym_dd = gr.Dropdown(
                choices=[ALL_FILTER] + YEAR_MONTHS,
                value=ALL_FILTER,
                label="year-month",
            )
        # Start unfiltered so the full queue is visible on load.
        table = gr.DataFrame(
            value=filter_rows(ALL_FILTER, ALL_FILTER, ALL_FILTER, ALL_FILTER),
            label=f"{len(ROWS)} reviews",
            wrap=True,
            interactive=False,
        )
        # Changing any dropdown re-runs filter_rows with all four values.
        for c in (lens_dd, target_dd, result_dd, ym_dd):
            c.change(
                filter_rows,
                inputs=[lens_dd, target_dd, result_dd, ym_dd],
                outputs=table,
            )

    # --- Tab 2: single-checkpoint transcript ---------------------------
    with gr.Tab("Detail"):
        gr.Markdown(
            "Paste a checkpoint id (e.g. `ccr-20260408-121555`) to see the full "
            "anonymized transcript: owner goal, constraints, prompt, and Claude's response."
        )
        with gr.Row():
            cid_in = gr.Textbox(
                label="Checkpoint id",
                placeholder="ccr-20260408-121555",
                # Pre-fill with the first row's id; empty string if no data.
                value=ROWS[0]["id"] if ROWS else "",
                scale=4,
            )
            view_btn = gr.Button("Show review", variant="primary", scale=1)
        # Pre-render the first transcript so the tab is not blank on load.
        detail_md = gr.Markdown(
            review_detail(ROWS[0]["id"]) if ROWS else "_dataset is empty_"
        )
        # Both the button and pressing Enter in the textbox trigger lookup.
        view_btn.click(review_detail, inputs=cid_in, outputs=detail_md)
        cid_in.submit(review_detail, inputs=cid_in, outputs=detail_md)

    # --- Tab 3: static aggregate charts (computed once at startup) -----
    with gr.Tab("Statistics"):
        gr.Markdown(stats_summary_md())
        with gr.Row():
            gr.Plot(value=stats_review_lens())
            gr.Plot(value=stats_target_agent())
        with gr.Row():
            gr.Plot(value=stats_result())
            gr.Plot(value=stats_model())
        gr.Plot(value=stats_year_month())

    # --- Tab 4: static About / citation text ---------------------------
    with gr.Tab("About"):
        gr.Markdown(
            f"""
### What is this?

A frozen, anonymized snapshot of {len(ROWS)} cross-agent code-review checkpoints
from the live SSOT (`.agent/shared-brain/cross-agent-review.md`) of
[Neo Genesis](https://neogenesis.app) — a 1-person AI-native operator running
**11 production AI business units**.

### Anonymization (6-tier)

The published dataset replaces:
- absolute file paths -> repo-relative paths
- internal hostnames / IPs -> tier names
- live API keys / tokens -> `[REDACTED]`
- personal contact info -> tier role names
- internal Telegram chat ids / Supabase project ids -> stable hashes
- secret-bearing scopes -> `[redacted-scope]`

while preserving the **structure of bounded reviews**: every transcript still
shows the owner_goal, the constraints, the review_lens, and the actual
prompt / response pair so you can study *how* the bounded-review protocol works.

### Why publish this?

Most multi-agent papers report aggregate metrics. The actual *transcripts* of
real bounded reviews — with explicit owner goals and review lenses — are rarely
public. This dataset is meant to be a working example for:

- agent-orchestration researchers studying handoff prompts
- code-review automation builders calibrating their own review schemas
- AI-governance teams evaluating bounded-review protocols against ad-hoc chats

### Resources

- **Dataset**: <https://huggingface.co/datasets/{DATASET_ID}>
- **Neo Genesis homepage**: <https://neogenesis.app>
- **Operator**: <https://huggingface.co/neogenesislab>
- **Wikidata**: [Q139569680](https://www.wikidata.org/wiki/Q139569680)

### Cite

```bibtex
@misc{{neogenesis_cross_agent_review_queue_2026,
  title = {{Cross-Agent Code Review Queue: 37 anonymized Codex<->Claude bounded review checkpoints}},
  author = {{Heo, Yesol}},
  year = {{2026}},
  url = {{https://huggingface.co/datasets/{DATASET_ID}}}
}}
```
"""
        )
388
+
389
+
390
# Script entry point: queue() enables Gradio's request queue before
# launching the server.
if __name__ == "__main__":
    demo.queue().launch()