Spaces:

neogenesislab
/

cross-agent-review-queue-explorer

Runtime error

File size: 14,078 Bytes

f405b00

"""
Cross-Agent Review Queue Explorer
=================================

Browse, filter, and analyze 37 anonymized cross-agent code-review checkpoints
from the Neo Genesis monorepo (Codex <-> Claude, 2026-04-08 ~ 2026-04-14).

Data source: ``neogenesislab/cross-agent-review-queue-2026`` (config=transcripts)
"""
from __future__ import annotations

import collections
import re
from typing import Any

import gradio as gr
import pandas as pd
import plotly.express as px
from datasets import load_dataset

# Dataset repo id and config name handed to ``load_dataset`` below; the id is
# also interpolated into the dataset links shown in the UI text.
DATASET_ID = "neogenesislab/cross-agent-review-queue-2026"
DATASET_CONFIG = "transcripts"

# ---------------------------------------------------------------------------
# Cold-start data load
# ---------------------------------------------------------------------------
# Executed at import time: if the load raises, the module never finishes
# importing and no UI is built.
ds = load_dataset(DATASET_ID, DATASET_CONFIG, split="train")
# Materialize the split into a plain list of per-row dicts. The code visible
# in this module works on ROWS from here on and annotates the dicts in place.
ROWS: list[dict[str, Any]] = list(ds)


def _year_month(checkpoint_id: str) -> str:
    """Derive a ``YYYY-MM`` bucket from a checkpoint id.

    The leftmost run of eight consecutive digits in the id is read as
    ``YYYYMMDD`` (e.g. ``ccr-20260408-121555`` -> ``2026-04``). The digits are
    not validated as a real date. A falsy id, or one without such a run,
    yields ``"unknown"``.
    """
    found = re.search(r"(\d{4})(\d{2})\d{2}", checkpoint_id or "")
    if found is None:
        return "unknown"
    year, month = found.groups()
    return "-".join((year, month))


def _word_count(text: str | None) -> int:
    if not text:
        return 0
    return len(str(text).split())


# Derived columns are computed once, at import time, and stored on the row
# dicts themselves so later code can read them as ordinary keys.
for r in ROWS:
    r.update(
        year_month=_year_month(r.get("id", "")),
        prompt_words=_word_count(r.get("prompt")),
        response_words=_word_count(r.get("response")),
    )


# Sorted distinct values offered by the Browse-tab dropdowns. A missing or
# ``None`` field is folded into the empty string.
REVIEW_LENSES, TARGET_AGENTS, RESULTS = (
    sorted({row.get(field, "") or "" for row in ROWS})
    for field in ("review_lens", "target", "result")
)
YEAR_MONTHS = sorted({row["year_month"] for row in ROWS})

# Dropdown sentinel meaning "do not filter on this field".
ALL_FILTER = "all"


# ---------------------------------------------------------------------------
# Tab 1: Browse
# ---------------------------------------------------------------------------

def filter_rows(
    review_lens: str,
    target_agent: str,
    result: str,
    year_month: str,
) -> pd.DataFrame:
    """Build the Browse-tab table for the selected filter values.

    Each argument is either ``ALL_FILTER`` (no restriction on that field) or
    the exact value a row must carry. A missing or ``None`` field on a row is
    compared as ``""``. A row is kept only if every active filter matches.

    Returns:
        A DataFrame with one row per matching checkpoint and the columns
        ``id, year_month, target, model, review_lens, result, title,
        response_words``. Titles longer than 80 characters are shortened to
        77 characters plus ``...``; ``review_lens`` is cut to 50 characters.
        The columns are present even when nothing matches, so the table
        keeps its header instead of collapsing to a column-less frame.
    """
    columns = [
        "id",
        "year_month",
        "target",
        "model",
        "review_lens",
        "result",
        "title",
        "response_words",
    ]
    # Only the filters that actually restrict something are checked per row.
    active = [
        (field, wanted)
        for field, wanted in (
            ("review_lens", review_lens),
            ("target", target_agent),
            ("result", result),
            ("year_month", year_month),
        )
        if wanted != ALL_FILTER
    ]

    records = []
    for r in ROWS:
        if any((r.get(field) or "") != wanted for field, wanted in active):
            continue
        title = (r.get("title") or "").strip()
        if len(title) > 80:
            title = title[:77] + "..."
        records.append({
            "id": r.get("id"),
            "year_month": r["year_month"],
            "target": r.get("target"),
            "model": r.get("model"),
            "review_lens": (r.get("review_lens") or "")[:50],
            "result": r.get("result"),
            "title": title,
            "response_words": r.get("response_words"),
        })
    # Passing ``columns`` pins the schema; without it an empty ``records``
    # list would produce a DataFrame with no columns at all.
    return pd.DataFrame(records, columns=columns)


# ---------------------------------------------------------------------------
# Tab 2: Detail
# ---------------------------------------------------------------------------

# (row key, heading) pairs rendered as optional blockquote sections, in order.
_DETAIL_SECTIONS = (
    ("title", "Title"),
    ("owner_goal", "Owner goal"),
    ("owner_intent", "Owner intent"),
    ("constraints", "Constraints"),
    ("success_criteria", "Success criteria"),
    ("ask", "Ask"),
)


def _blockquote(text: object) -> str:
    """Return *text* as a Markdown blockquote with every line quoted."""
    return "\n".join(
        f"> {line}" if line else ">" for line in str(text).splitlines()
    )


def _fenced(text: object) -> str:
    """Wrap *text* in a backtick fence longer than any backtick run inside it."""
    body = str(text)
    longest_run = max((len(run) for run in re.findall(r"`+", body)), default=0)
    fence = "`" * max(3, longest_run + 1)
    return f"{fence}\n{body}\n{fence}"


def review_detail(checkpoint_id: str) -> str:
    """Render one checkpoint as Markdown for the Detail tab.

    Args:
        checkpoint_id: The ``id`` of the row to show; surrounding whitespace
            is ignored.

    Returns:
        A hint when the id is empty or whitespace-only, a "not found" notice
        when no row has that id, otherwise the full write-up: a metadata
        header, one blockquote section per non-empty descriptive field, the
        prompt inside a code fence, and the response as raw Markdown.
    """
    cid = (checkpoint_id or "").strip()
    if not cid:
        return "_Pick a checkpoint id (e.g. `ccr-20260408-121555`)._"

    row = next((r for r in ROWS if r.get("id") == cid), None)
    if row is None:
        return f"_Checkpoint `{cid}` not found in the {len(ROWS)}-row dataset._"

    # Two trailing spaces force a Markdown line break between header lines.
    parts = [
        f"## {row.get('id')}",
        "",
        f"**created_at**: `{row.get('created_at')}`  ",
        f"**requester** -> **target**: `{row.get('requester')}` -> `{row.get('target')}`  ",
        f"**mode**: `{row.get('mode')}` | **model**: `{row.get('model')}` | **scope**: `{row.get('scope')}`  ",
        f"**review_lens**: `{row.get('review_lens')}` | **result**: `{row.get('result')}`  ",
        "",
    ]
    for key, heading in _DETAIL_SECTIONS:
        value = row.get(key)
        if value:
            # Quote every line so a multi-line value stays inside the quote.
            parts += [f"### {heading}", _blockquote(value), ""]
    if row.get("prompt"):
        # The fence adapts to the prompt so embedded ``` blocks cannot close
        # it early and spill the rest of the prompt into the page.
        parts += [
            f"### Prompt ({row.get('prompt_words')} words)",
            _fenced(row.get("prompt")),
            "",
        ]
    if row.get("response"):
        parts += [
            f"### Response ({row.get('response_words')} words)",
            "",
            str(row.get("response")),
            "",
        ]
    return "\n".join(parts)


# ---------------------------------------------------------------------------
# Tab 3: Statistics
# ---------------------------------------------------------------------------

def _bar_chart(counter: collections.Counter, title: str, x_label: str):
    """Turn a value -> count tally into a Plotly bar chart.

    Bars follow ``Counter.most_common()`` order and carry their count as a
    label above the bar. *x_label* names the category column and therefore
    the x axis. Returns ``None`` when *counter* is empty.
    """
    if not counter:
        return None
    frame = pd.DataFrame(counter.most_common(), columns=[x_label, "count"])
    figure = px.bar(frame, x=x_label, y="count", title=title, text="count")
    figure.update_traces(textposition="outside")
    figure.update_layout(
        height=380,
        margin={"l": 20, "r": 20, "t": 50, "b": 20},
    )
    return figure


def stats_review_lens():
    """Bar chart of review counts per ``review_lens`` value.

    Rows with a missing or empty lens are grouped under ``"(empty)"``.
    """
    lenses = [row.get("review_lens") or "(empty)" for row in ROWS]
    return _bar_chart(
        collections.Counter(lenses),
        title="Reviews by review_lens",
        x_label="review_lens",
    )


def stats_target_agent():
    """Bar chart of review counts per ``target`` value.

    Rows with a missing or empty target are grouped under ``"(empty)"``.
    """
    targets = [row.get("target") or "(empty)" for row in ROWS]
    return _bar_chart(
        collections.Counter(targets),
        title="Reviews by target agent",
        x_label="target",
    )


def stats_result():
    """Bar chart of review counts per ``result`` value.

    Rows with a missing or empty result are grouped under ``"(empty)"``.
    """
    outcomes = [row.get("result") or "(empty)" for row in ROWS]
    return _bar_chart(
        collections.Counter(outcomes),
        title="Outcome distribution",
        x_label="result",
    )


def stats_model():
    """Bar chart of review counts per ``model`` value.

    Rows with a missing or empty model are grouped under ``"(empty)"``.
    """
    models = [row.get("model") or "(empty)" for row in ROWS]
    return _bar_chart(
        collections.Counter(models),
        title="Reviews by Claude model",
        x_label="model",
    )


def stats_year_month():
    """Bar chart of review counts per precomputed ``year_month`` bucket."""
    months = [row["year_month"] for row in ROWS]
    return _bar_chart(
        collections.Counter(months),
        title="Reviews by month",
        x_label="year_month",
    )


def stats_summary_md() -> str:
    """Render the "Quick stats" Markdown table for the Statistics tab.

    The table reports the number of rows, the mean prompt and response word
    counts, the count and percentage of rows for each of the results
    ``new_signal`` / ``no_new_signal`` / ``failed``, and the count of rows
    for the models ``opus`` / ``sonnet``.

    With an empty dataset every mean and percentage is reported as ``0.0``
    rather than raising ``ZeroDivisionError``.
    """
    n = len(ROWS)

    def mean(field: str) -> float:
        return sum(r[field] for r in ROWS) / n if n else 0.0

    def count(field: str, value: str) -> int:
        return sum(1 for r in ROWS if r.get(field) == value)

    def share(hits: int) -> float:
        # Same guard as the means: no rows means a 0% share, not a crash.
        return hits / n * 100 if n else 0.0

    table = [
        ("Total reviews", f"**{n}**"),
        ("Avg prompt length", f"{mean('prompt_words'):.1f} words"),
        ("Avg response length", f"{mean('response_words'):.1f} words"),
    ]
    for outcome in ("new_signal", "no_new_signal", "failed"):
        hits = count("result", outcome)
        table.append((f"Result: {outcome}", f"{hits} ({share(hits):.1f}%)"))
    for model in ("opus", "sonnet"):
        table.append((f"Model: {model}", str(count("model", model))))

    header = "### Quick stats\n\n| metric | value |\n|---|---|\n"
    return header + "".join(f"| {metric} | {value} |\n" for metric, value in table)


# ---------------------------------------------------------------------------
# Gradio app
# ---------------------------------------------------------------------------

# Landing-page Markdown. A single f-string interpolates the row count and the
# dataset id directly, so no second ``str.format`` pass runs over the text;
# such a pass would fail on any literal ``{`` or ``}`` that ended up in it.
INTRO_MD = f"""
# Cross-Agent Review Queue Explorer

Browse, filter, and inspect **{len(ROWS)} anonymized cross-agent code-review checkpoints**
from the [Neo Genesis](https://neogenesis.app) monorepo, captured between
`2026-04-08` and `2026-04-14` (Codex requesting reviews from Claude
`neo-reviewer` / `neo-architect` agents).

Each row is a real bounded review request with:
- explicit **owner_goal** + **owner_intent** + **constraints** + **success_criteria**
- a single **review_lens** (risk, regression, goal-fit, etc.)
- a Claude **model** (sonnet / opus) and **mode** (review / architecture)
- the resulting **outcome** (`new_signal` / `no_new_signal` / `failed`)

This is the first publicly released dataset of bounded multi-agent code-review
transcripts. Read the full schema in the
[dataset card](https://huggingface.co/datasets/{DATASET_ID}). Use this Explorer to navigate and aggregate.

- **Dataset**: [`{DATASET_ID}`](https://huggingface.co/datasets/{DATASET_ID})
- **License**: CC-BY-4.0 (data) | MIT (this Space's app code)
- **Operator**: Yesol Heo / Neo Genesis
"""


# Build the four-tab UI (Browse / Detail / Statistics / About). This is
# top-level code, so it runs once when the module is imported; ``demo`` is the
# object launched by the ``__main__`` guard at the bottom of the file.
with gr.Blocks(title="Cross-Agent Review Queue Explorer", theme=gr.themes.Soft()) as demo:
    gr.Markdown(INTRO_MD)

    # --- Browse: four dropdown filters feeding one read-only table ---------
    with gr.Tab("Browse"):
        gr.Markdown(
            "Filter the queue by review lens, target agent, outcome, or month. "
            "Click any row's `id` (e.g. `ccr-20260408-121555`) and paste it into "
            "the **Detail** tab to see the full transcript."
        )
        # Every dropdown offers the ALL_FILTER sentinel first, followed by the
        # distinct values precomputed from ROWS, and starts on the sentinel.
        with gr.Row():
            lens_dd = gr.Dropdown(
                choices=[ALL_FILTER] + REVIEW_LENSES,
                value=ALL_FILTER,
                label="review_lens",
            )
            target_dd = gr.Dropdown(
                choices=[ALL_FILTER] + TARGET_AGENTS,
                value=ALL_FILTER,
                label="target agent",
            )
            result_dd = gr.Dropdown(
                choices=[ALL_FILTER] + RESULTS,
                value=ALL_FILTER,
                label="result",
            )
            ym_dd = gr.Dropdown(
                choices=[ALL_FILTER] + YEAR_MONTHS,
                value=ALL_FILTER,
                label="year-month",
            )
        # Initial contents are the unfiltered table. NOTE(review): the label
        # is fixed to the total row count here and nothing below updates it,
        # so it presumably keeps showing the total after filtering - confirm.
        table = gr.DataFrame(
            value=filter_rows(ALL_FILTER, ALL_FILTER, ALL_FILTER, ALL_FILTER),
            label=f"{len(ROWS)} reviews",
            wrap=True,
            interactive=False,
        )
        # A change on any one dropdown re-runs filter_rows with the current
        # value of all four, in the order of filter_rows' parameters.
        for c in (lens_dd, target_dd, result_dd, ym_dd):
            c.change(
                filter_rows,
                inputs=[lens_dd, target_dd, result_dd, ym_dd],
                outputs=table,
            )

    # --- Detail: look up a single checkpoint by id -------------------------
    with gr.Tab("Detail"):
        gr.Markdown(
            "Paste a checkpoint id (e.g. `ccr-20260408-121555`) to see the full "
            "anonymized transcript: owner goal, constraints, prompt, and Claude's response."
        )
        with gr.Row():
            # Pre-filled with the first row's id when the dataset is non-empty.
            cid_in = gr.Textbox(
                label="Checkpoint id",
                placeholder="ccr-20260408-121555",
                value=ROWS[0]["id"] if ROWS else "",
                scale=4,
            )
            view_btn = gr.Button("Show review", variant="primary", scale=1)
        # Rendered up front for the first row so the tab is not blank.
        detail_md = gr.Markdown(
            review_detail(ROWS[0]["id"]) if ROWS else "_dataset is empty_"
        )
        # Both the button click and the textbox submit event call the same
        # renderer with the textbox value.
        view_btn.click(review_detail, inputs=cid_in, outputs=detail_md)
        cid_in.submit(review_detail, inputs=cid_in, outputs=detail_md)

    # --- Statistics: summary table plus five bar charts --------------------
    # The stats_* functions are called here, while the layout is being built,
    # and their return values are passed as the components' initial values.
    with gr.Tab("Statistics"):
        gr.Markdown(stats_summary_md())
        with gr.Row():
            gr.Plot(value=stats_review_lens())
            gr.Plot(value=stats_target_agent())
        with gr.Row():
            gr.Plot(value=stats_result())
            gr.Plot(value=stats_model())
        gr.Plot(value=stats_year_month())

    # --- About: static background text -------------------------------------
    # The text is an f-string, so the doubled braces in the BibTeX entry are
    # escapes that come out as single literal braces.
    with gr.Tab("About"):
        gr.Markdown(
            f"""
### What is this?

A frozen, anonymized snapshot of {len(ROWS)} cross-agent code-review checkpoints
from the live SSOT (`.agent/shared-brain/cross-agent-review.md`) of
[Neo Genesis](https://neogenesis.app) — a 1-person AI-native operator running
**11 production AI business units**.

### Anonymization (6-tier)

The published dataset replaces:
- absolute file paths -> repo-relative paths
- internal hostnames / IPs -> tier names
- live API keys / tokens -> `[REDACTED]`
- personal contact info -> tier role names
- internal Telegram chat ids / Supabase project ids -> stable hashes
- secret-bearing scopes -> `[redacted-scope]`

while preserving the **structure of bounded reviews**: every transcript still
shows the owner_goal, the constraints, the review_lens, and the actual
prompt / response pair so you can study *how* the bounded-review protocol works.

### Why publish this?

Most multi-agent papers report aggregate metrics. The actual *transcripts* of
real bounded reviews — with explicit owner goals and review lenses — are rarely
public. This dataset is meant to be a working example for:

- agent-orchestration researchers studying handoff prompts
- code-review automation builders calibrating their own review schemas
- AI-governance teams evaluating bounded-review protocols against ad-hoc chats

### Resources

- **Dataset**: <https://huggingface.co/datasets/{DATASET_ID}>
- **Neo Genesis homepage**: <https://neogenesis.app>
- **Operator**: <https://huggingface.co/neogenesislab>
- **Wikidata**: [Q139569680](https://www.wikidata.org/wiki/Q139569680)

### Cite

```bibtex
@misc{{neogenesis_cross_agent_review_queue_2026,
  title  = {{Cross-Agent Code Review Queue: 37 anonymized Codex<->Claude bounded review checkpoints}},
  author = {{Heo, Yesol}},
  year   = {{2026}},
  url    = {{https://huggingface.co/datasets/{DATASET_ID}}}
}}
```
"""
        )


# Start the server only when this file is executed as a script; importing the
# module builds ``demo`` but does not launch it. ``queue()`` is called on the
# app before ``launch()``.
if __name__ == "__main__":
    demo.queue().launch()