"""
Cross-Agent Review Queue Explorer
=================================
Browse, filter, and analyze 37 anonymized cross-agent code-review checkpoints
from the Neo Genesis monorepo (Codex <-> Claude, 2026-04-08 to 2026-04-14).
Data source: ``neogenesislab/cross-agent-review-queue-2026`` (config=transcripts)
"""
from __future__ import annotations
import collections
import re
from typing import Any
import gradio as gr
import pandas as pd
import plotly.express as px
from datasets import load_dataset
DATASET_ID = "neogenesislab/cross-agent-review-queue-2026"
DATASET_CONFIG = "transcripts"
# ---------------------------------------------------------------------------
# Cold-start data load
# ---------------------------------------------------------------------------
ds = load_dataset(DATASET_ID, DATASET_CONFIG, split="train")
ROWS: list[dict[str, Any]] = list(ds)
def _year_month(checkpoint_id: str) -> str:
"""Parse ``ccr-20260408-121555`` -> ``2026-04``."""
m = re.search(r"(\d{4})(\d{2})\d{2}", checkpoint_id or "")
if m:
return f"{m.group(1)}-{m.group(2)}"
return "unknown"
def _word_count(text: str | None) -> int:
if not text:
return 0
return len(str(text).split())
# Pre-compute derived columns once.
for r in ROWS:
r["year_month"] = _year_month(r.get("id", ""))
r["prompt_words"] = _word_count(r.get("prompt"))
r["response_words"] = _word_count(r.get("response"))
REVIEW_LENSES = sorted({r.get("review_lens", "") or "" for r in ROWS})
TARGET_AGENTS = sorted({r.get("target", "") or "" for r in ROWS})
RESULTS = sorted({r.get("result", "") or "" for r in ROWS})
YEAR_MONTHS = sorted({r["year_month"] for r in ROWS})
ALL_FILTER = "all"
# ---------------------------------------------------------------------------
# Tab 1: Browse
# ---------------------------------------------------------------------------
def filter_rows(
review_lens: str,
target_agent: str,
result: str,
year_month: str,
) -> pd.DataFrame:
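    """Apply the four dropdown filters and return a display-ready DataFrame."""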
out = []
for r in ROWS:
if review_lens != ALL_FILTER and (r.get("review_lens") or "") != review_lens:
continue
if target_agent != ALL_FILTER and (r.get("target") or "") != target_agent:
continue
if result != ALL_FILTER and (r.get("result") or "") != result:
continue
if year_month != ALL_FILTER and r["year_month"] != year_month:
continue
title = (r.get("title") or "").strip()
if len(title) > 80:
title = title[:77] + "..."
out.append({
"id": r.get("id"),
"year_month": r["year_month"],
"target": r.get("target"),
"model": r.get("model"),
"review_lens": (r.get("review_lens") or "")[:50],
"result": r.get("result"),
"title": title,
"response_words": r.get("response_words"),
})
return pd.DataFrame(out)
# ---------------------------------------------------------------------------
# Tab 2: Detail
# ---------------------------------------------------------------------------
def review_detail(checkpoint_id: str) -> str:
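    """Render one checkpoint as a full markdown transcript (or a not-found note)."""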
if not checkpoint_id:
return "_Pick a checkpoint id (e.g. `ccr-20260408-121555`)._"
cid = checkpoint_id.strip()
for r in ROWS:
if r.get("id") == cid:
parts = []
parts.append(f"## {r.get('id')}")
parts.append("")
parts.append(f"**created_at**: `{r.get('created_at')}` ")
parts.append(f"**requester** -> **target**: `{r.get('requester')}` -> `{r.get('target')}` ")
parts.append(f"**mode**: `{r.get('mode')}` | **model**: `{r.get('model')}` | **scope**: `{r.get('scope')}` ")
parts.append(f"**review_lens**: `{r.get('review_lens')}` | **result**: `{r.get('result')}` ")
parts.append("")
if r.get("title"):
parts.append(f"### Title")
parts.append(f"> {r.get('title')}")
parts.append("")
if r.get("owner_goal"):
parts.append(f"### Owner goal")
parts.append(f"> {r.get('owner_goal')}")
parts.append("")
if r.get("owner_intent"):
parts.append(f"### Owner intent")
parts.append(f"> {r.get('owner_intent')}")
parts.append("")
if r.get("constraints"):
parts.append(f"### Constraints")
parts.append(f"> {r.get('constraints')}")
parts.append("")
if r.get("success_criteria"):
parts.append(f"### Success criteria")
parts.append(f"> {r.get('success_criteria')}")
parts.append("")
if r.get("ask"):
parts.append(f"### Ask")
parts.append(f"> {r.get('ask')}")
parts.append("")
if r.get("prompt"):
parts.append(f"### Prompt ({r.get('prompt_words')} words)")
parts.append("```")
parts.append(str(r.get("prompt")))
parts.append("```")
parts.append("")
if r.get("response"):
parts.append(f"### Response ({r.get('response_words')} words)")
parts.append("")
parts.append(str(r.get("response")))
parts.append("")
return "\n".join(parts)
return f"_Checkpoint `{cid}` not found in the {len(ROWS)}-row dataset._"
# ---------------------------------------------------------------------------
# Tab 3: Statistics
# ---------------------------------------------------------------------------
def _bar_chart(counter: collections.Counter, title: str, x_label: str):
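    """Plot a Counter as a Plotly bar chart (most-common first); None if empty."""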
if not counter:
return None
items = counter.most_common()
df = pd.DataFrame(items, columns=[x_label, "count"])
fig = px.bar(df, x=x_label, y="count", title=title, text="count")
fig.update_traces(textposition="outside")
fig.update_layout(
margin=dict(l=20, r=20, t=50, b=20),
height=380,
)
return fig
def stats_review_lens():
return _bar_chart(
collections.Counter(r.get("review_lens") or "(empty)" for r in ROWS),
"Reviews by review_lens",
"review_lens",
)
def stats_target_agent():
return _bar_chart(
collections.Counter(r.get("target") or "(empty)" for r in ROWS),
"Reviews by target agent",
"target",
)
def stats_result():
return _bar_chart(
collections.Counter(r.get("result") or "(empty)" for r in ROWS),
"Outcome distribution",
"result",
)
def stats_model():
return _bar_chart(
collections.Counter(r.get("model") or "(empty)" for r in ROWS),
"Reviews by Claude model",
"model",
)
def stats_year_month():
return _bar_chart(
collections.Counter(r["year_month"] for r in ROWS),
"Reviews by month",
"year_month",
)
def stats_summary_md() -> str:
    """Build the quick-stats markdown table; guard against an empty dataset."""
    n = len(ROWS)
    if not n:
        return "_dataset is empty_"
    avg_resp = sum(r["response_words"] for r in ROWS) / n
    avg_prompt = sum(r["prompt_words"] for r in ROWS) / n
new_signal = sum(1 for r in ROWS if r.get("result") == "new_signal")
no_new_signal = sum(1 for r in ROWS if r.get("result") == "no_new_signal")
failed = sum(1 for r in ROWS if r.get("result") == "failed")
opus = sum(1 for r in ROWS if r.get("model") == "opus")
sonnet = sum(1 for r in ROWS if r.get("model") == "sonnet")
return (
f"### Quick stats\n\n"
f"| metric | value |\n"
f"|---|---|\n"
f"| Total reviews | **{n}** |\n"
f"| Avg prompt length | {avg_prompt:.1f} words |\n"
f"| Avg response length | {avg_resp:.1f} words |\n"
f"| Result: new_signal | {new_signal} ({new_signal/n*100:.1f}%) |\n"
f"| Result: no_new_signal | {no_new_signal} ({no_new_signal/n*100:.1f}%) |\n"
f"| Result: failed | {failed} ({failed/n*100:.1f}%) |\n"
f"| Model: opus | {opus} |\n"
f"| Model: sonnet | {sonnet} |\n"
)
# ---------------------------------------------------------------------------
# Gradio app
# ---------------------------------------------------------------------------
INTRO_MD = f"""
# Cross-Agent Review Queue Explorer
Browse, filter, and inspect **{len(ROWS)} anonymized cross-agent code-review checkpoints**
from the [Neo Genesis](https://neogenesis.app) monorepo, captured between
`2026-04-08` and `2026-04-14` (Codex requesting reviews from Claude
`neo-reviewer` / `neo-architect` agents).
Each row is a real bounded review request with:
- explicit **owner_goal** + **owner_intent** + **constraints** + **success_criteria**
- a single **review_lens** (risk, regression, goal-fit, etc.)
- a Claude **model** (sonnet / opus) and **mode** (review / architecture)
- the resulting **outcome** (`new_signal` / `no_new_signal` / `failed`)
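For a quick look outside this Space, the rows can be loaded with the same
`datasets` call this app uses (a minimal sketch; field names as used by this app):

```python
from datasets import load_dataset

ds = load_dataset("{DATASET_ID}", "transcripts", split="train")
row = ds[0]
print(row["id"], row["review_lens"], row["result"])
```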
To our knowledge, this is the first publicly released dataset of bounded
multi-agent code-review transcripts. Read the full schema in the
[dataset card](https://huggingface.co/datasets/{DATASET_ID}). Use this Explorer to navigate and aggregate.
- **Dataset**: [`{DATASET_ID}`](https://huggingface.co/datasets/{DATASET_ID})
- **License**: CC-BY-4.0 (data) | MIT (this Space's app code)
- **Operator**: Yesol Heo / Neo Genesis
""".format(f"https://huggingface.co/datasets/{DATASET_ID}")
with gr.Blocks(title="Cross-Agent Review Queue Explorer", theme=gr.themes.Soft()) as demo:
gr.Markdown(INTRO_MD)
with gr.Tab("Browse"):
gr.Markdown(
"Filter the queue by review lens, target agent, outcome, or month. "
"Click any row's `id` (e.g. `ccr-20260408-121555`) and paste it into "
"the **Detail** tab to see the full transcript."
)
with gr.Row():
lens_dd = gr.Dropdown(
choices=[ALL_FILTER] + REVIEW_LENSES,
value=ALL_FILTER,
label="review_lens",
)
target_dd = gr.Dropdown(
choices=[ALL_FILTER] + TARGET_AGENTS,
value=ALL_FILTER,
label="target agent",
)
result_dd = gr.Dropdown(
choices=[ALL_FILTER] + RESULTS,
value=ALL_FILTER,
label="result",
)
ym_dd = gr.Dropdown(
choices=[ALL_FILTER] + YEAR_MONTHS,
value=ALL_FILTER,
label="year-month",
)
table = gr.DataFrame(
value=filter_rows(ALL_FILTER, ALL_FILTER, ALL_FILTER, ALL_FILTER),
label=f"{len(ROWS)} reviews",
wrap=True,
interactive=False,
)
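        # Re-run the filter whenever any of the four dropdowns changes.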
for c in (lens_dd, target_dd, result_dd, ym_dd):
c.change(
filter_rows,
inputs=[lens_dd, target_dd, result_dd, ym_dd],
outputs=table,
)
with gr.Tab("Detail"):
gr.Markdown(
"Paste a checkpoint id (e.g. `ccr-20260408-121555`) to see the full "
"anonymized transcript: owner goal, constraints, prompt, and Claude's response."
)
with gr.Row():
cid_in = gr.Textbox(
label="Checkpoint id",
placeholder="ccr-20260408-121555",
value=ROWS[0]["id"] if ROWS else "",
scale=4,
)
view_btn = gr.Button("Show review", variant="primary", scale=1)
detail_md = gr.Markdown(
review_detail(ROWS[0]["id"]) if ROWS else "_dataset is empty_"
)
view_btn.click(review_detail, inputs=cid_in, outputs=detail_md)
cid_in.submit(review_detail, inputs=cid_in, outputs=detail_md)
with gr.Tab("Statistics"):
gr.Markdown(stats_summary_md())
with gr.Row():
gr.Plot(value=stats_review_lens())
gr.Plot(value=stats_target_agent())
with gr.Row():
gr.Plot(value=stats_result())
gr.Plot(value=stats_model())
gr.Plot(value=stats_year_month())
with gr.Tab("About"):
gr.Markdown(
f"""
### What is this?
A frozen, anonymized snapshot of {len(ROWS)} cross-agent code-review checkpoints
from the live SSOT (`.agent/shared-brain/cross-agent-review.md`) of
[Neo Genesis](https://neogenesis.app) β€” a 1-person AI-native operator running
**11 production AI business units**.
### Anonymization (6-tier)
The published dataset replaces:
- absolute file paths -> repo-relative paths
- internal hostnames / IPs -> tier names
- live API keys / tokens -> `[REDACTED]`
- personal contact info -> tier role names
- internal Telegram chat ids / Supabase project ids -> stable hashes
- secret-bearing scopes -> `[redacted-scope]`
while preserving the **structure of bounded reviews**: every transcript still
shows the owner_goal, the constraints, the review_lens, and the actual
prompt / response pair so you can study *how* the bounded-review protocol works.
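An illustrative sketch of that kind of rewriting (hypothetical patterns only,
not the actual 6-tier pipeline, which is not part of this Space):

```python
import re

def redact(text: str) -> str:
    # Hypothetical patterns for illustration -- the real pipeline differs.
    text = re.sub(r"(sk|hf)_[A-Za-z0-9]+", "[REDACTED]", text)   # live keys / tokens
    text = re.sub(r"/home/[^ ]+?/monorepo/", "", text)           # absolute -> repo-relative path
    return text
```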
### Why publish this?
Most multi-agent papers report aggregate metrics. The actual *transcripts* of
real bounded reviews β€” with explicit owner goals and review lenses β€” are rarely
public. This dataset is meant to be a working example for:
- agent-orchestration researchers studying handoff prompts
- code-review automation builders calibrating their own review schemas
- AI-governance teams evaluating bounded-review protocols against ad-hoc chats
### Resources
- **Dataset**: <https://huggingface.co/datasets/{DATASET_ID}>
- **Neo Genesis homepage**: <https://neogenesis.app>
- **Operator**: <https://huggingface.co/neogenesislab>
- **Wikidata**: [Q139569680](https://www.wikidata.org/wiki/Q139569680)
### Cite
```bibtex
@misc{{neogenesis_cross_agent_review_queue_2026,
title = {{Cross-Agent Code Review Queue: 37 anonymized Codex<->Claude bounded review checkpoints}},
author = {{Heo, Yesol}},
year = {{2026}},
url = {{https://huggingface.co/datasets/{DATASET_ID}}}
}}
```
"""
)
if __name__ == "__main__":
demo.queue().launch()