neogenesislab committed on
Commit
f405b00
Β·
verified Β·
1 Parent(s): 5e71de7

add app.py

Browse files
Files changed (1) hide show
  1. app.py +391 -0
app.py ADDED
@@ -0,0 +1,391 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ Cross-Agent Review Queue Explorer
3
+ =================================
4
+
5
+ Browse, filter, and analyze 37 anonymized cross-agent code-review checkpoints
6
+ from the Neo Genesis monorepo (Codex <-> Claude, 2026-04-08 ~ 2026-04-14).
7
+
8
+ Data source: ``neogenesislab/cross-agent-review-queue-2026`` (config=transcripts)
9
+ """
10
+ from __future__ import annotations
11
+
12
+ import collections
13
+ import re
14
+ from typing import Any
15
+
16
+ import gradio as gr
17
+ import pandas as pd
18
+ import plotly.express as px
19
+ from datasets import load_dataset
20
+
21
+ DATASET_ID = "neogenesislab/cross-agent-review-queue-2026"
22
+ DATASET_CONFIG = "transcripts"
23
+
24
+ # ---------------------------------------------------------------------------
25
+ # Cold-start data load
26
+ # ---------------------------------------------------------------------------
27
+ ds = load_dataset(DATASET_ID, DATASET_CONFIG, split="train")
28
+ ROWS: list[dict[str, Any]] = list(ds)
29
+
30
+
31
+ def _year_month(checkpoint_id: str) -> str:
32
+ """Parse ``ccr-20260408-121555`` -> ``2026-04``."""
33
+ m = re.search(r"(\d{4})(\d{2})\d{2}", checkpoint_id or "")
34
+ if m:
35
+ return f"{m.group(1)}-{m.group(2)}"
36
+ return "unknown"
37
+
38
+
39
+ def _word_count(text: str | None) -> int:
40
+ if not text:
41
+ return 0
42
+ return len(str(text).split())
43
+
44
+
45
# Derive per-row helper columns once at startup so the filter and stats
# code never re-parses ids or re-counts words.
for row in ROWS:
    row["year_month"] = _year_month(row.get("id", ""))
    row["prompt_words"] = _word_count(row.get("prompt"))
    row["response_words"] = _word_count(row.get("response"))
50
+
51
+
52
def _options(field: str) -> list[str]:
    """Sorted distinct values of *field* across ROWS (None mapped to "")."""
    return sorted({(r.get(field) or "") for r in ROWS})


REVIEW_LENSES = _options("review_lens")
TARGET_AGENTS = _options("target")
RESULTS = _options("result")
YEAR_MONTHS = sorted({r["year_month"] for r in ROWS})

# Dropdown sentinel meaning "do not filter on this field".
ALL_FILTER = "all"
58
+
59
+
60
+ # ---------------------------------------------------------------------------
61
+ # Tab 1: Browse
62
+ # ---------------------------------------------------------------------------
63
+
64
def filter_rows(
    review_lens: str,
    target_agent: str,
    result: str,
    year_month: str,
) -> pd.DataFrame:
    """Return a summary DataFrame of the checkpoints matching all filters.

    Each argument is a dropdown value; ``ALL_FILTER`` disables that
    particular filter.  Long titles are truncated to 80 characters and
    long review_lens values to 50 for display.
    """

    def _keep(row: dict[str, Any]) -> bool:
        # A row survives only if every active filter matches it.
        return (
            (review_lens == ALL_FILTER or (row.get("review_lens") or "") == review_lens)
            and (target_agent == ALL_FILTER or (row.get("target") or "") == target_agent)
            and (result == ALL_FILTER or (row.get("result") or "") == result)
            and (year_month == ALL_FILTER or row["year_month"] == year_month)
        )

    def _summary(row: dict[str, Any]) -> dict[str, Any]:
        headline = (row.get("title") or "").strip()
        if len(headline) > 80:
            headline = headline[:77] + "..."
        return {
            "id": row.get("id"),
            "year_month": row["year_month"],
            "target": row.get("target"),
            "model": row.get("model"),
            "review_lens": (row.get("review_lens") or "")[:50],
            "result": row.get("result"),
            "title": headline,
            "response_words": row.get("response_words"),
        }

    return pd.DataFrame([_summary(r) for r in ROWS if _keep(r)])
94
+
95
+
96
+ # ---------------------------------------------------------------------------
97
+ # Tab 2: Detail
98
+ # ---------------------------------------------------------------------------
99
+
100
def review_detail(checkpoint_id: str) -> str:
    """Render one checkpoint's full anonymized transcript as Markdown.

    Args:
        checkpoint_id: id such as ``ccr-20260408-121555``; surrounding
            whitespace is tolerated.

    Returns:
        A Markdown document for the matching row, or an italicized hint
        when the id is empty, or an error line when it is not in ``ROWS``.
    """
    if not checkpoint_id:
        return "_Pick a checkpoint id (e.g. `ccr-20260408-121555`)._"
    cid = checkpoint_id.strip()
    # (field, heading) pairs rendered identically as "### heading" plus a
    # blockquote — keeps the six near-duplicate if-blocks of the original
    # in one place, in the same order.
    quote_sections = (
        ("title", "Title"),
        ("owner_goal", "Owner goal"),
        ("owner_intent", "Owner intent"),
        ("constraints", "Constraints"),
        ("success_criteria", "Success criteria"),
        ("ask", "Ask"),
    )
    for r in ROWS:
        if r.get("id") != cid:
            continue
        # Header: metadata lines end with a trailing space (Markdown hard
        # line break).
        parts = [
            f"## {r.get('id')}",
            "",
            f"**created_at**: `{r.get('created_at')}` ",
            f"**requester** -> **target**: `{r.get('requester')}` -> `{r.get('target')}` ",
            f"**mode**: `{r.get('mode')}` | **model**: `{r.get('model')}` | **scope**: `{r.get('scope')}` ",
            f"**review_lens**: `{r.get('review_lens')}` | **result**: `{r.get('result')}` ",
            "",
        ]
        for key, heading in quote_sections:
            if r.get(key):
                parts.append(f"### {heading}")
                parts.append(f"> {r.get(key)}")
                parts.append("")
        # Prompt is fenced as a code block; response is left as Markdown.
        if r.get("prompt"):
            parts.append(f"### Prompt ({r.get('prompt_words')} words)")
            parts.append("```")
            parts.append(str(r.get("prompt")))
            parts.append("```")
            parts.append("")
        if r.get("response"):
            parts.append(f"### Response ({r.get('response_words')} words)")
            parts.append("")
            parts.append(str(r.get("response")))
            parts.append("")
        return "\n".join(parts)
    return f"_Checkpoint `{cid}` not found in the {len(ROWS)}-row dataset._"
151
+
152
+
153
+ # ---------------------------------------------------------------------------
154
+ # Tab 3: Statistics
155
+ # ---------------------------------------------------------------------------
156
+
157
def _bar_chart(counter: collections.Counter, title: str, x_label: str):
    """Build a labeled Plotly bar chart from *counter*.

    Returns None for an empty counter so callers can pass the result
    straight to ``gr.Plot``.
    """
    if not counter:
        return None
    # most_common() sorts bars by descending count.
    frame = pd.DataFrame(counter.most_common(), columns=[x_label, "count"])
    fig = px.bar(frame, x=x_label, y="count", title=title, text="count")
    fig.update_traces(textposition="outside")
    fig.update_layout(margin=dict(l=20, r=20, t=50, b=20), height=380)
    return fig
169
+
170
+
171
def stats_review_lens():
    """Bar chart of review counts per review_lens value."""
    counts = collections.Counter((r.get("review_lens") or "(empty)") for r in ROWS)
    return _bar_chart(counts, "Reviews by review_lens", "review_lens")
177
+
178
+
179
def stats_target_agent():
    """Bar chart of review counts per target agent."""
    counts = collections.Counter((r.get("target") or "(empty)") for r in ROWS)
    return _bar_chart(counts, "Reviews by target agent", "target")
185
+
186
+
187
def stats_result():
    """Bar chart of outcome (result) counts."""
    counts = collections.Counter((r.get("result") or "(empty)") for r in ROWS)
    return _bar_chart(counts, "Outcome distribution", "result")
193
+
194
+
195
def stats_model():
    """Bar chart of review counts per Claude model."""
    counts = collections.Counter((r.get("model") or "(empty)") for r in ROWS)
    return _bar_chart(counts, "Reviews by Claude model", "model")
201
+
202
+
203
def stats_year_month():
    """Bar chart of review counts per calendar month."""
    counts = collections.Counter(r["year_month"] for r in ROWS)
    return _bar_chart(counts, "Reviews by month", "year_month")
209
+
210
+
211
def stats_summary_md() -> str:
    """Markdown table of headline metrics over the whole dataset.

    Fixed vs. the original: the percentage columns divided by ``n``
    unconditionally, so an empty dataset raised ZeroDivisionError even
    though the averages were already guarded.  All divisions now fall
    back to 0 when there are no rows.
    """
    n = len(ROWS)
    avg_resp = sum(r["response_words"] for r in ROWS) / n if n else 0
    avg_prompt = sum(r["prompt_words"] for r in ROWS) / n if n else 0
    # One pass each instead of five generator scans.
    results = collections.Counter(r.get("result") for r in ROWS)
    models = collections.Counter(r.get("model") for r in ROWS)
    new_signal = results["new_signal"]
    no_new_signal = results["no_new_signal"]
    failed = results["failed"]

    def pct(count: int) -> float:
        # Guard the n == 0 case (the original divided unconditionally).
        return count / n * 100 if n else 0.0

    return (
        f"### Quick stats\n\n"
        f"| metric | value |\n"
        f"|---|---|\n"
        f"| Total reviews | **{n}** |\n"
        f"| Avg prompt length | {avg_prompt:.1f} words |\n"
        f"| Avg response length | {avg_resp:.1f} words |\n"
        f"| Result: new_signal | {new_signal} ({pct(new_signal):.1f}%) |\n"
        f"| Result: no_new_signal | {no_new_signal} ({pct(no_new_signal):.1f}%) |\n"
        f"| Result: failed | {failed} ({pct(failed):.1f}%) |\n"
        f"| Model: opus | {models['opus']} |\n"
        f"| Model: sonnet | {models['sonnet']} |\n"
    )
233
+
234
+
235
+ # ---------------------------------------------------------------------------
236
+ # Gradio app
237
+ # ---------------------------------------------------------------------------
238
+
239
# Intro copy shown at the top of the app.  The original mixed an f-string
# with a trailing ``.format(...)`` call (an escaped ``{{}}`` placeholder
# filled afterwards) — fragile, since any other literal braces in the text
# would break the format call.  The dataset URL is now interpolated
# directly; the rendered Markdown is unchanged.
INTRO_MD = f"""
# Cross-Agent Review Queue Explorer

Browse, filter, and inspect **{len(ROWS)} anonymized cross-agent code-review checkpoints**
from the [Neo Genesis](https://neogenesis.app) monorepo, captured between
`2026-04-08` and `2026-04-14` (Codex requesting reviews from Claude
`neo-reviewer` / `neo-architect` agents).

Each row is a real bounded review request with:
- explicit **owner_goal** + **owner_intent** + **constraints** + **success_criteria**
- a single **review_lens** (risk, regression, goal-fit, etc.)
- a Claude **model** (sonnet / opus) and **mode** (review / architecture)
- the resulting **outcome** (`new_signal` / `no_new_signal` / `failed`)

This is the first publicly released dataset of bounded multi-agent code-review
transcripts. Read the full schema in the
[dataset card](https://huggingface.co/datasets/{DATASET_ID}). Use this Explorer to navigate and aggregate.

- **Dataset**: [`{DATASET_ID}`](https://huggingface.co/datasets/{DATASET_ID})
- **License**: CC-BY-4.0 (data) | MIT (this Space's app code)
- **Operator**: Yesol Heo / Neo Genesis
"""
261
+
262
+
263
# Assemble the four-tab Gradio UI.  All data shown at load time (table,
# detail pane, stats) is computed eagerly here, at import time.
with gr.Blocks(title="Cross-Agent Review Queue Explorer", theme=gr.themes.Soft()) as demo:
    gr.Markdown(INTRO_MD)

    # --- Tab 1: filterable summary table -------------------------------
    with gr.Tab("Browse"):
        gr.Markdown(
            "Filter the queue by review lens, target agent, outcome, or month. "
            "Click any row's `id` (e.g. `ccr-20260408-121555`) and paste it into "
            "the **Detail** tab to see the full transcript."
        )
        with gr.Row():
            # Every dropdown gets an extra "all" choice that disables it.
            lens_dd = gr.Dropdown(
                choices=[ALL_FILTER] + REVIEW_LENSES,
                value=ALL_FILTER,
                label="review_lens",
            )
            target_dd = gr.Dropdown(
                choices=[ALL_FILTER] + TARGET_AGENTS,
                value=ALL_FILTER,
                label="target agent",
            )
            result_dd = gr.Dropdown(
                choices=[ALL_FILTER] + RESULTS,
                value=ALL_FILTER,
                label="result",
            )
            ym_dd = gr.Dropdown(
                choices=[ALL_FILTER] + YEAR_MONTHS,
                value=ALL_FILTER,
                label="year-month",
            )
        # Start unfiltered so the full queue is visible on load.
        table = gr.DataFrame(
            value=filter_rows(ALL_FILTER, ALL_FILTER, ALL_FILTER, ALL_FILTER),
            label=f"{len(ROWS)} reviews",
            wrap=True,
            interactive=False,
        )
        # Changing any dropdown re-runs filter_rows with all four values.
        for c in (lens_dd, target_dd, result_dd, ym_dd):
            c.change(
                filter_rows,
                inputs=[lens_dd, target_dd, result_dd, ym_dd],
                outputs=table,
            )

    # --- Tab 2: single-checkpoint transcript ---------------------------
    with gr.Tab("Detail"):
        gr.Markdown(
            "Paste a checkpoint id (e.g. `ccr-20260408-121555`) to see the full "
            "anonymized transcript: owner goal, constraints, prompt, and Claude's response."
        )
        with gr.Row():
            cid_in = gr.Textbox(
                label="Checkpoint id",
                placeholder="ccr-20260408-121555",
                # Pre-fill with the first row's id; empty string if no data.
                value=ROWS[0]["id"] if ROWS else "",
                scale=4,
            )
            view_btn = gr.Button("Show review", variant="primary", scale=1)
        # Pre-render the first transcript so the tab is not blank on load.
        detail_md = gr.Markdown(
            review_detail(ROWS[0]["id"]) if ROWS else "_dataset is empty_"
        )
        # Both the button and pressing Enter in the textbox trigger lookup.
        view_btn.click(review_detail, inputs=cid_in, outputs=detail_md)
        cid_in.submit(review_detail, inputs=cid_in, outputs=detail_md)

    # --- Tab 3: static aggregate charts (computed once at startup) -----
    with gr.Tab("Statistics"):
        gr.Markdown(stats_summary_md())
        with gr.Row():
            gr.Plot(value=stats_review_lens())
            gr.Plot(value=stats_target_agent())
        with gr.Row():
            gr.Plot(value=stats_result())
            gr.Plot(value=stats_model())
        gr.Plot(value=stats_year_month())

    # --- Tab 4: static About / citation text ---------------------------
    with gr.Tab("About"):
        gr.Markdown(
            f"""
### What is this?

A frozen, anonymized snapshot of {len(ROWS)} cross-agent code-review checkpoints
from the live SSOT (`.agent/shared-brain/cross-agent-review.md`) of
[Neo Genesis](https://neogenesis.app) — a 1-person AI-native operator running
**11 production AI business units**.

### Anonymization (6-tier)

The published dataset replaces:
- absolute file paths -> repo-relative paths
- internal hostnames / IPs -> tier names
- live API keys / tokens -> `[REDACTED]`
- personal contact info -> tier role names
- internal Telegram chat ids / Supabase project ids -> stable hashes
- secret-bearing scopes -> `[redacted-scope]`

while preserving the **structure of bounded reviews**: every transcript still
shows the owner_goal, the constraints, the review_lens, and the actual
prompt / response pair so you can study *how* the bounded-review protocol works.

### Why publish this?

Most multi-agent papers report aggregate metrics. The actual *transcripts* of
real bounded reviews — with explicit owner goals and review lenses — are rarely
public. This dataset is meant to be a working example for:

- agent-orchestration researchers studying handoff prompts
- code-review automation builders calibrating their own review schemas
- AI-governance teams evaluating bounded-review protocols against ad-hoc chats

### Resources

- **Dataset**: <https://huggingface.co/datasets/{DATASET_ID}>
- **Neo Genesis homepage**: <https://neogenesis.app>
- **Operator**: <https://huggingface.co/neogenesislab>
- **Wikidata**: [Q139569680](https://www.wikidata.org/wiki/Q139569680)

### Cite

```bibtex
@misc{{neogenesis_cross_agent_review_queue_2026,
  title = {{Cross-Agent Code Review Queue: 37 anonymized Codex<->Claude bounded review checkpoints}},
  author = {{Heo, Yesol}},
  year = {{2026}},
  url = {{https://huggingface.co/datasets/{DATASET_ID}}}
}}
```
"""
        )
388
+
389
+
390
# Script entry point: queue() enables Gradio's request queue before
# launching the server.
if __name__ == "__main__":
    demo.queue().launch()