Spaces:
Running
improve: tour quality + layout from Claude Code source study
Browse files
Prompt improvements (from studying Claude Code /init + MagicDocs source):
Investigation phase now uses Claude Code's WHY/HOW/WHERE/WHAT framing:
- WHY: what breaks without this component
- HOW: how it connects to adjacent pipeline stages
- WHERE: entry point a reader should start from
- WHAT: non-obvious pattern (class names now allowed when they clarify design)
Synthesis phase now enforces fan-out dependency graph instead of linear chain:
- depends_on = conceptual prerequisite, NOT execution order
- Most concepts should depend on concept 0 only (fan-out, not chain)
- Added explicit wrong/right examples in the prompt
- "A chain A→B→C→D is almost always wrong. Fan-out from 0 is almost always right."
Layout fix for linear chains:
- MAX_COLS = 4: caps horizontal width regardless of dependency depth
- Concepts beyond column 4 wrap into a second band below the first
- 7-wide layout becomes 2-band (4 top + 3 bottom) → fits viewport
- BAND_GAP = 80px between bands for visual separation
Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
- backend/services/tour_agent.py +51 -26
- ui/src/components/ExploreView.jsx +56 -20
|
@@ -70,11 +70,15 @@ _MAP_SYSTEM = (
|
|
| 70 |
)
|
| 71 |
|
| 72 |
_INVESTIGATE_SYSTEM = (
|
| 73 |
-
"You are a senior engineer doing a deep-dive into one component of a
|
| 74 |
-
"You know where this component
|
| 75 |
-
"Your job:
|
| 76 |
-
"
|
|
|
|
|
|
|
|
|
|
| 77 |
"Every claim must be grounded in the actual code shown. "
|
|
|
|
| 78 |
"Return ONLY valid JSON, no markdown, no explanation."
|
| 79 |
)
|
| 80 |
|
|
@@ -82,8 +86,10 @@ _SYNTHESIZE_SYSTEM = (
|
|
| 82 |
"You are a senior engineer writing the guided tour you wished existed before "
|
| 83 |
"reading this codebase. You have already traced the full pipeline and investigated "
|
| 84 |
"each stage. Convert your traced findings into the structured tour format. "
|
| 85 |
-
"
|
| 86 |
-
"
|
|
|
|
|
|
|
| 87 |
"Return ONLY valid JSON, no markdown, no explanation."
|
| 88 |
)
|
| 89 |
|
|
@@ -326,21 +332,26 @@ Full pipeline (for context):
|
|
| 326 |
Code for this stage β {stage_file}:
|
| 327 |
{code_text}
|
| 328 |
|
| 329 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 330 |
|
| 331 |
Return ONLY this JSON:
|
| 332 |
{{
|
| 333 |
-
"name": "
|
| 334 |
-
"subtitle": "One sentence: the specific problem
|
| 335 |
-
"insight": "2-3 sentences
|
| 336 |
-
"key_functions": ["
|
| 337 |
-
"naive_rejected": "One sentence:
|
| 338 |
}}
|
| 339 |
|
| 340 |
Rules:
|
| 341 |
-
-
|
| 342 |
-
-
|
| 343 |
-
-
|
| 344 |
"""
|
| 345 |
raw = self._gen.generate(_INVESTIGATE_SYSTEM, prompt, temperature=0.0,
|
| 346 |
json_mode=True, max_tokens=800)
|
|
@@ -396,13 +407,26 @@ Per-stage findings (already investigated β use these verbatim):
|
|
| 396 |
|
| 397 |
Convert this traced understanding into a concept tour JSON.
|
| 398 |
|
| 399 |
-
|
| 400 |
-
|
| 401 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 402 |
|
|
|
|
| 403 |
Return ONLY this JSON:
|
| 404 |
{{
|
| 405 |
-
"summary": "2 sentences: (1) what the user can DO with this repo
|
| 406 |
"entry_point": "{entry}",
|
| 407 |
"concepts": [
|
| 408 |
{{
|
|
@@ -411,19 +435,19 @@ Return ONLY this JSON:
|
|
| 411 |
"subtitle": "What this pipeline does for the user",
|
| 412 |
"file": "{entry}",
|
| 413 |
"type": "module",
|
| 414 |
-
"description": "2-3 sentences
|
| 415 |
-
"key_items": ["
|
| 416 |
"depends_on": [],
|
| 417 |
"reading_order": 1,
|
| 418 |
"ask": "How does the full pipeline work end to end?"
|
| 419 |
}},
|
| 420 |
{{
|
| 421 |
"id": 1,
|
| 422 |
-
"name": "Use the exact 'name'
|
| 423 |
-
"subtitle": "Use the exact 'subtitle'
|
| 424 |
"file": "file from stage 1",
|
| 425 |
"type": "class|function|module|algorithm",
|
| 426 |
-
"description": "Use the exact 'insight'
|
| 427 |
"key_items": ["use exact key_functions from findings"],
|
| 428 |
"depends_on": [0],
|
| 429 |
"reading_order": 2,
|
|
@@ -433,9 +457,10 @@ Return ONLY this JSON:
|
|
| 433 |
}}
|
| 434 |
|
| 435 |
Rules:
|
| 436 |
-
- 6-8 concepts total (concept 0 = pipeline overview, concepts 1
|
| 437 |
-
- Use the EXACT name, subtitle, insight, key_functions from the per-stage findings
|
| 438 |
- All concepts except id=0 must have depends_on non-empty
|
|
|
|
| 439 |
- reading_order: sequential integers starting at 1
|
| 440 |
- type: exactly one of class, function, module, algorithm
|
| 441 |
"""
|
|
|
|
| 70 |
)
|
| 71 |
|
| 72 |
_INVESTIGATE_SYSTEM = (
|
| 73 |
+
"You are a senior engineer doing a deep-dive into one component of a codebase. "
|
| 74 |
+
"You know exactly where this component fits in the larger system. "
|
| 75 |
+
"Your job: answer four questions about this code β "
|
| 76 |
+
"WHY does this component exist (what breaks without it?), "
|
| 77 |
+
"HOW does it connect to adjacent components, "
|
| 78 |
+
"WHERE is the entry point a reader should start, "
|
| 79 |
+
"WHAT non-obvious pattern or design decision makes this work. "
|
| 80 |
"Every claim must be grounded in the actual code shown. "
|
| 81 |
+
"Class names, function names, and file names are ENCOURAGED when they clarify the design. "
|
| 82 |
"Return ONLY valid JSON, no markdown, no explanation."
|
| 83 |
)
|
| 84 |
|
|
|
|
| 86 |
"You are a senior engineer writing the guided tour you wished existed before "
|
| 87 |
"reading this codebase. You have already traced the full pipeline and investigated "
|
| 88 |
"each stage. Convert your traced findings into the structured tour format. "
|
| 89 |
+
"DEPENDENCY RULE: depends_on means 'a developer cannot understand B without first "
|
| 90 |
+
"understanding A' β it is NOT execution order. Most concepts are parallel: they "
|
| 91 |
+
"share concept 0 as a prerequisite but are independent of each other. "
|
| 92 |
+
"A chain AβBβCβD is almost always wrong. A fan-out from concept 0 is almost always right. "
|
| 93 |
"Return ONLY valid JSON, no markdown, no explanation."
|
| 94 |
)
|
| 95 |
|
|
|
|
| 332 |
Code for this stage β {stage_file}:
|
| 333 |
{code_text}
|
| 334 |
|
| 335 |
+
Answer four questions about this component. Every answer must be grounded in the code above.
|
| 336 |
+
|
| 337 |
+
1. WHY does this component exist? What breaks or degrades without it?
|
| 338 |
+
2. HOW does it connect to the rest of the pipeline? What does it receive, what does it produce?
|
| 339 |
+
3. WHERE should a reader start? Name the entry-point function or class.
|
| 340 |
+
4. WHAT is the non-obvious pattern? Name the technique (and the class/function that implements it if helpful).
|
| 341 |
|
| 342 |
Return ONLY this JSON:
|
| 343 |
{{
|
| 344 |
+
"name": "3-5 words naming the key technique or component (class names OK if they explain the design)",
|
| 345 |
+
"subtitle": "One sentence: WHY this exists β the specific problem it solves",
|
| 346 |
+
"insight": "2-3 sentences covering HOW it works and WHAT makes it non-obvious. Include the naive alternative and its failure mode.",
|
| 347 |
+
"key_functions": ["entry_point_function", "other_actual_function"],
|
| 348 |
+
"naive_rejected": "One sentence: the simpler approach that would fail and why"
|
| 349 |
}}
|
| 350 |
|
| 351 |
Rules:
|
| 352 |
+
- key_functions must be actual names visible in the code above
|
| 353 |
+
- insight must name a concrete failure mode with the naive approach
|
| 354 |
+
- Use actual class/function names when they clarify the design (e.g. 'QdrantStore.hybrid_search')
|
| 355 |
"""
|
| 356 |
raw = self._gen.generate(_INVESTIGATE_SYSTEM, prompt, temperature=0.0,
|
| 357 |
json_mode=True, max_tokens=800)
|
|
|
|
| 407 |
|
| 408 |
Convert this traced understanding into a concept tour JSON.
|
| 409 |
|
| 410 |
+
βββ DEPENDENCY RULE (CRITICAL) βββ
|
| 411 |
+
depends_on means "a developer CANNOT understand concept B without first understanding A."
|
| 412 |
+
It is NOT execution order.
|
| 413 |
+
|
| 414 |
+
Ask yourself for each concept: "Can someone understand this WITHOUT knowing the others?"
|
| 415 |
+
- If yes β depends_on: [0] (only the pipeline overview is a prerequisite)
|
| 416 |
+
- If no β depends_on: [id of the specific concept they must know first]
|
| 417 |
+
|
| 418 |
+
WRONG (chain): 1β2β3β4β5β6β7 (almost never true)
|
| 419 |
+
RIGHT (fan-out): most concepts depend on 0 only, forming a tree 1-2 levels deep
|
| 420 |
+
|
| 421 |
+
For a 7-concept tour the typical structure is:
|
| 422 |
+
0: pipeline overview (no deps)
|
| 423 |
+
1,2,3,4,5: core concepts, each depends on 0 only
|
| 424 |
+
6: one concept that genuinely requires knowing concept 1 or 2 first
|
| 425 |
|
| 426 |
+
βββ FORMAT βββ
|
| 427 |
Return ONLY this JSON:
|
| 428 |
{{
|
| 429 |
+
"summary": "2 sentences: (1) what the user can DO with this repo, naming the key technique. (2) the single architectural decision that shapes everything else.",
|
| 430 |
"entry_point": "{entry}",
|
| 431 |
"concepts": [
|
| 432 |
{{
|
|
|
|
| 435 |
"subtitle": "What this pipeline does for the user",
|
| 436 |
"file": "{entry}",
|
| 437 |
"type": "module",
|
| 438 |
+
"description": "2-3 sentences: what enters, how each stage transforms it, what the user gets. Name the key files and the split that makes it work.",
|
| 439 |
+
"key_items": ["entry_function", "other_function"],
|
| 440 |
"depends_on": [],
|
| 441 |
"reading_order": 1,
|
| 442 |
"ask": "How does the full pipeline work end to end?"
|
| 443 |
}},
|
| 444 |
{{
|
| 445 |
"id": 1,
|
| 446 |
+
"name": "Use the exact 'name' from stage 1 findings",
|
| 447 |
+
"subtitle": "Use the exact 'subtitle' from stage 1 findings",
|
| 448 |
"file": "file from stage 1",
|
| 449 |
"type": "class|function|module|algorithm",
|
| 450 |
+
"description": "Use the exact 'insight' from stage 1 findings",
|
| 451 |
"key_items": ["use exact key_functions from findings"],
|
| 452 |
"depends_on": [0],
|
| 453 |
"reading_order": 2,
|
|
|
|
| 457 |
}}
|
| 458 |
|
| 459 |
Rules:
|
| 460 |
+
- 6-8 concepts total (concept 0 = pipeline overview, concepts 1-N = one per stage insight)
|
| 461 |
+
- Use the EXACT name, subtitle, insight, key_functions from the per-stage findings
|
| 462 |
- All concepts except id=0 must have depends_on non-empty
|
| 463 |
+
- Most concepts should have depends_on: [0] β only add deeper dependencies when genuinely required
|
| 464 |
- reading_order: sequential integers starting at 1
|
| 465 |
- type: exactly one of class, function, module, algorithm
|
| 466 |
"""
|
|
@@ -93,11 +93,20 @@ function expansionOffsets(selectedId, concepts, basePositions) {
|
|
| 93 |
// ββ Layout: topological column assignment with overflow wrapping βββββββββββββββ
|
| 94 |
// Returns { [conceptId]: { x, y } } in canvas coordinates.
|
| 95 |
//
|
| 96 |
-
//
|
| 97 |
-
//
|
| 98 |
-
//
|
| 99 |
-
//
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 100 |
const MAX_PER_COL = 3;
|
|
|
|
|
|
|
| 101 |
|
| 102 |
function computeLayout(concepts) {
|
| 103 |
if (!concepts.length) return {};
|
|
@@ -125,9 +134,6 @@ function computeLayout(concepts) {
|
|
| 125 |
);
|
| 126 |
|
| 127 |
// Step 3: assign visual columns, capping at MAX_PER_COL items per column.
|
| 128 |
-
// Each depth level starts in its own column. If a depth has more than MAX_PER_COL
|
| 129 |
-
// nodes, overflow spills into the next column. The following depth level then
|
| 130 |
-
// starts in the column after the last one used by the previous depth.
|
| 131 |
const colAssign = {};
|
| 132 |
let nextCol = 0;
|
| 133 |
|
|
@@ -141,26 +147,56 @@ function computeLayout(concepts) {
|
|
| 141 |
colAssign[node.id] = col;
|
| 142 |
count++;
|
| 143 |
});
|
| 144 |
-
nextCol = col + 1;
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 145 |
});
|
| 146 |
|
| 147 |
-
// Step
|
| 148 |
-
|
|
|
|
|
|
|
|
|
|
| 149 |
concepts.forEach(c => {
|
| 150 |
-
const
|
| 151 |
-
|
| 152 |
-
|
|
|
|
|
|
|
| 153 |
});
|
| 154 |
-
Object.values(visualCols).forEach(arr =>
|
| 155 |
-
arr.sort((a, b) => (a.reading_order ?? 99) - (b.reading_order ?? 99))
|
| 156 |
-
);
|
| 157 |
|
| 158 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 159 |
const positions = {};
|
| 160 |
-
Object.entries(
|
| 161 |
-
const
|
|
|
|
|
|
|
| 162 |
const colH = nodes.length * (CARD_H + ROW_GAP) - ROW_GAP;
|
| 163 |
-
const startY = (
|
| 164 |
nodes.forEach((node, row) => {
|
| 165 |
positions[node.id] = { x, y: startY + row * (CARD_H + ROW_GAP) };
|
| 166 |
});
|
|
|
|
| 93 |
// ── Layout: topological column assignment with overflow wrapping ───────────────
|
| 94 |
// Returns { [conceptId]: { x, y } } in canvas coordinates.
|
| 95 |
//
|
| 96 |
+
// Two kinds of overflow:
|
| 97 |
+
//
|
| 98 |
+
// 1. Same-depth overflow (fan-out): many nodes at depth 1 (e.g. 5 children of
|
| 99 |
+
// the pipeline overview). MAX_PER_COL = 3 caps per column and overflows into
|
| 100 |
+
// the next column, then the next depth starts in the column after that.
|
| 101 |
+
//
|
| 102 |
+
// 2. Too-many-columns overflow (linear chain): a sequential A→B→C→D→E→F→G
|
| 103 |
+
// produces 7 columns — too wide for the screen. MAX_COLS = 4 caps the total
|
| 104 |
+
// horizontal width. After column 3, nodes wrap into a second visual band
|
| 105 |
+
// (row), placed below the first band. This turns a 7-wide layout into a
|
| 106 |
+
// 2-band layout (cols 0-3 top, cols 4-6 bottom), which fits the viewport.
|
| 107 |
const MAX_PER_COL = 3;
|
| 108 |
+
const MAX_COLS = 4; // wrap into a second band after this many visual columns
|
| 109 |
+
const BAND_GAP = 80; // extra vertical gap between bands
|
| 110 |
|
| 111 |
function computeLayout(concepts) {
|
| 112 |
if (!concepts.length) return {};
|
|
|
|
| 134 |
);
|
| 135 |
|
| 136 |
// Step 3: assign visual columns, capping at MAX_PER_COL items per column.
|
|
|
|
|
|
|
|
|
|
| 137 |
const colAssign = {};
|
| 138 |
let nextCol = 0;
|
| 139 |
|
|
|
|
| 147 |
colAssign[node.id] = col;
|
| 148 |
count++;
|
| 149 |
});
|
| 150 |
+
nextCol = col + 1;
|
| 151 |
+
});
|
| 152 |
+
|
| 153 |
+
// Step 4: wrap columns past MAX_COLS into bands.
|
| 154 |
+
// band = Math.floor(colIndex / MAX_COLS), wrappedCol = colIndex % MAX_COLS
|
| 155 |
+
// This maps e.g. columns [0,1,2,3,4,5,6] to band 0: [0,1,2,3], band 1: [0,1,2]
|
| 156 |
+
const bandAssign = {};
|
| 157 |
+
const wrappedColAssign = {};
|
| 158 |
+
Object.entries(colAssign).forEach(([id, col]) => {
|
| 159 |
+
bandAssign[id] = Math.floor(col / MAX_COLS);
|
| 160 |
+
wrappedColAssign[id] = col % MAX_COLS;
|
| 161 |
});
|
| 162 |
|
| 163 |
+
// Step 5: assign pixel positions β group by (band, wrappedCol)
|
| 164 |
+
// Compute each band's total height first so we can stack bands vertically.
|
| 165 |
+
const bandColGroups = {}; // { band_wrappedCol: [concept, ...] }
|
| 166 |
+
const bandHeights = {}; // { band: maxColumnHeight }
|
| 167 |
+
|
| 168 |
concepts.forEach(c => {
|
| 169 |
+
const band = bandAssign[c.id] ?? 0;
|
| 170 |
+
const wc = wrappedColAssign[c.id] ?? 0;
|
| 171 |
+
const key = `${band}_${wc}`;
|
| 172 |
+
if (!bandColGroups[key]) bandColGroups[key] = [];
|
| 173 |
+
bandColGroups[key].push(c);
|
| 174 |
});
|
|
|
|
|
|
|
|
|
|
| 175 |
|
| 176 |
+
Object.entries(bandColGroups).forEach(([key, nodes]) => {
|
| 177 |
+
nodes.sort((a, b) => (a.reading_order ?? 99) - (b.reading_order ?? 99));
|
| 178 |
+
const [band] = key.split("_").map(Number);
|
| 179 |
+
const h = nodes.length * (CARD_H + ROW_GAP);
|
| 180 |
+
bandHeights[band] = Math.max(bandHeights[band] ?? 0, h);
|
| 181 |
+
});
|
| 182 |
+
|
| 183 |
+
// Cumulative Y offsets per band
|
| 184 |
+
const bandStartY = {};
|
| 185 |
+
let cumY = 48;
|
| 186 |
+
const numBands = Math.max(...Object.values(bandAssign)) + 1;
|
| 187 |
+
for (let b = 0; b < numBands; b++) {
|
| 188 |
+
bandStartY[b] = cumY;
|
| 189 |
+
cumY += (bandHeights[b] ?? 0) + BAND_GAP;
|
| 190 |
+
}
|
| 191 |
+
|
| 192 |
+
// Within each band, center columns relative to the tallest column in that band
|
| 193 |
const positions = {};
|
| 194 |
+
Object.entries(bandColGroups).forEach(([key, nodes]) => {
|
| 195 |
+
const [band, wc] = key.split("_").map(Number);
|
| 196 |
+
const x = wc * (CARD_W + COL_GAP) + 48;
|
| 197 |
+
const maxH = bandHeights[band] ?? 0;
|
| 198 |
const colH = nodes.length * (CARD_H + ROW_GAP) - ROW_GAP;
|
| 199 |
+
const startY = bandStartY[band] + (maxH - colH) / 2;
|
| 200 |
nodes.forEach((node, row) => {
|
| 201 |
positions[node.id] = { x, y: startY + row * (CARD_H + ROW_GAP) };
|
| 202 |
});
|