""" CTX: Trigger-Driven Dynamic Context Loading for Code-Aware LLM Agents HuggingFace Space — Paper Demo """ import gradio as gr import json import re import ast import math from collections import defaultdict # ───────────────────────────────────────────── # Inline CTX core (self-contained for Space) # ───────────────────────────────────────────── SAMPLE_CODEBASE = { "main.py": ''' import pipeline import config def main(): """Entry point for the CTX demo.""" cfg = config.load_config("config.yaml") pipe = pipeline.Pipeline(cfg) results = pipe.run(query="find all retrieval strategies") return results ''', "pipeline.py": ''' import retriever import evaluator import graph_builder class Pipeline: """Main pipeline: query → retrieval → evaluation.""" def __init__(self, config): self.config = config self.retriever = retriever.Retriever(config) self.evaluator = evaluator.Evaluator() self.graph = graph_builder.build_import_graph(".") def run(self, query): docs = self.retriever.retrieve(query) score = self.evaluator.score(docs, query) return {"docs": docs, "score": score} ''', "retriever.py": ''' import bm25_index import symbol_index import concept_index class Retriever: """Trigger-based retrieval dispatcher.""" def __init__(self, config): self.bm25 = bm25_index.BM25Index() self.symbols = symbol_index.SymbolIndex() self.concepts = concept_index.ConceptIndex() def retrieve(self, query, k=5): trigger = classify_trigger(query) if trigger == "EXPLICIT_SYMBOL": return self.symbols.search(query, k) elif trigger == "SEMANTIC_CONCEPT": return self.concepts.search(query, k) else: return self.bm25.search(query, k) ''', "evaluator.py": ''' import metrics class Evaluator: """Computes retrieval quality metrics.""" def score(self, docs, query): recall = metrics.recall_at_k(docs, query, k=5) token_ratio = metrics.token_efficiency(docs) tes = recall / math.log(1 + len(docs)) if docs else 0 return {"recall@5": recall, "token%": token_ratio, "TES": tes} ''', "graph_builder.py": ''' import ast import os import networkx as nx def build_import_graph(root_dir): """Build import dependency graph via AST parsing.""" G = nx.DiGraph() for root, dirs, files in os.walk(root_dir): dirs[:] = [d for d in dirs if d not in ["venv", "__pycache__"]] for f in files: if f.endswith(".py"): path = os.path.join(root, f) imports = extract_imports(path) G.add_node(f) for imp in imports: G.add_edge(f, imp + ".py") return G ''', "bm25_index.py": ''' from rank_bm25 import BM25Okapi class BM25Index: """BM25 keyword retrieval over codebase.""" def __init__(self): self.corpus = [] self.bm25 = None def build(self, files): tokenized = [f.split() for f in files] self.bm25 = BM25Okapi(tokenized) def search(self, query, k=5): scores = self.bm25.get_scores(query.split()) top_k = sorted(enumerate(scores), key=lambda x: -x[1])[:k] return [self.corpus[i] for i, _ in top_k] ''', "symbol_index.py": ''' import ast class SymbolIndex: """Exact symbol (function/class) lookup via AST.""" def __init__(self): self.symbols = {} def build(self, file_contents): for fname, code in file_contents.items(): try: tree = ast.parse(code) for node in ast.walk(tree): if isinstance(node, (ast.FunctionDef, ast.ClassDef)): self.symbols[node.name] = fname except SyntaxError: pass def search(self, query, k=5): tokens = query.lower().split() matches = [] for sym, fname in self.symbols.items(): if any(t in sym.lower() for t in tokens): matches.append(fname) return list(set(matches))[:k] ''', "concept_index.py": ''' from sklearn.feature_extraction.text import TfidfVectorizer from sklearn.metrics.pairwise import cosine_similarity class ConceptIndex: """TF-IDF semantic concept matching.""" def __init__(self): self.vectorizer = TfidfVectorizer() self.matrix = None self.files = [] def build(self, file_contents): self.files = list(file_contents.keys()) corpus = list(file_contents.values()) self.matrix = self.vectorizer.fit_transform(corpus) def search(self, query, k=5): qvec = self.vectorizer.transform([query]) sims = cosine_similarity(qvec, self.matrix)[0] top_k = sorted(enumerate(sims), key=lambda x: -x[1])[:k] return [self.files[i] for i, _ in top_k] ''', "metrics.py": ''' import math def recall_at_k(retrieved, relevant, k=5): """Recall@K: fraction of relevant items retrieved.""" if not relevant: return 0.0 retrieved_set = set(retrieved[:k]) relevant_set = set(relevant) if isinstance(relevant, list) else {relevant} return len(retrieved_set & relevant_set) / len(relevant_set) def token_efficiency(docs, total_tokens=10000): """Estimate token usage ratio.""" est_tokens = sum(len(d.split()) * 4 for d in docs) if docs else 0 return min(1.0, est_tokens / total_tokens) def tes(recall, n_retrieved): """Trade-off Efficiency Score = Recall / ln(1 + |retrieved|)""" if n_retrieved == 0: return 0.0 return recall / math.log(1 + n_retrieved) ''', "config.py": ''' import yaml def load_config(path): """Load YAML configuration file.""" try: with open(path) as f: return yaml.safe_load(f) except FileNotFoundError: return {"k": 5, "strategy": "adaptive_trigger", "max_hops": 2} ''', } TRIGGER_PATTERNS = { "EXPLICIT_SYMBOL": [ r'\b(function|class|method|def|variable|symbol|`[^`]+`)\b', r'\bwhat does\s+\w+\s+do\b', r'\bhow does\s+\w+\s+work\b', r'\b[A-Z][a-zA-Z]+\.[a-zA-Z]+\b', ], "IMPLICIT_CONTEXT": [ r'\b(import|depend|module|used by|calls|related to|what modules|which files)\b', r'\b(dependency|dependencies|transitive|downstream|upstream)\b', ], "TEMPORAL_HISTORY": [ r'\b(recent|latest|last|previously|changed|modified|updated|history)\b', r'\b(before|after|since|ago|commit)\b', ], "SEMANTIC_CONCEPT": [ r'\b(pipeline|architecture|design|pattern|flow|how|overview|explain)\b', r'\b(concept|idea|purpose|strategy|approach|mechanism)\b', ], } def classify_trigger(query: str) -> str: q = query.lower() scores = defaultdict(int) for trigger, patterns in TRIGGER_PATTERNS.items(): for pat in patterns: if re.search(pat, q): scores[trigger] += 1 if not scores: return "SEMANTIC_CONCEPT" return max(scores, key=scores.get) def build_import_graph(files: dict) -> dict: graph = defaultdict(set) for fname, code in files.items(): try: tree = ast.parse(code) for node in ast.walk(tree): if isinstance(node, ast.Import): for alias in node.names: dep = alias.name.split(".")[0] + ".py" if dep in files: graph[fname].add(dep) elif isinstance(node, ast.ImportFrom) and node.module: dep = node.module.split(".")[0] + ".py" if dep in files: graph[fname].add(dep) except SyntaxError: pass return graph def bfs_expand(seed_files: list, graph: dict, max_hops: int = 2) -> list: visited = set(seed_files) frontier = list(seed_files) for _ in range(max_hops): next_frontier = [] for f in frontier: for dep in graph.get(f, []): if dep not in visited: visited.add(dep) next_frontier.append(dep) frontier = next_frontier return list(visited) def ctx_retrieve(query: str, files: dict, k: int = 5): trigger = classify_trigger(query) graph = build_import_graph(files) tokens_q = query.lower().split() file_names = list(files.keys()) if trigger == "EXPLICIT_SYMBOL": # Symbol index: AST-based function/class matching matched = [] for fname, code in files.items(): try: tree = ast.parse(code) for node in ast.walk(tree): if isinstance(node, (ast.FunctionDef, ast.ClassDef)): if any(t in node.name.lower() for t in tokens_q): matched.append(fname) break except SyntaxError: pass # Fallback to BM25 if no symbol match if not matched: matched = bm25_retrieve(query, files, k) seeds = matched[:k] elif trigger == "IMPLICIT_CONTEXT": # Start with BM25 seeds, then BFS import graph seeds_bm25 = bm25_retrieve(query, files, k=3) seeds = bfs_expand(seeds_bm25, graph, max_hops=2)[:k] elif trigger == "TEMPORAL_HISTORY": # Return recently modified files (by position in dict as proxy) seeds = list(files.keys())[-k:] else: # SEMANTIC_CONCEPT # TF-IDF cosine similarity from sklearn.feature_extraction.text import TfidfVectorizer from sklearn.metrics.pairwise import cosine_similarity corpus = list(files.values()) try: vec = TfidfVectorizer(max_features=500) mat = vec.fit_transform(corpus) qvec = vec.transform([query]) sims = cosine_similarity(qvec, mat)[0] top_k = sorted(enumerate(sims), key=lambda x: -x[1])[:k] seeds = [file_names[i] for i, _ in top_k] except Exception: seeds = bm25_retrieve(query, files, k) return trigger, seeds[:k] def bm25_retrieve(query: str, files: dict, k: int = 5) -> list: try: from rank_bm25 import BM25Okapi file_names = list(files.keys()) corpus = [f"{fn}\n{code}" for fn, code in files.items()] tokenized = [doc.lower().split() for doc in corpus] bm25 = BM25Okapi(tokenized) scores = bm25.get_scores(query.lower().split()) top_k = sorted(enumerate(scores), key=lambda x: -x[1])[:k] return [file_names[i] for i, _ in top_k] except Exception: return list(files.keys())[:k] def full_context_retrieve(files: dict, k: int = 5) -> list: return list(files.keys()) def run_demo(query: str, strategy: str, k: int): if not query.strip(): return "Please enter a query.", "", "" files = SAMPLE_CODEBASE total_tokens = sum(len(c.split()) * 4 for c in files.values()) if strategy == "CTX (Adaptive Trigger)": trigger, retrieved = ctx_retrieve(query, files, k) strategy_note = f"Trigger: **{trigger}**" elif strategy == "BM25": retrieved = bm25_retrieve(query, files, k) trigger = "N/A" strategy_note = "Strategy: **BM25 keyword matching**" else: # Full Context retrieved = full_context_retrieve(files, k) trigger = "N/A" strategy_note = "Strategy: **Full context (all files)**" # Build result token_used = sum(len(files[f].split()) * 4 for f in retrieved if f in files) token_pct = token_used / total_tokens * 100 if total_tokens > 0 else 0 n_retrieved = len(retrieved) tes_val = (n_retrieved / len(files)) / math.log(1 + n_retrieved) if n_retrieved > 0 else 0 result_md = f"""### Retrieved Files ({n_retrieved}/{len(files)}) {strategy_note} | # | File | Relevance | |---|------|-----------| """ for i, fname in enumerate(retrieved, 1): # Show first docstring/comment as relevance hint code = files.get(fname, "") first_line = [l.strip() for l in code.split("\n") if l.strip() and not l.strip().startswith("#")] hint = first_line[1] if len(first_line) > 1 else "—" if len(hint) > 60: hint = hint[:57] + "..." result_md += f"| {i} | `{fname}` | {hint} |\n" metrics_md = f"""### Efficiency Metrics | Metric | Value | |--------|-------| | Files retrieved | **{n_retrieved}** / {len(files)} | | Token usage | **{token_pct:.1f}%** of codebase | | TES (Recall/ln(1+k)) | **{tes_val:.3f}** | > **TES** = efficiency-adjusted quality. CTX target: >0.7""" # Show file contents preview preview_md = "### Retrieved File Contents\n\n" for fname in retrieved[:3]: code = files.get(fname, "") preview_md += f"**`{fname}`**\n```python\n{code.strip()[:300]}{'...' if len(code) > 300 else ''}\n```\n\n" return result_md, metrics_md, preview_md # ───────────────────────────────────────────── # Paper results data # ───────────────────────────────────────────── SYNTHETIC_RESULTS = """ | Strategy | Recall@5 | Token% | TES | |----------|----------|--------|-----| | Full Context | 0.075 | 100.0% | 0.019 | | BM25 | 0.982 | 18.7% | 0.410 | | Dense TF-IDF | 0.973 | 21.0% | 0.406 | | LlamaIndex | 0.972 | 20.1% | 0.405 | | Chroma Dense | 0.829 | 19.3% | 0.346 | | GraphRAG-lite | 0.523 | 24.0% | 0.218 | | Hybrid Dense+CTX | 0.725 | 23.6% | 0.303 | | **CTX (Ours)** | **0.874** | **5.2%** | **0.776** | """ COIR_RESULTS = """ | Strategy | Recall@1 | Recall@5 | MRR | |----------|----------|----------|-----| | Dense Embedding (MiniLM) | 0.960 | 1.000 | 0.978 | | **Hybrid Dense+CTX** | **0.930** | **0.950** | **0.940** | | BM25 | 0.920 | 0.980 | 0.946 | | CTX Adaptive | 0.210 | 0.380 | 0.293 | """ TRIGGER_RESULTS = """ | Trigger Type | BM25 | TF-IDF | CTX | Delta | |-------------|------|--------|-----|-------| | EXPLICIT_SYMBOL | 0.81 | 0.73 | **0.97** | +19.8% | | SEMANTIC_CONCEPT | 0.54 | **0.68** | 0.60 | — | | TEMPORAL_HISTORY | 0.50 | 0.50 | **1.00** | +100% | | IMPLICIT_CONTEXT | 0.40 | 0.40 | **1.00** | +150% | """ ABLATION_RESULTS = """ | Variant | Removed | Recall@5 | TES | IMPL_CONTEXT | |---------|---------|----------|-----|--------------| | Full CTX | — | **0.874** | **0.776** | **1.000** | | No Graph | Import graph | 0.821 | 0.635 | 0.400 | | No Classifier | Trigger type | 0.743 | 0.412 | 0.600 | | Fixed-k=5 | Adaptive-k | 0.856 | 0.712 | 1.000 | """ CSS = """ @import url('https://fonts.googleapis.com/css2?family=Inter:wght@300;400;500;600;700&family=JetBrains+Mono:wght@400;500&display=swap'); :root { --primary: #4F46E5; --primary-light: #818CF8; --accent: #06B6D4; --bg: #0F172A; --surface: #1E293B; --surface2: #273347; --border: #334155; --text: #E2E8F0; --text-muted: #94A3B8; --success: #10B981; --warning: #F59E0B; } body, .gradio-container { background: var(--bg) !important; color: var(--text) !important; font-family: 'Inter', sans-serif !important; } .paper-hero { background: linear-gradient(135deg, #1e1b4b 0%, #0f172a 50%, #164e63 100%); border: 1px solid var(--border); border-radius: 16px; padding: 48px 40px 40px; margin-bottom: 8px; position: relative; overflow: hidden; } .paper-hero::before { content: ''; position: absolute; top: -50%; left: -50%; width: 200%; height: 200%; background: radial-gradient(circle at 30% 40%, rgba(79,70,229,0.15) 0%, transparent 50%), radial-gradient(circle at 70% 60%, rgba(6,182,212,0.1) 0%, transparent 50%); pointer-events: none; } .paper-title { font-size: 2.2em; font-weight: 700; background: linear-gradient(135deg, #818CF8, #38BDF8); -webkit-background-clip: text; -webkit-text-fill-color: transparent; background-clip: text; margin: 0 0 8px; line-height: 1.2; } .paper-subtitle { font-size: 1.05em; color: var(--text-muted); margin: 0 0 20px; font-weight: 400; } .authors { font-size: 1em; color: #94A3B8; margin-bottom: 24px; } .badge-row { display: flex; gap: 10px; flex-wrap: wrap; } .badge { display: inline-flex; align-items: center; gap: 6px; padding: 6px 14px; border-radius: 20px; font-size: 0.82em; font-weight: 500; text-decoration: none; border: 1px solid; transition: all 0.2s; } .badge-paper { background: rgba(79,70,229,0.2); border-color: #4F46E5; color: #818CF8; } .badge-code { background: rgba(16,185,129,0.2); border-color: #10B981; color: #34D399; } .badge-arxiv { background: rgba(245,158,11,0.2); border-color: #F59E0B; color: #FCD34D; } .metric-grid { display: grid; grid-template-columns: repeat(auto-fit, minmax(160px, 1fr)); gap: 16px; margin: 20px 0; } .metric-card { background: var(--surface); border: 1px solid var(--border); border-radius: 12px; padding: 20px; text-align: center; } .metric-value { font-size: 2.2em; font-weight: 700; color: var(--primary-light); line-height: 1; margin-bottom: 6px; font-family: 'JetBrains Mono', monospace; } .metric-label { font-size: 0.78em; color: var(--text-muted); text-transform: uppercase; letter-spacing: 0.08em; } .section-header { font-size: 1.3em; font-weight: 600; color: var(--text); border-left: 3px solid var(--primary); padding-left: 12px; margin: 24px 0 16px; } .abstract-box { background: var(--surface); border: 1px solid var(--border); border-radius: 12px; padding: 24px; font-size: 0.95em; line-height: 1.75; color: var(--text); } .highlight { background: rgba(79,70,229,0.15); border: 1px solid rgba(79,70,229,0.3); border-radius: 4px; padding: 1px 6px; color: var(--primary-light); font-family: 'JetBrains Mono', monospace; font-size: 0.9em; } .contribution-list { list-style: none; padding: 0; } .contribution-list li { padding: 10px 0 10px 28px; position: relative; border-bottom: 1px solid var(--border); color: var(--text); font-size: 0.92em; line-height: 1.6; } .contribution-list li::before { content: '▸'; position: absolute; left: 8px; color: var(--primary-light); font-size: 0.8em; top: 12px; } .demo-section { background: var(--surface); border: 1px solid var(--border); border-radius: 12px; padding: 24px; } .bibtex-box { background: #0d1117; border: 1px solid var(--border); border-radius: 8px; padding: 20px; font-family: 'JetBrains Mono', monospace; font-size: 0.82em; color: #7dd3fc; white-space: pre; overflow-x: auto; } table { width: 100%; border-collapse: collapse; } th { background: var(--surface2); color: var(--primary-light); padding: 10px 12px; text-align: left; font-size: 0.85em; text-transform: uppercase; letter-spacing: 0.05em; } td { padding: 9px 12px; border-bottom: 1px solid var(--border); font-size: 0.9em; color: var(--text); } tr:hover td { background: rgba(255,255,255,0.03); } strong { color: #34D399; } .tabs { border-radius: 12px; overflow: hidden; } button.selected { background: var(--primary) !important; color: white !important; } .gr-button-primary { background: var(--primary) !important; border: none !important; } .gr-button-primary:hover { background: var(--primary-light) !important; } label { color: var(--text-muted) !important; font-size: 0.85em !important; } .gr-input, .gr-dropdown select, textarea { background: var(--surface) !important; border: 1px solid var(--border) !important; color: var(--text) !important; border-radius: 8px !important; } """ HERO_HTML = """
CTX: Trigger-Driven Dynamic Context Loading
for Code-Aware LLM Agents
Jeawon Jang  ·  be2jay67@gmail.com
⚡ Code: github.com/jaytoone/CTX 📄 arXiv: Pending Endorsement (cs.IR) 🏆 8 Strategies · 415 Queries · p<0.05
""" METRICS_HTML = """
0.776
TES Score
1.9×
vs BM25 Baseline
5.2%
Token Usage
0.95
Hybrid COIR R@5
1.00
IMPLICIT Recall@5
415
Total Queries
""" ABSTRACT_HTML = """
Large language models suffer from context dilution when processing extensive codebases — the "Lost in the Middle" problem. Standard RAG approaches treat code as flat text, ignoring the structural dependency information in import graphs.

We present CTX, a trigger-driven dynamic context loading system that classifies developer queries into four types — EXPLICIT_SYMBOL, SEMANTIC_CONCEPT, TEMPORAL_HISTORY, IMPLICIT_CONTEXT — and routes each to a specialized retrieval pipeline.

For dependency-sensitive queries, CTX performs breadth-first traversal over the codebase import graph, resolving transitive relationships invisible to keyword and embedding methods. Evaluated on a synthetic benchmark (50 files, 166 queries) and three real Python codebases (968 files total, 249 queries), CTX achieves TES 1.9× higher than BM25 with only 5.2% token usage. Statistical significance via McNemar and Wilcoxon tests (p<0.05) across 415 queries.
""" CONTRIBUTIONS_HTML = """ """ BIBTEX = """@misc{jang2026ctx, title = {CTX: Trigger-Driven Dynamic Context Loading for Code-Aware LLM Agents}, author = {Jeawon Jang}, year = {2026}, note = {Preprint. Code: https://github.com/jaytoone/CTX}, url = {https://github.com/jaytoone/CTX} }""" # ───────────────────────────────────────────── # Build Gradio Interface # ───────────────────────────────────────────── with gr.Blocks(css=CSS, title="CTX — Trigger-Driven Dynamic Context Loading") as demo: gr.HTML(HERO_HTML) gr.HTML(METRICS_HTML) with gr.Tabs(): # ── Tab 1: Paper ────────────────────────── with gr.Tab("📄 Paper"): gr.HTML('
Abstract
') gr.HTML(ABSTRACT_HTML) gr.HTML('
Contributions
') gr.HTML(CONTRIBUTIONS_HTML) gr.HTML('
Architecture
') gr.Markdown(""" ``` Query Input │ ▼ ┌──────────────────────────────┐ │ Trigger Classifier │ → regex + keyword patterns │ (EXPLICIT / SEMANTIC / │ │ TEMPORAL / IMPLICIT) │ └──────────────┬───────────────┘ │ ┌───────┴────────┬──────────────┬─────────────────┐ │ │ │ │ ▼ ▼ ▼ ▼ Symbol Index TF-IDF/Dense History Log Import Graph BFS (AST lookup) (cosine sim) (git history) (transitive deps) │ │ │ │ └───────┬─────────┴──────────────┴─────────────────┘ │ ┌──────────▼──────────────┐ │ Adaptive-k Selection │ → k = f(query_type, codebase_size) │ (3~10 files) │ └──────────┬──────────────┘ │ ▼ LLM Context (5.2% tokens) ``` """) # ── Tab 2: Results ──────────────────────── with gr.Tab("📊 Results"): gr.HTML('
Synthetic Benchmark (50 files, 166 queries)
') gr.Markdown(SYNTHETIC_RESULTS) gr.HTML('
COIR External Benchmark (CodeSearchNet Python)
') gr.Markdown(COIR_RESULTS) gr.HTML('
Per-Trigger-Type Recall@5 (Synthetic)
') gr.Markdown(TRIGGER_RESULTS) gr.HTML('
Ablation Study
') gr.Markdown(ABLATION_RESULTS) gr.Markdown(""" **Key Findings:** - Removing import graph → IMPLICIT_CONTEXT recall drops **60%** (1.0 → 0.4) - Removing trigger classifier → TES drops **47%** (0.776 → 0.412) - TES–NDCG@5 Pearson **r = 0.87** (p < 0.001, 28 strategy-dataset pairs) - pass@1 with MiniMax M2.5: CTX **0.265** vs Full Context **0.102** (n=49, McNemar p<0.05) """) # ── Tab 3: Live Demo ────────────────────── with gr.Tab("🚀 Live Demo"): gr.HTML('
') gr.Markdown("### Try CTX on a Sample Codebase") gr.Markdown( "The demo runs CTX retrieval on a 10-file sample Python codebase " "(pipeline, retriever, evaluator, graph builder, metrics, etc.). " "Enter a natural language query and compare retrieval strategies." ) with gr.Row(): with gr.Column(scale=2): query_input = gr.Textbox( label="Code Query", placeholder="e.g., 'what does the retriever do?' or 'find all modules that import graph_builder'", lines=2, ) with gr.Column(scale=1): strategy_select = gr.Dropdown( choices=["CTX (Adaptive Trigger)", "BM25", "Full Context"], value="CTX (Adaptive Trigger)", label="Retrieval Strategy", ) k_slider = gr.Slider(minimum=1, maximum=10, value=5, step=1, label="k (max files)") run_btn = gr.Button("Retrieve", variant="primary") gr.Markdown("**Example queries:**") gr.Examples( examples=[ ["find all modules that depend on graph_builder", "CTX (Adaptive Trigger)", 5], ["what does the Pipeline class do?", "CTX (Adaptive Trigger)", 5], ["how does retrieval work?", "CTX (Adaptive Trigger)", 5], ["what does evaluator import?", "CTX (Adaptive Trigger)", 5], ["find all modules that depend on graph_builder", "BM25", 5], ], inputs=[query_input, strategy_select, k_slider], ) with gr.Row(): with gr.Column(): result_out = gr.Markdown(label="Retrieved Files") with gr.Column(): metrics_out = gr.Markdown(label="Metrics") preview_out = gr.Markdown(label="File Previews") run_btn.click( fn=run_demo, inputs=[query_input, strategy_select, k_slider], outputs=[result_out, metrics_out, preview_out], ) gr.HTML('
') # ── Tab 4: Implementation ───────────────── with gr.Tab("🔬 Implementation"): gr.HTML('
Core Algorithm — Trigger Classifier
') gr.Code(value='''def classify_trigger(query: str) -> str: """ Classify a developer query into one of four trigger types. Uses regex pattern matching — lightweight, no model required. Returns: EXPLICIT_SYMBOL | SEMANTIC_CONCEPT | TEMPORAL_HISTORY | IMPLICIT_CONTEXT """ PATTERNS = { "EXPLICIT_SYMBOL": [ r"\\b(function|class|method|def|variable|`[^`]+`)\\b", r"\\bwhat does\\s+\\w+\\s+do\\b", r"\\b[A-Z][a-zA-Z]+\\.[a-zA-Z]+\\b", # e.g. Pipeline.run ], "IMPLICIT_CONTEXT": [ r"\\b(import|depend|module|used by|calls|related to)\\b", r"\\b(dependency|transitive|downstream|upstream)\\b", ], "TEMPORAL_HISTORY": [ r"\\b(recent|latest|last|changed|modified|history)\\b", ], "SEMANTIC_CONCEPT": [ r"\\b(pipeline|architecture|design|flow|how|overview)\\b", ], } scores = defaultdict(int) q = query.lower() for trigger, patterns in PATTERNS.items(): for pat in patterns: if re.search(pat, q): scores[trigger] += 1 return max(scores, key=scores.get) if scores else "SEMANTIC_CONCEPT"''', language="python", label="trigger_classifier.py") gr.HTML('
Import Graph BFS — Key Differentiator vs RAG
') gr.Code(value='''def build_import_graph(root_dir: str) -> nx.DiGraph: """ Parse Python AST to extract import relationships. Creates a directed graph: file → imported_file edges. O(N) in codebase size — runs in <1s on 1000-file codebases. """ G = nx.DiGraph() for path in Path(root_dir).rglob("*.py"): if any(skip in path.parts for skip in ["venv", "__pycache__", ".git"]): continue try: tree = ast.parse(path.read_text(encoding="utf-8", errors="ignore")) for node in ast.walk(tree): if isinstance(node, ast.Import): for alias in node.names: dep = alias.name.split(".")[0] G.add_edge(str(path), dep) elif isinstance(node, ast.ImportFrom) and node.module: G.add_edge(str(path), node.module.split(".")[0]) except SyntaxError: pass return G def bfs_expand(seed_files: list[str], graph: nx.DiGraph, max_hops: int = 2) -> list[str]: """ BFS traversal over import graph from seed files. Resolves TRANSITIVE dependencies invisible to BM25/embedding methods. Example: query "what does the evaluator use?" seed: [evaluator.py] hop1: [metrics.py, downstream_quality.py] ← direct imports hop2: [scipy, numpy, ...] ← transitive imports This is what gives CTX perfect Recall@5=1.0 on IMPLICIT_CONTEXT queries. """ visited = set(seed_files) frontier = list(seed_files) for _ in range(max_hops): next_frontier = [] for f in frontier: for neighbor in graph.successors(f): if neighbor not in visited: visited.add(neighbor) next_frontier.append(neighbor) frontier = next_frontier return list(visited)''', language="python", label="adaptive_trigger.py — BFS Import Graph") gr.HTML('
TES Metric — Trade-off Efficiency Score
') gr.Code(value='''def tes(recall_at_k: float, n_retrieved: int) -> float: """ Trade-off Efficiency Score: balances recall against context size. TES = Recall@K / ln(1 + |retrieved|) Intuition: diminishing returns of loading more files. - Loading 1 file: ln(2) = 0.693 penalty - Loading 10 files: ln(11) = 2.398 penalty - Loading ALL files: ln(1001) = 6.909 penalty ← Full Context collapses here Results: Strategy Recall@5 Token% TES ──────────────── ────────── ────── ───── Full Context 0.075 100.0% 0.019 ← bad: high penalty BM25 0.982 18.7% 0.410 CTX (Ours) 0.874 5.2% 0.776 ← best: minimal files Validated: Pearson r=0.87 with NDCG@5 (p<0.001, 28 strategy-dataset pairs) """ if n_retrieved == 0: return 0.0 return recall_at_k / math.log(1 + n_retrieved) def adaptive_k(trigger_type: str, codebase_size: int) -> int: """ Adaptive retrieval budget based on trigger type. Symbol lookups need few files; dependency queries need more hops. """ base = { "EXPLICIT_SYMBOL": 3, # exact match → few files "TEMPORAL_HISTORY": 3, # recent changes → few files "SEMANTIC_CONCEPT": 5, # concept → moderate "IMPLICIT_CONTEXT": 7, # graph traversal → more files } k = base.get(trigger_type, 5) # Scale with codebase size (log scale) if codebase_size > 500: k = min(k + 2, 10) return k''', language="python", label="metrics.py + adaptive_k") gr.Markdown(""" --- **Full source**: [github.com/jaytoone/CTX](https://github.com/jaytoone/CTX) ```bash git clone https://github.com/jaytoone/CTX && cd CTX pip install -r requirements.txt python run_experiment.py --dataset-size small --strategy all ``` """) # ── Tab 5: Citation ─────────────────────── with gr.Tab("📚 Citation"): gr.HTML('
BibTeX
') gr.Textbox(value=BIBTEX, label="BibTeX", lines=10, max_lines=15) gr.Markdown(""" ### Links - **GitHub**: https://github.com/jaytoone/CTX - **arXiv**: Pending (cs.IR endorsement in progress — code: HBJRI6) ### Experiment Reproducibility ```bash git clone https://github.com/jaytoone/CTX cd CTX pip install -r requirements.txt # Synthetic benchmark (all 8 strategies) python run_experiment.py --dataset-size small --strategy all # Real codebase evaluation python run_experiment.py --dataset-source real --project-path /path/to/project # COIR benchmark python run_coir_eval.py # LLM pass@1 evaluation (requires MINIMAX_API_KEY) python run_llm_eval_v2.py ``` """) if __name__ == "__main__": demo.launch()