muthuk1 committed on
Commit 079d32f · verified · 1 Parent(s): 3101051

Fix #2+#4+#6: Add LLM-Only pipeline, wire NoveltyEngine, integrate TG GraphRAG client, 3-pipeline comparison

Files changed (1)
  1. graphrag/layers/orchestration_layer.py +248 -69
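
A minimal usage sketch of the 3-pipeline comparison this commit adds (the class, method, and field names come from the diff below; the import path follows the file path above, and the query and passages are invented for illustration):

from graphrag.layers.orchestration_layer import InferenceOrchestrator

# initialize() auto-wires the TG GraphRAG client and NoveltyEngine when none are injected
orch = InferenceOrchestrator()
orch.initialize()

passages = [
    "TigerGraph stores edges as compressed adjacency lists.",
    "GraphRAG augments vector retrieval with multi-hop graph traversal.",
]
comp = orch.run_triple_comparison("How does GraphRAG differ from basic RAG?", passages=passages)

for name, r in [("llm_only", comp.llm_only), ("baseline", comp.baseline), ("graphrag", comp.graphrag)]:
    print(f"{name}: {r.total_tokens} tokens, {r.latency_ms:.0f} ms, ${r.cost_usd:.4f}")
print(f"GraphRAG token savings vs Basic RAG: {comp.token_savings_vs_baseline_pct:+.1f}%")
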
graphrag/layers/orchestration_layer.py CHANGED
@@ -1,14 +1,19 @@
 """
-Layer 2: Inference Orchestration — Dual Pipeline Manager
-========================================================
-Routes queries through Baseline RAG and GraphRAG pipelines,
-collects metrics, and provides adaptive routing.
 """
 import json
 import logging
 import time
 from dataclasses import dataclass, field
-from typing import Any, Dict, List, Tuple
 
 from .graph_layer import GraphLayer, cosine_similarity
 from .llm_layer import LLMLayer, LLMResponse, TokenTracker
@@ -33,11 +38,29 @@ class PipelineResult:
     complexity_score: float = 0.0
     query_type: str = ""
     token_breakdown: Dict = field(default_factory=dict)
 
 
 @dataclass
 class ComparisonResult:
-    """Side-by-side comparison of both pipelines."""
     query: str = ""
     baseline: PipelineResult = field(default_factory=PipelineResult)
     graphrag: PipelineResult = field(default_factory=PipelineResult)
@@ -106,28 +129,84 @@ class EmbeddingManager:
 
 class InferenceOrchestrator:
     """
-    Layer 2: Manages both pipelines and routes queries.
     """
 
-    def __init__(self, graph_layer=None, llm_layer=None, embedder=None, config=None):
         self.graph = graph_layer or GraphLayer()
         self.llm = llm_layer or LLMLayer()
         self.embedder = embedder or EmbeddingManager()
         self.config = config or {}
         self.baseline_tracker = TokenTracker()
         self.graphrag_tracker = TokenTracker()
-        self.comparison_history: List[ComparisonResult] = []
 
     def initialize(self):
         self.llm.initialize()
         self.embedder.initialize()
-        logger.info("Inference Orchestrator initialized.")
 
-    # ── Pipeline A: Baseline RAG ────────────────────────────
 
     def run_baseline_rag(self, query, passages=None, top_k=5):
         """
-        Pipeline A: Query → Embed → Vector Search → Top-K Chunks → LLM → Answer
         """
         start = time.perf_counter()
         result = PipelineResult(pipeline_type="baseline")
@@ -158,18 +237,27 @@ class InferenceOrchestrator:
         self.baseline_tracker.record(resp, "baseline")
         return result
 
-    # ── Pipeline B: GraphRAG ────────────────────────────────
 
-    def run_graphrag(self, query, passages=None, seed_entities=5, hops=2, max_ctx=10):
         """
-        Pipeline B: Query → Keywords → Entity Search → Graph Traverse → Structured Context → LLM
-        Novelties: Dual-level keywords, schema-bounded extraction, graph reasoning
         """
         start = time.perf_counter()
-        result = PipelineResult(pipeline_type="graphrag")
         ti = to = cost = 0.0
 
-        # Step 1: Extract dual-level keywords (LightRAG-inspired)
         kw_resp = self.llm.extract_keywords(query)
         ti += kw_resp.input_tokens; to += kw_resp.output_tokens; cost += kw_resp.cost_usd
         self.graphrag_tracker.record(kw_resp, "keywords")
@@ -181,8 +269,27 @@ class InferenceOrchestrator:
 
         low_level = kws.get("low_level", [])
 
-        if self.graph.is_connected:
-            # Step 2: Find seed entities via vector search
             search_text = " ".join(low_level) if low_level else query
             query_emb = self.embedder.embed_single(search_text)
             ents = self.graph.vector_search_entities(query_emb, seed_entities)
@@ -192,60 +299,108 @@ class InferenceOrchestrator:
                  "description": e.get("description",""), "score": e.get("score",0)}
                 for e in ents
             ]
-            # Step 3: Multi-hop graph traversal
             if seed_ids:
                 traversal = self.graph.graph_traverse(seed_ids, hops)
                 result.contexts = traversal.get("chunk_texts", [])[:max_ctx]
                 result.relations_traversed = traversal.get("relations", [])
                 result.hops_used = hops
-        else:
-            # Fallback: simulate GraphRAG with passages + entity extraction
-            if passages:
-                query_emb = self.embedder.embed_single(query)
-                passage_embs = self.embedder.embed(passages)
-                scored = sorted(
-                    [(cosine_similarity(query_emb, emb), p, i)
-                     for i, (p, emb) in enumerate(zip(passages, passage_embs))],
-                    reverse=True
-                )
-
-                # Extract entities from top passages (simulates graph construction)
-                top_p = scored[:3]
-                all_ent_names = set()
-                for _, passage, _ in top_p:
-                    ext_resp = self.llm.extract_entities(passage)
-                    ti += ext_resp.input_tokens; to += ext_resp.output_tokens; cost += ext_resp.cost_usd
-                    self.graphrag_tracker.record(ext_resp, "entity_extraction")
-                    try:
-                        extracted = json.loads(ext_resp.content)
-                        for ent in extracted.get("entities", []):
-                            all_ent_names.add(ent.get("name", ""))
-                            result.entities_found.append(ent)
-                        for rel in extracted.get("relations", []):
-                            result.relations_traversed.append(
-                                f"{rel['source']} -[{rel['type']}]-> {rel['target']}: {rel.get('description','')}")
-                    except json.JSONDecodeError:
-                        pass
-
-                # Multi-hop simulation: expand by entity mentions
-                expanded = []
-                for _, passage, idx in scored:
-                    for en in all_ent_names:
-                        if en.lower() in passage.lower():
-                            expanded.append(passage)
-                            break
-                all_ctx = [p for _, p, _ in top_p]
-                for ep in expanded:
-                    if ep not in all_ctx: all_ctx.append(ep)
-                result.contexts = all_ctx[:max_ctx]
-                result.hops_used = hops
 
         # Step 4: Build structured context with graph information
         ctx_parts = []
         if result.entities_found:
-            ctx_parts.append("### Entities Found:\n" + "\n".join(
-                [f"- **{e.get('name','?')}** ({e.get('entity_type','?')}): {e.get('description','')}"
-                 for e in result.entities_found[:10]]))
         if result.relations_traversed:
             ctx_parts.append("### Relationships:\n" + "\n".join(
                 [f"- {r}" for r in result.relations_traversed[:15]]))
@@ -280,8 +435,29 @@ class InferenceOrchestrator:
         except (json.JSONDecodeError, ValueError):
             return 0.5, "unknown", "Analysis failed"
 
     def run_comparison(self, query, passages=None, top_k=5, hops=2):
-        """Run both pipelines and compare."""
         b = self.run_baseline_rag(query, passages, top_k)
         g = self.run_graphrag(query, passages, hops=hops)
         comp = ComparisonResult(query=query, baseline=b, graphrag=g)
@@ -289,15 +465,12 @@ class InferenceOrchestrator:
             comp.token_savings_pct = (g.total_tokens - b.total_tokens) / b.total_tokens * 100
         comp.latency_diff_ms = g.latency_ms - b.latency_ms
         comp.cost_diff_usd = g.cost_usd - b.cost_usd
-        self.comparison_history.append(comp)
         return comp
 
     def run_adaptive(self, query, passages=None, threshold=0.6):
         """Adaptive routing: automatically picks optimal pipeline."""
         score, qtype, reasoning = self.analyze_complexity(query)
-        comp = self.run_comparison(query, passages)
-        comp.baseline.complexity_score = score
-        comp.baseline.query_type = qtype
         comp.graphrag.complexity_score = score
         comp.graphrag.query_type = qtype
         if score >= threshold:
@@ -320,6 +493,12 @@ class InferenceOrchestrator:
         n = len(self.comparison_history)
         return {
             "total_queries": n,
             "baseline": {
                 "total_tokens": sum(c.baseline.total_tokens for c in self.comparison_history),
                 "avg_tokens": sum(c.baseline.total_tokens for c in self.comparison_history) / n,
 
 """
+Layer 2: Inference Orchestration — Triple Pipeline Manager
+==========================================================
+Routes queries through three pipelines:
+    Pipeline 1: LLM-Only  (no retrieval — worst-case baseline)
+    Pipeline 2: Basic RAG (vector embeddings + LLM — industry standard)
+    Pipeline 3: GraphRAG  (TigerGraph GraphRAG repo + novelty engine)
+
+Collects metrics for all three and provides adaptive routing.
 """
 import json
 import logging
 import time
+from collections import defaultdict
 from dataclasses import dataclass, field
+from typing import Any, Dict, List, Optional, Tuple
 
 from .graph_layer import GraphLayer, cosine_similarity
 from .llm_layer import LLMLayer, LLMResponse, TokenTracker
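
The docstring above also promises adaptive routing. A short sketch of that entry point, assuming run_adaptive() returns the TripleComparisonResult it builds internally (only part of its body appears in this diff) and that it populates recommended_pipeline and routing_reason:

from graphrag.layers.orchestration_layer import InferenceOrchestrator

orch = InferenceOrchestrator()
orch.initialize()

# Per run_adaptive() later in this diff: complexity score >= threshold (default 0.6)
# selects GraphRAG; lower scores settle for Basic RAG.
comp = orch.run_adaptive("Trace how a schema change propagates across services", threshold=0.6)
print(comp.graphrag.complexity_score, comp.graphrag.query_type)
print(comp.recommended_pipeline, "-", comp.routing_reason)  # assumed to be set by run_adaptive()
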
 
     complexity_score: float = 0.0
     query_type: str = ""
     token_breakdown: Dict = field(default_factory=dict)
+    novelty_chain: List[str] = field(default_factory=list)
+    retriever_used: str = ""
 
 
+@dataclass
+class TripleComparisonResult:
+    """Side-by-side comparison of all 3 pipelines."""
+    query: str = ""
+    llm_only: PipelineResult = field(default_factory=PipelineResult)
+    baseline: PipelineResult = field(default_factory=PipelineResult)
+    graphrag: PipelineResult = field(default_factory=PipelineResult)
+    token_savings_vs_baseline_pct: float = 0.0
+    token_savings_vs_llm_only_pct: float = 0.0
+    latency_diff_ms: float = 0.0
+    cost_diff_usd: float = 0.0
+    recommended_pipeline: str = ""
+    routing_reason: str = ""
+
+
+# Keep backward compat
 @dataclass
 class ComparisonResult:
+    """Side-by-side comparison of both pipelines (backward compat)."""
     query: str = ""
     baseline: PipelineResult = field(default_factory=PipelineResult)
     graphrag: PipelineResult = field(default_factory=PipelineResult)
 
 
 class InferenceOrchestrator:
     """
+    Layer 2: Manages all three pipelines and routes queries.
+        Pipeline 1: LLM-Only  (no retrieval)
+        Pipeline 2: Basic RAG (vector search + LLM)
+        Pipeline 3: GraphRAG  (TG GraphRAG service + novelty engine)
     """
 
+    def __init__(self, graph_layer=None, llm_layer=None, embedder=None,
+                 tg_graphrag_client=None, novelty_engine=None, config=None):
         self.graph = graph_layer or GraphLayer()
         self.llm = llm_layer or LLMLayer()
         self.embedder = embedder or EmbeddingManager()
+        self.tg_client = tg_graphrag_client    # official TG GraphRAG service client
+        self.novelty_engine = novelty_engine   # NoveltyEngine from novelties.py
         self.config = config or {}
+        self.llm_only_tracker = TokenTracker()
         self.baseline_tracker = TokenTracker()
         self.graphrag_tracker = TokenTracker()
+        self.comparison_history: List[TripleComparisonResult] = []
 
     def initialize(self):
         self.llm.initialize()
         self.embedder.initialize()
 
 
+        # Initialize TG GraphRAG client if not provided
+        if self.tg_client is None:
+            try:
+                from .tg_graphrag_client import TGGraphRAGClient
+                self.tg_client = TGGraphRAGClient()
+                self.tg_client.connect()
+            except Exception as e:
+                logger.info(f"TG GraphRAG client not available: {e}")
+
+        # Initialize NoveltyEngine if not provided
+        if self.novelty_engine is None:
+            try:
+                from .novelties import NoveltyEngine
+                self.novelty_engine = NoveltyEngine(
+                    token_budget=self.config.get("token_budget", 2000))
+                logger.info("NoveltyEngine initialized.")
+            except Exception as e:
+                logger.warning(f"NoveltyEngine not available: {e}")
+
+        logger.info("Inference Orchestrator initialized (3-pipeline mode).")
+
+    # ── Pipeline 1: LLM-Only (No Retrieval) ─────────────────
+
+    def run_llm_only(self, query: str) -> PipelineResult:
+        """
+        Pipeline 1: LLM-Only — raw prompt in, answer out. No retrieval.
+        This is the worst-case baseline: the LLM uses only its parametric knowledge.
+        """
+        start = time.perf_counter()
+        result = PipelineResult(pipeline_type="llm_only")
+
+        sys_prompt = (
+            "You are a knowledgeable assistant. Answer the question accurately and concisely "
+            "based on your knowledge. If you are not sure, say so."
+        )
+        resp = self.llm.generate([
+            {"role": "system", "content": sys_prompt},
+            {"role": "user", "content": f"Question: {query}\n\nAnswer:"},
+        ], max_tokens=512)
+
+        result.answer = resp.content
+        result.input_tokens = resp.input_tokens
+        result.output_tokens = resp.output_tokens
+        result.total_tokens = resp.total_tokens
+        result.cost_usd = resp.cost_usd
+        result.latency_ms = (time.perf_counter() - start) * 1000
+        self.llm_only_tracker.record(resp, "llm_only")
+        return result
+
+    # ── Pipeline 2: Basic RAG ────────────────────────────────
 
     def run_baseline_rag(self, query, passages=None, top_k=5):
         """
+        Pipeline 2: Basic RAG — Query → Embed → Vector Search → Top-K Chunks → LLM → Answer
+        Industry-standard, vector-based retrieval-augmented generation.
         """
         start = time.perf_counter()
         result = PipelineResult(pipeline_type="baseline")
 
         self.baseline_tracker.record(resp, "baseline")
         return result
 
+    # ── Pipeline 3: GraphRAG (TG GraphRAG + Novelties) ──────
+
+    def run_graphrag(self, query, passages=None, seed_entities=5, hops=2,
+                     max_ctx=10, retriever="hybrid", community_level=1):
         """
+        Pipeline 3: GraphRAG — built on top of the TigerGraph GraphRAG repo.
+
+        Flow:
+            1. Call TG GraphRAG service (official repo REST API) for retrieval
+            2. Apply NoveltyEngine enhancements (PPR, activation, token budget, etc.)
+            3. Build structured context with entities + relationships + passages
+            4. Generate answer with graph-aware LLM prompt
+
+        Falls back to direct pyTigerGraph GSQL queries if the service is unavailable,
+        then to passage-based entity extraction if there is no TG connection at all.
         """
         start = time.perf_counter()
+        result = PipelineResult(pipeline_type="graphrag", retriever_used=retriever)
         ti = to = cost = 0.0
 
+        # Step 1: Extract dual-level keywords (LightRAG-inspired novelty)
         kw_resp = self.llm.extract_keywords(query)
         ti += kw_resp.input_tokens; to += kw_resp.output_tokens; cost += kw_resp.cost_usd
         self.graphrag_tracker.record(kw_resp, "keywords")
 
 
         low_level = kws.get("low_level", [])
 
+        # Step 2: Try TG GraphRAG service first (official repo integration)
+        tg_used = False
+        if self.tg_client and self.tg_client.is_connected:
+            try:
+                tg_result = self.tg_client.retrieve(
+                    query=query, retriever=retriever,
+                    top_k=seed_entities * 2, num_hops=hops,
+                    community_level=community_level,
+                )
+                if tg_result.chunks:
+                    result.contexts = [c.get("text", "") for c in tg_result.chunks[:max_ctx]]
+                    result.entities_found = tg_result.entities
+                    result.relations_traversed = tg_result.relations
+                    result.hops_used = hops
+                    tg_used = True
+                    logger.info(f"TG GraphRAG service returned {len(tg_result.chunks)} chunks")
+            except Exception as e:
+                logger.warning(f"TG GraphRAG service call failed: {e}")
+
+        # Step 2b: Fall back to direct pyTigerGraph if service failed
+        if not tg_used and self.graph.is_connected:
             search_text = " ".join(low_level) if low_level else query
             query_emb = self.embedder.embed_single(search_text)
             ents = self.graph.vector_search_entities(query_emb, seed_entities)
 
                  "description": e.get("description",""), "score": e.get("score",0)}
                 for e in ents
             ]
             if seed_ids:
                 traversal = self.graph.graph_traverse(seed_ids, hops)
                 result.contexts = traversal.get("chunk_texts", [])[:max_ctx]
                 result.relations_traversed = traversal.get("relations", [])
                 result.hops_used = hops
+                tg_used = True
 
 
+        # Step 2c: Fallback for offline mode — simulate with passages + entity extraction
+        if not tg_used and passages:
+            query_emb = self.embedder.embed_single(query)
+            passage_embs = self.embedder.embed(passages)
+            scored = sorted(
+                [(cosine_similarity(query_emb, emb), p, i)
+                 for i, (p, emb) in enumerate(zip(passages, passage_embs))],
+                reverse=True
+            )
+            top_p = scored[:3]
+            all_ent_names = set()
+            for _, passage, _ in top_p:
+                ext_resp = self.llm.extract_entities(passage)
+                ti += ext_resp.input_tokens; to += ext_resp.output_tokens; cost += ext_resp.cost_usd
+                self.graphrag_tracker.record(ext_resp, "entity_extraction")
+                try:
+                    extracted = json.loads(ext_resp.content)
+                    for ent in extracted.get("entities", []):
+                        all_ent_names.add(ent.get("name", ""))
+                        result.entities_found.append(ent)
+                    for rel in extracted.get("relations", []):
+                        result.relations_traversed.append(
+                            f"{rel['source']} -[{rel['type']}]-> {rel['target']}: {rel.get('description','')}")
+                except json.JSONDecodeError:
+                    pass
+
+            # Multi-hop simulation: expand by entity mentions
+            expanded = []
+            for _, passage, idx in scored:
+                for en in all_ent_names:
+                    if en.lower() in passage.lower():
+                        expanded.append(passage)
+                        break
+            all_ctx = [p for _, p, _ in top_p]
+            for ep in expanded:
+                if ep not in all_ctx: all_ctx.append(ep)
+            result.contexts = all_ctx[:max_ctx]
+            result.hops_used = hops
+
+        # Step 3: Apply NoveltyEngine enhancements if available
+        if self.novelty_engine and result.entities_found and result.contexts:
+            try:
+                # Build adjacency from extracted relations
+                adjacency: Dict[str, List[Tuple[str, float]]] = defaultdict(list)
+                entity_to_chunks: Dict[str, List[str]] = defaultdict(list)
+                chunk_texts: Dict[str, str] = {}
+
+                for i, ctx in enumerate(result.contexts):
+                    cid = f"ctx_{i}"
+                    chunk_texts[cid] = ctx
+
+                for e in result.entities_found:
+                    ename = e.get("name", "").lower()
+                    for i, ctx in enumerate(result.contexts):
+                        if ename in ctx.lower():
+                            entity_to_chunks[ename].append(f"ctx_{i}")
+
+                for rel_str in result.relations_traversed:
+                    parts = rel_str.split(" -[")
+                    if len(parts) >= 2:
+                        src = parts[0].strip().lower()
+                        rest = parts[1].split("]->")
+                        if len(rest) >= 2:
+                            tgt = rest[1].split(":")[0].strip().lower()
+                            adjacency[src].append((tgt, 0.8))
+                            adjacency[tgt].append((src, 0.8))
+
+                seed_ents = [e.get("name", "").lower() for e in result.entities_found[:5]]
+
+                if adjacency and seed_ents and entity_to_chunks:
+                    novelty_result = self.novelty_engine.enhanced_retrieve(
+                        query=query,
+                        adjacency=adjacency,
+                        seed_entities=seed_ents,
+                        entity_to_chunks=entity_to_chunks,
+                        chunk_texts=chunk_texts,
+                    )
+                    if novelty_result.get("contexts"):
+                        result.contexts = novelty_result["contexts"]
+                        result.novelty_chain = novelty_result.get("technique_chain", [])
+                        logger.info(f"NoveltyEngine applied: {result.novelty_chain}")
+            except Exception as e:
+                logger.warning(f"NoveltyEngine enhancement failed: {e}")
 
         # Step 4: Build structured context with graph information
         ctx_parts = []
         if result.entities_found:
+            ent_list = result.entities_found[:10]
+            if isinstance(ent_list[0], dict):
+                ctx_parts.append("### Entities Found:\n" + "\n".join(
+                    [f"- **{e.get('name','?')}** ({e.get('entity_type','?')}): {e.get('description','')}"
+                     for e in ent_list]))
+            else:
+                ctx_parts.append("### Entities Found:\n" + "\n".join(
+                    [f"- {e}" for e in ent_list]))
         if result.relations_traversed:
             ctx_parts.append("### Relationships:\n" + "\n".join(
                 [f"- {r}" for r in result.relations_traversed[:15]]))
 
         except (json.JSONDecodeError, ValueError):
             return 0.5, "unknown", "Analysis failed"
 
+    def run_triple_comparison(self, query, passages=None, top_k=5, hops=2):
+        """Run all 3 pipelines and compare side-by-side."""
+        lo = self.run_llm_only(query)
+        b = self.run_baseline_rag(query, passages, top_k)
+        g = self.run_graphrag(query, passages, hops=hops)
+
+        comp = TripleComparisonResult(query=query, llm_only=lo, baseline=b, graphrag=g)
+        if b.total_tokens > 0:
+            comp.token_savings_vs_baseline_pct = (
+                (b.total_tokens - g.total_tokens) / b.total_tokens * 100
+            )
+        if lo.total_tokens > 0:
+            comp.token_savings_vs_llm_only_pct = (
+                (lo.total_tokens - g.total_tokens) / lo.total_tokens * 100
+            )
+        comp.latency_diff_ms = g.latency_ms - b.latency_ms
+        comp.cost_diff_usd = g.cost_usd - b.cost_usd
+        self.comparison_history.append(comp)
+        return comp
+
+    # Backward compat — 2-pipeline comparison
     def run_comparison(self, query, passages=None, top_k=5, hops=2):
+        """Run both pipelines and compare (backward compat)."""
         b = self.run_baseline_rag(query, passages, top_k)
         g = self.run_graphrag(query, passages, hops=hops)
         comp = ComparisonResult(query=query, baseline=b, graphrag=g)
 
             comp.token_savings_pct = (g.total_tokens - b.total_tokens) / b.total_tokens * 100
         comp.latency_diff_ms = g.latency_ms - b.latency_ms
         comp.cost_diff_usd = g.cost_usd - b.cost_usd
         return comp
 
     def run_adaptive(self, query, passages=None, threshold=0.6):
         """Adaptive routing: automatically picks optimal pipeline."""
         score, qtype, reasoning = self.analyze_complexity(query)
+        comp = self.run_triple_comparison(query, passages)
         comp.graphrag.complexity_score = score
         comp.graphrag.query_type = qtype
         if score >= threshold:
 
         n = len(self.comparison_history)
         return {
             "total_queries": n,
+            "llm_only": {
+                "total_tokens": sum(c.llm_only.total_tokens for c in self.comparison_history),
+                "avg_tokens": sum(c.llm_only.total_tokens for c in self.comparison_history) / n,
+                "total_cost": sum(c.llm_only.cost_usd for c in self.comparison_history),
+                "avg_latency": sum(c.llm_only.latency_ms for c in self.comparison_history) / n,
+            },
             "baseline": {
                 "total_tokens": sum(c.baseline.total_tokens for c in self.comparison_history),
                 "avg_tokens": sum(c.baseline.total_tokens for c in self.comparison_history) / n,