File size: 7,963 Bytes
19df402
ddb116f
577adc4
19df402
 
 
 
 
 
ddb116f
 
19df402
 
 
 
 
 
 
577adc4
19df402
 
 
 
 
ddb116f
 
 
 
19df402
ddb116f
 
 
19df402
 
ddb116f
19df402
 
90b36cb
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
577adc4
 
 
 
 
 
 
 
90b36cb
577adc4
90b36cb
577adc4
 
90b36cb
577adc4
90b36cb
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
19df402
577adc4
 
19df402
577adc4
 
 
 
 
 
19df402
 
577adc4
 
 
 
19df402
9333670
 
 
577adc4
9333670
 
19df402
577adc4
 
 
 
19df402
 
577adc4
 
19df402
 
ddb116f
577adc4
19df402
577adc4
19df402
 
577adc4
 
19df402
 
 
 
ddb116f
 
19df402
577adc4
 
19df402
 
 
ddb116f
577adc4
19df402
 
 
ddb116f
19df402
9333670
577adc4
 
9333670
19df402
577adc4
 
 
19df402
 
577adc4
 
 
 
 
19df402
577adc4
 
 
ddb116f
19df402
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
import { NextRequest, NextResponse } from "next/server";
import { callLLM, PROVIDERS, type ProviderId } from "@/lib/llm-providers";
import { getEmbedding, searchChunks, chunkToEntityContext } from "@/lib/retrieval";

// Next.js route segment config: execute this handler on the Node.js runtime.
export const runtime = "nodejs";
// Next.js route segment config: always evaluate the route per request instead of caching it.
export const dynamic = "force-dynamic";

/** JSON body accepted by the compare endpoint's POST handler. */
interface CompareRequest {
  /** Question to answer; the handler rejects a missing or blank value with HTTP 400. */
  query: string;
  /** Key into PROVIDERS; the handler defaults it to "openai" when omitted. */
  provider?: ProviderId;
  /** Model override; the handler falls back to the provider's `defaultModel` when omitted. */
  model?: string;
  /** Whether to run the regex-based complexity scoring; the handler defaults it to true. */
  adaptiveRouting?: boolean;
  /** Number of chunks to request from retrieval; the handler defaults it to 5. */
  topK?: number;
}

/** Chunk count used when the request does not supply a usable `topK`. */
const DEFAULT_TOP_K = 5;
/** Upper bound on `topK` so an untrusted request cannot ask for an unbounded number of chunks. */
const MAX_TOP_K = 50;

/** Cues that a question needs several facts combined (comparison / relationship questions). */
const MULTI_HOP_PATTERN = /same|both|compare|which.*first|who.*born|difference|related|between/i;
/** Cues that a question is a simple single-fact lookup. */
const SIMPLE_PATTERN = /what is|define|spell|capital of/i;

/** Coerces an untrusted `topK` into an integer in [1, MAX_TOP_K], or DEFAULT_TOP_K if it is not a finite number. */
function clampTopK(value: unknown): number {
  if (typeof value !== "number" || !Number.isFinite(value)) {
    return DEFAULT_TOP_K;
  }
  return Math.min(MAX_TOP_K, Math.max(1, Math.trunc(value)));
}

/**
 * Scores a query with the regex heuristics and picks the recommended pipeline.
 *
 * Multi-hop cues take precedence over simple-lookup cues for BOTH the complexity
 * score and the query type, so a question such as "what is the difference between
 * X and Y" is consistently reported as multi-hop / high complexity.
 */
function classifyQuery(query: string): { complexity: number; queryType: string; recommended: string } {
  const isMultiHop = MULTI_HOP_PATTERN.test(query);
  const isSimple = !isMultiHop && SIMPLE_PATTERN.test(query);
  const complexity = isMultiHop ? 0.8 : isSimple ? 0.2 : 0.55;
  const queryType = isMultiHop ? "multi_hop" : isSimple ? "factoid" : "comparison";
  const recommended = complexity >= 0.5 ? "graphrag" : "baseline";
  return { complexity, queryType, recommended };
}

/**
 * Answers one query three ways (LLM only, basic RAG, GraphRAG-style compact context)
 * and returns the answers side by side with token, latency and cost figures.
 *
 * Responses:
 * - 400 when the body is not valid JSON, `query` is missing/blank, or `provider` is unknown.
 * - The canned demo payload when the selected provider has no API key configured.
 * - The canned demo payload (with `demoReason` set to the error message) when anything
 *   throws while processing; this fallback is deliberate and is returned with the default status.
 */
export async function POST(req: NextRequest) {
  // Tracked outside the try block so the error fallback can echo the validated provider.
  let requestedProvider = "openai";
  try {
    let rawBody: unknown;
    try {
      rawBody = await req.json();
    } catch {
      return NextResponse.json({ error: "Invalid JSON body" }, { status: 400 });
    }
    // A JSON body of `null` or a primitive would otherwise throw on destructuring.
    const body: Partial<CompareRequest> =
      rawBody !== null && typeof rawBody === "object" ? (rawBody as Partial<CompareRequest>) : {};
    const { query, provider = "openai", model, adaptiveRouting = true } = body;
    const topK = clampTopK(body.topK);

    if (typeof query !== "string" || !query.trim()) {
      return NextResponse.json({ error: "Query required" }, { status: 400 });
    }

    // Own-property check: a plain index lookup would also resolve inherited keys
    // such as "constructor" and let them pass as a provider.
    if (typeof provider !== "string" || !Object.prototype.hasOwnProperty.call(PROVIDERS, provider)) {
      return NextResponse.json({ error: `Unknown provider: ${String(provider)}` }, { status: 400 });
    }
    requestedProvider = provider;
    const providerConfig = PROVIDERS[provider];

    const hasKey = providerConfig.isLocal || !providerConfig.requiresApiKey || !!process.env[providerConfig.apiKeyEnv];
    if (!hasKey) {
      return NextResponse.json(getDemoResponse(query, provider));
    }

    const selectedModel = typeof model === "string" && model ? model : providerConfig.defaultModel;
    const startTime = Date.now();

    // ── Parallel phase 1: LLM-Only + embedding fetch run simultaneously ──
    // LLM-only needs no retrieval; start it immediately alongside the embed call.
    const [llmOnlyResp, embedding] = await Promise.all([
      callLLM({
        provider, model: selectedModel,
        messages: [
          { role: "system", content: "Answer the question accurately and concisely from your knowledge. If unsure, say so." },
          { role: "user", content: query },
        ],
        temperature: 0, maxTokens: 512,
      }),
      getEmbedding(query),
    ]);

    // ── Retrieve chunks from TigerGraph (needs embedding) ─────────────────
    const chunks = embedding ? await searchChunks(embedding, topK) : [];
    const hasRetrieval = chunks.length > 0;

    // Full text context (Basic RAG: raw chunks concatenated)
    const ragContext = hasRetrieval
      ? chunks.map((c, i) => `[Passage ${i + 1}]\n${c.text}`).join("\n\n")
      : `No documents retrieved. Answering from general knowledge.`;

    // Compact entity context (GraphRAG: first-sentence descriptions, pre-indexed at ingest time)
    const graphContext = hasRetrieval
      ? chunks.map((c, i) => `[${i + 1}] ${chunkToEntityContext(c.text)}`).join("\n")
      : `No graph context available.`;

    // ── Parallel phase 2: Basic RAG + GraphRAG run simultaneously ────────
    const [basicRagResp, graphragResp] = await Promise.all([
      callLLM({
        provider, model: selectedModel,
        messages: [
          { role: "system", content: "Answer the question using ONLY the provided context passages. Be accurate and concise." },
          { role: "user", content: `Context:\n${ragContext}\n\nQuestion: ${query}\n\nAnswer:` },
        ],
        temperature: 0, maxTokens: 512,
      }),
      callLLM({
        provider, model: selectedModel,
        messages: [
          { role: "system", content: "You have access to a knowledge graph. The entity descriptions below were pre-indexed from the document corpus. Use them to answer precisely and concisely — follow any relationship chains implied." },
          { role: "user", content: `Knowledge Graph Entities:\n${graphContext}\n\nQuestion: ${query}\n\nAnswer:` },
        ],
        temperature: 0, maxTokens: 512,
      }),
    ]);

    // ── Adaptive routing (complexity scoring) ────────────────────────────
    const routing = adaptiveRouting
      ? classifyQuery(query)
      : { complexity: 0.5, queryType: "factoid", recommended: "graphrag" };

    // ── Entity list from compact context (for UI display) ────────────────
    const entities = chunks.map((c) => chunkToEntityContext(c.text, 80)).filter(Boolean);
    const relations: string[] = [];

    return NextResponse.json({
      llmOnly: {
        answer: llmOnlyResp.content,
        tokens: llmOnlyResp.totalTokens,
        // Per-call latency from the response object, like the other two modes; the
        // wall-clock of phase 1 would also include the embedding request.
        latencyMs: llmOnlyResp.latencyMs,
        costUsd: llmOnlyResp.costUsd,
      },
      baseline: {
        answer: basicRagResp.content,
        tokens: basicRagResp.totalTokens,
        latencyMs: basicRagResp.latencyMs,
        costUsd: basicRagResp.costUsd,
        entities: [],
        relations: [],
        retrievedChunks: chunks.length,
        contextTokens: basicRagResp.inputTokens,
      },
      graphrag: {
        answer: graphragResp.content,
        tokens: graphragResp.totalTokens,
        latencyMs: graphragResp.latencyMs,
        costUsd: graphragResp.costUsd,
        entities,
        relations,
        retrievedChunks: chunks.length,
        contextTokens: graphragResp.inputTokens,
      },
      ...routing,
      provider,
      model: selectedModel,
      totalTimeMs: Date.now() - startTime,
      retrievalEnabled: hasRetrieval,
      chunksRetrieved: chunks.length,
    });
  } catch (error) {
    console.error("Compare API error:", error);
    const errMsg = error instanceof Error ? error.message : "Unknown error";
    // Deliberate best-effort: fall back to the canned demo payload rather than failing the request.
    return NextResponse.json(getDemoResponse("", requestedProvider, errMsg));
  }
}

/**
 * Builds the canned comparison payload returned when no real LLM call can be made.
 *
 * The answers and metrics are fixed sample values; only `provider` and `demoReason`
 * vary with the arguments.
 *
 * @param query - Accepted for call-site symmetry; not used in the payload.
 * @param provider - Echoed back in the `provider` field.
 * @param error - When non-empty, reported as `demoReason`; otherwise the reason is
 *   "No API key configured".
 * @returns A plain object with the same shape as a live compare response, plus
 *   `demoMode: true` and `demoReason`.
 */
function getDemoResponse(query: string, provider: string, error?: string) {
  return {
    llmOnly: {
      answer: "Albert Einstein developed general relativity, and Niels Bohr contributed to quantum mechanics — they worked in different areas of physics.",
      tokens: 124, latencyMs: 820, costUsd: 0.000019,
    },
    baseline: {
      answer: "Based on the retrieved documents: General relativity was developed by Albert Einstein. Quantum mechanics was pioneered by several physicists including Niels Bohr, Werner Heisenberg, and Erwin Schrödinger. These are distinct theories — general relativity describes gravity at large scales while quantum mechanics describes subatomic behavior.",
      tokens: 1847, latencyMs: 1480, costUsd: 0.000277,
      entities: [], relations: [], retrievedChunks: 5, contextTokens: 1620,
    },
    graphrag: {
      answer: "General relativity (Einstein, 1915) describes gravity as spacetime curvature. Quantum mechanics (Bohr, Heisenberg, Schrödinger, 1920s) governs subatomic particles. They are complementary theories covering different scales.",
      tokens: 387, latencyMs: 980, costUsd: 0.000058,
      entities: ["Albert Einstein (physicist, general relativity)", "Niels Bohr (physicist, quantum model)", "Werner Heisenberg (physicist, uncertainty principle)"],
      relations: ["Einstein -[DEVELOPED]-> General Relativity", "Bohr -[DEVELOPED]-> Quantum Model of Atom"],
      retrievedChunks: 5, contextTokens: 312,
    },
    complexity: 0.65, queryType: "comparison", recommended: "graphrag",
    provider, model: "demo-mode", totalTimeMs: 3300,
    retrievalEnabled: false, chunksRetrieved: 0,
    demoMode: true,
    // `||` rather than `??`: an empty error message also falls back to the default reason.
    demoReason: error || "No API key configured",
  };
}