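/**
 * Compare endpoint: runs the same query through three pipelines
 * (LLM-only, Basic RAG over raw text chunks, GraphRAG over a compact
 * entity context) and returns answers, token counts, latencies, and
 * cost estimates side by side. Falls back to a canned demo payload
 * when no API key is configured or the live call fails.
 */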
import { NextRequest, NextResponse } from "next/server";
import { callLLM, PROVIDERS, type ProviderId } from "@/lib/llm-providers";
import { getEmbedding, searchChunks, chunkToEntityContext } from "@/lib/retrieval";
export const runtime = "nodejs";
export const dynamic = "force-dynamic";
interface CompareRequest {
query: string;
provider?: ProviderId;
model?: string;
adaptiveRouting?: boolean;
topK?: number;
}
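
// Illustrative request body (example values; "openai" and topK 5 are the defaults below):
//   { "query": "Who developed general relativity?", "provider": "openai", "topK": 5 }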
export async function POST(req: NextRequest) {
try {
const body: CompareRequest = await req.json();
const { query, provider = "openai", model, adaptiveRouting = true, topK = 5 } = body;
if (!query?.trim()) {
return NextResponse.json({ error: "Query required" }, { status: 400 });
}
const providerConfig = PROVIDERS[provider];
if (!providerConfig) {
return NextResponse.json({ error: `Unknown provider: ${provider}` }, { status: 400 });
}
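    // Local and keyless providers skip the API-key check; without a key,
    // return the canned demo payload so the comparison UI stays usable.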
const hasKey = providerConfig.isLocal || !providerConfig.requiresApiKey || !!process.env[providerConfig.apiKeyEnv];
if (!hasKey) {
return NextResponse.json(getDemoResponse(query, provider));
}
const selectedModel = model || providerConfig.defaultModel;
const startTime = Date.now();
    // ── Parallel phase 1: LLM-Only + embedding fetch run simultaneously ──
    // LLM-only needs no retrieval; start it immediately alongside the embed call.
const [llmOnlyResp, embedding] = await Promise.all([
callLLM({
provider, model: selectedModel,
messages: [
{ role: "system", content: "Answer the question accurately and concisely from your knowledge. If unsure, say so." },
{ role: "user", content: query },
],
temperature: 0, maxTokens: 512,
}),
getEmbedding(query),
]);
    // Per-call latency from the response object; a wall-clock diff around the
    // Promise.all would also include embedding time and overstate it.
    const llmOnlyLatency = llmOnlyResp.latencyMs;
    // ── Retrieve chunks from TigerGraph (needs embedding) ────────────────
const chunks = embedding ? await searchChunks(embedding, topK) : [];
const hasRetrieval = chunks.length > 0;
// Full text context (Basic RAG: raw chunks concatenated)
const ragContext = hasRetrieval
? chunks.map((c, i) => `[Passage ${i + 1}]\n${c.text}`).join("\n\n")
: `No documents retrieved. Answering from general knowledge.`;
// Compact entity context (GraphRAG: first-sentence descriptions, pre-indexed at ingest time)
const graphContext = hasRetrieval
? chunks.map((c, i) => `[${i + 1}] ${chunkToEntityContext(c.text)}`).join("\n")
: `No graph context available.`;
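    // The compact entity context is what keeps GraphRAG's input-token count
    // (and therefore cost) well below the raw-chunk context used by Basic RAG.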
    // ── Parallel phase 2: Basic RAG + GraphRAG run simultaneously ────────
const [basicRagResp, graphragResp] = await Promise.all([
callLLM({
provider, model: selectedModel,
messages: [
{ role: "system", content: "Answer the question using ONLY the provided context passages. Be accurate and concise." },
{ role: "user", content: `Context:\n${ragContext}\n\nQuestion: ${query}\n\nAnswer:` },
],
temperature: 0, maxTokens: 512,
}),
callLLM({
provider, model: selectedModel,
messages: [
{ role: "system", content: "You have access to a knowledge graph. The entity descriptions below were pre-indexed from the document corpus. Use them to answer precisely and concisely β follow any relationship chains implied." },
{ role: "user", content: `Knowledge Graph Entities:\n${graphContext}\n\nQuestion: ${query}\n\nAnswer:` },
],
temperature: 0, maxTokens: 512,
}),
]);
    // Both calls share the same wall-clock window; report per-call latencies
    // from their response objects. The overall total is captured in totalTimeMs.
    const ragLatency = basicRagResp.latencyMs;
    const graphragLatency = graphragResp.latencyMs;
    // ── Adaptive routing (complexity scoring) ────────────────────────────
let complexity = 0.5, queryType = "factoid", recommended = "graphrag";
if (adaptiveRouting) {
      // Case-insensitive regexes run against the raw query; multi-hop cues take
      // precedence so complexity and queryType classify the query consistently.
      const isMultiHop = /same|both|compare|which.*first|who.*born|difference|related|between/i.test(query);
      const isSimple = /what is|define|spell|capital of/i.test(query);
      complexity = isMultiHop ? 0.8 : isSimple ? 0.2 : 0.55;
      queryType = isMultiHop ? "multi_hop" : isSimple ? "factoid" : "comparison";
recommended = complexity >= 0.5 ? "graphrag" : "baseline";
}
    // ── Entity list from compact context (for UI display) ────────────────
const entities = chunks.map((c) => chunkToEntityContext(c.text, 80)).filter(Boolean);
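    // Relation edges are not extracted in the live path; demo mode below shows
    // the edge format the UI expects.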
const relations: string[] = [];
return NextResponse.json({
llmOnly: {
answer: llmOnlyResp.content,
tokens: llmOnlyResp.totalTokens,
latencyMs: llmOnlyLatency,
costUsd: llmOnlyResp.costUsd,
},
baseline: {
answer: basicRagResp.content,
tokens: basicRagResp.totalTokens,
latencyMs: ragLatency,
costUsd: basicRagResp.costUsd,
entities: [],
relations: [],
retrievedChunks: chunks.length,
contextTokens: basicRagResp.inputTokens,
},
graphrag: {
answer: graphragResp.content,
tokens: graphragResp.totalTokens,
latencyMs: graphragLatency,
costUsd: graphragResp.costUsd,
entities,
relations,
retrievedChunks: chunks.length,
contextTokens: graphragResp.inputTokens,
},
complexity,
queryType,
recommended,
provider,
model: selectedModel,
totalTimeMs: Date.now() - startTime,
retrievalEnabled: hasRetrieval,
chunksRetrieved: chunks.length,
});
} catch (error) {
console.error("Compare API error:", error);
const errMsg = error instanceof Error ? error.message : "Unknown error";
return NextResponse.json(getDemoResponse("", "openai", errMsg));
}
}
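
/**
 * Canned three-way comparison returned when no API key is configured or the
 * live call throws; `error`, when provided, is surfaced as `demoReason`.
 */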
function getDemoResponse(query: string, provider: string, error?: string) {
return {
llmOnly: {
answer: "Albert Einstein developed general relativity, and Niels Bohr contributed to quantum mechanics β they worked in different areas of physics.",
tokens: 124, latencyMs: 820, costUsd: 0.000019,
},
baseline: {
answer: "Based on the retrieved documents: General relativity was developed by Albert Einstein. Quantum mechanics was pioneered by several physicists including Niels Bohr, Werner Heisenberg, and Erwin SchrΓΆdinger. These are distinct theories β general relativity describes gravity at large scales while quantum mechanics describes subatomic behavior.",
tokens: 1847, latencyMs: 1480, costUsd: 0.000277,
entities: [], relations: [], retrievedChunks: 5, contextTokens: 1620,
},
graphrag: {
answer: "General relativity (Einstein, 1915) describes gravity as spacetime curvature. Quantum mechanics (Bohr, Heisenberg, SchrΓΆdinger, 1920s) governs subatomic particles. They are complementary theories covering different scales.",
tokens: 387, latencyMs: 980, costUsd: 0.000058,
entities: ["Albert Einstein (physicist, general relativity)", "Niels Bohr (physicist, quantum model)", "Werner Heisenberg (physicist, uncertainty principle)"],
relations: ["Einstein -[DEVELOPED]-> General Relativity", "Bohr -[DEVELOPED]-> Quantum Model of Atom"],
retrievedChunks: 5, contextTokens: 312,
},
complexity: 0.65, queryType: "comparison", recommended: "graphrag",
provider, model: "demo-mode", totalTimeMs: 3300,
retrievalEnabled: false, chunksRetrieved: 0,
    demoMode: true,
    demoReason: error || "No API key configured",
};
}
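
// Example invocation, assuming this route file is mounted at /api/compare
// (the actual path depends on where the file lives under app/):
//   curl -X POST http://localhost:3000/api/compare \
//     -H "Content-Type: application/json" \
//     -d '{"query":"Who developed general relativity?","topK":5}'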