File size: 12,966 Bytes
005833b
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
f26d060
005833b
f26d060
 
 
 
 
 
005833b
f26d060
 
 
 
 
 
 
 
005833b
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
f26d060
 
 
 
 
005833b
 
f26d060
005833b
f26d060
 
 
 
 
 
005833b
f26d060
 
 
 
 
 
005833b
 
f26d060
 
 
 
 
 
 
 
005833b
f26d060
 
 
 
005833b
 
 
 
f26d060
005833b
 
f26d060
005833b
 
 
 
 
f26d060
 
 
 
 
 
 
 
005833b
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
f26d060
 
 
 
 
 
005833b
f26d060
 
 
005833b
f26d060
 
 
 
005833b
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
f26d060
 
 
 
005833b
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
"use client";

import { useState } from "react";

/**
 * Static documentation content rendered by the docs page.
 *
 * Each section has an `id` (used as the selection key), a `title`, an emoji
 * `icon`, and a `content` array of blocks. A block always has a `heading` and
 * may have a `body` (plain text where `**...**` marks bold runs and newlines
 * are significant) and/or a `code` snippet (shown verbatim).
 *
 * All non-ASCII characters below are real Unicode (emoji, dashes, arrows,
 * minus sign); keep this file saved as UTF-8 so they are not re-garbled.
 */
const SECTIONS = [
  {
    id: "quickstart",
    title: "Quick Start",
    icon: "🚀",
    content: [
      {
        heading: "Prerequisites",
        body: `Before running GraphRAG, make sure you have:
- Python 3.10+
- Node.js 18+
- Docker (optional, for containerized deployment)
- A TigerGraph Cloud account (free tier available)
- At least one LLM API key (Claude, OpenAI, etc.)`,
      },
      {
        heading: "Installation",
        code: `# Clone the repository
git clone https://github.com/MUTHUKUMARAN-K-1/graphrag-inference-hackathon
cd graphrag-inference-hackathon

# Backend setup
pip install -r requirements.txt

# Frontend setup
cd web
npm install
npm run dev

# Or use Docker
docker build -t graphrag .
docker run -p 3000:3000 -p 8000:8000 graphrag`,
      },
      {
        heading: "Environment Variables",
        code: `# web/.env  (copy from web/.env.example)

# TigerGraph
TG_HOST=https://your-instance.i.tgcloud.io
TG_TOKEN=your_bearer_token
TG_GRAPH=GraphRAG

# LLM — set at least one
OPENAI_API_KEY=sk-...
OPENAI_BASE_URL=https://models.botlearn.ai/v1   # optional: botlearn.ai / other proxy
LLM_MODEL=gemini-2.5-flash                      # optional: override default model

# Embeddings (for live TigerGraph retrieval)
HF_TOKEN=hf_...

# Optional additional providers
ANTHROPIC_API_KEY=sk-ant-...
GEMINI_API_KEY=AI...
GROQ_API_KEY=gsk_...`,
      },
    ],
  },
  {
    id: "api",
    title: "API Reference",
    icon: "📑",
    content: [
      {
        heading: "POST /api/compare",
        body: "Run a side-by-side comparison of Baseline RAG and GraphRAG pipelines.",
        code: `// Request
POST /api/compare
{
  "query": "What theory describes gravity as the curvature of spacetime?",
  "provider": "openai",
  "model": "gemini-2.5-flash",
  "adaptiveRouting": true,
  "topK": 5
}

// Response — 3 pipelines run in parallel
{
  "llmOnly": {
    "answer": "General relativity.",
    "tokens": 84,
    "latencyMs": 820,
    "costUsd": 0.000013
  },
  "baseline": {
    "answer": "General relativity, published by Einstein in 1915...",
    "tokens": 290,
    "latencyMs": 1100,
    "costUsd": 0.000044,
    "retrievedChunks": 5,
    "contextTokens": 240
  },
  "graphrag": {
    "answer": "General relativity (Einstein, 1915).",
    "tokens": 163,
    "latencyMs": 950,
    "costUsd": 0.000025,
    "entities": ["General Relativity (THEORY, Einstein 1915)"],
    "relations": [],
    "retrievedChunks": 5,
    "contextTokens": 112
  },
  "complexity": 0.2,
  "queryType": "factoid",
  "recommended": "baseline",
  "totalTimeMs": 2700
}`,
      },
      {
        heading: "POST /api/benchmark",
        body: "Run batch evaluation on 10 science questions from the ingested Wikipedia corpus. All samples run in parallel.",
        code: `// Request
POST /api/benchmark
{ "numSamples": 10, "provider": "openai", "model": "gemini-2.5-flash" }

// Response
{
  "aggregate": {
    "numSamples": 10,
    "llmOnly":  { "avgF1": 0.7000, "avgEM": 0.70, "avgTokens": 84,  "avgLatency": 820 },
    "baseline": { "avgF1": 0.5800, "avgEM": 0.50, "avgTokens": 290, "avgLatency": 1100 },
    "graphrag": { "avgF1": 0.7467, "avgEM": 0.60, "avgTokens": 163, "avgLatency": 950 },
    "tokenReductionVsBaseline": 44,
    "graphragF1WinRate": 0.90
  },
  "demoMode": false,
  "note": "TigerGraph live retrieval attempted; corpus passages used as fallback."
}`,
      },
      {
        heading: "GET /api/providers",
        body: "List all available LLM providers and their models.",
        code: `// Response
{
  "providers": [
    {
      "id": "anthropic",
      "name": "Anthropic Claude",
      "isLocal": false,
      "hasApiKey": true,
      "defaultModel": "claude-sonnet-4-20250514",
      "models": [
        { "id": "claude-sonnet-4-20250514", "name": "Claude Sonnet 4", "speed": "medium", "quality": "high" }
      ]
    },
    ...
  ]
}`,
      },
    ],
  },
  {
    id: "architecture",
    title: "Architecture",
    icon: "🏗️",
    content: [
      {
        heading: "4-Layer AI Factory Model",
        body: `The system follows an AI Factory architecture with four distinct layers:

**Layer 1 — Graph Layer (TigerGraph Cloud)**
Stores the knowledge graph with typed vertices and edges. Supports GSQL queries for multi-hop traversal.

**Layer 2 — Orchestration Layer**
Routes queries through the Adaptive Router, which scores complexity and classifies query types (bridge, comparison, factoid).

**Layer 3 — LLM Layer (12 Providers)**
Universal LLM abstraction supporting Claude, GPT-4, Gemini, Llama, Mistral, DeepSeek, Grok, Cohere, and more.

**Layer 4 — Evaluation Layer (RAGAS)**
Automated evaluation with F1, Exact Match, token counting, cost tracking, and latency measurement.`,
      },
      {
        heading: "3-Pipeline Design",
        body: `Every query runs through all three pipelines concurrently (parallel execution):

**Pipeline 1 — LLM-Only**
Query → LLM → Answer
- Fewest tokens (~84/query). Pure parametric knowledge, no retrieval.

**Pipeline 2 — Basic RAG**
Query → Embed → TigerGraph vector search → Full chunk text → LLM
- More tokens (~290/query). Industry-standard retrieval baseline.

**Pipeline 3 — GraphRAG**
Query → Embed → TigerGraph vector search → Compact entity descriptions (pre-indexed) → LLM
- Fewest retrieval tokens (~163/query). −44% vs Basic RAG, +28.7% F1.
- Entity descriptions extracted once at ingest time — amortized cost.`,
      },
    ],
  },
  {
    id: "novelties",
    title: "Novel Features",
    icon: "✨",
    content: [
      {
        heading: "1. Adaptive Query Router",
        body: `Analyzes query complexity (0.0–1.0) using:
- Entity count — more entities = higher complexity
- Multi-hop indicators — "both", "same", "compared to"
- Question word analysis — "which" and "who" patterns
- Dependency chain length

Routes simple queries to Baseline RAG (fast, cheap) and complex queries to GraphRAG (precise, traceable).`,
      },
      {
        heading: "2. Schema-Bounded Extraction",
        body: `Traditional NER extracts arbitrary entity types. Our system constrains extraction to TigerGraph's actual schema:
- Only PERSON, LOCATION, WORK, CONCEPT, etc. (valid vertex types)
- Eliminates hallucinated node types that would fail on graph lookup
- Ensures every extracted entity maps to a real vertex in the graph`,
      },
      {
        heading: "3. Dual-Level Keywords",
        body: `Extracts keywords at two granularity levels:
- **High-level**: Concepts, themes, categories (e.g., "nationality", "American cinema")
- **Low-level**: Specific entities, names, dates (e.g., "Scott Derrickson", "1962")
Enables graph traversal at multiple levels for richer context retrieval.`,
      },
      {
        heading: "4. Graph Reasoning Paths",
        body: `Traces explicit entity→relation→entity chains:
- Scott Derrickson → BORN_IN → Denver, CO → LOCATED_IN → United States
- Ed Wood → BORN_IN → Poughkeepsie, NY → LOCATED_IN → United States

These paths are included in the LLM prompt as structured evidence, making answers verifiable and explainable.`,
      },
      {
        heading: "5. Real-Time Cost Tracking",
        body: `Measures per-query economics:
- Input/output tokens counted per provider's tokenization
- USD cost calculated using current provider pricing
- Latency measured end-to-end (graph + LLM)
- Interactive projections: "What would 100K queries/month cost?"`,
      },
    ],
  },
  {
    id: "deployment",
    title: "Deployment",
    icon: "🐳",
    content: [
      {
        heading: "Docker Deployment",
        code: `# Build the image
docker build -t graphrag .

# Run with environment variables
docker run -d \\
  -p 3000:3000 \\
  -e TG_HOST=https://your-instance.i.tgcloud.io \\
  -e TG_TOKEN=your_bearer_token \\
  -e OPENAI_API_KEY=sk-... \\
  -e HF_TOKEN=hf_... \\
  graphrag`,
      },
      {
        heading: "TigerGraph Cloud Setup",
        body: `1. Create a free account at tgcloud.io
2. Create a new cluster (free tier: 50MB storage)
3. Install the GraphRAG schema:
   - Go to GraphStudio
   - Import the schema from graphrag/setup_tigergraph.py
4. Set TIGERGRAPH_HOST, TIGERGRAPH_GRAPH, TIGERGRAPH_SECRET in .env`,
      },
      {
        heading: "Running Locally",
        code: `# Backend (Python)
cd graphrag-inference-hackathon
pip install -r requirements.txt
python -m graphrag.main

# Frontend (Next.js)
cd web
npm install
npm run dev

# Open http://localhost:3000`,
      },
    ],
  },
];

export function DocsContent() {
  const [activeSection, setActiveSection] = useState("quickstart");

  const section = SECTIONS.find(s => s.id === activeSection) || SECTIONS[0];

  return (
    <div style={{ display: "flex", minHeight: "calc(100vh - 72px)" }}>
      {/* Sidebar */}
      <aside style={{
        width: "260px",
        flexShrink: 0,
        borderRight: "1px solid var(--color-hairline-soft)",
        padding: "32px 0",
        position: "sticky",
        top: "72px",
        height: "calc(100vh - 72px)",
        overflowY: "auto",
        display: "none",
      }} className="lg:!block">
        <div style={{ padding: "0 24px" }}>
          <div className="caption-uppercase mb-4" style={{ color: "var(--color-tiger-orange)" }}>Documentation</div>
          <nav className="flex flex-col gap-1">
            {SECTIONS.map((s) => (
              <button
                key={s.id}
                onClick={() => setActiveSection(s.id)}
                style={{
                  display: "flex",
                  alignItems: "center",
                  gap: "10px",
                  padding: "10px 14px",
                  borderRadius: "8px",
                  border: "none",
                  background: activeSection === s.id ? "var(--color-tiger-orange-light)" : "transparent",
                  color: activeSection === s.id ? "var(--color-tiger-orange)" : "var(--color-muted)",
                  fontWeight: activeSection === s.id ? 600 : 400,
                  fontSize: "0.875rem",
                  cursor: "pointer",
                  textAlign: "left",
                  transition: "all 0.15s ease",
                  width: "100%",
                }}
              >
                <span>{s.icon}</span>
                {s.title}
              </button>
            ))}
          </nav>
        </div>
      </aside>

      {/* Mobile Tabs */}
      <div className="lg:hidden" style={{
        position: "fixed", bottom: 0, left: 0, right: 0, zIndex: 40,
        background: "var(--color-canvas)",
        borderTop: "1px solid var(--color-hairline)",
        display: "flex",
        overflowX: "auto",
        padding: "8px",
        gap: "4px",
      }}>
        {SECTIONS.map((s) => (
          <button
            key={s.id}
            onClick={() => setActiveSection(s.id)}
            className={activeSection === s.id ? "badge-orange" : "badge-outline"}
            style={{ fontSize: "0.6875rem", whiteSpace: "nowrap", cursor: "pointer", border: "none" }}
          >
            {s.icon} {s.title}
          </button>
        ))}
      </div>

      {/* Content */}
      <main style={{ flex: 1, padding: "48px 40px 96px", maxWidth: "840px" }}>
        <div className="flex items-center gap-3 mb-2">
          <span style={{ fontSize: "1.5rem" }}>{section.icon}</span>
          <h1 className="display-lg">{section.title}</h1>
        </div>
        <div className="divider mb-8" />

        <div className="flex flex-col gap-10">
          {section.content.map((block, i) => (
            <div key={i}>
              <h2 className="display-sm mb-4">{block.heading}</h2>
              {block.body && (
                <div className="body-lg" style={{ color: "var(--color-body)", lineHeight: 1.75, whiteSpace: "pre-line" }}>
                  {block.body.split(/(\*\*[^*]+\*\*)/).map((part, j) => {
                    if (part.startsWith("**") && part.endsWith("**")) {
                      return <strong key={j} style={{ color: "var(--color-ink)" }}>{part.slice(2, -2)}</strong>;
                    }
                    return <span key={j}>{part}</span>;
                  })}
                </div>
              )}
              {block.code && (
                <div className="code-window mt-4">
                  <div className="code-window-header">
                    <div className="code-window-dot code-window-dot-red" />
                    <div className="code-window-dot code-window-dot-yellow" />
                    <div className="code-window-dot code-window-dot-green" />
                  </div>
                  <pre className="code-window-body" style={{ fontSize: "0.8125rem", lineHeight: 1.7, whiteSpace: "pre-wrap" }}>
                    {block.code}
                  </pre>
                </div>
              )}
            </div>
          ))}
        </div>
      </main>
    </div>
  );
}