# Snapshot metadata (from the file viewer): 6,421 bytes, revision 0a8419b
"""
Main Entry Point — GraphRAG Inference Hackathon
================================================
Run: python -m graphrag.main {dashboard|benchmark|ingest|demo}
"""
import argparse
import logging
import os
import sys
# Record layout shared by every handler installed through basicConfig below.
_LOG_FORMAT = '%(asctime)s - %(name)s - %(levelname)s - %(message)s'

# Configure the root logger at INFO level, then create this module's logger.
logging.basicConfig(level=logging.INFO, format=_LOG_FORMAT)
logger = logging.getLogger(__name__)
def main():
    """Parse the command line and run the selected sub-command.

    The positional ``command`` must be one of ``dashboard``, ``benchmark``,
    ``ingest`` or ``demo``. ``dashboard`` is handled inline; the other three
    are forwarded, together with the parsed namespace, to ``run_benchmark``,
    ``run_ingestion`` and ``run_demo`` respectively.
    """
    cli = argparse.ArgumentParser(
        description="GraphRAG Inference Hackathon — Dual Pipeline System"
    )
    cli.add_argument(
        "command",
        choices=["dashboard", "benchmark", "ingest", "demo"],
        help="Command to run",
    )

    # Integer options, registered in the order they appear in ``--help``.
    integer_options = (
        ("--port", 7860, "Dashboard port"),
        ("--samples", 50, "Number of samples"),
        ("--top-k", 5, "Top-K retrieval"),
        ("--hops", 2, "Graph traversal hops"),
    )
    for flag, default, description in integer_options:
        cli.add_argument(flag, type=int, default=default, help=description)

    cli.add_argument("--share", action="store_true", help="Create Gradio share link")
    cli.add_argument("--output", type=str, default="results.json", help="Output file")

    options = cli.parse_args()

    if options.command == "dashboard":
        # The dashboard module is imported only on this branch.
        from graphrag.dashboard import build_dashboard

        ui = build_dashboard()
        ui.launch(server_port=options.port, share=options.share, show_error=True)
        return

    # ``choices`` above guarantees the command is one of these keys.
    handlers = {
        "benchmark": run_benchmark,
        "ingest": run_ingestion,
        "demo": run_demo,
    }
    handlers[options.command](options)
def run_benchmark(args):
    """Run the HotpotQA benchmark and save its results.

    Builds the LLM, embedding, graph, orchestration and evaluation layers,
    runs ``BenchmarkRunner.run_hotpotqa_benchmark``, prints the ``"report"``
    entry of the returned mapping and saves the results to ``args.output``.

    Args:
        args: Parsed CLI namespace; ``samples``, ``top_k``, ``hops`` and
            ``output`` are read.
    """
    from graphrag.layers.graph_layer import GraphLayer
    from graphrag.layers.llm_layer import LLMLayer
    from graphrag.layers.orchestration_layer import InferenceOrchestrator, EmbeddingManager
    from graphrag.layers.evaluation_layer import EvaluationLayer
    from graphrag.benchmark import BenchmarkRunner

    # Generation model: credentials and model name come from the environment.
    language_model = LLMLayer(
        api_key=os.getenv("OPENAI_API_KEY", ""),
        model=os.getenv("LLM_MODEL", "gpt-4o-mini"),
    )
    language_model.initialize()

    embedding_settings = {
        "provider": "openai",
        "model": "text-embedding-3-small",
        "api_key": os.getenv("OPENAI_API_KEY", ""),
    }
    embeddings = EmbeddingManager(**embedding_settings)
    embeddings.initialize()

    # The graph layer is constructed with its defaults (no explicit config).
    engine = InferenceOrchestrator(
        graph_layer=GraphLayer(),
        llm_layer=language_model,
        embedder=embeddings,
    )
    engine.initialize()

    judge = EvaluationLayer(
        eval_llm_model=os.getenv("LLM_MODEL", "gpt-4o-mini"),
        api_key=os.getenv("OPENAI_API_KEY", ""),
    )
    judge.initialize()

    bench = BenchmarkRunner(engine, judge)
    logger.info(f"Running benchmark with {args.samples} samples...")
    outcome = bench.run_hotpotqa_benchmark(
        num_samples=args.samples,
        top_k=args.top_k,
        hops=args.hops,
    )
    print("\n" + outcome["report"])

    bench.save_results(args.output)
    logger.info(f"Results saved to {args.output}")
def run_ingestion(args):
    """Ingest HotpotQA documents into TigerGraph.

    Connects to TigerGraph using the ``TG_HOST``, ``TG_GRAPH``, ``TG_USERNAME``
    and ``TG_PASSWORD`` environment variables, creates the schema, installs the
    queries, then runs ``IngestionPipeline.ingest_hotpotqa``.

    The LLM model name is read from ``LLM_MODEL`` (default ``gpt-4o-mini``) and
    the OpenAI key is passed to both the LLM and the embedder, matching how
    ``run_benchmark`` configures the same layers.

    Args:
        args: Parsed CLI namespace; ``samples`` caps the number of documents.

    Raises:
        SystemExit: With status 1 when ``graph.connect()`` reports failure.
    """
    from graphrag.layers.graph_layer import GraphLayer
    from graphrag.layers.llm_layer import LLMLayer
    from graphrag.layers.orchestration_layer import EmbeddingManager
    from graphrag.ingestion import IngestionPipeline

    graph = GraphLayer(config={
        "host": os.getenv("TG_HOST", ""),
        "graphname": os.getenv("TG_GRAPH", "GraphRAG"),
        "username": os.getenv("TG_USERNAME", "tigergraph"),
        "password": os.getenv("TG_PASSWORD", ""),
    })
    if not graph.connect():
        logger.error("Failed to connect to TigerGraph. Set TG_HOST, TG_PASSWORD env vars.")
        sys.exit(1)
    graph.create_schema()
    graph.install_queries()

    api_key = os.getenv("OPENAI_API_KEY", "")
    llm = LLMLayer(api_key=api_key, model=os.getenv("LLM_MODEL", "gpt-4o-mini"))
    llm.initialize()

    # Pass the key explicitly, as run_benchmark does, rather than relying on
    # whatever default EmbeddingManager applies when api_key is omitted.
    embedder = EmbeddingManager(
        provider="openai",
        model="text-embedding-3-small",
        api_key=api_key,
    )
    embedder.initialize()

    pipeline = IngestionPipeline(graph, llm, embedder)
    stats = pipeline.ingest_hotpotqa(max_docs=args.samples)
    logger.info("Ingestion complete: %s", stats)
def _find_hotpotqa_rows(questions):
    """Return the first HotpotQA validation row matching each question.

    Streams the ``distractor`` validation split once and stops as soon as every
    requested question has been matched (case-insensitive comparison).

    Args:
        questions: Iterable of question strings to look up.

    Returns:
        Dict mapping each lower-cased question to its dataset row. Questions
        that never appear in the split are absent from the dict.
    """
    from datasets import load_dataset

    wanted = {question.lower() for question in questions}
    found = {}
    if not wanted:
        return found

    ds = load_dataset("hotpotqa/hotpot_qa", "distractor", split="validation", streaming=True)
    for row in ds:
        key = row["question"].lower()
        if key in wanted and key not in found:
            found[key] = row
            if len(found) == len(wanted):
                break
    return found


def run_demo(args):
    """Run two sample HotpotQA questions through both pipelines and print results.

    For each built-in query the matching HotpotQA row supplies the context
    passages and gold answer; ``orch.run_comparison`` produces the baseline and
    GraphRAG answers, which are printed with token counts, cost and F1.

    The dataset is streamed once for all queries instead of once per query, and
    a query with no matching row is reported instead of printing nothing. The
    LLM model is read from ``LLM_MODEL`` and the OpenAI key is passed to the
    embedder, matching ``run_benchmark``.

    Args:
        args: Parsed CLI namespace. Unused; kept so all command handlers share
            one signature.
    """
    from graphrag.layers.llm_layer import LLMLayer
    from graphrag.layers.orchestration_layer import InferenceOrchestrator, EmbeddingManager
    from graphrag.layers.graph_layer import GraphLayer
    from graphrag.layers.evaluation_layer import compute_f1

    print("=" * 60)
    print("🔍 GraphRAG Inference Demo")
    print("=" * 60)

    api_key = os.getenv("OPENAI_API_KEY", "")
    llm = LLMLayer(api_key=api_key, model=os.getenv("LLM_MODEL", "gpt-4o-mini"))
    llm.initialize()
    embedder = EmbeddingManager(
        provider="openai",
        model="text-embedding-3-small",
        api_key=api_key,
    )
    embedder.initialize()
    graph = GraphLayer()
    orch = InferenceOrchestrator(graph_layer=graph, llm_layer=llm, embedder=embedder)
    orch.initialize()

    queries = [
        "Were Scott Derrickson and Ed Wood of the same nationality?",
        "Which magazine was started first, Arthur's Magazine or First for Women?",
    ]

    # One pass over the stream for all queries. A lookup failure is remembered
    # and reported under each query header, as the per-query lookup used to do.
    try:
        rows = _find_hotpotqa_rows(queries)
        lookup_error = None
    except Exception as e:  # demo boundary: report and carry on
        rows = {}
        lookup_error = e

    for query in queries:
        print(f"\n{'─' * 60}")
        print(f"Query: {query}")

        if lookup_error is not None:
            print(f"Error: {lookup_error}")
            continue

        row = rows.get(query.lower())
        if row is None:
            print("No matching question found in the HotpotQA validation split.")
            continue

        try:
            # Each passage is "<title>: <sentences joined by spaces>".
            passages = [
                f"{title}: {' '.join(sentences)}"
                for title, sentences in zip(row["context"]["title"], row["context"]["sentences"])
            ]
            comp = orch.run_comparison(query, passages)
            gold = row["answer"]
            print(f"\n🔵 Baseline: {comp.baseline.answer}")
            print(f" Tokens: {comp.baseline.total_tokens} | Cost: ${comp.baseline.cost_usd:.6f}")
            print(f"\n🔴 GraphRAG: {comp.graphrag.answer}")
            print(f" Tokens: {comp.graphrag.total_tokens} | Cost: ${comp.graphrag.cost_usd:.6f}")
            print(f" Entities: {len(comp.graphrag.entities_found)} | Relations: {len(comp.graphrag.relations_traversed)}")
            print(f"\n📋 Gold: {gold}")
            print(f" Baseline F1: {compute_f1(comp.baseline.answer, gold):.4f}")
            print(f" GraphRAG F1: {compute_f1(comp.graphrag.answer, gold):.4f}")
        except Exception as e:  # keep the demo going for the remaining queries
            print(f"Error: {e}")

    print(f"\n{'=' * 60}")
    print("Run 'python -m graphrag.main dashboard' for the full UI!")
# Run the CLI only when this module is executed as a script, not on import.
if __name__ == "__main__":
    main()
|