Spaces:

Jackken
/

hermes-bot

Running

hermes-bot / scripts /knowledge_graph.py

Z User

v5.0: 梦境模式+信息节食+概率思维+好奇心+工作流+知识图谱+自愈

020c94b 9 days ago

6.75 kB

	#!/usr/bin/env python3
	"""
	Hermes memory knowledge graph - links entities found in stored memories.
	Intended to be invoked via execute_code (per the original header) to turn
	fragmented memories into a graph of related entities.

	Features:
	1. Extract entities from memory text (technology / platform / protocol keywords)
	2. Relate entities that are mentioned in the same memory ("co-mentioned")
	3. Related-entity lookup: given one entity, find every entity linked to it
	4. Plain-text rendering of the graph, plus a JSON export written by main()

	NOTE(review): the original header says the graph is built on NetworkX and
	lists person/project/problem entities and use/belong/solve relations; this
	file only uses plain dicts and lists and only emits the items listed above.
	"""

	import sqlite3
	import json
	import os
	import glob
	import re
	from collections import defaultdict

	# Directory that is searched for the memory database and that receives the
	# exported knowledge_graph.json; overridable through HERMES_DATA_DIR.
	MEMORY_DIR = os.environ.get("HERMES_DATA_DIR", "/data/hermes/memories")


	class KnowledgeGraph:
	    """In-memory graph of entities and the relations between them.

	    Attributes:
	        nodes: Maps entity id -> {"type": str, "label": str, "count": int},
	            where "count" is the number of times the entity was added.
	        edges: List of (source_id, target_id, relation, weight) tuples.
	            Edges are followed in both directions and are not de-duplicated.
	    """

	    def __init__(self):
	        self.nodes = {}  # id -> {"type": str, "label": str, "count": int}
	        self.edges = []  # [(source_id, target_id, relation, weight)]

	    def add_entity(self, entity_id, entity_type, label):
	        """Record one mention of an entity, creating its node if needed.

	        The type and label of the first call are kept; later calls for the
	        same id only increment the mention count.
	        """
	        if entity_id not in self.nodes:
	            self.nodes[entity_id] = {"type": entity_type, "label": label, "count": 0}
	        self.nodes[entity_id]["count"] += 1

	    def add_relation(self, source_id, target_id, relation, weight=1):
	        """Append an edge; the endpoints need not exist in ``nodes``."""
	        self.edges.append((source_id, target_id, relation, weight))

	    def get_related(self, entity_id, depth=1):
	        """Return the ids reachable from ``entity_id`` within ``depth`` hops.

	        Performs a level-by-level breadth-first search that treats every
	        edge as undirected.

	        Args:
	            entity_id: Starting entity; it does not have to be a known node.
	            depth: Maximum number of hops. Values <= 0 yield only the start.

	        Returns:
	            A set of entity ids that always contains ``entity_id`` itself.
	        """
	        visited = {entity_id}
	        # A set keeps the per-edge membership test O(1).
	        frontier = {entity_id}

	        for _ in range(depth):
	            next_frontier = set()
	            for source, target, _relation, _weight in self.edges:
	                if source in frontier and target not in visited:
	                    next_frontier.add(target)
	                    visited.add(target)
	                if target in frontier and source not in visited:
	                    next_frontier.add(source)
	                    visited.add(source)
	            if not next_frontier:
	                # Nothing new was reached, so deeper levels are empty too.
	                break
	            frontier = next_frontier

	        return visited

	    def to_text(self, entity_id=None):
	        """Render the graph as plain text.

	        Args:
	            entity_id: When truthy, list the direct neighbours of this entity
	                together with the relations linking them to it. Otherwise
	                render an overview of all entities grouped by type.

	        Returns:
	            The rendered lines joined with newlines.
	        """
	        lines = []

	        if entity_id:
	            lines.append(f"=== {entity_id} 的知识图谱 ===")
	            neighbours = self.get_related(entity_id) - {entity_id}
	            # Sets have no stable order; sort so the output is reproducible.
	            for eid in sorted(neighbours, key=str):
	                node = self.nodes.get(eid, {})
	                lines.append(f" [{node.get('type', '?')}] {eid} (提及{node.get('count', 0)}次)")
	                for s, t, r, w in self.edges:
	                    if (s == entity_id and t == eid) or (t == entity_id and s == eid):
	                        lines.append(f" └─ {r}")
	        else:
	            lines.append("=== 知识图谱概览 ===")
	            # Group entities by type.
	            by_type = defaultdict(list)
	            for eid, info in self.nodes.items():
	                by_type[info["type"]].append((eid, info["count"]))

	            for etype, entities in sorted(by_type.items()):
	                lines.append(f"\n[{etype}] ({len(entities)} 个实体)")
	                # Most frequently mentioned entities first.
	                for eid, count in sorted(entities, key=lambda x: -x[1]):
	                    lines.append(f" {eid} (提及{count}次)")

	            lines.append(f"\n关系总数: {len(self.edges)}")

	        return "\n".join(lines)


	def find_memory_db(memory_dir=None):
	    """Locate a memory database file.

	    Candidates are tried in this order: ``*.db`` directly inside the memory
	    directory, ``*.db`` anywhere below it, then two well-known absolute
	    paths. Within one pattern the alphabetically first regular file wins so
	    the result is stable between runs.

	    Args:
	        memory_dir: Directory to search. Defaults to ``MEMORY_DIR``.

	    Returns:
	        The path of the first matching file, or ``None`` if nothing matches.
	    """
	    base_dir = MEMORY_DIR if memory_dir is None else memory_dir
	    # Escape the directory so glob metacharacters in it are taken literally.
	    safe_dir = glob.escape(base_dir)
	    patterns = [
	        os.path.join(safe_dir, "*.db"),
	        # "**" is what makes recursive=True descend into sub-directories.
	        os.path.join(safe_dir, "**", "*.db"),
	        "/data/hermes/memories/holographic.db",
	        "/data/hermes/memories/memory.db",
	    ]
	    for pattern in patterns:
	        candidates = sorted(
	            path for path in glob.glob(pattern, recursive=True) if os.path.isfile(path)
	        )
	        if candidates:
	            return candidates[0]
	    return None


	# Keyword alternations per entity type. Order inside an alternation only
	# matters for readability: the trailing lookahead forces the longest keyword
	# (e.g. "HTTPS" rather than "HTTP") to win.
	_ENTITY_VOCABULARY = (
	    (r'Python|JavaScript|TypeScript|React|Vue|Node\.js|Docker|Kubernetes?|Redis|PostgreSQL|MySQL|MongoDB|Nginx|Linux|Git|Rust|Go|Java|C\+\+|Swift|Kotlin', "technology"),
	    (r'Hermes|飞书|HuggingFace|OpenRouter|GitHub|Cloudflare|Vercel|AWS|GCP', "platform"),
	    (r'API|REST|GraphQL|WebSocket|HTTP|HTTPS|TCP|UDP|SSH|SSL|TLS', "protocol"),
	)

	# ASCII-only lookarounds replace \b: \b never holds after "C++" when a space
	# follows, and it treats CJK characters as word characters, so a keyword
	# embedded in Chinese text would not be found.
	_ENTITY_PATTERNS = tuple(
	    (
	        re.compile(
	            r"(?<![A-Za-z0-9_])(?:" + alternatives + r")(?![A-Za-z0-9_])",
	            re.IGNORECASE,
	        ),
	        etype,
	    )
	    for alternatives, etype in _ENTITY_VOCABULARY
	)


	def extract_entities_from_text(text):
	    """Extract known entities from text with keyword matching (naive NER).

	    Args:
	        text: The text to scan. Empty or ``None`` yields no entities.

	    Returns:
	        A list of ``(entity_id, entity_type)`` tuples, one per occurrence,
	        where ``entity_id`` is the lower-cased keyword. Results are grouped
	        by entity type in vocabulary order and, within a type, appear in
	        text order.
	    """
	    if not text:
	        return []

	    entities = []
	    for pattern, etype in _ENTITY_PATTERNS:
	        entities.extend((match.lower(), etype) for match in pattern.findall(text))
	    return entities


	def _read_memory_texts(db_path):
	    """Return one space-joined string per row of the first known memory table."""
	    texts = []
	    conn = sqlite3.connect(db_path)
	    try:
	        cursor = conn.cursor()
	        for table in ("memories", "memory", "entries"):
	            cursor.execute(
	                "SELECT name FROM sqlite_master WHERE type='table' AND name=?",
	                (table,),
	            )
	            if cursor.fetchone() is None:
	                continue

	            # Table names come from the fixed tuple above, so interpolating
	            # them is safe (identifiers cannot be bound as parameters).
	            try:
	                cursor.execute(f"SELECT content, value, text FROM {table}")
	            except sqlite3.Error:
	                # The table lacks one of the expected columns; read them all.
	                try:
	                    cursor.execute(f"SELECT * FROM {table}")
	                except sqlite3.Error:
	                    continue

	            for row in cursor.fetchall():
	                # Skip NULL columns instead of joining in the string "None".
	                texts.append(" ".join(str(col) for col in row if col is not None))
	            # Only the first table that exists and is readable is used.
	            break
	    finally:
	        # Close the connection even when a query raises.
	        conn.close()
	    return texts


	def build_graph_from_memories(db_path):
	    """Build a knowledge graph from the memory database.

	    Each distinct memory text is scanned for entities. Every occurrence
	    counts as one mention of its node, and every pair of distinct entities
	    found in the same text is linked with a "co-mentioned" edge.

	    Args:
	        db_path: Path of the SQLite database; a falsy value yields an empty
	            graph.

	    Returns:
	        The populated ``KnowledgeGraph``. If the database cannot be read,
	        the error is printed and an empty graph is returned.
	    """
	    graph = KnowledgeGraph()

	    if not db_path:
	        return graph

	    try:
	        memory_texts = _read_memory_texts(db_path)
	    except (sqlite3.Error, OSError) as e:
	        print(f"读取记忆失败: {e}")
	        return graph

	    # Group by text: identical memories pool their mentions and are linked
	    # only once.
	    entities_by_text = defaultdict(list)
	    for text in memory_texts:
	        entities_by_text[text].extend(extract_entities_from_text(text))

	    # Add nodes (one mention per occurrence).
	    for entities in entities_by_text.values():
	        for eid, etype in entities:
	            graph.add_entity(eid, etype, eid)

	    # Link entities that appear in the same memory. Sorting fixes the edge
	    # order and orientation, which would otherwise depend on set ordering.
	    for entities in entities_by_text.values():
	        unique_ids = sorted({eid for eid, _ in entities})
	        for i, first in enumerate(unique_ids):
	            for second in unique_ids[i + 1:]:
	                graph.add_relation(first, second, "co-mentioned")

	    return graph


	def main():
	    """Locate the memory database, print the graph overview and export it.

	    The graph is written as JSON to ``knowledge_graph.json`` inside
	    ``MEMORY_DIR``; a failure to write is reported on stdout, not raised.
	    """
	    db_path = find_memory_db()
	    print(f"记忆数据库: {db_path or '未找到'}")

	    graph = build_graph_from_memories(db_path)
	    print(graph.to_text())

	    # Serialisable snapshot of the graph.
	    edge_records = []
	    for source, target, relation, weight in graph.edges:
	        edge_records.append(
	            {"source": source, "target": target, "relation": relation, "weight": weight}
	        )
	    payload = {"nodes": dict(graph.nodes), "edges": edge_records}

	    output_path = os.path.join(MEMORY_DIR, "knowledge_graph.json")
	    try:
	        target_dir = os.path.dirname(output_path)
	        os.makedirs(target_dir, exist_ok=True)
	        with open(output_path, "w", encoding="utf-8") as out_file:
	            json.dump(payload, out_file, ensure_ascii=False, indent=2)
	        print(f"\n图谱已保存到: {output_path}")
	    except Exception as err:
	        print(f"保存图谱失败: {err}")


	# Run the export only when executed as a script, not when imported.
	if __name__ == "__main__":
	main()