hermes-bot / scripts /knowledge_graph.py
Z User
v5.0: 梦境模式+信息节食+概率思维+好奇心+工作流+知识图谱+自愈
020c94b
raw
history blame
6.75 kB
#!/usr/bin/env python3
"""
Hermes 记忆知识图谱 - 基于 NetworkX 构建记忆实体关联
通过 execute_code 调用,将碎片记忆构建为关联图谱
功能:
1. 从记忆数据库提取实体(人/项目/技术/问题)
2. 建立实体间关系(使用/属于/解决/关联)
3. 支持关联查询:给出一个实体,找出所有关联实体
4. 可视化输出(文本格式)
"""
import sqlite3
import json
import os
import glob
import re
from collections import defaultdict
# Directory that is searched for memory databases and that receives the exported
# knowledge_graph.json; overridable through the HERMES_DATA_DIR environment variable.
MEMORY_DIR = os.environ.get("HERMES_DATA_DIR", "/data/hermes/memories")
class KnowledgeGraph:
    """In-memory entity graph with typed nodes and labelled, weighted edges.

    Attributes:
        nodes: Maps an entity id to ``{"type": str, "label": str, "count": int}``,
            where ``count`` is the number of times the entity was added.
        edges: List of ``(source_id, target_id, relation, weight)`` tuples.
            Edges are stored as given but traversed in both directions.
    """

    def __init__(self):
        self.nodes = {}  # id -> {"type": str, "label": str, "count": int}
        self.edges = []  # [(source_id, target_id, relation, weight)]

    def add_entity(self, entity_id, entity_type, label):
        """Register one mention of an entity.

        The node is created on first sight; later calls only bump its mention
        count and leave the stored type and label untouched.
        """
        if entity_id not in self.nodes:
            self.nodes[entity_id] = {"type": entity_type, "label": label, "count": 0}
        self.nodes[entity_id]["count"] += 1

    def add_relation(self, source_id, target_id, relation, weight=1):
        """Append an edge between two entity ids (no de-duplication is done)."""
        self.edges.append((source_id, target_id, relation, weight))

    def get_related(self, entity_id, depth=1):
        """Return the ids reachable from *entity_id* within *depth* hops (BFS).

        Edges are followed in both directions. The returned set always
        contains *entity_id* itself, even when it has no edges at all.
        """
        visited = {entity_id}
        # A set frontier keeps the membership tests below O(1) per edge.
        frontier = {entity_id}
        for _ in range(depth):
            next_frontier = set()
            for source, target, _relation, _weight in self.edges:
                if source in frontier and target not in visited:
                    next_frontier.add(target)
                    visited.add(target)
                if target in frontier and source not in visited:
                    next_frontier.add(source)
                    visited.add(source)
            if not next_frontier:
                # Nothing new was reached, so deeper levels cannot add anything.
                break
            frontier = next_frontier
        return visited

    def to_text(self, entity_id=None):
        """Render the graph as plain text.

        With *entity_id*, list its direct neighbours (most mentioned first,
        ties broken by id so the output is reproducible) together with the
        relation of every connecting edge. Without it, print an overview of
        all entities grouped by type, followed by the total edge count.
        """
        lines = []
        if entity_id:
            neighbours = self.get_related(entity_id)
            neighbours.discard(entity_id)
            lines.append(f"=== {entity_id} 的知识图谱 ===")

            def mention_order(eid):
                # Sets iterate in arbitrary order; sort for stable output.
                return (-self.nodes.get(eid, {}).get("count", 0), str(eid))

            for eid in sorted(neighbours, key=mention_order):
                node = self.nodes.get(eid, {})
                lines.append(f" [{node.get('type', '?')}] {eid} (提及{node.get('count', 0)}次)")
                for s, t, r, _w in self.edges:
                    if (s == entity_id and t == eid) or (t == entity_id and s == eid):
                        lines.append(f" └─ {r}")
        else:
            lines.append("=== 知识图谱概览 ===")
            # Group entities by type.
            by_type = defaultdict(list)
            for eid, info in self.nodes.items():
                by_type[info["type"]].append((eid, info["count"]))
            for etype, entities in sorted(by_type.items()):
                lines.append(f"\n[{etype}] ({len(entities)} 个实体)")
                # Stable sort: equal counts keep their insertion order.
                for eid, count in sorted(entities, key=lambda x: -x[1]):
                    lines.append(f" {eid} (提及{count}次)")
            lines.append(f"\n关系总数: {len(self.edges)}")
        return "\n".join(lines)
def find_memory_db(memory_dir=None):
    """Locate a memory database file.

    Search order: ``*.db`` directly inside the memory directory, then ``*.db``
    anywhere below it, then two fixed fallback paths. Within one step the
    alphabetically first regular file wins, so the choice does not depend on
    the order in which the filesystem lists entries.

    Args:
        memory_dir: Directory to search; defaults to the module-level
            ``MEMORY_DIR`` when omitted.

    Returns:
        The path of the first matching file, or ``None`` if nothing matched.
    """
    base = MEMORY_DIR if memory_dir is None else memory_dir
    # Escape glob metacharacters so a directory such as "data[1]" is taken literally.
    safe_base = glob.escape(base)
    patterns = [
        os.path.join(safe_base, "*.db"),
        os.path.join(safe_base, "**/*.db"),
        "/data/hermes/memories/holographic.db",
        "/data/hermes/memories/memory.db",
    ]
    for pattern in patterns:
        # Skip directories that merely happen to be named "*.db".
        candidates = sorted(
            path for path in glob.glob(pattern, recursive=True) if os.path.isfile(path)
        )
        if candidates:
            return candidates[0]
    return None
# ASCII-only stand-ins for ``\b``. On ``str`` patterns ``\b`` is Unicode-aware,
# so CJK characters count as word characters and "Python" inside "用Python写"
# has no boundary on either side. These lookarounds only refuse a match that is
# glued to another ASCII letter, digit or underscore.
_ASCII_WORD_START = r"(?<![A-Za-z0-9_])"
_ASCII_WORD_END = r"(?![A-Za-z0-9_])"

# (compiled pattern, entity type), applied in this order.
# "C++" gets no trailing boundary check because it ends in a non-word character;
# "飞书" gets none at all because CJK text has no word separators.
_ENTITY_PATTERNS = (
    (
        re.compile(
            _ASCII_WORD_START
            + r"(?:(?:Python|JavaScript|TypeScript|React|Vue|Node\.js|Docker"
            r"|Kubernetes?|Redis|PostgreSQL|MySQL|MongoDB|Nginx|Linux|Git|Rust"
            r"|Go|Java|Swift|Kotlin)"
            + _ASCII_WORD_END
            + r"|C\+\+)",
            re.IGNORECASE,
        ),
        "technology",
    ),
    (
        re.compile(
            r"(?:飞书|"
            + _ASCII_WORD_START
            + r"(?:Hermes|HuggingFace|OpenRouter|GitHub|Cloudflare|Vercel|AWS|GCP)"
            + _ASCII_WORD_END
            + r")",
            re.IGNORECASE,
        ),
        "platform",
    ),
    (
        re.compile(
            _ASCII_WORD_START
            + r"(?:API|REST|GraphQL|WebSocket|HTTP|HTTPS|TCP|UDP|SSH|SSL|TLS)"
            + _ASCII_WORD_END,
            re.IGNORECASE,
        ),
        "protocol",
    ),
)


def extract_entities_from_text(text):
    """Extract known entities from *text* with keyword matching (simple NER).

    Args:
        text: The text to scan; ``None`` or an empty string yields no entities.

    Returns:
        A list of ``(entity_id, entity_type)`` tuples, where ``entity_id`` is
        the lower-cased match. All technology matches come first, then
        platforms, then protocols, each in order of appearance. Repeated
        mentions are kept so callers can count them.
    """
    if not text:
        return []
    entities = []
    # NOTE(review): matching is case-insensitive, so ordinary English words such
    # as "go", "rest" or "swift" are reported as entities too.
    for pattern, etype in _ENTITY_PATTERNS:
        entities.extend((m.group(0).lower(), etype) for m in pattern.finditer(text))
    return entities
def _read_memory_texts(db_path):
    """Return one text string per row of the first known memory table in *db_path*."""
    texts = []
    conn = sqlite3.connect(db_path)
    try:
        cursor = conn.cursor()
        for table in ("memories", "memory", "entries"):
            cursor.execute(
                "SELECT name FROM sqlite_master WHERE type='table' AND name=?",
                (table,),
            )
            if cursor.fetchone() is None:
                continue
            try:
                # Preferred columns; this fails unless all three exist.
                cursor.execute(f"SELECT content, value, text FROM {table}")
            except sqlite3.Error:
                try:
                    cursor.execute(f"SELECT * FROM {table}")
                except sqlite3.Error:
                    continue
            for row in cursor.fetchall():
                # Skip NULL columns so they do not turn into the text "None".
                texts.append(" ".join(str(col) for col in row if col is not None))
            # Only the first table that could be read is used.
            break
    finally:
        # Close the connection even when a query raised.
        conn.close()
    return texts


def build_graph_from_memories(db_path):
    """Build a knowledge graph from the memories stored in a SQLite database.

    Each row of the first readable table among ``memories``, ``memory`` and
    ``entries`` is scanned for entities. Every mention increases the node's
    count, and each pair of distinct entities appearing in the same row is
    linked by a single ``"co-mentioned"`` edge whose weight is the number of
    rows they share.

    Args:
        db_path: Path of the SQLite database; a falsy value yields an empty
            graph.

    Returns:
        The populated ``KnowledgeGraph``. If the database cannot be read, the
        error is printed and an empty graph is returned.
    """
    graph = KnowledgeGraph()
    if not db_path:
        return graph
    try:
        texts = _read_memory_texts(db_path)
    except sqlite3.Error as e:
        print(f"读取记忆失败: {e}")
        return graph

    # (first_id, second_id) -> number of memories mentioning both.
    pair_counts = defaultdict(int)
    for text in texts:
        mentions = extract_entities_from_text(text)
        for eid, etype in mentions:
            graph.add_entity(eid, etype, eid)
        # Sorting fixes the orientation of every pair, so (a, b) and (b, a)
        # are counted as one edge and the result does not depend on set order.
        unique_ids = sorted({eid for eid, _etype in mentions})
        for i, first in enumerate(unique_ids):
            for second in unique_ids[i + 1:]:
                pair_counts[(first, second)] += 1

    for (first, second), weight in pair_counts.items():
        graph.add_relation(first, second, "co-mentioned", weight)
    return graph
def main():
    """Script entry point: find the memory DB, print the graph, export it as JSON.

    The export goes to ``knowledge_graph.json`` inside ``MEMORY_DIR``; a failure
    while writing is reported on stdout instead of being raised.
    """
    memory_db = find_memory_db()
    print(f"记忆数据库: {memory_db or '未找到'}")

    kg = build_graph_from_memories(memory_db)
    print(kg.to_text())

    # Serialise the graph into plain JSON-friendly structures.
    edge_records = []
    for source, target, relation, weight in kg.edges:
        edge_records.append(
            {"source": source, "target": target, "relation": relation, "weight": weight}
        )
    payload = {"nodes": dict(kg.nodes), "edges": edge_records}

    target_path = os.path.join(MEMORY_DIR, "knowledge_graph.json")
    try:
        os.makedirs(os.path.dirname(target_path), exist_ok=True)
        with open(target_path, "w", encoding="utf-8") as fh:
            json.dump(payload, fh, ensure_ascii=False, indent=2)
        print(f"\n图谱已保存到: {target_path}")
    except Exception as err:
        print(f"保存图谱失败: {err}")


# Run the export only when executed as a script, not when imported.
if __name__ == "__main__":
    main()