#!/usr/bin/env python3
"""
Hermes 记忆知识图谱 - 基于 NetworkX 构建记忆实体关联
通过 execute_code 调用,将碎片记忆构建为关联图谱
功能:
1. 从记忆数据库提取实体(人/项目/技术/问题)
2. 建立实体间关系(使用/属于/解决/关联)
3. 支持关联查询:给出一个实体,找出所有关联实体
4. 可视化输出(文本格式)
"""
import sqlite3
import json
import os
import glob
import re
from collections import defaultdict
MEMORY_DIR = os.environ.get("HERMES_DATA_DIR", "/data/hermes/memories")
class KnowledgeGraph:
    """In-memory knowledge graph: typed entity nodes plus relation edges.

    Nodes carry a mention count; edges are stored directed but traversed as
    undirected. Duplicate edges are kept deliberately (multiplicity acts as
    relation strength).
    """

    def __init__(self):
        # entity_id -> {"type": str, "label": str, "count": mention count}
        self.nodes = {}
        # list of (source_id, target_id, relation, weight) tuples
        self.edges = []

    def add_entity(self, entity_id, entity_type, label):
        """Register an entity if new and bump its mention count."""
        if entity_id not in self.nodes:
            self.nodes[entity_id] = {"type": entity_type, "label": label, "count": 0}
        self.nodes[entity_id]["count"] += 1

    def add_relation(self, source_id, target_id, relation, weight=1):
        """Record a relation edge between two entities (duplicates allowed)."""
        self.edges.append((source_id, target_id, relation, weight))

    def get_related(self, entity_id, depth=1):
        """Return the set of entity ids within `depth` hops of entity_id (BFS).

        Edges are followed in both directions; the start entity is always
        included in the result, even when it has no edges.
        """
        visited = {entity_id}
        # Frontier as a set: the original used a list, making each
        # `in current` test O(|frontier|) per edge (O(E*V) overall).
        frontier = {entity_id}
        for _ in range(depth):
            next_frontier = set()
            for src, dst, _relation, _weight in self.edges:
                if src in frontier and dst not in visited:
                    next_frontier.add(dst)
                    visited.add(dst)
                if dst in frontier and src not in visited:
                    next_frontier.add(src)
                    visited.add(src)
            frontier = next_frontier
        return visited

    def to_text(self, entity_id=None):
        """Render the graph as text.

        With entity_id: an ego view listing related entities and the edges
        that connect them to entity_id. Without: a full overview grouped by
        entity type plus the total edge count.
        """
        lines = []
        if entity_id:
            related = self.get_related(entity_id)
            lines.append(f"=== {entity_id} 的知识图谱 ===")
            for eid in related:
                if eid == entity_id:
                    continue
                node = self.nodes.get(eid, {})
                lines.append(f" [{node.get('type', '?')}] {eid} (提及{node.get('count', 0)}次)")
                # One line per matching edge: repeated relations show strength.
                for s, t, r, w in self.edges:
                    if (s == entity_id and t == eid) or (t == entity_id and s == eid):
                        lines.append(f" └─ {r}")
        else:
            lines.append("=== 知识图谱概览 ===")
            # Group entities by type for a readable summary.
            by_type = defaultdict(list)
            for eid, info in self.nodes.items():
                by_type[info["type"]].append((eid, info["count"]))
            for etype, entities in sorted(by_type.items()):
                lines.append(f"\n[{etype}] ({len(entities)} 个实体)")
                # Most-mentioned entities first.
                for eid, count in sorted(entities, key=lambda x: -x[1]):
                    lines.append(f" {eid} (提及{count}次)")
            lines.append(f"\n关系总数: {len(self.edges)}")
        return "\n".join(lines)
def find_memory_db():
    """Locate the first memory database file among the known candidate locations.

    Returns the path of the first match, or None when nothing is found.
    """
    candidate_patterns = (
        os.path.join(MEMORY_DIR, "*.db"),
        os.path.join(MEMORY_DIR, "**/*.db"),
        "/data/hermes/memories/holographic.db",
        "/data/hermes/memories/memory.db",
    )
    for pattern in candidate_patterns:
        matches = glob.glob(pattern, recursive=True)
        if matches:
            return matches[0]
    return None
def extract_entities_from_text(text):
    """Extract (entity_id, entity_type) pairs from free text via keyword regexes.

    Entity ids are lower-cased so different casings of the same term merge
    into one node. One pair is appended per occurrence, so repeated mentions
    inflate the graph's mention counts.
    """
    entities = []
    # Lookarounds (?<!\w)/(?!\w) instead of \b: a trailing \b after a
    # non-word character never matches, so the original pattern could not
    # extract "C++" followed by a space or punctuation.
    tech_patterns = [
        (r'(?<!\w)(Python|JavaScript|TypeScript|React|Vue|Node\.js|Docker|Kubernetes?|Redis|PostgreSQL|MySQL|MongoDB|Nginx|Linux|Git|Rust|Go|Java|C\+\+|Swift|Kotlin)(?!\w)', "technology"),
        (r'(?<!\w)(Hermes|飞书|HuggingFace|OpenRouter|GitHub|Cloudflare|Vercel|AWS|GCP)(?!\w)', "platform"),
        (r'(?<!\w)(API|REST|GraphQL|WebSocket|HTTP|HTTPS|TCP|UDP|SSH|SSL|TLS)(?!\w)', "protocol"),
    ]
    # NOTE(review): IGNORECASE means common English words like "go" also
    # match the Go alternative — kept as in the original.
    for pattern, etype in tech_patterns:
        for match in re.findall(pattern, text, re.IGNORECASE):
            entities.append((match.lower(), etype))
    return entities
def build_graph_from_memories(db_path):
    """Build a KnowledgeGraph from the memory database at db_path.

    Reads every row of the first table found among ("memories", "memory",
    "entries"), extracts entities from each row's concatenated text, and
    links entities that co-occur in the same memory with "co-mentioned"
    edges. Returns an empty graph when db_path is falsy or unreadable.
    """
    graph = KnowledgeGraph()
    if not db_path:
        return graph

    all_text_parts = []
    try:
        conn = sqlite3.connect(db_path)
        try:
            cursor = conn.cursor()
            for table in ("memories", "memory", "entries"):
                # Parameterized lookup (the original interpolated the name;
                # safe here since names are from this fixed tuple, but the
                # placeholder form is the idiomatic one).
                cursor.execute(
                    "SELECT name FROM sqlite_master WHERE type='table' AND name=?",
                    (table,),
                )
                if not cursor.fetchone():
                    continue
                # Table names cannot be bound parameters; f-string is
                # required here and the names are trusted constants.
                try:
                    cursor.execute(f"SELECT content, value, text FROM {table}")
                except Exception:
                    try:
                        cursor.execute(f"SELECT * FROM {table}")
                    except Exception:
                        continue
                for row in cursor.fetchall():
                    all_text_parts.append(" ".join(str(col) for col in row))
                break  # only the first usable table is consumed
        finally:
            # The original closed only on the happy path, leaking the
            # handle when a query raised.
            conn.close()
    except Exception as e:
        print(f"读取记忆失败: {e}")
        return graph

    # Group extracted entities by memory text (identical memories collapse,
    # matching the original's dict-keyed-by-text behavior).
    entities_by_memory = defaultdict(list)
    for text in all_text_parts:
        entities_by_memory[text].extend(extract_entities_from_text(text))

    # Register nodes; one add per occurrence so mention counts accumulate.
    for entities in entities_by_memory.values():
        for entity_id, entity_type in entities:
            graph.add_entity(entity_id, entity_type, entity_id)

    # Link distinct entities that appear in the same memory.
    for entities in entities_by_memory.values():
        unique_ids = list({entity_id for entity_id, _ in entities})
        for i, first in enumerate(unique_ids):
            for second in unique_ids[i + 1:]:
                graph.add_relation(first, second, "co-mentioned")
    return graph
def main():
    """Build the knowledge graph from memories, print it, and persist it as JSON."""
    db_path = find_memory_db()
    print(f"记忆数据库: {db_path or '未找到'}")

    graph = build_graph_from_memories(db_path)
    print(graph.to_text())

    # Serialize nodes and edges so later runs can reuse the graph.
    payload = {
        "nodes": dict(graph.nodes),
        "edges": [
            {"source": source, "target": target, "relation": relation, "weight": weight}
            for source, target, relation, weight in graph.edges
        ],
    }
    output_path = os.path.join(MEMORY_DIR, "knowledge_graph.json")
    try:
        os.makedirs(os.path.dirname(output_path), exist_ok=True)
        with open(output_path, "w", encoding="utf-8") as f:
            json.dump(payload, f, ensure_ascii=False, indent=2)
        print(f"\n图谱已保存到: {output_path}")
    except Exception as e:
        print(f"保存图谱失败: {e}")
# Script entry point: build, print, and persist the knowledge graph.
if __name__ == "__main__":
    main()