#!/usr/bin/env python3
"""
Hermes memory knowledge graph.

Builds a co-occurrence graph of the entities mentioned in stored memories.
The graph is a small pure-Python structure (a node dict plus an edge list);
no third-party graph library is imported. The module is meant to be run as a
script, for example through ``execute_code``.

Features:
1. Extract entities (technologies / platforms / protocols) from the text of
   a SQLite memory database by keyword matching.
2. Link entities that appear in the same memory with a "co-mentioned"
   relation.
3. Relation lookup: given an entity, find every entity connected to it.
4. Plain-text rendering of the graph, plus a JSON dump written into the
   memory directory.
"""

import sqlite3
import json
import os
import glob
import re
from collections import defaultdict
from itertools import combinations
from typing import Dict, List, Optional, Set, Tuple

MEMORY_DIR = os.environ.get("HERMES_DATA_DIR", "/data/hermes/memories")

# Tables probed (in this order) for memory text; the first readable one wins.
_MEMORY_TABLES = ("memories", "memory", "entries")

# Keyword vocabulary, grouped by entity type. Matching is case-insensitive.
# NOTE: because matching ignores case, ordinary words such as "go" or "rest"
# are also reported as entities.
_ENTITY_TERMS = (
    ("technology", (
        "Python", "JavaScript", "TypeScript", "React", "Vue", "Node.js",
        "Docker",
        # Both spellings are accepted, with or without the final "s".
        "Kubernetes", "Kubernete",
        "Redis", "PostgreSQL", "MySQL", "MongoDB", "Nginx", "Linux", "Git",
        "Rust", "Go", "Java", "C++", "Swift", "Kotlin",
    )),
    ("platform", (
        "Hermes", "飞书", "HuggingFace", "OpenRouter", "GitHub",
        "Cloudflare", "Vercel", "AWS", "GCP",
    )),
    ("protocol", (
        "API", "REST", "GraphQL", "WebSocket", "HTTP", "HTTPS", "TCP", "UDP",
        "SSH", "SSL", "TLS",
    )),
)

_ASCII_WORD_CHAR = re.compile(r"[A-Za-z0-9_]")
_NOT_AFTER_ASCII_WORD = r"(?<![A-Za-z0-9_])"
_NOT_BEFORE_ASCII_WORD = r"(?![A-Za-z0-9_])"


def _compile_terms(terms):
    """Compile *terms* into one case-insensitive alternation pattern.

    A plain ``\\b`` is not usable here: in ``str`` patterns it is
    Unicode-aware, so CJK characters count as word characters ("用Python写"
    has no boundary around "Python"), and it can never match after a term
    that ends in punctuation ("C++" followed by a space). Instead, an
    ASCII-only boundary is applied on each side of a term, and only on a side
    where the term itself starts/ends with an ASCII word character.
    """
    alternatives = []
    for term in terms:
        prefix = _NOT_AFTER_ASCII_WORD if _ASCII_WORD_CHAR.match(term[0]) else ""
        suffix = _NOT_BEFORE_ASCII_WORD if _ASCII_WORD_CHAR.match(term[-1]) else ""
        alternatives.append(prefix + re.escape(term) + suffix)
    return re.compile("|".join(alternatives), re.IGNORECASE)


# Compiled once at import time: (entity type, pattern) in reporting order.
_ENTITY_PATTERNS = tuple(
    (etype, _compile_terms(terms)) for etype, terms in _ENTITY_TERMS
)


class KnowledgeGraph:
    """Minimal undirected multigraph of entities and their relations.

    Attributes:
        nodes: maps entity id -> {"type": str, "label": str, "count": int},
            where "count" is the number of times the entity was added.
        edges: list of (source_id, target_id, relation, weight) tuples.
            Edges are stored as given; traversal treats them as undirected.
    """

    def __init__(self):
        self.nodes = {}  # type: Dict[str, dict]
        self.edges = []  # type: List[Tuple[str, str, str, int]]

    def add_entity(self, entity_id, entity_type, label):
        """Register one mention of an entity.

        The node is created on first sight; type and label of an existing
        node are left untouched. Every call increments the mention count.
        """
        if entity_id not in self.nodes:
            self.nodes[entity_id] = {"type": entity_type, "label": label, "count": 0}
        self.nodes[entity_id]["count"] += 1

    def add_relation(self, source_id, target_id, relation, weight=1):
        """Append an edge; repeated calls add repeated edges (no merging)."""
        self.edges.append((source_id, target_id, relation, weight))

    def get_related(self, entity_id, depth=1) -> Set[str]:
        """Return the ids reachable from *entity_id* within *depth* hops.

        Edges are followed in both directions (breadth-first). The result
        always contains *entity_id* itself, even when it has no edges or is
        not a known node.
        """
        # Index the edge list once instead of rescanning it per BFS level.
        neighbours = defaultdict(set)
        for source, target, _relation, _weight in self.edges:
            neighbours[source].add(target)
            neighbours[target].add(source)

        visited = {entity_id}
        frontier = {entity_id}
        for _ in range(depth):
            reached = set()
            for node in frontier:
                # .get() avoids inserting empty entries into the defaultdict.
                reached |= neighbours.get(node, set())
            frontier = reached - visited
            if not frontier:
                break
            visited |= frontier
        return visited

    def to_text(self, entity_id=None) -> str:
        """Render the graph as plain text.

        With *entity_id*: list its direct neighbours (sorted by id for a
        stable output) and, under each, one line per connecting edge.
        Without it: list all entities grouped by type, most-mentioned first,
        followed by the total number of relations.
        """
        lines = []
        if entity_id:
            related = self.get_related(entity_id)
            lines.append(f"=== {entity_id} 的知识图谱 ===")
            for eid in sorted(related - {entity_id}, key=str):
                node = self.nodes.get(eid, {})
                lines.append(f" [{node.get('type', '?')}] {eid} (提及{node.get('count', 0)}次)")
                for source, target, relation, _weight in self.edges:
                    if (source, target) in ((entity_id, eid), (eid, entity_id)):
                        lines.append(f" └─ {relation}")
        else:
            lines.append("=== 知识图谱概览 ===")
            # Group entities by type.
            by_type = defaultdict(list)
            for eid, info in self.nodes.items():
                by_type[info["type"]].append((eid, info["count"]))
            for etype, entities in sorted(by_type.items()):
                lines.append(f"\n[{etype}] ({len(entities)} 个实体)")
                for eid, count in sorted(entities, key=lambda item: -item[1]):
                    lines.append(f" {eid} (提及{count}次)")
            lines.append(f"\n关系总数: {len(self.edges)}")
        return "\n".join(lines)


def find_memory_db() -> Optional[str]:
    """Locate a memory database file.

    Search patterns are tried in order; within a pattern the hits are sorted
    so the choice is deterministic, and entries that are not regular files
    are skipped.

    Returns:
        Path of the first matching file, or ``None`` when nothing is found.
    """
    patterns = [
        os.path.join(MEMORY_DIR, "*.db"),
        os.path.join(MEMORY_DIR, "**/*.db"),
        "/data/hermes/memories/holographic.db",
        "/data/hermes/memories/memory.db",
    ]
    for pattern in patterns:
        for candidate in sorted(glob.glob(pattern, recursive=True)):
            if os.path.isfile(candidate):
                return candidate
    return None


def extract_entities_from_text(text) -> List[Tuple[str, str]]:
    """Extract known entities from *text* by keyword matching.

    Returns:
        A list of ``(entity_id, entity_type)`` tuples where ``entity_id`` is
        the lower-cased matched keyword. Results are grouped by entity type
        (technology, platform, protocol) and, within a type, follow the order
        of appearance in the text. Repeated mentions yield repeated tuples.
    """
    entities = []
    for etype, pattern in _ENTITY_PATTERNS:
        entities.extend(
            (match.group(0).lower(), etype) for match in pattern.finditer(text)
        )
    return entities


def _read_memory_texts(db_path) -> List[str]:
    """Read every row of the first usable memory table as one text string.

    Raises:
        sqlite3.Error: if the database cannot be opened or queried.
    """
    texts = []
    conn = sqlite3.connect(db_path)
    try:
        cursor = conn.cursor()
        for table in _MEMORY_TABLES:
            cursor.execute(
                "SELECT name FROM sqlite_master WHERE type='table' AND name=?",
                (table,),
            )
            if cursor.fetchone() is None:
                continue
            # Table names cannot be bound as parameters; `table` only ever
            # comes from the fixed _MEMORY_TABLES tuple above.
            try:
                cursor.execute(f"SELECT content, value, text FROM {table}")
            except sqlite3.Error:
                # The table lacks one of those columns: fall back to all columns.
                try:
                    cursor.execute(f"SELECT * FROM {table}")
                except sqlite3.Error:
                    continue
            texts.extend(
                " ".join(str(column) for column in row)
                for row in cursor.fetchall()
            )
            break
    finally:
        # Always release the connection, including when a query fails.
        conn.close()
    return texts


def build_graph_from_memories(db_path) -> KnowledgeGraph:
    """Build a knowledge graph from the memory database at *db_path*.

    Every extracted entity mention becomes (or increments) a node, and every
    pair of distinct entities found in the same memory text is linked with a
    "co-mentioned" edge. Identical memory texts are grouped, so they add to
    the mention counts but produce their edges only once.

    Returns:
        The populated graph; an empty graph when *db_path* is falsy or the
        database cannot be read (the error is printed).
    """
    graph = KnowledgeGraph()
    if not db_path:
        return graph

    try:
        texts = _read_memory_texts(db_path)
    except sqlite3.Error as e:
        print(f"读取记忆失败: {e}")
        return graph

    # Group entity mentions by memory text.
    entities_by_text = defaultdict(list)
    for text in texts:
        entities_by_text[text].extend(extract_entities_from_text(text))

    for entities in entities_by_text.values():
        for eid, etype in entities:
            graph.add_entity(eid, etype, eid)
        # Sorted ids keep edge direction independent of set iteration order.
        unique_ids = sorted({eid for eid, _etype in entities})
        for first, second in combinations(unique_ids, 2):
            graph.add_relation(first, second, "co-mentioned")

    return graph


def main():
    """Build the graph, print its overview, and save it as JSON."""
    db_path = find_memory_db()
    print(f"记忆数据库: {db_path or '未找到'}")

    graph = build_graph_from_memories(db_path)
    print(graph.to_text())

    # Persist the graph data.
    output = {
        "nodes": dict(graph.nodes),
        "edges": [
            {"source": s, "target": t, "relation": r, "weight": w}
            for s, t, r, w in graph.edges
        ],
    }
    output_path = os.path.join(MEMORY_DIR, "knowledge_graph.json")
    try:
        output_dir = os.path.dirname(output_path)
        # An empty MEMORY_DIR means "current directory"; makedirs("") raises.
        if output_dir:
            os.makedirs(output_dir, exist_ok=True)
        with open(output_path, "w", encoding="utf-8") as f:
            json.dump(output, f, ensure_ascii=False, indent=2)
    except OSError as e:
        print(f"保存图谱失败: {e}")
    else:
        print(f"\n图谱已保存到: {output_path}")


if __name__ == "__main__":
    main()