Spaces:
Running
Running
| #!/usr/bin/env python3 | |
| """ | |
| Hermes 记忆知识图谱 - 基于 NetworkX 构建记忆实体关联 | |
| 通过 execute_code 调用,将碎片记忆构建为关联图谱 | |
| 功能: | |
| 1. 从记忆数据库提取实体(人/项目/技术/问题) | |
| 2. 建立实体间关系(使用/属于/解决/关联) | |
| 3. 支持关联查询:给出一个实体,找出所有关联实体 | |
| 4. 可视化输出(文本格式) | |
| """ | |
| import sqlite3 | |
| import json | |
| import os | |
| import glob | |
| import re | |
| from collections import defaultdict | |
# Directory searched for memory databases and used for the exported graph file.
# Overridable through the HERMES_DATA_DIR environment variable.
MEMORY_DIR = os.getenv("HERMES_DATA_DIR", "/data/hermes/memories")
class KnowledgeGraph:
    """In-memory entity graph: nodes are entities, edges are labelled relations.

    Attributes:
        nodes: Maps entity id -> {"type": str, "label": str, "count": int},
            where "count" is the number of times the entity was added.
        edges: List of (source_id, target_id, relation, weight) tuples.
            Edges are traversed in both directions by the query methods.
    """

    def __init__(self):
        self.nodes = {}  # id -> {"type": str, "label": str, "count": int}
        self.edges = []  # [(source_id, target_id, relation, weight)]

    def add_entity(self, entity_id, entity_type, label):
        """Register an entity, or bump its mention count if already known.

        The type and label given on the first call are kept; later calls
        for the same id only increment "count".
        """
        if entity_id not in self.nodes:
            self.nodes[entity_id] = {"type": entity_type, "label": label, "count": 0}
        self.nodes[entity_id]["count"] += 1

    def add_relation(self, source_id, target_id, relation, weight=1):
        """Append an edge; duplicates are stored as separate entries."""
        self.edges.append((source_id, target_id, relation, weight))

    def get_related(self, entity_id, depth=1):
        """Return the set of entity ids within ``depth`` hops of ``entity_id``.

        Breadth-first search that follows edges in both directions. The
        starting entity is always part of the result, even if it is not a
        known node.
        """
        visited = {entity_id}
        # A set frontier keeps the per-edge membership test O(1).
        frontier = {entity_id}
        for _ in range(depth):
            if not frontier:
                break  # nothing left to expand; further levels are empty
            next_frontier = set()
            for source, target, _relation, _weight in self.edges:
                if source in frontier and target not in visited:
                    next_frontier.add(target)
                    visited.add(target)
                if target in frontier and source not in visited:
                    next_frontier.add(source)
                    visited.add(source)
            frontier = next_frontier
        return visited

    def to_text(self, entity_id=None):
        """Render the graph as plain text.

        With a truthy ``entity_id``: list its direct neighbours (sorted by
        id so the output is stable across runs), each followed by the
        relations linking it to ``entity_id``. Otherwise: an overview of all
        entities grouped by type, most-mentioned first, plus the edge count.
        """
        lines = []
        if entity_id:
            related = self.get_related(entity_id)
            lines.append(f"=== {entity_id} 的知识图谱 ===")

            # One pass over the edges instead of one pass per neighbour;
            # relations keep their original edge order within each neighbour.
            relations_by_neighbor = defaultdict(list)
            for source, target, relation, _weight in self.edges:
                if source == entity_id:
                    relations_by_neighbor[target].append(relation)
                elif target == entity_id:
                    relations_by_neighbor[source].append(relation)

            # Sets have no stable order; sort so repeated runs print the same text.
            for eid in sorted(related, key=str):
                if eid == entity_id:
                    continue
                node = self.nodes.get(eid, {})
                lines.append(f" [{node.get('type', '?')}] {eid} (提及{node.get('count', 0)}次)")
                for relation in relations_by_neighbor.get(eid, ()):
                    lines.append(f" └─ {relation}")
        else:
            lines.append("=== 知识图谱概览 ===")
            # Group entities by their type.
            by_type = defaultdict(list)
            for eid, info in self.nodes.items():
                by_type[info["type"]].append((eid, info["count"]))
            for etype, entities in sorted(by_type.items(), key=lambda item: item[0]):
                lines.append(f"\n[{etype}] ({len(entities)} 个实体)")
                # Stable sort: ties keep insertion order.
                for eid, count in sorted(entities, key=lambda pair: -pair[1]):
                    lines.append(f" {eid} (提及{count}次)")
            lines.append(f"\n关系总数: {len(self.edges)}")
        return "\n".join(lines)
def find_memory_db(memory_dir=None):
    """Locate a memory database file.

    Search order: ``*.db`` directly inside the memory directory, then
    ``*.db`` anywhere below it, then two fixed fallback paths. Within each
    step candidates are tried in sorted order so the same file is chosen on
    every run (glob itself returns matches in arbitrary order).

    Args:
        memory_dir: Directory to search. Defaults to the module-level
            ``MEMORY_DIR`` when omitted.

    Returns:
        Path of the first matching regular file, or ``None`` if none exists.
    """
    base_dir = MEMORY_DIR if memory_dir is None else memory_dir
    # Escape the directory so characters such as "[" in the path are not
    # interpreted as glob wildcards.
    safe_base = glob.escape(base_dir)
    patterns = [
        os.path.join(safe_base, "*.db"),
        os.path.join(safe_base, "**/*.db"),
        "/data/hermes/memories/holographic.db",
        "/data/hermes/memories/memory.db",
    ]
    for pattern in patterns:
        for candidate in sorted(glob.glob(pattern, recursive=True)):
            # A directory that happens to be named "*.db" is not a database.
            if os.path.isfile(candidate):
                return candidate
    return None
# Keyword boundaries are expressed as ASCII-word lookarounds rather than \b:
#   * \b after "C++" never matches before a space or end of text, because
#     both sides are non-word characters, so "C++" was never found;
#   * with Unicode matching, CJK characters count as word characters, so \b
#     finds no boundary between Chinese text and an adjacent keyword.
_ASCII_WORD = r"[A-Za-z0-9_]"
_ENTITY_PATTERNS = [
    (
        re.compile(
            rf"(?<!{_ASCII_WORD})(?:{alternatives})(?!{_ASCII_WORD})",
            re.IGNORECASE,
        ),
        etype,
    )
    for alternatives, etype in (
        (
            r"Python|JavaScript|TypeScript|React|Vue|Node\.js|Docker|Kubernetes?"
            r"|Redis|PostgreSQL|MySQL|MongoDB|Nginx|Linux|Git|Rust|Go|Java|C\+\+"
            r"|Swift|Kotlin",
            "technology",
        ),
        (
            r"Hermes|飞书|HuggingFace|OpenRouter|GitHub|Cloudflare|Vercel|AWS|GCP",
            "platform",
        ),
        (
            r"API|REST|GraphQL|WebSocket|HTTP|HTTPS|TCP|UDP|SSH|SSL|TLS",
            "protocol",
        ),
    )
]


def extract_entities_from_text(text):
    """Extract known technology/platform/protocol keywords from ``text``.

    Matching is a fixed keyword list, not real NER. It is case-insensitive,
    so ordinary words that coincide with a keyword (e.g. "go", "rest") are
    reported as entities too.

    Args:
        text: The text to scan. Empty or ``None`` yields no entities.

    Returns:
        A list of ``(entity_id, entity_type)`` tuples where ``entity_id`` is
        the lower-cased match. Results are grouped by type (technology,
        platform, protocol) and, within a type, ordered by position in the
        text. Repeated mentions are kept as repeated entries.
    """
    if not text:
        return []
    entities = []
    for pattern, etype in _ENTITY_PATTERNS:
        entities.extend((match.lower(), etype) for match in pattern.findall(text))
    return entities
def _read_memory_texts(db_path):
    """Return one text string per row of the first known memory table.

    Tables are probed in the order memories, memory, entries; rows are read
    from the first one that exists and can be queried. Each row's columns
    are stringified and joined with spaces.
    """
    texts = []
    conn = sqlite3.connect(db_path)
    try:
        cursor = conn.cursor()
        for table in ("memories", "memory", "entries"):
            cursor.execute(
                "SELECT name FROM sqlite_master WHERE type='table' AND name=?",
                (table,),
            )
            if cursor.fetchone() is None:
                continue
            try:
                # Preferred columns; fails unless all three exist.
                cursor.execute(f"SELECT content, value, text FROM {table}")
            except sqlite3.Error:
                try:
                    # Unknown schema: fall back to every column.
                    cursor.execute(f"SELECT * FROM {table}")
                except sqlite3.Error:
                    continue
            texts.extend(
                " ".join(str(value) for value in row) for row in cursor.fetchall()
            )
            break
    finally:
        # Always release the connection, including when a query raises.
        conn.close()
    return texts


def build_graph_from_memories(db_path):
    """Build a knowledge graph from the memory database at ``db_path``.

    Every keyword found in a memory becomes (or bumps the count of) a node;
    every pair of distinct entities found in the same memory text gets a
    "co-mentioned" edge.

    Args:
        db_path: Path to a SQLite database, or a falsy value for "no DB".

    Returns:
        A ``KnowledgeGraph``. It is empty when ``db_path`` is falsy or the
        database cannot be read (the error is printed, not raised).
    """
    graph = KnowledgeGraph()
    if not db_path:
        return graph

    try:
        memory_texts = _read_memory_texts(db_path)
    except (sqlite3.Error, OSError) as e:
        print(f"读取记忆失败: {e}")
        return graph

    # Identical memory texts share one bucket: their mentions are all
    # counted, but their co-mention edges are created only once.
    entities_by_text = defaultdict(list)
    for text in memory_texts:
        entities_by_text[text].extend(extract_entities_from_text(text))

    # Nodes: one add per mention so "count" reflects mention frequency.
    for entities in entities_by_text.values():
        for eid, etype in entities:
            graph.add_entity(eid, etype, eid)

    # Edges: link every pair of distinct entities within the same memory.
    # Sorting the ids makes edge order and direction reproducible; iterating
    # a bare set would vary between runs.
    for entities in entities_by_text.values():
        unique_ids = sorted({eid for eid, _etype in entities})
        for index, first in enumerate(unique_ids):
            for second in unique_ids[index + 1:]:
                graph.add_relation(first, second, "co-mentioned")
    return graph
def main():
    """Build the graph from the discovered memory DB, print it, and export it.

    The overview is printed to stdout and the graph is written as JSON to
    ``knowledge_graph.json`` inside ``MEMORY_DIR``. A failure while saving
    is reported on stdout instead of being raised.
    """
    db_path = find_memory_db()
    print(f"记忆数据库: {db_path or '未找到'}")

    graph = build_graph_from_memories(db_path)
    print(graph.to_text())

    # Serializable snapshot of the graph.
    edge_records = []
    for source, target, relation, weight in graph.edges:
        edge_records.append(
            {"source": source, "target": target, "relation": relation, "weight": weight}
        )
    output = {"nodes": dict(graph.nodes), "edges": edge_records}

    output_path = os.path.join(MEMORY_DIR, "knowledge_graph.json")
    try:
        target_dir = os.path.dirname(output_path)
        os.makedirs(target_dir, exist_ok=True)
        with open(output_path, "w", encoding="utf-8") as handle:
            json.dump(output, handle, ensure_ascii=False, indent=2)
        print(f"\n图谱已保存到: {output_path}")
    except Exception as e:
        print(f"保存图谱失败: {e}")
# Run the graph build only when executed as a script, not when imported.
if __name__ == "__main__":
    main()