File size: 6,745 Bytes
020c94b
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
#!/usr/bin/env python3
"""
Hermes 记忆知识图谱 - 基于 NetworkX 构建记忆实体关联
通过 execute_code 调用,将碎片记忆构建为关联图谱

功能:
1. 从记忆数据库提取实体(人/项目/技术/问题)
2. 建立实体间关系(使用/属于/解决/关联)
3. 支持关联查询:给出一个实体,找出所有关联实体
4. 可视化输出(文本格式)
"""

import sqlite3
import json
import os
import glob
import re
from collections import defaultdict

# Directory searched for memory databases (*.db) and used as the destination
# for knowledge_graph.json. Read from the HERMES_DATA_DIR environment variable,
# falling back to /data/hermes/memories when the variable is not set.
MEMORY_DIR = os.environ.get("HERMES_DATA_DIR", "/data/hermes/memories")


class KnowledgeGraph:
    """Minimal in-memory entity graph with mention counts and labelled edges.

    Attributes:
        nodes: dict mapping entity id -> {"type": str, "label": str, "count": int},
            where "count" is the number of times the entity was added.
        edges: list of (source_id, target_id, relation, weight) tuples. Edges are
            stored with a direction but traversed in both directions.
    """

    def __init__(self):
        self.nodes = {}  # id -> {"type": str, "label": str, "count": int}
        self.edges = []  # [(source_id, target_id, relation, weight)]

    def add_entity(self, entity_id, entity_type, label):
        """Record one mention of an entity, creating its node on first sight.

        The type and label given on the first call are kept; later calls only
        increment the mention count.
        """
        node = self.nodes.setdefault(
            entity_id, {"type": entity_type, "label": label, "count": 0}
        )
        node["count"] += 1

    def add_relation(self, source_id, target_id, relation, weight=1):
        """Append an edge; repeated calls for the same pair add repeated edges."""
        self.edges.append((source_id, target_id, relation, weight))

    def get_related(self, entity_id, depth=1):
        """Return the set of entity ids reachable within ``depth`` hops (BFS).

        Edges are followed in both directions. The starting ``entity_id`` is
        always part of the result, even when it has no node or no edges.
        """
        visited = {entity_id}
        frontier = {entity_id}  # a set keeps the per-edge membership test O(1)

        for _ in range(depth):
            if not frontier:
                break  # nothing new was reached on the previous level
            next_frontier = set()
            for source, target, _relation, _weight in self.edges:
                if source in frontier and target not in visited:
                    next_frontier.add(target)
                    visited.add(target)
                if target in frontier and source not in visited:
                    next_frontier.add(source)
                    visited.add(source)
            frontier = next_frontier

        return visited

    def to_text(self, entity_id=None):
        """Render the graph as plain text.

        With ``entity_id``: list its direct neighbours, each followed by the
        relation of every edge connecting the two. Neighbours are ordered by
        mention count (descending) and then by id, so the output is stable
        across runs.

        Without ``entity_id``: list all entities grouped by type, most
        mentioned first, followed by the total number of edges.
        """
        lines = []

        if entity_id:
            related = self.get_related(entity_id)
            lines.append(f"=== {entity_id} 的知识图谱 ===")

            # Index the relations of edges touching entity_id once, instead of
            # rescanning the whole edge list for every neighbour.
            relations_by_neighbor = defaultdict(list)
            for source, target, relation, _weight in self.edges:
                if source == entity_id:
                    relations_by_neighbor[target].append(relation)
                elif target == entity_id:
                    relations_by_neighbor[source].append(relation)

            def sort_key(eid):
                return (-self.nodes.get(eid, {}).get("count", 0), str(eid))

            # `related` is a set; sorting makes the listing deterministic.
            for eid in sorted(related - {entity_id}, key=sort_key):
                node = self.nodes.get(eid, {})
                lines.append(f"  [{node.get('type', '?')}] {eid} (提及{node.get('count', 0)}次)")
                for relation in relations_by_neighbor.get(eid, ()):
                    lines.append(f"    └─ {relation}")
        else:
            lines.append("=== 知识图谱概览 ===")
            # Group entities by type.
            by_type = defaultdict(list)
            for eid, info in self.nodes.items():
                by_type[info["type"]].append((eid, info["count"]))

            for etype, entities in sorted(by_type.items()):
                lines.append(f"\n[{etype}] ({len(entities)} 个实体)")
                # Stable sort: ties keep insertion order.
                for eid, count in sorted(entities, key=lambda item: -item[1]):
                    lines.append(f"  {eid} (提及{count}次)")

            lines.append(f"\n关系总数: {len(self.edges)}")

        return "\n".join(lines)


def find_memory_db():
    """Locate a memory database file.

    Patterns are tried in priority order: ``*.db`` directly under MEMORY_DIR,
    then ``*.db`` anywhere below it, then two fixed fallback paths.

    Returns:
        The path of the first matching regular file, or None when nothing
        matches.
    """
    patterns = [
        os.path.join(MEMORY_DIR, "*.db"),
        os.path.join(MEMORY_DIR, "**/*.db"),
        "/data/hermes/memories/holographic.db",
        "/data/hermes/memories/memory.db",
    ]
    for pattern in patterns:
        # glob returns matches in arbitrary order; sort them so the same
        # database is selected on every run when several files match.
        for candidate in sorted(glob.glob(pattern, recursive=True)):
            # Skip directories that merely happen to be named "*.db".
            if os.path.isfile(candidate):
                return candidate
    return None


# Keyword boundaries are ASCII-only on purpose. Python's `\b` is Unicode-aware
# and treats CJK characters as word characters, so `\bPython\b` never matches
# inside "用Python写". These lookarounds only reject neighbouring ASCII
# letters, digits and underscores.
_WORD_START = r"(?<![A-Za-z0-9_])"
_WORD_END = r"(?![A-Za-z0-9_])"

# (compiled pattern, entity type), compiled once at import time.
_ENTITY_PATTERNS = (
    (
        re.compile(
            _WORD_START
            + r"(?:(?:Python|JavaScript|TypeScript|React|Vue|Node\.js|Docker|Kubernetes?"
            r"|Redis|PostgreSQL|MySQL|MongoDB|Nginx|Linux|Git|Rust|Go|Java|Swift|Kotlin)"
            + _WORD_END
            # "C++" ends in punctuation, so it gets no trailing boundary:
            # both "C++ code" and "C++11" should count.
            + r"|C\+\+)",
            re.IGNORECASE,
        ),
        "technology",
    ),
    (
        re.compile(
            _WORD_START
            + r"(?:Hermes|飞书|HuggingFace|OpenRouter|GitHub|Cloudflare|Vercel|AWS|GCP)"
            + _WORD_END,
            re.IGNORECASE,
        ),
        "platform",
    ),
    (
        re.compile(
            _WORD_START
            + r"(?:API|REST|GraphQL|WebSocket|HTTP|HTTPS|TCP|UDP|SSH|SSL|TLS)"
            + _WORD_END,
            re.IGNORECASE,
        ),
        "protocol",
    ),
)


def extract_entities_from_text(text):
    """Extract known keyword entities from text (simple keyword-based NER).

    Matching is case-insensitive. Every occurrence is reported, so repeated
    mentions produce repeated entries.

    Args:
        text: the string to scan; empty or None yields an empty list.

    Returns:
        A list of (entity_id, entity_type) tuples, where entity_id is the
        lower-cased match. Results are grouped by type in the order
        technology, platform, protocol, and within a type follow the order of
        appearance in the text.
    """
    if not text:
        return []

    entities = []
    for pattern, entity_type in _ENTITY_PATTERNS:
        entities.extend(
            (match.lower(), entity_type) for match in pattern.findall(text)
        )
    return entities


def _read_memory_texts(db_path):
    """Return one space-joined string per row of the first known memory table.

    Tables are probed in the order memories, memory, entries. For the first
    one that exists, the columns content/value/text are selected; if that
    query fails (for example because a column is missing) every column is
    read instead. SQLite errors outside those two queries propagate to the
    caller; the connection is closed in all cases.
    """
    texts = []
    conn = sqlite3.connect(db_path)
    try:
        cursor = conn.cursor()
        for table in ("memories", "memory", "entries"):
            cursor.execute(
                "SELECT name FROM sqlite_master WHERE type='table' AND name=?",
                (table,),
            )
            if cursor.fetchone() is None:
                continue

            # Table names cannot be bound as parameters; `table` comes from
            # the fixed tuple above, never from external input.
            try:
                cursor.execute(f"SELECT content, value, text FROM {table}")
            except sqlite3.Error:
                try:
                    cursor.execute(f"SELECT * FROM {table}")
                except sqlite3.Error:
                    continue

            texts.extend(
                " ".join(str(column) for column in row)
                for row in cursor.fetchall()
            )
            break  # only the first readable table is used
    finally:
        conn.close()
    return texts


def build_graph_from_memories(db_path):
    """Build a knowledge graph from the memory database at ``db_path``.

    Every entity occurrence found in a memory adds one mention to its node.
    Each pair of distinct entities appearing in the same memory text gets one
    "co-mentioned" edge, with the pair in sorted order so the edge list is
    reproducible between runs.

    Args:
        db_path: path to a SQLite database, or a falsy value for "no database".

    Returns:
        A KnowledgeGraph. It is empty when ``db_path`` is falsy or when the
        database cannot be read (the error is printed, not raised).
    """
    graph = KnowledgeGraph()

    if not db_path:
        return graph

    try:
        memory_texts = _read_memory_texts(db_path)
    except Exception as e:  # best-effort boundary: report and return an empty graph
        print(f"读取记忆失败: {e}")
        return graph

    # Group by text: identical memory texts pool their mentions but contribute
    # their co-mention edges only once.
    entities_by_text = defaultdict(list)
    for text in memory_texts:
        entities_by_text[text].extend(extract_entities_from_text(text))

    # Add all nodes first.
    for entities in entities_by_text.values():
        for entity_id, entity_type in entities:
            graph.add_entity(entity_id, entity_type, entity_id)

    # Then link entities that occur in the same memory.
    for entities in entities_by_text.values():
        # Sorted rather than raw set order, so source/target are deterministic.
        unique_ids = sorted({entity_id for entity_id, _ in entities})
        for index, first in enumerate(unique_ids):
            for second in unique_ids[index + 1:]:
                graph.add_relation(first, second, "co-mentioned")

    return graph


def main():
    """Locate the memory database, print the graph overview, and save it as JSON.

    The graph is written to knowledge_graph.json inside MEMORY_DIR. A failure
    while saving is printed rather than raised.
    """
    db_path = find_memory_db()
    print(f"记忆数据库: {db_path or '未找到'}")

    graph = build_graph_from_memories(db_path)
    print(graph.to_text())

    # Convert the edge tuples into JSON-friendly records.
    edge_records = []
    for source, target, relation, weight in graph.edges:
        edge_records.append(
            {"source": source, "target": target, "relation": relation, "weight": weight}
        )
    output = {"nodes": dict(graph.nodes), "edges": edge_records}

    # Persist the graph data.
    output_path = os.path.join(MEMORY_DIR, "knowledge_graph.json")
    try:
        target_dir = os.path.dirname(output_path)
        os.makedirs(target_dir, exist_ok=True)
        with open(output_path, "w", encoding="utf-8") as handle:
            json.dump(output, handle, ensure_ascii=False, indent=2)
        print(f"\n图谱已保存到: {output_path}")
    except Exception as e:
        print(f"保存图谱失败: {e}")


# Run the graph build only when this file is executed as a script, not when
# it is imported as a module.
if __name__ == "__main__":
    main()