#!/usr/bin/env python3
"""
Hermes 记忆知识图谱 - 基于 NetworkX 构建记忆实体关联
通过 execute_code 调用,将碎片记忆构建为关联图谱
功能:
1. 从记忆数据库提取实体(人/项目/技术/问题)
2. 建立实体间关系(使用/属于/解决/关联)
3. 支持关联查询:给出一个实体,找出所有关联实体
4. 可视化输出(文本格式)
"""
import sqlite3
import json
import os
import glob
import re
from collections import defaultdict
MEMORY_DIR = os.environ.get("HERMES_DATA_DIR", "/data/hermes/memories")
class KnowledgeGraph:
    """In-memory knowledge graph: typed entity nodes plus relation edges.

    Nodes carry a mention count; edges are stored directed but traversed as
    undirected. Duplicate edges are kept deliberately (multiplicity acts as
    relation strength).
    """

    def __init__(self):
        # entity_id -> {"type": str, "label": str, "count": mention count}
        self.nodes = {}
        # list of (source_id, target_id, relation, weight) tuples
        self.edges = []

    def add_entity(self, entity_id, entity_type, label):
        """Register an entity if new and bump its mention count."""
        if entity_id not in self.nodes:
            self.nodes[entity_id] = {"type": entity_type, "label": label, "count": 0}
        self.nodes[entity_id]["count"] += 1

    def add_relation(self, source_id, target_id, relation, weight=1):
        """Record a relation edge between two entities (duplicates allowed)."""
        self.edges.append((source_id, target_id, relation, weight))

    def get_related(self, entity_id, depth=1):
        """Return the set of entity ids within `depth` hops of entity_id (BFS).

        Edges are followed in both directions; the start entity is always
        included in the result, even when it has no edges.
        """
        visited = {entity_id}
        # Frontier as a set: the original used a list, making each
        # `in current` test O(|frontier|) per edge (O(E*V) overall).
        frontier = {entity_id}
        for _ in range(depth):
            next_frontier = set()
            for src, dst, _relation, _weight in self.edges:
                if src in frontier and dst not in visited:
                    next_frontier.add(dst)
                    visited.add(dst)
                if dst in frontier and src not in visited:
                    next_frontier.add(src)
                    visited.add(src)
            frontier = next_frontier
        return visited

    def to_text(self, entity_id=None):
        """Render the graph as text.

        With entity_id: an ego view listing related entities and the edges
        that connect them to entity_id. Without: a full overview grouped by
        entity type plus the total edge count.
        """
        lines = []
        if entity_id:
            related = self.get_related(entity_id)
            lines.append(f"=== {entity_id} 的知识图谱 ===")
            for eid in related:
                if eid == entity_id:
                    continue
                node = self.nodes.get(eid, {})
                lines.append(f" [{node.get('type', '?')}] {eid} (提及{node.get('count', 0)}次)")
                # One line per matching edge: repeated relations show strength.
                for s, t, r, w in self.edges:
                    if (s == entity_id and t == eid) or (t == entity_id and s == eid):
                        lines.append(f" └─ {r}")
        else:
            lines.append("=== 知识图谱概览 ===")
            # Group entities by type for a readable summary.
            by_type = defaultdict(list)
            for eid, info in self.nodes.items():
                by_type[info["type"]].append((eid, info["count"]))
            for etype, entities in sorted(by_type.items()):
                lines.append(f"\n[{etype}] ({len(entities)} 个实体)")
                # Most-mentioned entities first.
                for eid, count in sorted(entities, key=lambda x: -x[1]):
                    lines.append(f" {eid} (提及{count}次)")
            lines.append(f"\n关系总数: {len(self.edges)}")
        return "\n".join(lines)
def find_memory_db():
    """Locate the first memory database file among the known candidate locations.

    Returns the path of the first match, or None when nothing is found.
    """
    candidate_patterns = (
        os.path.join(MEMORY_DIR, "*.db"),
        os.path.join(MEMORY_DIR, "**/*.db"),
        "/data/hermes/memories/holographic.db",
        "/data/hermes/memories/memory.db",
    )
    for pattern in candidate_patterns:
        matches = glob.glob(pattern, recursive=True)
        if matches:
            return matches[0]
    return None
def extract_entities_from_text(text):
    """Extract (entity_id, entity_type) pairs from free text via keyword regexes.

    Entity ids are lower-cased so different casings of the same term merge
    into one node. One pair is appended per occurrence, so repeated mentions
    inflate the graph's mention counts.
    """
    entities = []
    # Lookarounds (?<!\w)/(?!\w) instead of \b: a trailing \b after a
    # non-word character never matches, so the original pattern could not
    # extract "C++" followed by a space or punctuation.
    tech_patterns = [
        (r'(?<!\w)(Python|JavaScript|TypeScript|React|Vue|Node\.js|Docker|Kubernetes?|Redis|PostgreSQL|MySQL|MongoDB|Nginx|Linux|Git|Rust|Go|Java|C\+\+|Swift|Kotlin)(?!\w)', "technology"),
        (r'(?<!\w)(Hermes|飞书|HuggingFace|OpenRouter|GitHub|Cloudflare|Vercel|AWS|GCP)(?!\w)', "platform"),
        (r'(?<!\w)(API|REST|GraphQL|WebSocket|HTTP|HTTPS|TCP|UDP|SSH|SSL|TLS)(?!\w)', "protocol"),
    ]
    # NOTE(review): IGNORECASE means common English words like "go" also
    # match the Go alternative — kept as in the original.
    for pattern, etype in tech_patterns:
        for match in re.findall(pattern, text, re.IGNORECASE):
            entities.append((match.lower(), etype))
    return entities
def build_graph_from_memories(db_path):
    """Build a KnowledgeGraph from the memory database at db_path.

    Reads every row of the first table found among ("memories", "memory",
    "entries"), extracts entities from each row's concatenated text, and
    links entities that co-occur in the same memory with "co-mentioned"
    edges. Returns an empty graph when db_path is falsy or unreadable.
    """
    graph = KnowledgeGraph()
    if not db_path:
        return graph

    all_text_parts = []
    try:
        conn = sqlite3.connect(db_path)
        try:
            cursor = conn.cursor()
            for table in ("memories", "memory", "entries"):
                # Parameterized lookup (the original interpolated the name;
                # safe here since names are from this fixed tuple, but the
                # placeholder form is the idiomatic one).
                cursor.execute(
                    "SELECT name FROM sqlite_master WHERE type='table' AND name=?",
                    (table,),
                )
                if not cursor.fetchone():
                    continue
                # Table names cannot be bound parameters; f-string is
                # required here and the names are trusted constants.
                try:
                    cursor.execute(f"SELECT content, value, text FROM {table}")
                except Exception:
                    try:
                        cursor.execute(f"SELECT * FROM {table}")
                    except Exception:
                        continue
                for row in cursor.fetchall():
                    all_text_parts.append(" ".join(str(col) for col in row))
                break  # only the first usable table is consumed
        finally:
            # The original closed only on the happy path, leaking the
            # handle when a query raised.
            conn.close()
    except Exception as e:
        print(f"读取记忆失败: {e}")
        return graph

    # Group extracted entities by memory text (identical memories collapse,
    # matching the original's dict-keyed-by-text behavior).
    entities_by_memory = defaultdict(list)
    for text in all_text_parts:
        entities_by_memory[text].extend(extract_entities_from_text(text))

    # Register nodes; one add per occurrence so mention counts accumulate.
    for entities in entities_by_memory.values():
        for entity_id, entity_type in entities:
            graph.add_entity(entity_id, entity_type, entity_id)

    # Link distinct entities that appear in the same memory.
    for entities in entities_by_memory.values():
        unique_ids = list({entity_id for entity_id, _ in entities})
        for i, first in enumerate(unique_ids):
            for second in unique_ids[i + 1:]:
                graph.add_relation(first, second, "co-mentioned")
    return graph
def main():
    """Build the knowledge graph from memories, print it, and persist it as JSON."""
    db_path = find_memory_db()
    print(f"记忆数据库: {db_path or '未找到'}")

    graph = build_graph_from_memories(db_path)
    print(graph.to_text())

    # Serialize nodes and edges so later runs can reuse the graph.
    payload = {
        "nodes": dict(graph.nodes),
        "edges": [
            {"source": source, "target": target, "relation": relation, "weight": weight}
            for source, target, relation, weight in graph.edges
        ],
    }
    output_path = os.path.join(MEMORY_DIR, "knowledge_graph.json")
    try:
        os.makedirs(os.path.dirname(output_path), exist_ok=True)
        with open(output_path, "w", encoding="utf-8") as f:
            json.dump(payload, f, ensure_ascii=False, indent=2)
        print(f"\n图谱已保存到: {output_path}")
    except Exception as e:
        print(f"保存图谱失败: {e}")
# Script entry point: build, print, and persist the knowledge graph.
if __name__ == "__main__":
    main()