File size: 3,378 Bytes
e36381e
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
#!/usr/bin/env bash
# Sync Obsidian markdown patterns/knowledge → FalkorDB Lite (graph DB).
# Complements rag-index.sh (vector DB) — same sources, 2 different indexes.
# -e: abort on error; -u: unset vars are errors; pipefail: fail a pipeline
# if any stage fails.
set -euo pipefail
PYTHON="${HOME}/.surrogate/venv/bin/python"
# Diagnostics belong on stderr so stdout stays clean for the report below.
[ -x "$PYTHON" ] || { echo "venv not found: $PYTHON" >&2; exit 1; }

"$PYTHON" <<'PY'
import re, os
from pathlib import Path
from redislite.falkordb_client import FalkorDB
import yaml

HOME = Path.home()
# Markdown sources to index; directories that don't exist are skipped at scan time.
SOURCES = [
    HOME / "Documents/Obsidian Vault/AI-Hub/patterns",
    HOME / "Documents/Obsidian Vault/AI-Hub/knowledge",
    HOME / "Documents/Obsidian Vault/AI-Hub/inbox",
    HOME / ".surrogate/memory",
]
DB_FILE = str(HOME / ".surrogate/graph-db.rdb")

db = FalkorDB(dbfilename=DB_FILE)
g = db.select_graph("ashira")

# Full rebuild each run: wipe the graph first. On a brand-new DB this query
# can fail (no graph yet); that's expected, so the failure is ignored — but
# never with a bare except, which would also swallow KeyboardInterrupt.
try:
    g.query("MATCH (n) DETACH DELETE n")
except Exception:
    pass

# YAML frontmatter block at the very top of a note, and
# [[target]] / [[target|alias]] wikilinks (alias is dropped by the group).
frontmatter_re = re.compile(r'^---\n(.*?)\n---', re.DOTALL)
wikilink_re = re.compile(r'\[\[([^\]|]+?)(?:\|[^\]]+)?\]\]')

def esc(s):
    """Escape a value for inline single-quoted Cypher string literals.

    Backslashes and single quotes are backslash-escaped. None becomes "".
    Fix: the old `if s else ""` collapsed ALL falsy values (0, 0.0, False)
    to "" instead of escaping their string form; only None should be empty.
    """
    if s is None:
        return ""
    return str(s).replace("\\", "\\\\").replace("'", "\\'")

nodes = {}   # note stem -> node properties for the Doc node
edges = []   # (source stem, link target stem) pairs from [[wikilinks]]

for src in SOURCES:
    if not src.exists():
        continue
    for md in src.rglob("*.md"):
        stem = md.stem
        text = md.read_text(errors="ignore")

        # Parse YAML frontmatter; malformed YAML must not kill the whole
        # sync, so it is narrowed to yaml.YAMLError (not a bare except).
        fm = {}
        fm_match = frontmatter_re.match(text)
        if fm_match:
            try:
                fm = yaml.safe_load(fm_match.group(1)) or {}
            except yaml.YAMLError:
                fm = {}
        if not isinstance(fm, dict):
            fm = {}  # frontmatter that parses to a bare string/list would crash .get()

        tags = fm.get("tags", [])
        if isinstance(tags, str):
            tags = [tags]

        nodes[stem] = {
            "path": str(md.relative_to(HOME)),
            "tags": [str(t).replace("#", "") for t in tags],
            "category": md.parent.name,
            "severity": str(fm.get("severity", "medium")),
        }

        # [[dir/Note.md|alias]] -> "Note"; self-links are dropped.
        for link in wikilink_re.findall(text):
            target = link.split("/")[-1].split("|")[0].replace(".md", "").strip()
            if target and target != stem:
                edges.append((stem, target))

# Upsert one Doc node per markdown file. Values go through esc() because this
# client builds Cypher by string interpolation (no query parameters here).
for name, info in nodes.items():
    g.query(
        f"MERGE (n:Doc {{name:'{esc(name)}'}}) "
        f"SET n.path='{esc(info['path'])}', "
        f"n.category='{esc(info['category'])}', "
        f"n.severity='{esc(info['severity'])}', "
        f"n.tags='{esc(','.join(info['tags']))}'"
    )

# Wire up LINKS_TO edges. A link to a never-indexed note simply matches
# nothing; edge creation is best-effort, so failures are skipped — narrowed
# to except Exception rather than a bare except (which would also swallow
# KeyboardInterrupt/SystemExit).
edge_count = 0
for src_name, tgt_name in edges:
    try:
        g.query(
            f"MATCH (a:Doc {{name:'{esc(src_name)}'}}), (b:Doc {{name:'{esc(tgt_name)}'}}) "
            f"MERGE (a)-[:LINKS_TO]->(b)"
        )
        edge_count += 1
    except Exception:
        pass

# Create one Tag node per distinct non-empty tag, then TAGGED edges from
# each Doc to its tags, and finish with a one-line build summary.
all_tags = {t for info in nodes.values() for t in info["tags"] if t}
for t in all_tags:
    g.query(f"MERGE (:Tag {{name:'{esc(t)}'}})")
for name, info in nodes.items():
    for t in filter(None, info["tags"]):
        g.query(
            f"MATCH (d:Doc {{name:'{esc(name)}'}}), (t:Tag {{name:'{esc(t)}'}}) "
            f"MERGE (d)-[:TAGGED]->(t)"
        )

print(f"Graph built: {len(nodes)} docs, {edge_count} links, {len(all_tags)} tags")

# Quick sanity report: the most-used tags and the most-connected docs.
top_tags = g.query("MATCH (d:Doc)-[:TAGGED]->(t:Tag) RETURN t.name, count(d) AS c ORDER BY c DESC LIMIT 10")
print("\nTop 10 tags:")
for tag_name, doc_count in top_tags.result_set:
    print(f"  #{tag_name}: {doc_count} docs")

hubs = g.query("MATCH (d:Doc)-[r:LINKS_TO]-() RETURN d.name, count(r) AS c ORDER BY c DESC LIMIT 10")
print("\nTop 10 hubs (most connected):")
for doc_name, link_count in hubs.result_set:
    print(f"  {doc_name}: {link_count} links")
PY