File size: 12,831 Bytes
e36381e
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
023ab84
e36381e
 
 
 
 
 
 
 
023ab84
e36381e
 
023ab84
e36381e
023ab84
 
e36381e
 
 
 
 
 
023ab84
 
e36381e
 
 
 
 
023ab84
e36381e
 
 
 
023ab84
 
e36381e
 
 
 
 
023ab84
e36381e
 
 
 
 
 
 
 
023ab84
 
e36381e
 
 
 
 
 
 
 
 
 
 
 
 
023ab84
e36381e
 
 
 
 
 
023ab84
e36381e
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
023ab84
e36381e
 
 
 
 
 
 
023ab84
e36381e
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
023ab84
e36381e
 
 
 
 
 
 
023ab84
e36381e
 
 
 
 
 
023ab84
e36381e
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
#!/usr/bin/env bash
# Shared context builder β€” sourced by qwen-coder-worker + dev-cloud-worker.
# Produces rich context: repo-map + similar functions from project + past accepted examples.
# Call: build_rich_context <project> <priority_id> <priority_title>
# Sets env vars: REPO_MAP, SIMILAR_FUNCS, RAG_EXAMPLES, SEMANTIC_RAG, FEWSHOT_ACCEPTED,
#   ANTI_PATTERNS, PROMPT_DELTAS, PRIO_SPEC, AUTHORITATIVE_CONTEXT, GRAPH_CONTEXT, HERMES_RECALL
build_rich_context() {
    local PRIO_PROJECT="$1"
    local PRIO_ID="$2"
    local PRIO_TITLE="$3"
    local SHARED="$HOME/.hermes/workspace/swarm-shared"
    local PROJECT_DIR="$HOME/axentx/$PRIO_PROJECT"

    # 1. Full repo-map (up to 10KB — was 3KB).
    # build-repo-map.sh writes to "<proj>_map.md"; some older paths used "<proj>.md".
    # Try both so we don't silently lose the strongest grounding signal.
    REPO_MAP=""
    local candidate
    for candidate in "$SHARED/repo-maps/${PRIO_PROJECT}_map.md" "$SHARED/repo-maps/${PRIO_PROJECT}.md"; do
        if [[ -f "$candidate" ]]; then
            REPO_MAP=$(head -c 10000 "$candidate")
            break
        fi
    done

    # 2. Similar function signatures from project (grep in real codebase).
    SIMILAR_FUNCS=""
    if [[ -d "$PROJECT_DIR" ]]; then
        # Up to 3 lowercase keywords (>4 chars) from the title, '|'-joined for
        # grep -E.  printf (not echo) so a title starting with '-' or holding
        # backslashes survives intact; tr sanitizes to [a-z0-9|], so KW is
        # safe to embed in the regex below.
        local KW
        KW=$(printf '%s' "$PRIO_TITLE" | /usr/bin/tr '[:upper:]' '[:lower:]' | /usr/bin/tr -cs 'a-z0-9' ' ' | /usr/bin/tr ' ' '\n' | awk 'length>4' | head -3 | /usr/bin/tr '\n' '|' | sed 's/|$//')
        if [[ -n "$KW" ]]; then
            # NUL-delimited find|xargs so paths with spaces don't split.
            SIMILAR_FUNCS=$(/usr/bin/find "$PROJECT_DIR" -type f \( -name '*.py' -o -name '*.ts' -o -name '*.tsx' -o -name '*.js' -o -name '*.go' \) ! -path '*/node_modules/*' ! -path '*/.hermes-*' -print0 2>/dev/null | \
                xargs -0 grep -lE "($KW)" 2>/dev/null | head -3 | while IFS= read -r f; do
                    printf '=== %s ===\n' "${f#"$PROJECT_DIR"/}"
                    grep -A3 -E "^(def|function|export const|class|async def|interface)" "$f" 2>/dev/null | head -30
                done 2>/dev/null | head -c 4000)
        fi
    fi

    # 3. RAG: actual code patterns from project (SQLite FTS via ask-sqlite.py if exists).
    RAG_EXAMPLES=""
    if [[ -x "$HOME/.surrogate/bin/ask-sqlite.py" ]]; then
        RAG_EXAMPLES=$(python3 "$HOME/.surrogate/bin/ask-sqlite.py" \
            "$PRIO_PROJECT $PRIO_TITLE" 2>/dev/null | head -c 3000)
    fi

    # 4. Semantic RAG (from embeddings) — top similar docs for the title.
    SEMANTIC_RAG=""
    if [[ -f "$HOME/.surrogate/embeddings.db" ]]; then
        SEMANTIC_RAG=$(python3 "$HOME/.surrogate/bin/embed-doc.py" --query "$PRIO_TITLE" 2>/dev/null | head -c 2000)
    fi

    # 5. Past ACCEPTED example (few-shot): newest review with quality_score >= 7
    # whose worker output file still exists wins (break 2 exits both loops).
    # NOTE(review): the regex also matches scores like 70-99; scores are 0-10
    # in practice, so this is accepted as-is.
    FEWSHOT_ACCEPTED=""
    local review OUT_FILE WD OUT_PATH
    while IFS= read -r review; do
        if grep -qE '"quality_score":\s*[789]|"quality_score":\s*10' "$review" 2>/dev/null; then
            OUT_FILE=$(basename "$review" .review.json)
            # Search all worker output dirs for the matching output file.
            for WD in qwen-coder dev-cloud-samba dev-cloud-github dev-cloud-cloudflare dev-cloud-groq dev-cloud-synthesis; do
                OUT_PATH="$HOME/.hermes/workspace/$WD/${OUT_FILE}.md"
                if [[ -f "$OUT_PATH" ]]; then
                    FEWSHOT_ACCEPTED=$(head -c 2000 "$OUT_PATH")
                    break 2
                fi
            done
        fi
    done < <(/bin/ls -t "$HOME/.hermes/workspace/qwen-coder-reviews/"*.review.json 2>/dev/null | head -30)

    # 6. Anti-patterns: rejection reasons from the 10 newest reviews
    # (up to 2 bugs per review, final result capped at 10 lines).
    ANTI_PATTERNS=""
    local bugs
    while IFS= read -r review; do
        # Review path goes in via argv: interpolating it into the Python
        # source would break on any path containing a quote.
        bugs=$(python3 - "$review" 2>/dev/null <<'PYEOF'
import json, re, sys
try:
    txt = open(sys.argv[1]).read()
    m = re.search(r'\{.*\}', txt, re.DOTALL)
    if not m: sys.exit()
    d = json.loads(m.group(0))
    if d.get('verdict') in ('reject','rework') and d.get('bugs'):
        for b in d['bugs'][:2]:
            print(f'- {b[:180]}')
except Exception: pass
PYEOF
)
        [[ -n "$bugs" ]] && ANTI_PATTERNS="$ANTI_PATTERNS$bugs"$'\n'
    done < <(/bin/ls -t "$HOME/.hermes/workspace/qwen-coder-reviews/"*.review.json 2>/dev/null | head -10)
    ANTI_PATTERNS=$(printf '%s' "$ANTI_PATTERNS" | head -10)

    # 7. Active-learning prompt deltas — aggregate last 5 UNIQUE anti-patterns.
    # Dedup by first 80 chars of prompt_addition (similar bugs shouldn't bloat
    # the prompt); walk newest -> oldest.
    PROMPT_DELTAS=""
    if [[ -f "$HOME/.surrogate/memory/worker-prompt-deltas.jsonl" ]]; then
        PROMPT_DELTAS=$(python3 2>/dev/null <<'PYEOF'
import json
from pathlib import Path
try:
    entries = []
    for l in (Path.home() / '.surrogate/memory/worker-prompt-deltas.jsonl').read_text().splitlines():
        if not l.strip(): continue
        try: entries.append(json.loads(l))
        except Exception: pass
    # Dedup by first 80 chars; cap 5 unique, newest first.
    seen = set()
    picked = []
    for e in reversed(entries):
        addn = (e.get('prompt_addition') or '').strip()
        if not addn: continue
        key = addn[:80]
        if key in seen: continue
        seen.add(key)
        picked.append(addn)
        if len(picked) >= 5: break
    if picked:
        out = ['ACTIVE-LEARNED RULES (avoid these past mistakes):']
        for i, a in enumerate(picked, 1):
            out.append(f'{i}. {a[:400]}')
        print('\n'.join(out))
except Exception: pass
PYEOF
)
    fi

    # 8. Priority full spec (if a detailed spec file exists).
    # Spec is the single most important signal — cap high (6KB) so the full
    # Context/Requirements/DO NOT sections fit.  Other RAG signals are capped
    # lower because they're supplementary; the spec is authoritative.
    PRIO_SPEC=""
    local SPEC_FILE="$HOME/.hermes/workspace/swarm-shared/specs/${PRIO_ID}.md"
    [[ -f "$SPEC_FILE" ]] && PRIO_SPEC=$(head -c 6000 "$SPEC_FILE")

    # 9. Task-type authoritative sources — boost scraped knowledge based on title.
    # Security task → CVE/MITRE/OWASP/Prowler. SRE → Google SRE/postmortems.
    # Observability → OTel/Prometheus/Grafana/Honeycomb. etc.
    # Title/project are passed via env vars with a quoted heredoc delimiter:
    # interpolating them into the Python source breaks (or injects code) when
    # a title contains quotes, backslashes, or triple quotes.
    AUTHORITATIVE_CONTEXT=""
    if [[ -f "$HOME/.surrogate/index.db" ]]; then
        AUTHORITATIVE_CONTEXT=$(CTX_TITLE="$PRIO_TITLE" CTX_PROJECT="$PRIO_PROJECT" python3 <<'PYEOF'
import os, re, sqlite3
title = os.environ.get('CTX_TITLE', '').lower()
project = os.environ.get('CTX_PROJECT', '').lower()
# Classify task → preferred source whitelist
routes = {
    # Security tasks
    ('security','cve','vuln','prowler','kyverno','opa','admission','ciem','sigma','mitre','attack','cosign','sbom','falco','threat','malware','exploit'): ['cisa-kev','mitre-attack','owasp-cheatsheet','domain:sec-cloudsec','domain:sec-appsec','domain:sec-devsecops','code-deep:sec-appsec','code-deep:sec-cloudsec'],
    # SRE / incident / postmortem
    ('sre','slo','sli','incident','postmortem','runbook','chaos','rca','dora','mttr','blameless','on-call','pager','outage'): ['google-sre','postmortems-index','firecrawl','eng-blog:charity-majors','eng-blog:high-scalability','mythos-ai-engineering','domain:ops-sre','code-deep:ops-sre'],
    # Observability
    ('observab','otel','telemetry','prometheus','grafana','loki','tempo','metric','trace','log','honeycomb','ebpf'): ['opentelemetry-spec','prometheus-docs','grafana-docs','firecrawl','domain:ops-observability'],
    # Cloud / K8s / Terraform
    ('kubernetes','k8s','helm','istio','terraform','aws','ecs','eks','lambda','cloudformation','cdk','gcp','azure','argocd','flux'): ['firecrawl','github-public','code-deep:ops-devops','domain:ops-devops','mythos-cloud','github-trending'],
    # AI / multi-agent
    ('agent','autogen','crewai','langgraph','orchestra','mcp','reflexion','dspy','rag','llm'): ['anthropic-cookbook','arxiv','mythos-ai-agent','mythos-ai-engineering','domain:ai-engineering','code-deep:ai-engineering','firecrawl','hf-papers'],
    # FinOps
    ('cost','finops','focus','rightsizing','kubecost','opencost','savings','budget','spend','waste'): ['firecrawl','rss','eng-blog:high-scalability','domain:ops-devops','arxiv'],
    # Frontend / FE
    ('frontend','react','nextjs','typescript','tsx','ui'): ['domain:dev-frontend','domain:design-ux','code-deep:dev-frontend','stackoverflow','github-trending'],
    # Backend / API / DB
    ('backend','api','fastapi','database','sql','postgres','asyncpg','sqlalchemy'): ['domain:dev-backend','domain:dev-fullstack','code-deep:dev-backend','github-public','stackoverflow','hf-papers'],
    # Mobile
    ('mobile','android','ios','flutter','reactnative','line','workio'): ['domain:dev-mobile','code-deep:dev-mobile','firecrawl','stackoverflow'],
}
# Project-specific boost
project_preferred = {
    'vanguard': ['cisa-kev','mitre-attack','owasp-cheatsheet','code-deep:sec-appsec'],
    'costinel': ['firecrawl','rss','arxiv','mythos-ai-engineering'],
    'arkship':  ['google-sre','postmortems-index','anthropic-cookbook','opentelemetry-spec','firecrawl'],
    'surrogate':['arxiv','hf-papers','anthropic-cookbook','mythos-ai-agent'],
    'workio':   ['firecrawl','stackoverflow','github-public'],
}

preferred_sources = set()
for keywords, srcs in routes.items():
    if any(k in title for k in keywords):
        preferred_sources.update(srcs)
for proj_key, srcs in project_preferred.items():
    if proj_key in project:
        preferred_sources.update(srcs)

if not preferred_sources:
    print(''); raise SystemExit

# FTS query — prefer authoritative sources
conn = sqlite3.connect(os.path.expanduser('~/.surrogate/index.db'))
conn.row_factory = sqlite3.Row
# Simple keywords from title (>3 chars, max 5)
kw = ' '.join([w for w in re.sub(r'[^a-zA-Z0-9 ]', ' ', title).split() if len(w) > 3][:5])
if not kw: raise SystemExit

# Source names are hard-coded above (no quotes), so inlining them is safe here.
src_list = ','.join(f"'{s}'" for s in preferred_sources)
# Strategy: 3-tier fallback — preferred+match → any+match → preferred random
rows = []
try:
    # Tier 1: preferred sources + FTS match on keywords
    q = f"""SELECT d.source, d.instruction, substr(d.response, 1, 600) as body
            FROM docs_fts f JOIN docs d ON d.id = f.rowid
            WHERE f.docs_fts MATCH ? AND d.source IN ({src_list})
            ORDER BY bm25(docs_fts) LIMIT 6"""
    rows = conn.execute(q, (kw,)).fetchall()
except sqlite3.OperationalError: pass

if not rows:
    # Tier 2: FTS match on ANY source — relax source filter
    try:
        q2 = """SELECT d.source, d.instruction, substr(d.response, 1, 600) as body
                FROM docs_fts f JOIN docs d ON d.id = f.rowid
                WHERE f.docs_fts MATCH ? ORDER BY bm25(docs_fts) LIMIT 6"""
        rows = conn.execute(q2, (kw,)).fetchall()
    except sqlite3.OperationalError: pass

if not rows:
    # Tier 3: random sample from preferred sources (even if no keyword match)
    rows = conn.execute(f"SELECT source, instruction, substr(response,1,600) as body FROM docs WHERE source IN ({src_list}) ORDER BY RANDOM() LIMIT 6").fetchall()

conn.close()

out = []
for r in rows:
    out.append(f"[{r['source']}] {(r['instruction'] or '')[:120]}")
    out.append((r['body'] or '')[:500])
    out.append('')
print('\n'.join(out)[:3500])
PYEOF
)
    fi

    # 10. FalkorDB graph — related decisions + past priorities with similar theme
    GRAPH_CONTEXT=""
    local REDIS_SOCK
    REDIS_SOCK=$(/usr/bin/find /var/folders /tmp -name 'redis.socket' -type s 2>/dev/null | head -1)
    if [[ -n "$REDIS_SOCK" ]]; then
        # Strip single quotes so the interpolated Cypher string literal cannot
        # be broken/injected by an odd project name (normal slugs are untouched).
        local CY_PROJ=${PRIO_PROJECT//\'/}
        # Get related priorities + learned rules
        GRAPH_CONTEXT=$(/opt/homebrew/bin/redis-cli -s "$REDIS_SOCK" GRAPH.QUERY ashira "
            MATCH (p:Priority {project: '$CY_PROJ'})
            OPTIONAL MATCH (p)-[:HAS_LEARNED_RULE]->(l:LearnedRule)
            OPTIONAL MATCH (p)-[:COMMITTED_AS]->(c:Commit)
            RETURN p.id, p.title, l.content, c.msg LIMIT 8
        " 2>/dev/null | tail -c 2500)
    fi

    # 11. Hermes trace recall — past similar tasks Hermes handled (from JSONL).
    # Title goes in via env var for the same quoting-safety reason as #9.
    HERMES_RECALL=""
    local TRACE_DIR="$HOME/axentx/surrogate/data/training-jsonl"
    if [[ -d "$TRACE_DIR" ]]; then
        HERMES_RECALL=$(CTX_TITLE="$PRIO_TITLE" python3 <<'PYEOF'
import glob, json, os, re
title = os.environ.get('CTX_TITLE', '').lower()
words = [w for w in re.sub(r'[^a-zA-Z0-9 ]', ' ', title).split() if len(w) > 4][:4]
if not words: raise SystemExit

hits = []
# Walk recent hermes-trace-YYYY-MM-DD.jsonl files (last 7; names sort by date).
files = sorted(glob.glob(os.path.expanduser('~/axentx/surrogate/data/training-jsonl/hermes-trace-*.jsonl')))[-7:]
for f in files:
    try:
        for line in open(f):
            try: rec = json.loads(line)
            except Exception: continue
            blob = (rec.get('instruction','') + ' ' + rec.get('output',''))[:2000].lower()
            score = sum(1 for w in words if w in blob)
            if score >= 2:
                hits.append((score, rec))
    except Exception: pass

hits.sort(key=lambda x: -x[0])
for score, rec in hits[:3]:
    print(f"HERMES PREVIOUSLY [{rec.get('category','?')}]: {rec.get('instruction','')[:120]}")
    print(f"→ {rec.get('output','')[:400]}")
    print()
PYEOF
)
    fi
}

# Export the function so child bash processes (the worker scripts that source
# or are spawned from this library) inherit it.
export -f build_rich_context