Spaces:
Runtime error
Runtime error
Ashira Pitchayapakayakul committed on
Commit Β·
5c8d6dd
1
Parent(s): 9d0ec79
feat: parallel orchestrate + agentic crawler + skill synthesis + 3-min sync
Browse files
- orchestrate: stages 2+3 parallel (architect||qa-tdd), 5+6a parallel (qa-verify||ops) → ~40% faster
- agentic-crawler: SQLite URL frontier + visited stamps + BFS link discovery (parallel 6)
- skill-synthesis daemon: scans cloned/scraped repos, extracts patterns into ~/.surrogate/skills/<cat>/
- scrape: parallel 4→8 workers, cool-down 30s→5–15s
- training-pair sync: every 10 min → every 3 min
- removed Mac scripts redundant with HF (Mac archived 20 LaunchAgent plists)
- bin/agentic-crawler.sh +223 -0
- bin/skill-synthesis-daemon.sh +148 -0
- bin/surrogate-orchestrate.sh +51 -32
- start.sh +18 -15
bin/agentic-crawler.sh
ADDED
|
@@ -0,0 +1,223 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
#!/usr/bin/env bash
# Agentic crawler — URL frontier with visited stamps + link discovery (BFS).
# Runs continuously: pop URL → fetch → extract links → score → push back to frontier.
# Stamps every visited URL in SQLite so we never revisit. Persists across restarts.
#
# Seeds (re-injected nightly): GitHub trending, arxiv recent, HF trending, MoC pages.
# Filtering: only follow links matching domain allowlist + minimum relevance.
# Output: training pairs (page → summary) pushed to HF dataset every 50 fetches.

# -u + pipefail, deliberately no -e: this is a long-running daemon, individual
# command failures must not kill the loop.
set -uo pipefail
set -a; source "$HOME/.hermes/.env" 2>/dev/null; set +a

DB="$HOME/.claude/state/agentic-frontier.db"
LOG="$HOME/.claude/logs/agentic-crawler.log"
PAIRS="$HOME/.surrogate/training-pairs.jsonl"
mkdir -p "$(dirname "$DB")" "$(dirname "$LOG")" "$(dirname "$PAIRS")"

# ── Schema ──────────────────────────────────────────────────────────────────
sqlite3 "$DB" <<'SQL'
CREATE TABLE IF NOT EXISTS visited (
  url TEXT PRIMARY KEY,
  fetched_ts INTEGER NOT NULL,
  status INTEGER NOT NULL,
  title TEXT,
  domain TEXT,
  depth INTEGER DEFAULT 0,
  bytes INTEGER DEFAULT 0
);
CREATE TABLE IF NOT EXISTS frontier (
  url TEXT PRIMARY KEY,
  score REAL NOT NULL,
  depth INTEGER NOT NULL,
  parent TEXT,
  added_ts INTEGER NOT NULL
);
CREATE INDEX IF NOT EXISTS idx_frontier_score ON frontier(score DESC, added_ts);
CREATE INDEX IF NOT EXISTS idx_visited_domain ON visited(domain);
SQL

# ── Seed if empty ───────────────────────────────────────────────────────────
COUNT=$(sqlite3 "$DB" "SELECT COUNT(*) FROM frontier;")
if [[ $COUNT -lt 5 ]]; then
  echo "[$(date +%H:%M:%S)] seeding frontier" | tee -a "$LOG"
  /usr/bin/python3 - "$DB" <<'PYEOF'
import sqlite3, sys, time

db = sys.argv[1]
seeds = [
    # AI agent / coding
    ("https://github.com/trending?since=daily", 1.0, 0),
    ("https://github.com/trending/python?since=daily", 0.9, 0),
    ("https://github.com/trending/typescript?since=daily", 0.9, 0),
    ("https://github.com/trending/rust?since=daily", 0.85, 0),
    ("https://github.com/trending/go?since=daily", 0.85, 0),
    ("https://huggingface.co/models?sort=trending", 0.95, 0),
    ("https://huggingface.co/datasets?sort=trending", 0.85, 0),
    ("https://arxiv.org/list/cs.AI/recent", 0.95, 0),
    ("https://arxiv.org/list/cs.SE/recent", 0.9, 0),
    ("https://arxiv.org/list/cs.CR/recent", 0.85, 0),
    ("https://news.ycombinator.com/", 0.8, 0),
    ("https://lobste.rs/", 0.75, 0),
    # DevSecOps / SRE / cloud
    ("https://aws.amazon.com/blogs/devops/", 0.7, 0),
    ("https://cloud.google.com/blog/products/devops-sre", 0.7, 0),
    ("https://kubernetes.io/blog/", 0.7, 0),
    ("https://www.cncf.io/blog/", 0.7, 0),
    # Awesome lists (rich link sources)
    ("https://github.com/sindresorhus/awesome", 0.9, 0),
    ("https://github.com/stevenjoezhang/awesome-llm-agents", 0.95, 0),
    ("https://github.com/e2b-dev/awesome-ai-agents", 0.95, 0),
    ("https://github.com/Hannibal046/Awesome-LLM", 0.9, 0),
    ("https://github.com/punkpeye/awesome-mcp-servers", 0.95, 0),
]

con = sqlite3.connect(db)
now = int(time.time())
# INSERT OR IGNORE: re-seeding never clobbers existing frontier scores.
for url, score, depth in seeds:
    con.execute(
        "INSERT OR IGNORE INTO frontier(url,score,depth,parent,added_ts) VALUES (?,?,?,NULL,?)",
        (url, score, depth, now),
    )
con.commit()
print(f"  seeded {len(seeds)} URLs")
PYEOF
fi
# ── Worker: fetch one URL, extract links, score, push back to frontier ─────
# $1 = url, $2 = BFS depth of that url. All output goes to stdout (caller
# redirects into the log). Never returns non-zero for expected skip/fail paths.
fetch_one() {
  local url="$1" depth="$2"
  /usr/bin/python3 - "$url" "$depth" "$DB" "$PAIRS" "${HF_TOKEN:-}" <<'PYEOF' 2>&1
import sys, sqlite3, urllib.request, urllib.parse, re, time, json, os

# argv: url, depth, sqlite db path, training-pairs jsonl path, HF token (reserved)
url, depth, db, pairs, hf_token = sys.argv[1], int(sys.argv[2]), sys.argv[3], sys.argv[4], sys.argv[5]
con = sqlite3.connect(db)

# Skip if already visited (stamp survives restarts)
if con.execute("SELECT 1 FROM visited WHERE url=?", (url,)).fetchone():
    print(f"  [skip-visited] {url[:80]}")
    sys.exit(0)

domain = urllib.parse.urlparse(url).netloc
# Domain allowlist. Fix: removed the duplicate "cncf.io" literal.
allow = {"github.com","huggingface.co","arxiv.org","news.ycombinator.com","lobste.rs",
         "aws.amazon.com","cloud.google.com","azure.microsoft.com","kubernetes.io","cncf.io",
         "anthropic.com","openai.com","mistral.ai","meta.com","ai.google.dev",
         "datadog.com","newrelic.com","dynatrace.com","grafana.com","prometheus.io",
         "redhat.com","docker.com","hashicorp.com","github.io","medium.com",
         "dev.to","substack.com","blogspot.com"}
if domain not in allow and not any(domain.endswith("."+a) for a in allow):
    # Stamp filtered domains with status -2 so they are never re-considered.
    con.execute("INSERT OR REPLACE INTO visited VALUES (?,?,?,?,?,?,?)",
                (url, int(time.time()), -2, None, domain, depth, 0))
    con.commit()
    print(f"  [skip-domain] {domain}")
    sys.exit(0)

# Fetch: cap body at 2 MB, tolerate any decode errors.
try:
    req = urllib.request.Request(url, headers={
        "User-Agent": "Mozilla/5.0 Surrogate-1/agentic-crawler",
        "Accept": "text/html,application/xhtml+xml"})
    with urllib.request.urlopen(req, timeout=20) as r:
        body = r.read(2_000_000).decode("utf-8", errors="ignore")
        status = r.status
except Exception as e:
    # Stamp failures with status -1 so broken URLs are not retried forever.
    con.execute("INSERT OR REPLACE INTO visited VALUES (?,?,?,?,?,?,?)",
                (url, int(time.time()), -1, None, domain, depth, 0))
    con.commit()
    print(f"  [fail] {url[:80]} :: {type(e).__name__}")
    sys.exit(0)

# Title
m = re.search(r"<title[^>]*>([^<]+)</title>", body, re.IGNORECASE)
title = (m.group(1) if m else "").strip()[:200]
con.execute("INSERT OR REPLACE INTO visited VALUES (?,?,?,?,?,?,?)",
            (url, int(time.time()), status, title, domain, depth, len(body)))
# Fix: commit immediately so the visited stamp persists even if link
# extraction below crashes (previously only committed after the loop).
con.commit()

# Extract links + score
links = re.findall(r'href=["\'](https?://[^"\'#?\s<>]+)', body, re.IGNORECASE)
seen_set = set()
added = 0
for link in links:
    if link in seen_set: continue
    seen_set.add(link)
    if con.execute("SELECT 1 FROM visited WHERE url=?", (link,)).fetchone(): continue
    if con.execute("SELECT 1 FROM frontier WHERE url=?", (link,)).fetchone(): continue
    ldomain = urllib.parse.urlparse(link).netloc
    if not ldomain or len(link) > 500: continue
    # Score: domain relevance + keyword bonus + depth penalty
    score = 0.5
    keywords_high = ("agent","llm","rag","mcp","claude","gpt","coder","devops","sre","kubernetes","terraform")
    keywords_mid = ("ai","ml","cloud","devsec","security","python","typescript","go","rust","blog","paper")
    low = link.lower()
    if any(k in low for k in keywords_high): score += 0.3
    elif any(k in low for k in keywords_mid): score += 0.1
    if ldomain in allow or any(ldomain.endswith("."+a) for a in allow): score += 0.2
    score -= 0.05 * (depth + 1)
    if score < 0.3: continue
    if depth + 1 > 4: continue  # max BFS depth
    con.execute("INSERT OR IGNORE INTO frontier VALUES (?,?,?,?,?)",
                (link, score, depth + 1, url, int(time.time())))
    added += 1
    if added > 30: break  # cap link fan-out per page

con.commit()
con.close()  # fix: release the SQLite handle explicitly
print(f"  [ok {status}] {title[:60]} ← {url[:60]} (+{added} new links)")

# Save fetched page as training pair (page → summary); the summary itself is
# filled in by a later LLM pass — for now only raw page metadata is logged.
text_only = re.sub(r"<[^>]+>", " ", body)
text_only = re.sub(r"\s+", " ", text_only).strip()[:6000]
if len(text_only) > 200:
    pair = {
        "ts": time.time(),
        "source": "agentic-crawler",
        "url": url,
        "title": title,
        "domain": domain,
        "depth": depth,
        "prompt": f"Summarize this page from {domain} (title: {title}):\n\n{text_only[:3000]}",
        "response": f"[crawled {time.strftime('%Y-%m-%d %H:%M')} — auto-summary pending]",
    }
    with open(pairs, "a") as f:
        f.write(json.dumps(pair, ensure_ascii=False) + "\n")
PYEOF
}
# ── Main loop: parallel workers ─────────────────────────────────────────────
PARALLEL="${1:-4}"   # max concurrent fetch workers (default 4)
BATCH_SIZE=20        # URLs popped from the frontier per cycle
echo "[$(date +%H:%M:%S)] crawler start (parallel=$PARALLEL)" | tee -a "$LOG"

while true; do
  # Pop the top-scoring URLs from the frontier (best score first, FIFO ties).
  BATCH=$(sqlite3 "$DB" "SELECT url||'|'||depth FROM frontier ORDER BY score DESC, added_ts ASC LIMIT $BATCH_SIZE;")
  if [[ -z "$BATCH" ]]; then
    echo "[$(date +%H:%M:%S)] frontier empty — sleeping 60s" >> "$LOG"
    sleep 60
    continue
  fi

  # Process the batch with a bounded number of background workers.
  JOBS=0
  while read -r LINE; do
    [[ -z "$LINE" ]] && continue
    # Depth is the LAST '|'-separated field; splitting from the right keeps
    # URLs that themselves contain '|' intact (fix: previous IFS='|' read
    # truncated such URLs at the first pipe).
    DEPTH="${LINE##*|}"
    URL="${LINE%|*}"
    [[ -z "$URL" ]] && continue
    # Remove from frontier. Fix: double embedded single quotes so a URL
    # containing ' cannot break out of the SQL string literal.
    sqlite3 "$DB" "DELETE FROM frontier WHERE url='${URL//\'/\'\'}';" 2>/dev/null
    # Spawn fetch worker
    fetch_one "$URL" "$DEPTH" >> "$LOG" 2>&1 &
    JOBS=$((JOBS + 1))
    if [[ $JOBS -ge $PARALLEL ]]; then
      # wait -n (bash >= 4.3) frees exactly one slot. On older bash (or if a
      # worker exited non-zero) fall back to draining the whole batch and
      # reset the counter to match (fix: previously only 1 was subtracted
      # after a full wait, corrupting the slot accounting).
      if wait -n 2>/dev/null; then
        JOBS=$((JOBS - 1))
      else
        wait
        JOBS=0
      fi
    fi
  done <<< "$BATCH"
  wait   # finish remaining workers before the next batch

  # Per-batch stats
  VISITED=$(sqlite3 "$DB" "SELECT COUNT(*) FROM visited;")
  PENDING=$(sqlite3 "$DB" "SELECT COUNT(*) FROM frontier;")
  echo "[$(date +%H:%M:%S)] batch done · visited=$VISITED · pending=$PENDING" >> "$LOG"

  # Sleep adaptively: short cool-down while the frontier is full, longer when
  # it is nearly empty (reduces rate-limit risk on the seed domains).
  if [[ $PENDING -gt 100 ]]; then
    sleep 5
  elif [[ $PENDING -gt 20 ]]; then
    sleep 15
  else
    sleep 30
  fi
done
bin/skill-synthesis-daemon.sh
ADDED
|
@@ -0,0 +1,148 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
#!/usr/bin/env bash
# Skill-synthesis daemon — reads cloned/scraped repos in /tmp and ~/.hermes/workspace/,
# extracts reusable patterns (functions, prompts, tool definitions, configs), and
# writes them as Surrogate skills under ~/.surrogate/skills/<category>/<slug>/SKILL.md.
#
# Inspired by Voyager paper (skill library) + community skills (anthropic-skills).
# Each pattern → SKILL.md frontmatter + content + example invocation.

# -u + pipefail, deliberately no -e: daemon must survive individual failures.
set -uo pipefail
set -a; source "$HOME/.hermes/.env" 2>/dev/null; set +a

SKILLS_DIR="$HOME/.surrogate/skills"
LOG="$HOME/.claude/logs/skill-synthesis.log"
PAIRS="$HOME/.surrogate/training-pairs.jsonl"
mkdir -p "$SKILLS_DIR" "$(dirname "$LOG")"

echo "[$(date +%H:%M:%S)] skill-synthesis start" | tee -a "$LOG"

# ── Source dirs to scan for patterns ────────────────────────────────────────
SCAN_DIRS=(
  "/tmp/agentic-discovery"
  "$HOME/.hermes/workspace/surrogate-scrape"
  "$HOME/.hermes/workspace/projects"
)

while true; do
  for src in "${SCAN_DIRS[@]}"; do
    [[ ! -d "$src" ]] && continue

    # Find candidate files (small, recent, code/prompt-like)
    find "$src" -type f \( \
        -name "*.md" -o -name "*.py" -o -name "*.ts" -o -name "*.go" -o \
        -name "*.sh" -o -name "*.yaml" -o -name "*.toml" -o -name "*.json" \
      \) -size -50k -mtime -3 2>/dev/null | head -200 | while read -r f; do
      # Content hash for the already-synthesized stamp. Fix: pass the path as
      # argv instead of interpolating "$f" into the -c source — a quote or
      # backslash in a filename previously broke (or injected) the Python.
      HASH=$(/usr/bin/python3 -c 'import hashlib, sys; print(hashlib.md5(open(sys.argv[1], "rb").read()).hexdigest()[:12])' "$f" 2>/dev/null)
      [[ -z "$HASH" ]] && continue
      STAMP="$SKILLS_DIR/.synthesized/$HASH"
      [[ -f "$STAMP" ]] && continue
      mkdir -p "$(dirname "$STAMP")"

      /usr/bin/python3 - "$f" "$SKILLS_DIR" "$PAIRS" "$STAMP" <<'PYEOF' 2>>"$LOG"
import sys, re, json, time, os, hashlib
from pathlib import Path

# argv: source file, skills root dir, training-pairs jsonl, stamp file path
src_path, skills_dir, pairs_log, stamp = sys.argv[1], sys.argv[2], sys.argv[3], sys.argv[4]
src = Path(src_path)
content = src.read_text(errors="ignore")[:30000]

# Detect skill candidates by signal:
patterns = []

# 1. Python functions with descriptive docstrings (>= 40 chars)
for m in re.finditer(r'def (\w+)\([^)]*\)[^:]*:\s*\n\s*"""([^"]{40,500})"""', content):
    name, doc = m.group(1), m.group(2).strip()
    if any(noisy in name.lower() for noisy in ("test_","_test","setup","teardown","__")): continue
    patterns.append(("python-fn", name, doc, m.group(0)[:2000]))

# 2. Tool/function-call schemas (JSON with name+description+parameters)
for m in re.finditer(r'\{\s*"name"\s*:\s*"([^"]+)"\s*,\s*"description"\s*:\s*"([^"]+)"\s*,\s*"parameters"', content):
    patterns.append(("tool-schema", m.group(1), m.group(2), m.group(0)[:1500]))

# 3. Prompt templates (markdown with role headers)
if re.search(r'#+\s*(System|Role|You are|Instructions)', content, re.IGNORECASE):
    title_m = re.search(r'^#\s+(.+)$', content, re.MULTILINE)
    title = title_m.group(1) if title_m else src.stem
    patterns.append(("prompt-template", title[:80], content[:200].replace('\n',' '), content[:3000]))

# 4. Bash function declarations with comment header
for m in re.finditer(r'#\s*(.{20,200})\n([a-z_]+)\(\)\s*\{', content):
    desc, name = m.group(1).strip(), m.group(2)
    if name in ("main","init","cleanup"): continue
    patterns.append(("bash-fn", name, desc, m.group(0)[:1500]))

# Pick top 1 per file (avoid noise); stamp no-pattern files so they are
# never rescanned.
if not patterns:
    Path(stamp).touch()
    sys.exit(0)
ptype, name, summary, snippet = patterns[0]

# Slugify + categorize. Fix: fall back to "pattern" when the name contains no
# usable characters (previously produced an empty directory component).
slug = re.sub(r'[^a-z0-9-]+','-', name.lower()).strip('-')[:50] or "pattern"
category_map = {
    "python-fn":"code-python",
    "tool-schema":"agent-tools",
    "prompt-template":"prompts",
    "bash-fn":"ops-shell",
}
cat = category_map.get(ptype, "misc")
skill_dir = Path(skills_dir) / cat / slug
skill_dir.mkdir(parents=True, exist_ok=True)
skill_file = skill_dir / "SKILL.md"

# Don't overwrite existing skills with same slug — append number
if skill_file.exists():
    n = 2
    while (skill_dir.parent / f"{slug}-{n}").exists(): n += 1
    skill_dir = skill_dir.parent / f"{slug}-{n}"
    skill_dir.mkdir(parents=True, exist_ok=True)
    skill_file = skill_dir / "SKILL.md"

frontmatter = f"""---
name: {name}
type: {ptype}
category: {cat}
source: {src.name}
synthesized_at: {time.strftime('%Y-%m-%dT%H:%M:%SZ')}
---

# {name}

**Source:** `{src}`

## What it does
{summary[:300]}

## Pattern
```
{snippet}
```

## Invocation
[How Surrogate would use this skill — auto-generate via LLM next pass]
"""
skill_file.write_text(frontmatter)

# Push as training pair
pair = {
    "ts": time.time(),
    "source": "skill-synthesis",
    "skill_path": str(skill_file),
    "category": cat,
    "prompt": f"You have learned a new skill of type '{ptype}' named '{name}'. Use it when relevant.\n\nPattern:\n{snippet[:2000]}",
    "response": summary,
}
with open(pairs_log, "a") as f:
    f.write(json.dumps(pair, ensure_ascii=False) + "\n")

Path(stamp).touch()
print(f"  ✨ skill: {cat}/{skill_dir.name} from {src.name}")
PYEOF
    done
  done

  # Stats
  SKILL_COUNT=$(find "$SKILLS_DIR" -name SKILL.md 2>/dev/null | wc -l | tr -d ' ')
  echo "[$(date +%H:%M:%S)] cycle done · total skills=$SKILL_COUNT" >> "$LOG"
  sleep 180  # 3 min between cycles
done
|
bin/surrogate-orchestrate.sh
CHANGED
|
@@ -278,7 +278,7 @@ PYEOF
|
|
| 278 |
fi
|
| 279 |
}
|
| 280 |
|
| 281 |
-
# ββ Stage 1: SOLUTION ARCHITECT ββ
|
| 282 |
SA_OUT="$WORKDIR/1-sa-design.md"
|
| 283 |
echo "${MA}${B}βββ Stage 1/6: SOLUTION ARCHITECT${R} ${D}β DDD + design patterns${R}"
|
| 284 |
call_agent "solution-architect" "
|
|
@@ -293,16 +293,19 @@ Cover (each as a heading):
|
|
| 293 |
6. **Non-functional impacts** β perf, security, scale, observability
|
| 294 |
7. **Risks + mitigations**
|
| 295 |
|
| 296 |
-
Be concrete.
|
| 297 |
|
| 298 |
Task: $TASK
|
| 299 |
" "$SA_OUT"
|
| 300 |
|
| 301 |
-
# ββ
|
| 302 |
ARCH_OUT="$WORKDIR/2-architect-plan.md"
|
|
|
|
| 303 |
echo ""
|
| 304 |
-
echo "${MA}${B}βββ
|
| 305 |
-
|
|
|
|
|
|
|
| 306 |
You are the Tech Architect. Take the SA design (at $SA_OUT) and produce a CONCRETE file-level execution plan.
|
| 307 |
|
| 308 |
Required headings:
|
|
@@ -313,31 +316,20 @@ Required headings:
|
|
| 313 |
5. **Migration plan** β schema/config rollouts
|
| 314 |
6. **Rollback** β how to undo on prod failure
|
| 315 |
|
| 316 |
-
Read 3β5 similar files first (read/grep) to follow existing patterns.
|
| 317 |
-
|
| 318 |
Task: $TASK
|
| 319 |
" "$ARCH_OUT"
|
|
|
|
|
|
|
| 320 |
|
| 321 |
-
|
| 322 |
-
|
| 323 |
-
echo "${B}βΈ Plan-only mode β stopping after architect${R}"
|
| 324 |
-
[[ -f "$ARCH_OUT" ]] && cat "$ARCH_OUT"
|
| 325 |
-
exit 0
|
| 326 |
-
fi
|
| 327 |
-
|
| 328 |
-
# ββ Stage 3: QA-FIRST (TDD tests) ββ
|
| 329 |
-
TDD_OUT="$WORKDIR/3-qa-tdd-tests.md"
|
| 330 |
-
echo ""
|
| 331 |
-
echo "${MA}${B}βββ Stage 3/6: QA-FIRST (TDD)${R} ${D}β failing tests first${R}"
|
| 332 |
-
call_agent "qa" "
|
| 333 |
You are the QA Engineer practicing TDD. Output FAILING test code BEFORE the dev writes any implementation.
|
| 334 |
|
| 335 |
Inputs:
|
| 336 |
-
- SA design: $SA_OUT
|
| 337 |
-
- Architect plan: $ARCH_OUT
|
| 338 |
|
| 339 |
Required output:
|
| 340 |
-
1. List of test file paths
|
| 341 |
2. Full test code for each file as fenced code blocks (\`\`\`python / \`\`\`typescript / etc.)
|
| 342 |
3. Each test: one assertion, factory functions for fixtures, descriptive name
|
| 343 |
4. Cover: happy path, edge cases, error paths, security boundaries
|
|
@@ -347,6 +339,18 @@ NO implementation code β only tests.
|
|
| 347 |
|
| 348 |
Task: $TASK
|
| 349 |
" "$TDD_OUT"
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 350 |
|
| 351 |
# ββ Stage 4: DEV ββ
|
| 352 |
DEV_OUT="$WORKDIR/4-dev-summary.md"
|
|
@@ -412,11 +416,23 @@ print(f" total {written} files written")
|
|
| 412 |
PYEOF
|
| 413 |
fi
|
| 414 |
|
| 415 |
-
# ββ
|
| 416 |
QA_OUT="$WORKDIR/5-qa-verify.md"
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 417 |
echo ""
|
| 418 |
-
|
| 419 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 420 |
You are QA in verification phase. Verify the dev's claim that tests pass.
|
| 421 |
|
| 422 |
Inputs:
|
|
@@ -431,13 +447,12 @@ Output:
|
|
| 431 |
|
| 432 |
Task: $TASK
|
| 433 |
" "$QA_OUT"
|
|
|
|
|
|
|
| 434 |
|
| 435 |
-
|
| 436 |
-
|
| 437 |
-
|
| 438 |
-
echo ""
|
| 439 |
-
echo "${MA}${B}βββ Stage 6a/6: OPS${R} ${D}β deploy + infra${R}"
|
| 440 |
-
call_agent "ops" "
|
| 441 |
Review infrastructure aspects of this task.
|
| 442 |
- Dockerfile / helm / terraform / cloudformation validity
|
| 443 |
- Secrets / env var handling
|
|
@@ -448,10 +463,14 @@ Review infrastructure aspects of this task.
|
|
| 448 |
Inputs: $DEV_OUT
|
| 449 |
Task: $TASK
|
| 450 |
" "$OPS_OUT"
|
|
|
|
|
|
|
|
|
|
| 451 |
else
|
| 452 |
-
|
| 453 |
echo "${GY}βββ Stage 6a/6: OPS β skipped (not infra task)${R}"
|
| 454 |
fi
|
|
|
|
| 455 |
|
| 456 |
# ββ Stage 6: REVIEWER ββ
|
| 457 |
REVIEW_OUT="$WORKDIR/6-review-verdict.md"
|
|
|
|
| 278 |
fi
|
| 279 |
}
|
| 280 |
|
| 281 |
+
# ββ Stage 1: SOLUTION ARCHITECT (must run first β blocks everything) ββ
|
| 282 |
SA_OUT="$WORKDIR/1-sa-design.md"
|
| 283 |
echo "${MA}${B}βββ Stage 1/6: SOLUTION ARCHITECT${R} ${D}β DDD + design patterns${R}"
|
| 284 |
call_agent "solution-architect" "
|
|
|
|
| 293 |
6. **Non-functional impacts** β perf, security, scale, observability
|
| 294 |
7. **Risks + mitigations**
|
| 295 |
|
| 296 |
+
Be concrete. No platitudes.
|
| 297 |
|
| 298 |
Task: $TASK
|
| 299 |
" "$SA_OUT"
|
| 300 |
|
| 301 |
+
# ββ Stages 2 + 3 in PARALLEL β both depend only on SA, independent of each other ββ
|
| 302 |
ARCH_OUT="$WORKDIR/2-architect-plan.md"
|
| 303 |
+
TDD_OUT="$WORKDIR/3-qa-tdd-tests.md"
|
| 304 |
echo ""
|
| 305 |
+
echo "${MA}${B}βββ Stages 2+3 (parallel): ARCHITECT β QA-TDD${R}"
|
| 306 |
+
|
| 307 |
+
(
|
| 308 |
+
call_agent "architect" "
|
| 309 |
You are the Tech Architect. Take the SA design (at $SA_OUT) and produce a CONCRETE file-level execution plan.
|
| 310 |
|
| 311 |
Required headings:
|
|
|
|
| 316 |
5. **Migration plan** β schema/config rollouts
|
| 317 |
6. **Rollback** β how to undo on prod failure
|
| 318 |
|
|
|
|
|
|
|
| 319 |
Task: $TASK
|
| 320 |
" "$ARCH_OUT"
|
| 321 |
+
) &
|
| 322 |
+
PID_ARCH=$!
|
| 323 |
|
| 324 |
+
(
|
| 325 |
+
call_agent "qa" "
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 326 |
You are the QA Engineer practicing TDD. Output FAILING test code BEFORE the dev writes any implementation.
|
| 327 |
|
| 328 |
Inputs:
|
| 329 |
+
- SA design: $SA_OUT (read it for design context)
|
|
|
|
| 330 |
|
| 331 |
Required output:
|
| 332 |
+
1. List of test file paths
|
| 333 |
2. Full test code for each file as fenced code blocks (\`\`\`python / \`\`\`typescript / etc.)
|
| 334 |
3. Each test: one assertion, factory functions for fixtures, descriptive name
|
| 335 |
4. Cover: happy path, edge cases, error paths, security boundaries
|
|
|
|
| 339 |
|
| 340 |
Task: $TASK
|
| 341 |
" "$TDD_OUT"
|
| 342 |
+
) &
|
| 343 |
+
PID_QA=$!
|
| 344 |
+
|
| 345 |
+
wait $PID_ARCH $PID_QA
|
| 346 |
+
echo "${D} parallel stages 2+3 complete${R}"
|
| 347 |
+
|
| 348 |
+
if [[ "$MODE" == "plan" ]]; then
|
| 349 |
+
echo ""
|
| 350 |
+
echo "${B}βΈ Plan-only mode β stopping after architect${R}"
|
| 351 |
+
[[ -f "$ARCH_OUT" ]] && cat "$ARCH_OUT"
|
| 352 |
+
exit 0
|
| 353 |
+
fi
|
| 354 |
|
| 355 |
# ββ Stage 4: DEV ββ
|
| 356 |
DEV_OUT="$WORKDIR/4-dev-summary.md"
|
|
|
|
| 416 |
PYEOF
|
| 417 |
fi
|
| 418 |
|
| 419 |
+
# ββ Stages 5 + 6a in PARALLEL β both depend on dev, independent of each other ββ
|
| 420 |
QA_OUT="$WORKDIR/5-qa-verify.md"
|
| 421 |
+
OPS_OUT="$WORKDIR/6a-ops-checklist.md"
|
| 422 |
+
NEED_OPS=0
|
| 423 |
+
if echo "$TASK" | /usr/bin/grep -iqE "deploy|docker|helm|k8s|terraform|cicd|ci/cd|cloudformation|buildspec|ecs|lambda"; then
|
| 424 |
+
NEED_OPS=1
|
| 425 |
+
fi
|
| 426 |
+
|
| 427 |
echo ""
|
| 428 |
+
if [[ $NEED_OPS -eq 1 ]]; then
|
| 429 |
+
echo "${MA}${B}βββ Stages 5+6a (parallel): QA-VERIFY β OPS${R}"
|
| 430 |
+
else
|
| 431 |
+
echo "${MA}${B}βββ Stage 5/6: QA-VERIFY${R}"
|
| 432 |
+
fi
|
| 433 |
+
|
| 434 |
+
(
|
| 435 |
+
call_agent "qa" "
|
| 436 |
You are QA in verification phase. Verify the dev's claim that tests pass.
|
| 437 |
|
| 438 |
Inputs:
|
|
|
|
| 447 |
|
| 448 |
Task: $TASK
|
| 449 |
" "$QA_OUT"
|
| 450 |
+
) &
|
| 451 |
+
PID_QA2=$!
|
| 452 |
|
| 453 |
+
if [[ $NEED_OPS -eq 1 ]]; then
|
| 454 |
+
(
|
| 455 |
+
call_agent "ops" "
|
|
|
|
|
|
|
|
|
|
| 456 |
Review infrastructure aspects of this task.
|
| 457 |
- Dockerfile / helm / terraform / cloudformation validity
|
| 458 |
- Secrets / env var handling
|
|
|
|
| 463 |
Inputs: $DEV_OUT
|
| 464 |
Task: $TASK
|
| 465 |
" "$OPS_OUT"
|
| 466 |
+
) &
|
| 467 |
+
PID_OPS=$!
|
| 468 |
+
wait $PID_QA2 $PID_OPS
|
| 469 |
else
|
| 470 |
+
wait $PID_QA2
|
| 471 |
echo "${GY}βββ Stage 6a/6: OPS β skipped (not infra task)${R}"
|
| 472 |
fi
|
| 473 |
+
echo "${D} parallel stages 5+6a complete${R}"
|
| 474 |
|
| 475 |
# ββ Stage 6: REVIEWER ββ
|
| 476 |
REVIEW_OUT="$WORKDIR/6-review-verdict.md"
|
start.sh
CHANGED
|
@@ -144,32 +144,35 @@ if [[ -n "${DISCORD_BOT_TOKEN:-}" ]]; then
|
|
| 144 |
echo "[$(date +%H:%M:%S)] discord bot started"
|
| 145 |
fi
|
| 146 |
|
| 147 |
-
# ββ 7a. Continuous scrape daemon (
|
| 148 |
cat > /tmp/scrape-daemon.sh <<'SCRAPESH'
|
| 149 |
#!/bin/bash
|
| 150 |
-
#
|
| 151 |
set -a; source ~/.hermes/.env 2>/dev/null; set +a
|
| 152 |
LOG="${HOME}/.claude/logs/scrape-continuous.log"
|
| 153 |
mkdir -p "$(dirname "$LOG")"
|
| 154 |
while true; do
|
| 155 |
START=$(date +%s)
|
| 156 |
-
|
| 157 |
-
bash ~/.claude/bin/domain-scrape-loop.sh 800 4 >> "$LOG" 2>&1
|
| 158 |
DUR=$(( $(date +%s) - START ))
|
| 159 |
-
#
|
| 160 |
-
|
| 161 |
-
|
| 162 |
-
|
| 163 |
-
elif [[ $DUR -lt 300 ]]; then
|
| 164 |
-
sleep 60
|
| 165 |
-
else
|
| 166 |
-
sleep 30
|
| 167 |
fi
|
| 168 |
done
|
| 169 |
SCRAPESH
|
| 170 |
chmod +x /tmp/scrape-daemon.sh
|
| 171 |
nohup /tmp/scrape-daemon.sh > "$LOG_DIR/scrape-daemon.log" 2>&1 &
|
| 172 |
-
echo "[$(date +%H:%M:%S)] continuous scrape daemon started" >> "$LOG_DIR/boot.log"
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 173 |
|
| 174 |
# ββ 7b. Cron loop β non-scrape daemons (scrape now runs continuously above) β
|
| 175 |
cat > /tmp/hermes-cron.sh <<'CRONSH'
|
|
@@ -183,8 +186,8 @@ while true; do
|
|
| 183 |
[[ $((M % 2)) -eq 0 ]] && bash ~/.claude/bin/surrogate-dev-loop.sh 1 >> "$LOG" 2>&1 &
|
| 184 |
# Every 5 min: producer pushes priorities to Redis
|
| 185 |
[[ $((M % 5)) -eq 0 ]] && bash ~/.claude/bin/work-queue-producer.sh >> "$LOG" 2>&1 &
|
| 186 |
-
# Every
|
| 187 |
-
[[ $((M %
|
| 188 |
# Every 20 min: full orchestrate chain (architect β dev β qa β reviewer + git push)
|
| 189 |
[[ $((M % 20)) -eq 0 ]] && bash ~/.claude/bin/auto-orchestrate-loop.sh >> "$LOG" 2>&1 &
|
| 190 |
# Every 30 min: research-apply (pop queue β orchestrate β ship feature)
|
|
|
|
| 144 |
echo "[$(date +%H:%M:%S)] discord bot started"
|
| 145 |
fi
|
| 146 |
|
| 147 |
+
# ββ 7a. Continuous scrape daemon (parallel 8 workers, ~10s cool-down) ββββββ
|
| 148 |
cat > /tmp/scrape-daemon.sh <<'SCRAPESH'
|
| 149 |
#!/bin/bash
|
| 150 |
+
# 8 concurrent scrape workers, near-zero idle time.
|
| 151 |
set -a; source ~/.hermes/.env 2>/dev/null; set +a
|
| 152 |
LOG="${HOME}/.claude/logs/scrape-continuous.log"
|
| 153 |
mkdir -p "$(dirname "$LOG")"
|
| 154 |
while true; do
|
| 155 |
START=$(date +%s)
|
| 156 |
+
bash ~/.claude/bin/domain-scrape-loop.sh 1500 8 >> "$LOG" 2>&1
|
|
|
|
| 157 |
DUR=$(( $(date +%s) - START ))
|
| 158 |
+
# Tight cool-downs β cloud has unlimited bandwidth, only rate-limit concern
|
| 159 |
+
if [[ $DUR -lt 30 ]]; then sleep 30 # queue likely exhausted, give it time
|
| 160 |
+
elif [[ $DUR -lt 120 ]]; then sleep 15
|
| 161 |
+
else sleep 5
|
|
|
|
|
|
|
|
|
|
|
|
|
| 162 |
fi
|
| 163 |
done
|
| 164 |
SCRAPESH
|
| 165 |
chmod +x /tmp/scrape-daemon.sh
|
| 166 |
nohup /tmp/scrape-daemon.sh > "$LOG_DIR/scrape-daemon.log" 2>&1 &
|
| 167 |
+
echo "[$(date +%H:%M:%S)] continuous scrape daemon (parallel=8) started" >> "$LOG_DIR/boot.log"
|
| 168 |
+
|
| 169 |
+
# ββ 7b. Agentic crawler (URL frontier + visited stamps + link discovery) ββββ
|
| 170 |
+
nohup bash ~/.claude/bin/agentic-crawler.sh 6 > "$LOG_DIR/agentic-crawler.log" 2>&1 &
|
| 171 |
+
echo "[$(date +%H:%M:%S)] agentic crawler started (parallel=6)" >> "$LOG_DIR/boot.log"
|
| 172 |
+
|
| 173 |
+
# ββ 7c. Skill-synthesis daemon (extract patterns from cloned repos β skills) β
|
| 174 |
+
nohup bash ~/.claude/bin/skill-synthesis-daemon.sh > "$LOG_DIR/skill-synthesis.log" 2>&1 &
|
| 175 |
+
echo "[$(date +%H:%M:%S)] skill-synthesis daemon started" >> "$LOG_DIR/boot.log"
|
| 176 |
|
| 177 |
# ββ 7b. Cron loop β non-scrape daemons (scrape now runs continuously above) β
|
| 178 |
cat > /tmp/hermes-cron.sh <<'CRONSH'
|
|
|
|
| 186 |
[[ $((M % 2)) -eq 0 ]] && bash ~/.claude/bin/surrogate-dev-loop.sh 1 >> "$LOG" 2>&1 &
|
| 187 |
# Every 5 min: producer pushes priorities to Redis
|
| 188 |
[[ $((M % 5)) -eq 0 ]] && bash ~/.claude/bin/work-queue-producer.sh >> "$LOG" 2>&1 &
|
| 189 |
+
# Every 3 min: training-pair push to HF (drains ~/.surrogate/training-pairs.jsonl)
|
| 190 |
+
[[ $((M % 3)) -eq 0 ]] && bash ~/.claude/bin/push-training-to-hf.sh >> "$LOG" 2>&1 &
|
| 191 |
# Every 20 min: full orchestrate chain (architect β dev β qa β reviewer + git push)
|
| 192 |
[[ $((M % 20)) -eq 0 ]] && bash ~/.claude/bin/auto-orchestrate-loop.sh >> "$LOG" 2>&1 &
|
| 193 |
# Every 30 min: research-apply (pop queue β orchestrate β ship feature)
|