Ashira Pitchayapakayakul committed on
Commit
023ab84
·
1 Parent(s): 3cbaec6

fix: strip Mac /usr/bin/* hardcoded paths + expand dataset-enrich to 21 sources

Browse files

- All /usr/bin/python3 → python3 (PATH-based; works on Linux container)
- Same for curl/head/tail/grep/sed/awk
- dataset-enrich.sh: 9 → 21 sources covering DevSecOps gaps:
+ 4 CVE/CWE: AlicanKiraz0, CyberNative DPO, DiverseVul, CodeXGLUE
+ 3 function-calling: Salesforce/xLAM, Glaive-v2, Hermes-FC
+ 3 code instr: m-a-p CodeFeedback × 2, dolphin-coder
+ 1 agentic: orca-agentinstruct-1M
+ 1 review: VatsaDev/code-review
- 7 new schema branches: system-user-assistant, dpo-question, code-defect-cwe,
code-defect, tools-query-answers, system-chat, system-question-resp
- Researched + selected by spawned agent (background, parallel work pattern)

bin/agentic-crawler.sh CHANGED
@@ -40,7 +40,7 @@ SQL
40
  COUNT=$(sqlite3 "$DB" "SELECT COUNT(*) FROM frontier;")
41
  if [[ $COUNT -lt 5 ]]; then
42
  echo "[$(date +%H:%M:%S)] seeding frontier" | tee -a "$LOG"
43
- /usr/bin/python3 - "$DB" <<'PYEOF'
44
  import sqlite3, sys, time
45
  db = sys.argv[1]
46
  seeds = [
@@ -82,7 +82,7 @@ fi
82
  # ── Worker: fetch one URL, extract links, score, push back to frontier ─────
83
  fetch_one() {
84
  local url="$1" depth="$2"
85
- /usr/bin/python3 - "$url" "$depth" "$DB" "$PAIRS" "${HF_TOKEN:-}" <<'PYEOF' 2>&1
86
  import sys, sqlite3, urllib.request, urllib.parse, re, time, json, os
87
  url, depth, db, pairs, hf_token = sys.argv[1], int(sys.argv[2]), sys.argv[3], sys.argv[4], sys.argv[5]
88
  con = sqlite3.connect(db)
 
40
  COUNT=$(sqlite3 "$DB" "SELECT COUNT(*) FROM frontier;")
41
  if [[ $COUNT -lt 5 ]]; then
42
  echo "[$(date +%H:%M:%S)] seeding frontier" | tee -a "$LOG"
43
+ python3 - "$DB" <<'PYEOF'
44
  import sqlite3, sys, time
45
  db = sys.argv[1]
46
  seeds = [
 
82
  # ── Worker: fetch one URL, extract links, score, push back to frontier ─────
83
  fetch_one() {
84
  local url="$1" depth="$2"
85
+ python3 - "$url" "$depth" "$DB" "$PAIRS" "${HF_TOKEN:-}" <<'PYEOF' 2>&1
86
  import sys, sqlite3, urllib.request, urllib.parse, re, time, json, os
87
  url, depth, db, pairs, hf_token = sys.argv[1], int(sys.argv[2]), sys.argv[3], sys.argv[4], sys.argv[5]
88
  con = sqlite3.connect(db)
bin/ai-fallback.sh CHANGED
@@ -45,17 +45,17 @@ while [ $# -gt 0 ]; do
45
  *) QUERY="$QUERY $1"; shift ;;
46
  esac
47
  done
48
- QUERY=$(echo "$QUERY" | /usr/bin/sed 's/^ *//')
49
- [ -z "$QUERY" ] && { /usr/bin/head -15 "$0"; exit 1; }
50
 
51
  # --task <type> — pick the strongest free model per provider for the task.
52
  # Sets per-provider env vars that try_* functions read (bridge --model alias).
53
  # Auto-detect if not provided: code keywords → coding, reasoning keywords → reasoning.
54
  if [ -z "$TASK" ]; then
55
  q_lower=$(echo "$QUERY" | /usr/bin/tr '[:upper:]' '[:lower:]')
56
- if echo "$q_lower" | /usr/bin/grep -qE "code|function|implement|refactor|bug|class|method|api|sql|terraform|cloudformation|dockerfile|kubernetes|yaml|typescript|javascript|python|rust|golang"; then
57
  TASK="coding"
58
- elif echo "$q_lower" | /usr/bin/grep -qE "analyze|reason|explain why|compare|evaluate|architect|design|trade-?off|deep|think step|proof|calculate|complex"; then
59
  TASK="reasoning"
60
  fi
61
  fi
@@ -100,7 +100,7 @@ if [[ "$TASK" == "coding" || "$TASK" == "reasoning" || "$TASK" == "creative" ]];
100
  if [[ -f "$HOME/.surrogate/embeddings.db" ]]; then
101
  EMB_COUNT=$(/usr/bin/sqlite3 "$HOME/.surrogate/embeddings.db" 'SELECT COUNT(*) FROM embeddings' 2>/dev/null || echo 0)
102
  if [[ "$EMB_COUNT" -ge 100 ]]; then
103
- SEM_CONTEXT=$(/usr/bin/python3 "$HOME/.surrogate/bin/embed-doc.py" --query "$QUERY" 2>/dev/null | /usr/bin/head -15)
104
  if [[ -n "$SEM_CONTEXT" ]]; then
105
  QUERY="=== RAG CONTEXT (top-5 semantic matches from knowledge base) ===
106
  $SEM_CONTEXT
@@ -124,13 +124,13 @@ save_response() {
124
  # --- System prompt from knowledge base + auto code-search if code query ---
125
  build_system_prompt() {
126
  local kb="" profile="" code_ctx="" q_lower
127
- [ -f "$HOME/.surrogate/memory/knowledge_index.md" ] && kb="$(/usr/bin/head -50 $HOME/.surrogate/memory/knowledge_index.md)"
128
  [ -f "$HOME/.surrogate/memory/user_profile.md" ] && profile="$(cat $HOME/.surrogate/memory/user_profile.md)"
129
 
130
  q_lower=$(echo "$QUERY" | /usr/bin/tr '[:upper:]' '[:lower:]')
131
  local is_generate=0 is_code=0
132
- echo "$q_lower" | /usr/bin/grep -qE "code|function|implement|refactor|bug|error|class|method|api|endpoint|schema|model|service|controller|middleware|auth|database|query|sql|deploy|pipeline|terraform|cloudformation|dockerfile|kubernetes|helm|yaml" && is_code=1
133
- echo "$q_lower" | /usr/bin/grep -qE "create|generate|write|build|new|template|scaffold|design" && is_generate=1
134
 
135
  if [ "$is_code" = "1" ] && [ -d "$HOME/.surrogate/code-vector-db" ]; then
136
  if [ "$is_generate" = "1" ] && [ -x "$HOME/.surrogate/bin/find-gold-examples.sh" ]; then
@@ -138,7 +138,7 @@ build_system_prompt() {
138
  code_ctx=$("$HOME/.surrogate/bin/find-gold-examples.sh" --top 2 --max-bytes 5000 "$QUERY" 2>/dev/null)
139
  elif [ -x "$HOME/.surrogate/bin/code-search.sh" ]; then
140
  # Query task → snippets only (faster)
141
- code_ctx=$("$HOME/.surrogate/bin/code-search.sh" --top 3 "$QUERY" 2>/dev/null | /usr/bin/head -60)
142
  fi
143
  fi
144
 
@@ -238,7 +238,7 @@ m = {'model':os.environ['ORM'],'max_tokens':4000,
238
  print(json.dumps(m))
239
  " 2>&1) || { log " body-build failed: $body"; return 1; }
240
  local resp code body_resp
241
- resp=$(/usr/bin/curl -sS -w "\n%{http_code}" \
242
  --max-time 90 \
243
  -X POST "https://openrouter.ai/api/v1/chat/completions" \
244
  -H "Authorization: Bearer $OPENROUTER_API_KEY" \
@@ -246,8 +246,8 @@ print(json.dumps(m))
246
  -H "X-Title: ai-fallback" \
247
  -H "content-type: application/json" \
248
  -d "$body" 2>&1)
249
- code=$(echo "$resp" | /usr/bin/tail -1)
250
- body_resp=$(echo "$resp" | /usr/bin/sed '$d')
251
  if [ "$code" != "200" ]; then
252
  # Log real error reason for debug
253
  local errmsg
@@ -284,11 +284,11 @@ m = {'systemInstruction':{'parts':[{'text':'''$SYSTEM'''}]},
284
  print(json.dumps(m))
285
  " 2>/dev/null)
286
  local resp code body_resp
287
- resp=$(/usr/bin/curl -sS -w "\n%{http_code}" \
288
  -X POST "https://generativelanguage.googleapis.com/v1beta/models/$model:generateContent?key=$GEMINI_API_KEY" \
289
  -H "content-type: application/json" -d "$body" 2>&1)
290
- code=$(echo "$resp" | /usr/bin/tail -1)
291
- body_resp=$(echo "$resp" | /usr/bin/sed '$d')
292
  [ "$code" != "200" ] && { log " [$code] falling through"; return 1; }
293
  local out
294
  out=$(echo "$body_resp" | "$HOME/.surrogate/venv/bin/python" -c "
@@ -361,7 +361,7 @@ try_cloudflare() {
361
  # gemma4:26b BLOCKED — user directive (too slow for this hw).
362
  try_granite() {
363
  # Check ollama running
364
- /usr/bin/curl -sS --max-time 3 http://localhost:11434/api/tags > /dev/null 2>&1 || return 2
365
  local alias="${LOCAL_MODEL:-granite}"
366
  log "β†’ Local Ollama: $alias (free, always-on)"
367
  local out
 
45
  *) QUERY="$QUERY $1"; shift ;;
46
  esac
47
  done
48
+ QUERY=$(echo "$QUERY" | sed 's/^ *//')
49
+ [ -z "$QUERY" ] && { head -15 "$0"; exit 1; }
50
 
51
  # --task <type> — pick the strongest free model per provider for the task.
52
  # Sets per-provider env vars that try_* functions read (bridge --model alias).
53
  # Auto-detect if not provided: code keywords → coding, reasoning keywords → reasoning.
54
  if [ -z "$TASK" ]; then
55
  q_lower=$(echo "$QUERY" | /usr/bin/tr '[:upper:]' '[:lower:]')
56
+ if echo "$q_lower" | grep -qE "code|function|implement|refactor|bug|class|method|api|sql|terraform|cloudformation|dockerfile|kubernetes|yaml|typescript|javascript|python|rust|golang"; then
57
  TASK="coding"
58
+ elif echo "$q_lower" | grep -qE "analyze|reason|explain why|compare|evaluate|architect|design|trade-?off|deep|think step|proof|calculate|complex"; then
59
  TASK="reasoning"
60
  fi
61
  fi
 
100
  if [[ -f "$HOME/.surrogate/embeddings.db" ]]; then
101
  EMB_COUNT=$(/usr/bin/sqlite3 "$HOME/.surrogate/embeddings.db" 'SELECT COUNT(*) FROM embeddings' 2>/dev/null || echo 0)
102
  if [[ "$EMB_COUNT" -ge 100 ]]; then
103
+ SEM_CONTEXT=$(python3 "$HOME/.surrogate/bin/embed-doc.py" --query "$QUERY" 2>/dev/null | head -15)
104
  if [[ -n "$SEM_CONTEXT" ]]; then
105
  QUERY="=== RAG CONTEXT (top-5 semantic matches from knowledge base) ===
106
  $SEM_CONTEXT
 
124
  # --- System prompt from knowledge base + auto code-search if code query ---
125
  build_system_prompt() {
126
  local kb="" profile="" code_ctx="" q_lower
127
+ [ -f "$HOME/.surrogate/memory/knowledge_index.md" ] && kb="$(head -50 $HOME/.surrogate/memory/knowledge_index.md)"
128
  [ -f "$HOME/.surrogate/memory/user_profile.md" ] && profile="$(cat $HOME/.surrogate/memory/user_profile.md)"
129
 
130
  q_lower=$(echo "$QUERY" | /usr/bin/tr '[:upper:]' '[:lower:]')
131
  local is_generate=0 is_code=0
132
+ echo "$q_lower" | grep -qE "code|function|implement|refactor|bug|error|class|method|api|endpoint|schema|model|service|controller|middleware|auth|database|query|sql|deploy|pipeline|terraform|cloudformation|dockerfile|kubernetes|helm|yaml" && is_code=1
133
+ echo "$q_lower" | grep -qE "create|generate|write|build|new|template|scaffold|design" && is_generate=1
134
 
135
  if [ "$is_code" = "1" ] && [ -d "$HOME/.surrogate/code-vector-db" ]; then
136
  if [ "$is_generate" = "1" ] && [ -x "$HOME/.surrogate/bin/find-gold-examples.sh" ]; then
 
138
  code_ctx=$("$HOME/.surrogate/bin/find-gold-examples.sh" --top 2 --max-bytes 5000 "$QUERY" 2>/dev/null)
139
  elif [ -x "$HOME/.surrogate/bin/code-search.sh" ]; then
140
  # Query task → snippets only (faster)
141
+ code_ctx=$("$HOME/.surrogate/bin/code-search.sh" --top 3 "$QUERY" 2>/dev/null | head -60)
142
  fi
143
  fi
144
 
 
238
  print(json.dumps(m))
239
  " 2>&1) || { log " body-build failed: $body"; return 1; }
240
  local resp code body_resp
241
+ resp=$(curl -sS -w "\n%{http_code}" \
242
  --max-time 90 \
243
  -X POST "https://openrouter.ai/api/v1/chat/completions" \
244
  -H "Authorization: Bearer $OPENROUTER_API_KEY" \
 
246
  -H "X-Title: ai-fallback" \
247
  -H "content-type: application/json" \
248
  -d "$body" 2>&1)
249
+ code=$(echo "$resp" | tail -1)
250
+ body_resp=$(echo "$resp" | sed '$d')
251
  if [ "$code" != "200" ]; then
252
  # Log real error reason for debug
253
  local errmsg
 
284
  print(json.dumps(m))
285
  " 2>/dev/null)
286
  local resp code body_resp
287
+ resp=$(curl -sS -w "\n%{http_code}" \
288
  -X POST "https://generativelanguage.googleapis.com/v1beta/models/$model:generateContent?key=$GEMINI_API_KEY" \
289
  -H "content-type: application/json" -d "$body" 2>&1)
290
+ code=$(echo "$resp" | tail -1)
291
+ body_resp=$(echo "$resp" | sed '$d')
292
  [ "$code" != "200" ] && { log " [$code] falling through"; return 1; }
293
  local out
294
  out=$(echo "$body_resp" | "$HOME/.surrogate/venv/bin/python" -c "
 
361
  # gemma4:26b BLOCKED — user directive (too slow for this hw).
362
  try_granite() {
363
  # Check ollama running
364
+ curl -sS --max-time 3 http://localhost:11434/api/tags > /dev/null 2>&1 || return 2
365
  local alias="${LOCAL_MODEL:-granite}"
366
  log "β†’ Local Ollama: $alias (free, always-on)"
367
  local out
bin/dataset-enrich.sh CHANGED
@@ -17,13 +17,13 @@
17
  set -uo pipefail
18
  set -a; source "$HOME/.hermes/.env" 2>/dev/null; set +a
19
 
20
- LOG="$HOME/.surrogate/logs/dataset-enrich.log"
21
  WORK="$HOME/.hermes/workspace/dataset-enrich"
22
  mkdir -p "$WORK" "$(dirname "$LOG")"
23
 
24
  echo "[$(date +%H:%M:%S)] dataset enrich start" | tee "$LOG"
25
 
26
- ~/.surrogate/venv/bin/python <<'PYEOF' 2>&1 | tee -a "$LOG"
27
  from huggingface_hub import HfApi
28
  from pathlib import Path
29
  from datasets import load_dataset
@@ -36,19 +36,33 @@ api = HfApi()
36
  # (id, license, slug, schema_hint, per_dataset_cap)
37
  DATASETS = [
38
  # ── Coding instruction-tuning ────────────────────────────────────────────
39
- ("ise-uiuc/Magicoder-OSS-Instruct-75K", "MIT", "magicoder-oss", "instr-resp", 75000),
40
- ("ise-uiuc/Magicoder-Evol-Instruct-110K", "Apache", "magicoder-evol", "instr-resp", 110000),
41
- ("theblackcat102/evol-codealpaca-v1", "Apache", "evol-codealpaca", "instr-resp", 100000),
42
- # ── Multi-turn dialogue (helpful assistant style) ───────────────────────
43
- ("HuggingFaceH4/ultrachat_200k", "MIT", "ultrachat", "messages", 200000),
44
- ("Open-Orca/SlimOrca-Dedup", "MIT", "slim-orca", "conversations",150000),
45
- # ── Real commits (code review / PR training) ────────────────────────────
46
- ("bigcode/commitpackft", "MIT", "commitpackft", "commit", 80000),
 
 
 
 
 
 
 
 
 
 
 
 
 
 
47
  # ── Reasoning / math ────────────────────────────────────────────────────
48
- ("TIGER-Lab/MathInstruct", "MIT", "math-instruct", "instr-resp", 60000),
49
- ("meta-math/MetaMathQA", "MIT", "metamath", "query-resp", 50000),
50
  # ── Helpfulness preferences ─────────────────────────────────────────────
51
- ("Anthropic/hh-rlhf", "MIT", "hh-rlhf", "chosen-rejected",40000),
52
  ]
53
 
54
  # 1. Existing axentx hashes for dedup
@@ -117,6 +131,32 @@ with open(out_path, "w") as out:
117
  elif schema == "chosen-rejected":
118
  prompt = str(row.get("chosen","")[:200] or row.get("prompt",""))
119
  response = str(row.get("chosen",""))
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
120
  else:
121
  continue
122
 
 
17
  set -uo pipefail
18
  set -a; source "$HOME/.hermes/.env" 2>/dev/null; set +a
19
 
20
+ LOG="$HOME/.claude/logs/dataset-enrich.log"
21
  WORK="$HOME/.hermes/workspace/dataset-enrich"
22
  mkdir -p "$WORK" "$(dirname "$LOG")"
23
 
24
  echo "[$(date +%H:%M:%S)] dataset enrich start" | tee "$LOG"
25
 
26
+ ~/.claude/venv/bin/python <<'PYEOF' 2>&1 | tee -a "$LOG"
27
  from huggingface_hub import HfApi
28
  from pathlib import Path
29
  from datasets import load_dataset
 
36
  # (id, license, slug, schema_hint, per_dataset_cap)
37
  DATASETS = [
38
  # ── Coding instruction-tuning ────────────────────────────────────────────
39
+ ("ise-uiuc/Magicoder-OSS-Instruct-75K", "MIT", "magicoder-oss", "instr-resp", 75000),
40
+ ("ise-uiuc/Magicoder-Evol-Instruct-110K", "Apache", "magicoder-evol", "instr-resp", 110000),
41
+ ("theblackcat102/evol-codealpaca-v1", "Apache", "evol-codealpaca", "instr-resp", 100000),
42
+ ("m-a-p/CodeFeedback-Filtered-Instruction", "Apache", "codefeedback-filt", "query-resp", 100000),
43
+ ("m-a-p/Code-Feedback", "Apache", "codefeedback-multi", "messages", 66383),
44
+ ("QuixiAI/dolphin-coder", "Apache", "dolphin-coder", "system-question-resp", 100000),
45
+ # ── Multi-turn dialogue + agentic reasoning ─────────────────────────────
46
+ ("HuggingFaceH4/ultrachat_200k", "MIT", "ultrachat", "messages", 200000),
47
+ ("Open-Orca/SlimOrca-Dedup", "MIT", "slim-orca", "conversations", 150000),
48
+ ("microsoft/orca-agentinstruct-1M-v1", "CDLA", "orca-agentinstruct", "messages", 150000),
49
+ # ── Real commits + code review ──────────────────────────────────────────
50
+ ("bigcode/commitpackft", "MIT", "commitpackft", "commit", 80000),
51
+ ("VatsaDev/code-review", "MIT", "vatsa-code-review", "instr-resp", 40000),
52
+ # ── DevSecOps: CVE / CWE / vulnerability detection ──────────────────────
53
+ ("AlicanKiraz0/All-CVE-Records-Training-Dataset","Apache", "cve-records-chat", "system-user-assistant", 30000),
54
+ ("CyberNative/Code_Vulnerability_Security_DPO", "Apache", "vuln-secure-dpo", "dpo-question", 4656),
55
+ ("bstee615/diversevul", "MIT-research","diversevul-cwe", "code-defect-cwe", 80000),
56
+ ("google/code_x_glue_cc_defect_detection", "C-UDA", "codexglue-defect", "code-defect", 27318),
57
+ # ── Function/tool calling (agentic core) ────────────────────────────────
58
+ ("Salesforce/xlam-function-calling-60k", "CC-BY-4.0", "xlam-fc", "tools-query-answers", 60000),
59
+ ("glaiveai/glaive-function-calling-v2", "Apache", "glaive-fc-v2", "system-chat", 112960),
60
+ ("NousResearch/hermes-function-calling-v1", "Apache", "hermes-fc", "conversations", 11578),
61
  # ── Reasoning / math ────────────────────────────────────────────────────
62
+ ("TIGER-Lab/MathInstruct", "MIT", "math-instruct", "instr-resp", 60000),
63
+ ("meta-math/MetaMathQA", "MIT", "metamath", "query-resp", 50000),
64
  # ── Helpfulness preferences ─────────────────────────────────────────────
65
+ ("Anthropic/hh-rlhf", "MIT", "hh-rlhf", "chosen-rejected", 40000),
66
  ]
67
 
68
  # 1. Existing axentx hashes for dedup
 
131
  elif schema == "chosen-rejected":
132
  prompt = str(row.get("chosen","")[:200] or row.get("prompt",""))
133
  response = str(row.get("chosen",""))
134
+ elif schema == "system-user-assistant": # AlicanKiraz0 CVE
135
+ prompt = f"{str(row.get('System','')).strip()}\n\nUser: {str(row.get('User','')).strip()}"
136
+ response = str(row.get("Assistant",""))
137
+ elif schema == "dpo-question": # CyberNative DPO
138
+ prompt = str(row.get("question",""))
139
+ response = str(row.get("chosen",""))
140
+ elif schema == "code-defect-cwe": # DiverseVul
141
+ cwes = row.get("cwe") or []
142
+ cwe_str = ",".join(cwes) if isinstance(cwes, list) and cwes else "none"
143
+ label = "VULNERABLE" if row.get("target") == 1 else "SAFE"
144
+ prompt = f"Audit this function for security vulnerabilities. Identify any CWE matches.\n```\n{str(row.get('func',''))[:6000]}\n```"
145
+ response = f"Verdict: {label}\nCWE: {cwe_str}\nProject: {row.get('project','')}\nCommit: {str(row.get('message',''))[:500]}"
146
+ elif schema == "code-defect": # CodeXGLUE
147
+ label = "VULNERABLE" if row.get("target") else "SAFE"
148
+ prompt = f"Review this C function for defects:\n```c\n{str(row.get('func',''))[:6000]}\n```"
149
+ response = f"Defect detected: {label}\nProject: {row.get('project','')}\nCommit: {row.get('commit_id','')}"
150
+ elif schema == "tools-query-answers": # xLAM
151
+ tools_json = json.dumps(row.get("tools",[]))[:3000]
152
+ prompt = f"You have access to these tools:\n{tools_json}\n\nUser query: {row.get('query','')}"
153
+ response = json.dumps(row.get("answers",[]), ensure_ascii=False)
154
+ elif schema == "system-chat": # Glaive-v2
155
+ prompt = str(row.get("system",""))
156
+ response = str(row.get("chat",""))
157
+ elif schema == "system-question-resp": # dolphin-coder
158
+ prompt = f"{str(row.get('system_prompt','')).strip()}\n\n{str(row.get('question','')).strip()}"
159
+ response = str(row.get("response",""))
160
  else:
161
  continue
162
 
bin/lib/context_builder.sh CHANGED
@@ -16,7 +16,7 @@ build_rich_context() {
16
  REPO_MAP=""
17
  for candidate in "$SHARED/repo-maps/${PRIO_PROJECT}_map.md" "$SHARED/repo-maps/${PRIO_PROJECT}.md"; do
18
  if [[ -f "$candidate" ]]; then
19
- REPO_MAP=$(/usr/bin/head -c 10000 "$candidate")
20
  break
21
  fi
22
  done
@@ -25,39 +25,39 @@ build_rich_context() {
25
  SIMILAR_FUNCS=""
26
  if [[ -d "$PROJECT_DIR" ]]; then
27
  # Extract keywords from title for grep
28
- local KW=$(echo "$PRIO_TITLE" | /usr/bin/tr '[:upper:]' '[:lower:]' | /usr/bin/tr -cs 'a-z0-9' ' ' | /usr/bin/tr ' ' '\n' | /usr/bin/awk 'length>4' | /usr/bin/head -3 | /usr/bin/tr '\n' '|' | /usr/bin/sed 's/|$//')
29
  if [[ -n "$KW" ]]; then
30
  SIMILAR_FUNCS=$(/usr/bin/find "$PROJECT_DIR" -type f \( -name '*.py' -o -name '*.ts' -o -name '*.tsx' -o -name '*.js' -o -name '*.go' \) ! -path '*/node_modules/*' ! -path '*/.hermes-*' 2>/dev/null | \
31
- xargs /usr/bin/grep -lE "($KW)" 2>/dev/null | /usr/bin/head -3 | while read f; do
32
  echo "=== ${f#$PROJECT_DIR/} ==="
33
- /usr/bin/grep -A3 -E "^(def|function|export const|class|async def|interface)" "$f" 2>/dev/null | /usr/bin/head -30
34
- done 2>/dev/null | /usr/bin/head -c 4000)
35
  fi
36
  fi
37
 
38
  # 3. RAG: actual code patterns from project (SQLite FTS via ask-sqlite.py if exists)
39
  RAG_EXAMPLES=""
40
  if [[ -x "$HOME/.surrogate/bin/ask-sqlite.py" ]]; then
41
- RAG_EXAMPLES=$(/usr/bin/python3 "$HOME/.surrogate/bin/ask-sqlite.py" \
42
- "$PRIO_PROJECT $PRIO_TITLE" 2>/dev/null | /usr/bin/head -c 3000)
43
  fi
44
 
45
  # 4. Semantic RAG (from embeddings) — top-5 similar
46
  SEMANTIC_RAG=""
47
  if [[ -f "$HOME/.surrogate/embeddings.db" ]]; then
48
- SEMANTIC_RAG=$(/usr/bin/python3 "$HOME/.surrogate/bin/embed-doc.py" --query "$PRIO_TITLE" 2>/dev/null | /usr/bin/head -c 2000)
49
  fi
50
 
51
  # 5. Past ACCEPTED examples (few-shot from quality≥7 history)
52
  FEWSHOT_ACCEPTED=""
53
- for review in $(/bin/ls -t "$HOME/.hermes/workspace/qwen-coder-reviews/"*.review.json 2>/dev/null | /usr/bin/head -30); do
54
- if /usr/bin/grep -qE '"quality_score":\s*[789]|"quality_score":\s*10' "$review" 2>/dev/null; then
55
  local OUT_FILE=$(basename "$review" .review.json)
56
  # Search all worker output dirs
57
  for WD in qwen-coder dev-cloud-samba dev-cloud-github dev-cloud-cloudflare dev-cloud-groq dev-cloud-synthesis; do
58
  local OUT_PATH="$HOME/.hermes/workspace/$WD/${OUT_FILE}.md"
59
  if [[ -f "$OUT_PATH" ]]; then
60
- FEWSHOT_ACCEPTED=$(/usr/bin/head -c 2000 "$OUT_PATH")
61
  break 2
62
  fi
63
  done
@@ -66,8 +66,8 @@ build_rich_context() {
66
 
67
  # 6. Anti-patterns (last 5 rejection reasons across all workers)
68
  ANTI_PATTERNS=""
69
- for review in $(/bin/ls -t "$HOME/.hermes/workspace/qwen-coder-reviews/"*.review.json 2>/dev/null | /usr/bin/head -10); do
70
- local bugs=$(/usr/bin/python3 -c "
71
  import json, re, sys
72
  try:
73
  txt = open('$review').read()
@@ -81,14 +81,14 @@ except: pass
81
  " 2>/dev/null)
82
  [[ -n "$bugs" ]] && ANTI_PATTERNS="$ANTI_PATTERNS$bugs"$'\n'
83
  done
84
- ANTI_PATTERNS=$(echo "$ANTI_PATTERNS" | /usr/bin/head -10)
85
 
86
  # 7. Active-learning prompt deltas — aggregate last 5 UNIQUE anti-patterns.
87
  # Preference: same-project anti-patterns first, then generic.
88
  # Dedup by first 80 chars of prompt_addition (similar bugs shouldn't bloat prompt).
89
  PROMPT_DELTAS=""
90
  if [[ -f "$HOME/.surrogate/memory/worker-prompt-deltas.jsonl" ]]; then
91
- PROMPT_DELTAS=$(/usr/bin/python3 -c "
92
  import json, sys
93
  from pathlib import Path
94
  try:
@@ -124,7 +124,7 @@ except Exception as e: pass
124
  # lower because they're supplementary; the spec is authoritative.
125
  PRIO_SPEC=""
126
  local SPEC_FILE="$HOME/.hermes/workspace/swarm-shared/specs/${PRIO_ID}.md"
127
- [[ -f "$SPEC_FILE" ]] && PRIO_SPEC=$(/usr/bin/head -c 6000 "$SPEC_FILE")
128
 
129
  # 9. Task-type authoritative sources — boost scraped knowledge based on title.
130
  # Security task → CVE/MITRE/OWASP/Prowler. SRE → Google SRE/postmortems.
@@ -132,7 +132,7 @@ except Exception as e: pass
132
  # This is THE fix that makes all our scraping actually used by Hermes workers.
133
  AUTHORITATIVE_CONTEXT=""
134
  if [[ -f "$HOME/.surrogate/index.db" ]]; then
135
- AUTHORITATIVE_CONTEXT=$(/usr/bin/python3 <<PYEOF
136
  import sqlite3, re
137
  title = """${PRIO_TITLE}""".lower()
138
  project = """${PRIO_PROJECT}""".lower()
@@ -223,7 +223,7 @@ PYEOF
223
 
224
  # 10. FalkorDB graph — related decisions + past priorities with similar theme
225
  GRAPH_CONTEXT=""
226
- local REDIS_SOCK=$(/usr/bin/find /var/folders /tmp -name 'redis.socket' -type s 2>/dev/null | /usr/bin/head -1)
227
  if [[ -n "$REDIS_SOCK" ]]; then
228
  # Get related priorities + learned rules
229
  GRAPH_CONTEXT=$(/opt/homebrew/bin/redis-cli -s "$REDIS_SOCK" GRAPH.QUERY ashira "
@@ -231,14 +231,14 @@ PYEOF
231
  OPTIONAL MATCH (p)-[:HAS_LEARNED_RULE]->(l:LearnedRule)
232
  OPTIONAL MATCH (p)-[:COMMITTED_AS]->(c:Commit)
233
  RETURN p.id, p.title, l.content, c.msg LIMIT 8
234
- " 2>/dev/null | /usr/bin/tail -c 2500)
235
  fi
236
 
237
  # 11. Hermes trace recall — past similar tasks Hermes handled (from JSONL)
238
  HERMES_RECALL=""
239
  local TRACE_DIR="$HOME/axentx/surrogate/data/training-jsonl"
240
  if [[ -d "$TRACE_DIR" ]]; then
241
- HERMES_RECALL=$(/usr/bin/python3 <<PYEOF
242
  import json, re, glob
243
  title = """${PRIO_TITLE}""".lower()
244
  words = [w for w in re.sub(r'[^a-zA-Z0-9 ]', ' ', title).split() if len(w) > 4][:4]
 
16
  REPO_MAP=""
17
  for candidate in "$SHARED/repo-maps/${PRIO_PROJECT}_map.md" "$SHARED/repo-maps/${PRIO_PROJECT}.md"; do
18
  if [[ -f "$candidate" ]]; then
19
+ REPO_MAP=$(head -c 10000 "$candidate")
20
  break
21
  fi
22
  done
 
25
  SIMILAR_FUNCS=""
26
  if [[ -d "$PROJECT_DIR" ]]; then
27
  # Extract keywords from title for grep
28
+ local KW=$(echo "$PRIO_TITLE" | /usr/bin/tr '[:upper:]' '[:lower:]' | /usr/bin/tr -cs 'a-z0-9' ' ' | /usr/bin/tr ' ' '\n' | awk 'length>4' | head -3 | /usr/bin/tr '\n' '|' | sed 's/|$//')
29
  if [[ -n "$KW" ]]; then
30
  SIMILAR_FUNCS=$(/usr/bin/find "$PROJECT_DIR" -type f \( -name '*.py' -o -name '*.ts' -o -name '*.tsx' -o -name '*.js' -o -name '*.go' \) ! -path '*/node_modules/*' ! -path '*/.hermes-*' 2>/dev/null | \
31
+ xargs grep -lE "($KW)" 2>/dev/null | head -3 | while read f; do
32
  echo "=== ${f#$PROJECT_DIR/} ==="
33
+ grep -A3 -E "^(def|function|export const|class|async def|interface)" "$f" 2>/dev/null | head -30
34
+ done 2>/dev/null | head -c 4000)
35
  fi
36
  fi
37
 
38
  # 3. RAG: actual code patterns from project (SQLite FTS via ask-sqlite.py if exists)
39
  RAG_EXAMPLES=""
40
  if [[ -x "$HOME/.surrogate/bin/ask-sqlite.py" ]]; then
41
+ RAG_EXAMPLES=$(python3 "$HOME/.surrogate/bin/ask-sqlite.py" \
42
+ "$PRIO_PROJECT $PRIO_TITLE" 2>/dev/null | head -c 3000)
43
  fi
44
 
45
  # 4. Semantic RAG (from embeddings) — top-5 similar
46
  SEMANTIC_RAG=""
47
  if [[ -f "$HOME/.surrogate/embeddings.db" ]]; then
48
+ SEMANTIC_RAG=$(python3 "$HOME/.surrogate/bin/embed-doc.py" --query "$PRIO_TITLE" 2>/dev/null | head -c 2000)
49
  fi
50
 
51
  # 5. Past ACCEPTED examples (few-shot from quality≥7 history)
52
  FEWSHOT_ACCEPTED=""
53
+ for review in $(/bin/ls -t "$HOME/.hermes/workspace/qwen-coder-reviews/"*.review.json 2>/dev/null | head -30); do
54
+ if grep -qE '"quality_score":\s*[789]|"quality_score":\s*10' "$review" 2>/dev/null; then
55
  local OUT_FILE=$(basename "$review" .review.json)
56
  # Search all worker output dirs
57
  for WD in qwen-coder dev-cloud-samba dev-cloud-github dev-cloud-cloudflare dev-cloud-groq dev-cloud-synthesis; do
58
  local OUT_PATH="$HOME/.hermes/workspace/$WD/${OUT_FILE}.md"
59
  if [[ -f "$OUT_PATH" ]]; then
60
+ FEWSHOT_ACCEPTED=$(head -c 2000 "$OUT_PATH")
61
  break 2
62
  fi
63
  done
 
66
 
67
  # 6. Anti-patterns (last 5 rejection reasons across all workers)
68
  ANTI_PATTERNS=""
69
+ for review in $(/bin/ls -t "$HOME/.hermes/workspace/qwen-coder-reviews/"*.review.json 2>/dev/null | head -10); do
70
+ local bugs=$(python3 -c "
71
  import json, re, sys
72
  try:
73
  txt = open('$review').read()
 
81
  " 2>/dev/null)
82
  [[ -n "$bugs" ]] && ANTI_PATTERNS="$ANTI_PATTERNS$bugs"$'\n'
83
  done
84
+ ANTI_PATTERNS=$(echo "$ANTI_PATTERNS" | head -10)
85
 
86
  # 7. Active-learning prompt deltas — aggregate last 5 UNIQUE anti-patterns.
87
  # Preference: same-project anti-patterns first, then generic.
88
  # Dedup by first 80 chars of prompt_addition (similar bugs shouldn't bloat prompt).
89
  PROMPT_DELTAS=""
90
  if [[ -f "$HOME/.surrogate/memory/worker-prompt-deltas.jsonl" ]]; then
91
+ PROMPT_DELTAS=$(python3 -c "
92
  import json, sys
93
  from pathlib import Path
94
  try:
 
124
  # lower because they're supplementary; the spec is authoritative.
125
  PRIO_SPEC=""
126
  local SPEC_FILE="$HOME/.hermes/workspace/swarm-shared/specs/${PRIO_ID}.md"
127
+ [[ -f "$SPEC_FILE" ]] && PRIO_SPEC=$(head -c 6000 "$SPEC_FILE")
128
 
129
  # 9. Task-type authoritative sources — boost scraped knowledge based on title.
130
  # Security task → CVE/MITRE/OWASP/Prowler. SRE → Google SRE/postmortems.
 
132
  # This is THE fix that makes all our scraping actually used by Hermes workers.
133
  AUTHORITATIVE_CONTEXT=""
134
  if [[ -f "$HOME/.surrogate/index.db" ]]; then
135
+ AUTHORITATIVE_CONTEXT=$(python3 <<PYEOF
136
  import sqlite3, re
137
  title = """${PRIO_TITLE}""".lower()
138
  project = """${PRIO_PROJECT}""".lower()
 
223
 
224
  # 10. FalkorDB graph — related decisions + past priorities with similar theme
225
  GRAPH_CONTEXT=""
226
+ local REDIS_SOCK=$(/usr/bin/find /var/folders /tmp -name 'redis.socket' -type s 2>/dev/null | head -1)
227
  if [[ -n "$REDIS_SOCK" ]]; then
228
  # Get related priorities + learned rules
229
  GRAPH_CONTEXT=$(/opt/homebrew/bin/redis-cli -s "$REDIS_SOCK" GRAPH.QUERY ashira "
 
231
  OPTIONAL MATCH (p)-[:HAS_LEARNED_RULE]->(l:LearnedRule)
232
  OPTIONAL MATCH (p)-[:COMMITTED_AS]->(c:Commit)
233
  RETURN p.id, p.title, l.content, c.msg LIMIT 8
234
+ " 2>/dev/null | tail -c 2500)
235
  fi
236
 
237
  # 11. Hermes trace recall — past similar tasks Hermes handled (from JSONL)
238
  HERMES_RECALL=""
239
  local TRACE_DIR="$HOME/axentx/surrogate/data/training-jsonl"
240
  if [[ -d "$TRACE_DIR" ]]; then
241
+ HERMES_RECALL=$(python3 <<PYEOF
242
  import json, re, glob
243
  title = """${PRIO_TITLE}""".lower()
244
  words = [w for w in re.sub(r'[^a-zA-Z0-9 ]', ' ', title).split() if len(w) > 4][:4]
bin/push-training-to-hf.sh CHANGED
@@ -31,7 +31,7 @@ if command -v huggingface-cli >/dev/null 2>&1 && [[ -n "${HF_TOKEN:-}" ]]; then
31
  --commit-message "auto-orchestrate: +${NEW_LINES} pairs ($(date +%H:%M))" \
32
  --token "$HF_TOKEN" 2>&1 | tee -a "$LOG"
33
  else
34
- /usr/bin/python3 - "$SLICE" "$NEW_LINES" "$DATE_TAG" <<'PYEOF' 2>&1 | tee -a "$LOG"
35
  import sys, os
36
  slice_path, n_pairs, date_tag = sys.argv[1], sys.argv[2], sys.argv[3]
37
  try:
 
31
  --commit-message "auto-orchestrate: +${NEW_LINES} pairs ($(date +%H:%M))" \
32
  --token "$HF_TOKEN" 2>&1 | tee -a "$LOG"
33
  else
34
+ python3 - "$SLICE" "$NEW_LINES" "$DATE_TAG" <<'PYEOF' 2>&1 | tee -a "$LOG"
35
  import sys, os
36
  slice_path, n_pairs, date_tag = sys.argv[1], sys.argv[2], sys.argv[3]
37
  try:
bin/skill-synthesis-daemon.sh CHANGED
@@ -32,13 +32,13 @@ while true; do
32
  -name "*.sh" -o -name "*.yaml" -o -name "*.toml" -o -name "*.json" \
33
  \) -size -50k -mtime -3 2>/dev/null | head -200 | while read -r f; do
34
  # Skip already-synthesized
35
- HASH=$(/usr/bin/python3 -c "import hashlib; print(hashlib.md5(open('$f','rb').read()).hexdigest()[:12])" 2>/dev/null)
36
  [[ -z "$HASH" ]] && continue
37
  STAMP="$SKILLS_DIR/.synthesized/$HASH"
38
  [[ -f "$STAMP" ]] && continue
39
  mkdir -p "$(dirname "$STAMP")"
40
 
41
- /usr/bin/python3 - "$f" "$SKILLS_DIR" "$PAIRS" "$STAMP" <<'PYEOF' 2>>"$LOG"
42
  import sys, re, json, time, os, hashlib
43
  from pathlib import Path
44
 
 
32
  -name "*.sh" -o -name "*.yaml" -o -name "*.toml" -o -name "*.json" \
33
  \) -size -50k -mtime -3 2>/dev/null | head -200 | while read -r f; do
34
  # Skip already-synthesized
35
+ HASH=$(python3 -c "import hashlib; print(hashlib.md5(open('$f','rb').read()).hexdigest()[:12])" 2>/dev/null)
36
  [[ -z "$HASH" ]] && continue
37
  STAMP="$SKILLS_DIR/.synthesized/$HASH"
38
  [[ -f "$STAMP" ]] && continue
39
  mkdir -p "$(dirname "$STAMP")"
40
 
41
+ python3 - "$f" "$SKILLS_DIR" "$PAIRS" "$STAMP" <<'PYEOF' 2>>"$LOG"
42
  import sys, re, json, time, os, hashlib
43
  from pathlib import Path
44
 
bin/surrogate CHANGED
@@ -206,7 +206,7 @@ run_agent() {
206
  export AGENT_EFFORT="$EFFORT"
207
  export AGENT_CWD="$(pwd)"
208
 
209
- /usr/bin/python3 <<'PYEOF'
210
  import os, sys, json, re, sqlite3, subprocess, urllib.request, urllib.error, time
211
  from datetime import datetime
212
  from pathlib import Path
@@ -278,7 +278,7 @@ def tool_glob(pattern, path=None):
278
 
279
  def tool_grep(pattern, path=None, glob='*'):
280
  base = os.path.expanduser(path) if path else CWD
281
- cmd = f"/usr/bin/grep -rn --include='{glob}' -E {subprocess.list2cmdline([pattern])} {base} 2>/dev/null | head -40"
282
  r = subprocess.run(cmd, shell=True, capture_output=True, text=True, timeout=15)
283
  return {'matches': r.stdout[:5000]}
284
 
@@ -505,7 +505,7 @@ print_statusline() {
505
  if [[ ! -f "$cache" ]] || [[ $(($(date +%s) - $(stat -f %m "$cache" 2>/dev/null || stat -c %Y "$cache" 2>/dev/null || echo 0))) -gt 60 ]]; then
506
  (curl -sS -m 5 -H "Authorization: Bearer ${OPENROUTER_API_KEY:-${OR_KEY:-}}" \
507
  https://openrouter.ai/api/v1/auth/key 2>/dev/null \
508
- | /usr/bin/python3 -c "import json,sys; d=json.load(sys.stdin).get('data',{}); print(f'\$OR={d.get(\"usage\",0):.3f}')" \
509
  > "$cache") 2>/dev/null &
510
  fi
511
  cost_str=$(cat "$cache" 2>/dev/null | head -1)
@@ -523,7 +523,7 @@ HISTORY_FILE="$SURROGATE_HOME/history.jsonl"
523
  mkdir -p "$(dirname "$HISTORY_FILE")"
524
  save_history() {
525
  local prompt="$1"
526
- /usr/bin/python3 -c "
527
  import json, sys, time
528
  from pathlib import Path
529
  Path('$HISTORY_FILE').parent.mkdir(parents=True, exist_ok=True)
@@ -630,7 +630,7 @@ repl() {
630
  ;;
631
  /history)
632
  if [[ -f "$HISTORY_FILE" ]]; then
633
- /usr/bin/python3 -c "
634
  import json
635
  from pathlib import Path
636
  import time
@@ -661,7 +661,7 @@ for l in lines:
661
  fi
662
  ;;
663
  /cost)
664
- bash -c 'source ~/.hermes/.env; curl -s -H "Authorization: Bearer $OPENROUTER_API_KEY" https://openrouter.ai/api/v1/auth/key' 2>&1 | /usr/bin/python3 -c "import json,sys; d=json.load(sys.stdin).get('data',{}); print(f' OpenRouter: \${d.get(\"usage\",0):.4f}')"
665
  ;;
666
  /cost-all) bash "$0" --status ;;
667
  /remote*)
@@ -739,7 +739,7 @@ init_project() {
739
  # ── Step A: web research the tech keywords (free, fast, grounds the PRD) ─
740
  echo "${MA}β–Ά Researching tech context...${R}"
741
  local research_md=""
742
- research_md=$(/usr/bin/python3 - "$Q_STACK $Q_DESC $Q_FEATURES" <<'PYEOF' 2>/dev/null
743
  import sys, urllib.request, urllib.parse, re
744
  text = sys.argv[1]
745
  # Extract candidate tech keywords (CamelCase, lowercase known stacks, version tags)
@@ -834,7 +834,7 @@ Output ONLY the markdown, no preamble. Adapt to the actual stack the user chose
834
  # ── Step C: direct LLM call (curl), bypassing the agent tool-loop ──
835
  local prd=""
836
  if [[ -n "${GEMINI_API_KEY:-}" ]]; then
837
- prd=$(/usr/bin/python3 - "$prompt" "$GEMINI_API_KEY" <<'PYEOF' 2>/dev/null
838
  import sys, json, urllib.request
839
  prompt, key = sys.argv[1], sys.argv[2]
840
  url = f"https://generativelanguage.googleapis.com/v1beta/models/gemini-2.5-flash:generateContent?key={key}"
@@ -853,7 +853,7 @@ PYEOF
853
  fi
854
  if [[ -z "$prd" ]] || [[ ${#prd} -lt 400 ]]; then
855
  if [[ -n "${OPENROUTER_API_KEY:-}" ]]; then
856
- prd=$(/usr/bin/python3 - "$prompt" "$OPENROUTER_API_KEY" <<'PYEOF' 2>/dev/null
857
  import sys, json, urllib.request
858
  prompt, key = sys.argv[1], sys.argv[2]
859
  body = {"model":"qwen/qwen3-coter","messages":[{"role":"user","content":prompt}],
@@ -884,7 +884,7 @@ PYEOF
884
  else
885
  echo "$prd" > "$target"
886
  # ── Step D: push PRD as training pair (HF dataset feedback loop) ───
887
- /usr/bin/python3 - "$prompt" "$prd" <<'PYEOF' 2>/dev/null &
888
  import sys, json, time, os
889
  from pathlib import Path
890
  log = Path.home() / '.surrogate' / 'training-pairs.jsonl'
@@ -941,7 +941,7 @@ auto_dev_mode() {
941
  # Drive tasks from plan until all done
942
  while true; do
943
  # Pop next pending task from plan
944
- NEXT_TASK=$(/usr/bin/python3 <<'PYEOF'
945
  import sys, re
946
  from pathlib import Path
947
  plan_file = Path.home() / '.surrogate' / 'active-plan.md'
@@ -960,7 +960,7 @@ PYEOF
960
  echo "${BCY}${B}β–Έ Next task:${R} $NEXT_TASK"
961
  bash ~/.surrogate/bin/surrogate-orchestrate.sh "$NEXT_TASK"
962
  # Mark done in plan
963
- /usr/bin/python3 <<PYEOF
964
  from pathlib import Path
965
  plan_file = Path.home() / '.surrogate' / 'active-plan.md'
966
  if plan_file.exists():
@@ -1035,7 +1035,7 @@ monitor_mode() {
1035
  # If critical → spawn agent to investigate
1036
  if [[ $ERR_COUNT -gt 50 ]]; then
1037
  echo "${RE}⚠ elevated errors β€” dispatching investigator agent${R}"
1038
- (run_agent "ΰΉ€ΰΈŠΰΉ‡ΰΈ„ ~/.surrogate/logs/ ΰΈ«ΰΈ² pattern error ΰΈ—ΰΈ΅ΰΉˆ recur บ่อฒ แΰΈ₯ΰΈ°ΰΉ€ΰΈͺΰΈ™ΰΈ­ fix list (ห้าฑแก้เอง ΰΈ£ΰΈ²ΰΈ’ΰΈ‡ΰΈ²ΰΈ™ΰΈ­ΰΈ’ΰΉˆΰΈ²ΰΈ‡ΰΉ€ΰΈ”ΰΈ΅ΰΈ’ΰΈ§)" 2>&1 | /usr/bin/head -20) &
1039
  fi
1040
  sleep 30
1041
  done
@@ -1070,7 +1070,7 @@ show_agents() {
1070
  banner
1071
  echo ""
1072
  echo "${B}β–Έ Available agents (~/.surrogate/agents/)${R}"
1073
- ls ~/.surrogate/agents/*.md 2>/dev/null | /usr/bin/sed 's|.*/||;s|.md$||' | sed 's/^/ /'
1074
  }
1075
 
1076
  # ═══ Dispatch ═══
 
206
  export AGENT_EFFORT="$EFFORT"
207
  export AGENT_CWD="$(pwd)"
208
 
209
+ python3 <<'PYEOF'
210
  import os, sys, json, re, sqlite3, subprocess, urllib.request, urllib.error, time
211
  from datetime import datetime
212
  from pathlib import Path
 
278
 
279
  def tool_grep(pattern, path=None, glob='*'):
280
  base = os.path.expanduser(path) if path else CWD
281
+ cmd = f"grep -rn --include='{glob}' -E {subprocess.list2cmdline([pattern])} {base} 2>/dev/null | head -40"
282
  r = subprocess.run(cmd, shell=True, capture_output=True, text=True, timeout=15)
283
  return {'matches': r.stdout[:5000]}
284
 
 
505
  if [[ ! -f "$cache" ]] || [[ $(($(date +%s) - $(stat -f %m "$cache" 2>/dev/null || stat -c %Y "$cache" 2>/dev/null || echo 0))) -gt 60 ]]; then
506
  (curl -sS -m 5 -H "Authorization: Bearer ${OPENROUTER_API_KEY:-${OR_KEY:-}}" \
507
  https://openrouter.ai/api/v1/auth/key 2>/dev/null \
508
+ | python3 -c "import json,sys; d=json.load(sys.stdin).get('data',{}); print(f'\$OR={d.get(\"usage\",0):.3f}')" \
509
  > "$cache") 2>/dev/null &
510
  fi
511
  cost_str=$(cat "$cache" 2>/dev/null | head -1)
 
523
  mkdir -p "$(dirname "$HISTORY_FILE")"
524
  save_history() {
525
  local prompt="$1"
526
+ python3 -c "
527
  import json, sys, time
528
  from pathlib import Path
529
  Path('$HISTORY_FILE').parent.mkdir(parents=True, exist_ok=True)
 
630
  ;;
631
  /history)
632
  if [[ -f "$HISTORY_FILE" ]]; then
633
+ python3 -c "
634
  import json
635
  from pathlib import Path
636
  import time
 
661
  fi
662
  ;;
663
  /cost)
664
+ bash -c 'source ~/.hermes/.env; curl -s -H "Authorization: Bearer $OPENROUTER_API_KEY" https://openrouter.ai/api/v1/auth/key' 2>&1 | python3 -c "import json,sys; d=json.load(sys.stdin).get('data',{}); print(f' OpenRouter: \${d.get(\"usage\",0):.4f}')"
665
  ;;
666
  /cost-all) bash "$0" --status ;;
667
  /remote*)
 
739
  # ── Step A: web research the tech keywords (free, fast, grounds the PRD) ─
740
  echo "${MA}β–Ά Researching tech context...${R}"
741
  local research_md=""
742
+ research_md=$(python3 - "$Q_STACK $Q_DESC $Q_FEATURES" <<'PYEOF' 2>/dev/null
743
  import sys, urllib.request, urllib.parse, re
744
  text = sys.argv[1]
745
  # Extract candidate tech keywords (CamelCase, lowercase known stacks, version tags)
 
834
  # ── Step C: direct LLM call (curl), bypassing the agent tool-loop ──
835
  local prd=""
836
  if [[ -n "${GEMINI_API_KEY:-}" ]]; then
837
+ prd=$(python3 - "$prompt" "$GEMINI_API_KEY" <<'PYEOF' 2>/dev/null
838
  import sys, json, urllib.request
839
  prompt, key = sys.argv[1], sys.argv[2]
840
  url = f"https://generativelanguage.googleapis.com/v1beta/models/gemini-2.5-flash:generateContent?key={key}"
 
853
  fi
854
  if [[ -z "$prd" ]] || [[ ${#prd} -lt 400 ]]; then
855
  if [[ -n "${OPENROUTER_API_KEY:-}" ]]; then
856
+ prd=$(python3 - "$prompt" "$OPENROUTER_API_KEY" <<'PYEOF' 2>/dev/null
857
  import sys, json, urllib.request
858
  prompt, key = sys.argv[1], sys.argv[2]
859
  body = {"model":"qwen/qwen3-coter","messages":[{"role":"user","content":prompt}],
 
884
  else
885
  echo "$prd" > "$target"
886
  # ── Step D: push PRD as training pair (HF dataset feedback loop) ───
887
+ python3 - "$prompt" "$prd" <<'PYEOF' 2>/dev/null &
888
  import sys, json, time, os
889
  from pathlib import Path
890
  log = Path.home() / '.surrogate' / 'training-pairs.jsonl'
 
941
  # Drive tasks from plan until all done
942
  while true; do
943
  # Pop next pending task from plan
944
+ NEXT_TASK=$(python3 <<'PYEOF'
945
  import sys, re
946
  from pathlib import Path
947
  plan_file = Path.home() / '.surrogate' / 'active-plan.md'
 
960
  echo "${BCY}${B}β–Έ Next task:${R} $NEXT_TASK"
961
  bash ~/.surrogate/bin/surrogate-orchestrate.sh "$NEXT_TASK"
962
  # Mark done in plan
963
+ python3 <<PYEOF
964
  from pathlib import Path
965
  plan_file = Path.home() / '.surrogate' / 'active-plan.md'
966
  if plan_file.exists():
 
1035
  # If critical → spawn agent to investigate
1036
  if [[ $ERR_COUNT -gt 50 ]]; then
1037
  echo "${RE}⚠ elevated errors β€” dispatching investigator agent${R}"
1038
+ (run_agent "ΰΉ€ΰΈŠΰΉ‡ΰΈ„ ~/.surrogate/logs/ ΰΈ«ΰΈ² pattern error ΰΈ—ΰΈ΅ΰΉˆ recur บ่อฒ แΰΈ₯ΰΈ°ΰΉ€ΰΈͺΰΈ™ΰΈ­ fix list (ห้าฑแก้เอง ΰΈ£ΰΈ²ΰΈ’ΰΈ‡ΰΈ²ΰΈ™ΰΈ­ΰΈ’ΰΉˆΰΈ²ΰΈ‡ΰΉ€ΰΈ”ΰΈ΅ΰΈ’ΰΈ§)" 2>&1 | head -20) &
1039
  fi
1040
  sleep 30
1041
  done
 
1070
  banner
1071
  echo ""
1072
  echo "${B}β–Έ Available agents (~/.surrogate/agents/)${R}"
1073
+ ls ~/.surrogate/agents/*.md 2>/dev/null | sed 's|.*/||;s|.md$||' | sed 's/^/ /'
1074
  }
1075
 
1076
  # ═══ Dispatch ═══
bin/surrogate-consolidate.sh CHANGED
@@ -15,7 +15,7 @@ LOG="$HOME/.surrogate/logs/surrogate-consolidate.log"
15
  CHECKPOINT="$MEM/consolidate.checkpoint"
16
  mkdir -p "$(dirname "$LOG")" "$MEM"
17
 
18
- /usr/bin/python3 <<'PYEOF' 2>>"$LOG"
19
  import json, os, sqlite3, urllib.request, hashlib, subprocess
20
  from datetime import datetime
21
  from pathlib import Path
 
15
  CHECKPOINT="$MEM/consolidate.checkpoint"
16
  mkdir -p "$(dirname "$LOG")" "$MEM"
17
 
18
+ python3 <<'PYEOF' 2>>"$LOG"
19
  import json, os, sqlite3, urllib.request, hashlib, subprocess
20
  from datetime import datetime
21
  from pathlib import Path
bin/surrogate-daemon.sh CHANGED
@@ -33,7 +33,7 @@ case "$CMD" in
33
  shift
34
  TASK="$*"
35
  [[ -z "$TASK" ]] && { echo "need task"; exit 2; }
36
- ENQUEUE_TASK="$TASK" /usr/bin/python3 - "$QUEUE" <<'PYEOF'
37
  import json, uuid, os, sys
38
  from datetime import datetime
39
  queue_path = sys.argv[1]
@@ -159,7 +159,7 @@ PYEOF
159
  _worker)
160
  # ── Pop one task from queue (P0-user first, then plan, then self-gen) ──────
161
  _pop_queue() {
162
- /usr/bin/python3 <<PYEOF
163
  import json, os, sys, fcntl
164
  from pathlib import Path
165
  q = Path(os.path.expanduser('$QUEUE'))
@@ -188,7 +188,7 @@ PYEOF
188
 
189
  # ── Pop next task from active plan (no sleep needed — plan drives work) ──
190
  _pop_plan() {
191
- /usr/bin/python3 <<'PYEOF'
192
  import sys, json, os, re, uuid
193
  from pathlib import Path
194
  from datetime import datetime
@@ -223,7 +223,7 @@ PYEOF
223
 
224
  # ── Self-generate task from pool (fallback when no plan + queue empty) ──
225
  _self_gen() {
226
- AUTO_TASK=$(/usr/bin/python3 <<'PYEOF'
227
  import json, os, random
228
  from pathlib import Path
229
  ep = Path(os.path.expanduser('~/.surrogate/state/surrogate-memory/episodes.jsonl'))
@@ -278,7 +278,7 @@ for t in random.sample(pool, len(pool)):
278
  print(chosen or pool[0])
279
  PYEOF
280
  )
281
- echo "{\"id\":\"auto-$(/usr/bin/python3 -c 'import uuid; print(uuid.uuid4().hex[:8])')\",\"task\":\"$AUTO_TASK\",\"self_generated\":true,\"source\":\"self-gen\"}"
282
  }
283
 
284
  # ── Task resolution: queue → plan → self-gen (no 60s sleep) ─────────────
@@ -297,9 +297,9 @@ PYEOF
297
  fi
298
 
299
  # Extract task
300
- TASK=$(echo "$TASK_JSON" | /usr/bin/python3 -c "import json,sys; print(json.loads(sys.stdin.read())['task'])")
301
- TID=$(echo "$TASK_JSON" | /usr/bin/python3 -c "import json,sys; print(json.loads(sys.stdin.read())['id'])")
302
- SOURCE=$(echo "$TASK_JSON" | /usr/bin/python3 -c "import json,sys; print(json.loads(sys.stdin.read()).get('source','queue'))")
303
 
304
  echo "[$(date +%H:%M:%S)] worker picked $TID [$SOURCE]: ${TASK:0:80}" >> "$LOG"
305
  START=$(date +%s)
@@ -311,7 +311,7 @@ PYEOF
311
 
312
  # If task came from plan, mark as done ([ ] → [x]) — env vars = safe quoting
313
  if [[ "$SOURCE" == "plan" ]]; then
314
- DAEMON_TASK="$TASK" /usr/bin/python3 - >> "$LOG" 2>&1 <<'PYEOF'
315
  import re, os
316
  from pathlib import Path
317
  plan_file = Path.home() / '.surrogate' / 'active-plan.md'
@@ -327,7 +327,7 @@ PYEOF
327
 
328
  # Mark done in audit log
329
  DAEMON_TASK="$TASK" DAEMON_OUTPUT="$(echo "$OUTPUT" | tail -20)" \
330
- /usr/bin/python3 - "$TID" "$SOURCE" "$DUR" "$DONE" >> "$LOG" 2>&1 <<'PYEOF'
331
  import json, os, sys
332
  tid, source, dur, done_path = sys.argv[1], sys.argv[2], int(sys.argv[3]), sys.argv[4]
333
  done = {
 
33
  shift
34
  TASK="$*"
35
  [[ -z "$TASK" ]] && { echo "need task"; exit 2; }
36
+ ENQUEUE_TASK="$TASK" python3 - "$QUEUE" <<'PYEOF'
37
  import json, uuid, os, sys
38
  from datetime import datetime
39
  queue_path = sys.argv[1]
 
159
  _worker)
160
  # ── Pop one task from queue (P0-user first, then plan, then self-gen) ──────
161
  _pop_queue() {
162
+ python3 <<PYEOF
163
  import json, os, sys, fcntl
164
  from pathlib import Path
165
  q = Path(os.path.expanduser('$QUEUE'))
 
188
 
189
  # ── Pop next task from active plan (no sleep needed — plan drives work) ──
190
  _pop_plan() {
191
+ python3 <<'PYEOF'
192
  import sys, json, os, re, uuid
193
  from pathlib import Path
194
  from datetime import datetime
 
223
 
224
  # ── Self-generate task from pool (fallback when no plan + queue empty) ──
225
  _self_gen() {
226
+ AUTO_TASK=$(python3 <<'PYEOF'
227
  import json, os, random
228
  from pathlib import Path
229
  ep = Path(os.path.expanduser('~/.surrogate/state/surrogate-memory/episodes.jsonl'))
 
278
  print(chosen or pool[0])
279
  PYEOF
280
  )
281
+ echo "{\"id\":\"auto-$(python3 -c 'import uuid; print(uuid.uuid4().hex[:8])')\",\"task\":\"$AUTO_TASK\",\"self_generated\":true,\"source\":\"self-gen\"}"
282
  }
283
 
284
  # ── Task resolution: queue → plan → self-gen (no 60s sleep) ─────────────
 
297
  fi
298
 
299
  # Extract task
300
+ TASK=$(echo "$TASK_JSON" | python3 -c "import json,sys; print(json.loads(sys.stdin.read())['task'])")
301
+ TID=$(echo "$TASK_JSON" | python3 -c "import json,sys; print(json.loads(sys.stdin.read())['id'])")
302
+ SOURCE=$(echo "$TASK_JSON" | python3 -c "import json,sys; print(json.loads(sys.stdin.read()).get('source','queue'))")
303
 
304
  echo "[$(date +%H:%M:%S)] worker picked $TID [$SOURCE]: ${TASK:0:80}" >> "$LOG"
305
  START=$(date +%s)
 
311
 
312
  # If task came from plan, mark as done ([ ] → [x]) — env vars = safe quoting
313
  if [[ "$SOURCE" == "plan" ]]; then
314
+ DAEMON_TASK="$TASK" python3 - >> "$LOG" 2>&1 <<'PYEOF'
315
  import re, os
316
  from pathlib import Path
317
  plan_file = Path.home() / '.surrogate' / 'active-plan.md'
 
327
 
328
  # Mark done in audit log
329
  DAEMON_TASK="$TASK" DAEMON_OUTPUT="$(echo "$OUTPUT" | tail -20)" \
330
+ python3 - "$TID" "$SOURCE" "$DUR" "$DONE" >> "$LOG" 2>&1 <<'PYEOF'
331
  import json, os, sys
332
  tid, source, dur, done_path = sys.argv[1], sys.argv[2], int(sys.argv[3]), sys.argv[4]
333
  done = {
bin/surrogate-dev-loop.sh CHANGED
@@ -33,7 +33,7 @@ SEARCH_ROOTS=(
33
 
34
  # ── Task generators (pick one per cycle, weighted random) ────────────────────
35
  pick_task() {
36
- /usr/bin/python3 <<'PYEOF'
37
  import os, random, re, subprocess, json
38
  from pathlib import Path
39
 
@@ -178,7 +178,7 @@ load_reflexion_lessons() {
178
  local kind="$1"
179
  local file="$HOME/.hermes/workspace/reflexion/lessons-${kind}.jsonl"
180
  [[ ! -f "$file" ]] && { echo ""; return; }
181
- /usr/bin/python3 <<PYEOF
182
  import json
183
  from pathlib import Path
184
  p = Path("$file")
@@ -211,7 +211,7 @@ save_reflexion_lesson() {
211
  mkdir -p "$(dirname "$file")"
212
  # Pass payload via env vars + sys.argv (safe — no shell quoting issues with embedded quotes)
213
  REFLEX_RESP="$response" REFLEX_TASK="$task" \
214
- /usr/bin/python3 - "$kind" "$duration" "$file" <<'PYEOF'
215
  import json, re, os, sys
216
  from datetime import datetime
217
  kind, dur, out_file = sys.argv[1], int(sys.argv[2]), sys.argv[3]
@@ -251,11 +251,11 @@ run_cycle() {
251
  fi
252
 
253
  local kind path line task_text context
254
- kind=$(echo "$task_json" | /usr/bin/python3 -c "import json,sys; print(json.loads(sys.stdin.read()).get('kind',''))")
255
- path=$(echo "$task_json" | /usr/bin/python3 -c "import json,sys; print(json.loads(sys.stdin.read()).get('path',''))")
256
- line=$(echo "$task_json" | /usr/bin/python3 -c "import json,sys; print(json.loads(sys.stdin.read()).get('line',0))")
257
- task_text=$(echo "$task_json" | /usr/bin/python3 -c "import json,sys; print(json.loads(sys.stdin.read()).get('task',''))")
258
- context=$(echo "$task_json" | /usr/bin/python3 -c "import json,sys; print(json.loads(sys.stdin.read()).get('context',''))")
259
 
260
  local id="$(date +%s)-${kind}"
261
  local out="$OUT_DIR/${id}.md"
@@ -277,7 +277,7 @@ $context
277
 
278
  # Call Surrogate-1 via Ollama (keep_alive=5m so model stays warm between cycles)
279
  local body
280
- body=$(PROMPT_VAR="$prompt" /usr/bin/python3 <<'PYEOF'
281
  import json, os
282
  print(json.dumps({
283
  "model": "surrogate-1",
@@ -290,13 +290,13 @@ print(json.dumps({
290
  PYEOF
291
  )
292
  local resp
293
- resp=$(/usr/bin/curl -sS --max-time 120 \
294
  http://localhost:11434/v1/chat/completions \
295
  -H 'Content-Type: application/json' \
296
  -d "$body" 2>/dev/null)
297
 
298
  local answer
299
- answer=$(echo "$resp" | /usr/bin/python3 -c "
300
  import json, sys
301
  try:
302
  d = json.load(sys.stdin)
@@ -335,7 +335,7 @@ EOF
335
 
336
  # Append to training-data candidate (env vars + argv = safe quoting)
337
  DEV_TASK="$task_text" DEV_ANSWER="$answer" \
338
- /usr/bin/python3 - "$kind" "$dur" <<'PYEOF'
339
  import json, os, sys
340
  from pathlib import Path
341
  from datetime import datetime
 
33
 
34
  # ── Task generators (pick one per cycle, weighted random) ────────────────────
35
  pick_task() {
36
+ python3 <<'PYEOF'
37
  import os, random, re, subprocess, json
38
  from pathlib import Path
39
 
 
178
  local kind="$1"
179
  local file="$HOME/.hermes/workspace/reflexion/lessons-${kind}.jsonl"
180
  [[ ! -f "$file" ]] && { echo ""; return; }
181
+ python3 <<PYEOF
182
  import json
183
  from pathlib import Path
184
  p = Path("$file")
 
211
  mkdir -p "$(dirname "$file")"
212
  # Pass payload via env vars + sys.argv (safe — no shell quoting issues with embedded quotes)
213
  REFLEX_RESP="$response" REFLEX_TASK="$task" \
214
+ python3 - "$kind" "$duration" "$file" <<'PYEOF'
215
  import json, re, os, sys
216
  from datetime import datetime
217
  kind, dur, out_file = sys.argv[1], int(sys.argv[2]), sys.argv[3]
 
251
  fi
252
 
253
  local kind path line task_text context
254
+ kind=$(echo "$task_json" | python3 -c "import json,sys; print(json.loads(sys.stdin.read()).get('kind',''))")
255
+ path=$(echo "$task_json" | python3 -c "import json,sys; print(json.loads(sys.stdin.read()).get('path',''))")
256
+ line=$(echo "$task_json" | python3 -c "import json,sys; print(json.loads(sys.stdin.read()).get('line',0))")
257
+ task_text=$(echo "$task_json" | python3 -c "import json,sys; print(json.loads(sys.stdin.read()).get('task',''))")
258
+ context=$(echo "$task_json" | python3 -c "import json,sys; print(json.loads(sys.stdin.read()).get('context',''))")
259
 
260
  local id="$(date +%s)-${kind}"
261
  local out="$OUT_DIR/${id}.md"
 
277
 
278
  # Call Surrogate-1 via Ollama (keep_alive=5m so model stays warm between cycles)
279
  local body
280
+ body=$(PROMPT_VAR="$prompt" python3 <<'PYEOF'
281
  import json, os
282
  print(json.dumps({
283
  "model": "surrogate-1",
 
290
  PYEOF
291
  )
292
  local resp
293
+ resp=$(curl -sS --max-time 120 \
294
  http://localhost:11434/v1/chat/completions \
295
  -H 'Content-Type: application/json' \
296
  -d "$body" 2>/dev/null)
297
 
298
  local answer
299
+ answer=$(echo "$resp" | python3 -c "
300
  import json, sys
301
  try:
302
  d = json.load(sys.stdin)
 
335
 
336
  # Append to training-data candidate (env vars + argv = safe quoting)
337
  DEV_TASK="$task_text" DEV_ANSWER="$answer" \
338
+ python3 - "$kind" "$dur" <<'PYEOF'
339
  import json, os, sys
340
  from pathlib import Path
341
  from datetime import datetime
bin/surrogate-orchestrate.sh CHANGED
@@ -42,7 +42,7 @@ RESEARCH_CONTEXT=""
42
  RESEARCH_OUT="$WORKDIR/0-research-context.md"
43
  if echo "$TASK" | grep -iqE "migrat|integrat|switch from|move to|adopt|setup|deploy"; then
44
  echo "${MA}${B}═══ Stage 0/6: WEB RESEARCH${R} ${D}β€” gather current docs first${R}"
45
- /usr/bin/python3 - "$TASK" "$RESEARCH_OUT" <<'PYEOF' 2>&1 | sed 's/^/ /' || true
46
  import sys, urllib.request, urllib.parse, json, re, os
47
  task, out_path = sys.argv[1], sys.argv[2]
48
  # Extract tech keywords (capitalized words, dot-versions, snake-case)
@@ -84,7 +84,7 @@ for prd_file in "$(pwd)/surrogate.md" "$(pwd)/SURROGATE.md"; do
84
  PRD_CONTEXT="
85
 
86
  === Project PRD (surrogate.md) ===
87
- $(/usr/bin/head -c 6000 "$prd_file")
88
  === End PRD ==="
89
  break
90
  fi
@@ -138,7 +138,7 @@ EOF
138
  CHUTES_KEY="${CHUTES_API_KEY:-}" \
139
  OR_KEY_ENV="${OPENROUTER_API_KEY:-}" \
140
  GH_POOL="${GITHUB_TOKEN_POOL:-}" \
141
- /usr/bin/python3 - "$prompt_file" <<'PYEOF' 2>&1
142
  import sys, json, urllib.request, os
143
  from pathlib import Path
144
  prompt = Path(sys.argv[1]).read_text()
@@ -249,7 +249,7 @@ PYEOF
249
  # ── Push every task pair to HF training dataset (background) ──
250
  push_training_pair() {
251
  local source="$1" prompt="$2" content="$3"
252
- /usr/bin/python3 - "$source" "$prompt" "$content" "$TRAINING_LOG" <<'PYEOF' 2>/dev/null &
253
  import sys, json, time, os
254
  src, p, c, log = sys.argv[1], sys.argv[2], sys.argv[3], sys.argv[4]
255
  pair = {
@@ -384,7 +384,7 @@ Task: $TASK
384
  # Extract code blocks from DEV output → write actual files
385
  if [[ -f "$DEV_OUT" ]]; then
386
  echo "${D} Extracting code blocks β†’ real files${R}"
387
- /usr/bin/python3 - "$DEV_OUT" "$(pwd)" <<'PYEOF' 2>&1 | sed 's/^/ /'
388
  import sys, re, os
389
  from pathlib import Path
390
  md_path, cwd = sys.argv[1], sys.argv[2]
@@ -420,7 +420,7 @@ fi
420
  QA_OUT="$WORKDIR/5-qa-verify.md"
421
  OPS_OUT="$WORKDIR/6a-ops-checklist.md"
422
  NEED_OPS=0
423
- if echo "$TASK" | /usr/bin/grep -iqE "deploy|docker|helm|k8s|terraform|cicd|ci/cd|cloudformation|buildspec|ecs|lambda"; then
424
  NEED_OPS=1
425
  fi
426
 
@@ -509,7 +509,7 @@ ls -la "$WORKDIR/" 2>&1 | tail -n +2 | awk '{printf " %s %s\n", $5, $9}' | gre
509
 
510
  VERDICT_TEXT=""
511
  if [[ -f "$REVIEW_OUT" ]]; then
512
- VERDICT_TEXT=$(grep -iE "verdict|APPROVE|REWORK|REJECT" "$REVIEW_OUT" | /usr/bin/head -3)
513
  echo ""
514
  echo "${B}β–Έ Final verdict:${R}"
515
  echo "$VERDICT_TEXT" | sed 's/^/ /'
@@ -536,5 +536,5 @@ if echo "$VERDICT_TEXT" | grep -qi "APPROVE"; then
536
  elif echo "$VERDICT_TEXT" | grep -qi "REWORK"; then
537
  echo ""
538
  echo "${YE}${B}β–Έ Reviewer requested REWORK β€” re-run orchestrate after addressing notes${R}"
539
- grep -A5 -i "REWORK\|action item" "$REVIEW_OUT" | /usr/bin/head -10 | sed 's/^/ /'
540
  fi
 
42
  RESEARCH_OUT="$WORKDIR/0-research-context.md"
43
  if echo "$TASK" | grep -iqE "migrat|integrat|switch from|move to|adopt|setup|deploy"; then
44
  echo "${MA}${B}═══ Stage 0/6: WEB RESEARCH${R} ${D}β€” gather current docs first${R}"
45
+ python3 - "$TASK" "$RESEARCH_OUT" <<'PYEOF' 2>&1 | sed 's/^/ /' || true
46
  import sys, urllib.request, urllib.parse, json, re, os
47
  task, out_path = sys.argv[1], sys.argv[2]
48
  # Extract tech keywords (capitalized words, dot-versions, snake-case)
 
84
  PRD_CONTEXT="
85
 
86
  === Project PRD (surrogate.md) ===
87
+ $(head -c 6000 "$prd_file")
88
  === End PRD ==="
89
  break
90
  fi
 
138
  CHUTES_KEY="${CHUTES_API_KEY:-}" \
139
  OR_KEY_ENV="${OPENROUTER_API_KEY:-}" \
140
  GH_POOL="${GITHUB_TOKEN_POOL:-}" \
141
+ python3 - "$prompt_file" <<'PYEOF' 2>&1
142
  import sys, json, urllib.request, os
143
  from pathlib import Path
144
  prompt = Path(sys.argv[1]).read_text()
 
249
  # ── Push every task pair to HF training dataset (background) ──
250
  push_training_pair() {
251
  local source="$1" prompt="$2" content="$3"
252
+ python3 - "$source" "$prompt" "$content" "$TRAINING_LOG" <<'PYEOF' 2>/dev/null &
253
  import sys, json, time, os
254
  src, p, c, log = sys.argv[1], sys.argv[2], sys.argv[3], sys.argv[4]
255
  pair = {
 
384
  # Extract code blocks from DEV output → write actual files
385
  if [[ -f "$DEV_OUT" ]]; then
386
  echo "${D} Extracting code blocks β†’ real files${R}"
387
+ python3 - "$DEV_OUT" "$(pwd)" <<'PYEOF' 2>&1 | sed 's/^/ /'
388
  import sys, re, os
389
  from pathlib import Path
390
  md_path, cwd = sys.argv[1], sys.argv[2]
 
420
  QA_OUT="$WORKDIR/5-qa-verify.md"
421
  OPS_OUT="$WORKDIR/6a-ops-checklist.md"
422
  NEED_OPS=0
423
+ if echo "$TASK" | grep -iqE "deploy|docker|helm|k8s|terraform|cicd|ci/cd|cloudformation|buildspec|ecs|lambda"; then
424
  NEED_OPS=1
425
  fi
426
 
 
509
 
510
  VERDICT_TEXT=""
511
  if [[ -f "$REVIEW_OUT" ]]; then
512
+ VERDICT_TEXT=$(grep -iE "verdict|APPROVE|REWORK|REJECT" "$REVIEW_OUT" | head -3)
513
  echo ""
514
  echo "${B}β–Έ Final verdict:${R}"
515
  echo "$VERDICT_TEXT" | sed 's/^/ /'
 
536
  elif echo "$VERDICT_TEXT" | grep -qi "REWORK"; then
537
  echo ""
538
  echo "${YE}${B}β–Έ Reviewer requested REWORK β€” re-run orchestrate after addressing notes${R}"
539
+ grep -A5 -i "REWORK\|action item" "$REVIEW_OUT" | head -10 | sed 's/^/ /'
540
  fi