Spaces:

axentx
/

surrogate-1

Runtime error

Ashira Pitchayapakayakul committed 26 days ago

Commit

023ab84

1 Parent(s): 3cbaec6

fix: strip Mac /usr/bin/* hardcoded paths + expand dataset-enrich to 21 sources

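- /usr/bin/python3 → python3 (PATH-based, so it resolves inside the Linux container)
- Same for curl/head/tail/grep/sed/awk; some hardcoded paths (/usr/bin/tr, /usr/bin/find, /usr/bin/sqlite3, /bin/ls, /opt/homebrew/bin/redis-cli) remain unchanged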
- dataset-enrich.sh: 9 → 21 sources covering DevSecOps gaps:
  + 4 CVE/CWE: AlicanKiraz0, CyberNative DPO, DiverseVul, CodeXGLUE
  + 3 function-calling: Salesforce/xLAM, Glaive-v2, Hermes-FC
  + 3 code instr: m-a-p CodeFeedback × 2, dolphin-coder
  + 1 agentic: orca-agentinstruct-1M
  + 1 review: VatsaDev/code-review
- 7 new schema branches: system-user-assistant, dpo-question, code-defect-cwe,
  code-defect, tools-query-answers, system-chat, system-question-resp
- The new sources were researched and selected by a spawned background agent (parallel work pattern)

Files changed (11) hide show

bin/agentic-crawler.sh +2 -2
bin/ai-fallback.sh +16 -16
bin/dataset-enrich.sh +53 -13
bin/lib/context_builder.sh +20 -20
bin/push-training-to-hf.sh +1 -1
bin/skill-synthesis-daemon.sh +2 -2
bin/surrogate +14 -14
bin/surrogate-consolidate.sh +1 -1
bin/surrogate-daemon.sh +10 -10
bin/surrogate-dev-loop.sh +12 -12
bin/surrogate-orchestrate.sh +8 -8

bin/agentic-crawler.sh CHANGED Viewed

@@ -40,7 +40,7 @@ SQL
 COUNT=$(sqlite3 "$DB" "SELECT COUNT(*) FROM frontier;")
 if [[ $COUNT -lt 5 ]]; then
     echo "[$(date +%H:%M:%S)] seeding frontier" | tee -a "$LOG"
-    /usr/bin/python3 - "$DB" <<'PYEOF'
 import sqlite3, sys, time
 db = sys.argv[1]
 seeds = [
@@ -82,7 +82,7 @@ fi
 # ── Worker: fetch one URL, extract links, score, push back to frontier ─────
 fetch_one() {
     local url="$1" depth="$2"
-    /usr/bin/python3 - "$url" "$depth" "$DB" "$PAIRS" "${HF_TOKEN:-}" <<'PYEOF' 2>&1
 import sys, sqlite3, urllib.request, urllib.parse, re, time, json, os
 url, depth, db, pairs, hf_token = sys.argv[1], int(sys.argv[2]), sys.argv[3], sys.argv[4], sys.argv[5]
 con = sqlite3.connect(db)

 COUNT=$(sqlite3 "$DB" "SELECT COUNT(*) FROM frontier;")
 if [[ $COUNT -lt 5 ]]; then
     echo "[$(date +%H:%M:%S)] seeding frontier" | tee -a "$LOG"
+    python3 - "$DB" <<'PYEOF'
 import sqlite3, sys, time
 db = sys.argv[1]
 seeds = [
 # ── Worker: fetch one URL, extract links, score, push back to frontier ─────
 fetch_one() {
     local url="$1" depth="$2"
+    python3 - "$url" "$depth" "$DB" "$PAIRS" "${HF_TOKEN:-}" <<'PYEOF' 2>&1
 import sys, sqlite3, urllib.request, urllib.parse, re, time, json, os
 url, depth, db, pairs, hf_token = sys.argv[1], int(sys.argv[2]), sys.argv[3], sys.argv[4], sys.argv[5]
 con = sqlite3.connect(db)

bin/ai-fallback.sh CHANGED Viewed

@@ -45,17 +45,17 @@ while [ $# -gt 0 ]; do
     *)          QUERY="$QUERY $1"; shift ;;
   esac
 done
-QUERY=$(echo "$QUERY" | /usr/bin/sed 's/^ *//')
-[ -z "$QUERY" ] && { /usr/bin/head -15 "$0"; exit 1; }
 # --task <type> — pick the strongest free model per provider for the task.
 # Sets per-provider env vars that try_* functions read (bridge --model alias).
 # Auto-detect if not provided: code keywords → coding, reasoning keywords → reasoning.
 if [ -z "$TASK" ]; then
   q_lower=$(echo "$QUERY" | /usr/bin/tr '[:upper:]' '[:lower:]')
-  if echo "$q_lower" | /usr/bin/grep -qE "code|function|implement|refactor|bug|class|method|api|sql|terraform|cloudformation|dockerfile|kubernetes|yaml|typescript|javascript|python|rust|golang"; then
     TASK="coding"
-  elif echo "$q_lower" | /usr/bin/grep -qE "analyze|reason|explain why|compare|evaluate|architect|design|trade-?off|deep|think step|proof|calculate|complex"; then
     TASK="reasoning"
   fi
 fi
@@ -100,7 +100,7 @@ if [[ "$TASK" == "coding" || "$TASK" == "reasoning" || "$TASK" == "creative" ]];
     if [[ -f "$HOME/.surrogate/embeddings.db" ]]; then
         EMB_COUNT=$(/usr/bin/sqlite3 "$HOME/.surrogate/embeddings.db" 'SELECT COUNT(*) FROM embeddings' 2>/dev/null || echo 0)
         if [[ "$EMB_COUNT" -ge 100 ]]; then
-            SEM_CONTEXT=$(/usr/bin/python3 "$HOME/.surrogate/bin/embed-doc.py" --query "$QUERY" 2>/dev/null | /usr/bin/head -15)
             if [[ -n "$SEM_CONTEXT" ]]; then
                 QUERY="=== RAG CONTEXT (top-5 semantic matches from knowledge base) ===
 $SEM_CONTEXT
@@ -124,13 +124,13 @@ save_response() {
 # --- System prompt from knowledge base + auto code-search if code query ---
 build_system_prompt() {
   local kb="" profile="" code_ctx="" q_lower
-  [ -f "$HOME/.surrogate/memory/knowledge_index.md" ] && kb="$(/usr/bin/head -50 $HOME/.surrogate/memory/knowledge_index.md)"
   [ -f "$HOME/.surrogate/memory/user_profile.md" ] && profile="$(cat $HOME/.surrogate/memory/user_profile.md)"
   q_lower=$(echo "$QUERY" | /usr/bin/tr '[:upper:]' '[:lower:]')
   local is_generate=0 is_code=0
-  echo "$q_lower" | /usr/bin/grep -qE "code|function|implement|refactor|bug|error|class|method|api|endpoint|schema|model|service|controller|middleware|auth|database|query|sql|deploy|pipeline|terraform|cloudformation|dockerfile|kubernetes|helm|yaml" && is_code=1
-  echo "$q_lower" | /usr/bin/grep -qE "create|generate|write|build|new|template|scaffold|design" && is_generate=1
   if [ "$is_code" = "1" ] && [ -d "$HOME/.surrogate/code-vector-db" ]; then
     if [ "$is_generate" = "1" ] && [ -x "$HOME/.surrogate/bin/find-gold-examples.sh" ]; then
@@ -138,7 +138,7 @@ build_system_prompt() {
       code_ctx=$("$HOME/.surrogate/bin/find-gold-examples.sh" --top 2 --max-bytes 5000 "$QUERY" 2>/dev/null)
     elif [ -x "$HOME/.surrogate/bin/code-search.sh" ]; then
       # Query task → snippets only (faster)
-      code_ctx=$("$HOME/.surrogate/bin/code-search.sh" --top 3 "$QUERY" 2>/dev/null | /usr/bin/head -60)
     fi
   fi
@@ -238,7 +238,7 @@ m = {'model':os.environ['ORM'],'max_tokens':4000,
 print(json.dumps(m))
 " 2>&1) || { log "  body-build failed: $body"; return 1; }
   local resp code body_resp
-  resp=$(/usr/bin/curl -sS -w "\n%{http_code}" \
     --max-time 90 \
     -X POST "https://openrouter.ai/api/v1/chat/completions" \
     -H "Authorization: Bearer $OPENROUTER_API_KEY" \
@@ -246,8 +246,8 @@ print(json.dumps(m))
     -H "X-Title: ai-fallback" \
     -H "content-type: application/json" \
     -d "$body" 2>&1)
-  code=$(echo "$resp" | /usr/bin/tail -1)
-  body_resp=$(echo "$resp" | /usr/bin/sed '$d')
   if [ "$code" != "200" ]; then
     # Log real error reason for debug
     local errmsg
@@ -284,11 +284,11 @@ m = {'systemInstruction':{'parts':[{'text':'''$SYSTEM'''}]},
 print(json.dumps(m))
 " 2>/dev/null)
   local resp code body_resp
-  resp=$(/usr/bin/curl -sS -w "\n%{http_code}" \
     -X POST "https://generativelanguage.googleapis.com/v1beta/models/$model:generateContent?key=$GEMINI_API_KEY" \
     -H "content-type: application/json" -d "$body" 2>&1)
-  code=$(echo "$resp" | /usr/bin/tail -1)
-  body_resp=$(echo "$resp" | /usr/bin/sed '$d')
   [ "$code" != "200" ] && { log "  [$code] falling through"; return 1; }
   local out
   out=$(echo "$body_resp" | "$HOME/.surrogate/venv/bin/python" -c "
@@ -361,7 +361,7 @@ try_cloudflare() {
 # gemma4:26b BLOCKED — user directive (too slow for this hw).
 try_granite() {
   # Check ollama running
-  /usr/bin/curl -sS --max-time 3 http://localhost:11434/api/tags > /dev/null 2>&1 || return 2
   local alias="${LOCAL_MODEL:-granite}"
   log "→ Local Ollama: $alias (free, always-on)"
   local out

     *)          QUERY="$QUERY $1"; shift ;;
   esac
 done
+QUERY=$(echo "$QUERY" | sed 's/^ *//')
+[ -z "$QUERY" ] && { head -15 "$0"; exit 1; }
 # --task <type> — pick the strongest free model per provider for the task.
 # Sets per-provider env vars that try_* functions read (bridge --model alias).
 # Auto-detect if not provided: code keywords → coding, reasoning keywords → reasoning.
 if [ -z "$TASK" ]; then
   q_lower=$(echo "$QUERY" | /usr/bin/tr '[:upper:]' '[:lower:]')
+  if echo "$q_lower" | grep -qE "code|function|implement|refactor|bug|class|method|api|sql|terraform|cloudformation|dockerfile|kubernetes|yaml|typescript|javascript|python|rust|golang"; then
     TASK="coding"
+  elif echo "$q_lower" | grep -qE "analyze|reason|explain why|compare|evaluate|architect|design|trade-?off|deep|think step|proof|calculate|complex"; then
     TASK="reasoning"
   fi
 fi
     if [[ -f "$HOME/.surrogate/embeddings.db" ]]; then
         EMB_COUNT=$(/usr/bin/sqlite3 "$HOME/.surrogate/embeddings.db" 'SELECT COUNT(*) FROM embeddings' 2>/dev/null || echo 0)
         if [[ "$EMB_COUNT" -ge 100 ]]; then
+            SEM_CONTEXT=$(python3 "$HOME/.surrogate/bin/embed-doc.py" --query "$QUERY" 2>/dev/null | head -15)
             if [[ -n "$SEM_CONTEXT" ]]; then
                 QUERY="=== RAG CONTEXT (top-5 semantic matches from knowledge base) ===
 $SEM_CONTEXT
 # --- System prompt from knowledge base + auto code-search if code query ---
 build_system_prompt() {
   local kb="" profile="" code_ctx="" q_lower
+  [ -f "$HOME/.surrogate/memory/knowledge_index.md" ] && kb="$(head -50 $HOME/.surrogate/memory/knowledge_index.md)"
   [ -f "$HOME/.surrogate/memory/user_profile.md" ] && profile="$(cat $HOME/.surrogate/memory/user_profile.md)"
   q_lower=$(echo "$QUERY" | /usr/bin/tr '[:upper:]' '[:lower:]')
   local is_generate=0 is_code=0
+  echo "$q_lower" | grep -qE "code|function|implement|refactor|bug|error|class|method|api|endpoint|schema|model|service|controller|middleware|auth|database|query|sql|deploy|pipeline|terraform|cloudformation|dockerfile|kubernetes|helm|yaml" && is_code=1
+  echo "$q_lower" | grep -qE "create|generate|write|build|new|template|scaffold|design" && is_generate=1
   if [ "$is_code" = "1" ] && [ -d "$HOME/.surrogate/code-vector-db" ]; then
     if [ "$is_generate" = "1" ] && [ -x "$HOME/.surrogate/bin/find-gold-examples.sh" ]; then
       code_ctx=$("$HOME/.surrogate/bin/find-gold-examples.sh" --top 2 --max-bytes 5000 "$QUERY" 2>/dev/null)
     elif [ -x "$HOME/.surrogate/bin/code-search.sh" ]; then
       # Query task → snippets only (faster)
+      code_ctx=$("$HOME/.surrogate/bin/code-search.sh" --top 3 "$QUERY" 2>/dev/null | head -60)
     fi
   fi
 print(json.dumps(m))
 " 2>&1) || { log "  body-build failed: $body"; return 1; }
   local resp code body_resp
+  resp=$(curl -sS -w "\n%{http_code}" \
     --max-time 90 \
     -X POST "https://openrouter.ai/api/v1/chat/completions" \
     -H "Authorization: Bearer $OPENROUTER_API_KEY" \
     -H "X-Title: ai-fallback" \
     -H "content-type: application/json" \
     -d "$body" 2>&1)
+  code=$(echo "$resp" | tail -1)
+  body_resp=$(echo "$resp" | sed '$d')
   if [ "$code" != "200" ]; then
     # Log real error reason for debug
     local errmsg
 print(json.dumps(m))
 " 2>/dev/null)
   local resp code body_resp
+  resp=$(curl -sS -w "\n%{http_code}" \
     -X POST "https://generativelanguage.googleapis.com/v1beta/models/$model:generateContent?key=$GEMINI_API_KEY" \
     -H "content-type: application/json" -d "$body" 2>&1)
+  code=$(echo "$resp" | tail -1)
+  body_resp=$(echo "$resp" | sed '$d')
   [ "$code" != "200" ] && { log "  [$code] falling through"; return 1; }
   local out
   out=$(echo "$body_resp" | "$HOME/.surrogate/venv/bin/python" -c "
 # gemma4:26b BLOCKED — user directive (too slow for this hw).
 try_granite() {
   # Check ollama running
+  curl -sS --max-time 3 http://localhost:11434/api/tags > /dev/null 2>&1 || return 2
   local alias="${LOCAL_MODEL:-granite}"
   log "→ Local Ollama: $alias (free, always-on)"
   local out

bin/dataset-enrich.sh CHANGED Viewed

@@ -17,13 +17,13 @@
 set -uo pipefail
 set -a; source "$HOME/.hermes/.env" 2>/dev/null; set +a
-LOG="$HOME/.surrogate/logs/dataset-enrich.log"
 WORK="$HOME/.hermes/workspace/dataset-enrich"
 mkdir -p "$WORK" "$(dirname "$LOG")"
 echo "[$(date +%H:%M:%S)] dataset enrich start" | tee "$LOG"
-~/.surrogate/venv/bin/python <<'PYEOF' 2>&1 | tee -a "$LOG"
 from huggingface_hub import HfApi
 from pathlib import Path
 from datasets import load_dataset
@@ -36,19 +36,33 @@ api = HfApi()
 # (id, license, slug, schema_hint, per_dataset_cap)
 DATASETS = [
     # ── Coding instruction-tuning ────────────────────────────────────────────
-    ("ise-uiuc/Magicoder-OSS-Instruct-75K",   "MIT",     "magicoder-oss",        "instr-resp",   75000),
-    ("ise-uiuc/Magicoder-Evol-Instruct-110K", "Apache",  "magicoder-evol",       "instr-resp",  110000),
-    ("theblackcat102/evol-codealpaca-v1",     "Apache",  "evol-codealpaca",      "instr-resp",  100000),
-    # ── Multi-turn dialogue (helpful assistant style) ───────────────────────
-    ("HuggingFaceH4/ultrachat_200k",          "MIT",     "ultrachat",            "messages",    200000),
-    ("Open-Orca/SlimOrca-Dedup",              "MIT",     "slim-orca",            "conversations",150000),
-    # ── Real commits (code review / PR training) ────────────────────────────
-    ("bigcode/commitpackft",                  "MIT",     "commitpackft",         "commit",       80000),
     # ── Reasoning / math ────────────────────────────────────────────────────
-    ("TIGER-Lab/MathInstruct",                "MIT",     "math-instruct",        "instr-resp",   60000),
-    ("meta-math/MetaMathQA",                  "MIT",     "metamath",             "query-resp",   50000),
     # ── Helpfulness preferences ─────────────────────────────────────────────
-    ("Anthropic/hh-rlhf",                     "MIT",     "hh-rlhf",              "chosen-rejected",40000),
 ]
 # 1. Existing axentx hashes for dedup
@@ -117,6 +131,32 @@ with open(out_path, "w") as out:
                 elif schema == "chosen-rejected":
                     prompt = str(row.get("chosen","")[:200] or row.get("prompt",""))
                     response = str(row.get("chosen",""))
                 else:
                     continue

 set -uo pipefail
 set -a; source "$HOME/.hermes/.env" 2>/dev/null; set +a
+LOG="$HOME/.claude/logs/dataset-enrich.log"
 WORK="$HOME/.hermes/workspace/dataset-enrich"
 mkdir -p "$WORK" "$(dirname "$LOG")"
 echo "[$(date +%H:%M:%S)] dataset enrich start" | tee "$LOG"
+~/.claude/venv/bin/python <<'PYEOF' 2>&1 | tee -a "$LOG"
 from huggingface_hub import HfApi
 from pathlib import Path
 from datasets import load_dataset
 # (id, license, slug, schema_hint, per_dataset_cap)
 DATASETS = [
     # ── Coding instruction-tuning ────────────────────────────────────────────
+    ("ise-uiuc/Magicoder-OSS-Instruct-75K",         "MIT",         "magicoder-oss",       "instr-resp",            75000),
+    ("ise-uiuc/Magicoder-Evol-Instruct-110K",       "Apache",      "magicoder-evol",      "instr-resp",           110000),
+    ("theblackcat102/evol-codealpaca-v1",           "Apache",      "evol-codealpaca",     "instr-resp",           100000),
+    ("m-a-p/CodeFeedback-Filtered-Instruction",     "Apache",      "codefeedback-filt",   "query-resp",           100000),
+    ("m-a-p/Code-Feedback",                         "Apache",      "codefeedback-multi",  "messages",              66383),
+    ("QuixiAI/dolphin-coder",                       "Apache",      "dolphin-coder",       "system-question-resp", 100000),
+    # ── Multi-turn dialogue + agentic reasoning ─────────────────────────────
+    ("HuggingFaceH4/ultrachat_200k",                "MIT",         "ultrachat",           "messages",             200000),
+    ("Open-Orca/SlimOrca-Dedup",                    "MIT",         "slim-orca",           "conversations",        150000),
+    ("microsoft/orca-agentinstruct-1M-v1",          "CDLA",        "orca-agentinstruct",  "messages",             150000),
+    # ── Real commits + code review ──────────────────────────────────────────
+    ("bigcode/commitpackft",                        "MIT",         "commitpackft",        "commit",                80000),
+    ("VatsaDev/code-review",                        "MIT",         "vatsa-code-review",   "instr-resp",            40000),
+    # ── DevSecOps: CVE / CWE / vulnerability detection ──────────────────────
+    ("AlicanKiraz0/All-CVE-Records-Training-Dataset","Apache",     "cve-records-chat",    "system-user-assistant", 30000),
+    ("CyberNative/Code_Vulnerability_Security_DPO", "Apache",      "vuln-secure-dpo",     "dpo-question",           4656),
+    ("bstee615/diversevul",                         "MIT-research","diversevul-cwe",      "code-defect-cwe",       80000),
+    ("google/code_x_glue_cc_defect_detection",      "C-UDA",       "codexglue-defect",    "code-defect",           27318),
+    # ── Function/tool calling (agentic core) ────────────────────────────────
+    ("Salesforce/xlam-function-calling-60k",        "CC-BY-4.0",   "xlam-fc",             "tools-query-answers",   60000),
+    ("glaiveai/glaive-function-calling-v2",         "Apache",      "glaive-fc-v2",        "system-chat",          112960),
+    ("NousResearch/hermes-function-calling-v1",     "Apache",      "hermes-fc",           "conversations",         11578),
     # ── Reasoning / math ────────────────────────────────────────────────────
+    ("TIGER-Lab/MathInstruct",                      "MIT",         "math-instruct",       "instr-resp",            60000),
+    ("meta-math/MetaMathQA",                        "MIT",         "metamath",            "query-resp",            50000),
     # ── Helpfulness preferences ─────────────────────────────────────────────
+    ("Anthropic/hh-rlhf",                           "MIT",         "hh-rlhf",             "chosen-rejected",       40000),
 ]
 # 1. Existing axentx hashes for dedup
                 elif schema == "chosen-rejected":
                     prompt = str(row.get("chosen","")[:200] or row.get("prompt",""))
                     response = str(row.get("chosen",""))
+                elif schema == "system-user-assistant":   # AlicanKiraz0 CVE
+                    prompt = f"{str(row.get('System','')).strip()}\n\nUser: {str(row.get('User','')).strip()}"
+                    response = str(row.get("Assistant",""))
+                elif schema == "dpo-question":            # CyberNative DPO
+                    prompt = str(row.get("question",""))
+                    response = str(row.get("chosen",""))
+                elif schema == "code-defect-cwe":         # DiverseVul
+                    cwes = row.get("cwe") or []
+                    cwe_str = ",".join(cwes) if isinstance(cwes, list) and cwes else "none"
+                    label = "VULNERABLE" if row.get("target") == 1 else "SAFE"
+                    prompt = f"Audit this function for security vulnerabilities. Identify any CWE matches.\n```\n{str(row.get('func',''))[:6000]}\n```"
+                    response = f"Verdict: {label}\nCWE: {cwe_str}\nProject: {row.get('project','')}\nCommit: {str(row.get('message',''))[:500]}"
+                elif schema == "code-defect":             # CodeXGLUE
+                    label = "VULNERABLE" if row.get("target") else "SAFE"
+                    prompt = f"Review this C function for defects:\n```c\n{str(row.get('func',''))[:6000]}\n```"
+                    response = f"Defect detected: {label}\nProject: {row.get('project','')}\nCommit: {row.get('commit_id','')}"
+                elif schema == "tools-query-answers":     # xLAM
+                    tools_json = json.dumps(row.get("tools",[]))[:3000]
+                    prompt = f"You have access to these tools:\n{tools_json}\n\nUser query: {row.get('query','')}"
+                    response = json.dumps(row.get("answers",[]), ensure_ascii=False)
+                elif schema == "system-chat":             # Glaive-v2
+                    prompt = str(row.get("system",""))
+                    response = str(row.get("chat",""))
+                elif schema == "system-question-resp":    # dolphin-coder
+                    prompt = f"{str(row.get('system_prompt','')).strip()}\n\n{str(row.get('question','')).strip()}"
+                    response = str(row.get("response",""))
                 else:
                     continue

bin/lib/context_builder.sh CHANGED Viewed

@@ -16,7 +16,7 @@ build_rich_context() {
     REPO_MAP=""
     for candidate in "$SHARED/repo-maps/${PRIO_PROJECT}_map.md" "$SHARED/repo-maps/${PRIO_PROJECT}.md"; do
         if [[ -f "$candidate" ]]; then
-            REPO_MAP=$(/usr/bin/head -c 10000 "$candidate")
             break
         fi
     done
@@ -25,39 +25,39 @@ build_rich_context() {
     SIMILAR_FUNCS=""
     if [[ -d "$PROJECT_DIR" ]]; then
         # Extract keywords from title for grep
-        local KW=$(echo "$PRIO_TITLE" | /usr/bin/tr '[:upper:]' '[:lower:]' | /usr/bin/tr -cs 'a-z0-9' ' ' | /usr/bin/tr ' ' '\n' | /usr/bin/awk 'length>4' | /usr/bin/head -3 | /usr/bin/tr '\n' '|' | /usr/bin/sed 's/|$//')
         if [[ -n "$KW" ]]; then
             SIMILAR_FUNCS=$(/usr/bin/find "$PROJECT_DIR" -type f \( -name '*.py' -o -name '*.ts' -o -name '*.tsx' -o -name '*.js' -o -name '*.go' \) ! -path '*/node_modules/*' ! -path '*/.hermes-*' 2>/dev/null | \
-                xargs /usr/bin/grep -lE "($KW)" 2>/dev/null | /usr/bin/head -3 | while read f; do
                     echo "=== ${f#$PROJECT_DIR/} ==="
-                    /usr/bin/grep -A3 -E "^(def|function|export const|class|async def|interface)" "$f" 2>/dev/null | /usr/bin/head -30
-                done 2>/dev/null | /usr/bin/head -c 4000)
         fi
     fi
     # 3. RAG: actual code patterns from project (SQLite FTS via ask-sqlite.py if exists)
     RAG_EXAMPLES=""
     if [[ -x "$HOME/.surrogate/bin/ask-sqlite.py" ]]; then
-        RAG_EXAMPLES=$(/usr/bin/python3 "$HOME/.surrogate/bin/ask-sqlite.py" \
-            "$PRIO_PROJECT $PRIO_TITLE" 2>/dev/null | /usr/bin/head -c 3000)
     fi
     # 4. Semantic RAG (from embeddings) — top-5 similar
     SEMANTIC_RAG=""
     if [[ -f "$HOME/.surrogate/embeddings.db" ]]; then
-        SEMANTIC_RAG=$(/usr/bin/python3 "$HOME/.surrogate/bin/embed-doc.py" --query "$PRIO_TITLE" 2>/dev/null | /usr/bin/head -c 2000)
     fi
     # 5. Past ACCEPTED examples (few-shot from quality≥7 history)
     FEWSHOT_ACCEPTED=""
-    for review in $(/bin/ls -t "$HOME/.hermes/workspace/qwen-coder-reviews/"*.review.json 2>/dev/null | /usr/bin/head -30); do
-        if /usr/bin/grep -qE '"quality_score":\s*[789]|"quality_score":\s*10' "$review" 2>/dev/null; then
             local OUT_FILE=$(basename "$review" .review.json)
             # Search all worker output dirs
             for WD in qwen-coder dev-cloud-samba dev-cloud-github dev-cloud-cloudflare dev-cloud-groq dev-cloud-synthesis; do
                 local OUT_PATH="$HOME/.hermes/workspace/$WD/${OUT_FILE}.md"
                 if [[ -f "$OUT_PATH" ]]; then
-                    FEWSHOT_ACCEPTED=$(/usr/bin/head -c 2000 "$OUT_PATH")
                     break 2
                 fi
             done
@@ -66,8 +66,8 @@ build_rich_context() {
     # 6. Anti-patterns (last 5 rejection reasons across all workers)
     ANTI_PATTERNS=""
-    for review in $(/bin/ls -t "$HOME/.hermes/workspace/qwen-coder-reviews/"*.review.json 2>/dev/null | /usr/bin/head -10); do
-        local bugs=$(/usr/bin/python3 -c "
 import json, re, sys
 try:
     txt = open('$review').read()
@@ -81,14 +81,14 @@ except: pass
 " 2>/dev/null)
         [[ -n "$bugs" ]] && ANTI_PATTERNS="$ANTI_PATTERNS$bugs"$'\n'
     done
-    ANTI_PATTERNS=$(echo "$ANTI_PATTERNS" | /usr/bin/head -10)
     # 7. Active-learning prompt deltas — aggregate last 5 UNIQUE anti-patterns.
     # Preference: same-project anti-patterns first, then generic.
     # Dedup by first 80 chars of prompt_addition (similar bugs shouldn't bloat prompt).
     PROMPT_DELTAS=""
     if [[ -f "$HOME/.surrogate/memory/worker-prompt-deltas.jsonl" ]]; then
-        PROMPT_DELTAS=$(/usr/bin/python3 -c "
 import json, sys
 from pathlib import Path
 try:
@@ -124,7 +124,7 @@ except Exception as e: pass
     # lower because they're supplementary; the spec is authoritative.
     PRIO_SPEC=""
     local SPEC_FILE="$HOME/.hermes/workspace/swarm-shared/specs/${PRIO_ID}.md"
-    [[ -f "$SPEC_FILE" ]] && PRIO_SPEC=$(/usr/bin/head -c 6000 "$SPEC_FILE")
     # 9. Task-type authoritative sources — boost scraped knowledge based on title.
     # Security task → CVE/MITRE/OWASP/Prowler. SRE → Google SRE/postmortems.
@@ -132,7 +132,7 @@ except Exception as e: pass
     # This is THE fix that makes all our scraping actually used by Hermes workers.
     AUTHORITATIVE_CONTEXT=""
     if [[ -f "$HOME/.surrogate/index.db" ]]; then
-        AUTHORITATIVE_CONTEXT=$(/usr/bin/python3 <<PYEOF
 import sqlite3, re
 title = """${PRIO_TITLE}""".lower()
 project = """${PRIO_PROJECT}""".lower()
@@ -223,7 +223,7 @@ PYEOF
     # 10. FalkorDB graph — related decisions + past priorities with similar theme
     GRAPH_CONTEXT=""
-    local REDIS_SOCK=$(/usr/bin/find /var/folders /tmp -name 'redis.socket' -type s 2>/dev/null | /usr/bin/head -1)
     if [[ -n "$REDIS_SOCK" ]]; then
         # Get related priorities + learned rules
         GRAPH_CONTEXT=$(/opt/homebrew/bin/redis-cli -s "$REDIS_SOCK" GRAPH.QUERY ashira "
@@ -231,14 +231,14 @@ PYEOF
             OPTIONAL MATCH (p)-[:HAS_LEARNED_RULE]->(l:LearnedRule)
             OPTIONAL MATCH (p)-[:COMMITTED_AS]->(c:Commit)
             RETURN p.id, p.title, l.content, c.msg LIMIT 8
-        " 2>/dev/null | /usr/bin/tail -c 2500)
     fi
     # 11. Hermes trace recall — past similar tasks Hermes handled (from JSONL)
     HERMES_RECALL=""
     local TRACE_DIR="$HOME/axentx/surrogate/data/training-jsonl"
     if [[ -d "$TRACE_DIR" ]]; then
-        HERMES_RECALL=$(/usr/bin/python3 <<PYEOF
 import json, re, glob
 title = """${PRIO_TITLE}""".lower()
 words = [w for w in re.sub(r'[^a-zA-Z0-9 ]', ' ', title).split() if len(w) > 4][:4]

     REPO_MAP=""
     for candidate in "$SHARED/repo-maps/${PRIO_PROJECT}_map.md" "$SHARED/repo-maps/${PRIO_PROJECT}.md"; do
         if [[ -f "$candidate" ]]; then
+            REPO_MAP=$(head -c 10000 "$candidate")
             break
         fi
     done
     SIMILAR_FUNCS=""
     if [[ -d "$PROJECT_DIR" ]]; then
         # Extract keywords from title for grep
+        local KW=$(echo "$PRIO_TITLE" | /usr/bin/tr '[:upper:]' '[:lower:]' | /usr/bin/tr -cs 'a-z0-9' ' ' | /usr/bin/tr ' ' '\n' | awk 'length>4' | head -3 | /usr/bin/tr '\n' '|' | sed 's/|$//')
         if [[ -n "$KW" ]]; then
             SIMILAR_FUNCS=$(/usr/bin/find "$PROJECT_DIR" -type f \( -name '*.py' -o -name '*.ts' -o -name '*.tsx' -o -name '*.js' -o -name '*.go' \) ! -path '*/node_modules/*' ! -path '*/.hermes-*' 2>/dev/null | \
+                xargs grep -lE "($KW)" 2>/dev/null | head -3 | while read f; do
                     echo "=== ${f#$PROJECT_DIR/} ==="
+                    grep -A3 -E "^(def|function|export const|class|async def|interface)" "$f" 2>/dev/null | head -30
+                done 2>/dev/null | head -c 4000)
         fi
     fi
     # 3. RAG: actual code patterns from project (SQLite FTS via ask-sqlite.py if exists)
     RAG_EXAMPLES=""
     if [[ -x "$HOME/.surrogate/bin/ask-sqlite.py" ]]; then
+        RAG_EXAMPLES=$(python3 "$HOME/.surrogate/bin/ask-sqlite.py" \
+            "$PRIO_PROJECT $PRIO_TITLE" 2>/dev/null | head -c 3000)
     fi
     # 4. Semantic RAG (from embeddings) — top-5 similar
     SEMANTIC_RAG=""
     if [[ -f "$HOME/.surrogate/embeddings.db" ]]; then
+        SEMANTIC_RAG=$(python3 "$HOME/.surrogate/bin/embed-doc.py" --query "$PRIO_TITLE" 2>/dev/null | head -c 2000)
     fi
     # 5. Past ACCEPTED examples (few-shot from quality≥7 history)
     FEWSHOT_ACCEPTED=""
+    for review in $(/bin/ls -t "$HOME/.hermes/workspace/qwen-coder-reviews/"*.review.json 2>/dev/null | head -30); do
+        if grep -qE '"quality_score":\s*[789]|"quality_score":\s*10' "$review" 2>/dev/null; then
             local OUT_FILE=$(basename "$review" .review.json)
             # Search all worker output dirs
             for WD in qwen-coder dev-cloud-samba dev-cloud-github dev-cloud-cloudflare dev-cloud-groq dev-cloud-synthesis; do
                 local OUT_PATH="$HOME/.hermes/workspace/$WD/${OUT_FILE}.md"
                 if [[ -f "$OUT_PATH" ]]; then
+                    FEWSHOT_ACCEPTED=$(head -c 2000 "$OUT_PATH")
                     break 2
                 fi
             done
     # 6. Anti-patterns (last 5 rejection reasons across all workers)
     ANTI_PATTERNS=""
+    for review in $(/bin/ls -t "$HOME/.hermes/workspace/qwen-coder-reviews/"*.review.json 2>/dev/null | head -10); do
+        local bugs=$(python3 -c "
 import json, re, sys
 try:
     txt = open('$review').read()
 " 2>/dev/null)
         [[ -n "$bugs" ]] && ANTI_PATTERNS="$ANTI_PATTERNS$bugs"$'\n'
     done
+    ANTI_PATTERNS=$(echo "$ANTI_PATTERNS" | head -10)
     # 7. Active-learning prompt deltas — aggregate last 5 UNIQUE anti-patterns.
     # Preference: same-project anti-patterns first, then generic.
     # Dedup by first 80 chars of prompt_addition (similar bugs shouldn't bloat prompt).
     PROMPT_DELTAS=""
     if [[ -f "$HOME/.surrogate/memory/worker-prompt-deltas.jsonl" ]]; then
+        PROMPT_DELTAS=$(python3 -c "
 import json, sys
 from pathlib import Path
 try:
     # lower because they're supplementary; the spec is authoritative.
     PRIO_SPEC=""
     local SPEC_FILE="$HOME/.hermes/workspace/swarm-shared/specs/${PRIO_ID}.md"
+    [[ -f "$SPEC_FILE" ]] && PRIO_SPEC=$(head -c 6000 "$SPEC_FILE")
     # 9. Task-type authoritative sources — boost scraped knowledge based on title.
     # Security task → CVE/MITRE/OWASP/Prowler. SRE → Google SRE/postmortems.
     # This is THE fix that makes all our scraping actually used by Hermes workers.
     AUTHORITATIVE_CONTEXT=""
     if [[ -f "$HOME/.surrogate/index.db" ]]; then
+        AUTHORITATIVE_CONTEXT=$(python3 <<PYEOF
 import sqlite3, re
 title = """${PRIO_TITLE}""".lower()
 project = """${PRIO_PROJECT}""".lower()
     # 10. FalkorDB graph — related decisions + past priorities with similar theme
     GRAPH_CONTEXT=""
+    local REDIS_SOCK=$(/usr/bin/find /var/folders /tmp -name 'redis.socket' -type s 2>/dev/null | head -1)
     if [[ -n "$REDIS_SOCK" ]]; then
         # Get related priorities + learned rules
         GRAPH_CONTEXT=$(/opt/homebrew/bin/redis-cli -s "$REDIS_SOCK" GRAPH.QUERY ashira "
             OPTIONAL MATCH (p)-[:HAS_LEARNED_RULE]->(l:LearnedRule)
             OPTIONAL MATCH (p)-[:COMMITTED_AS]->(c:Commit)
             RETURN p.id, p.title, l.content, c.msg LIMIT 8
+        " 2>/dev/null | tail -c 2500)
     fi
     # 11. Hermes trace recall — past similar tasks Hermes handled (from JSONL)
     HERMES_RECALL=""
     local TRACE_DIR="$HOME/axentx/surrogate/data/training-jsonl"
     if [[ -d "$TRACE_DIR" ]]; then
+        HERMES_RECALL=$(python3 <<PYEOF
 import json, re, glob
 title = """${PRIO_TITLE}""".lower()
 words = [w for w in re.sub(r'[^a-zA-Z0-9 ]', ' ', title).split() if len(w) > 4][:4]

bin/push-training-to-hf.sh CHANGED Viewed

@@ -31,7 +31,7 @@ if command -v huggingface-cli >/dev/null 2>&1 && [[ -n "${HF_TOKEN:-}" ]]; then
         --commit-message "auto-orchestrate: +${NEW_LINES} pairs ($(date +%H:%M))" \
         --token "$HF_TOKEN" 2>&1 | tee -a "$LOG"
 else
-    /usr/bin/python3 - "$SLICE" "$NEW_LINES" "$DATE_TAG" <<'PYEOF' 2>&1 | tee -a "$LOG"
 import sys, os
 slice_path, n_pairs, date_tag = sys.argv[1], sys.argv[2], sys.argv[3]
 try:

         --commit-message "auto-orchestrate: +${NEW_LINES} pairs ($(date +%H:%M))" \
         --token "$HF_TOKEN" 2>&1 | tee -a "$LOG"
 else
+    python3 - "$SLICE" "$NEW_LINES" "$DATE_TAG" <<'PYEOF' 2>&1 | tee -a "$LOG"
 import sys, os
 slice_path, n_pairs, date_tag = sys.argv[1], sys.argv[2], sys.argv[3]
 try:

bin/skill-synthesis-daemon.sh CHANGED Viewed

@@ -32,13 +32,13 @@ while true; do
             -name "*.sh" -o -name "*.yaml" -o -name "*.toml" -o -name "*.json" \
         \) -size -50k -mtime -3 2>/dev/null | head -200 | while read -r f; do
             # Skip already-synthesized
-            HASH=$(/usr/bin/python3 -c "import hashlib; print(hashlib.md5(open('$f','rb').read()).hexdigest()[:12])" 2>/dev/null)
             [[ -z "$HASH" ]] && continue
             STAMP="$SKILLS_DIR/.synthesized/$HASH"
             [[ -f "$STAMP" ]] && continue
             mkdir -p "$(dirname "$STAMP")"
-            /usr/bin/python3 - "$f" "$SKILLS_DIR" "$PAIRS" "$STAMP" <<'PYEOF' 2>>"$LOG"
 import sys, re, json, time, os, hashlib
 from pathlib import Path

             -name "*.sh" -o -name "*.yaml" -o -name "*.toml" -o -name "*.json" \
         \) -size -50k -mtime -3 2>/dev/null | head -200 | while read -r f; do
             # Skip already-synthesized
+            HASH=$(python3 -c "import hashlib; print(hashlib.md5(open('$f','rb').read()).hexdigest()[:12])" 2>/dev/null)
             [[ -z "$HASH" ]] && continue
             STAMP="$SKILLS_DIR/.synthesized/$HASH"
             [[ -f "$STAMP" ]] && continue
             mkdir -p "$(dirname "$STAMP")"
+            python3 - "$f" "$SKILLS_DIR" "$PAIRS" "$STAMP" <<'PYEOF' 2>>"$LOG"
 import sys, re, json, time, os, hashlib
 from pathlib import Path

bin/surrogate CHANGED Viewed

@@ -206,7 +206,7 @@ run_agent() {
     export AGENT_EFFORT="$EFFORT"
     export AGENT_CWD="$(pwd)"
-    /usr/bin/python3 <<'PYEOF'
 import os, sys, json, re, sqlite3, subprocess, urllib.request, urllib.error, time
 from datetime import datetime
 from pathlib import Path
@@ -278,7 +278,7 @@ def tool_glob(pattern, path=None):
 def tool_grep(pattern, path=None, glob='*'):
     base = os.path.expanduser(path) if path else CWD
-    cmd = f"/usr/bin/grep -rn --include='{glob}' -E {subprocess.list2cmdline([pattern])} {base} 2>/dev/null | head -40"
     r = subprocess.run(cmd, shell=True, capture_output=True, text=True, timeout=15)
     return {'matches': r.stdout[:5000]}
@@ -505,7 +505,7 @@ print_statusline() {
         if [[ ! -f "$cache" ]] || [[ $(($(date +%s) - $(stat -f %m "$cache" 2>/dev/null || stat -c %Y "$cache" 2>/dev/null || echo 0))) -gt 60 ]]; then
             (curl -sS -m 5 -H "Authorization: Bearer ${OPENROUTER_API_KEY:-${OR_KEY:-}}" \
                 https://openrouter.ai/api/v1/auth/key 2>/dev/null \
-                | /usr/bin/python3 -c "import json,sys; d=json.load(sys.stdin).get('data',{}); print(f'\$OR={d.get(\"usage\",0):.3f}')" \
                 > "$cache") 2>/dev/null &
         fi
         cost_str=$(cat "$cache" 2>/dev/null | head -1)
@@ -523,7 +523,7 @@ HISTORY_FILE="$SURROGATE_HOME/history.jsonl"
 mkdir -p "$(dirname "$HISTORY_FILE")"
 save_history() {
     local prompt="$1"
-    /usr/bin/python3 -c "
 import json, sys, time
 from pathlib import Path
 Path('$HISTORY_FILE').parent.mkdir(parents=True, exist_ok=True)
@@ -630,7 +630,7 @@ repl() {
                 ;;
             /history)
                 if [[ -f "$HISTORY_FILE" ]]; then
-                    /usr/bin/python3 -c "
 import json
 from pathlib import Path
 import time
@@ -661,7 +661,7 @@ for l in lines:
                 fi
                 ;;
             /cost)
-                bash -c 'source ~/.hermes/.env; curl -s -H "Authorization: Bearer $OPENROUTER_API_KEY" https://openrouter.ai/api/v1/auth/key' 2>&1 | /usr/bin/python3 -c "import json,sys; d=json.load(sys.stdin).get('data',{}); print(f'  OpenRouter: \${d.get(\"usage\",0):.4f}')"
                 ;;
             /cost-all) bash "$0" --status ;;
             /remote*)
@@ -739,7 +739,7 @@ init_project() {
     # ── Step A: web research the tech keywords (free, fast, grounds the PRD) ─
     echo "${MA}▶ Researching tech context...${R}"
     local research_md=""
-    research_md=$(/usr/bin/python3 - "$Q_STACK $Q_DESC $Q_FEATURES" <<'PYEOF' 2>/dev/null
 import sys, urllib.request, urllib.parse, re
 text = sys.argv[1]
 # Extract candidate tech keywords (CamelCase, lowercase known stacks, version tags)
@@ -834,7 +834,7 @@ Output ONLY the markdown, no preamble. Adapt to the actual stack the user chose
     # ── Step C: direct LLM call (curl), bypassing the agent tool-loop ──
     local prd=""
     if [[ -n "${GEMINI_API_KEY:-}" ]]; then
-        prd=$(/usr/bin/python3 - "$prompt" "$GEMINI_API_KEY" <<'PYEOF' 2>/dev/null
 import sys, json, urllib.request
 prompt, key = sys.argv[1], sys.argv[2]
 url = f"https://generativelanguage.googleapis.com/v1beta/models/gemini-2.5-flash:generateContent?key={key}"
@@ -853,7 +853,7 @@ PYEOF
     fi
     if [[ -z "$prd" ]] || [[ ${#prd} -lt 400 ]]; then
         if [[ -n "${OPENROUTER_API_KEY:-}" ]]; then
-            prd=$(/usr/bin/python3 - "$prompt" "$OPENROUTER_API_KEY" <<'PYEOF' 2>/dev/null
 import sys, json, urllib.request
 prompt, key = sys.argv[1], sys.argv[2]
 body = {"model":"qwen/qwen3-coter","messages":[{"role":"user","content":prompt}],
@@ -884,7 +884,7 @@ PYEOF
     else
         echo "$prd" > "$target"
         # ── Step D: push PRD as training pair (HF dataset feedback loop) ───
-        /usr/bin/python3 - "$prompt" "$prd" <<'PYEOF' 2>/dev/null &
 import sys, json, time, os
 from pathlib import Path
 log = Path.home() / '.surrogate' / 'training-pairs.jsonl'
@@ -941,7 +941,7 @@ auto_dev_mode() {
         # Drive tasks from plan until all done
         while true; do
             # Pop next pending task from plan
-            NEXT_TASK=$(/usr/bin/python3 <<'PYEOF'
 import sys, re
 from pathlib import Path
 plan_file = Path.home() / '.surrogate' / 'active-plan.md'
@@ -960,7 +960,7 @@ PYEOF
             echo "${BCY}${B}▸ Next task:${R} $NEXT_TASK"
             bash ~/.surrogate/bin/surrogate-orchestrate.sh "$NEXT_TASK"
             # Mark done in plan
-            /usr/bin/python3 <<PYEOF
 from pathlib import Path
 plan_file = Path.home() / '.surrogate' / 'active-plan.md'
 if plan_file.exists():
@@ -1035,7 +1035,7 @@ monitor_mode() {
         # If critical → spawn agent to investigate
         if [[ $ERR_COUNT -gt 50 ]]; then
             echo "${RE}⚠ elevated errors — dispatching investigator agent${R}"
-            (run_agent "เช็ค ~/.surrogate/logs/ หา pattern error ที่ recur บ่อย และเสนอ fix list (ห้ามแก้เอง รายงานอย่างเดียว)" 2>&1 | /usr/bin/head -20) &
         fi
         sleep 30
     done
@@ -1070,7 +1070,7 @@ show_agents() {
     banner
     echo ""
     echo "${B}▸ Available agents (~/.surrogate/agents/)${R}"
-    ls ~/.surrogate/agents/*.md 2>/dev/null | /usr/bin/sed 's|.*/||;s|.md$||' | sed 's/^/  /'
 }
 # ═══ Dispatch ═══

     export AGENT_EFFORT="$EFFORT"
     export AGENT_CWD="$(pwd)"
+    python3 <<'PYEOF'
 import os, sys, json, re, sqlite3, subprocess, urllib.request, urllib.error, time
 from datetime import datetime
 from pathlib import Path
 def tool_grep(pattern, path=None, glob='*'):
     """Recursively search files for an extended-regex pattern using grep.

     Args:
         pattern: extended regular expression handed to ``grep -E``.
         path: directory or file to search; ``~`` is expanded. Falls back to
             the module-level ``CWD`` when omitted or empty.
         glob: filename glob passed to grep's ``--include`` filter.

     Returns:
         ``{'matches': str}`` holding at most the first 40 lines of
         ``grep -rn`` output, truncated to 5000 characters. Empty string
         when nothing matched or grep failed (its stderr is discarded).

     Raises:
         subprocess.TimeoutExpired: if the pipeline runs longer than 15 s.
     """
     import shlex  # local import: this block may be applied without touching the file header

     base = os.path.expanduser(path) if path else CWD
     # The command goes through /bin/sh (needed for the `| head -40` cap), so
     # every interpolated value must be POSIX-quoted. subprocess.list2cmdline
     # applies Windows quoting rules and leaves `$`, backticks and `'` live.
     # `-e` and `--` keep a leading '-' in the pattern or path from being
     # parsed as a grep option.
     cmd = (
         f"grep -rn --include={shlex.quote(glob)} -E -e {shlex.quote(pattern)} "
         f"-- {shlex.quote(base)} 2>/dev/null | head -40"
     )
     r = subprocess.run(cmd, shell=True, capture_output=True, text=True, timeout=15)
     return {'matches': r.stdout[:5000]}
         if [[ ! -f "$cache" ]] || [[ $(($(date +%s) - $(stat -f %m "$cache" 2>/dev/null || stat -c %Y "$cache" 2>/dev/null || echo 0))) -gt 60 ]]; then
             (curl -sS -m 5 -H "Authorization: Bearer ${OPENROUTER_API_KEY:-${OR_KEY:-}}" \
                 https://openrouter.ai/api/v1/auth/key 2>/dev/null \
+                | python3 -c "import json,sys; d=json.load(sys.stdin).get('data',{}); print(f'\$OR={d.get(\"usage\",0):.3f}')" \
                 > "$cache") 2>/dev/null &
         fi
         cost_str=$(cat "$cache" 2>/dev/null | head -1)
 mkdir -p "$(dirname "$HISTORY_FILE")"
 save_history() {
     local prompt="$1"
+    python3 -c "
 import json, sys, time
 from pathlib import Path
 Path('$HISTORY_FILE').parent.mkdir(parents=True, exist_ok=True)
                 ;;
             /history)
                 if [[ -f "$HISTORY_FILE" ]]; then
+                    python3 -c "
 import json
 from pathlib import Path
 import time
                 fi
                 ;;
             /cost)
+                bash -c 'source ~/.hermes/.env; curl -s -H "Authorization: Bearer $OPENROUTER_API_KEY" https://openrouter.ai/api/v1/auth/key' 2>&1 | python3 -c "import json,sys; d=json.load(sys.stdin).get('data',{}); print(f'  OpenRouter: \${d.get(\"usage\",0):.4f}')"
                 ;;
             /cost-all) bash "$0" --status ;;
             /remote*)
     # ── Step A: web research the tech keywords (free, fast, grounds the PRD) ─
     echo "${MA}▶ Researching tech context...${R}"
     local research_md=""
+    research_md=$(python3 - "$Q_STACK $Q_DESC $Q_FEATURES" <<'PYEOF' 2>/dev/null
 import sys, urllib.request, urllib.parse, re
 text = sys.argv[1]
 # Extract candidate tech keywords (CamelCase, lowercase known stacks, version tags)
     # ── Step C: direct LLM call (curl), bypassing the agent tool-loop ──
     local prd=""
     if [[ -n "${GEMINI_API_KEY:-}" ]]; then
+        prd=$(python3 - "$prompt" "$GEMINI_API_KEY" <<'PYEOF' 2>/dev/null
 import sys, json, urllib.request
 prompt, key = sys.argv[1], sys.argv[2]
 url = f"https://generativelanguage.googleapis.com/v1beta/models/gemini-2.5-flash:generateContent?key={key}"
     fi
     if [[ -z "$prd" ]] || [[ ${#prd} -lt 400 ]]; then
         if [[ -n "${OPENROUTER_API_KEY:-}" ]]; then
+            prd=$(python3 - "$prompt" "$OPENROUTER_API_KEY" <<'PYEOF' 2>/dev/null
 import sys, json, urllib.request
 prompt, key = sys.argv[1], sys.argv[2]
 body = {"model":"qwen/qwen3-coter","messages":[{"role":"user","content":prompt}],
     else
         echo "$prd" > "$target"
         # ── Step D: push PRD as training pair (HF dataset feedback loop) ───
+        python3 - "$prompt" "$prd" <<'PYEOF' 2>/dev/null &
 import sys, json, time, os
 from pathlib import Path
 log = Path.home() / '.surrogate' / 'training-pairs.jsonl'
         # Drive tasks from plan until all done
         while true; do
             # Pop next pending task from plan
+            NEXT_TASK=$(python3 <<'PYEOF'
 import sys, re
 from pathlib import Path
 plan_file = Path.home() / '.surrogate' / 'active-plan.md'
             echo "${BCY}${B}▸ Next task:${R} $NEXT_TASK"
             bash ~/.surrogate/bin/surrogate-orchestrate.sh "$NEXT_TASK"
             # Mark done in plan
+            python3 <<PYEOF
 from pathlib import Path
 plan_file = Path.home() / '.surrogate' / 'active-plan.md'
 if plan_file.exists():
         # If critical → spawn agent to investigate
         if [[ $ERR_COUNT -gt 50 ]]; then
             echo "${RE}⚠ elevated errors — dispatching investigator agent${R}"
+            (run_agent "เช็ค ~/.surrogate/logs/ หา pattern error ที่ recur บ่อย และเสนอ fix list (ห้ามแก้เอง รายงานอย่างเดียว)" 2>&1 | head -20) &
         fi
         sleep 30
     done
     banner
     echo ""
     echo "${B}▸ Available agents (~/.surrogate/agents/)${R}"
+    ls ~/.surrogate/agents/*.md 2>/dev/null | sed 's|.*/||;s|.md$||' | sed 's/^/  /'
 }
 # ═══ Dispatch ═══

bin/surrogate-consolidate.sh CHANGED Viewed

@@ -15,7 +15,7 @@ LOG="$HOME/.surrogate/logs/surrogate-consolidate.log"
 CHECKPOINT="$MEM/consolidate.checkpoint"
 mkdir -p "$(dirname "$LOG")" "$MEM"
-/usr/bin/python3 <<'PYEOF' 2>>"$LOG"
 import json, os, sqlite3, urllib.request, hashlib, subprocess
 from datetime import datetime
 from pathlib import Path

 CHECKPOINT="$MEM/consolidate.checkpoint"
 mkdir -p "$(dirname "$LOG")" "$MEM"
+python3 <<'PYEOF' 2>>"$LOG"
 import json, os, sqlite3, urllib.request, hashlib, subprocess
 from datetime import datetime
 from pathlib import Path

bin/surrogate-daemon.sh CHANGED Viewed

@@ -33,7 +33,7 @@ case "$CMD" in
         shift
         TASK="$*"
         [[ -z "$TASK" ]] && { echo "need task"; exit 2; }
-        ENQUEUE_TASK="$TASK" /usr/bin/python3 - "$QUEUE" <<'PYEOF'
 import json, uuid, os, sys
 from datetime import datetime
 queue_path = sys.argv[1]
@@ -159,7 +159,7 @@ PYEOF
     _worker)
         # ── Pop one task from queue (P0-user first, then plan, then self-gen) ──────
         _pop_queue() {
-            /usr/bin/python3 <<PYEOF
 import json, os, sys, fcntl
 from pathlib import Path
 q = Path(os.path.expanduser('$QUEUE'))
@@ -188,7 +188,7 @@ PYEOF
         # ── Pop next task from active plan (no sleep needed — plan drives work) ──
         _pop_plan() {
-            /usr/bin/python3 <<'PYEOF'
 import sys, json, os, re, uuid
 from pathlib import Path
 from datetime import datetime
@@ -223,7 +223,7 @@ PYEOF
         # ── Self-generate task from pool (fallback when no plan + queue empty) ──
         _self_gen() {
-            AUTO_TASK=$(/usr/bin/python3 <<'PYEOF'
 import json, os, random
 from pathlib import Path
 ep = Path(os.path.expanduser('~/.surrogate/state/surrogate-memory/episodes.jsonl'))
@@ -278,7 +278,7 @@ for t in random.sample(pool, len(pool)):
 print(chosen or pool[0])
 PYEOF
 )
-            echo "{\"id\":\"auto-$(/usr/bin/python3 -c 'import uuid; print(uuid.uuid4().hex[:8])')\",\"task\":\"$AUTO_TASK\",\"self_generated\":true,\"source\":\"self-gen\"}"
         }
         # ── Task resolution: queue → plan → self-gen (no 60s sleep) ─────────────
@@ -297,9 +297,9 @@ PYEOF
         fi
         # Extract task
-        TASK=$(echo "$TASK_JSON" | /usr/bin/python3 -c "import json,sys; print(json.loads(sys.stdin.read())['task'])")
-        TID=$(echo "$TASK_JSON" | /usr/bin/python3 -c "import json,sys; print(json.loads(sys.stdin.read())['id'])")
-        SOURCE=$(echo "$TASK_JSON" | /usr/bin/python3 -c "import json,sys; print(json.loads(sys.stdin.read()).get('source','queue'))")
         echo "[$(date +%H:%M:%S)] worker picked $TID [$SOURCE]: ${TASK:0:80}" >> "$LOG"
         START=$(date +%s)
@@ -311,7 +311,7 @@ PYEOF
         # If task came from plan, mark as done ([ ] → [x]) — env vars = safe quoting
         if [[ "$SOURCE" == "plan" ]]; then
-            DAEMON_TASK="$TASK" /usr/bin/python3 - >> "$LOG" 2>&1 <<'PYEOF'
 import re, os
 from pathlib import Path
 plan_file = Path.home() / '.surrogate' / 'active-plan.md'
@@ -327,7 +327,7 @@ PYEOF
         # Mark done in audit log
         DAEMON_TASK="$TASK" DAEMON_OUTPUT="$(echo "$OUTPUT" | tail -20)" \
-            /usr/bin/python3 - "$TID" "$SOURCE" "$DUR" "$DONE" >> "$LOG" 2>&1 <<'PYEOF'
 import json, os, sys
 tid, source, dur, done_path = sys.argv[1], sys.argv[2], int(sys.argv[3]), sys.argv[4]
 done = {

         shift
         TASK="$*"
         [[ -z "$TASK" ]] && { echo "need task"; exit 2; }
+        ENQUEUE_TASK="$TASK" python3 - "$QUEUE" <<'PYEOF'
 import json, uuid, os, sys
 from datetime import datetime
 queue_path = sys.argv[1]
     _worker)
         # ── Pop one task from queue (P0-user first, then plan, then self-gen) ──────
         _pop_queue() {
+            python3 <<PYEOF
 import json, os, sys, fcntl
 from pathlib import Path
 q = Path(os.path.expanduser('$QUEUE'))
         # ── Pop next task from active plan (no sleep needed — plan drives work) ──
         _pop_plan() {
+            python3 <<'PYEOF'
 import sys, json, os, re, uuid
 from pathlib import Path
 from datetime import datetime
         # ── Self-generate task from pool (fallback when no plan + queue empty) ──
         _self_gen() {
+            AUTO_TASK=$(python3 <<'PYEOF'
 import json, os, random
 from pathlib import Path
 ep = Path(os.path.expanduser('~/.surrogate/state/surrogate-memory/episodes.jsonl'))
 print(chosen or pool[0])
 PYEOF
 )
+            echo "{\"id\":\"auto-$(python3 -c 'import uuid; print(uuid.uuid4().hex[:8])')\",\"task\":\"$AUTO_TASK\",\"self_generated\":true,\"source\":\"self-gen\"}"
         }
         # ── Task resolution: queue → plan → self-gen (no 60s sleep) ─────────────
         fi
         # Extract task
+        TASK=$(echo "$TASK_JSON" | python3 -c "import json,sys; print(json.loads(sys.stdin.read())['task'])")
+        TID=$(echo "$TASK_JSON" | python3 -c "import json,sys; print(json.loads(sys.stdin.read())['id'])")
+        SOURCE=$(echo "$TASK_JSON" | python3 -c "import json,sys; print(json.loads(sys.stdin.read()).get('source','queue'))")
         echo "[$(date +%H:%M:%S)] worker picked $TID [$SOURCE]: ${TASK:0:80}" >> "$LOG"
         START=$(date +%s)
         # If task came from plan, mark as done ([ ] → [x]) — env vars = safe quoting
         if [[ "$SOURCE" == "plan" ]]; then
+            DAEMON_TASK="$TASK" python3 - >> "$LOG" 2>&1 <<'PYEOF'
 import re, os
 from pathlib import Path
 plan_file = Path.home() / '.surrogate' / 'active-plan.md'
         # Mark done in audit log
         DAEMON_TASK="$TASK" DAEMON_OUTPUT="$(echo "$OUTPUT" | tail -20)" \
+            python3 - "$TID" "$SOURCE" "$DUR" "$DONE" >> "$LOG" 2>&1 <<'PYEOF'
 import json, os, sys
 tid, source, dur, done_path = sys.argv[1], sys.argv[2], int(sys.argv[3]), sys.argv[4]
 done = {

bin/surrogate-dev-loop.sh CHANGED Viewed

@@ -33,7 +33,7 @@ SEARCH_ROOTS=(
 # ── Task generators (pick one per cycle, weighted random) ────────────────────
 pick_task() {
-    /usr/bin/python3 <<'PYEOF'
 import os, random, re, subprocess, json
 from pathlib import Path
@@ -178,7 +178,7 @@ load_reflexion_lessons() {
     local kind="$1"
     local file="$HOME/.hermes/workspace/reflexion/lessons-${kind}.jsonl"
     [[ ! -f "$file" ]] && { echo ""; return; }
-    /usr/bin/python3 <<PYEOF
 import json
 from pathlib import Path
 p = Path("$file")
@@ -211,7 +211,7 @@ save_reflexion_lesson() {
     mkdir -p "$(dirname "$file")"
     # Pass payload via env vars + sys.argv (safe — no shell quoting issues with embedded quotes)
     REFLEX_RESP="$response" REFLEX_TASK="$task" \
-        /usr/bin/python3 - "$kind" "$duration" "$file" <<'PYEOF'
 import json, re, os, sys
 from datetime import datetime
 kind, dur, out_file = sys.argv[1], int(sys.argv[2]), sys.argv[3]
@@ -251,11 +251,11 @@ run_cycle() {
     fi
     local kind path line task_text context
-    kind=$(echo "$task_json" | /usr/bin/python3 -c "import json,sys; print(json.loads(sys.stdin.read()).get('kind',''))")
-    path=$(echo "$task_json" | /usr/bin/python3 -c "import json,sys; print(json.loads(sys.stdin.read()).get('path',''))")
-    line=$(echo "$task_json" | /usr/bin/python3 -c "import json,sys; print(json.loads(sys.stdin.read()).get('line',0))")
-    task_text=$(echo "$task_json" | /usr/bin/python3 -c "import json,sys; print(json.loads(sys.stdin.read()).get('task',''))")
-    context=$(echo "$task_json" | /usr/bin/python3 -c "import json,sys; print(json.loads(sys.stdin.read()).get('context',''))")
     local id="$(date +%s)-${kind}"
     local out="$OUT_DIR/${id}.md"
@@ -277,7 +277,7 @@ $context
     # Call Surrogate-1 via Ollama (keep_alive=5m so model stays warm between cycles)
     local body
-    body=$(PROMPT_VAR="$prompt" /usr/bin/python3 <<'PYEOF'
 import json, os
 print(json.dumps({
     "model": "surrogate-1",
@@ -290,13 +290,13 @@ print(json.dumps({
 PYEOF
 )
     local resp
-    resp=$(/usr/bin/curl -sS --max-time 120 \
         http://localhost:11434/v1/chat/completions \
         -H 'Content-Type: application/json' \
         -d "$body" 2>/dev/null)
     local answer
-    answer=$(echo "$resp" | /usr/bin/python3 -c "
 import json, sys
 try:
     d = json.load(sys.stdin)
@@ -335,7 +335,7 @@ EOF
     # Append to training-data candidate (env vars + argv = safe quoting)
     DEV_TASK="$task_text" DEV_ANSWER="$answer" \
-        /usr/bin/python3 - "$kind" "$dur" <<'PYEOF'
 import json, os, sys
 from pathlib import Path
 from datetime import datetime

 # ── Task generators (pick one per cycle, weighted random) ────────────────────
 pick_task() {
+    python3 <<'PYEOF'
 import os, random, re, subprocess, json
 from pathlib import Path
     local kind="$1"
     local file="$HOME/.hermes/workspace/reflexion/lessons-${kind}.jsonl"
     [[ ! -f "$file" ]] && { echo ""; return; }
+    python3 <<PYEOF
 import json
 from pathlib import Path
 p = Path("$file")
     mkdir -p "$(dirname "$file")"
     # Pass payload via env vars + sys.argv (safe — no shell quoting issues with embedded quotes)
     REFLEX_RESP="$response" REFLEX_TASK="$task" \
+        python3 - "$kind" "$duration" "$file" <<'PYEOF'
 import json, re, os, sys
 from datetime import datetime
 kind, dur, out_file = sys.argv[1], int(sys.argv[2]), sys.argv[3]
     fi
     local kind path line task_text context
+    kind=$(echo "$task_json" | python3 -c "import json,sys; print(json.loads(sys.stdin.read()).get('kind',''))")
+    path=$(echo "$task_json" | python3 -c "import json,sys; print(json.loads(sys.stdin.read()).get('path',''))")
+    line=$(echo "$task_json" | python3 -c "import json,sys; print(json.loads(sys.stdin.read()).get('line',0))")
+    task_text=$(echo "$task_json" | python3 -c "import json,sys; print(json.loads(sys.stdin.read()).get('task',''))")
+    context=$(echo "$task_json" | python3 -c "import json,sys; print(json.loads(sys.stdin.read()).get('context',''))")
     local id="$(date +%s)-${kind}"
     local out="$OUT_DIR/${id}.md"
     # Call Surrogate-1 via Ollama (keep_alive=5m so model stays warm between cycles)
     local body
+    body=$(PROMPT_VAR="$prompt" python3 <<'PYEOF'
 import json, os
 print(json.dumps({
     "model": "surrogate-1",
 PYEOF
 )
     local resp
+    resp=$(curl -sS --max-time 120 \
         http://localhost:11434/v1/chat/completions \
         -H 'Content-Type: application/json' \
         -d "$body" 2>/dev/null)
     local answer
+    answer=$(echo "$resp" | python3 -c "
 import json, sys
 try:
     d = json.load(sys.stdin)
     # Append to training-data candidate (env vars + argv = safe quoting)
     DEV_TASK="$task_text" DEV_ANSWER="$answer" \
+        python3 - "$kind" "$dur" <<'PYEOF'
 import json, os, sys
 from pathlib import Path
 from datetime import datetime

bin/surrogate-orchestrate.sh CHANGED Viewed

@@ -42,7 +42,7 @@ RESEARCH_CONTEXT=""
 RESEARCH_OUT="$WORKDIR/0-research-context.md"
 if echo "$TASK" | grep -iqE "migrat|integrat|switch from|move to|adopt|setup|deploy"; then
     echo "${MA}${B}═══ Stage 0/6: WEB RESEARCH${R} ${D}— gather current docs first${R}"
-    /usr/bin/python3 - "$TASK" "$RESEARCH_OUT" <<'PYEOF' 2>&1 | sed 's/^/  /' || true
 import sys, urllib.request, urllib.parse, json, re, os
 task, out_path = sys.argv[1], sys.argv[2]
 # Extract tech keywords (capitalized words, dot-versions, snake-case)
@@ -84,7 +84,7 @@ for prd_file in "$(pwd)/surrogate.md" "$(pwd)/SURROGATE.md"; do
         PRD_CONTEXT="
 === Project PRD (surrogate.md) ===
-$(/usr/bin/head -c 6000 "$prd_file")
 === End PRD ==="
         break
     fi
@@ -138,7 +138,7 @@ EOF
               CHUTES_KEY="${CHUTES_API_KEY:-}" \
               OR_KEY_ENV="${OPENROUTER_API_KEY:-}" \
               GH_POOL="${GITHUB_TOKEN_POOL:-}" \
-              /usr/bin/python3 - "$prompt_file" <<'PYEOF' 2>&1
 import sys, json, urllib.request, os
 from pathlib import Path
 prompt = Path(sys.argv[1]).read_text()
@@ -249,7 +249,7 @@ PYEOF
 # ── Push every task pair to HF training dataset (background) ──
 push_training_pair() {
     local source="$1" prompt="$2" content="$3"
-    /usr/bin/python3 - "$source" "$prompt" "$content" "$TRAINING_LOG" <<'PYEOF' 2>/dev/null &
 import sys, json, time, os
 src, p, c, log = sys.argv[1], sys.argv[2], sys.argv[3], sys.argv[4]
 pair = {
@@ -384,7 +384,7 @@ Task: $TASK
 # Extract code blocks from DEV output → write actual files
 if [[ -f "$DEV_OUT" ]]; then
     echo "${D}  Extracting code blocks → real files${R}"
-    /usr/bin/python3 - "$DEV_OUT" "$(pwd)" <<'PYEOF' 2>&1 | sed 's/^/    /'
 import sys, re, os
 from pathlib import Path
 md_path, cwd = sys.argv[1], sys.argv[2]
@@ -420,7 +420,7 @@ fi
 QA_OUT="$WORKDIR/5-qa-verify.md"
 OPS_OUT="$WORKDIR/6a-ops-checklist.md"
 NEED_OPS=0
-if echo "$TASK" | /usr/bin/grep -iqE "deploy|docker|helm|k8s|terraform|cicd|ci/cd|cloudformation|buildspec|ecs|lambda"; then
     NEED_OPS=1
 fi
@@ -509,7 +509,7 @@ ls -la "$WORKDIR/" 2>&1 | tail -n +2 | awk '{printf "  %s  %s\n", $5, $9}' | gre
 VERDICT_TEXT=""
 if [[ -f "$REVIEW_OUT" ]]; then
-    VERDICT_TEXT=$(grep -iE "verdict|APPROVE|REWORK|REJECT" "$REVIEW_OUT" | /usr/bin/head -3)
     echo ""
     echo "${B}▸ Final verdict:${R}"
     echo "$VERDICT_TEXT" | sed 's/^/  /'
@@ -536,5 +536,5 @@ if echo "$VERDICT_TEXT" | grep -qi "APPROVE"; then
 elif echo "$VERDICT_TEXT" | grep -qi "REWORK"; then
     echo ""
     echo "${YE}${B}▸ Reviewer requested REWORK — re-run orchestrate after addressing notes${R}"
-    grep -A5 -i "REWORK\|action item" "$REVIEW_OUT" | /usr/bin/head -10 | sed 's/^/  /'
 fi

 RESEARCH_OUT="$WORKDIR/0-research-context.md"
 if echo "$TASK" | grep -iqE "migrat|integrat|switch from|move to|adopt|setup|deploy"; then
     echo "${MA}${B}═══ Stage 0/6: WEB RESEARCH${R} ${D}— gather current docs first${R}"
+    python3 - "$TASK" "$RESEARCH_OUT" <<'PYEOF' 2>&1 | sed 's/^/  /' || true
 import sys, urllib.request, urllib.parse, json, re, os
 task, out_path = sys.argv[1], sys.argv[2]
 # Extract tech keywords (capitalized words, dot-versions, snake-case)
         PRD_CONTEXT="
 === Project PRD (surrogate.md) ===
+$(head -c 6000 "$prd_file")
 === End PRD ==="
         break
     fi
               CHUTES_KEY="${CHUTES_API_KEY:-}" \
               OR_KEY_ENV="${OPENROUTER_API_KEY:-}" \
               GH_POOL="${GITHUB_TOKEN_POOL:-}" \
+              python3 - "$prompt_file" <<'PYEOF' 2>&1
 import sys, json, urllib.request, os
 from pathlib import Path
 prompt = Path(sys.argv[1]).read_text()
 # ── Push every task pair to HF training dataset (background) ──
 push_training_pair() {
     local source="$1" prompt="$2" content="$3"
+    python3 - "$source" "$prompt" "$content" "$TRAINING_LOG" <<'PYEOF' 2>/dev/null &
 import sys, json, time, os
 src, p, c, log = sys.argv[1], sys.argv[2], sys.argv[3], sys.argv[4]
 pair = {
 # Extract code blocks from DEV output → write actual files
 if [[ -f "$DEV_OUT" ]]; then
     echo "${D}  Extracting code blocks → real files${R}"
+    python3 - "$DEV_OUT" "$(pwd)" <<'PYEOF' 2>&1 | sed 's/^/    /'
 import sys, re, os
 from pathlib import Path
 md_path, cwd = sys.argv[1], sys.argv[2]
 QA_OUT="$WORKDIR/5-qa-verify.md"
 OPS_OUT="$WORKDIR/6a-ops-checklist.md"
 NEED_OPS=0
+if echo "$TASK" | grep -iqE "deploy|docker|helm|k8s|terraform|cicd|ci/cd|cloudformation|buildspec|ecs|lambda"; then
     NEED_OPS=1
 fi
 VERDICT_TEXT=""
 if [[ -f "$REVIEW_OUT" ]]; then
+    VERDICT_TEXT=$(grep -iE "verdict|APPROVE|REWORK|REJECT" "$REVIEW_OUT" | head -3)
     echo ""
     echo "${B}▸ Final verdict:${R}"
     echo "$VERDICT_TEXT" | sed 's/^/  /'
 elif echo "$VERDICT_TEXT" | grep -qi "REWORK"; then
     echo ""
     echo "${YE}${B}▸ Reviewer requested REWORK — re-run orchestrate after addressing notes${R}"
+    grep -A5 -i "REWORK\|action item" "$REVIEW_OUT" | head -10 | sed 's/^/  /'
 fi