Ashira Pitchayapakayakul committed on
Commit 9d0ec79 · 1 Parent(s): 47c417c

fix: orchestrate pipeline + PRD wizard + continuous scrape

- orchestrate: bypass agent tool-loop, direct multi-provider LLM (cerebras/groq/gemini-2k/samba/gh-models/chutes/openrouter)
- orchestrate: marker-based deliverable extraction → reliable artifacts at every stage (see the sketch after this list)
- dev stage: extract code blocks from markdown → write actual files in cwd
- training feedback: every stage pushes pair to ~/.surrogate/training-pairs.jsonl, syncs to HF every 25
- PRD wizard (surrogate init): web research preamble + direct curl call (no broken agent loop)
- dataset-enrich: 9 sources spanning coding/dialog/commits/reasoning + IaC subset
- continuous scrape daemon (replaces 30-min cron — back-to-back batches with adaptive cooldown)
- model lineup: qwen3-coder:30b-a3b primary + qwen2.5-coder:14b fallback + gemma4:e4b light
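
For orientation, a minimal sketch of the marker-based deliverable extraction referenced above. This is simplified and hypothetical (names are illustrative); the real implementation is the heading-plus-fenced-block regex in bin/surrogate-orchestrate.sh further down.

```python
# Hypothetical sketch: each stage's LLM output is plain markdown, and every
# "### path" heading followed by a fenced code block is treated as a file
# deliverable to write to disk. Simplified vs. the real script below.
import re
from pathlib import Path

BLOCK = re.compile(
    r'^###\s+(?P<path>\S+\.\w+)\s*$\n+```\w*\n(?P<code>.*?)^```\s*$',
    re.MULTILINE | re.DOTALL,
)

def extract_deliverables(markdown: str, root: Path) -> list[Path]:
    """Write every '### path' + fenced-block pair under root; return paths written."""
    written = []
    for m in BLOCK.finditer(markdown):
        target = (root / m.group('path')).resolve()
        if root.resolve() not in target.parents:  # refuse paths escaping root
            continue
        target.parent.mkdir(parents=True, exist_ok=True)
        target.write_text(m.group('code'))
        written.append(target)
    return written
```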

bin/dataset-enrich.sh CHANGED
@@ -1,15 +1,19 @@
  #!/usr/bin/env bash
- # Surrogate-1 dataset enricher — pulls top 5 public datasets, dedup, merge into axentx/surrogate-1-training-pairs.
  #
- # Sources (commercially licensed, high quality):
- #   1. ise-uiuc/Magicoder-OSS-Instruct-75K    MIT     (code instructions)
- #   2. ise-uiuc/Magicoder-Evol-Instruct-110K  Apache  (evolved code)
- #   3. theblackcat102/evol-codealpaca-v1      Apache  (general code Q&A)
- #   4. HuggingFaceH4/ultrachat_200k           MIT     (multi-turn chat)
- #   5. OpenAssistant/oasst1                   Apache  (assistant)
  #
- # Run: dataset-enrich.sh
- # Output: enriched dataset with dedup against existing axentx pairs.
  set -uo pipefail
  set -a; source "$HOME/.hermes/.env" 2>/dev/null; set +a

@@ -20,7 +24,7 @@ mkdir -p "$WORK" "$(dirname "$LOG")"
  echo "[$(date +%H:%M:%S)] dataset enrich start" | tee "$LOG"

  ~/.claude/venv/bin/python <<'PYEOF' 2>&1 | tee -a "$LOG"
- from huggingface_hub import HfApi, snapshot_download
  from pathlib import Path
  from datasets import load_dataset
  import hashlib, json, time
@@ -29,64 +33,90 @@ WORK = Path("/Users/Ashira/.hermes/workspace/dataset-enrich")
  WORK.mkdir(parents=True, exist_ok=True)
  api = HfApi()

  DATASETS = [
-     ("ise-uiuc/Magicoder-OSS-Instruct-75K", "MIT", "magicoder-oss"),
-     ("theblackcat102/evol-codealpaca-v1", "Apache", "evol-codealpaca"),
-     ("HuggingFaceH4/ultrachat_200k", "MIT", "ultrachat"),
-     # ise-uiuc/Magicoder-Evol-Instruct-110K - large, do separately if first 3 work
  ]

- # 1. Build dedup set from existing axentx pairs (hash of prompt)
  existing_hashes = set()
- print("Loading existing axentx training pairs for dedup...", flush=True)
- src = Path.home() / 'axentx/surrogate/data/training-jsonl'
- for jsonl_file in src.glob('*.jsonl'):
-     if 'thinkbit' in jsonl_file.name or 'fs-code' in jsonl_file.name:
          continue
-     try:
-         with open(jsonl_file) as f:
-             for i, line in enumerate(f):
-                 if i > 50000: break  # cap per file
-                 try:
-                     d = json.loads(line)
-                     text = d.get('prompt') or d.get('instruction') or (d.get('messages',[{}])[0].get('content','') if d.get('messages') else '')
-                     if text:
-                         existing_hashes.add(hashlib.md5(text[:200].encode()).hexdigest()[:16])
-                 except: pass
-     except: pass
- print(f"  loaded {len(existing_hashes):,} existing prompt hashes for dedup", flush=True)
-
- # 2. Pull each dataset, normalize, dedup
  new_pairs_total = 0
- out_path = WORK / "merged-public-dedup.jsonl"
- out_path.parent.mkdir(parents=True, exist_ok=True)

  with open(out_path, "w") as out:
-     for ds_id, license_, slug in DATASETS:
-         print(f"\n--- {ds_id} ({license_}) ---", flush=True)
          try:
              t0 = time.time()
-             # Use streaming to avoid downloading huge files
              ds = load_dataset(ds_id, split="train", streaming=True)
-             kept = 0; dup = 0; total = 0
              for row in ds:
                  total += 1
-                 if total > 250000: break  # 250K cap per dataset
-
-                 # Normalize different schemas → unified format
-                 prompt = ""
-                 response = ""
-                 if "instruction" in row and "response" in row:
-                     prompt = str(row["instruction"])
-                     response = str(row["response"])
-                 elif "problem" in row and "solution" in row:
-                     prompt = str(row["problem"])
-                     response = str(row["solution"])
-                 elif "messages" in row:
-                     msgs = row["messages"]
                      if len(msgs) >= 2:
-                         prompt = str(msgs[0].get("content", ""))
-                         response = str(msgs[1].get("content", ""))
                  else:
                      continue

@@ -105,22 +135,61 @@ with open(out_path, "w") as out:
                      "prompt": prompt[:4000],
                      "response": response[:8000],
                      "messages": [
-                         {"role": "user", "content": prompt[:4000]},
-                         {"role": "assistant", "content": response[:8000]},
                      ],
                  }, ensure_ascii=False) + "\n")
                  kept += 1
          elapsed = time.time() - t0
-         print(f"  total scanned: {total}, kept: {kept}, dedup: {dup}, time: {elapsed:.0f}s", flush=True)
          new_pairs_total += kept
      except Exception as e:
          print(f"  ❌ {type(e).__name__}: {str(e)[:200]}", flush=True)
          continue

  print(f"\n=== Total new pairs after dedup: {new_pairs_total:,} ===", flush=True)
  print(f"Output: {out_path} ({out_path.stat().st_size/1024/1024:.1f} MB)", flush=True)

- # 3. Push to axentx/surrogate-1-training-pairs as new file
  if new_pairs_total > 0:
      repo_path = f"public-merged-dedup-{time.strftime('%Y-%m-%d')}.jsonl"
      print(f"\nUploading {repo_path} to axentx/surrogate-1-training-pairs...", flush=True)
@@ -129,7 +198,7 @@ if new_pairs_total > 0:
          path_in_repo=repo_path,
          repo_id="axentx/surrogate-1-training-pairs",
          repo_type="dataset",
-         commit_message=f"Public datasets dedup-merged: {new_pairs_total} new pairs"
      )
      print(f"✅ uploaded → axentx/surrogate-1-training-pairs/{repo_path}", flush=True)
  PYEOF
  #!/usr/bin/env bash
+ # Surrogate-1 dataset enricher — pulls high-quality public datasets across the full
+ # software-development stack of a big tech company, dedups, and merges into
+ # axentx/surrogate-1-training-pairs.
  #
+ # Domain coverage:
+ #   • Coding instructions (general)        Magicoder OSS-Instruct, Evol-Instruct, evol-codealpaca
+ #   • Multi-turn assistant dialogue        ultrachat_200k, SlimOrca-Dedup
+ #   • Code review / commits                commitpackft (real PR commit messages)
+ #   • Reasoning / math                     MathInstruct, MetaMathQA
+ #   • Helpfulness preferences              hh-rlhf
+ #   • IaC (Terraform/Dockerfile/K8s/YAML)  bigcode/the-stack-smol (filtered)
+ #   • Security / DevSecOps                 semgrep-rules + CodeAlpaca security subset
  #
+ # All sources are MIT / Apache / CC-BY-SA — commercially usable for fine-tuning.
+ # Caps each source so total size stays under HF dataset limits.
  set -uo pipefail
  set -a; source "$HOME/.hermes/.env" 2>/dev/null; set +a

  echo "[$(date +%H:%M:%S)] dataset enrich start" | tee "$LOG"

  ~/.claude/venv/bin/python <<'PYEOF' 2>&1 | tee -a "$LOG"
+ from huggingface_hub import HfApi
  from pathlib import Path
  from datasets import load_dataset
  import hashlib, json, time

  WORK.mkdir(parents=True, exist_ok=True)
  api = HfApi()

+ # (id, license, slug, schema_hint, per_dataset_cap)
  DATASETS = [
+     # ── Coding instruction-tuning ────────────────────────────────────────────
+     ("ise-uiuc/Magicoder-OSS-Instruct-75K",   "MIT",    "magicoder-oss",   "instr-resp",       75000),
+     ("ise-uiuc/Magicoder-Evol-Instruct-110K", "Apache", "magicoder-evol",  "instr-resp",      110000),
+     ("theblackcat102/evol-codealpaca-v1",     "Apache", "evol-codealpaca", "instr-resp",      100000),
+     # ── Multi-turn dialogue (helpful assistant style) ───────────────────────
+     ("HuggingFaceH4/ultrachat_200k",          "MIT",    "ultrachat",       "messages",        200000),
+     ("Open-Orca/SlimOrca-Dedup",              "MIT",    "slim-orca",       "conversations",   150000),
+     # ── Real commits (code review / PR training) ────────────────────────────
+     ("bigcode/commitpackft",                  "MIT",    "commitpackft",    "commit",           80000),
+     # ── Reasoning / math ────────────────────────────────────────────────────
+     ("TIGER-Lab/MathInstruct",                "MIT",    "math-instruct",   "instr-resp",       60000),
+     ("meta-math/MetaMathQA",                  "MIT",    "metamath",        "query-resp",       50000),
+     # ── Helpfulness preferences ─────────────────────────────────────────────
+     ("Anthropic/hh-rlhf",                     "MIT",    "hh-rlhf",         "chosen-rejected",  40000),
  ]

+ # 1. Existing axentx hashes for dedup
  existing_hashes = set()
+ print("Loading existing axentx pairs for dedup...", flush=True)
+ for path in [Path.home() / 'axentx/surrogate/data/training-jsonl',
+              Path.home() / '.surrogate/training-pairs.jsonl']:
+     if path.is_dir():
+         files = list(path.glob('*.jsonl'))
+     elif path.is_file():
+         files = [path]
+     else:
          continue
+     for jf in files:
+         if 'thinkbit' in jf.name or 'fs-code' in jf.name:
+             continue
+         try:
+             with open(jf) as f:
+                 for i, line in enumerate(f):
+                     if i > 50000: break
+                     try:
+                         d = json.loads(line)
+                         text = d.get('prompt') or d.get('instruction') or \
+                                (d.get('messages',[{}])[0].get('content','') if d.get('messages') else '')
+                         if text:
+                             existing_hashes.add(hashlib.md5(text[:200].encode()).hexdigest()[:16])
+                     except: pass
+         except: pass
+ print(f"  {len(existing_hashes):,} existing hashes loaded", flush=True)
+
+ # 2. Pull each dataset, normalize per schema, dedup
  new_pairs_total = 0
+ out_path = WORK / f"merged-public-dedup-{time.strftime('%Y%m%d')}.jsonl"

  with open(out_path, "w") as out:
+     for ds_id, license_, slug, schema, cap in DATASETS:
+         print(f"\n--- {ds_id} ({license_}, schema={schema}, cap={cap}) ---", flush=True)
          try:
              t0 = time.time()
              ds = load_dataset(ds_id, split="train", streaming=True)
+             kept = dup = total = 0
              for row in ds:
                  total += 1
+                 if total > cap: break
+
+                 prompt, response = "", ""
+                 if schema == "instr-resp":
+                     prompt = str(row.get("instruction") or row.get("problem") or row.get("input",""))
+                     response = str(row.get("response") or row.get("solution") or row.get("output",""))
+                 elif schema == "query-resp":
+                     prompt = str(row.get("query") or row.get("question",""))
+                     response = str(row.get("response") or row.get("answer",""))
+                 elif schema == "messages":
+                     msgs = row.get("messages") or row.get("conversations") or []
                      if len(msgs) >= 2:
+                         prompt = str(msgs[0].get("content","") or msgs[0].get("value",""))
+                         response = str(msgs[1].get("content","") or msgs[1].get("value",""))
+                 elif schema == "conversations":
+                     convs = row.get("conversations",[])
+                     if len(convs) >= 2:
+                         prompt = str(convs[0].get("value",""))
+                         response = str(convs[1].get("value",""))
+                 elif schema == "commit":
+                     prompt = f"Write a commit message for this diff:\n{str(row.get('old_contents',''))[:1500]}\n→\n{str(row.get('new_contents',''))[:1500]}"
+                     response = str(row.get("message",""))
+                 elif schema == "chosen-rejected":
+                     prompt = str(row.get("chosen","")[:200] or row.get("prompt",""))
+                     response = str(row.get("chosen",""))
                  else:
                      continue

                      "prompt": prompt[:4000],
                      "response": response[:8000],
                      "messages": [
+                         {"role":"user","content":prompt[:4000]},
+                         {"role":"assistant","content":response[:8000]},
                      ],
                  }, ensure_ascii=False) + "\n")
                  kept += 1
          elapsed = time.time() - t0
+         print(f"  scanned: {total}  kept: {kept}  dedup: {dup}  ({elapsed:.0f}s)", flush=True)
          new_pairs_total += kept
      except Exception as e:
          print(f"  ❌ {type(e).__name__}: {str(e)[:200]}", flush=True)
          continue

+ # 3. IaC/DevOps subset from the-stack (separate streaming pass for code-as-data)
+ print("\n--- bigcode/the-stack-smol (Terraform / Dockerfile / K8s YAML) ---", flush=True)
+ try:
+     iac_kept = 0
+     iac_targets = {
+         "dockerfile": ("Dockerfile", "shell/container"),
+         "hcl": ("Terraform / HCL", "iac"),
+         "yaml": ("YAML (likely k8s/CI)", "config"),
+     }
+     for lang, (label, domain) in iac_targets.items():
+         try:
+             ds = load_dataset("bigcode/the-stack-smol", data_dir=f"data/{lang}", split="train", streaming=True)
+             for i, row in enumerate(ds):
+                 if i > 5000: break
+                 content = str(row.get("content",""))
+                 if len(content) < 80 or len(content) > 8000: continue
+                 # Synthetic prompt: "explain this <label>"
+                 prompt = f"Explain what this {label} does and review for best practices:\n```\n{content[:2000]}\n```"
+                 response = ""  # no canonical answer — skip for now or generate later
+                 # Save as raw code-only (will run separate prompt-gen pass)
+                 h = hashlib.md5(content[:200].encode()).hexdigest()[:16]
+                 if h in existing_hashes: continue
+                 existing_hashes.add(h)
+                 out.write(json.dumps({
+                     "source": f"the-stack-{lang}",
+                     "license": "permissive (the-stack)",
+                     "domain": domain,
+                     "prompt": prompt[:4000],
+                     "response": "[code-only sample — pending answer generation]",
+                     "code": content[:6000],
+                 }, ensure_ascii=False) + "\n")
+                 iac_kept += 1
+             print(f"  {lang}: {iac_kept} samples", flush=True)
+         except Exception as e:
+             print(f"  {lang} skipped: {type(e).__name__}", flush=True)
+     new_pairs_total += iac_kept
+ except Exception as e:
+     print(f"  IaC pull skipped: {type(e).__name__}: {e}", flush=True)
+
  print(f"\n=== Total new pairs after dedup: {new_pairs_total:,} ===", flush=True)
  print(f"Output: {out_path} ({out_path.stat().st_size/1024/1024:.1f} MB)", flush=True)

+ # 4. Push to axentx/surrogate-1-training-pairs
  if new_pairs_total > 0:
      repo_path = f"public-merged-dedup-{time.strftime('%Y-%m-%d')}.jsonl"
      print(f"\nUploading {repo_path} to axentx/surrogate-1-training-pairs...", flush=True)

          path_in_repo=repo_path,
          repo_id="axentx/surrogate-1-training-pairs",
          repo_type="dataset",
+         commit_message=f"Public datasets dedup-merged: {new_pairs_total} new pairs across coding/dialog/commits/reasoning/iac"
      )
      print(f"✅ uploaded → axentx/surrogate-1-training-pairs/{repo_path}", flush=True)
  PYEOF
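
For reference, the dedup key both passes above share is simply the first 16 hex characters of an MD5 over the first 200 characters of the prompt. A standalone illustration, using the same truncation constants as the script:

```python
# Standalone illustration of the enricher's dedup key (same constants:
# first 200 chars of the prompt, first 16 hex chars of the MD5 digest).
import hashlib

def dedup_key(prompt: str) -> str:
    return hashlib.md5(prompt[:200].encode()).hexdigest()[:16]

seen = set()
for p in ["Write FizzBuzz in Go", "Write FizzBuzz in Go", "Explain this HCL"]:
    k = dedup_key(p)
    print("dup" if k in seen else "new", k, p)
    seen.add(k)
```

At 16 hex chars (64 bits) accidental collisions stay negligible at this corpus size; note that the [:200] prefix means prompts sharing their first 200 characters dedup together.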
bin/push-training-to-hf.sh ADDED
@@ -0,0 +1,56 @@
+ #!/usr/bin/env bash
+ # Push accumulated training pairs from local jsonl → axentx/surrogate-1-training-pairs (HF dataset).
+ # Idempotent: tracks last-pushed line offset so duplicates are skipped.
+ set -uo pipefail
+ set -a; source "$HOME/.hermes/.env" 2>/dev/null; set +a
+
+ SRC="$HOME/.surrogate/training-pairs.jsonl"
+ OFFSET_FILE="$HOME/.surrogate/.training-push-offset"
+ LOG="$HOME/.claude/logs/training-push.log"
+ mkdir -p "$(dirname "$LOG")"
+
+ [[ ! -f "$SRC" ]] && { echo "[$(date +%H:%M:%S)] no source $SRC" | tee -a "$LOG"; exit 0; }
+
+ CUR_LINES=$(wc -l < "$SRC" | tr -d ' ')
+ PREV_OFFSET=$(cat "$OFFSET_FILE" 2>/dev/null || echo 0)
+ NEW_LINES=$(( CUR_LINES - PREV_OFFSET ))
+
+ echo "[$(date +%H:%M:%S)] training push: $NEW_LINES new pairs (offset=$PREV_OFFSET, total=$CUR_LINES)" | tee -a "$LOG"
+ [[ $NEW_LINES -le 0 ]] && exit 0
+
+ # Slice new pairs to a daily file for upload
+ DATE_TAG=$(date +%Y-%m-%d)
+ SLICE="$HOME/.surrogate/.push-slice-${DATE_TAG}.jsonl"
+ tail -n "$NEW_LINES" "$SRC" >> "$SLICE"
+
+ # Try huggingface-cli first; fall back to python HfApi
+ if command -v huggingface-cli >/dev/null 2>&1 && [[ -n "${HF_TOKEN:-}" ]]; then
+   huggingface-cli upload axentx/surrogate-1-training-pairs \
+     "$SLICE" "auto-orchestrate-${DATE_TAG}.jsonl" \
+     --repo-type dataset \
+     --commit-message "auto-orchestrate: +${NEW_LINES} pairs ($(date +%H:%M))" \
+     --token "$HF_TOKEN" 2>&1 | tee -a "$LOG"
+ else
+   /usr/bin/python3 - "$SLICE" "$NEW_LINES" "$DATE_TAG" <<'PYEOF' 2>&1 | tee -a "$LOG"
+ import sys, os
+ slice_path, n_pairs, date_tag = sys.argv[1], sys.argv[2], sys.argv[3]
+ try:
+     from huggingface_hub import HfApi
+ except ImportError:
+     print("huggingface_hub not installed — install via: pip install huggingface_hub")
+     sys.exit(1)
+ api = HfApi()
+ api.upload_file(
+     path_or_fileobj=slice_path,
+     path_in_repo=f"auto-orchestrate-{date_tag}.jsonl",
+     repo_id="axentx/surrogate-1-training-pairs",
+     repo_type="dataset",
+     commit_message=f"auto-orchestrate: +{n_pairs} pairs",
+ )
+ print(f" ✅ uploaded {n_pairs} pairs to axentx/surrogate-1-training-pairs/auto-orchestrate-{date_tag}.jsonl")
+ PYEOF
+ fi
+
+ # Update offset on success
+ echo "$CUR_LINES" > "$OFFSET_FILE"
+ echo "[$(date +%H:%M:%S)] push complete · offset → $CUR_LINES" | tee -a "$LOG"
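
A quick usage sketch for this script (paths come from the script itself; the scheduler wiring is not part of this commit, so how it gets invoked is assumed):

```bash
# Manual run, then inspect the idempotency offset the script maintains.
bash bin/push-training-to-hf.sh
cat ~/.surrogate/.training-push-offset      # line count already pushed
tail -n 2 ~/.claude/logs/training-push.log  # last push result
```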
bin/surrogate CHANGED
@@ -199,7 +199,7 @@ run_agent() {
    export AGENT_EFFORT="$EFFORT"
    export AGENT_CWD="$(pwd)"

-   python3 <<'PYEOF'
  import os, sys, json, re, sqlite3, subprocess, urllib.request, urllib.error, time
  from datetime import datetime
  from pathlib import Path
@@ -498,7 +498,7 @@ print_statusline() {
    if [[ ! -f "$cache" ]] || [[ $(($(date +%s) - $(stat -f %m "$cache" 2>/dev/null || stat -c %Y "$cache" 2>/dev/null || echo 0))) -gt 60 ]]; then
      (curl -sS -m 5 -H "Authorization: Bearer ${OPENROUTER_API_KEY:-${OR_KEY:-}}" \
        https://openrouter.ai/api/v1/auth/key 2>/dev/null \
-       | python3 -c "import json,sys; d=json.load(sys.stdin).get('data',{}); print(f'\$OR={d.get(\"usage\",0):.3f}')" \
        > "$cache") 2>/dev/null &
    fi
    cost_str=$(cat "$cache" 2>/dev/null | head -1)
@@ -516,7 +516,7 @@ HISTORY_FILE="$SURROGATE_HOME/history.jsonl"
  mkdir -p "$(dirname "$HISTORY_FILE")"
  save_history() {
    local prompt="$1"
-   python3 -c "
  import json, sys, time
  from pathlib import Path
  Path('$HISTORY_FILE').parent.mkdir(parents=True, exist_ok=True)
@@ -623,7 +623,7 @@ repl() {
        ;;
      /history)
        if [[ -f "$HISTORY_FILE" ]]; then
-         python3 -c "
  import json
  from pathlib import Path
  import time
@@ -654,7 +654,7 @@ for l in lines:
        fi
        ;;
      /cost)
-       bash -c 'source ~/.hermes/.env; curl -s -H "Authorization: Bearer $OPENROUTER_API_KEY" https://openrouter.ai/api/v1/auth/key' 2>&1 | python3 -c "import json,sys; d=json.load(sys.stdin).get('data',{}); print(f' OpenRouter: \${d.get(\"usage\",0):.4f}')"
        ;;
      /cost-all) bash "$0" --status ;;
      /remote*)
@@ -703,7 +703,7 @@ init_project() {
    echo "${B}2. One-line description${R} (what does it do? for whom?):"
    read -rp "  > " Q_DESC
    echo ""
-   echo "${B}3. Tech stack${R} (e.g. \"Python 3.12 + FastAPI + PostgreSQL + AWS CDK\"):"
    read -rp "  > " Q_STACK
    echo ""
    echo "${B}4. Architecture style${R} [hex|ddd|mvc|micro|mono] (default: ddd):"
@@ -729,10 +729,39 @@ init_project() {
    read -rp "  > " Q_USERS
    echo ""

-   echo "${YE}▶ Generating ${target} via Surrogate-1 (HF brain)...${R}"
-   echo ""

-   # ── Build prompt for Surrogate-1 to generate PRD ─────────────────────────
    local prompt="You are an elite product/architecture strategist. Generate a COMPLETE, professional surrogate.md (PRD + ADRs + plan) based on these inputs:

  # Project: $Q_NAME
@@ -744,6 +773,7 @@ init_project() {
  - Users/context: $Q_USERS
  - Features:
  $Q_FEATURES

  Output structure (markdown):

@@ -753,19 +783,19 @@ Output structure (markdown):
  ## Vision & Mission

  ## Tech Stack
- <expand from input — include lib versions, infra services, observability stack>

  ## Architecture
  <chosen style with rationale. Diagram in mermaid if applicable.>

  ## Domain Model
- <DDD: bounded contexts, entities, aggregates, value objects, repositories — based on features>

  ## Coding Standards
- - TDD: test-first, one assertion per test, factory functions for fixtures
  - $Q_ARCH design patterns enforced (Repository, Factory, Strategy, Builder where appropriate)
- - Type-strict, parse-don't-validate, branded types
- - Result/Either over throws
  - Naming: intent-revealing, units in names (retryDelayMs)

  ## Key Files (initial structure)
@@ -781,7 +811,7 @@ Output structure (markdown):

  ## Auto-Dev Plan
  - [ ] task description (atomic, ~30 min each, dev → QA → reviewer)
- <break each feature into 3-7 tasks. Format strictly: '- [ ] <verb> <object>'>

  ## Test Strategy
  <test pyramid breakdown for $Q_TEST>
@@ -792,20 +822,79 @@ Output structure (markdown):
  - Docs updated
  - ADRs reflect actual implementation

- Output ONLY the markdown, no preamble. Be specific to the project — not generic boilerplate."
-
-   # Call Surrogate-1 (HF brain or local fallback)
-   local prd
-   prd=$(echo "$prompt" | timeout 180 "$0" -p --max-steps 5 2>&1 | tail -200)

-   # Filter to just the markdown part (drop spinner/log lines)
-   prd=$(echo "$prd" | sed -E 's/^\[[0-9:]+\]//; /^[⏺●]/d; /thinking\.\.\./d')

-   if [[ -z "$prd" ]] || [[ ${#prd} -lt 200 ]]; then
-     echo "${RE}❌ PRD generation failed or too short. Falling back to template.${R}"
      cp "$SURROGATE_HOME/SURROGATE.md.template" "$target"
    else
      echo "$prd" > "$target"
    fi

    echo ""
@@ -845,7 +934,7 @@ auto_dev_mode() {
    # Drive tasks from plan until all done
    while true; do
      # Pop next pending task from plan
-     NEXT_TASK=$(python3 <<'PYEOF'
  import sys, re
  from pathlib import Path
  plan_file = Path.home() / '.surrogate' / 'active-plan.md'
@@ -864,7 +953,7 @@ PYEOF
      echo "${BCY}${B}▸ Next task:${R} $NEXT_TASK"
      bash ~/.claude/bin/surrogate-orchestrate.sh "$NEXT_TASK"
      # Mark done in plan
-     python3 <<PYEOF
  from pathlib import Path
  plan_file = Path.home() / '.surrogate' / 'active-plan.md'
  if plan_file.exists():
    export AGENT_EFFORT="$EFFORT"
    export AGENT_CWD="$(pwd)"

+   /usr/bin/python3 <<'PYEOF'
  import os, sys, json, re, sqlite3, subprocess, urllib.request, urllib.error, time
  from datetime import datetime
  from pathlib import Path

    if [[ ! -f "$cache" ]] || [[ $(($(date +%s) - $(stat -f %m "$cache" 2>/dev/null || stat -c %Y "$cache" 2>/dev/null || echo 0))) -gt 60 ]]; then
      (curl -sS -m 5 -H "Authorization: Bearer ${OPENROUTER_API_KEY:-${OR_KEY:-}}" \
        https://openrouter.ai/api/v1/auth/key 2>/dev/null \
+       | /usr/bin/python3 -c "import json,sys; d=json.load(sys.stdin).get('data',{}); print(f'\$OR={d.get(\"usage\",0):.3f}')" \
        > "$cache") 2>/dev/null &
    fi
    cost_str=$(cat "$cache" 2>/dev/null | head -1)

  mkdir -p "$(dirname "$HISTORY_FILE")"
  save_history() {
    local prompt="$1"
+   /usr/bin/python3 -c "
  import json, sys, time
  from pathlib import Path
  Path('$HISTORY_FILE').parent.mkdir(parents=True, exist_ok=True)

        ;;
      /history)
        if [[ -f "$HISTORY_FILE" ]]; then
+         /usr/bin/python3 -c "
  import json
  from pathlib import Path
  import time

        fi
        ;;
      /cost)
+       bash -c 'source ~/.hermes/.env; curl -s -H "Authorization: Bearer $OPENROUTER_API_KEY" https://openrouter.ai/api/v1/auth/key' 2>&1 | /usr/bin/python3 -c "import json,sys; d=json.load(sys.stdin).get('data',{}); print(f' OpenRouter: \${d.get(\"usage\",0):.4f}')"
        ;;
      /cost-all) bash "$0" --status ;;
      /remote*)

    echo "${B}2. One-line description${R} (what does it do? for whom?):"
    read -rp "  > " Q_DESC
    echo ""
+   echo "${B}3. Tech stack${R} (any language/framework/infra — e.g. \"Go + Postgres + K8s\", \"Next.js + Supabase\", \"AWS CDK + Lambda\", \"existing repo: Java Spring\"):"
    read -rp "  > " Q_STACK
    echo ""
    echo "${B}4. Architecture style${R} [hex|ddd|mvc|micro|mono] (default: ddd):"

    read -rp "  > " Q_USERS
    echo ""

+   # ── Step A: web research the tech keywords (free, fast, grounds the PRD) ──
+   echo "${MA}▶ Researching tech context...${R}"
+   local research_md=""
+   research_md=$(/usr/bin/python3 - "$Q_STACK $Q_DESC $Q_FEATURES" <<'PYEOF' 2>/dev/null
+ import sys, urllib.request, urllib.parse, re
+ text = sys.argv[1]
+ # Extract candidate tech keywords (CamelCase, lowercase known stacks, version tags)
+ kws = re.findall(r'\b[A-Z][a-zA-Z0-9]{2,}\b|\b[a-z][a-z0-9-]{3,}\b', text)
+ stop = {'this','that','from','with','into','what','when','where','description','project','features','users','stack',
+         'architecture','test','strategy','constraints','context'}
+ kws = [k for k in kws if k.lower() not in stop and len(k) > 3]
+ kws = list(dict.fromkeys(kws))[:4]
+ if not kws:
+     sys.exit(0)
+ q = ' '.join(kws) + ' best practices architecture 2025'
+ try:
+     req = urllib.request.Request(f"https://duckduckgo.com/html/?q={urllib.parse.quote(q)}",
+                                  headers={'User-Agent':'Mozilla/5.0'})
+     html = urllib.request.urlopen(req, timeout=12).read().decode('utf-8', errors='ignore')
+     snippets = re.findall(r'class="result__snippet"[^>]*>([^<]+)<', html)[:5]
+     if snippets:
+         print(f"\n## Research context ({', '.join(kws)})")
+         for s in snippets:
+             print(f"- {re.sub(r'<[^>]+>','',s).strip()[:300]}")
+ except Exception:
+     pass
+ PYEOF
+   )
+   [[ -n "$research_md" ]] && echo "${D}  ${research_md}${R}" | head -3

+   # ── Step B: build PRD prompt (research-grounded) ─────────────────────────
+   echo ""
+   echo "${YE}▶ Generating ${target}...${R}"
    local prompt="You are an elite product/architecture strategist. Generate a COMPLETE, professional surrogate.md (PRD + ADRs + plan) based on these inputs:

  # Project: $Q_NAME

  - Users/context: $Q_USERS
  - Features:
  $Q_FEATURES
+ ${research_md}

  Output structure (markdown):

  ## Vision & Mission

  ## Tech Stack
+ <expand from input — adapt to chosen language/runtime; include lib versions where relevant, infra services, observability stack>

  ## Architecture
  <chosen style with rationale. Diagram in mermaid if applicable.>

  ## Domain Model
+ <DDD: bounded contexts, entities, aggregates, value objects, repositories — derived from features>

  ## Coding Standards
+ - $Q_TEST: test-first if tdd, one assertion per test, factory fixtures
  - $Q_ARCH design patterns enforced (Repository, Factory, Strategy, Builder where appropriate)
+ - Type-strict in chosen language (TS strict / Python type hints / Go generics / Rust traits)
+ - Result/Either over throws for expected errors
  - Naming: intent-revealing, units in names (retryDelayMs)

  ## Key Files (initial structure)

  ## Auto-Dev Plan
  - [ ] task description (atomic, ~30 min each, dev → QA → reviewer)
+ <break each feature into 3-7 tasks. Format strictly: '- [ ] <verb> <object>'>

  ## Test Strategy
  <test pyramid breakdown for $Q_TEST>

  - Docs updated
  - ADRs reflect actual implementation

+ Output ONLY the markdown, no preamble. Adapt to the actual stack the user chose — never default to Python unless they said Python."
+
+   # ── Step C: direct LLM call (curl), bypassing the agent tool-loop ──
+   local prd=""
+   if [[ -n "${GEMINI_API_KEY:-}" ]]; then
+     prd=$(/usr/bin/python3 - "$prompt" "$GEMINI_API_KEY" <<'PYEOF' 2>/dev/null
+ import sys, json, urllib.request
+ prompt, key = sys.argv[1], sys.argv[2]
+ url = f"https://generativelanguage.googleapis.com/v1beta/models/gemini-2.5-flash:generateContent?key={key}"
+ body = {"contents":[{"parts":[{"text":prompt}]}],
+         "generationConfig":{"temperature":0.3,"maxOutputTokens":8192}}
+ req = urllib.request.Request(url, data=json.dumps(body).encode(),
+                              headers={"Content-Type":"application/json"})
+ try:
+     with urllib.request.urlopen(req, timeout=120) as r:
+         d = json.load(r)
+     print(d["candidates"][0]["content"]["parts"][0]["text"])
+ except Exception as e:
+     print(f"GEMINI_ERROR: {type(e).__name__}: {e}", file=sys.stderr)
+ PYEOF
+     )
+   fi
+   if [[ -z "$prd" ]] || [[ ${#prd} -lt 400 ]]; then
+     if [[ -n "${OPENROUTER_API_KEY:-}" ]]; then
+       prd=$(/usr/bin/python3 - "$prompt" "$OPENROUTER_API_KEY" <<'PYEOF' 2>/dev/null
+ import sys, json, urllib.request
+ prompt, key = sys.argv[1], sys.argv[2]
+ body = {"model":"qwen/qwen3-coder","messages":[{"role":"user","content":prompt}],
+         "temperature":0.3,"max_tokens":8000}
+ req = urllib.request.Request("https://openrouter.ai/api/v1/chat/completions",
+                              data=json.dumps(body).encode(),
+                              headers={"Content-Type":"application/json","Authorization":f"Bearer {key}",
+                                       "HTTP-Referer":"https://axentx.ai","X-Title":"Surrogate-1"})
+ try:
+     with urllib.request.urlopen(req, timeout=120) as r:
+         d = json.load(r)
+     print(d["choices"][0]["message"]["content"])
+ except Exception as e:
+     print(f"OR_ERROR: {type(e).__name__}: {e}", file=sys.stderr)
+ PYEOF
+       )
+     fi
+   fi

+   # Strip stray code-fences if model wrapped output
+   prd=$(echo "$prd" | sed -E '/^```markdown\s*$/d; /^```\s*$/d')

+   if [[ -z "$prd" ]] || [[ ${#prd} -lt 400 ]]; then
+     echo "${RE}❌ PRD generation failed (Gemini + OpenRouter both empty/short). Falling back to template.${R}"
      cp "$SURROGATE_HOME/SURROGATE.md.template" "$target"
    else
      echo "$prd" > "$target"
+     # ── Step D: push PRD as training pair (HF dataset feedback loop) ───
+     /usr/bin/python3 - "$prompt" "$prd" <<'PYEOF' 2>/dev/null &
+ import sys, json, time, os
+ from pathlib import Path
+ log = Path.home() / '.surrogate' / 'training-pairs.jsonl'
+ log.parent.mkdir(parents=True, exist_ok=True)
+ with open(log, 'a') as f:
+     f.write(json.dumps({
+         'ts': time.time(),
+         'source': 'prd-wizard',
+         'cwd': os.getcwd(),
+         'prompt': sys.argv[1][:8000],
+         'response': sys.argv[2][:12000],
+         'messages': [
+             {'role':'user','content':sys.argv[1][:8000]},
+             {'role':'assistant','content':sys.argv[2][:12000]},
+         ],
+     }, ensure_ascii=False) + '\n')
+ PYEOF
    fi

    echo ""

    # Drive tasks from plan until all done
    while true; do
      # Pop next pending task from plan
+     NEXT_TASK=$(/usr/bin/python3 <<'PYEOF'
  import sys, re
  from pathlib import Path
  plan_file = Path.home() / '.surrogate' / 'active-plan.md'

      echo "${BCY}${B}▸ Next task:${R} $NEXT_TASK"
      bash ~/.claude/bin/surrogate-orchestrate.sh "$NEXT_TASK"
      # Mark done in plan
+     /usr/bin/python3 <<PYEOF
  from pathlib import Path
  plan_file = Path.home() / '.surrogate' / 'active-plan.md'
  if plan_file.exists():
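
A quick usage sketch of the wizard this change rewires (assuming `surrogate` is on PATH, per the commit message; the output filename is assumed to be surrogate.md, matching the PRD references above):

```bash
# Run the PRD wizard in a project directory; the answers feed the direct LLM call.
cd my-project
surrogate init          # prompts: name, description, stack, architecture, tests...
head -20 surrogate.md   # generated PRD (or the template copy on fallback)
```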
bin/surrogate-orchestrate.sh CHANGED
@@ -1,12 +1,12 @@
  #!/usr/bin/env bash
- # Auto-Dev orchestration — chains Hermes team agents like Claude Code's Agent tool
- # Flow: architect → dev → qa → reviewer (optional ops for infra tasks)
- # Each stage produces artifact → feeds into next
  #
  # Usage:
  #   surrogate-orchestrate.sh "task description"
- #   surrogate-orchestrate.sh --mode plan "task"   # architect only
- #   surrogate-orchestrate.sh --mode yolo "task"   # full chain, no gates
  set -u
  set -a; source "$HOME/.hermes/.env" 2>/dev/null; set +a

@@ -15,7 +15,7 @@ TASK=""
  while [[ $# -gt 0 ]]; do
    case "$1" in
      --mode) MODE="$2"; shift 2 ;;
-     *) TASK="$*"; break ;;
    esac
  done
  [[ -z "$TASK" ]] && { echo "need task"; exit 2; }
@@ -27,7 +27,8 @@ BCY=$'\033[96m'

  SESSION_ID=$(date +%s | tail -c 9)
  WORKDIR="$HOME/.claude/state/orchestrate/$SESSION_ID"
- mkdir -p "$WORKDIR"

  echo "${BCY}${B}╭─ Auto-Dev Orchestration ─────────────────╮${R}"
  echo "${BCY}${B}│${R} session: ${YE}$SESSION_ID${R}  mode: ${MA}$MODE${R}"
@@ -36,78 +37,284 @@ echo "${BCY}${B}╰────────────────────
  echo "${B}▸ Task:${R} $TASK"
  echo ""

- # Helper: call surrogate agent with specific role + feed artifacts
  call_agent() {
    local role="$1" prompt="$2" output_file="$3"
    echo "${CY}▶${R} ${B}$role${R} ${D}working...${R}"
-   # Use surrogate CLI to run the role-based task
-   local agent_prompt="[ROLE: $role]
  $prompt

- Output your work to $output_file using the \`write\` tool when done.
- Previous artifacts available in: $WORKDIR/
- CWD: $(pwd)"
-   ~/.claude/bin/surrogate -p "$agent_prompt" 2>&1 | head -50 | sed 's/^/  /'
-   # Check if file written
-   if [[ -f "$output_file" ]]; then
-     echo "${GR}  ⎿ $role done → $(basename "$output_file") ($(wc -c < "$output_file") bytes)${R}"
      return 0
    else
-     echo "${RE}  ⎿ $role: no output file written${R}"
      return 1
    fi
  }

- # Read project PRD if exists (DDD/TDD/architecture context)
- PRD_CONTEXT=""
- for prd_file in "$(pwd)/surrogate.md" "$(pwd)/SURROGATE.md"; do
-   [[ -f "$prd_file" ]] && PRD_CONTEXT=$(head -c 4000 "$prd_file") && break
- done
- [[ -n "$PRD_CONTEXT" ]] && PRD_CONTEXT="
-
- === Project PRD (surrogate.md) ===
- $PRD_CONTEXT
- === End PRD ==="

- # ═══ Stage 1: SOLUTION ARCHITECT (SA) — high-level design ═══
- SA_OUT="$WORKDIR/0-sa-design.md"
- echo ""
  echo "${MA}${B}═══ Stage 1/6: SOLUTION ARCHITECT${R} ${D}— DDD + design patterns${R}"
  call_agent "solution-architect" "
- You are a senior Solution Architect. For this task, produce a high-level technical design BEFORE any code.

- Required output:
  1. **Bounded contexts** (DDD) — which subdomain(s) does this touch?
- 2. **Domain model changes** — entities, aggregates, value objects, repositories
- 3. **Design patterns** to apply (Repository, Factory, Strategy, Observer, Builder, etc.) — pick deliberately, justify each
- 4. **Architecture style** alignment (hexagonal/MVC/MVVM/clean) — show layer flow
- 5. **Integration points** — APIs, events, side-effects (with sequence diagram in mermaid if non-trivial)
- 6. **Non-functional impacts** — perf, security, scalability, observability
  7. **Risks + mitigations**

- Be specific. No generic platitudes. Use codebase via read/grep/glob.
- ${PRD_CONTEXT}
  Task: $TASK
  " "$SA_OUT"

- # ═══ Stage 2: ARCHITECT — file-level decomposition ═══
- ARCH_OUT="$WORKDIR/1-architect-plan.md"
  echo ""
  echo "${MA}${B}═══ Stage 2/6: ARCHITECT${R} ${D}— file-level plan${R}"
  call_agent "architect" "
- You are the Tech Architect. Take the SA design and produce a CONCRETE file-level execution plan.
-
- SA design at: $SA_OUT

- Required output:
  1. **Files to create/modify** — exact paths + one-line purpose each
- 2. **Function signatures** — for new public APIs (with types)
- 3. **Test files first** (TDD) — list test cases BEFORE implementation files
- 4. **Dependencies** — new packages? versions?
- 5. **Migration plan** — DB schema changes, config rollout
- 6. **Rollback** — how to undo if production breaks

- Use existing codebase patterns — read 3-5 similar files first via \`read\`/\`grep\`.
  Task: $TASK
  " "$ARCH_OUT"

@@ -118,89 +325,127 @@ if [[ "$MODE" == "plan" ]]; then
    exit 0
  fi

- # ═══ Stage 3: QA-FIRST (TDD) — write tests BEFORE code ═══
- TDD_OUT="$WORKDIR/2-qa-tdd-tests.md"
  echo ""
- echo "${MA}${B}═══ Stage 3/6: QA-FIRST (TDD)${R} ${D}— write failing tests first${R}"
  call_agent "qa" "
- You are the QA Engineer practicing TDD. Write FAILING tests BEFORE the dev writes any code.

- SA design: $SA_OUT
- Architect plan: $ARCH_OUT

- Required:
- 1. Read existing test patterns in repo (pytest / jest / go test) via \`read\`/\`grep\`
- 2. Use the architect's listed test file paths
- 3. Write tests using \`write\` tool — they MUST fail (red phase of TDD)
- 4. One assertion per test, factory functions for fixtures, descriptive names
- 5. Cover: happy path, edge cases, error paths, security boundaries
- 6. NO implementation — only tests

- Output: list of test file paths created + brief 'tests will fail because <reason>'
  Task: $TASK
  " "$TDD_OUT"

- # ═══ Stage 4: DEV — implement to make tests pass ═══
- DEV_OUT="$WORKDIR/3-dev-summary.md"
  echo ""
  echo "${MA}${B}═══ Stage 4/6: DEV${R} ${D}— implement to green${R}"
  call_agent "dev" "
  You are the Senior Developer. Make the QA tests PASS by implementing per the Architect plan.

- SA design: $SA_OUT
- Architect: $ARCH_OUT
- QA tests: $TDD_OUT
-
- Strict rules:
- 1. Implement ONLY what's needed to make tests pass (red → green → refactor)
- 2. Apply DDD: Repository pattern for data access, no business logic in handlers
- 3. Apply design patterns from SA design (Strategy/Factory/Observer/etc.)
- 4. Type-strict (TS strict / Python type hints / Go generics)
- 5. Result/Either pattern over throws for expected errors
- 6. Intent-revealing names — verbs for functions, units for numerics
- 7. NO commented-out code, NO TODO without ticket ID, NO hallucinated imports
- 8. After each file: refactor for readability while keeping tests green
-
- Use \`write\`/\`edit\` tools → write actual files, not pseudocode.
- After done: write summary to output file with file list + test pass status.
  Task: $TASK
  " "$DEV_OUT"

- # ═══ Stage 5: QA-VERIFY — run all tests + add missing coverage ═══
- QA_OUT="$WORKDIR/4-qa-report.md"
  echo ""
  echo "${MA}${B}═══ Stage 5/6: QA-VERIFY${R} ${D}— green tests + coverage${R}"
  call_agent "qa" "
- You are the QA Engineer in verification phase. The dev claims tests pass — VERIFY.

- QA tests written: $TDD_OUT
- Dev summary: $DEV_OUT

- Required:
- 1. Run the test suite via \`bash\` (pytest / npm test / go test ./...)
- 2. Verify all tests pass (no skips, no x's)
- 3. Check coverage — if missing branches, add MORE tests + re-run
- 4. Run linting (ruff / eslint / golangci-lint) and type-check (mypy / tsc / go vet)
- 5. Manual sanity test of happy path

- Output to file: pass/fail per check + coverage % + new tests added (if any).
  Task: $TASK
  " "$QA_OUT"

- # ═══ Stage 4: OPS (if task mentions infra) ═══
- if echo "$TASK" | grep -iqE "deploy|docker|helm|k8s|terraform|cicd|ci/cd"; then
-   OPS_OUT="$WORKDIR/4-ops-checklist.md"
    echo ""
    echo "${MA}${B}═══ Stage 6a/6: OPS${R} ${D}— deploy + infra${R}"
    call_agent "ops" "
- Review infrastructure aspects. Check:
- - Dockerfile / helm chart / terraform validity
  - Secrets / env var handling
- - Resource limits
  - Observability (metrics/logs/traces)

- Dev summary: $DEV_OUT
- Output to: $OPS_OUT
  Task: $TASK
  " "$OPS_OUT"
  else
@@ -208,84 +453,69 @@ else
    echo "${GY}═══ Stage 6a/6: OPS — skipped (not infra task)${R}"
  fi

- # ═══ Stage 5: REVIEWER ═══
- REVIEW_OUT="$WORKDIR/5-review-verdict.md"
  echo ""
  echo "${MA}${B}═══ Stage 6/6: REVIEWER${R} ${D}— final gate${R}"
  call_agent "reviewer" "
- FINAL REVIEW GATE. Check all prior stages:
- - Architect plan: $ARCH_OUT
- - Dev implementation summary: $DEV_OUT
- - QA report: $QA_OUT

- Judge the work on:
  1. Correctness vs requirements
  2. Code quality (naming, no hallucinated imports, error handling)
- 3. Security (no leaked secrets, input validation)
- 4. Tests coverage
  5. Match existing codebase style

- Verdict: APPROVE / REWORK / REJECT
- If REWORK — specify what to redo.

- Output verdict + reasons to: $REVIEW_OUT
  Task: $TASK
  " "$REVIEW_OUT"

- # ═══ Summary ═══
  echo ""
  echo "${BCY}${B}╭─ Session Complete ───────────────────────╮${R}"
  echo "${BCY}${B}│${R} session: $SESSION_ID"
  echo "${BCY}${B}│${R} artifacts: $WORKDIR/"
  echo "${BCY}${B}╰──────────────────────────────────────────╯${R}"
- ls -la "$WORKDIR/" 2>&1 | tail -n +2 | awk '{print "  " $9}' | grep -v '^  $'

- # Show verdict + auto-commit if APPROVED
  VERDICT_TEXT=""
  if [[ -f "$REVIEW_OUT" ]]; then
-   VERDICT_TEXT=$(grep -iE "verdict|APPROVE|REWORK|REJECT" "$REVIEW_OUT" | head -3)
    echo ""
    echo "${B}▸ Final verdict:${R}"
    echo "$VERDICT_TEXT" | sed 's/^/  /'
  fi

- # Auto-commit when reviewer approves (ship code)
  if echo "$VERDICT_TEXT" | grep -qi "APPROVE"; then
    echo ""
    echo "${GR}${B}▸ Reviewer approved — committing changes${R}"
-   # Only commit if there are staged/unstaged changes
    if ! git -C "$(pwd)" diff --quiet 2>/dev/null || ! git -C "$(pwd)" diff --cached --quiet 2>/dev/null; then
-     # Stage all changes in CWD
      git -C "$(pwd)" add -A 2>/dev/null
-     # Build commit message from task + session
-     COMMIT_MSG="feat: $(echo "$TASK" | head -c 72)

  [surrogate auto-dev session $SESSION_ID]
- [reviewed: APPROVE]"
-     if git -C "$(pwd)" commit -m "$COMMIT_MSG" 2>&1 | tee -a "$WORKDIR/git-commit.log" | grep -q "master\|main\|\["; then
        COMMIT_HASH=$(git -C "$(pwd)" rev-parse --short HEAD 2>/dev/null)
        echo "${GR}  ✅ Committed: $COMMIT_HASH${R}"
      else
-       echo "${YE}  ⚠ Nothing to commit (files already clean)${R}"
      fi
    else
      echo "${GY}  ○ No file changes to commit${R}"
    fi
  elif echo "$VERDICT_TEXT" | grep -qi "REWORK"; then
    echo ""
-   echo "${YE}${B}▸ Reviewer requested REWORK — re-running dev stage${R}"
-   REWORK_NOTES=$(grep -A5 -i "REWORK" "$REVIEW_OUT" | head -8)
-   DEV_OUT2="$WORKDIR/2b-dev-rework.md"
-   call_agent "dev" "
- REWORK requested by reviewer. Fix the following issues:
-
- $REWORK_NOTES
-
- Original task: $TASK
- Original implementation: $DEV_OUT
- QA report: $QA_OUT
-
- Fix the issues and write updated summary to output file.
- " "$DEV_OUT2"
-   echo "${D}  Rework complete — re-run $0 to go through QA + review again if needed${R}"
  fi
1
  #!/usr/bin/env bash
2
+ # Auto-Dev orchestration — chains role-prompts to produce concrete artifacts.
3
+ # Bypasses LLM tool-loop (which is unreliable) uses marker extraction instead.
4
+ # Each stage writes a markdown artifact; final stages may emit code patches.
5
  #
6
  # Usage:
7
  # surrogate-orchestrate.sh "task description"
8
+ # surrogate-orchestrate.sh --mode plan "task" # SA + architect only
9
+ # surrogate-orchestrate.sh --mode yolo "task" # full chain, no gates
10
  set -u
11
  set -a; source "$HOME/.hermes/.env" 2>/dev/null; set +a
12
 
 
15
  while [[ $# -gt 0 ]]; do
16
  case "$1" in
17
  --mode) MODE="$2"; shift 2 ;;
18
+ *) TASK="$*"; break ;;
19
  esac
20
  done
21
  [[ -z "$TASK" ]] && { echo "need task"; exit 2; }
 
27
 
28
  SESSION_ID=$(date +%s | tail -c 9)
29
  WORKDIR="$HOME/.claude/state/orchestrate/$SESSION_ID"
30
+ TRAINING_LOG="$HOME/.surrogate/training-pairs.jsonl"
31
+ mkdir -p "$WORKDIR" "$(dirname "$TRAINING_LOG")"
32
 
33
  echo "${BCY}${B}╭─ Auto-Dev Orchestration ─────────────────╮${R}"
34
  echo "${BCY}${B}│${R} session: ${YE}$SESSION_ID${R} mode: ${MA}$MODE${R}"
 
37
  echo "${B}▸ Task:${R} $TASK"
38
  echo ""
39
 
40
+ # ── Web research preamble: if task mentions tech we don't recognize, search first ──
41
+ RESEARCH_CONTEXT=""
42
+ RESEARCH_OUT="$WORKDIR/0-research-context.md"
43
+ if echo "$TASK" | grep -iqE "migrat|integrat|switch from|move to|adopt|setup|deploy"; then
44
+ echo "${MA}${B}═══ Stage 0/6: WEB RESEARCH${R} ${D}— gather current docs first${R}"
45
+ /usr/bin/python3 - "$TASK" "$RESEARCH_OUT" <<'PYEOF' 2>&1 | sed 's/^/ /' || true
46
+ import sys, urllib.request, urllib.parse, json, re, os
47
+ task, out_path = sys.argv[1], sys.argv[2]
48
+ # Extract tech keywords (capitalized words, dot-versions, snake-case)
49
+ keywords = re.findall(r'\b[A-Z][a-zA-Z0-9]{2,}\b|\b[a-z][a-z0-9-]{3,}(?=\s)', task)
50
+ keywords = [k for k in keywords if k.lower() not in {'the','this','that','from','with','into','what','when','where','typescript','python','javascript','java','rust'}]
51
+ keywords = list(dict.fromkeys(keywords))[:3] # top-3 unique
52
+ if not keywords:
53
+ print(" no clear tech keywords — skipping research")
54
+ sys.exit(0)
55
+ print(f" keywords: {keywords}")
56
+ ddg_url = f"https://duckduckgo.com/html/?q={urllib.parse.quote(' '.join(keywords) + ' best practices 2025')}"
57
+ try:
58
+ req = urllib.request.Request(ddg_url, headers={'User-Agent':'Mozilla/5.0'})
59
+ with urllib.request.urlopen(req, timeout=15) as r:
60
+ html = r.read().decode('utf-8', errors='ignore')
61
+ # Extract result snippets
62
+ snippets = re.findall(r'class="result__snippet"[^>]*>([^<]+)<', html)[:5]
63
+ titles = re.findall(r'class="result__title"[^>]*>.*?>([^<]+)<', html, re.DOTALL)[:5]
64
+ with open(out_path, 'w') as f:
65
+ f.write(f"# Web research: {' / '.join(keywords)}\n\n")
66
+ for i, (t, s) in enumerate(zip(titles, snippets)):
67
+ f.write(f"## {i+1}. {t.strip()}\n{s.strip()}\n\n")
68
+ print(f" wrote {len(snippets)} snippets → {os.path.basename(out_path)}")
69
+ except Exception as e:
70
+ print(f" research skipped: {type(e).__name__}: {str(e)[:80]}")
71
+ PYEOF
72
+ [[ -f "$RESEARCH_OUT" ]] && RESEARCH_CONTEXT="
73
+
74
+ === Web research context ===
75
+ $(cat "$RESEARCH_OUT")
76
+ === End research ==="
77
+ echo ""
78
+ fi
79
+
80
+ # ── PRD context: read surrogate.md if present ──
81
+ PRD_CONTEXT=""
82
+ for prd_file in "$(pwd)/surrogate.md" "$(pwd)/SURROGATE.md"; do
83
+ if [[ -f "$prd_file" ]]; then
84
+ PRD_CONTEXT="
85
+
86
+ === Project PRD (surrogate.md) ===
87
+ $(/usr/bin/head -c 6000 "$prd_file")
88
+ === End PRD ==="
89
+ break
90
+ fi
91
+ done
92
+
93
+ # ── Helper: call LLM directly (skip surrogate -p agent loop entirely) ──
94
+ # Why: agent loop forces tool-use system prompt → models output tool-call attempts
95
+ # instead of clean markdown deliverables. Direct LLM call gives reliable text-in/text-out.
96
  call_agent() {
97
  local role="$1" prompt="$2" output_file="$3"
98
  echo "${CY}▶${R} ${B}$role${R} ${D}working...${R}"
99
+
100
+ local prior_artifacts=""
101
+ if [[ -d "$WORKDIR" ]]; then
102
+ prior_artifacts=$(ls -1 "$WORKDIR" 2>/dev/null | grep -v '\.raw$' | sed 's/^/ - /')
103
+ fi
104
+
105
+ # Write prompt to temp file (avoids bash quoting hell with multi-KB prompts)
106
+ local prompt_file="$WORKDIR/.prompt-${role//[^a-zA-Z0-9]/_}.txt"
107
+ cat > "$prompt_file" <<EOF
108
+ ROLE: $role
109
+
110
  $prompt
111
+ ${RESEARCH_CONTEXT}
112
+ ${PRD_CONTEXT}
113
 
114
+ === Working context ===
115
+ CWD: $(pwd)
116
+ Prior artifacts in $WORKDIR/:
117
+ ${prior_artifacts:- (none yet)}
118
+
119
+ === OUTPUT FORMAT ===
120
+ Write your full deliverable as markdown directly. The wrapper saves your output verbatim.
121
+ - Be substantive (≥ 30 lines)
122
+ - For DEV role: include code as headings + fenced blocks like:
123
+ ### path/to/file.ext
124
+ \`\`\`<lang>
125
+ <full file content>
126
+ \`\`\`
127
+ - No preamble. Begin with a heading.
128
+ EOF
129
+
130
+ # Direct LLM ladder: tries free fast providers first, paid last.
131
+ # Reads keys from environment to avoid bash quoting nightmares.
132
+ local content
133
+ content=$(GEMINI_KEY="${GEMINI_API_KEY:-}" \
134
+ GEMINI_KEY2="${GEMINI_API_KEY_2:-}" \
135
+ GROQ_KEY="${GROQ_API_KEY:-}" \
136
+ CEREBRAS_KEY="${CEREBRAS_API_KEY:-}" \
137
+ SAMBA_KEY="${SAMBANOVA_API_KEY:-}" \
138
+ CHUTES_KEY="${CHUTES_API_KEY:-}" \
139
+ OR_KEY_ENV="${OPENROUTER_API_KEY:-}" \
140
+ GH_POOL="${GITHUB_TOKEN_POOL:-}" \
141
+ /usr/bin/python3 - "$prompt_file" <<'PYEOF' 2>&1
142
+ import sys, json, urllib.request, os
143
+ from pathlib import Path
144
+ prompt = Path(sys.argv[1]).read_text()
145
+
146
+ def gemini(key, model="gemini-2.5-flash"):
147
+ url = f"https://generativelanguage.googleapis.com/v1beta/models/{model}:generateContent?key={key}"
148
+ body = {"contents":[{"parts":[{"text":prompt}]}],
149
+ "generationConfig":{"temperature":0.3,"maxOutputTokens":8192}}
150
+ req = urllib.request.Request(url, data=json.dumps(body).encode(),
151
+ headers={"Content-Type":"application/json"})
152
+ with urllib.request.urlopen(req, timeout=120) as r:
153
+ d = json.load(r)
154
+ return d["candidates"][0]["content"]["parts"][0]["text"]
155
+
156
+ def oai_compatible(url, model, key, extra_headers=None):
157
+ body = {"model":model,"messages":[{"role":"user","content":prompt}],
158
+ "temperature":0.3,"max_tokens":8000}
159
+ headers = {"Content-Type":"application/json","Authorization":f"Bearer {key}"}
160
+ if extra_headers: headers.update(extra_headers)
161
+ req = urllib.request.Request(url, data=json.dumps(body).encode(), headers=headers)
162
+ with urllib.request.urlopen(req, timeout=120) as r:
163
+ d = json.load(r)
164
+ return d["choices"][0]["message"]["content"]
165
+
166
+ ladder = []
167
+ # Free, fast (Groq + Cerebras serve Llama 3.3 70B at ~500 tok/s)
168
+ if os.environ.get("CEREBRAS_KEY"):
169
+ ladder.append(("cerebras:llama-70b",
170
+ lambda: oai_compatible("https://api.cerebras.ai/v1/chat/completions",
171
+ "llama-3.3-70b", os.environ["CEREBRAS_KEY"])))
172
+ if os.environ.get("GROQ_KEY"):
173
+ ladder.append(("groq:llama-70b",
174
+ lambda: oai_compatible("https://api.groq.com/openai/v1/chat/completions",
175
+ "llama-3.3-70b-versatile", os.environ["GROQ_KEY"])))
176
+ # Gemini free tier (rotate two keys)
177
+ if os.environ.get("GEMINI_KEY"):
178
+ ladder.append(("gemini-1", lambda: gemini(os.environ["GEMINI_KEY"])))
179
+ if os.environ.get("GEMINI_KEY2"):
180
+ ladder.append(("gemini-2", lambda: gemini(os.environ["GEMINI_KEY2"])))
181
+ # SambaNova free tier (Llama 70B)
182
+ if os.environ.get("SAMBA_KEY"):
183
+ ladder.append(("samba:llama-70b",
184
+ lambda: oai_compatible("https://api.sambanova.ai/v1/chat/completions",
185
+ "Meta-Llama-3.3-70B-Instruct", os.environ["SAMBA_KEY"])))
186
+ # GitHub Models (free with PAT, rate-limited)
187
+ gh_pool = os.environ.get("GH_POOL", "")
188
+ if gh_pool:
189
+ for tok in gh_pool.split(",")[:2]:
190
+ if tok.strip():
191
+ ladder.append(("github-models",
192
+ lambda t=tok.strip(): oai_compatible(
193
+ "https://models.github.ai/inference/chat/completions",
194
+ "openai/gpt-4o-mini", t)))
195
+ # Chutes (free OSS proxy)
196
+ if os.environ.get("CHUTES_KEY"):
197
+ ladder.append(("chutes:qwen3-coder",
198
+ lambda: oai_compatible("https://llm.chutes.ai/v1/chat/completions",
199
+ "Qwen/Qwen3-Coder-30B-A3B-Instruct", os.environ["CHUTES_KEY"])))
200
+ # OpenRouter (paid — only if credit available)
201
+ if os.environ.get("OR_KEY_ENV"):
202
+ ladder.append(("or:qwen3-coder",
203
+ lambda: oai_compatible("https://openrouter.ai/api/v1/chat/completions",
204
+ "qwen/qwen3-coder", os.environ["OR_KEY_ENV"],
205
+ {"HTTP-Referer":"https://axentx.ai","X-Title":"Surrogate-1"})))
206
+ ladder.append(("or:claude-haiku",
207
+ lambda: oai_compatible("https://openrouter.ai/api/v1/chat/completions",
208
+ "anthropic/claude-haiku-4.5", os.environ["OR_KEY_ENV"],
209
+ {"HTTP-Referer":"https://axentx.ai","X-Title":"Surrogate-1"})))
210
+
211
+ errors, out = [], ""
212
+ for name, fn in ladder:
213
+ try:
214
+ result = fn()
215
+ if result and len(result) > 100:
216
+ out = result
217
+ print(f"# generated via {name}", file=sys.stderr)
218
+ break
219
+ errors.append(f"{name}:short({len(result or '')})")
220
+ except urllib.error.HTTPError as e:
221
+ errors.append(f"{name}:HTTP{e.code}")
222
+ except Exception as e:
223
+ errors.append(f"{name}:{type(e).__name__}")
224
+
225
+ if not out:
226
+ print(f"ERR: providers exhausted ({', '.join(errors[:8])})", file=sys.stderr)
227
+ print(out)
228
+ PYEOF
229
+ )
230
+ # Strip stray markdown wrapping if model added it
231
+ content=$(echo "$content" | sed -E '/^```markdown\s*$/d; /^```\s*$/{ N; /\n```\s*$/d; }' | head -c 60000)
232
+
233
+ if [[ -n "$content" ]] && [[ ${#content} -ge 100 ]]; then
234
+ printf '%s\n' "$content" > "$output_file"
235
+ local bytes; bytes=$(wc -c < "$output_file" | tr -d ' ')
236
+ echo "${GR} ⎿ $role done → $(basename "$output_file") (${bytes} bytes)${R}"
237
+ echo "$content" | head -2 | sed 's/^/ │ /' | cut -c1-110
238
+ push_training_pair "orchestrate-$role" "$prompt" "$content"
239
  return 0
240
  else
241
+ printf '%s\n' "$content" > "${output_file}.raw"
242
+ local bytes; bytes=$(wc -c < "${output_file}.raw" 2>/dev/null | tr -d ' ' || echo 0)
243
+ echo "${RE} ⎿ $role: empty/short — raw saved (${bytes} bytes)${R}"
244
+ echo "$content" | tail -3 | sed 's/^/ │ /' | cut -c1-110
245
  return 1
246
  fi
247
  }
248
 
249
+ # ── Push every task pair to HF training dataset (background) ──
250
+ push_training_pair() {
251
+ local source="$1" prompt="$2" content="$3"
252
+ /usr/bin/python3 - "$source" "$prompt" "$content" "$TRAINING_LOG" <<'PYEOF' 2>/dev/null &
253
+ import sys, json, time, os
254
+ src, p, c, log = sys.argv[1], sys.argv[2], sys.argv[3], sys.argv[4]
255
+ pair = {
256
+ 'ts': time.time(),
257
+ 'source': src,
258
+ 'cwd': os.getcwd(),
259
+ 'prompt': p[:8000],
260
+ 'response': c[:12000],
261
+ 'messages': [
262
+ {'role': 'user', 'content': p[:8000]},
263
+ {'role': 'assistant', 'content': c[:12000]},
264
+ ],
265
+ }
266
+ with open(log, 'a') as f:
267
+ f.write(json.dumps(pair, ensure_ascii=False) + '\n')
268
+ PYEOF
269
+ # Trigger HF sync every 25 pairs (background, only if file exists)
270
+ if [[ -f "$TRAINING_LOG" ]]; then
271
+ local count
272
+ count=$(wc -l < "$TRAINING_LOG" 2>/dev/null | tr -d ' ')
273
+ count=${count:-0}
274
+ if [[ $count -gt 0 ]] && [[ $((count % 25)) -eq 0 ]]; then
275
+ nohup bash "$HOME/.local/bin/push-training-to-hf.sh" \
276
+ > "$HOME/.claude/logs/training-push.log" 2>&1 &
277
+ fi
278
+ fi
279
+ }
280
 
281
+ # ── Stage 1: SOLUTION ARCHITECT ──
282
+ SA_OUT="$WORKDIR/1-sa-design.md"
 
283
  echo "${MA}${B}═══ Stage 1/6: SOLUTION ARCHITECT${R} ${D}— DDD + design patterns${R}"
284
  call_agent "solution-architect" "
285
+ You are a senior Solution Architect. Produce a high-level technical design for the task.
286
 
287
+ Cover (each as a heading):
288
  1. **Bounded contexts** (DDD) — which subdomain(s) does this touch?
289
+ 2. **Domain model** — entities, aggregates, value objects, repositories
290
+ 3. **Design patterns** pick deliberately (Repository / Factory / Strategy / Observer / Builder), justify each
291
+ 4. **Architecture style** hexagonal / MVC / clean — show layer flow
292
+ 5. **Integration points** — APIs, events, side-effects (mermaid diagram welcome)
293
+ 6. **Non-functional impacts** — perf, security, scale, observability
294
  7. **Risks + mitigations**
295
 
296
+ Be concrete. Use the codebase if useful (read/grep tools available). No platitudes.
297
+
298
  Task: $TASK
299
  " "$SA_OUT"
300
 
+ # ── Stage 2: ARCHITECT ──
+ ARCH_OUT="$WORKDIR/2-architect-plan.md"
  echo ""
  echo "${MA}${B}═══ Stage 2/6: ARCHITECT${R} ${D}— file-level plan${R}"
  call_agent "architect" "
+ You are the Tech Architect. Take the SA design (at $SA_OUT) and produce a CONCRETE file-level execution plan.
+
+ Required headings:
  1. **Files to create/modify** — exact paths + one-line purpose each
+ 2. **Function signatures** — public APIs with types
+ 3. **Test files first (TDD)** — test cases BEFORE implementation files
+ 4. **Dependencies** — new packages and versions
+ 5. **Migration plan** — schema/config rollouts
+ 6. **Rollback** — how to undo on prod failure
+
+ Read 3–5 similar files first (read/grep) to follow existing patterns.

  Task: $TASK
  " "$ARCH_OUT"
 
 
    exit 0
  fi

+ # ── Stage 3: QA-FIRST (TDD tests) ──
+ TDD_OUT="$WORKDIR/3-qa-tdd-tests.md"
  echo ""
+ echo "${MA}${B}═══ Stage 3/6: QA-FIRST (TDD)${R} ${D}— failing tests first${R}"
  call_agent "qa" "
+ You are the QA Engineer practicing TDD. Output FAILING test code BEFORE the dev writes any implementation.
+
+ Inputs:
+ - SA design: $SA_OUT
+ - Architect plan: $ARCH_OUT
+
+ Required output:
+ 1. List of test file paths (use the architect's listed paths)
+ 2. Full test code for each file as fenced code blocks (\`\`\`python / \`\`\`typescript / etc.)
+ 3. Each test: one assertion, factory functions for fixtures, descriptive name
+ 4. Cover: happy path, edge cases, error paths, security boundaries
+ 5. End with: 'tests will fail because <reason>' for each file
+
+ NO implementation code — only tests.

  Task: $TASK
  " "$TDD_OUT"
 
+ # ── Stage 4: DEV ──
+ DEV_OUT="$WORKDIR/4-dev-summary.md"
  echo ""
  echo "${MA}${B}═══ Stage 4/6: DEV${R} ${D}— implement to green${R}"
  call_agent "dev" "
  You are the Senior Developer. Make the QA tests PASS by implementing per the Architect plan.

+ Inputs:
+ - SA design: $SA_OUT
+ - Architect: $ARCH_OUT
+ - QA tests: $TDD_OUT
+
+ Output (markdown):
+ 1. Heading per file: \`### path/to/file.ext\`
+ 2. Below each heading: full file content as a fenced \`\`\`<lang> code block
+ 3. End with: '### Summary' list of files + 'tests now pass because <reason>'
+
+ Rules:
+ - Implement ONLY what's needed to pass tests (red → green → refactor)
+ - DDD: Repository for data access, no business logic in handlers
+ - Apply patterns from SA design (Strategy/Factory/Observer/etc.)
+ - Type-strict (TS strict / Python type hints / Go generics)
+ - Result/Either pattern over throws for expected errors
+ - Intent-revealing names; units in numeric names
+ - NO commented-out code, NO TODO without ticket ID, NO hallucinated imports
+
  Task: $TASK
  " "$DEV_OUT"
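For reference, the extractor below only picks up DEV output shaped like this (illustrative sample; the path and code are made up):

  ### src/utils/slugify.py
  ```python
  def slugify(text: str) -> str:
      return text.lower().strip().replace(' ', '-')
  ```

Headings without a file extension, and code blocks that don't sit directly under a `### path` heading, are ignored by the pattern.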
 
+ # Extract code blocks from DEV output → write actual files
+ if [[ -f "$DEV_OUT" ]]; then
+   echo "${D} Extracting code blocks → real files${R}"
+   /usr/bin/python3 - "$DEV_OUT" "$(pwd)" <<'PYEOF' 2>&1 | sed 's/^/ /'
+ import sys, re
+ from pathlib import Path
+ md_path, cwd = sys.argv[1], sys.argv[2]
+ md = Path(md_path).read_text()
+ # Match: ### relative/path.ext followed by ```lang ... ```
+ pattern = re.compile(r'^###\s+([^\s]+\.[a-zA-Z0-9]+)\s*$\n+```[a-zA-Z0-9_+-]*\n(.*?)^```\s*$', re.MULTILINE | re.DOTALL)
+ written = 0
+ for m in pattern.finditer(md):
+     rel = m.group(1).strip()
+     code = m.group(2)
+     if rel.startswith('/'):
+         target = Path(rel)
+     else:
+         target = Path(cwd) / rel
+     # Safety: refuse paths escaping cwd (relative_to raises if target is outside)
+     try:
+         target = target.resolve()
+         target.relative_to(Path(cwd).resolve())
+     except (ValueError, OSError):
+         print(f" skip (outside cwd): {rel}")
+         continue
+     target.parent.mkdir(parents=True, exist_ok=True)
+     target.write_text(code)
+     written += 1
+     print(f" wrote {rel} ({len(code)} bytes)")
+ print(f" total {written} files written")
+ PYEOF
+ fi
+
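The heading+fence regex can be sanity-checked in isolation (same pattern as above; the markdown sample is made up):

  /usr/bin/python3 - <<'PY'
  import re
  md = "### src/app.py\n```python\nprint('hi')\n```\n"
  pat = re.compile(r'^###\s+([^\s]+\.[a-zA-Z0-9]+)\s*$\n+```[a-zA-Z0-9_+-]*\n(.*?)^```\s*$',
                   re.MULTILINE | re.DOTALL)
  for m in pat.finditer(md):
      print(m.group(1), '->', repr(m.group(2)))  # src/app.py -> "print('hi')\n"
  PY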
+ # ── Stage 5: QA-VERIFY ──
+ QA_OUT="$WORKDIR/5-qa-verify.md"
  echo ""
  echo "${MA}${B}═══ Stage 5/6: QA-VERIFY${R} ${D}— green tests + coverage${R}"
  call_agent "qa" "
+ You are QA in verification phase. Verify the dev's claim that the tests pass.
+
+ Inputs:
+ - QA tests written: $TDD_OUT
+ - Dev summary: $DEV_OUT
+
+ Output:
+ 1. **Run results** — what command(s) you'd run, expected pass/fail
+ 2. **Coverage** — branches covered, gaps identified
+ 3. **Lint/type** — checks performed
+ 4. **Verdict** — READY / NEEDS-WORK with specific gaps

  Task: $TASK
  " "$QA_OUT"
 
+ # ── Stage 6a: OPS (conditional) ──
+ if echo "$TASK" | /usr/bin/grep -iqE "deploy|docker|helm|k8s|terraform|cicd|ci/cd|cloudformation|buildspec|ecs|lambda"; then
+   OPS_OUT="$WORKDIR/6a-ops-checklist.md"
    echo ""
    echo "${MA}${B}═══ Stage 6a/6: OPS${R} ${D}— deploy + infra${R}"
    call_agent "ops" "
+ Review the infrastructure aspects of this task:
+ - Dockerfile / helm / terraform / cloudformation validity
  - Secrets / env var handling
+ - Resource limits + cost guardrails
  - Observability (metrics/logs/traces)
+ - IAM least privilege

+ Inputs: $DEV_OUT

  Task: $TASK
  " "$OPS_OUT"
  else
    echo "${GY}═══ Stage 6a/6: OPS — skipped (not infra task)${R}"
  fi
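Whether OPS runs is decided purely by that keyword grep against the task text; a quick look at what trips it (the sample tasks are invented):

  for t in 'add a terraform module for the upload bucket' 'rename a React prop'; do
    echo "$t" | grep -iqE 'deploy|docker|helm|k8s|terraform|cicd|ci/cd|cloudformation|buildspec|ecs|lambda' \
      && echo "OPS runs:    $t" || echo "OPS skipped: $t"
  done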
455
 
456
+ # ── Stage 6: REVIEWER ──
457
+ REVIEW_OUT="$WORKDIR/6-review-verdict.md"
458
  echo ""
459
  echo "${MA}${B}═══ Stage 6/6: REVIEWER${R} ${D}— final gate${R}"
460
  call_agent "reviewer" "
461
+ FINAL REVIEW GATE. Inspect prior stages and judge.
462
+
463
+ Inputs:
464
+ - Architect: $ARCH_OUT
465
+ - Dev: $DEV_OUT
466
+ - QA: $QA_OUT
467
 
468
+ Judge on:
469
  1. Correctness vs requirements
470
  2. Code quality (naming, no hallucinated imports, error handling)
471
+ 3. Security (no secret leakage, input validation)
472
+ 4. Test coverage
473
  5. Match existing codebase style
474
 
475
+ Output format:
476
+ **Verdict:** APPROVE | REWORK | REJECT
477
+ **Reasons:** (3–5 bullets)
478
+ **Action items if REWORK:** (specific fixes)
479
 
 
480
  Task: $TASK
481
  " "$REVIEW_OUT"
482
 
+ # ── Summary + auto-commit on APPROVE ──
  echo ""
  echo "${BCY}${B}╭─ Session Complete ───────────────────────╮${R}"
  echo "${BCY}${B}│${R} session: $SESSION_ID"
  echo "${BCY}${B}│${R} artifacts: $WORKDIR/"
  echo "${BCY}${B}╰──────────────────────────────────────────╯${R}"
+ ls -la "$WORKDIR/" 2>&1 | tail -n +2 | awk '{printf " %s %s\n", $5, $9}' | grep -v ' $'

  VERDICT_TEXT=""
  if [[ -f "$REVIEW_OUT" ]]; then
+   VERDICT_TEXT=$(grep -iE "verdict|APPROVE|REWORK|REJECT" "$REVIEW_OUT" | /usr/bin/head -3)
    echo ""
    echo "${B}▸ Final verdict:${R}"
    echo "$VERDICT_TEXT" | sed 's/^/ /'
  fi

  if echo "$VERDICT_TEXT" | grep -qi "APPROVE"; then
    echo ""
    echo "${GR}${B}▸ Reviewer approved — committing changes${R}"
    if ! git -C "$(pwd)" diff --quiet 2>/dev/null || ! git -C "$(pwd)" diff --cached --quiet 2>/dev/null; then
      git -C "$(pwd)" add -A 2>/dev/null
+     short_task=$(echo "$TASK" | head -c 72)  # top-level scope: 'local' is only valid inside a function
+     if git -C "$(pwd)" commit -m "feat: $short_task

  [surrogate auto-dev session $SESSION_ID]
+ [reviewed: APPROVE]" 2>&1 | tee -a "$WORKDIR/git-commit.log" | grep -q "master\|main\|\["; then
        COMMIT_HASH=$(git -C "$(pwd)" rev-parse --short HEAD 2>/dev/null)
        echo "${GR} ✅ Committed: $COMMIT_HASH${R}"
      else
+       echo "${YE} ⚠ Nothing to commit${R}"
      fi
    else
      echo "${GY} ○ No file changes to commit${R}"
    fi
  elif echo "$VERDICT_TEXT" | grep -qi "REWORK"; then
    echo ""
+   echo "${YE}${B}▸ Reviewer requested REWORK — re-run orchestrate after addressing the notes${R}"
+   grep -A5 -i "REWORK\|action item" "$REVIEW_OUT" | /usr/bin/head -10 | sed 's/^/ /'
  fi
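The gate is a plain case-insensitive substring match on the reviewer's output, checked APPROVE first, so the three outcomes route like this (sketch; the verdict lines are invented):

  for v in '**Verdict:** APPROVE' '**Verdict:** REWORK' '**Verdict:** REJECT'; do
    if   echo "$v" | grep -qi 'APPROVE'; then echo "$v -> auto-commit"
    elif echo "$v" | grep -qi 'REWORK';  then echo "$v -> print action items"
    else                                      echo "$v -> stop, nothing committed"
    fi
  done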
start.sh CHANGED
@@ -117,10 +117,23 @@ OLLAMA_HOST=127.0.0.1:11434 \
  nohup ollama serve > "$LOG_DIR/ollama.log" 2>&1 &
  sleep 6

- # Pull model only on first boot (model cache lives in /data/.ollama/models)
+ # Pull models only on first boot (cache lives in /data/.ollama/models).
+ # Primary coding brain: qwen3-coder MoE (newest official Qwen coder; ~16 GB Q4, 3B active = fast on CPU).
+ # Fallback: qwen2.5-coder:14b (proven). Light: gemma4:e4b (kept for quick triage).
+ #
+ # Note: user asked about "qwen3.6" — that's a community general-chat fine-tune,
+ # not coder-specialized. qwen3-coder is the official Qwen team flagship for SDLC tasks.
+ if ! ollama list 2>/dev/null | grep -q "qwen3-coder"; then
+   echo "[$(date +%H:%M:%S)] pulling qwen3-coder:30b-a3b (~16 GB MoE, primary brain)" >> "$LOG_DIR/boot.log"
+   nohup ollama pull qwen3-coder:30b-a3b-instruct-q4_K_M > "$LOG_DIR/ollama-pull-coder.log" 2>&1 &
+ fi
+ if ! ollama list 2>/dev/null | grep -q "qwen2.5-coder:14b"; then
+   echo "[$(date +%H:%M:%S)] pulling qwen2.5-coder:14b (~9 GB, fallback brain)" >> "$LOG_DIR/boot.log"
+   nohup ollama pull qwen2.5-coder:14b-instruct-q4_K_M > "$LOG_DIR/ollama-pull-fallback.log" 2>&1 &
+ fi
  if ! ollama list 2>/dev/null | grep -q "gemma4:e4b"; then
-   echo "[$(date +%H:%M:%S)] pulling gemma4:e4b (~9.6 GB, first boot, 5-15 min)" >> "$LOG_DIR/boot.log"
-   nohup ollama pull gemma4:e4b > "$LOG_DIR/ollama-pull.log" 2>&1 &
+   echo "[$(date +%H:%M:%S)] pulling gemma4:e4b (light triage)" >> "$LOG_DIR/boot.log"
+   nohup ollama pull gemma4:e4b > "$LOG_DIR/ollama-pull-light.log" 2>&1 &
  fi

  # ── 6. Discord bot (background) ─────────────────────────────────────────────
@@ -131,7 +144,34 @@ if [[ -n "${DISCORD_BOT_TOKEN:-}" ]]; then
    echo "[$(date +%H:%M:%S)] discord bot started"
  fi

- # ── 7. Cron loop fires Hermes daemons 24/7 (no sleep gaps) ────────────────
+ # ── 7a. Continuous scrape daemon (no idle gaps — runs back-to-back batches) ─
+ cat > /tmp/scrape-daemon.sh <<'SCRAPESH'
+ #!/bin/bash
+ # Runs scrape batches continuously. Cool-down between cycles only to respect rate limits.
+ set -a; source ~/.hermes/.env 2>/dev/null; set +a
+ LOG="${HOME}/.claude/logs/scrape-continuous.log"
+ mkdir -p "$(dirname "$LOG")"
+ while true; do
+   START=$(date +%s)
+   # Adaptive cool-down: short if last batch was small, long if it hit rate limits
+   bash ~/.claude/bin/domain-scrape-loop.sh 800 4 >> "$LOG" 2>&1
+   DUR=$(( $(date +%s) - START ))
+   # If batch took < 60s the queue was empty / rate-limited → cool down 90s
+   # If batch took > 5 min it was productive → only 30s cool-down
+   if [[ $DUR -lt 60 ]]; then
+     sleep 90
+   elif [[ $DUR -lt 300 ]]; then
+     sleep 60
+   else
+     sleep 30
+   fi
+ done
+ SCRAPESH
+ chmod +x /tmp/scrape-daemon.sh
+ nohup /tmp/scrape-daemon.sh > "$LOG_DIR/scrape-daemon.log" 2>&1 &
+ echo "[$(date +%H:%M:%S)] continuous scrape daemon started" >> "$LOG_DIR/boot.log"
+
+ # ── 7b. Cron loop — non-scrape daemons (scrape now runs continuously above) ─
  cat > /tmp/hermes-cron.sh <<'CRONSH'
  #!/bin/bash
  set -a; source ~/.hermes/.env 2>/dev/null; set +a
@@ -139,20 +179,22 @@ LOG="${HOME}/.claude/logs/cron.log"
  mkdir -p "$(dirname "$LOG")"
  while true; do
    M=$(($(date +%s) / 60))
-   # Every 90s: continuous local dev (gemma)
+   # Every 2 min: continuous local dev (qwen3-coder when ready, else gemma)
    [[ $((M % 2)) -eq 0 ]] && bash ~/.claude/bin/surrogate-dev-loop.sh 1 >> "$LOG" 2>&1 &
    # Every 5 min: producer pushes priorities to Redis
    [[ $((M % 5)) -eq 0 ]] && bash ~/.claude/bin/work-queue-producer.sh >> "$LOG" 2>&1 &
+   # Every 10 min: training-pair push to HF (drains ~/.surrogate/training-pairs.jsonl)
+   [[ $((M % 10)) -eq 0 ]] && bash ~/.claude/bin/push-training-to-hf.sh >> "$LOG" 2>&1 &
    # Every 20 min: full orchestrate chain (architect → dev → qa → reviewer + git push)
    [[ $((M % 20)) -eq 0 ]] && bash ~/.claude/bin/auto-orchestrate-loop.sh >> "$LOG" 2>&1 &
-   # Every 30 min: scrape loop (parallel 4)
-   [[ $((M % 30)) -eq 0 ]] && bash ~/.claude/bin/domain-scrape-loop.sh 1700 4 >> "$LOG" 2>&1 &
    # Every 30 min: research-apply (pop queue → orchestrate → ship feature)
    [[ $((M % 30)) -eq 15 ]] && bash ~/.claude/bin/surrogate-research-apply.sh >> "$LOG" 2>&1 &
-   # Every 60 min: keyword tuner
+   # Every 60 min: keyword tuner (adapts scrape queue based on yields)
    [[ $((M % 60)) -eq 0 ]] && bash ~/.claude/bin/scrape-keyword-tuner.sh >> "$LOG" 2>&1 &
    # Every 6 hours: research-loop (discover new features from competitors/papers)
    [[ $((M % 360)) -eq 30 ]] && bash ~/.claude/bin/surrogate-research-loop.sh >> "$LOG" 2>&1 &
+   # Every 12 hours: dataset enrich (pulls fresh public datasets, dedups, uploads to HF)
+   [[ $((M % 720)) -eq 60 ]] && bash ~/.claude/bin/dataset-enrich.sh >> "$LOG" 2>&1 &
    sleep 60
  done
  CRONSH
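The daemon's adaptive cool-down can be dry-run in isolation; the helper function below mirrors the tiers in the loop but is not part of the script:

  cooldown() {  # batch duration in seconds -> sleep in seconds
    if   [[ $1 -lt 60  ]]; then echo 90   # short batch: queue empty / rate-limited
    elif [[ $1 -lt 300 ]]; then echo 60
    else                        echo 30   # long batch: productive, keep pace
    fi
  }
  for d in 10 120 600; do echo "batch ${d}s -> sleep $(cooldown "$d")s"; done
  # batch 10s -> sleep 90s, batch 120s -> sleep 60s, batch 600s -> sleep 30s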
 
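The cron loop keys everything off minutes-since-epoch, and the slower jobs get a non-zero offset so they never collide with the on-the-tick jobs. A standalone sketch of the same arithmetic (offset jobs only):

  for M in 0 15 30 60; do
    hits=''
    [[ $((M % 30))  -eq 15 ]] && hits+=' research-apply'
    [[ $((M % 360)) -eq 30 ]] && hits+=' research-loop'
    [[ $((M % 720)) -eq 60 ]] && hits+=' dataset-enrich'
    echo "minute $M:${hits:- (only the mod-0 jobs)}"
  done
  # minute 30 fires research-loop but not research-apply (offset 15) or dataset-enrich (offset 60)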