diff --git a/Dockerfile b/Dockerfile index 6718442ce13836a0b66cf21651dfe82422a61a2e..11a3f2070e1a15bf3d65d94daee8167cdcb91495 100644 --- a/Dockerfile +++ b/Dockerfile @@ -1,5 +1,5 @@ -# Hermes on Hugging Face Spaces (CPU 16 GB) -# Single-container that runs Ollama + Redis + all Hermes daemons. +# Surrogate-1 on Hugging Face Spaces (CPU 16 GB) +# Single-container that runs Ollama + Redis + all Surrogate daemons. FROM python:3.12-slim # ── System deps ────────────────────────────────────────────────────────────── @@ -14,32 +14,41 @@ RUN curl -fsSL https://ollama.com/install.sh | sh # ── App user (HF Spaces requires uid 1000) ────────────────────────────────── RUN useradd -m -u 1000 hermes ENV HOME=/home/hermes \ - PATH=/home/hermes/.local/bin:/usr/local/bin:/usr/bin:/bin \ + PATH=/home/hermes/.surrogate/bin:/home/hermes/.local/bin:/usr/local/bin:/usr/bin:/bin \ + SURROGATE_HOME=/home/hermes/.surrogate \ HERMES_HOME=/home/hermes/.hermes \ PYTHONUNBUFFERED=1 WORKDIR /home/hermes -# ── Python deps for Hermes Discord bot + scrape + RAG ─────────────────────── +# ── Python deps for Discord bot + scrape + RAG ────────────────────────────── COPY --chown=hermes:hermes requirements.txt /tmp/requirements.txt RUN pip install --no-cache-dir -r /tmp/requirements.txt -# ── Copy Hermes scripts + config skeleton ─────────────────────────────────── -COPY --chown=hermes:hermes bin/ /home/hermes/.claude/bin/ +# ── Copy Surrogate scripts + config skeleton ──────────────────────────────── +# Surrogate's home: ~/.surrogate/bin/ (separate from Claude Code's ~/.claude/) +COPY --chown=hermes:hermes bin/ /home/hermes/.surrogate/bin/ COPY --chown=hermes:hermes config/ /home/hermes/.hermes/config/ COPY --chown=hermes:hermes start.sh /home/hermes/start.sh -# start.sh orchestrates everything (Redis + Ollama + daemons + status server) — no supervisord needed -RUN chmod +x /home/hermes/.claude/bin/*.sh /home/hermes/start.sh +RUN chmod +x /home/hermes/.surrogate/bin/*.sh /home/hermes/start.sh USER 
hermes -# ── Persistent dirs (HF mounts /data) ──────────────────────────────────────── -RUN mkdir -p /home/hermes/.claude/state /home/hermes/.claude/logs \ - /home/hermes/.surrogate /home/hermes/.hermes/workspace \ - /home/hermes/.ollama +# ── Persistent dirs (HF mounts /data into ~/.surrogate symlink) ───────────── +RUN mkdir -p /home/hermes/.surrogate/state /home/hermes/.surrogate/logs \ + /home/hermes/.surrogate/workspace /home/hermes/.surrogate/memory \ + /home/hermes/.surrogate/skills /home/hermes/.surrogate/sessions \ + /home/hermes/.hermes/workspace /home/hermes/.ollama + +# ── Backward-compat: legacy refs to ~/.claude/bin/ + ~/.claude/logs/ ──────── +# Some scripts still reference old paths; symlink prevents breakage during +# progressive migration. Eventually all callers should use ~/.surrogate/. +RUN mkdir -p /home/hermes/.claude && \ + ln -sfn /home/hermes/.surrogate/bin /home/hermes/.claude/bin && \ + ln -sfn /home/hermes/.surrogate/logs /home/hermes/.claude/logs && \ + ln -sfn /home/hermes/.surrogate/state /home/hermes/.claude/state # ── Expose port 7860 (HF default) ──────────────────────────────────────────── EXPOSE 7860 -# Run supervisord — manages ollama + redis + all hermes daemons CMD ["/home/hermes/start.sh"] diff --git a/bin/agentic-crawler.sh b/bin/agentic-crawler.sh index 2b0e3aaa5e1393827e3b74a59bb539471cb61add..5a5bd91e2f07eac7d57e68bc4512877fed235957 100755 --- a/bin/agentic-crawler.sh +++ b/bin/agentic-crawler.sh @@ -9,8 +9,8 @@ set -uo pipefail set -a; source "$HOME/.hermes/.env" 2>/dev/null; set +a -DB="$HOME/.claude/state/agentic-frontier.db" -LOG="$HOME/.claude/logs/agentic-crawler.log" +DB="$HOME/.surrogate/state/agentic-frontier.db" +LOG="$HOME/.surrogate/logs/agentic-crawler.log" PAIRS="$HOME/.surrogate/training-pairs.jsonl" mkdir -p "$(dirname "$DB")" "$(dirname "$LOG")" "$(dirname "$PAIRS")" diff --git a/bin/ai-fallback.sh b/bin/ai-fallback.sh new file mode 100755 index 
0000000000000000000000000000000000000000..8e2b4ee91ce0057d68d36633539f3a69d09f2dc8 --- /dev/null +++ b/bin/ai-fallback.sh @@ -0,0 +1,422 @@ +#!/usr/bin/env bash +# AI Fallback Chain (cost-optimized, cloud-only, no local LLM) +# +# Priority chain: +# 1. Claude Opus 4.7 via Max subscription (primary, flat $100/mo) +# 2. Claude Sonnet 4.6 via Max subscription (separate quota pool!) +# 3. OpenRouter pay-per-use (cheap+capable non-Sonnet picks) +# 4. Gemini 2.5 FL FREE 1000/day +# 5. Groq Llama-3.3 FREE 1000/day +# +# Usage: +# ai-fallback.sh "your question" +# ai-fallback.sh --force gpt5 "your question" +# ai-fallback.sh --tier cheap "your question" # OpenRouter uses DeepSeek +# ai-fallback.sh --skip claude-opus "your question" +set -e + +# Source API keys FIRST — load BOTH env files (hermes + claude). +# Order matters: claude.env first, hermes.env wins on conflict +# (hermes has newer keys like GITHUB_MODELS_TOKEN, SAMBANOVA_API_KEY, CLOUDFLARE_*) +# shellcheck disable=SC1090 +set -a +[ -f "$HOME/.surrogate/.env" ] && . "$HOME/.surrogate/.env" +[ -f "$HOME/.hermes/.env" ] && . "$HOME/.hermes/.env" +set +a + +QUERY="" +FORCE="" +SKIP="" +VERBOSE=0 +TASK="" +export OR_TIER="" + +while [ $# -gt 0 ]; do + case "$1" in + --force) FORCE="$2"; shift 2 ;; + --skip) SKIP="$2"; shift 2 ;; + --tier) export OR_TIER="$2"; shift 2 ;; + --task) TASK="$2"; shift 2 ;; + --cheap) export OR_TIER="cheap"; shift ;; + --fast) export OR_TIER="fast"; shift ;; + --balanced) export OR_TIER="balanced"; shift ;; + --premium) export OR_TIER="premium"; shift ;; + -v|--verbose) VERBOSE=1; shift ;; + *) QUERY="$QUERY $1"; shift ;; + esac +done +QUERY=$(echo "$QUERY" | /usr/bin/sed 's/^ *//') +[ -z "$QUERY" ] && { /usr/bin/head -15 "$0"; exit 1; } + +# --task — pick the strongest free model per provider for the task. +# Sets per-provider env vars that try_* functions read (bridge --model alias). +# Auto-detect if not provided: code keywords → coding, reasoning keywords → reasoning. 
+if [ -z "$TASK" ]; then + q_lower=$(echo "$QUERY" | /usr/bin/tr '[:upper:]' '[:lower:]') + if echo "$q_lower" | /usr/bin/grep -qE "code|function|implement|refactor|bug|class|method|api|sql|terraform|cloudformation|dockerfile|kubernetes|yaml|typescript|javascript|python|rust|golang"; then + TASK="coding" + elif echo "$q_lower" | /usr/bin/grep -qE "analyze|reason|explain why|compare|evaluate|architect|design|trade-?off|deep|think step|proof|calculate|complex"; then + TASK="reasoning" + fi +fi + +case "$TASK" in + coding) + # Code = Codestral (GitHub, Mistral) / DeepSeek-V3.1 (SambaNova) / Qwen Coder (local) + export GITHUB_MODEL="codestral" ; export SAMBANOVA_MODEL="deepseek" + export CLOUDFLARE_MODEL="deepseek" ; export GROQ_MODEL="qwen" + export LOCAL_MODEL="qwen-coder" + ;; + reasoning) + # Reasoning = DeepSeek R1 (GitHub, CoT) / Grok 3 / DeepSeek R1 distill (CF) + export GITHUB_MODEL="reasoning" ; export SAMBANOVA_MODEL="deepseek-latest" + export CLOUDFLARE_MODEL="reasoning" ; export GROQ_MODEL="qwen" + export LOCAL_MODEL="granite" + ;; + fast) + # Fast = smallest/quickest tier per provider + export GITHUB_MODEL="mini" ; export SAMBANOVA_MODEL="fast" + export CLOUDFLARE_MODEL="fast" ; export GROQ_MODEL="fast" + export LOCAL_MODEL="tiny" + ;; + long-context|long|kimi) + # 200k+ context — Kimi on CF, gpt-oss-120b elsewhere + export GITHUB_MODEL="llama405" ; export SAMBANOVA_MODEL="gpt-oss" + export CLOUDFLARE_MODEL="kimi" ; export GROQ_MODEL="gpt-oss" + export LOCAL_MODEL="granite" + ;; + creative|chat|*) + # Default — smartest general-purpose free model per provider + export GITHUB_MODEL="gpt-4o" ; export SAMBANOVA_MODEL="llama70" + export CLOUDFLARE_MODEL="gpt-oss" ; export GROQ_MODEL="llama70" + export LOCAL_MODEL="granite" + ;; +esac + +# --- Semantic RAG context injection (embedding-powered) --- +# For coding/reasoning/creative tasks, fetch top-3 semantically similar docs +# from embeddings.db and prepend to QUERY. ~50ms overhead, improves grounding. 
+if [[ "$TASK" == "coding" || "$TASK" == "reasoning" || "$TASK" == "creative" ]]; then + if [[ -f "$HOME/.surrogate/embeddings.db" ]]; then + EMB_COUNT=$(/usr/bin/sqlite3 "$HOME/.surrogate/embeddings.db" 'SELECT COUNT(*) FROM embeddings' 2>/dev/null || echo 0) + if [[ "$EMB_COUNT" -ge 100 ]]; then + SEM_CONTEXT=$(/usr/bin/python3 "$HOME/.surrogate/bin/embed-doc.py" --query "$QUERY" 2>/dev/null | /usr/bin/head -15) + if [[ -n "$SEM_CONTEXT" ]]; then + QUERY="=== RAG CONTEXT (top-5 semantic matches from knowledge base) === +$SEM_CONTEXT + +=== TASK === +$QUERY" + fi + fi + fi +fi + +log() { [ $VERBOSE -eq 1 ] && echo "[$(date +%H:%M:%S)] $*" >&2; } + +# Capture successful response → log to knowledge base (non-blocking) +save_response() { + local provider="$1" model="$2" response="$3" + [ -z "$response" ] && return + ( "$HOME/.surrogate/bin/log-interaction.sh" "$QUERY" "$response" "$provider" "$model" > /dev/null 2>&1 & ) || true +} + +# --- System prompt from knowledge base + auto code-search if code query --- +build_system_prompt() { + local kb="" profile="" code_ctx="" q_lower + [ -f "$HOME/.surrogate/memory/knowledge_index.md" ] && kb="$(/usr/bin/head -50 $HOME/.surrogate/memory/knowledge_index.md)" + [ -f "$HOME/.surrogate/memory/user_profile.md" ] && profile="$(cat $HOME/.surrogate/memory/user_profile.md)" + + q_lower=$(echo "$QUERY" | /usr/bin/tr '[:upper:]' '[:lower:]') + local is_generate=0 is_code=0 + echo "$q_lower" | /usr/bin/grep -qE "code|function|implement|refactor|bug|error|class|method|api|endpoint|schema|model|service|controller|middleware|auth|database|query|sql|deploy|pipeline|terraform|cloudformation|dockerfile|kubernetes|helm|yaml" && is_code=1 + echo "$q_lower" | /usr/bin/grep -qE "create|generate|write|build|new|template|scaffold|design" && is_generate=1 + + if [ "$is_code" = "1" ] && [ -d "$HOME/.surrogate/code-vector-db" ]; then + if [ "$is_generate" = "1" ] && [ -x "$HOME/.surrogate/bin/find-gold-examples.sh" ]; then + # Generation task → 
inject FULL reference files (better style match) + code_ctx=$("$HOME/.surrogate/bin/find-gold-examples.sh" --top 2 --max-bytes 5000 "$QUERY" 2>/dev/null) + elif [ -x "$HOME/.surrogate/bin/code-search.sh" ]; then + # Query task → snippets only (faster) + code_ctx=$("$HOME/.surrogate/bin/code-search.sh" --top 3 "$QUERY" 2>/dev/null | /usr/bin/head -60) + fi + fi + + local prompt="You are Ashira's AI assistant. Context: $profile + +Pattern index: $kb" + if [ -n "$code_ctx" ]; then + prompt="$prompt + +=== ASHIRA'S EXISTING CODE (match this style EXACTLY) === +$code_ctx +=== END EXAMPLES === + +Style rules enforced: +- Follow naming/indent/comment style from examples above +- Use exact same Parameter/Resource names when applicable +- Preserve existing conventions (tags, naming, Description format)" + fi + prompt="$prompt + +Be concise. Cite file paths when referencing existing code." + echo "$prompt" +} +SYSTEM=$(build_system_prompt) + +# --- Anthropic via Max plan (routes through claude-bridge.sh CLI) --- +# Direct HTTPS to api.anthropic.com with OAuth token returns 401 — OAuth flow +# is managed by `claude` CLI (keychain/config). Use the bridge instead. 
+try_anthropic() { + local model="$1" extra="$2" + log "→ Claude Max: $model" + local out + out=$(echo "$QUERY" | "$HOME/.surrogate/bin/claude-bridge.sh" --model "$model" $extra 2>>/tmp/ai-fallback.err) || return 1 + [ -z "$out" ] && return 1 + echo "$out" + save_response "anthropic" "$model" "$out" + return 0 +} + +# Opus needs --force outside 01:00-06:00 window; sonnet is always available +try_claude_opus() { try_anthropic "opus" "--force"; } +try_claude_sonnet() { try_anthropic "sonnet" ""; } + +# OpenRouter FREE — tries multiple free models (each has strict rate limit) +# Order: coder-first → general-powerhouse → smaller fallbacks +try_openrouter_free() { + [ -z "${OPENROUTER_API_KEY:-}" ] && return 2 + local free_models=( + "qwen/qwen3-coder:free" + "qwen/qwen3-next-80b-a3b-instruct:free" + "openai/gpt-oss-120b:free" + "nvidia/nemotron-3-super-120b-a12b:free" + "meta-llama/llama-3.3-70b-instruct:free" + "z-ai/glm-4.5-air:free" + "google/gemma-4-31b-it:free" + "openai/gpt-oss-20b:free" + ) + for m in "${free_models[@]}"; do + OPENROUTER_MODEL="$m" try_openrouter && return 0 + log " ↳ free '$m' unavailable, trying next free..." 
+ done + return 1 +} + +# --- OpenRouter (cheap+capable non-Sonnet picks) --- +try_openrouter() { + [ -z "${OPENROUTER_API_KEY:-}" ] && return 2 + # Default: GPT-5.4 (beats Claude Opus 4.6 per benchmarks, -50% cost vs Opus 4.7) + local model="${OPENROUTER_MODEL:-openai/gpt-5.4}" + case "${OR_TIER:-}" in + # PAID tiers + cheap) model="deepseek/deepseek-v3.2" ;; # $0.26/$0.42 — cheapest capable + fast) model="x-ai/grok-4.1-fast" ;; # $0.20/$0.50 — ultra cheap, 2M ctx + balanced) model="openai/gpt-5.4" ;; # $2.50/$15 — DEFAULT, beats Opus 4.6 + premium) model="anthropic/claude-opus-4.7" ;; # $5/$25 — if really need Opus + grok) model="x-ai/grok-4.20" ;; # $2/$6 — 2M ctx, cool + gemini) model="google/gemini-3.1-pro-preview" ;;# $2/$12 + # FREE tiers (29 models available) + free|free-coder) model="qwen/qwen3-coder:free" ;; # coding, 262k ctx + free-large) model="qwen/qwen3-next-80b-a3b-instruct:free" ;; # 80B MoE + free-nvidia) model="nvidia/nemotron-3-super-120b-a12b:free" ;; # 120B + free-gptoss) model="openai/gpt-oss-120b:free" ;; # OpenAI open-sourced + free-llama) model="meta-llama/llama-3.3-70b-instruct:free" ;; + free-kimi) model="moonshotai/kimi-k2.5" ;; # Kimi 256k ctx + free-glm) model="z-ai/glm-4.5-air:free" ;; + free-gemma) model="google/gemma-4-31b-it:free" ;; # Google Gemma 4 + esac + log "→ OpenRouter: $model" + local body + # Use env vars — avoids quote-escape hell with multiline system prompt. 
+ # max_tokens=4000 (GPT-5.4 requires >= 16; stay well above) + body=$(ORM="$model" SYS="$SYSTEM" Q="$QUERY" "$HOME/.surrogate/venv/bin/python" -c " +import json, os +m = {'model':os.environ['ORM'],'max_tokens':4000, + 'messages':[{'role':'system','content':os.environ['SYS']}, + {'role':'user','content':os.environ['Q']}]} +print(json.dumps(m)) +" 2>&1) || { log " body-build failed: $body"; return 1; } + local resp code body_resp + resp=$(/usr/bin/curl -sS -w "\n%{http_code}" \ + --max-time 90 \ + -X POST "https://openrouter.ai/api/v1/chat/completions" \ + -H "Authorization: Bearer $OPENROUTER_API_KEY" \ + -H "HTTP-Referer: https://ashira.local" \ + -H "X-Title: ai-fallback" \ + -H "content-type: application/json" \ + -d "$body" 2>&1) + code=$(echo "$resp" | /usr/bin/tail -1) + body_resp=$(echo "$resp" | /usr/bin/sed '$d') + if [ "$code" != "200" ]; then + # Log real error reason for debug + local errmsg + errmsg=$(echo "$body_resp" | "$HOME/.surrogate/venv/bin/python" -c " +import sys, json +try: d=json.load(sys.stdin); print(d.get('error',{}).get('message','unknown')[:120]) +except: print('parse-fail') +" 2>/dev/null || echo "unknown") + log " [$code] $errmsg — falling through" + return 1 + fi + local out + out=$(echo "$body_resp" | "$HOME/.surrogate/venv/bin/python" -c " +import sys, json +d = json.load(sys.stdin) +print(d['choices'][0]['message']['content']) +") || return 1 + echo "$out" + save_response "openrouter" "$model" "$out" + return 0 +} + +# --- Gemini (free) --- +try_gemini() { + [ -z "${GEMINI_API_KEY:-}" ] && return 2 + local model="${GEMINI_MODEL:-gemini-2.5-flash}" + log "→ Gemini: $model (free)" + local body + body=$("$HOME/.surrogate/venv/bin/python" -c " +import json +m = {'systemInstruction':{'parts':[{'text':'''$SYSTEM'''}]}, + 'contents':[{'role':'user','parts':[{'text':'''$QUERY'''}]}], + 'generationConfig':{'maxOutputTokens':4000}} +print(json.dumps(m)) +" 2>/dev/null) + local resp code body_resp + resp=$(/usr/bin/curl -sS -w 
"\n%{http_code}" \ + -X POST "https://generativelanguage.googleapis.com/v1beta/models/$model:generateContent?key=$GEMINI_API_KEY" \ + -H "content-type: application/json" -d "$body" 2>&1) + code=$(echo "$resp" | /usr/bin/tail -1) + body_resp=$(echo "$resp" | /usr/bin/sed '$d') + [ "$code" != "200" ] && { log " [$code] falling through"; return 1; } + local out + out=$(echo "$body_resp" | "$HOME/.surrogate/venv/bin/python" -c " +import sys, json +d = json.load(sys.stdin) +print(d['candidates'][0]['content']['parts'][0]['text']) +") || return 1 + echo "$out" + save_response "gemini" "$model" "$out" + return 0 +} + +# --- Groq (free, ultra-fast) --- +try_groq() { + [ -z "${GROQ_API_KEY:-}" ] && return 2 + local model="${GROQ_MODEL:-llama70}" + log "→ Groq: $model (free)" + # Route through groq-bridge for consistent alias handling (llama70, fast, qwen, gpt-oss...) + local out + out=$(echo "$QUERY" | "$HOME/.surrogate/bin/groq-bridge.sh" --model "$model" 2>>/tmp/ai-fallback.err) || return 1 + [ -z "$out" ] && return 1 + echo "$out" + save_response "groq" "$model" "$out" + return 0 +} + +# --- GitHub Models (free via PAT, OpenAI-compat, GPT-4o-mini/Llama 3.3/Mistral/DeepSeek) --- +try_github() { + [ -z "${GITHUB_MODELS_TOKEN:-}${GITHUB_TOKEN:-}" ] && return 2 + local model="${GITHUB_MODEL:-gpt-4o}" + log "→ GitHub Models: $model (free)" + local out + out=$(echo "$QUERY" | "$HOME/.surrogate/bin/github-bridge.sh" --model "$model" 2>>/tmp/ai-fallback.err) || return 1 + [ -z "$out" ] && return 1 + echo "$out" + save_response "github" "$model" "$out" + return 0 +} + +# --- SambaNova Cloud (free, ~500 tok/s Llama 3.3 70B / DeepSeek V3.2 / Llama 4) --- +try_sambanova() { + [ -z "${SAMBANOVA_API_KEY:-}" ] && return 2 + local model="${SAMBANOVA_MODEL:-llama70}" + log "→ SambaNova: $model (free)" + local out + out=$(echo "$QUERY" | "$HOME/.surrogate/bin/sambanova-bridge.sh" --model "$model" 2>>/tmp/ai-fallback.err) || return 1 + [ -z "$out" ] && return 1 + echo "$out" + 
save_response "sambanova" "$model" "$out" + return 0 +} + +# --- Cloudflare Workers AI (free 10k neurons/day, Llama 3.3 / Gemma-3 / Qwen Coder) --- +try_cloudflare() { + [ -z "${CLOUDFLARE_API_TOKEN:-}${CF_API_TOKEN:-}" ] && return 2 + [ -z "${CLOUDFLARE_ACCOUNT_ID:-}${CF_ACCOUNT_ID:-}" ] && return 2 + local model="${CLOUDFLARE_MODEL:-gpt-oss}" + log "→ Cloudflare WAI: $model (free)" + local out + out=$(echo "$QUERY" | "$HOME/.surrogate/bin/cloudflare-bridge.sh" --model "$model" 2>>/tmp/ai-fallback.err) || return 1 + [ -z "$out" ] && return 1 + echo "$out" + save_response "cloudflare" "$model" "$out" + return 0 +} + +# --- Local Ollama — always-on, always-free ultimate fallback --- +# Bench (M3 24GB): granite4:7b-a1b-h (4.2GB, ~7s/fib+memo — fast & correct). +# Task-aware: code → qwen-coder:7b, chat → granite, tiny → qwen:3b. +# gemma4:26b BLOCKED — user directive (too slow for this hw). +try_granite() { + # Check ollama running + /usr/bin/curl -sS --max-time 3 http://localhost:11434/api/tags > /dev/null 2>&1 || return 2 + local alias="${LOCAL_MODEL:-granite}" + log "→ Local Ollama: $alias (free, always-on)" + local out + out=$(echo "$QUERY" | "$HOME/.surrogate/bin/granite-bridge.sh" --model "$alias" 2>>/tmp/ai-fallback.err) || return 1 + [ -z "$out" ] && return 1 + echo "$out" + save_response "ollama-local" "$alias" "$out" + return 0 +} + +# --- Execute chain (FREE-FIRST for routine/bulk tasks) --- +# Order: free APIs → claude-sonnet (Max plan safety net) → local Ollama (ultimate backstop) +# IMPORTANT-tasks (retro/sprint/skill-sanitize/agent-critic/security-audit/mythos-audit) +# → call claude-bridge.sh --model opus --force DIRECTLY, bypass this chain +# REVIEWER/hallucination-check → call claude-bridge.sh --model sonnet DIRECTLY +# Paid OpenRouter removed per user direction (use Max plan instead of pay-per-use) +PROVIDERS="github sambanova cloudflare groq openrouter-free gemini claude-sonnet granite" + +# Explicit --force +if [ -n "$FORCE" ]; then + case 
"$FORCE" in + claude-opus|opus) try_claude_opus && exit 0 ;; + claude-sonnet|sonnet) try_claude_sonnet && exit 0 ;; + openrouter|or) try_openrouter && exit 0 ;; + openrouter-free|free) try_openrouter_free && exit 0 ;; + gpt5|gpt) OPENROUTER_MODEL="openai/gpt-5.4" try_openrouter && exit 0 ;; + grok) OPENROUTER_MODEL="x-ai/grok-4.20" try_openrouter && exit 0 ;; + deepseek) OPENROUTER_MODEL="deepseek/deepseek-v3.2" try_openrouter && exit 0 ;; + gemini) try_gemini && exit 0 ;; + groq) try_groq && exit 0 ;; + github|gh) try_github && exit 0 ;; + sambanova|samba) try_sambanova && exit 0 ;; + cloudflare|cf) try_cloudflare && exit 0 ;; + granite|local|ollama) try_granite && exit 0 ;; + *) echo "[error] unknown --force '$FORCE'" >&2; exit 1 ;; + esac + echo "[error] forced provider failed" >&2; exit 1 +fi + +# Auto chain with skip support +for p in $PROVIDERS; do + if [ -n "$SKIP" ] && [ "$p" = "$SKIP" ]; then continue; fi + case "$p" in + github) try_github && exit 0 ;; + sambanova) try_sambanova && exit 0 ;; + cloudflare) try_cloudflare && exit 0 ;; + claude-opus) try_claude_opus && exit 0 ;; + claude-sonnet) try_claude_sonnet && exit 0 ;; + openrouter) try_openrouter && exit 0 ;; + openrouter-free) try_openrouter_free && exit 0 ;; + gemini) try_gemini && exit 0 ;; + groq) try_groq && exit 0 ;; + granite) try_granite && exit 0 ;; + esac +done + +echo "[error] all providers exhausted" >&2 +exit 1 diff --git a/bin/ask-sqlite.py b/bin/ask-sqlite.py new file mode 100755 index 0000000000000000000000000000000000000000..89022db767fa61306543c72b8bf321dbc9b1aa24 --- /dev/null +++ b/bin/ask-sqlite.py @@ -0,0 +1,175 @@ +#!/usr/bin/env python3 +""" +Local RAG assistant — SQLite FTS5 (replaces Chroma) + local LLM. +Stable, no Rust crashes, fast. 
+ +Usage: + ask-sqlite.py "คำถาม" # single shot + ask-sqlite.py -i # interactive + ask-sqlite.py --source code "คำถาม" # filter by source + ask-sqlite.py --project Vanguard "คำถาม" +""" +import sys, json, sqlite3, argparse, subprocess, urllib.request, re +from pathlib import Path + +DB = str(Path.home() / ".surrogate/index.db") +OLLAMA = "http://localhost:11434/api/chat" +DEFAULT_MODEL = "granite4:7b-a1b-h" + +AXENTX = Path("/Users/Ashira/axentx") +PROJECTS = ["Costinel", "Vanguard", "arkship", "surrogate", "workio"] + + +def fts_escape(query: str) -> str: + """Turn a natural query into FTS5 MATCH syntax — use each non-trivial word.""" + words = re.findall(r"\w{3,}", query) # keep alnum words ≥3 chars + if not words: return '"placeholder"' + # OR query for flexibility + return " OR ".join(f'"{w}"' for w in words[:10]) + + +def search(query: str, n: int = 10, source: str = None, project: str = None): + conn = sqlite3.connect(DB) + conn.row_factory = sqlite3.Row + fts_q = fts_escape(query) + sql = """ + SELECT d.source, d.project, d.path, d.topic, d.instruction, d.response, + rank + FROM docs_fts f JOIN docs d ON f.rowid = d.id + WHERE docs_fts MATCH ? + """ + params = [fts_q] + if source: + sql += " AND d.source LIKE ?" + params.append(f"%{source}%") + if project: + sql += " AND d.project LIKE ?" + params.append(f"%{project}%") + sql += " ORDER BY rank LIMIT ?" + params.append(n) + + try: + rows = conn.execute(sql, params).fetchall() + except sqlite3.OperationalError as e: + # FTS syntax error — fallback to LIKE + conn = sqlite3.connect(DB) + conn.row_factory = sqlite3.Row + rows = conn.execute( + "SELECT source, project, path, topic, instruction, response FROM docs " + "WHERE instruction LIKE ? OR response LIKE ? 
LIMIT ?", + (f"%{query[:80]}%", f"%{query[:80]}%", n) + ).fetchall() + return rows + + +def agents_md() -> str: + parts = [] + for proj in PROJECTS: + md = AXENTX / proj / "AGENTS.md" + if md.exists(): + parts.append(f"=== {proj}/AGENTS.md ===\n" + "\n".join(md.read_text().split("\n")[:15])) + return "\n\n".join(parts) + + +def git_recent() -> str: + out = [] + for proj in PROJECTS: + p = AXENTX / proj + if not (p / ".git").exists(): continue + try: + r = subprocess.run(["git","-C",str(p),"log","--oneline","-5"], + capture_output=True, text=True, timeout=3) + if r.stdout.strip(): + out.append(f"=== {proj} ===\n{r.stdout.strip()}") + except: pass + return "\n".join(out) + + +def build_context(question, source=None, project=None): + parts = ["## AGENTS.md\n" + agents_md()] + g = git_recent() + if g: parts.append("## Recent commits\n" + g) + + rows = search(question, n=8, source=source, project=project) + if rows: + hits = [] + for r in rows: + tag = r["source"] or "?" + path = r["path"] or "" + proj = r["project"] or "" + content = r["response"] or r["instruction"] or "" + hits.append(f"[{tag}:{proj}/{path[-60:]}]\n{content[:500]}") + parts.append(f"## Relevant docs (SQLite FTS, {len(rows)} matches)\n" + "\n\n".join(hits)) + return "\n\n".join(parts)[:12000] + + +SYSTEM_PROMPT = ( + "คุณคือ local assistant ตอบจาก Context เท่านั้น. ไม่รู้ก็บอก. " + "ภาษาไทย กระชับ. อ้าง path/source ที่เกี่ยวข้อง." 
+) + + +def ask_ollama(messages, model): + payload = {"model": model, "messages": messages, "stream": False} + req = urllib.request.Request(OLLAMA, data=json.dumps(payload).encode(), + headers={"Content-Type": "application/json"}) + with urllib.request.urlopen(req, timeout=180) as r: + return json.loads(r.read()).get("message", {}).get("content", "(no response)") + + +def single(question, model, source, project): + print(f"🔍 SQLite FTS search...", file=sys.stderr) + ctx = build_context(question, source, project) + print(f" context: {len(ctx)} chars", file=sys.stderr) + print(f"🤖 {model}\n", file=sys.stderr) + msgs = [ + {"role": "system", "content": SYSTEM_PROMPT}, + {"role": "user", "content": f"### Context\n{ctx}\n\n### คำถาม\n{question}"}, + ] + print(ask_ollama(msgs, model)) + + +def interactive(model, source, project): + print(f"🤖 Interactive — {model}, source={source}, project={project}", file=sys.stderr) + print(f" type 'exit' to quit, ':s ' to set source filter", file=sys.stderr) + history = [{"role": "system", "content": SYSTEM_PROMPT}] + base_ctx = None + while True: + try: q = input("❯ ").strip() + except (EOFError, KeyboardInterrupt): break + if not q or q in ("exit","quit"): break + if q.startswith(":s "): + source = q[3:].strip() or None + print(f" source filter: {source}") + continue + + ctx = build_context(q, source, project) + msgs = history + [{"role": "user", "content": f"### Context\n{ctx}\n\n### คำถาม\n{q}"}] + ans = ask_ollama(msgs, model) + history.append({"role": "user", "content": q}) + history.append({"role": "assistant", "content": ans}) + print(f"\n{ans}\n") + if len(history) > 11: + history = [history[0]] + history[-10:] + + +def main(): + ap = argparse.ArgumentParser() + ap.add_argument("-i", "--interactive", action="store_true") + ap.add_argument("-m", "--model", default=DEFAULT_MODEL) + ap.add_argument("--source", help="filter by source (code, github-public, claude-conversation, ...)") + ap.add_argument("--project", help="filter by 
project") + ap.add_argument("question", nargs="*") + args = ap.parse_args() + + if args.interactive: + interactive(args.model, args.source, args.project) + else: + if not args.question: + print("usage: ask 'คำถาม' OR ask -i OR ask --source code 'คำถาม'", file=sys.stderr) + sys.exit(1) + single(" ".join(args.question), args.model, args.source, args.project) + + +if __name__ == "__main__": + main() diff --git a/bin/auto-orchestrate-loop.sh b/bin/auto-orchestrate-loop.sh index 45c76e269b82e0bfd91bb6fcfd629b1b8a045d37..9cefe9ce1ada6045b9173a88594b82871ece8bde 100755 --- a/bin/auto-orchestrate-loop.sh +++ b/bin/auto-orchestrate-loop.sh @@ -9,7 +9,7 @@ set -uo pipefail set -a; source "$HOME/.hermes/.env" 2>/dev/null; set +a -LOG="$HOME/.claude/logs/auto-orchestrate-loop.log" +LOG="$HOME/.surrogate/logs/auto-orchestrate-loop.log" mkdir -p "$(dirname "$LOG")" # Resource guard: 20% headroom @@ -107,14 +107,14 @@ TASK_DESC="Resolve this TODO/FIXME in $PROJ_NAME at $FILE:$LINE: \"$CONTENT\". I cd "$PROJECT" || { echo "[$(date +%H:%M:%S)] cd failed" >> "$LOG"; exit 1; } # Run the orchestrate pipeline (auto-commits on APPROVE) -bash "$HOME/.claude/bin/surrogate-orchestrate.sh" "$TASK_DESC" >> "$LOG" 2>&1 +bash "$HOME/.surrogate/bin/surrogate-orchestrate.sh" "$TASK_DESC" >> "$LOG" 2>&1 RC=$? 
DUR=$(( $(date +%s) - START )) echo "[$(date +%H:%M:%S)] orchestrate done in ${DUR}s rc=$RC" >> "$LOG" # Discord notification -NOTIFY="$HOME/.claude/bin/notify-discord.sh" +NOTIFY="$HOME/.surrogate/bin/notify-discord.sh" if [[ -x "$NOTIFY" ]]; then if [[ $RC -eq 0 ]]; then "$NOTIFY" task "Auto-orchestrate: $PROJ_NAME" "$FILE:$LINE — \`$(echo "$CONTENT" | head -c 80)\` · ${DUR}s" 2>/dev/null & diff --git a/bin/cerebras-bridge.sh b/bin/cerebras-bridge.sh new file mode 100755 index 0000000000000000000000000000000000000000..17f7caf4846bc822b7b97368c4e960984b7ddb8b --- /dev/null +++ b/bin/cerebras-bridge.sh @@ -0,0 +1,58 @@ +#!/usr/bin/env bash +# Cerebras bridge — fastest inference (wafer-scale), llama/qwen/gpt-oss available +set -u +MODEL="llama3.1-8b" +MAX_TOKENS=2000 +TEMP=0.3 +PROMPT="" + +while [[ $# -gt 0 ]]; do + case "$1" in + --model) + case "$2" in + fast|small) MODEL="llama3.1-8b" ;; + big) MODEL="qwen-3-235b-a22b-instruct-2507" ;; + gpt-oss) MODEL="gpt-oss-120b" ;; + glm) MODEL="zai-glm-4.7" ;; + *) MODEL="$2" ;; + esac; shift 2 ;; + --max-tokens) MAX_TOKENS="$2"; shift 2 ;; + *) PROMPT="$*"; break ;; + esac +done +[[ -z "$PROMPT" ]] && [[ ! 
-t 0 ]] && PROMPT=$(cat) +[[ -z "$PROMPT" ]] && { echo "cerebras-bridge: no prompt" >&2; exit 2; } + +LOG="$HOME/.surrogate/logs/cerebras-bridge.log" +mkdir -p "$(dirname "$LOG")" +set -a; source "$HOME/.hermes/.env"; set +a +echo "[$(date '+%H:%M:%S')] model=$MODEL len=${#PROMPT}" >> "$LOG" + +RESPONSE=$(python3 -c " +import os +exec(open(os.path.expanduser('~/.surrogate/bin/lib/dns_fallback.py')).read()) +import json, sys, os, urllib.request, urllib.error +body = { + 'model': '$MODEL', + 'messages': [{'role':'user','content': sys.stdin.read()}], + 'max_tokens': $MAX_TOKENS, 'temperature': $TEMP, +} +req = urllib.request.Request( + 'https://api.cerebras.ai/v1/chat/completions', + data=json.dumps(body).encode(), + headers={'Content-Type':'application/json', 'User-Agent':'hermes-agent/1.0', 'Authorization':'Bearer '+os.environ.get('CEREBRAS_API_KEY','')} +) +try: + with urllib.request.urlopen(req, timeout=120) as r: + d = json.load(r) + print(d.get('choices',[{}])[0].get('message',{}).get('content','')) +except urllib.error.HTTPError as e: + print(f'cerebras-bridge HTTP {e.code}: {e.read()[:200]}', file=sys.stderr) + sys.exit(e.code // 100) +except Exception as e: + print(f'cerebras-bridge error: {e}', file=sys.stderr); sys.exit(1) +" <<< "$PROMPT") +RC=$? +echo "[$(date '+%H:%M:%S')] rc=$RC bytes=${#RESPONSE}" >> "$LOG" +[[ $RC -ne 0 ]] && exit $RC +echo "$RESPONSE" diff --git a/bin/chutes-bridge.sh b/bin/chutes-bridge.sh new file mode 100755 index 0000000000000000000000000000000000000000..474dd51fb0db52baf6ca9f1c21c752ffbc62503a --- /dev/null +++ b/bin/chutes-bridge.sh @@ -0,0 +1,59 @@ +#!/usr/bin/env bash +# Chutes.ai bridge — OpenAI-compat; free-tier, multi-model aggregator. +# Endpoint: https://llm.chutes.ai/v1/chat/completions +# Free tier: ~500 req/day, no CC, solid for Qwen/DeepSeek/Llama models. 
+set -u +MODEL="deepseek-ai/DeepSeek-V3.1" +MAX_TOKENS=2000 +TEMP=0.3 +PROMPT="" + +while [[ $# -gt 0 ]]; do + case "$1" in + --model) + case "$2" in + deepseek|v3) MODEL="deepseek-ai/DeepSeek-V3.1" ;; + qwen|coder) MODEL="Qwen/Qwen3-Coder-480B-A35B-Instruct" ;; + llama|l70) MODEL="meta-llama/Llama-3.3-70B-Instruct" ;; + r1) MODEL="deepseek-ai/DeepSeek-R1" ;; + glm) MODEL="zai-org/GLM-4.6" ;; + *) MODEL="$2" ;; + esac; shift 2 ;; + --max-tokens) MAX_TOKENS="$2"; shift 2 ;; + *) PROMPT="$*"; break ;; + esac +done +[[ -z "$PROMPT" ]] && [[ ! -t 0 ]] && PROMPT=$(cat) +[[ -z "$PROMPT" ]] && { echo "chutes-bridge: no prompt" >&2; exit 2; } + +LOG="$HOME/.surrogate/logs/chutes-bridge.log" +mkdir -p "$(dirname "$LOG")" +set -a; source "$HOME/.hermes/.env"; set +a +echo "[$(date '+%H:%M:%S')] model=$MODEL len=${#PROMPT}" >> "$LOG" + +RESPONSE=$(python3 -c " +import os +exec(open(os.path.expanduser('~/.surrogate/bin/lib/dns_fallback.py')).read()) +exec(open(os.path.expanduser('~/.surrogate/bin/lib/bridge_retry.py')).read()) +import json, sys +body = { + 'model': '$MODEL', + 'messages': [{'role':'user','content': sys.stdin.read()}], + 'max_tokens': $MAX_TOKENS, 'temperature': $TEMP, + 'stream': False, +} +try: + d = request_with_retry( + 'https://llm.chutes.ai/v1/chat/completions', + data=json.dumps(body).encode(), + headers={'Content-Type':'application/json', 'User-Agent':'hermes-agent/1.0', 'Authorization':'Bearer '+os.environ.get('CHUTES_API_KEY','')}, + timeout=120, max_retries=4, base_delay=3.0, open_seconds=120, + ) + print(d.get('choices',[{}])[0].get('message',{}).get('content','')) +except Exception as e: + print(f'chutes-bridge error: {e}', file=sys.stderr); sys.exit(1) +" <<< "$PROMPT") +RC=$? 
+echo "[$(date '+%H:%M:%S')] rc=$RC bytes=${#RESPONSE}" >> "$LOG" +[[ $RC -ne 0 ]] && exit $RC +echo "$RESPONSE" diff --git a/bin/cloudflare-bridge.sh b/bin/cloudflare-bridge.sh new file mode 100755 index 0000000000000000000000000000000000000000..a6c343a2f961a157ad13a6760a288939810f840d --- /dev/null +++ b/bin/cloudflare-bridge.sh @@ -0,0 +1,76 @@ +#!/usr/bin/env bash +# Cloudflare Workers AI bridge — 10k neurons/day free tier +# Endpoint: https://api.cloudflare.com/client/v4/accounts/$ACCOUNT_ID/ai/v1 (OpenAI-compat) +# Key env: CLOUDFLARE_API_TOKEN + CLOUDFLARE_ACCOUNT_ID +# Usage: cloudflare-bridge.sh [--model MODEL] "" +set -u +# Default: gpt-oss-120b — 120B params, highest capability on CF Workers AI free tier. +# Catalog verified 2026-04 — aliases point to models that ACTUALLY respond. +MODEL="@cf/openai/gpt-oss-120b" +MAX_TOKENS=2000 +TEMP=0.3 +PROMPT="" + +while [[ $# -gt 0 ]]; do + case "$1" in + --model) + case "$2" in + fast|small|8b) MODEL="@cf/meta/llama-3.1-8b-instruct-fp8" ;; + llama|llama70|70b) MODEL="@cf/meta/llama-3.3-70b-instruct-fp8-fast" ;; + gpt-oss|oss|120b) MODEL="@cf/openai/gpt-oss-120b" ;; + deepseek|r1|reasoning) MODEL="@cf/deepseek-ai/deepseek-r1-distill-qwen-32b" ;; + kimi|long-ctx) MODEL="@cf/moonshotai/kimi-k2.6" ;; + glm|glm4) MODEL="@cf/zai-org/glm-4.7-flash" ;; + *) MODEL="$2" ;; + esac; shift 2 ;; + --max-tokens) MAX_TOKENS="$2"; shift 2 ;; + --temperature) TEMP="$2"; shift 2 ;; + *) PROMPT="$*"; break ;; + esac +done +[[ -z "$PROMPT" ]] && [[ ! 
-t 0 ]] && PROMPT=$(cat) +[[ -z "$PROMPT" ]] && { echo "cloudflare-bridge: no prompt" >&2; exit 2; } + +LOG="$HOME/.surrogate/logs/cloudflare-bridge.log" +mkdir -p "$(dirname "$LOG")" +set -a; source "$HOME/.hermes/.env" 2>/dev/null || true; set +a + +TOKEN="${CLOUDFLARE_API_TOKEN:-${CF_API_TOKEN:-}}" +ACCOUNT="${CLOUDFLARE_ACCOUNT_ID:-${CF_ACCOUNT_ID:-}}" +if [[ -z "$TOKEN" ]] || [[ -z "$ACCOUNT" ]]; then + echo "cloudflare-bridge: missing CLOUDFLARE_API_TOKEN or CLOUDFLARE_ACCOUNT_ID in ~/.hermes/.env" >&2 + exit 3 +fi + +echo "[$(date '+%H:%M:%S')] model=$MODEL len=${#PROMPT}" >> "$LOG" + +RESPONSE=$(CF_TOKEN="$TOKEN" CF_ACCOUNT="$ACCOUNT" python3 -c " +import os +exec(open(os.path.expanduser('~/.surrogate/bin/lib/dns_fallback.py')).read()) +exec(open(os.path.expanduser('~/.surrogate/bin/lib/bridge_retry.py')).read()) +import json, sys +body = { + 'model': '$MODEL', + 'messages': [{'role':'user','content': sys.stdin.read()}], + 'max_tokens': $MAX_TOKENS, 'temperature': $TEMP, +} +url = f\"https://api.cloudflare.com/client/v4/accounts/{os.environ['CF_ACCOUNT']}/ai/v1/chat/completions\" +try: + d = request_with_retry( + url, + data=json.dumps(body).encode(), + headers={ + 'Content-Type':'application/json', + 'User-Agent':'hermes-agent/1.0', + 'Authorization':'Bearer '+os.environ['CF_TOKEN'], + }, + timeout=120, max_retries=6, base_delay=5.0, open_seconds=180, + ) + print(d.get('choices',[{}])[0].get('message',{}).get('content','')) +except Exception as e: + print(f'cloudflare-bridge error: {e}', file=sys.stderr); sys.exit(1) +" <<< "$PROMPT") +RC=$? +echo "[$(date '+%H:%M:%S')] rc=$RC bytes=${#RESPONSE}" >> "$LOG" +[[ $RC -ne 0 ]] && exit $RC +echo "$RESPONSE" diff --git a/bin/crawl-rss.py b/bin/crawl-rss.py index b11d8a6d2ee572e1cac78010400916e3c97dc86d..9db029493e834dcfd5e578fea8cad8ad05ee0d3b 100755 --- a/bin/crawl-rss.py +++ b/bin/crawl-rss.py @@ -5,7 +5,7 @@ Reads feed URLs from FEEDS env or default list, parses entries, writes JSONL to output file. 
Only writes entries not seen before (dedup by URL). Usage (from bash): - OUT=/tmp/out.jsonl python3 ~/.claude/bin/crawl-rss.py + OUT=/tmp/out.jsonl python3 ~/.surrogate/bin/crawl-rss.py All feeds VERIFIED to return 200 as of 2026-04-19. Failures are logged, not fatal — one bad feed doesn't kill the rest. @@ -86,7 +86,7 @@ FEEDS: list[tuple[str, str]] = [ ] OUT_PATH = os.environ.get("OUT", "/tmp/rss-crawl.jsonl") -SEEN_PATH = os.environ.get("SEEN", os.path.expanduser("~/.claude/.rss-seen.json")) +SEEN_PATH = os.environ.get("SEEN", os.path.expanduser("~/.surrogate/.rss-seen.json")) MAX_ENTRIES_PER_FEED = int(os.environ.get("MAX_PER_FEED", "10")) TIMEOUT = int(os.environ.get("TIMEOUT", "15")) diff --git a/bin/daily-crawl.sh b/bin/daily-crawl.sh index eb57a7e676f4ac19ba7cf7c79be9e5eeb7674f84..d42dbc3a1d89ddcccd041e09d20e9dca513b291a 100755 --- a/bin/daily-crawl.sh +++ b/bin/daily-crawl.sh @@ -14,17 +14,17 @@ while [ $# -gt 0 ]; do done export PATH=/usr/bin:/bin:/usr/local/bin:/opt/homebrew/bin:$PATH -source ~/.claude/.env 2>/dev/null || true +source ~/.hermes/.env 2>/dev/null || true # Also source ~/.hermes/.env (where Surrogate keeps the live tokens) set -a; source ~/.hermes/.env 2>/dev/null || true; set +a DATE=$(date +%Y-%m-%d) CRAWL_DIR="$HOME/Documents/Obsidian Vault/AI-Hub/crawls/$DATE" -mkdir -p "$CRAWL_DIR/raw" "$HOME/.claude/logs" -LOG="$HOME/.claude/logs/crawl-$DATE.log" +mkdir -p "$CRAWL_DIR/raw" "$HOME/.surrogate/logs" +LOG="$HOME/.surrogate/logs/crawl-$DATE.log" log() { echo "[$(date +%H:%M:%S)] $*" | tee -a "$LOG"; } -PY=~/.claude/venv/bin/python +PY=~/.surrogate/venv/bin/python # ═══════════ SOURCES — use Python scripts with explicit env passing ═══════════ @@ -403,6 +403,6 @@ for d in dirs[:60]: PY # Graph sync (async) -[ -x "$HOME/.claude/bin/graph-sync.sh" ] && ("$HOME/.claude/bin/graph-sync.sh" > /dev/null 2>&1 &) || true +[ -x "$HOME/.surrogate/bin/graph-sync.sh" ] && ("$HOME/.surrogate/bin/graph-sync.sh" > /dev/null 2>&1 &) || true log "=== Done: 
$CRAWL_DIR/digest.md ===" diff --git a/bin/dataset-enrich.sh b/bin/dataset-enrich.sh index d012694da8e755be2470c14d6d03d00e43651225..0464d476e351981d9652024dee40141c31e75d33 100755 --- a/bin/dataset-enrich.sh +++ b/bin/dataset-enrich.sh @@ -17,13 +17,13 @@ set -uo pipefail set -a; source "$HOME/.hermes/.env" 2>/dev/null; set +a -LOG="$HOME/.claude/logs/dataset-enrich.log" +LOG="$HOME/.surrogate/logs/dataset-enrich.log" WORK="$HOME/.hermes/workspace/dataset-enrich" mkdir -p "$WORK" "$(dirname "$LOG")" echo "[$(date +%H:%M:%S)] dataset enrich start" | tee "$LOG" -~/.claude/venv/bin/python <<'PYEOF' 2>&1 | tee -a "$LOG" +~/.surrogate/venv/bin/python <<'PYEOF' 2>&1 | tee -a "$LOG" from huggingface_hub import HfApi from pathlib import Path from datasets import load_dataset diff --git a/bin/dev-cloud-daemon.sh b/bin/dev-cloud-daemon.sh index 0220f58cc5a7c9ed45e13a2093953e10cf92281a..92be8174181c030393452e2ffb83300a2fd31bd5 100755 --- a/bin/dev-cloud-daemon.sh +++ b/bin/dev-cloud-daemon.sh @@ -8,7 +8,7 @@ set -u PROVIDER="${1:?usage: dev-cloud-daemon.sh }" -LOG="$HOME/.claude/logs/dev-cloud-daemon-${PROVIDER}.log" +LOG="$HOME/.surrogate/logs/dev-cloud-daemon-${PROVIDER}.log" mkdir -p "$(dirname "$LOG")" # Redis connection: prefer Unix socket, fall back to TCP 127.0.0.1:6379. @@ -65,15 +65,15 @@ except: print('OK')" 2>/dev/null) # and works on exactly what the daemon locked (avoids "no free priority" # dead-ends when the file lock was touched earlier for this same PRIO_ID). 
HERMES_PRIO_ID="$PRIO_ID" \ - "$HOME/.claude/bin/dev-cloud-worker.sh" "$PROVIDER" 2>&1 | tail -3 >> "$LOG" + "$HOME/.surrogate/bin/dev-cloud-worker.sh" "$PROVIDER" 2>&1 | tail -3 >> "$LOG" RC=${PIPESTATUS[0]} DUR=$(( $(date +%s) - START )) echo "[$(date '+%H:%M:%S')] $PROVIDER $PRIO_ID done in ${DUR}s (rc=$RC)" >> "$LOG" # Discord: only notify failures + slow tasks (avoid spam on every success) if [[ $RC -ne 0 ]]; then - "$HOME/.claude/bin/notify-discord.sh" error "Worker failed" "$PROVIDER · $PRIO_ID · ${DUR}s · rc=$RC" 2>/dev/null & + "$HOME/.surrogate/bin/notify-discord.sh" error "Worker failed" "$PROVIDER · $PRIO_ID · ${DUR}s · rc=$RC" 2>/dev/null & elif [[ $DUR -gt 240 ]]; then - "$HOME/.claude/bin/notify-discord.sh" warn "Slow task" "$PROVIDER · $PRIO_ID · ${DUR}s" 2>/dev/null & + "$HOME/.surrogate/bin/notify-discord.sh" warn "Slow task" "$PROVIDER · $PRIO_ID · ${DUR}s" 2>/dev/null & fi done diff --git a/bin/dev-cloud-worker.sh b/bin/dev-cloud-worker.sh index 400732d4706f6a69a9ead83501297ef0fc501001..9892e358dc653cb2736fdcfd10665404af1ef0fd 100755 --- a/bin/dev-cloud-worker.sh +++ b/bin/dev-cloud-worker.sh @@ -7,17 +7,17 @@ # provider = github | samba | cloudflare | groq | gemini # # Rate-limit aware per provider (set by cron schedule, NOT inside script). -# Cross-worker coordination: lockfile per (priority, provider) in ~/.claude/state/dev-locks/ +# Cross-worker coordination: lockfile per (priority, provider) in ~/.surrogate/state/dev-locks/ # Global priority lock: 30-min window, so same priority only gets fresh attempt per provider # every 30 min (prevents redundant work, allows tournament of implementations over time). 
set -u PROVIDER="${1:?usage: dev-cloud-worker.sh }" -LOG="$HOME/.claude/logs/dev-cloud-$PROVIDER.log" +LOG="$HOME/.surrogate/logs/dev-cloud-$PROVIDER.log" OUT_DIR="$HOME/.hermes/workspace/dev-cloud-$PROVIDER" SHARED="$HOME/.hermes/workspace/swarm-shared" -LOCK_DIR="$HOME/.claude/state/dev-locks" +LOCK_DIR="$HOME/.surrogate/state/dev-locks" mkdir -p "$(dirname "$LOG")" "$OUT_DIR" "$LOCK_DIR" START=$(date +%s) @@ -143,7 +143,7 @@ PRIO_PROJECT=$(echo "$PRIORITY" | python3 -c "import json,sys; print(json.loads( echo "[$(date '+%H:%M:%S')] $PROVIDER picked $PRIO_ID ($PRIO_PROJECT: ${PRIO_TITLE:0:60})" >> "$LOG" # -------- Rich context injection (B: enrich with repo + similar funcs + few-shot + deltas) -------- -source "$HOME/.claude/bin/lib/context_builder.sh" +source "$HOME/.surrogate/bin/lib/context_builder.sh" build_rich_context "$PRIO_PROJECT" "$PRIO_ID" "$PRIO_TITLE" # Sets: REPO_MAP, SIMILAR_FUNCS, RAG_EXAMPLES, SEMANTIC_RAG, FEWSHOT_ACCEPTED, ANTI_PATTERNS, PROMPT_DELTAS, PRIO_SPEC @@ -285,37 +285,37 @@ case "$PROVIDER" in github) # Codestral-2501 is Mistral's dedicated code model — free via PAT, top-tier for code tasks. # Better than gpt-4o-mini for coding specifically. Budget-aware: falls through if HALT. 
- RESULT=$(echo "$PROMPT" | "$HOME/.claude/bin/github-bridge.sh" --model codestral 2>>"$LOG") + RESULT=$(echo "$PROMPT" | "$HOME/.surrogate/bin/github-bridge.sh" --model codestral 2>>"$LOG") ;; samba|sambanova) - RESULT=$(echo "$PROMPT" | "$HOME/.claude/bin/sambanova-bridge.sh" --model deepseek 2>>"$LOG") + RESULT=$(echo "$PROMPT" | "$HOME/.surrogate/bin/sambanova-bridge.sh" --model deepseek 2>>"$LOG") ;; cloudflare|cf) - RESULT=$(echo "$PROMPT" | "$HOME/.claude/bin/cloudflare-bridge.sh" --model deepseek 2>>"$LOG") + RESULT=$(echo "$PROMPT" | "$HOME/.surrogate/bin/cloudflare-bridge.sh" --model deepseek 2>>"$LOG") ;; groq) - RESULT=$(echo "$PROMPT" | "$HOME/.claude/bin/groq-bridge.sh" --model qwen 2>>"$LOG") + RESULT=$(echo "$PROMPT" | "$HOME/.surrogate/bin/groq-bridge.sh" --model qwen 2>>"$LOG") ;; gemini) # Use ai-fallback's gemini path - RESULT=$(echo "$PROMPT" | "$HOME/.claude/bin/ai-fallback.sh" --force gemini 2>>"$LOG") + RESULT=$(echo "$PROMPT" | "$HOME/.surrogate/bin/ai-fallback.sh" --force gemini 2>>"$LOG") ;; cerebras) # Wafer-scale — fastest inference on planet (~2000 tok/s). Qwen3 235B excellent for code. 
- RESULT=$(echo "$PROMPT" | "$HOME/.claude/bin/cerebras-bridge.sh" --model big 2>>"$LOG") + RESULT=$(echo "$PROMPT" | "$HOME/.surrogate/bin/cerebras-bridge.sh" --model big 2>>"$LOG") ;; nvidia|nim) # NVIDIA NIM — Llama 3.3 70B, diverse model pool (Nemotron, DeepSeek-R1, Qwen-Coder) - RESULT=$(echo "$PROMPT" | "$HOME/.claude/bin/nvidia-bridge.sh" --model qwen 2>>"$LOG") + RESULT=$(echo "$PROMPT" | "$HOME/.surrogate/bin/nvidia-bridge.sh" --model qwen 2>>"$LOG") ;; chutes) # Chutes.ai aggregator — free tier needs activation; currently may 402 - RESULT=$(echo "$PROMPT" | "$HOME/.claude/bin/chutes-bridge.sh" --model deepseek 2>>"$LOG") + RESULT=$(echo "$PROMPT" | "$HOME/.surrogate/bin/chutes-bridge.sh" --model deepseek 2>>"$LOG") ;; surrogate|surrogate-1) # น้อง — local Ollama, Ashira-personalized (Qwen2.5-Coder-7B + Thai/DevSecOps prompt) # Will be upgraded with LoRA adapter after RunPod training. - RESULT=$(echo "$PROMPT" | "$HOME/.claude/bin/surrogate-bridge.sh" 2>>"$LOG") + RESULT=$(echo "$PROMPT" | "$HOME/.surrogate/bin/surrogate-bridge.sh" 2>>"$LOG") ;; *) echo "[$(date '+%H:%M:%S')] unknown provider $PROVIDER" >> "$LOG" diff --git a/bin/domain-scrape-loop.sh b/bin/domain-scrape-loop.sh index 4c8c9e46fe794db381a5b290750bedc2235b02ca..a4746d61839a906c45fece1d4b4325435f1fed3e 100755 --- a/bin/domain-scrape-loop.sh +++ b/bin/domain-scrape-loop.sh @@ -8,10 +8,10 @@ set -u DUR="${1:-900}" PARALLEL="${2:-3}" -LOG="$HOME/.claude/logs/domain-scrape-loop.log" +LOG="$HOME/.surrogate/logs/domain-scrape-loop.log" START=$(date +%s) BEFORE_PAIRS=$(wc -l ~/axentx/surrogate/data/training-jsonl/*.jsonl 2>/dev/null | tail -1 | awk '{print $1}') -BEFORE_LEDGER=$(sqlite3 ~/.claude/state/scrape-ledger.db "SELECT COUNT(*) FROM scraped" 2>/dev/null) +BEFORE_LEDGER=$(sqlite3 ~/.surrogate/state/scrape-ledger.db "SELECT COUNT(*) FROM scraped" 2>/dev/null) echo "═══ LOOP START $(date +%H:%M:%S) duration=${DUR}s parallel=$PARALLEL" | tee -a "$LOG" echo " before: pairs=$BEFORE_PAIRS 
ledger_repos=$BEFORE_LEDGER" | tee -a "$LOG" @@ -33,7 +33,7 @@ while true; do # Fire N parallel instances, each picks different domain via ledger for i in $(seq 1 $PARALLEL); do ( - ~/.claude/bin/github-domain-scrape.sh >> "$LOG" 2>&1 + ~/.surrogate/bin/github-domain-scrape.sh >> "$LOG" 2>&1 ) & done wait # wait all parallel to finish (30-60s typical) @@ -44,13 +44,13 @@ while true; do # Progress every 5 iters if (( ITER % 5 == 0 )); then PAIRS=$(wc -l ~/axentx/surrogate/data/training-jsonl/*.jsonl 2>/dev/null | tail -1 | awk '{print $1}') - LEDGER=$(sqlite3 ~/.claude/state/scrape-ledger.db "SELECT COUNT(*) FROM scraped" 2>/dev/null) + LEDGER=$(sqlite3 ~/.surrogate/state/scrape-ledger.db "SELECT COUNT(*) FROM scraped" 2>/dev/null) echo " [iter=$ITER $((NOW - START))s] pairs=$PAIRS (+$((PAIRS - BEFORE_PAIRS))) ledger=$LEDGER (+$((LEDGER - BEFORE_LEDGER)))" | tee -a "$LOG" fi done AFTER_PAIRS=$(wc -l ~/axentx/surrogate/data/training-jsonl/*.jsonl 2>/dev/null | tail -1 | awk '{print $1}') -AFTER_LEDGER=$(sqlite3 ~/.claude/state/scrape-ledger.db "SELECT COUNT(*) FROM scraped" 2>/dev/null) +AFTER_LEDGER=$(sqlite3 ~/.surrogate/state/scrape-ledger.db "SELECT COUNT(*) FROM scraped" 2>/dev/null) echo "═══ LOOP DONE $(date +%H:%M:%S)" | tee -a "$LOG" echo " iters: $ITER" | tee -a "$LOG" echo " pairs added: $((AFTER_PAIRS - BEFORE_PAIRS))" | tee -a "$LOG" diff --git a/bin/github-bridge.sh b/bin/github-bridge.sh new file mode 100755 index 0000000000000000000000000000000000000000..03087e1e0ab9fa5618544a33f6d78d5e5727d747 --- /dev/null +++ b/bin/github-bridge.sh @@ -0,0 +1,94 @@ +#!/usr/bin/env bash +# GitHub Models bridge — free-tier GPT-4o / Llama 3.3 / Mistral via GitHub PAT +# Endpoint: https://models.github.ai/inference (OpenAI-compat) +# Key env: GITHUB_MODELS_TOKEN (preferred) or GITHUB_TOKEN +# Usage: github-bridge.sh [--model MODEL] "" | echo "..." 
| github-bridge.sh +set -u +# Default: full GPT-4o (free via PAT, far smarter than mini, same daily quota) +MODEL="openai/gpt-4o" +MAX_TOKENS=2000 +TEMP=0.3 +PROMPT="" + +# Aliases reflect ONLY models verified working with free PAT (2026-04). +# GPT-5/o3/o1-mini etc. appear in /catalog but API returns 403/unavailable — not usable. +while [[ $# -gt 0 ]]; do + case "$1" in + --model) + case "$2" in + # OpenAI + gpt4o|gpt-4o) MODEL="openai/gpt-4o" ;; + mini|gpt-4o-mini) MODEL="openai/gpt-4o-mini" ;; + gpt41|gpt-4.1) MODEL="openai/gpt-4.1" ;; + gpt41-mini|gpt-4.1-mini) MODEL="openai/gpt-4.1-mini" ;; + # Meta Llama + llama|llama70) MODEL="meta/Llama-3.3-70B-Instruct" ;; + llama4|maverick) MODEL="meta/llama-4-maverick-17b-128e-instruct-fp8" ;; + llama405) MODEL="meta/meta-llama-3.1-405b-instruct" ;; + # DeepSeek + deepseek|deepseek-v3) MODEL="deepseek/deepseek-v3-0324" ;; + deepseek-r1|r1|reasoning) MODEL="deepseek/DeepSeek-R1" ;; + deepseek-r1-latest) MODEL="deepseek/deepseek-r1-0528" ;; + # xAI + grok|grok3) MODEL="xai/grok-3" ;; + grok-mini) MODEL="xai/grok-3-mini" ;; + # Mistral + mistral|mistral-medium) MODEL="mistral-ai/mistral-medium-2505" ;; + codestral|code) MODEL="mistral-ai/codestral-2501" ;; + # Microsoft Phi + phi|phi4) MODEL="microsoft/phi-4" ;; + # Cohere + cohere|command-a) MODEL="cohere/cohere-command-a" ;; + command-r) MODEL="cohere/cohere-command-r-plus-08-2024" ;; + *) MODEL="$2" ;; + esac; shift 2 ;; + --max-tokens) MAX_TOKENS="$2"; shift 2 ;; + --temperature) TEMP="$2"; shift 2 ;; + *) PROMPT="$*"; break ;; + esac +done +[[ -z "$PROMPT" ]] && [[ ! 
-t 0 ]] && PROMPT=$(cat) +[[ -z "$PROMPT" ]] && { echo "github-bridge: no prompt" >&2; exit 2; } + +LOG="$HOME/.surrogate/logs/github-bridge.log" +mkdir -p "$(dirname "$LOG")" +set -a; source "$HOME/.hermes/.env" 2>/dev/null || true; set +a + +# Prefer dedicated models token, fall back to general PAT +TOKEN="${GITHUB_MODELS_TOKEN:-${GITHUB_TOKEN:-}}" +if [[ -z "$TOKEN" ]]; then + echo "github-bridge: missing GITHUB_MODELS_TOKEN or GITHUB_TOKEN in ~/.hermes/.env" >&2 + exit 3 +fi + +echo "[$(date '+%H:%M:%S')] model=$MODEL len=${#PROMPT}" >> "$LOG" + +RESPONSE=$(GH_TOKEN="$TOKEN" python3 -c " +import os +exec(open(os.path.expanduser('~/.surrogate/bin/lib/dns_fallback.py')).read()) +exec(open(os.path.expanduser('~/.surrogate/bin/lib/bridge_retry.py')).read()) +import json, sys +body = { + 'model': '$MODEL', + 'messages': [{'role':'user','content': sys.stdin.read()}], + 'max_tokens': $MAX_TOKENS, 'temperature': $TEMP, +} +try: + d = request_with_retry( + 'https://models.github.ai/inference/chat/completions', + data=json.dumps(body).encode(), + headers={ + 'Content-Type':'application/json', + 'User-Agent':'hermes-agent/1.0', + 'Authorization':'Bearer '+os.environ['GH_TOKEN'], + }, + timeout=120, max_retries=4, base_delay=2.0, + ) + print(d.get('choices',[{}])[0].get('message',{}).get('content','')) +except Exception as e: + print(f'github-bridge error: {e}', file=sys.stderr); sys.exit(1) +" <<< "$PROMPT") +RC=$? 
+echo "[$(date '+%H:%M:%S')] rc=$RC bytes=${#RESPONSE}" >> "$LOG" +[[ $RC -ne 0 ]] && exit $RC +echo "$RESPONSE" diff --git a/bin/github-domain-scrape.sh b/bin/github-domain-scrape.sh index b41722f8633d18862a5a3c13c80e6c6946b21e36..6496d26e2ed180fd77d77ff026c4de71227aeb81 100755 --- a/bin/github-domain-scrape.sh +++ b/bin/github-domain-scrape.sh @@ -4,13 +4,13 @@ set -u set -a; source "$HOME/.hermes/.env" 2>/dev/null; set +a -LEDGER="$HOME/.claude/state/scrape-ledger.db" -LOG="$HOME/.claude/logs/github-domain-scrape.log" +LEDGER="$HOME/.surrogate/state/scrape-ledger.db" +LOG="$HOME/.surrogate/logs/github-domain-scrape.log" DATE=$(date +%Y-%m-%d) OUT="$HOME/axentx/surrogate/data/training-jsonl/github-domain-${DATE}.jsonl" mkdir -p "$(dirname "$LOG")" "$(dirname "$OUT")" -[[ ! -f "$LEDGER" ]] && bash "$HOME/.claude/bin/scrape-ledger-init.sh" +[[ ! -f "$LEDGER" ]] && bash "$HOME/.surrogate/bin/scrape-ledger-init.sh" TARGET="${1:-}" export LEDGER OUT GITHUB_TOKEN GITHUB_TOKEN_POOL TARGET diff --git a/bin/graph-sync.sh b/bin/graph-sync.sh new file mode 100755 index 0000000000000000000000000000000000000000..644fa4d479f5b21284e8b6e6ff8ff7077eac3150 --- /dev/null +++ b/bin/graph-sync.sh @@ -0,0 +1,107 @@ +#!/usr/bin/env bash +# ... (original content unchanged) +# Sync Obsidian markdown patterns/knowledge → FalkorDB Lite (graph DB) +# Complements rag-index.sh (vector DB) — same sources, 2 different indexes. 
+set -e +PYTHON="${HOME}/.surrogate/venv/bin/python" +[ -x "$PYTHON" ] || { echo "venv not found: $PYTHON"; exit 1; } + +"$PYTHON" <<'PY' +import re, os +from pathlib import Path +from redislite.falkordb_client import FalkorDB +import yaml + +HOME = Path.home() +SOURCES = [ + HOME / "Documents/Obsidian Vault/AI-Hub/patterns", + HOME / "Documents/Obsidian Vault/AI-Hub/knowledge", + HOME / "Documents/Obsidian Vault/AI-Hub/inbox", + HOME / ".surrogate/memory", +] +DB_FILE = str(HOME / ".surrogate/graph-db.rdb") + +db = FalkorDB(dbfilename=DB_FILE) +g = db.select_graph("ashira") + +try: g.query("MATCH (n) DETACH DELETE n") +except: pass + +frontmatter_re = re.compile(r'^---\n(.*?)\n---', re.DOTALL) +wikilink_re = re.compile(r'\[\[([^\]|]+?)(?:\|[^\]]+)?\]\]') + +def esc(s): + return str(s).replace("\\", "\\\\").replace("'", "\\'") if s else "" + +nodes = {} +edges = [] + +for src in SOURCES: + if not src.exists(): continue + for md in src.rglob("*.md"): + stem = md.stem + text = md.read_text(errors="ignore") + fm_match = frontmatter_re.match(text) + fm = {} + if fm_match: + try: fm = yaml.safe_load(fm_match.group(1)) or {} + except: pass + + tags = fm.get("tags", []) + if isinstance(tags, str): tags = [tags] + + nodes[stem] = { + "path": str(md.relative_to(HOME)), + "tags": [str(t).replace("#","") for t in tags], + "category": md.parent.name, + "severity": str(fm.get("severity", "medium")), + } + + for link in wikilink_re.findall(text): + target = link.split("/")[-1].split("|")[0].replace(".md", "").strip() + if target and target != stem: + edges.append((stem, target)) + +for name, info in nodes.items(): + g.query( + f"MERGE (n:Doc {{name:'{esc(name)}'}}) " + f"SET n.path='{esc(info['path'])}', " + f"n.category='{esc(info['category'])}', " + f"n.severity='{esc(info['severity'])}', " + f"n.tags='{esc(','.join(info['tags']))}'" + ) + +edge_count = 0 +for src_name, tgt_name in edges: + try: + g.query( + f"MATCH (a:Doc {{name:'{esc(src_name)}'}}), (b:Doc 
{{name:'{esc(tgt_name)}'}}) " + f"MERGE (a)-[:LINKS_TO]->(b)" + ) + edge_count += 1 + except: pass + +all_tags = set() +for info in nodes.values(): + for t in info["tags"]: + if t: all_tags.add(t) +for t in all_tags: + g.query(f"MERGE (:Tag {{name:'{esc(t)}'}})") +for name, info in nodes.items(): + for t in info["tags"]: + if not t: continue + g.query( + f"MATCH (d:Doc {{name:'{esc(name)}'}}), (t:Tag {{name:'{esc(t)}'}}) " + f"MERGE (d)-[:TAGGED]->(t)" + ) + +print(f"Graph built: {len(nodes)} docs, {edge_count} links, {len(all_tags)} tags") + +r = g.query("MATCH (d:Doc)-[:TAGGED]->(t:Tag) RETURN t.name, count(d) AS c ORDER BY c DESC LIMIT 10") +print("\nTop 10 tags:") +for row in r.result_set: print(f" #{row[0]}: {row[1]} docs") + +r = g.query("MATCH (d:Doc)-[r:LINKS_TO]-() RETURN d.name, count(r) AS c ORDER BY c DESC LIMIT 10") +print("\nTop 10 hubs (most connected):") +for row in r.result_set: print(f" {row[0]}: {row[1]} links") +PY diff --git a/bin/groq-bridge.sh b/bin/groq-bridge.sh new file mode 100755 index 0000000000000000000000000000000000000000..615fc4f428ab143eed83d31d7020aba97954b26d --- /dev/null +++ b/bin/groq-bridge.sh @@ -0,0 +1,59 @@ +#!/usr/bin/env bash +# Groq bridge — fast Llama/Qwen inference via Groq API (OpenAI-compat) +# Usage: groq-bridge.sh [--model MODEL] "" | echo "..." | groq-bridge.sh +set -u +# Default: Llama 3.3 70B — best quality on Groq free tier (still ultra-fast). +# 8B is available as --model fast when latency matters more than quality. 
+MODEL="llama-3.3-70b-versatile" +MAX_TOKENS=2000 +TEMP=0.3 +PROMPT="" + +while [[ $# -gt 0 ]]; do + case "$1" in + --model) + case "$2" in + fast|small|8b) MODEL="llama-3.1-8b-instant" ;; + llama|llama70) MODEL="llama-3.3-70b-versatile" ;; + qwen) MODEL="qwen/qwen3-32b" ;; + llama4|scout) MODEL="meta-llama/llama-4-scout-17b-16e-instruct" ;; + gpt-oss|oss) MODEL="openai/gpt-oss-120b" ;; + *) MODEL="$2" ;; + esac; shift 2 ;; + --max-tokens) MAX_TOKENS="$2"; shift 2 ;; + *) PROMPT="$*"; break ;; + esac +done +[[ -z "$PROMPT" ]] && [[ ! -t 0 ]] && PROMPT=$(cat) +[[ -z "$PROMPT" ]] && { echo "groq-bridge: no prompt" >&2; exit 2; } + +LOG="$HOME/.surrogate/logs/groq-bridge.log" +mkdir -p "$(dirname "$LOG")" +set -a; source "$HOME/.hermes/.env"; set +a +echo "[$(date '+%H:%M:%S')] model=$MODEL len=${#PROMPT}" >> "$LOG" + +RESPONSE=$(python3 -c " +import os +exec(open(os.path.expanduser('~/.surrogate/bin/lib/dns_fallback.py')).read()) +exec(open(os.path.expanduser('~/.surrogate/bin/lib/bridge_retry.py')).read()) +import json, sys +body = { + 'model': '$MODEL', + 'messages': [{'role':'user','content': sys.stdin.read()}], + 'max_tokens': $MAX_TOKENS, 'temperature': $TEMP, +} +try: + d = request_with_retry( + 'https://api.groq.com/openai/v1/chat/completions', + data=json.dumps(body).encode(), + headers={'Content-Type':'application/json', 'User-Agent':'hermes-agent/1.0', 'Authorization':'Bearer '+os.environ.get('GROQ_API_KEY','')}, + timeout=120, max_retries=4, base_delay=2.0, + ) + print(d.get('choices',[{}])[0].get('message',{}).get('content','')) +except Exception as e: + print(f'groq-bridge error: {e}', file=sys.stderr); sys.exit(1) +" <<< "$PROMPT") +RC=$? 
+echo "[$(date '+%H:%M:%S')] rc=$RC bytes=${#RESPONSE}" >> "$LOG" +[[ $RC -ne 0 ]] && exit $RC +echo "$RESPONSE" diff --git a/bin/hermes-daily-summary.sh b/bin/hermes-daily-summary.sh index bdbd58140bd5aa961437be357a961b98befed7b6..d16e39a2542dc3fcb0bbd2db51480e0e13f213e6 100755 --- a/bin/hermes-daily-summary.sh +++ b/bin/hermes-daily-summary.sh @@ -4,7 +4,7 @@ set -u set -a; source "$HOME/.hermes/.env" 2>/dev/null; set +a -LOG="$HOME/.claude/logs/hermes-daily-summary.log" +LOG="$HOME/.surrogate/logs/hermes-daily-summary.log" mkdir -p "$(dirname "$LOG")" # ── Collect metrics ────────────────────────────────────────────────────────── @@ -12,23 +12,23 @@ TODAY=$(date +%Y-%m-%d) YESTERDAY=$(date -v-1d +%Y-%m-%d 2>/dev/null || date -d 'yesterday' +%Y-%m-%d) # 1. Tasks completed (24h) -TASKS_DONE=$(grep -c "done in" ~/.claude/logs/hermes-dev-*-daemon.log 2>/dev/null | awk -F: '{s+=$2} END{print s+0}') +TASKS_DONE=$(grep -c "done in" ~/.surrogate/logs/hermes-dev-*-daemon.log 2>/dev/null | awk -F: '{s+=$2} END{print s+0}') # 2. Tasks failed (24h) -TASKS_FAIL=$(grep -c "failed after" ~/.claude/logs/hermes-dev-*-daemon.log 2>/dev/null | awk -F: '{s+=$2} END{print s+0}') +TASKS_FAIL=$(grep -c "failed after" ~/.surrogate/logs/hermes-dev-*-daemon.log 2>/dev/null | awk -F: '{s+=$2} END{print s+0}') # 3. Scrape activity -SCRAPE_TOTAL=$(sqlite3 ~/.claude/state/scrape-ledger.db "SELECT COUNT(*) FROM scraped" 2>/dev/null || echo "?") -SCRAPE_24H=$(sqlite3 ~/.claude/state/scrape-ledger.db "SELECT COUNT(*) FROM scraped WHERE scraped_at > datetime('now','-24 hours')" 2>/dev/null || echo "?") +SCRAPE_TOTAL=$(sqlite3 ~/.surrogate/state/scrape-ledger.db "SELECT COUNT(*) FROM scraped" 2>/dev/null || echo "?") +SCRAPE_24H=$(sqlite3 ~/.surrogate/state/scrape-ledger.db "SELECT COUNT(*) FROM scraped WHERE scraped_at > datetime('now','-24 hours')" 2>/dev/null || echo "?") # 4. 
Training pairs PAIRS=$(wc -l ~/axentx/surrogate/data/training-jsonl/*.jsonl 2>/dev/null | tail -1 | awk '{print $1}' || echo "?") # 5. Index docs -DOCS=$(sqlite3 ~/.claude/index.db "SELECT COUNT(*) FROM docs" 2>/dev/null || echo "?") +DOCS=$(sqlite3 ~/.surrogate/index.db "SELECT COUNT(*) FROM docs" 2>/dev/null || echo "?") # 6. Episodes (surrogate memory) -EPISODES=$(wc -l ~/.claude/state/surrogate-memory/episodes.jsonl 2>/dev/null | awk '{print $1}' || echo 0) +EPISODES=$(wc -l ~/.surrogate/state/surrogate-memory/episodes.jsonl 2>/dev/null | awk '{print $1}' || echo 0) # 7. Daemons running DAEMONS_UP=$(pgrep -f "dev-cloud-daemon\|qwen-coder-daemon\|priority-json-watcher\|hermes" 2>/dev/null | wc -l | tr -d ' ') @@ -41,7 +41,7 @@ for q in cerebras groq github samba nvidia cloudflare qwen-local; do done # 9. Recent errors (last 100 log lines) -ERR_COUNT=$(tail -200 ~/.claude/logs/*.log 2>/dev/null | grep -cE "ERROR|CRITICAL|Fatal|429|500" 2>/dev/null || echo 0) +ERR_COUNT=$(tail -200 ~/.surrogate/logs/*.log 2>/dev/null | grep -cE "ERROR|CRITICAL|Fatal|429|500" 2>/dev/null || echo 0) # ── Build digest body ──────────────────────────────────────────────────────── BODY="$(cat <> "$LOG" -"$HOME/.claude/bin/notify-discord.sh" "$LEVEL" "Hermes daily summary · $TODAY" "$BODY" +"$HOME/.surrogate/bin/notify-discord.sh" "$LEVEL" "Hermes daily summary · $TODAY" "$BODY" diff --git a/bin/hermes-discord-bot.py b/bin/hermes-discord-bot.py index 1153a0af9ef683065e6388dc985113b977e83cde..7532ea59a34b46b9b49590d04d0722a77e9fbae8 100755 --- a/bin/hermes-discord-bot.py +++ b/bin/hermes-discord-bot.py @@ -10,7 +10,7 @@ Triggers (responds when): Pipes user message → surrogate -p "..." → replies with output. Token comes from $DISCORD_BOT_TOKEN (read from ~/.hermes/.env). -Logs to ~/.claude/logs/hermes-discord-bot.log. +Logs to ~/.surrogate/logs/hermes-discord-bot.log. 
""" from __future__ import annotations @@ -26,12 +26,12 @@ import discord # ── Config ─────────────────────────────────────────────────────────────────── HOME = Path.home() -LOG_PATH = HOME / ".claude/logs/hermes-discord-bot.log" +LOG_PATH = HOME / ".surrogate/logs/hermes-discord-bot.log" LOG_PATH.parent.mkdir(parents=True, exist_ok=True) -# surrogate CLI path: prefer ~/.local/bin (installed), fallback ~/.claude/bin +# surrogate CLI path: prefer ~/.local/bin (installed), fallback ~/.surrogate/bin SURROGATE_BIN = next( - p for p in [HOME / ".local/bin/surrogate", HOME / ".claude/bin/surrogate"] if p.exists() + p for p in [HOME / ".local/bin/surrogate", HOME / ".surrogate/bin/surrogate"] if p.exists() ) PREFIX_RE = re.compile(r"^[!/]sg\b\s*", re.IGNORECASE) @@ -169,7 +169,7 @@ async def on_ready() -> None: log.info("connected as %s (id=%s)", client.user, client.user.id if client.user else "?") print(f"✅ logged in as {client.user}") # Notify Discord channel via webhook that bot came online - notify = HOME / ".claude/bin/notify-discord.sh" + notify = HOME / ".surrogate/bin/notify-discord.sh" if notify.exists(): subprocess.Popen( [str(notify), "success", "Discord bot online", f"Connected as {client.user}. 
DM or @mention to chat."], diff --git a/bin/hermes-status-server.py b/bin/hermes-status-server.py index 6a03545132331d221daf1de7321e4d9a45fe2424..9631dda4a318bbc241d50c02c6ce38796e36400a 100755 --- a/bin/hermes-status-server.py +++ b/bin/hermes-status-server.py @@ -26,9 +26,9 @@ from pydantic import BaseModel app = FastAPI(title="hermes", docs_url=None, redoc_url=None) HOME = Path(os.environ.get("HOME", "/home/hermes")) -LEDGER = HOME / ".claude/state/scrape-ledger.db" -EPISODES = HOME / ".claude/state/surrogate-memory/episodes.jsonl" -LOG_DIR = HOME / ".claude/logs" +LEDGER = HOME / ".surrogate/state/scrape-ledger.db" +EPISODES = HOME / ".surrogate/state/surrogate-memory/episodes.jsonl" +LOG_DIR = HOME / ".surrogate/logs" def _ledger_count() -> int: @@ -92,7 +92,7 @@ async def chat(req: ChatRequest) -> JSONResponse: if not req.prompt.strip(): raise HTTPException(status_code=400, detail="prompt is empty") - surrogate_bin = HOME / ".claude/bin/surrogate" + surrogate_bin = HOME / ".surrogate/bin/surrogate" if not surrogate_bin.exists(): raise HTTPException(status_code=503, detail="surrogate CLI not installed in container") diff --git a/bin/lib/__init__.py b/bin/lib/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/bin/lib/bridge_retry.py b/bin/lib/bridge_retry.py new file mode 100644 index 0000000000000000000000000000000000000000..d5d943e6603d85554e3088b2ec65bafd750d19d3 --- /dev/null +++ b/bin/lib/bridge_retry.py @@ -0,0 +1,142 @@ +"""Shared HTTP retry library for all cloud bridges. +Handles: exponential backoff + jitter + Retry-After + circuit breaker. 
+Import at top of any bridge: exec(open(...).read()) + +Exports: request_with_retry(url, data, headers, max_retries=4, base_delay=2.0) +""" +import json as _json +import os as _os +import random as _random +import time as _time +import urllib.request as _urlreq +import urllib.error as _urlerr + +# Circuit breaker state — persisted in /tmp so all bridge invocations share +_CB_DIR = "/tmp/bridge-circuits" +_os.makedirs(_CB_DIR, exist_ok=True) + + +def _cb_state_path(host): + return f"{_CB_DIR}/{host.replace('/', '_')}.json" + + +def _circuit_open(host): + p = _cb_state_path(host) + try: + with open(p) as f: + s = _json.load(f) + # Circuit closed after timeout + if _time.time() > s.get("open_until", 0): + return False, 0 + return True, int(s["open_until"] - _time.time()) + except Exception: + return False, 0 + + +def _record_failure(host, open_seconds=60): + """Called on 429 or 5xx — track consecutive failures.""" + p = _cb_state_path(host) + try: + with open(p) as f: + s = _json.load(f) + except Exception: + s = {"consec_fails": 0, "open_until": 0} + s["consec_fails"] = s.get("consec_fails", 0) + 1 + # Open circuit after 3 consecutive failures + if s["consec_fails"] >= 3: + s["open_until"] = _time.time() + open_seconds + with open(p, "w") as f: + _json.dump(s, f) + + +def _record_success(host): + """Called on 2xx — reset failure counter.""" + p = _cb_state_path(host) + try: + with open(p, "w") as f: + _json.dump({"consec_fails": 0, "open_until": 0}, f) + except Exception: + pass + + +def _parse_retry_after(headers, default_delay): + """Honor Retry-After header (seconds) or x-ratelimit-reset-after.""" + for h in ("Retry-After", "retry-after", "x-ratelimit-reset-after", "x-ratelimit-reset"): + val = headers.get(h) + if val: + try: + n = int(val) + # x-ratelimit-reset may be absolute epoch — convert to delta + if n > 10_000_000_000: # way in future = epoch ms + n = n // 1000 - int(_time.time()) + elif n > 1_000_000_000: # epoch seconds + n = n - int(_time.time()) + 
return max(1, min(n, 300)) # clamp 1..300s + except (ValueError, TypeError): + pass + return default_delay + + +def request_with_retry(url, data, headers, timeout=120, max_retries=4, base_delay=2.0, open_seconds=60): + """Make HTTP request with exp-backoff retry + circuit breaker. + + Args: + open_seconds: how long to open circuit after 3 consecutive failures. + Default 60s. Callers with strict per-minute rate limits (Cloudflare, + SambaNova) should use 120-180s so we don't hammer during cooldown. + + Returns: parsed JSON response. + Raises: Exception if circuit open or max retries exhausted. + """ + from urllib.parse import urlparse + + host = urlparse(url).netloc + + # Circuit breaker check + is_open, remaining = _circuit_open(host) + if is_open: + raise Exception(f"circuit-open for {host} ({remaining}s remaining)") + + last_err = None + for attempt in range(max_retries): + try: + req = _urlreq.Request(url, data=data, headers=headers) + with _urlreq.urlopen(req, timeout=timeout) as r: + result = _json.load(r) + _record_success(host) + return result + except _urlerr.HTTPError as e: + last_err = e + if e.code == 429: + # Rate-limited — honor Retry-After + base = base_delay * (2 ** attempt) + delay = _parse_retry_after(e.headers, base) + delay *= (1 + _random.uniform(-0.2, 0.2)) # jitter ±20% + if attempt < max_retries - 1: + _time.sleep(min(delay, 60)) + continue + _record_failure(host, open_seconds=open_seconds) + raise Exception(f"HTTP 429 after {max_retries} retries (last Retry-After: {delay:.0f}s)") + elif 500 <= e.code < 600: + # Server error — exp backoff with jitter + delay = base_delay * (2 ** attempt) * (1 + _random.uniform(-0.2, 0.2)) + if attempt < max_retries - 1: + _time.sleep(min(delay, 30)) + continue + _record_failure(host, open_seconds=open_seconds) + raise Exception(f"HTTP {e.code} after {max_retries} retries") + else: + # 4xx other than 429 — not retryable (client error) + _record_failure(host, open_seconds=open_seconds) + raise + except 
(_urlerr.URLError, _os.error) as e: + last_err = e + # Network error — retry with backoff + delay = base_delay * (2 ** attempt) * (1 + _random.uniform(-0.2, 0.2)) + if attempt < max_retries - 1: + _time.sleep(min(delay, 30)) + continue + _record_failure(host, open_seconds=open_seconds) + raise + + raise Exception(f"max retries ({max_retries}) exhausted: {last_err}") diff --git a/bin/lib/checkpoint.py b/bin/lib/checkpoint.py new file mode 100644 index 0000000000000000000000000000000000000000..a9e909f20dd836bd07b39753b5de6913ebf1d682 --- /dev/null +++ b/bin/lib/checkpoint.py @@ -0,0 +1,146 @@ +"""Checkpoint store — JSONL event log per task, append-only. + +Purpose: + - Crash-safe: every event appended immediately (no buffering) + - Resume-aware: load full event trail to reconstruct task state + - Distill-friendly: each file = complete conversation trace a future model can learn from + +Event types: + task_start, codebase_review, provider_selected, stream_chunk, model_switch, + result_draft, review_requested, review_verdict, revision_requested, task_done, + task_failed, provider_probe + +File layout: + ~/.surrogate/yolo/checkpoints/.jsonl — live tasks + ~/.surrogate/yolo/checkpoints_done/.jsonl — completed (archive) +""" + +from __future__ import annotations + +import datetime as dt +import json +from dataclasses import dataclass +from pathlib import Path +from typing import Any, Iterator + +CHECKPOINT_DIR = Path.home() / ".surrogate" / "yolo" / "checkpoints" +CHECKPOINT_DONE = Path.home() / ".surrogate" / "yolo" / "checkpoints_done" + + +def _now() -> str: + return dt.datetime.now(dt.timezone.utc).isoformat() + + +@dataclass +class Checkpoint: + task_id: str + path: Path + + @classmethod + def open(cls, task_id: str) -> "Checkpoint": + CHECKPOINT_DIR.mkdir(parents=True, exist_ok=True) + return cls(task_id=task_id, path=CHECKPOINT_DIR / f"{task_id}.jsonl") + + def append(self, event_type: str, **fields: Any) -> None: + """Atomically append event. 
Fields serialize via JSON.""" + rec = {"t": _now(), "event": event_type, **fields} + with open(self.path, "a") as f: + f.write(json.dumps(rec, ensure_ascii=False, default=str) + "\n") + + def events(self) -> list[dict]: + if not self.path.exists(): + return [] + out = [] + with open(self.path) as f: + for line in f: + line = line.strip() + if not line: + continue + try: + out.append(json.loads(line)) + except json.JSONDecodeError: + continue + return out + + def last_event(self, event_type: str = "") -> dict | None: + for e in reversed(self.events()): + if not event_type or e.get("event") == event_type: + return e + return None + + def resume_state(self) -> dict: + """Reconstruct what we know from the event trail. + + Returns: + { + "started": bool, + "completed": bool, + "failed": bool, + "current_model": str | None, + "draft_text": str (partial output so far), + "attempts": int, + "last_event": dict | None, + "artifacts_reviewed": list[str], + "review_iterations": int, + } + """ + ev = self.events() + state = { + "started": False, + "completed": False, + "failed": False, + "current_model": None, + "draft_text": "", + "attempts": 0, + "last_event": ev[-1] if ev else None, + "artifacts_reviewed": [], + "review_iterations": 0, + } + for e in ev: + etype = e.get("event") + if etype == "task_start": + state["started"] = True + elif etype == "provider_selected": + state["current_model"] = e.get("model") + state["attempts"] += 1 + elif etype == "model_switch": + state["current_model"] = e.get("to") + elif etype == "codebase_review": + state["artifacts_reviewed"] = e.get("artifacts", []) + elif etype == "result_draft": + state["draft_text"] = e.get("text", state["draft_text"]) + elif etype == "review_verdict": + state["review_iterations"] += 1 + elif etype == "task_done": + state["completed"] = True + elif etype == "task_failed": + state["failed"] = True + return state + + def archive(self) -> None: + """Move to checkpoints_done/ after task complete.""" + 
CHECKPOINT_DONE.mkdir(parents=True, exist_ok=True) + dest = CHECKPOINT_DONE / self.path.name + if self.path.exists(): + self.path.rename(dest) + self.path = dest + + +def list_active() -> list[str]: + if not CHECKPOINT_DIR.exists(): + return [] + return [p.stem for p in CHECKPOINT_DIR.glob("*.jsonl")] + + +if __name__ == "__main__": + import sys + if len(sys.argv) < 2: + print("usage: checkpoint.py [replay]") + sys.exit(1) + cp = Checkpoint.open(sys.argv[1]) + if len(sys.argv) > 2 and sys.argv[2] == "replay": + for e in cp.events(): + print(json.dumps(e, ensure_ascii=False)) + else: + state = cp.resume_state() + print(json.dumps(state, indent=2, ensure_ascii=False, default=str)) diff --git a/bin/lib/codebase_scanner.py b/bin/lib/codebase_scanner.py new file mode 100644 index 0000000000000000000000000000000000000000..bb89ea993a09ad4add998c791121732fe2575839 --- /dev/null +++ b/bin/lib/codebase_scanner.py @@ -0,0 +1,225 @@ +"""Codebase scanner — full review before each task iteration. + +Purpose (per Ashira): full scan first, then grep context that previous iteration +left behind. "Review agent" relies on this to know what was done vs what remains. 
+ +3-pass strategy: + Pass 1: List recently-modified files across watched roots (last 7 days) + Pass 2: Semantic search via ChromaDB (if index exists) using task keywords + Pass 3: Git status + diff for any repos found (to detect uncommitted work) + +Input: task description (string) +Output: structured summary dict the dispatcher can feed to models as context +""" + +from __future__ import annotations + +import datetime as dt +import json +import os +import re +import subprocess +from pathlib import Path + +HOME = Path.home() +WATCHED_ROOTS = [ + HOME / "develope", + HOME / "axentx", + HOME / ".surrogate" / "bin", +] +RECENT_DAYS = 7 +MAX_FILE_SIZE = 100_000 # skip large binaries +MAX_FILES_PASS1 = 50 +MAX_CHUNKS_PASS2 = 10 +CHROMA_DB = HOME / ".surrogate" / "code-vector-db" + + +def _keywords(task: str) -> list[str]: + tokens = re.findall(r"[A-Za-z_][A-Za-z0-9_]*", task.lower()) + stop = {"a", "an", "the", "is", "are", "was", "were", "be", "to", "and", + "or", "but", "if", "then", "else", "for", "with", "of", "in", "on", + "at", "this", "that", "from", "by", "as", "i", "you", "it", "we", + "they", "write", "create", "make", "build", "add", "update", "task"} + return [t for t in tokens if len(t) >= 3 and t not in stop][:10] + + +def _recent_files(keywords: list[str], roots: list[Path]) -> list[dict]: + """Find recently modified source files matching keywords.""" + cutoff = dt.datetime.now() - dt.timedelta(days=RECENT_DAYS) + out = [] + for root in roots: + if not root.exists(): + continue + for dirpath, dirnames, filenames in os.walk(root): + # skip hidden, node_modules, .git, venv + dirnames[:] = [d for d in dirnames if not d.startswith(".") + and d not in {"node_modules", "vendor", "venv", ".venv", + "__pycache__", "dist", "build", "target"}] + for f in filenames: + p = Path(dirpath) / f + try: + st = p.stat() + except OSError: + continue + if st.st_size > MAX_FILE_SIZE: + continue + mtime = dt.datetime.fromtimestamp(st.st_mtime) + if mtime < cutoff: + continue + 
# score by keyword hits in name/path + path_lower = str(p).lower() + score = sum(1 for kw in keywords if kw in path_lower) + # light content match (first 4KB only for perf) + try: + with open(p, "r", errors="replace") as fh: + head = fh.read(4096).lower() + score += sum(1 for kw in keywords if kw in head) * 2 + except OSError: + continue + if score > 0: + out.append({ + "path": str(p), + "mtime": mtime.isoformat(), + "score": score, + "size": st.st_size, + }) + out.sort(key=lambda x: -x["score"]) + return out[:MAX_FILES_PASS1] + + +def _chromadb_search(keywords: list[str], task: str) -> list[dict]: + """Query ChromaDB semantic index (if available).""" + if not CHROMA_DB.exists(): + return [] + try: + # Use existing helper if present + helper = HOME / ".surrogate" / "bin" / "code-search.sh" + if helper.exists(): + proc = subprocess.run( + [str(helper), "--top", str(MAX_CHUNKS_PASS2), task], + capture_output=True, text=True, timeout=30, + ) + if proc.returncode == 0 and proc.stdout: + out = [] + for line in proc.stdout.splitlines()[:MAX_CHUNKS_PASS2]: + m = re.match(r"(\S+):(\d+)\s+(.*)", line) + if m: + out.append({ + "path": m.group(1), + "line": int(m.group(2)), + "preview": m.group(3)[:200], + }) + return out + except (subprocess.TimeoutExpired, OSError): + pass + return [] + + +def _git_uncommitted(roots: list[Path]) -> list[dict]: + """Detect repos with uncommitted work (partial iterations).""" + out = [] + # Find up to 3 levels of git repos + for root in roots: + if not root.exists(): + continue + for depth_glob in ["*/.git", "*/*/.git", "*/*/*/.git"]: + for git_dir in root.glob(depth_glob): + repo = git_dir.parent + try: + status = subprocess.run( + ["git", "-C", str(repo), "status", "--short"], + capture_output=True, text=True, timeout=5, + ) + if status.returncode == 0 and status.stdout.strip(): + out.append({ + "repo": str(repo), + "changes": status.stdout.strip().splitlines()[:20], + }) + except (subprocess.TimeoutExpired, OSError): + continue + return 
out + + +def scan(task: str, task_artifacts: list[str] | None = None) -> dict: + """Full codebase review → structured context dict. + + Args: + task: natural-language task description + task_artifacts: paths mentioned in task (will be loaded in full) + + Returns: + { + "keywords": [...], + "recent_files": [{path, mtime, score, size}, ...], + "semantic_hits": [{path, line, preview}, ...], + "uncommitted_repos": [{repo, changes: [...]}, ...], + "explicit_artifacts": {path: content, ...}, # loaded in full + } + """ + keywords = _keywords(task) + report = { + "task_excerpt": task[:200], + "keywords": keywords, + "recent_files": _recent_files(keywords, WATCHED_ROOTS), + "semantic_hits": _chromadb_search(keywords, task), + "uncommitted_repos": _git_uncommitted(WATCHED_ROOTS), + "explicit_artifacts": {}, + } + for a in task_artifacts or []: + p = Path(a) + if p.exists() and p.is_file() and p.stat().st_size < MAX_FILE_SIZE: + try: + report["explicit_artifacts"][str(p)] = p.read_text(errors="replace")[:10000] + except OSError: + pass + return report + + +def as_context_prompt(scan_result: dict, max_chars: int = 8000) -> str: + """Render scan as context for LLM system prompt.""" + lines = [ + "## Codebase context (auto-generated)", + f"Task keywords: {', '.join(scan_result['keywords'])}", + "", + ] + if scan_result["uncommitted_repos"]: + lines.append("### Uncommitted work (may indicate previous partial iteration):") + for r in scan_result["uncommitted_repos"][:5]: + lines.append(f" {r['repo']}") + for c in r["changes"][:8]: + lines.append(f" {c}") + lines.append("") + + if scan_result["recent_files"]: + lines.append(f"### Recently modified relevant files ({len(scan_result['recent_files'])}):") + for f in scan_result["recent_files"][:15]: + lines.append(f" {f['path']} (score={f['score']}, mtime={f['mtime']})") + lines.append("") + + if scan_result["semantic_hits"]: + lines.append("### Semantic search hits:") + for h in scan_result["semantic_hits"][:8]: + lines.append(f" 
{h['path']}:{h.get('line','?')} — {h['preview'][:120]}") + lines.append("") + + if scan_result["explicit_artifacts"]: + lines.append("### Explicit task artifacts (FULL content):") + for path, content in scan_result["explicit_artifacts"].items(): + lines.append(f"--- {path} ---") + lines.append(content[:3000]) + lines.append("") + + result = "\n".join(lines) + return result[:max_chars] + + +if __name__ == "__main__": + import sys + task = " ".join(sys.argv[1:]) or "refactor yolo daemon" + report = scan(task) + print(json.dumps( + {k: v if not isinstance(v, list) else v[:5] for k, v in report.items()}, + indent=2, default=str, ensure_ascii=False + )) + print("\n=== AS CONTEXT PROMPT ===\n") + print(as_context_prompt(report, 3000)) diff --git a/bin/lib/context_builder.sh b/bin/lib/context_builder.sh new file mode 100644 index 0000000000000000000000000000000000000000..7cd8fec18362fc34a5cc9bb954a6533c3201936b --- /dev/null +++ b/bin/lib/context_builder.sh @@ -0,0 +1,272 @@ +#!/usr/bin/env bash +# Shared context builder — sourced by qwen-coder-worker + dev-cloud-worker. +# Produces rich context: repo-map + similar functions from project + past accepted examples. +# Call: build_rich_context +# Sets env vars: REPO_MAP, SIMILAR_FUNCS, FEWSHOT_ACCEPTED, ANTI_PATTERNS +build_rich_context() { + local PRIO_PROJECT="$1" + local PRIO_ID="$2" + local PRIO_TITLE="$3" + local SHARED="$HOME/.hermes/workspace/swarm-shared" + local PROJECT_DIR="$HOME/axentx/$PRIO_PROJECT" + + # 1. Full repo-map (up to 10KB — was 3KB). + # build-repo-map.sh writes to "_map.md"; some older paths used ".md". + # Try both so we don't silently lose the strongest grounding signal. + REPO_MAP="" + for candidate in "$SHARED/repo-maps/${PRIO_PROJECT}_map.md" "$SHARED/repo-maps/${PRIO_PROJECT}.md"; do + if [[ -f "$candidate" ]]; then + REPO_MAP=$(/usr/bin/head -c 10000 "$candidate") + break + fi + done + + # 2. 
Similar function signatures from project (grep in real codebase) + SIMILAR_FUNCS="" + if [[ -d "$PROJECT_DIR" ]]; then + # Extract keywords from title for grep + local KW=$(echo "$PRIO_TITLE" | /usr/bin/tr '[:upper:]' '[:lower:]' | /usr/bin/tr -cs 'a-z0-9' ' ' | /usr/bin/tr ' ' '\n' | /usr/bin/awk 'length>4' | /usr/bin/head -3 | /usr/bin/tr '\n' '|' | /usr/bin/sed 's/|$//') + if [[ -n "$KW" ]]; then + SIMILAR_FUNCS=$(/usr/bin/find "$PROJECT_DIR" -type f \( -name '*.py' -o -name '*.ts' -o -name '*.tsx' -o -name '*.js' -o -name '*.go' \) ! -path '*/node_modules/*' ! -path '*/.hermes-*' 2>/dev/null | \ + xargs /usr/bin/grep -lE "($KW)" 2>/dev/null | /usr/bin/head -3 | while read f; do + echo "=== ${f#$PROJECT_DIR/} ===" + /usr/bin/grep -A3 -E "^(def|function|export const|class|async def|interface)" "$f" 2>/dev/null | /usr/bin/head -30 + done 2>/dev/null | /usr/bin/head -c 4000) + fi + fi + + # 3. RAG: actual code patterns from project (SQLite FTS via ask-sqlite.py if exists) + RAG_EXAMPLES="" + if [[ -x "$HOME/.surrogate/bin/ask-sqlite.py" ]]; then + RAG_EXAMPLES=$(/usr/bin/python3 "$HOME/.surrogate/bin/ask-sqlite.py" \ + "$PRIO_PROJECT $PRIO_TITLE" 2>/dev/null | /usr/bin/head -c 3000) + fi + + # 4. Semantic RAG (from embeddings) — top-5 similar + SEMANTIC_RAG="" + if [[ -f "$HOME/.surrogate/embeddings.db" ]]; then + SEMANTIC_RAG=$(/usr/bin/python3 "$HOME/.surrogate/bin/embed-doc.py" --query "$PRIO_TITLE" 2>/dev/null | /usr/bin/head -c 2000) + fi + + # 5. 
Past ACCEPTED examples (few-shot from quality≥7 history) + FEWSHOT_ACCEPTED="" + for review in $(/bin/ls -t "$HOME/.hermes/workspace/qwen-coder-reviews/"*.review.json 2>/dev/null | /usr/bin/head -30); do + if /usr/bin/grep -qE '"quality_score":\s*[789]|"quality_score":\s*10' "$review" 2>/dev/null; then + local OUT_FILE=$(basename "$review" .review.json) + # Search all worker output dirs + for WD in qwen-coder dev-cloud-samba dev-cloud-github dev-cloud-cloudflare dev-cloud-groq dev-cloud-synthesis; do + local OUT_PATH="$HOME/.hermes/workspace/$WD/${OUT_FILE}.md" + if [[ -f "$OUT_PATH" ]]; then + FEWSHOT_ACCEPTED=$(/usr/bin/head -c 2000 "$OUT_PATH") + break 2 + fi + done + fi + done + + # 6. Anti-patterns (last 5 rejection reasons across all workers) + ANTI_PATTERNS="" + for review in $(/bin/ls -t "$HOME/.hermes/workspace/qwen-coder-reviews/"*.review.json 2>/dev/null | /usr/bin/head -10); do + local bugs=$(/usr/bin/python3 -c " +import json, re, sys +try: + txt = open('$review').read() + m = re.search(r'\{.*\}', txt, re.DOTALL) + if not m: sys.exit() + d = json.loads(m.group(0)) + if d.get('verdict') in ('reject','rework') and d.get('bugs'): + for b in d['bugs'][:2]: + print(f'- {b[:180]}') +except: pass +" 2>/dev/null) + [[ -n "$bugs" ]] && ANTI_PATTERNS="$ANTI_PATTERNS$bugs"$'\n' + done + ANTI_PATTERNS=$(echo "$ANTI_PATTERNS" | /usr/bin/head -10) + + # 7. Active-learning prompt deltas — aggregate last 5 UNIQUE anti-patterns. + # Preference: same-project anti-patterns first, then generic. + # Dedup by first 80 chars of prompt_addition (similar bugs shouldn't bloat prompt). 
+ PROMPT_DELTAS="" + if [[ -f "$HOME/.surrogate/memory/worker-prompt-deltas.jsonl" ]]; then + PROMPT_DELTAS=$(/usr/bin/python3 -c " +import json, sys +from pathlib import Path +try: + entries = [] + for l in Path('$HOME/.surrogate/memory/worker-prompt-deltas.jsonl').read_text().splitlines(): + if not l.strip(): continue + try: entries.append(json.loads(l)) + except: pass + # Dedup by first 80 chars + seen = set() + picked = [] + # Walk newest → oldest, cap 5 unique + for e in reversed(entries): + addn = (e.get('prompt_addition') or '').strip() + if not addn: continue + key = addn[:80] + if key in seen: continue + seen.add(key) + picked.append(addn) + if len(picked) >= 5: break + if picked: + out = ['ACTIVE-LEARNED RULES (avoid these past mistakes):'] + for i, a in enumerate(picked, 1): + out.append(f'{i}. {a[:400]}') + print('\n'.join(out)) +except Exception as e: pass +" 2>/dev/null) + fi + + # 8. Priority full spec (if a detailed spec file exists) + # Spec is the single most important signal — cap high (6KB) so the full + # Context/Requirements/DO NOT sections fit. Other RAG signals are capped + # lower because they're supplementary; the spec is authoritative. + PRIO_SPEC="" + local SPEC_FILE="$HOME/.hermes/workspace/swarm-shared/specs/${PRIO_ID}.md" + [[ -f "$SPEC_FILE" ]] && PRIO_SPEC=$(/usr/bin/head -c 6000 "$SPEC_FILE") + + # 9. Task-type authoritative sources — boost scraped knowledge based on title. + # Security task → CVE/MITRE/OWASP/Prowler. SRE → Google SRE/postmortems. + # Observability → OTel/Prometheus/Grafana/Honeycomb. etc. + # This is THE fix that makes all our scraping actually used by Hermes workers. 
+ AUTHORITATIVE_CONTEXT="" + if [[ -f "$HOME/.surrogate/index.db" ]]; then + AUTHORITATIVE_CONTEXT=$(/usr/bin/python3 < 3][:5]) +if not kw: exit() + +src_list = ','.join(f"'{s}'" for s in preferred_sources) +# Strategy: 3-tier fallback — preferred+match → any+match → preferred random +rows = [] +try: + # Tier 1: preferred sources + FTS match on keywords + q = f"""SELECT d.source, d.instruction, substr(d.response, 1, 600) as body + FROM docs_fts f JOIN docs d ON d.id = f.rowid + WHERE f.docs_fts MATCH ? AND d.source IN ({src_list}) + ORDER BY bm25(docs_fts) LIMIT 6""" + rows = conn.execute(q, (kw,)).fetchall() +except sqlite3.OperationalError: pass + +if not rows: + # Tier 2: FTS match on ANY source — relax source filter + try: + q2 = """SELECT d.source, d.instruction, substr(d.response, 1, 600) as body + FROM docs_fts f JOIN docs d ON d.id = f.rowid + WHERE f.docs_fts MATCH ? ORDER BY bm25(docs_fts) LIMIT 6""" + rows = conn.execute(q2, (kw,)).fetchall() + except sqlite3.OperationalError: pass + +if not rows: + # Tier 3: random sample from preferred sources (even if no keyword match) + rows = conn.execute(f"SELECT source, instruction, substr(response,1,600) as body FROM docs WHERE source IN ({src_list}) ORDER BY RANDOM() LIMIT 6").fetchall() + +conn.close() + +out = [] +for r in rows: + out.append(f"[{r['source']}] {(r['instruction'] or '')[:120]}") + out.append((r['body'] or '')[:500]) + out.append('') +print('\n'.join(out)[:3500]) +PYEOF +) + fi + + # 10. 
FalkorDB graph — related decisions + past priorities with similar theme + GRAPH_CONTEXT="" + local REDIS_SOCK=$(/usr/bin/find /var/folders /tmp -name 'redis.socket' -type s 2>/dev/null | /usr/bin/head -1) + if [[ -n "$REDIS_SOCK" ]]; then + # Get related priorities + learned rules + GRAPH_CONTEXT=$(/opt/homebrew/bin/redis-cli -s "$REDIS_SOCK" GRAPH.QUERY ashira " + MATCH (p:Priority {project: '$PRIO_PROJECT'}) + OPTIONAL MATCH (p)-[:HAS_LEARNED_RULE]->(l:LearnedRule) + OPTIONAL MATCH (p)-[:COMMITTED_AS]->(c:Commit) + RETURN p.id, p.title, l.content, c.msg LIMIT 8 + " 2>/dev/null | /usr/bin/tail -c 2500) + fi + + # 11. Hermes trace recall — past similar tasks Hermes handled (from JSONL) + HERMES_RECALL="" + local TRACE_DIR="$HOME/axentx/surrogate/data/training-jsonl" + if [[ -d "$TRACE_DIR" ]]; then + HERMES_RECALL=$(/usr/bin/python3 < 4][:4] +if not words: exit() + +hits = [] +# Walk recent hermes-trace-YYYY-MM-DD.jsonl files (last 7 days) +import os +files = sorted(glob.glob(os.path.expanduser('~/axentx/surrogate/data/training-jsonl/hermes-trace-*.jsonl')))[-7:] +for f in files: + try: + for line in open(f): + try: rec = json.loads(line) + except: continue + blob = (rec.get('instruction','') + ' ' + rec.get('output',''))[:2000].lower() + score = sum(1 for w in words if w in blob) + if score >= 2: + hits.append((score, rec)) + except: pass + +hits.sort(key=lambda x: -x[0]) +for score, rec in hits[:3]: + print(f"HERMES PREVIOUSLY [{rec.get('category','?')}]: {rec.get('instruction','')[:120]}") + print(f"→ {rec.get('output','')[:400]}") + print() +PYEOF +) + fi +} + +export -f build_rich_context diff --git a/bin/lib/dns_fallback.py b/bin/lib/dns_fallback.py new file mode 100644 index 0000000000000000000000000000000000000000..d9550a75eff1f688227cdcd969742efbf54241ff --- /dev/null +++ b/bin/lib/dns_fallback.py @@ -0,0 +1,27 @@ +# DNS fallback helper — patches socket.getaddrinfo to use dig @8.8.8.8 +# when system resolver fails (ISP DNS filtering certain AI endpoints). 
+# Import at top of any Python script: exec(open(...).read()) +import socket as _sock +import subprocess as _sp + +_orig_getaddrinfo = _sock.getaddrinfo + +def _resilient_getaddrinfo(host, *args, **kwargs): + try: + return _orig_getaddrinfo(host, *args, **kwargs) + except _sock.gaierror: + # Fall back: resolve via public DNS (bypass ISP filtering) + for resolver in ("1.1.1.1", "8.8.8.8", "9.9.9.9"): + try: + out = _sp.check_output( + ["dig", "+short", "+time=3", "+tries=1", f"@{resolver}", host], + text=True, timeout=5, stderr=_sp.DEVNULL + ).strip().splitlines() + ip = next((ln for ln in out if ln and ln[0].isdigit()), None) + if ip: + return _orig_getaddrinfo(ip, *args, **kwargs) + except Exception: + continue + raise + +_sock.getaddrinfo = _resilient_getaddrinfo diff --git a/bin/lib/ground_truth.py b/bin/lib/ground_truth.py new file mode 100644 index 0000000000000000000000000000000000000000..bc65860f11b2f450541ad3084348efa5e2a09ba2 --- /dev/null +++ b/bin/lib/ground_truth.py @@ -0,0 +1,280 @@ +"""Ground-truth check — objective verification beyond reviewer opinion. + +When task produces code, run external validators: + - Python: ast.parse (syntax) + optional ruff / mypy / pytest + - TypeScript/JS: tsc / eslint (if available) + - Terraform: terraform validate + tfsec (if available) + - CloudFormation: cfn-lint (if available) + - Shell: bash -n (syntax) + shellcheck (if available) + - JSON/YAML: parse check + +Reviewer opinion + ground-truth = double check. Review says pass BUT compile +fails → overrides to fail. 
+ +Output: {"verdict": "pass|fail", "checks": [...], "blocking_failure": bool} +""" + +from __future__ import annotations + +import ast +import json +import re +import shutil +import subprocess +import tempfile +from pathlib import Path +from typing import Optional + +CODE_BLOCK_RE = re.compile(r"```(\w+)?\n(.*?)```", re.DOTALL) + + +def extract_code_blocks(text: str) -> list[tuple[str, str]]: + """Return list of (language, content) pairs from markdown fenced blocks.""" + blocks = [] + for m in CODE_BLOCK_RE.finditer(text): + lang = (m.group(1) or "").lower().strip() + content = m.group(2).strip() + if content: + blocks.append((lang, content)) + return blocks + + +def _have(cmd: str) -> bool: + return shutil.which(cmd) is not None + + +def _run(cmd: list[str], stdin: Optional[str] = None, timeout: int = 30) -> tuple[int, str]: + try: + r = subprocess.run( + cmd, input=stdin, capture_output=True, text=True, timeout=timeout + ) + return r.returncode, (r.stdout + r.stderr)[:2000] + except subprocess.TimeoutExpired: + return -1, "timeout" + except OSError as e: + return -1, str(e) + + +# ---------------------------------------------------------------------- +# Per-language checkers +# ---------------------------------------------------------------------- +def check_python(code: str) -> list[dict]: + out = [] + # 1. syntax + try: + ast.parse(code) + out.append({"tool": "python-syntax", "pass": True, "msg": "syntactically valid"}) + except SyntaxError as e: + out.append({"tool": "python-syntax", "pass": False, + "msg": f"SyntaxError: {e}", "blocking": True}) + return out # no point in running linters + # 2. 
ruff (if installed) + if _have("ruff"): + with tempfile.NamedTemporaryFile("w", suffix=".py", delete=False) as f: + f.write(code) + path = f.name + try: + rc, output = _run(["ruff", "check", "--select=E,F", "--output-format=concise", path]) + passed = rc == 0 + out.append({"tool": "ruff", "pass": passed, + "msg": output[:500] if output else "clean"}) + finally: + Path(path).unlink(missing_ok=True) + # 3. mypy (if installed, non-blocking) + if _have("mypy"): + with tempfile.NamedTemporaryFile("w", suffix=".py", delete=False) as f: + f.write(code) + path = f.name + try: + rc, output = _run(["mypy", "--no-error-summary", "--ignore-missing-imports", path]) + out.append({"tool": "mypy", "pass": rc == 0, "msg": output[:500]}) + finally: + Path(path).unlink(missing_ok=True) + return out + + +def check_typescript(code: str) -> list[dict]: + out = [] + if not _have("npx") and not _have("tsc"): + return [{"tool": "typescript", "pass": True, "msg": "tsc/npx not installed — skipped"}] + with tempfile.NamedTemporaryFile("w", suffix=".ts", delete=False) as f: + f.write(code) + path = f.name + try: + cmd = (["tsc", "--noEmit", "--allowJs", "--target", "ES2022", + "--moduleResolution", "node", path] if _have("tsc") + else ["npx", "-y", "--package=typescript", "--", + "tsc", "--noEmit", "--target", "ES2022", path]) + rc, output = _run(cmd, timeout=60) + out.append({"tool": "tsc", "pass": rc == 0, + "msg": output[:600] if output else "clean", + "blocking": rc != 0}) + finally: + Path(path).unlink(missing_ok=True) + return out + + +def check_shell(code: str) -> list[dict]: + out = [] + # bash -n (syntax only — no execution). Use file path; stdin parser is lenient. 
+ with tempfile.NamedTemporaryFile("w", suffix=".sh", delete=False) as f: + f.write(code) + path = f.name + try: + rc, output = _run(["bash", "-n", path]) + finally: + Path(path).unlink(missing_ok=True) + out.append({"tool": "bash-syntax", "pass": rc == 0, "msg": output or "valid", + "blocking": rc != 0}) + if _have("shellcheck"): + with tempfile.NamedTemporaryFile("w", suffix=".sh", delete=False) as f: + f.write(code) + path = f.name + try: + rc, output = _run(["shellcheck", "-f", "gcc", path]) + # shellcheck returns nonzero for warnings — non-blocking + out.append({"tool": "shellcheck", "pass": rc == 0, "msg": output[:500]}) + finally: + Path(path).unlink(missing_ok=True) + return out + + +def check_terraform(code: str) -> list[dict]: + out = [] + if not _have("terraform"): + return [{"tool": "terraform", "pass": True, "msg": "terraform not installed — skipped"}] + with tempfile.TemporaryDirectory() as d: + Path(d, "main.tf").write_text(code) + rc, output = _run(["terraform", "-chdir=" + d, "init", "-backend=false", "-input=false"], timeout=60) + if rc != 0: + out.append({"tool": "terraform-init", "pass": False, "msg": output[:500], + "blocking": True}) + return out + rc, output = _run(["terraform", "-chdir=" + d, "validate"]) + out.append({"tool": "terraform-validate", "pass": rc == 0, + "msg": output[:500] if output else "clean", + "blocking": rc != 0}) + if _have("tfsec"): + rc, output = _run(["tfsec", d, "--no-color"]) + out.append({"tool": "tfsec", "pass": rc == 0, "msg": output[:500]}) + return out + + +def check_cloudformation(code: str) -> list[dict]: + if not _have("cfn-lint"): + return [{"tool": "cfn-lint", "pass": True, "msg": "cfn-lint not installed — skipped"}] + with tempfile.NamedTemporaryFile("w", suffix=".yaml", delete=False) as f: + f.write(code) + path = f.name + try: + rc, output = _run(["cfn-lint", path]) + return [{"tool": "cfn-lint", "pass": rc == 0, "msg": output[:500], + "blocking": rc != 0}] + finally: + 
Path(path).unlink(missing_ok=True) + + +def check_json(code: str) -> list[dict]: + try: + json.loads(code) + return [{"tool": "json-parse", "pass": True, "msg": "valid JSON"}] + except json.JSONDecodeError as e: + return [{"tool": "json-parse", "pass": False, "msg": str(e), "blocking": True}] + + +def check_yaml(code: str) -> list[dict]: + try: + import yaml # type: ignore + except ImportError: + return [{"tool": "yaml-parse", "pass": True, "msg": "pyyaml not installed — skipped"}] + try: + yaml.safe_load(code) + return [{"tool": "yaml-parse", "pass": True, "msg": "valid YAML"}] + except yaml.YAMLError as e: + return [{"tool": "yaml-parse", "pass": False, "msg": str(e)[:300], "blocking": True}] + + +LANG_CHECKERS = { + "python": check_python, "py": check_python, + "typescript": check_typescript, "ts": check_typescript, + "javascript": check_typescript, "js": check_typescript, + "bash": check_shell, "sh": check_shell, "shell": check_shell, + "terraform": check_terraform, "hcl": check_terraform, "tf": check_terraform, + "cloudformation": check_cloudformation, "yaml": check_yaml, "yml": check_yaml, + "json": check_json, +} + + +# ---------------------------------------------------------------------- +# Orchestrator +# ---------------------------------------------------------------------- +def check(work_product: str) -> dict: + """Extract code blocks + run checkers. Returns aggregate verdict. 
+ + Returns: + { + "has_code": bool, + "verdict": "pass" | "fail", + "blocking_failure": bool, + "checks": [{tool, pass, msg, blocking?}, ...], + "blocks_checked": int, + } + """ + blocks = extract_code_blocks(work_product) + all_checks: list[dict] = [] + has_code = False + + for lang, content in blocks: + checker = LANG_CHECKERS.get(lang) + if not checker: + continue + has_code = True + results = checker(content) + for r in results: + r["language"] = lang + all_checks.extend(results) + + blocking_failure = any(c.get("blocking") and not c.get("pass") for c in all_checks) + # Only blocking checks determine pass/fail. Non-blocking (warn) tools like + # mypy or shellcheck can fail without sinking the verdict. + blocking_passed = all(c.get("pass") for c in all_checks if c.get("blocking")) + any_blocking = any(c.get("blocking") for c in all_checks) + + if not has_code: + return { + "has_code": False, + "verdict": "pass", # nothing to check → don't block review + "blocking_failure": False, + "checks": [], + "blocks_checked": 0, + } + + if blocking_failure: + verdict = "fail" + elif not any_blocking: + # no blocking checks ran (e.g. 
tools missing) — warn + verdict = "warn" + else: + # all blocking checks passed — non-blocking may still complain, but ship it + any_non_blocking_failed = any( + not c.get("pass") and not c.get("blocking") for c in all_checks + ) + verdict = "warn" if any_non_blocking_failed else "pass" + + return { + "has_code": True, + "verdict": verdict, + "blocking_failure": blocking_failure, + "checks": all_checks, + "blocks_checked": len(blocks), + } + + +if __name__ == "__main__": + import sys + if len(sys.argv) > 1: + text = Path(sys.argv[1]).read_text() + else: + text = sys.stdin.read() + result = check(text) + print(json.dumps(result, indent=2)) diff --git a/bin/lib/max_client.py b/bin/lib/max_client.py new file mode 100644 index 0000000000000000000000000000000000000000..bfd6a006a466ac47bff1684fb5ed0c1a08e07e0f --- /dev/null +++ b/bin/lib/max_client.py @@ -0,0 +1,365 @@ +"""Claude Max plan OAuth client. + +Handles: + - Read OAuth token from macOS keychain (`Claude Code-credentials`) + - Auto-refresh before expiry (lazy, on API call) + - Call Anthropic `/v1/messages` with OAuth Bearer + - Parse `anthropic-ratelimit-*` headers → quota state + - Cache quota state (5-min TTL) to avoid probing too often + +Quota model (verified 2026-04-19): + Max plan uses UNIFIED pool — Opus + Sonnet share quota. + Haiku has separate pool (confirmed via live probe). + 5-hour window + 7-day window, both monitored. 
+ +Headers (from live response): + anthropic-ratelimit-unified-5h-status: allowed|rate_limited + anthropic-ratelimit-unified-5h-reset: + anthropic-ratelimit-unified-5h-utilization: 0.0-1.0 + anthropic-ratelimit-unified-7d-status + anthropic-ratelimit-unified-7d-reset + anthropic-ratelimit-unified-7d-utilization +""" + +from __future__ import annotations + +import json +import os +import subprocess +import time +import urllib.error +import urllib.request +from dataclasses import dataclass, field +from pathlib import Path +from typing import Any, Optional + +KEYCHAIN_SERVICE = "Claude Code-credentials" +OAUTH_REFRESH_URL = "https://claude.ai/v1/oauth/token" +OAUTH_CLIENT_ID = "9d1c250a-e61b-44d9-88ed-5944d1962f5e" +ANTHROPIC_API = "https://api.anthropic.com/v1/messages" +ANTHROPIC_BETA = "oauth-2025-04-20" +ANTHROPIC_VERSION = "2023-06-01" + +QUOTA_CACHE_PATH = Path.home() / ".surrogate" / "yolo" / "max-quota.json" +QUOTA_CACHE_TTL = 300 # 5 minutes + +# --- Model IDs (from live probe 2026-04-19) --- +MODEL_OPUS = "claude-opus-4-20250514" +MODEL_SONNET = "claude-sonnet-4-20250514" +MODEL_HAIKU = "claude-haiku-4-5-20251001" + + +@dataclass +class QuotaState: + """Rate-limit state parsed from response headers.""" + model: str + status: str = "unknown" # allowed | rate_limited | unknown + reset_at: int = 0 # unix timestamp when window resets + utilization_5h: float = 0.0 + utilization_7d: float = 0.0 + last_checked: float = 0.0 # unix seconds + last_error: str = "" + + @property + def available(self) -> bool: + return self.status == "allowed" + + @property + def seconds_until_reset(self) -> int: + return max(0, int(self.reset_at - time.time())) + + +@dataclass +class MaxResponse: + """Successful response from Max plan.""" + content: str + model_requested: str + model_served: str + input_tokens: int + output_tokens: int + quota: QuotaState = field(default_factory=lambda: QuotaState(model="")) + + +class MaxUnavailable(Exception): + """Raised when Max plan cannot serve 
the request (429 or auth).""" + def __init__(self, model: str, reset_at: int = 0, msg: str = ""): + self.model = model + self.reset_at = reset_at + self.msg = msg + super().__init__(f"Max {model} unavailable: {msg} (reset in {max(0, reset_at - int(time.time()))}s)") + + +class MaxAuthError(Exception): + """Raised when OAuth token refresh fails permanently — needs relogin.""" + + +# ---------------------------------------------------------------------- +# Keychain I/O +# ---------------------------------------------------------------------- +def read_token() -> dict: + """Read full credential blob from keychain.""" + try: + raw = subprocess.check_output( + ["security", "find-generic-password", "-s", KEYCHAIN_SERVICE, "-w"], + stderr=subprocess.DEVNULL, + ).decode().strip() + return json.loads(raw) + except subprocess.CalledProcessError: + raise MaxAuthError(f"Keychain entry '{KEYCHAIN_SERVICE}' not found — run `claude` to login") + except json.JSONDecodeError as e: + raise MaxAuthError(f"Invalid JSON in keychain: {e}") + + +def write_token(cred: dict) -> None: + """Atomically replace keychain entry.""" + body = json.dumps(cred) + subprocess.run( + ["security", "delete-generic-password", "-s", KEYCHAIN_SERVICE], + stderr=subprocess.DEVNULL, + ) + subprocess.run( + ["security", "add-generic-password", + "-s", KEYCHAIN_SERVICE, + "-a", os.environ.get("USER", "Ashira"), + "-w", body, + "-U"], + check=True, + stderr=subprocess.DEVNULL, + ) + + +# ---------------------------------------------------------------------- +# OAuth refresh +# ---------------------------------------------------------------------- +def refresh_if_needed(cred: dict, buffer_seconds: int = 120) -> dict: + """Refresh access token if expiring in dict[str, QuotaState]: + """Load cached quota state (per model).""" + if not QUOTA_CACHE_PATH.exists(): + return {} + try: + raw = json.loads(QUOTA_CACHE_PATH.read_text()) + return {k: QuotaState(**v) for k, v in raw.items()} + except (json.JSONDecodeError, 
TypeError): + return {} + + +def save_quota_cache(cache: dict[str, QuotaState]) -> None: + QUOTA_CACHE_PATH.parent.mkdir(parents=True, exist_ok=True) + data = {k: v.__dict__ for k, v in cache.items()} + QUOTA_CACHE_PATH.write_text(json.dumps(data, indent=2)) + + +def parse_quota_headers(model: str, headers: dict[str, str]) -> QuotaState: + """Parse anthropic-ratelimit-* headers into QuotaState.""" + h = {k.lower(): v for k, v in headers.items()} + + def fget(key: str, default: float = 0.0) -> float: + try: + return float(h.get(key, default)) + except (ValueError, TypeError): + return default + + def iget(key: str, default: int = 0) -> int: + try: + return int(float(h.get(key, default))) + except (ValueError, TypeError): + return default + + status = h.get("anthropic-ratelimit-unified-5h-status", "unknown") + reset_5h = iget("anthropic-ratelimit-unified-5h-reset") + reset_7d = iget("anthropic-ratelimit-unified-7d-reset") + + return QuotaState( + model=model, + status=status, + reset_at=max(reset_5h, reset_7d) if reset_5h and reset_7d else reset_5h or reset_7d, + utilization_5h=fget("anthropic-ratelimit-unified-5h-utilization"), + utilization_7d=fget("anthropic-ratelimit-unified-7d-utilization"), + last_checked=time.time(), + ) + + +# ---------------------------------------------------------------------- +# Call Anthropic via Max OAuth +# ---------------------------------------------------------------------- +def call_max( + model: str, + messages: list[dict], + max_tokens: int = 4096, + system: Optional[str] = None, + timeout: int = 180, +) -> MaxResponse: + """Make a Max-plan OAuth call. 
Raises MaxUnavailable on 429.""" + cred = refresh_if_needed(read_token()) + token = cred["claudeAiOauth"]["accessToken"] + + body: dict[str, Any] = { + "model": model, + "max_tokens": max_tokens, + "messages": messages, + } + if system: + body["system"] = system + + req = urllib.request.Request( + ANTHROPIC_API, + data=json.dumps(body).encode(), + headers={ + "Authorization": f"Bearer {token}", + "anthropic-version": ANTHROPIC_VERSION, + "anthropic-beta": ANTHROPIC_BETA, + "content-type": "application/json", + }, + method="POST", + ) + try: + with urllib.request.urlopen(req, timeout=timeout) as r: + data = json.loads(r.read()) + quota = parse_quota_headers(model, dict(r.getheaders())) + _update_cache(quota) + return MaxResponse( + content=data["content"][0]["text"], + model_requested=model, + model_served=data.get("model", model), + input_tokens=data["usage"]["input_tokens"], + output_tokens=data["usage"]["output_tokens"], + quota=quota, + ) + except urllib.error.HTTPError as e: + err_body = e.read().decode() + headers = dict(e.headers) + quota = parse_quota_headers(model, headers) + # Override: 429 always means rate_limited regardless of header contents + quota.status = "rate_limited" if e.code == 429 else "error" + quota.last_error = f"HTTP {e.code}: {err_body[:200]}" + # If 429 but no reset header, set a safe cooldown (5 min) so pick_max_model skips + if e.code == 429 and quota.reset_at <= time.time(): + quota.reset_at = int(time.time() + 300) + _update_cache(quota) + if e.code == 429: + raise MaxUnavailable(model, quota.reset_at, err_body) + if e.code == 401: + raise MaxAuthError(f"Max auth failed ({e.code}) — relogin needed") + raise MaxUnavailable(model, 0, f"HTTP {e.code}: {err_body[:200]}") + + +def _update_cache(quota: QuotaState) -> None: + cache = load_quota_cache() + cache[quota.model] = quota + save_quota_cache(cache) + + +# ---------------------------------------------------------------------- +# Tier selection +# 
---------------------------------------------------------------------- +MAX_TIER_ORDER = [MODEL_OPUS, MODEL_SONNET, MODEL_HAIKU] + + +def pick_max_model(prefer: str = MODEL_OPUS) -> Optional[str]: + """Pick best available Max-plan model. + + Strategy: + 1. If cache status=allowed AND fresh (< TTL) → use it immediately + 2. If cache stale (> TTL) → eligible to re-probe (real probe will confirm) + 3. If cache rate_limited: + - If reset_at > 0 AND reset_at still in future → NOT eligible (honor cooldown) + - Only eligible when reset_at passed + cache went stale + 4. Walk Opus → Sonnet → Haiku; use first eligible + + Returns model name or None if all rate-limited within cooldown. + """ + cache = load_quota_cache() + now = time.time() + + def eligible(model: str) -> bool: + q = cache.get(model) + if not q: + return True # unknown → worth one probe + # Fresh + allowed + if q.status == "allowed" and now - q.last_checked <= QUOTA_CACHE_TTL: + return True + # Rate-limited + still within cooldown window → skip + if q.status == "rate_limited" and q.reset_at > now: + return False + # Stale (either status) + no active cooldown → re-probe OK + if now - q.last_checked > QUOTA_CACHE_TTL: + return True + # Rate-limited but reset_at is 0 or in past → try again cautiously + if q.status == "rate_limited" and q.reset_at <= now: + return now - q.last_checked > 30 # wait 30s between retries + return False + + order = [prefer] + [m for m in MAX_TIER_ORDER if m != prefer] + for model in order: + if eligible(model): + return model + return None + + +def probe_and_refresh_cache() -> dict[str, QuotaState]: + """Send minimal probes to each tier to refresh cache. 
Called every 5 min.""" + out: dict[str, QuotaState] = {} + for model in MAX_TIER_ORDER: + try: + resp = call_max(model, [{"role": "user", "content": "."}], max_tokens=5) + out[model] = resp.quota + except MaxUnavailable as e: + # already cached in _update_cache + cache = load_quota_cache() + out[model] = cache.get(model, QuotaState(model=model, status="rate_limited", + reset_at=e.reset_at)) + except MaxAuthError: + raise + return out + + +if __name__ == "__main__": + # CLI self-test + import sys + if len(sys.argv) > 1 and sys.argv[1] == "probe": + for model, q in probe_and_refresh_cache().items(): + print(f"{model}: {q.status} util5h={q.utilization_5h:.2f} " + f"reset_in={q.seconds_until_reset}s") + elif len(sys.argv) > 1 and sys.argv[1] == "pick": + print(pick_max_model() or "NONE_AVAILABLE") + else: + # quick call + m = pick_max_model() or MODEL_HAIKU + r = call_max(m, [{"role": "user", "content": sys.argv[1] if len(sys.argv) > 1 else "hi"}], max_tokens=50) + print(f"[{r.model_served}] {r.content[:200]}") diff --git a/bin/lib/openrouter_client.py b/bin/lib/openrouter_client.py new file mode 100644 index 0000000000000000000000000000000000000000..be17efd8b067ed6ee9b5d46ee622520ecaf367ca --- /dev/null +++ b/bin/lib/openrouter_client.py @@ -0,0 +1,195 @@ +"""OpenRouter client — free-first then paid tiers. + +Tiers (per Ashira 2026-04-19): + FREE: qwen, gpt-oss, llama, nemotron, glm + CHEAP: deepseek-v3.2, grok-4.1-fast + PREMIUM: gpt-5.4, claude-haiku-4.5, claude-sonnet-4.6, claude-opus-4.7 + +Per-model cooldown tracked in ~/.surrogate/yolo/or-cooldowns.json to avoid +hammering rate-limited free models. 
+""" + +from __future__ import annotations + +import json +import os +import time +import urllib.error +import urllib.request +from dataclasses import dataclass, field +from pathlib import Path +from typing import Optional + +OR_URL = "https://openrouter.ai/api/v1/chat/completions" +COOLDOWN_PATH = Path.home() / ".surrogate" / "yolo" / "or-cooldowns.json" + +FREE_MODELS = [ + "qwen/qwen3-coder:free", + "openai/gpt-oss-120b:free", + "meta-llama/llama-3.3-70b-instruct:free", + "nvidia/nemotron-3-super-120b-a12b:free", + "z-ai/glm-4.5-air:free", +] + +CHEAP_MODELS = [ + "deepseek/deepseek-v3.2", + "x-ai/grok-4.1-fast", +] + +PREMIUM_MODELS = [ + "openai/gpt-5.4", + "anthropic/claude-haiku-4.5", + "anthropic/claude-sonnet-4.6", + "x-ai/grok-4.20", + "anthropic/claude-opus-4.7", +] + +DEFAULT_COOLDOWN_SECONDS = 60 # after 429, wait 60s before retrying this model + + +class ORUnavailable(Exception): + def __init__(self, model: str, code: int, body: str): + self.model = model + self.code = code + self.body = body + super().__init__(f"OR {model}: {code} {body[:200]}") + + +@dataclass +class ORResponse: + content: str + model_requested: str + model_served: str + input_tokens: int = 0 + output_tokens: int = 0 + + +def _load_cooldowns() -> dict[str, float]: + if not COOLDOWN_PATH.exists(): + return {} + try: + return json.loads(COOLDOWN_PATH.read_text()) + except (json.JSONDecodeError, OSError): + return {} + + +def _save_cooldowns(c: dict[str, float]) -> None: + COOLDOWN_PATH.parent.mkdir(parents=True, exist_ok=True) + COOLDOWN_PATH.write_text(json.dumps(c)) + + +def is_on_cooldown(model: str) -> bool: + c = _load_cooldowns() + return c.get(model, 0) > time.time() + + +def mark_cooldown(model: str, seconds: int = DEFAULT_COOLDOWN_SECONDS) -> None: + c = _load_cooldowns() + c[model] = time.time() + seconds + # Prune expired entries + c = {k: v for k, v in c.items() if v > time.time()} + _save_cooldowns(c) + + +def call_openrouter( + model: str, + messages: list[dict], + 
max_tokens: int = 4000, + system: Optional[str] = None, + timeout: int = 120, +) -> ORResponse: + """Call OpenRouter directly. Raises ORUnavailable on error.""" + api_key = os.environ.get("OPENROUTER_API_KEY", "") + if not api_key: + # Try loading from .env (accepts both `KEY=val` and `export KEY=val` formats) + env_file = Path.home() / ".surrogate" / ".env" + if env_file.exists(): + for line in env_file.read_text().splitlines(): + s = line.strip() + if s.startswith("export "): + s = s[len("export "):].lstrip() + if s.startswith("OPENROUTER_API_KEY="): + api_key = s.split("=", 1)[1].strip().strip('"').strip("'") + break + if not api_key: + raise ORUnavailable(model, 0, "OPENROUTER_API_KEY not set") + + body_msgs = list(messages) + if system: + body_msgs = [{"role": "system", "content": system}] + body_msgs + + body = json.dumps({ + "model": model, + "max_tokens": max_tokens, + "messages": body_msgs, + }).encode() + + req = urllib.request.Request( + OR_URL, + data=body, + headers={ + "Authorization": f"Bearer {api_key}", + "HTTP-Referer": "https://github.com/Ashira/axentx", + "X-Title": "axentx-smart-dispatcher", + "content-type": "application/json", + }, + method="POST", + ) + try: + with urllib.request.urlopen(req, timeout=timeout) as r: + data = json.loads(r.read()) + if "choices" not in data: + raise ORUnavailable(model, 0, str(data)[:200]) + choice = data["choices"][0] + content = choice["message"]["content"] + usage = data.get("usage", {}) + return ORResponse( + content=content, + model_requested=model, + model_served=data.get("model", model), + input_tokens=usage.get("prompt_tokens", 0), + output_tokens=usage.get("completion_tokens", 0), + ) + except urllib.error.HTTPError as e: + body = e.read().decode() + # 429 or 503 → mark cooldown + if e.code in (429, 503, 502): + mark_cooldown(model) + raise ORUnavailable(model, e.code, body) + except Exception as e: # network errors + raise ORUnavailable(model, 0, str(e)) + + +def pick_free() -> Optional[str]: + 
"""First free model not on cooldown.""" + for m in FREE_MODELS: + if not is_on_cooldown(m): + return m + return None + + +def pick_cheap() -> Optional[str]: + for m in CHEAP_MODELS: + if not is_on_cooldown(m): + return m + return None + + +def pick_premium() -> Optional[str]: + for m in PREMIUM_MODELS: + if not is_on_cooldown(m): + return m + return None + + +if __name__ == "__main__": + import sys + if len(sys.argv) > 1 and sys.argv[1] == "pick": + print(f"free: {pick_free()}") + print(f"cheap: {pick_cheap()}") + print(f"premium: {pick_premium()}") + else: + m = pick_free() or pick_cheap() or pick_premium() + q = sys.argv[1] if len(sys.argv) > 1 else "say OK" + r = call_openrouter(m, [{"role": "user", "content": q}], max_tokens=30) + print(f"[{r.model_served}] {r.content[:100]}") diff --git a/bin/lib/prompt_cache.py b/bin/lib/prompt_cache.py new file mode 100644 index 0000000000000000000000000000000000000000..d761ebdd491e17a7dcc231626d08861b4622f0ac --- /dev/null +++ b/bin/lib/prompt_cache.py @@ -0,0 +1,17 @@ +"""Anthropic prompt caching helper — adds cache_control to messages so repeated +system prompts / long contexts cost 10% of full price. +Usage: import this in any bridge that calls Anthropic API directly. +""" +def add_cache_control(messages, threshold=2048): + """Add cache_control to the longest system message if it's over threshold chars. 
+ Anthropic cache: https://docs.anthropic.com/en/docs/build-with-claude/prompt-caching + Requires anthropic-beta: prompt-caching-2024-07-31 header.""" + if not messages: return messages + for m in messages: + if m.get('role') == 'system' and isinstance(m.get('content'), str): + if len(m['content']) >= threshold: + # Convert to structured content with cache marker + m['content'] = [{'type': 'text', 'text': m['content'], + 'cache_control': {'type': 'ephemeral'}}] + break + return messages diff --git a/bin/lib/review_agent.py b/bin/lib/review_agent.py new file mode 100644 index 0000000000000000000000000000000000000000..efd841c85711e319d3a35fe373010d8bd5baaf82 --- /dev/null +++ b/bin/lib/review_agent.py @@ -0,0 +1,328 @@ +"""Review agent — tier-gated + consensus + ground-truth. + +Replaces the simple review() in smart_dispatcher.py. Rules: + + 1. Reviewer rank >= Writer rank (strict) + 2. Reviewer provider != Writer provider (cross-provider) + 3. For `critical=True` tasks: Reviewer rank >= Writer rank + 1, and 2-of-3 consensus + 4. If no eligible reviewer available RIGHT NOW → block (queue-wait), + retry when cache refreshes. DO NOT downgrade to lower tier. + 5. Ground-truth check runs alongside reviewer opinion: + code has blocking compile/parse failure → hard-fail regardless of reviewer +""" + +from __future__ import annotations + +import json +import re +import sys +import time +from pathlib import Path +from typing import Optional + +sys.path.insert(0, str(Path(__file__).parent)) + +from ground_truth import check as gt_check +from max_client import MAX_TIER_ORDER, MaxUnavailable, call_max, pick_max_model +from openrouter_client import ( + CHEAP_MODELS, + FREE_MODELS, + PREMIUM_MODELS, + ORUnavailable, + call_openrouter, + is_on_cooldown, +) +from tier_rank import _provider_family, is_eligible_reviewer, pick_reviewer_from, rank + + +REVIEWER_SYSTEM = """You are a strict code review agent. + +Your job: + 1. Check if the work fully addresses the task + 2. 
Check for correctness (syntax, logic, hallucinations) + 3. Check for completeness (edge cases, error handling) + 4. Rate severity of issues (low | med | high) + +Output JSON only (no markdown, no prose): +{ + "verdict": "pass" | "needs_revision", + "score": 0-10, + "issues": [{"severity":"low|med|high","desc":"..."}], + "suggestions": ["...", "..."], + "reasoning": "1-2 sentences" +} + +Rules: + - Any "high" severity issue → always "needs_revision" + - If you detect hallucinated APIs/functions → "needs_revision" with severity=high + - Be rigorous — pass only when genuinely good +""" + + +class NoEligibleReviewer(Exception): + """No reviewer currently available at required tier. Queue-wait.""" + + +def _available_reviewers() -> list[str]: + """Enumerate all currently available reviewer candidates. + + Max plan tiers (check quota) + OR tiers (check cooldowns). + """ + cands: list[str] = [] + + # Max tiers (use pick_max_model to respect cache) + # We collect all three; caller picks based on tier + for m in MAX_TIER_ORDER: + # only include if not currently rate-limited long-term + from max_client import load_quota_cache + q = load_quota_cache().get(m) + if not q or q.status == "allowed" or q.seconds_until_reset < 60: + cands.append(m) + + # OR tiers + for m in PREMIUM_MODELS + CHEAP_MODELS + FREE_MODELS: + if not is_on_cooldown(m): + cands.append(m) + return cands + + +def _call_model_for_review(model: str, prompt: str, system: str) -> tuple[str, str]: + """Route to Max or OR depending on model name. 
Returns (text, served_model_id).""" + if model in MAX_TIER_ORDER: + r = call_max(model, [{"role": "user", "content": prompt}], + max_tokens=1500, system=system, timeout=120) + return r.content, r.model_served + r = call_openrouter(model, [{"role": "user", "content": prompt}], + max_tokens=1500, system=system, timeout=120) + return r.content, r.model_served + + +def _parse_json_verdict(text: str) -> dict: + text = text.strip() + if text.startswith("```"): + text = text.split("```", 2)[1] if "```" in text[3:] else text[3:] + text = text.lstrip("json").lstrip() + if "```" in text: + text = text.rsplit("```", 1)[0] + try: + return json.loads(text) + except json.JSONDecodeError: + m = re.search(r"\{.*\}", text, re.DOTALL) + if m: + try: + return json.loads(m.group(0)) + except json.JSONDecodeError: + pass + return {"verdict": "needs_revision", "reasoning": "review parse failed", + "raw": text[:500], "score": 0, "issues": [], "suggestions": []} + + +def review_once( + task_prompt: str, + work_product: str, + writer_model: str, + critical: bool = False, + queue_wait_max_seconds: int = 600, + poll_interval: int = 15, +) -> dict: + """Single-reviewer review with tier enforcement. + + Blocks (queue-wait) up to queue_wait_max_seconds if no eligible reviewer. + Raises NoEligibleReviewer after timeout. 
+ """ + deadline = time.time() + queue_wait_max_seconds + + reviewer: Optional[str] = None + waits = 0 + while time.time() < deadline: + cands = _available_reviewers() + reviewer = pick_reviewer_from(cands, writer_model, critical=critical) + if reviewer: + break + waits += 1 + time.sleep(poll_interval) + + if not reviewer: + raise NoEligibleReviewer( + f"no reviewer with rank>={rank(writer_model) + (1 if critical else 0)} " + f"and provider!={_provider_family(writer_model)} after {queue_wait_max_seconds}s" + ) + + review_prompt = f"""# TASK +{task_prompt} + +# WORK PRODUCT +{work_product} + +# YOUR REVIEW (valid JSON only):""" + + try: + text, served = _call_model_for_review(reviewer, review_prompt, REVIEWER_SYSTEM) + except (MaxUnavailable, ORUnavailable) as e: + # Reviewer itself errored — retry with fresh pool + return {"verdict": "needs_revision", "reasoning": f"reviewer call failed: {e}", + "reviewer_model": reviewer, "score": 0, + "transport_error": True} + + parsed = _parse_json_verdict(text) + parsed["reviewer_model"] = served + parsed["reviewer_provider_family"] = _provider_family(served) + parsed["reviewer_rank"] = rank(served) + parsed["writer_rank"] = rank(writer_model) + parsed["wait_cycles"] = waits + return parsed + + +def review_with_consensus( + task_prompt: str, + work_product: str, + writer_model: str, + num_reviewers: int = 3, + required_agree: int = 2, + critical: bool = True, + queue_wait_max_seconds: int = 600, +) -> dict: + """Multi-reviewer consensus review. Used for critical tasks. + + Picks N reviewers from DIFFERENT provider families (+ cross-provider from writer). + Verdict = pass if required_agree reviewers say "pass". 
+ """ + deadline = time.time() + queue_wait_max_seconds + reviewers: list[str] = [] + used_families: set[str] = {_provider_family(writer_model)} + + # Collect N reviewers from N distinct families + while len(reviewers) < num_reviewers and time.time() < deadline: + cands = _available_reviewers() + # Filter: eligible + family not yet used + new_picks: list[str] = [] + for c in cands: + fam = _provider_family(c) + if fam in used_families: + continue + ok, _ = is_eligible_reviewer(writer_model, c, critical=critical) + if ok: + new_picks.append(c) + # Pick highest rank per family + by_family: dict[str, tuple[int, str]] = {} + for c in new_picks: + fam = _provider_family(c) + r = rank(c) + if fam not in by_family or by_family[fam][0] < r: + by_family[fam] = (r, c) + for fam, (_, model) in sorted(by_family.items(), key=lambda x: -x[1][0]): + if len(reviewers) >= num_reviewers: + break + reviewers.append(model) + used_families.add(fam) + if len(reviewers) < num_reviewers: + time.sleep(15) + + if len(reviewers) < required_agree: + raise NoEligibleReviewer( + f"consensus needs {required_agree} distinct-family reviewers, got {len(reviewers)}" + ) + + # Fire reviews + individual_verdicts: list[dict] = [] + for rv in reviewers: + try: + v = review_once(task_prompt, work_product, writer_model, + critical=critical, queue_wait_max_seconds=30) + # Force it to use THIS specific reviewer + # (review_once picks top; we need to override — run directly) + text, served = _call_model_for_review( + rv, + f"# TASK\n{task_prompt}\n\n# WORK PRODUCT\n{work_product}\n\n# YOUR REVIEW (JSON):", + REVIEWER_SYSTEM, + ) + parsed = _parse_json_verdict(text) + parsed["reviewer_model"] = served + parsed["reviewer_rank"] = rank(served) + parsed["reviewer_provider_family"] = _provider_family(served) + individual_verdicts.append(parsed) + except (MaxUnavailable, ORUnavailable) as e: + individual_verdicts.append( + {"verdict": "needs_revision", "reasoning": f"reviewer error: {e}", + "reviewer_model": rv, 
"transport_error": True} + ) + + passes = sum(1 for v in individual_verdicts if v.get("verdict") == "pass") + consensus_verdict = "pass" if passes >= required_agree else "needs_revision" + + # Aggregate issues from ALL reviewers (even if majority passes) + all_issues: list[dict] = [] + all_suggestions: list[str] = [] + for v in individual_verdicts: + all_issues.extend(v.get("issues", []) or []) + all_suggestions.extend(v.get("suggestions", []) or []) + + return { + "verdict": consensus_verdict, + "consensus_pass_count": passes, + "consensus_required": required_agree, + "individual_verdicts": individual_verdicts, + "issues": all_issues, + "suggestions": all_suggestions, + "reviewers": [v.get("reviewer_model") for v in individual_verdicts], + "writer_rank": rank(writer_model), + "reasoning": f"consensus {passes}/{len(individual_verdicts)} pass (required {required_agree})", + } + + +def review_full( + task_prompt: str, + work_product: str, + writer_model: str, + critical: bool = False, + use_consensus: bool = False, +) -> dict: + """Full review = reviewer opinion + ground-truth check. + + Ground-truth BLOCKING failure → hard fail regardless of reviewer. + """ + # 1. Ground-truth + gt = gt_check(work_product) + + # 2. Reviewer opinion + if use_consensus: + reviewer = review_with_consensus( + task_prompt, work_product, writer_model, + num_reviewers=3, required_agree=2, critical=critical, + ) + else: + reviewer = review_once(task_prompt, work_product, writer_model, critical=critical) + + # 3. 
Combine + final_verdict = reviewer.get("verdict", "needs_revision") + if gt.get("blocking_failure"): + final_verdict = "needs_revision" + + return { + "verdict": final_verdict, + "reviewer": reviewer, + "ground_truth": gt, + "override_by_ground_truth": gt.get("blocking_failure", False), + } + + +if __name__ == "__main__": + import sys + if len(sys.argv) < 3: + print("usage: review_agent.py ") + sys.exit(1) + task = sys.argv[1] + work = Path(sys.argv[2]).read_text() + writer = sys.argv[3] if len(sys.argv) > 3 else "claude-haiku-4-5-20251001" + critical = "--critical" in sys.argv + consensus = "--consensus" in sys.argv + r = review_full(task, work, writer, critical=critical, use_consensus=consensus) + print(json.dumps({ + "verdict": r["verdict"], + "ground_truth_verdict": r["ground_truth"]["verdict"], + "ground_truth_blocking": r["ground_truth"]["blocking_failure"], + "override_by_ground_truth": r["override_by_ground_truth"], + "reviewer_model": r["reviewer"].get("reviewer_model"), + "reviewer_rank": r["reviewer"].get("reviewer_rank"), + "reviewer_verdict": r["reviewer"].get("verdict"), + }, indent=2)) diff --git a/bin/lib/smart_dispatcher.py b/bin/lib/smart_dispatcher.py new file mode 100644 index 0000000000000000000000000000000000000000..0fff593018d9464f12a67c40b8c1829b45e74a14 --- /dev/null +++ b/bin/lib/smart_dispatcher.py @@ -0,0 +1,420 @@ +"""Smart dispatcher — Max plan → OR free → OR paid with checkpoint + review. + +Tier priority (per Ashira 2026-04-19): + 1. Max Opus 4.x (leverage flat-rate first) + 2. Max Sonnet 4.x (same plan, same pool typically) + 3. Max Haiku 4.x (cheapest Max tier) + 4. OR FREE models (qwen / gpt-oss / llama / nemotron / glm) + 5. OR CHEAP paid (deepseek / grok-fast) + 6. OR PREMIUM paid (gpt-5 / claude-opus / claude-sonnet via OR) + +Continuous re-check: every 5 min probe Max tiers — if Opus/Sonnet come back +available, subsequent calls return to them (honor Max plan flat-rate). 
"""Smart dispatcher — strict-priority provider routing with cross-provider review.

Review retry: INFINITE per Ashira — runs revisions until reviewer passes.
"""

from __future__ import annotations

import datetime as dt
import json
import sys
import time
from pathlib import Path
from typing import Callable, Optional

sys.path.insert(0, str(Path(__file__).parent))

from checkpoint import Checkpoint
from codebase_scanner import as_context_prompt, scan
from max_client import (
    MAX_TIER_ORDER,
    MODEL_HAIKU,
    MODEL_OPUS,
    MODEL_SONNET,
    MaxAuthError,
    MaxUnavailable,
    call_max,
    pick_max_model,
    probe_and_refresh_cache,
)
from openrouter_client import (
    CHEAP_MODELS,
    FREE_MODELS,
    PREMIUM_MODELS,
    ORResponse,
    ORUnavailable,
    call_openrouter,
    is_on_cooldown,
)
from review_agent import NoEligibleReviewer, review_full


# Single-element list: a mutable cell so the timestamp can be updated in place
# without `global`.
LAST_MAX_PROBE: list[float] = [0.0]
MAX_PROBE_INTERVAL = 300  # seconds between Max-tier availability probes (5 min)


class DispatchResult:
    """Outcome of a successful provider call: text plus provenance + token counts."""

    def __init__(self, text: str, provider: str, model: str, input_tokens: int = 0, output_tokens: int = 0):
        self.text = text
        self.provider = provider  # "max" | "or_free" | "or_cheap" | "or_premium"
        self.model = model
        self.input_tokens = input_tokens
        self.output_tokens = output_tokens


def _tier_iter() -> list[tuple[str, list[str]]]:
    """Ordered tiers to try in strict priority."""
    return [
        ("max", MAX_TIER_ORDER),
        ("or_free", FREE_MODELS),
        ("or_cheap", CHEAP_MODELS),
        ("or_premium", PREMIUM_MODELS),
    ]


def _maybe_probe_max() -> None:
    """Every 5 min, send minimal probes to each Max tier to refresh cache."""
    if time.time() - LAST_MAX_PROBE[0] > MAX_PROBE_INTERVAL:
        try:
            probe_and_refresh_cache()
            LAST_MAX_PROBE[0] = time.time()
        except MaxAuthError:
            pass  # handled at call time


def dispatch(
    prompt: str,
    system: Optional[str] = None,
    task_id: Optional[str] = None,
    max_tokens: int = 4096,
    checkpoint: Optional[Checkpoint] = None,
    prefer_max: bool = True,
    exclude_providers: set[str] | None = None,
    on_attempt: Optional[Callable[[str, str], None]] = None,
) -> DispatchResult:
    """Try tiers in order until one succeeds. Logs to checkpoint.

    Args:
        prompt: user message
        system: system prompt (optional)
        task_id: for logging
        max_tokens: output cap
        checkpoint: Checkpoint instance for event logging
        prefer_max: try Max first (True) — set False for review agent (cross-provider)
        exclude_providers: skip these providers (e.g. {"max"} to force OR)
        on_attempt: callback(provider, model) called per attempt (for debugging)

    Returns DispatchResult, or raises RuntimeError if ALL tiers are exhausted.
    """
    exclude = exclude_providers or set()
    messages = [{"role": "user", "content": prompt}]
    _maybe_probe_max()

    tiers = _tier_iter()
    if not prefer_max:
        tiers = [t for t in tiers if t[0] != "max"]

    errors: list[str] = []

    for tier_name, models in tiers:
        if tier_name in exclude:
            continue

        if tier_name == "max":
            m = pick_max_model()
            if m is None:
                errors.append("max: all tiers rate-limited")
                continue
            if on_attempt:
                on_attempt("max", m)
            if checkpoint:
                checkpoint.append("provider_selected", provider="max", model=m)
            try:
                r = call_max(m, messages, max_tokens=max_tokens, system=system)
                if checkpoint:
                    checkpoint.append("provider_success", provider="max", model=m,
                                      content_preview=r.content[:200],
                                      input_tokens=r.input_tokens,
                                      output_tokens=r.output_tokens)
                return DispatchResult(r.content, "max", m, r.input_tokens, r.output_tokens)
            except MaxUnavailable as e:
                errors.append(f"max:{m} 429 (reset {e.reset_at})")
                if checkpoint:
                    checkpoint.append("provider_failed", provider="max", model=m,
                                      reason=f"rate_limit reset_at={e.reset_at}")
                continue
            except MaxAuthError as e:
                errors.append(f"max auth: {e}")
                if checkpoint:
                    checkpoint.append("provider_failed", provider="max", reason=f"auth: {e}")
                # Max totally broken — skip tier but keep going with OR
                continue
        else:
            # OpenRouter tier: walk the model list, skipping cooled-down models.
            for m in models:
                if is_on_cooldown(m):
                    continue
                if on_attempt:
                    on_attempt(tier_name, m)
                if checkpoint:
                    checkpoint.append("provider_selected", provider=tier_name, model=m)
                try:
                    r = call_openrouter(m, messages, max_tokens=max_tokens, system=system)
                    if checkpoint:
                        checkpoint.append("provider_success", provider=tier_name, model=m,
                                          content_preview=r.content[:200],
                                          input_tokens=r.input_tokens,
                                          output_tokens=r.output_tokens)
                    return DispatchResult(r.content, tier_name, m, r.input_tokens, r.output_tokens)
                except ORUnavailable as e:
                    errors.append(f"{tier_name}:{m} {e.code}")
                    if checkpoint:
                        checkpoint.append("provider_failed", provider=tier_name, model=m,
                                          reason=f"{e.code}: {e.body[:100]}")
                    continue

    # All tiers exhausted
    raise RuntimeError(f"all providers exhausted: {errors}")


# ----------------------------------------------------------------------
# Review agent (cross-provider debate)
# ----------------------------------------------------------------------
REVIEWER_SYSTEM = """You are a strict code review agent. You review another AI's work for a given task.
Your job:
  1. Check if the work fully addresses the task
  2. Check for correctness (syntax, logic, hallucinations)
  3. Check for completeness (edge cases, error handling)
  4. Rate severity of issues

Output JSON only, no prose:
{
  "verdict": "pass" | "needs_revision",
  "score": 0-10,
  "issues": [{"severity":"low|med|high","desc":"..."}],
  "suggestions": ["...", "..."],
  "reasoning": "1-2 sentences"
}

If no issues, "pass". If ANY "high" severity issue → always "needs_revision"."""


def _parse_review_json(raw: str) -> dict:
    """Parse the reviewer's JSON reply, tolerating markdown fences and prose.

    Fix vs. original: the "json" language tag is removed as a case-insensitive
    *prefix*; str.lstrip("json") stripped a character SET and also missed
    uppercase "JSON" fence tags.
    """
    text = raw.strip()
    if text.startswith("```"):
        # Take the fenced payload when a closing fence exists, else drop the fence.
        text = text.split("```", 2)[1] if "```" in text[3:] else text[3:]
        text = text.lstrip()
        if text[:4].lower() == "json":
            text = text[4:].lstrip()
    if "```" in text:
        text = text.rsplit("```", 1)[0]
    try:
        return json.loads(text)
    except json.JSONDecodeError:
        pass
    # Fall back to the first {...} block anywhere in the reply.
    import re
    m = re.search(r"\{.*\}", text, re.DOTALL)
    if m:
        try:
            return json.loads(m.group(0))
        except json.JSONDecodeError:
            pass
    return {"verdict": "needs_revision", "reasoning": "review parse failed",
            "raw": text[:500]}


def review(
    task_prompt: str,
    work_product: str,
    writer_provider: str,
    checkpoint: Optional[Checkpoint] = None,
) -> dict:
    """Send work for cross-provider review. Uses different provider than writer.

    Returns:
        {"verdict": "pass|needs_revision", "score": int, "issues": [...],
         "suggestions": [...], "reasoning": "...", "reviewer_model": "..."}
    """
    # Cross-provider: if writer was Max/Anthropic → reviewer from OR non-Anthropic
    exclude = set()
    if writer_provider == "max":
        exclude.add("max")  # reviewer uses OR

    review_prompt = f"""# TASK ORIGINAL
{task_prompt}

# WORK PRODUCT TO REVIEW
{work_product}

# YOUR REVIEW (JSON only):"""

    if checkpoint:
        checkpoint.append("review_requested", writer_provider=writer_provider)

    result = dispatch(
        prompt=review_prompt,
        system=REVIEWER_SYSTEM,
        checkpoint=checkpoint,
        max_tokens=1500,
        exclude_providers=exclude,
        prefer_max=(writer_provider != "max"),
    )

    parsed = _parse_review_json(result.text)
    parsed["reviewer_provider"] = result.provider
    parsed["reviewer_model"] = result.model
    if checkpoint:
        checkpoint.append("review_verdict", **parsed)
    return parsed


# ----------------------------------------------------------------------
# Full orchestration
# ----------------------------------------------------------------------
def execute_task(
    task_id: str,
    prompt: str,
    system_base: str = "",
    max_tokens: int = 4096,
    max_review_iterations: int = 0,  # 0 = infinite (per Ashira)
    codebase_artifacts: list[str] | None = None,
    critical: bool = False,       # True → reviewer rank > writer + consensus 2/3
    use_consensus: bool = False,  # True → 2-of-3 reviewers vote
) -> dict:
    """End-to-end: scan codebase → dispatch → review → revise until pass.

    Returns: {"task_id","final_text","iterations","reviewer_verdict",...}
    """
    cp = Checkpoint.open(task_id)

    # Resume support
    existing_state = cp.resume_state()
    iteration = existing_state["review_iterations"]
    draft = existing_state["draft_text"]
    if existing_state["completed"]:
        return {"task_id": task_id, "status": "already_done",
                "final_text": draft, "iterations": iteration}

    if not existing_state["started"]:
        cp.append("task_start", prompt=prompt[:500])

        # Phase 1: codebase review
        report = scan(prompt, codebase_artifacts)
        cp.append("codebase_review",
                  artifacts=[f["path"] for f in report["recent_files"][:15]],
                  uncommitted_repos=len(report["uncommitted_repos"]),
                  semantic_hits=len(report["semantic_hits"]))
        codebase_ctx = as_context_prompt(report, 6000)
        system = (system_base + "\n\n" + codebase_ctx).strip()
    else:
        # Resume: re-scan codebase (may have changed)
        report = scan(prompt, codebase_artifacts)
        cp.append("codebase_review",
                  artifacts=[f["path"] for f in report["recent_files"][:15]],
                  resumed=True)
        codebase_ctx = as_context_prompt(report, 6000)
        system = (system_base + "\n\n" + codebase_ctx).strip()
        # Include prior draft as context for continuation
        if draft:
            system += f"\n\n## Previous attempt (continue/refine this):\n{draft[:3000]}"

    # Phase 2: dispatch + review loop
    last_review: dict | None = None
    accumulated_feedback = ""

    while True:
        iteration += 1
        iter_prompt = prompt
        if accumulated_feedback:
            iter_prompt = f"{prompt}\n\n## Reviewer feedback from prior iteration (address these):\n{accumulated_feedback}"

        result = dispatch(
            prompt=iter_prompt,
            system=system,
            checkpoint=cp,
            max_tokens=max_tokens,
        )
        draft = result.text
        cp.append("result_draft", text=draft, iteration=iteration,
                  provider=result.provider, model=result.model)

        # Review — tier-enforced + ground-truth via review_agent.review_full.
        # Fix vs. original: when no eligible reviewer is available we queue-wait
        # and retry the REVIEW of the same draft, instead of rolling back the
        # iteration counter and re-dispatching a brand-new draft (which burned
        # writer quota and discarded the finished work).
        while True:
            try:
                full_review = review_full(
                    task_prompt=prompt,
                    work_product=draft,
                    writer_model=result.model,
                    critical=critical,
                    use_consensus=use_consensus or critical,
                )
                break
            except NoEligibleReviewer as e:
                cp.append("review_blocked", reason=str(e))
                time.sleep(30)  # queue-wait, then poll again

        cp.append("review_full",
                  verdict=full_review["verdict"],
                  reviewer_model=full_review["reviewer"].get("reviewer_model"),
                  reviewer_rank=full_review["reviewer"].get("reviewer_rank"),
                  writer_rank=full_review["reviewer"].get("writer_rank"),
                  ground_truth_verdict=full_review["ground_truth"]["verdict"],
                  ground_truth_blocking=full_review["ground_truth"]["blocking_failure"],
                  override_by_ground_truth=full_review["override_by_ground_truth"])
        last_review = dict(full_review["reviewer"])
        last_review["verdict"] = full_review["verdict"]
        last_review["ground_truth"] = full_review["ground_truth"]

        verdict = last_review.get("verdict", "needs_revision")
        if verdict == "pass":
            cp.append("task_done", iteration=iteration, final_length=len(draft))
            cp.archive()
            return {
                "task_id": task_id,
                "status": "done",
                "final_text": draft,
                "iterations": iteration,
                "last_review": last_review,
                "writer": f"{result.provider}/{result.model}",
            }

        # needs_revision — assemble feedback for the next writer pass
        issues = last_review.get("issues", [])
        suggestions = last_review.get("suggestions", [])
        fb_lines = []
        for i in issues:
            fb_lines.append(f"- [{i.get('severity','?')}] {i.get('desc','')}")
        for s in suggestions:
            fb_lines.append(f"- {s}")
        accumulated_feedback = "\n".join(fb_lines) if fb_lines else last_review.get("reasoning", "")
        cp.append("revision_requested", iteration=iteration,
                  feedback=accumulated_feedback[:500])

        # Safety valve: if max_review_iterations > 0, enforce it. 0 = infinite.
        if max_review_iterations > 0 and iteration >= max_review_iterations:
            cp.append("task_failed", reason=f"max_iterations_{max_review_iterations}")
            cp.archive()
            return {
                "task_id": task_id,
                "status": "failed_max_iter",
                "final_text": draft,
                "iterations": iteration,
                "last_review": last_review,
            }


if __name__ == "__main__":
    import uuid
    if len(sys.argv) < 2:
        # Fix: the <prompt> placeholder had been stripped from the usage string.
        print("usage: smart_dispatcher.py <prompt>")
        sys.exit(1)
    task_id = "adhoc-" + uuid.uuid4().hex[:8]
    prompt = " ".join(sys.argv[1:])
    r = execute_task(task_id, prompt, max_tokens=500)
    print(json.dumps({
        "task_id": r["task_id"],
        "status": r["status"],
        "iterations": r["iterations"],
        "writer": r.get("writer"),
        "preview": r["final_text"][:400],
    }, indent=2))
"""Model tier rank — enforces "reviewer >= writer" quality rule.

Rank scale (1-10, approximate SWE-Bench Verified + LMArena Q1 2026):
  10 Claude Opus 4.7, GPT-5.4
   9 Claude Sonnet 4.6, GPT-5.4-pro, Grok 4.20, Gemini 3.1 Pro
   8 Claude Opus 4.6, DeepSeek V3.2 (coding strong)
   7 Claude Haiku 4.5, Grok 4.1 Fast, Qwen 3.6 35B-MoE
   6 Llama 3.3 70B, Mistral Large 3, Kimi K2.5, Qwen 3.5 Coder 32B
   5 Nemotron 120B, GLM 4.5 Air, Qwen 3.5 Coder 14B
   4 GPT-OSS 120B, Gemma 4 31B
   3 GPT-OSS 20B, Llama 3.3 8B, small local

Policy (per Ashira 2026-04-19):
  - Reviewer tier MUST be >= writer tier.
  - For code/IaC/security tasks, prefer reviewer tier > writer by 1.
  - If no eligible reviewer available → queue-wait (DO NOT downgrade writer).
"""

from __future__ import annotations

TIER_RANK: dict[str, int] = {
    # === 10: frontier ===
    "anthropic/claude-opus-4.7": 10,
    "openai/gpt-5.4": 10,
    "openrouter/anthropic/claude-opus-4.7": 10,
    "openrouter/openai/gpt-5.4": 10,

    # === 9: premium ===
    "anthropic/claude-sonnet-4.6": 9,
    "openai/gpt-5.4-pro": 9,
    "x-ai/grok-4.20": 9,
    "google/gemini-3.1-pro": 9,
    "openrouter/anthropic/claude-sonnet-4.6": 9,
    "openrouter/x-ai/grok-4.20": 9,
    # Max-plan native (OAuth)
    "claude-opus-4-20250514": 9,    # Opus 4 (Max plan native)
    "claude-sonnet-4-20250514": 9,  # Sonnet 4 (Max plan native)

    # === 8: strong ===
    "anthropic/claude-opus-4.6": 8,
    "deepseek/deepseek-v3.2": 8,
    "openrouter/deepseek/deepseek-v3.2": 8,

    # === 7: capable ===
    "anthropic/claude-haiku-4.5": 7,
    "x-ai/grok-4.1-fast": 7,
    "openrouter/anthropic/claude-haiku-4.5": 7,
    "openrouter/x-ai/grok-4.1-fast": 7,
    "claude-haiku-4-5-20251001": 7,  # Haiku 4.5 (Max plan native)
    "qwen/qwen3.6-35b-a3b": 7,
    "openrouter/qwen/qwen3.6-35b-a3b": 7,

    # === 6: mid ===
    "meta-llama/llama-3.3-70b-instruct": 6,
    "qwen/qwen3-next-80b-a3b-instruct": 6,
    "qwen/qwen3-coder": 6,
    "moonshotai/kimi-k2.5": 6,
    "mistral-large-3": 6,

    # === 5: weak-mid ===
    "nvidia/nemotron-3-super-120b-a12b": 5,
    "z-ai/glm-4.5-air": 5,

    # === 4: small ===
    "openai/gpt-oss-120b": 4,
    "google/gemma-4-31b-it": 4,

    # === 3: tiny / free ===
    "openai/gpt-oss-20b": 3,
    "meta-llama/llama-3.3-8b-instruct": 3,
}


def rank(model: str) -> int:
    """Return quality rank 1-10 for a model id; unknown models default to 5.

    Lookup order: exact table hit (after stripping a ":free" suffix and edge
    slashes) → table hit with the "openrouter/" routing prefix removed or
    added → case-insensitive family heuristics for unseen variants.
    """
    if not model:
        return 5
    base = model.replace(":free", "").strip("/")
    # Fix: the original also probed an empty "" prefix, which only produced
    # no-op duplicate lookups (base.replace("", "") == base).
    for candidate in (base, base.removeprefix("openrouter/"), "openrouter/" + base):
        if candidate in TIER_RANK:
            return TIER_RANK[candidate]
    # Partial match (last-resort — for unknown variants of known families)
    lower = base.lower()
    if "opus-4.7" in lower or "opus-4-7" in lower: return 10
    if "gpt-5.4" in lower and "mini" not in lower and "nano" not in lower: return 10
    if "sonnet-4.6" in lower or "sonnet-4-6" in lower: return 9
    if "opus-4" in lower or "opus_4" in lower: return 8
    if "grok-4.2" in lower: return 9
    if "gemini-3" in lower and "flash" not in lower: return 9
    if "haiku-4" in lower: return 7
    if "deepseek-v3" in lower: return 8
    if "grok-4.1" in lower or "grok-fast" in lower: return 7
    if "qwen3.6" in lower: return 7
    if "llama-3.3-70" in lower: return 6
    if "nemotron" in lower: return 5
    if "glm-4.5" in lower: return 5
    if "gpt-oss-120" in lower: return 4
    if "gemma-4-31" in lower: return 4
    if "gpt-oss-20" in lower: return 3
    return 5


def is_eligible_reviewer(writer_model: str, reviewer_model: str,
                         critical: bool = False,
                         cross_provider_required: bool = True) -> tuple[bool, str]:
    """Check if reviewer qualifies.

    Rules:
      1. rank(reviewer) >= rank(writer)      [always]
      2. rank(reviewer) >= rank(writer) + 1  [when critical]
      3. reviewer provider != writer provider [when cross_provider_required]

    Returns (ok, reason).
    """
    wr = rank(writer_model)
    rr = rank(reviewer_model)
    min_rank = wr + 1 if critical else wr

    if rr < min_rank:
        return False, f"reviewer rank {rr} < required {min_rank} (writer={wr})"

    if cross_provider_required:
        wp = _provider_family(writer_model)
        rp = _provider_family(reviewer_model)
        if wp == rp and wp != "unknown":
            return False, f"same provider family '{wp}' — need cross-provider"
        return True, f"ok: rank {rr} >= {min_rank}, cross-provider satisfied"

    # Fix: don't claim "cross-provider satisfied" when that check was skipped.
    return True, f"ok: rank {rr} >= {min_rank}"


# Ordered marker table: first family whose marker appears in the id wins.
# Order matters (e.g. "anthropic/claude-*" must resolve to anthropic).
_FAMILY_MARKERS: tuple[tuple[str, tuple[str, ...]], ...] = (
    ("anthropic", ("claude", "anthropic")),
    ("openai", ("gpt-", "openai", "gpt_")),
    ("google", ("gemini", "gemma")),
    ("xai", ("grok", "x-ai")),
    ("deepseek", ("deepseek",)),
    ("qwen", ("qwen",)),
    ("meta", ("llama", "meta")),
    ("moonshot", ("kimi", "moonshot")),
    ("mistral", ("mistral",)),
    ("nvidia", ("nemotron", "nvidia")),
    ("zai", ("glm", "z-ai")),
)


def _provider_family(model: str) -> str:
    """Group models by maker for the cross-provider check ("unknown" if unmatched)."""
    m = model.lower()
    for family, markers in _FAMILY_MARKERS:
        if any(marker in m for marker in markers):
            return family
    return "unknown"


def pick_reviewer_from(candidates: list[str], writer_model: str,
                       critical: bool = False) -> str | None:
    """Pick the highest-rank eligible reviewer from `candidates`, or None.

    Ties are broken by list position (earlier candidate wins), matching the
    original stable-sort behaviour — max() returns the first maximal element.
    """
    eligible = [(rank(c), c) for c in candidates
                if is_eligible_reviewer(writer_model, c, critical=critical)[0]]
    if not eligible:
        return None
    return max(eligible, key=lambda rc: rc[0])[1]


if __name__ == "__main__":
    import sys
    if len(sys.argv) >= 3:
        w, r = sys.argv[1], sys.argv[2]
        crit = "--critical" in sys.argv
        ok, reason = is_eligible_reviewer(w, r, critical=crit)
        print(f"writer={w} rank={rank(w)}")
        print(f"reviewer={r} rank={rank(r)}")
        print(f"eligible={ok}: {reason}")
    else:
        for m in ["claude-opus-4-20250514", "claude-sonnet-4-20250514",
                  "claude-haiku-4-5-20251001", "openai/gpt-5.4",
                  "deepseek/deepseek-v3.2", "openai/gpt-oss-120b:free",
                  "qwen/qwen3-coder:free", "meta-llama/llama-3.3-8b-instruct:free"]:
            print(f"  rank({m}) = {rank(m)} [{_provider_family(m)}]")
"claude-sonnet-4-20250514", + "claude-haiku-4-5-20251001", "openai/gpt-5.4", + "deepseek/deepseek-v3.2", "openai/gpt-oss-120b:free", + "qwen/qwen3-coder:free", "meta-llama/llama-3.3-70b-instruct:free"]: + print(f" rank({m}) = {rank(m)} [{_provider_family(m)}]") diff --git a/bin/notify-discord.sh b/bin/notify-discord.sh index d868ec07ba2cecbb1b2e185732dde93afdad6dc2..009b0be35f2c7417bc8ee430f9b3d4d4ebfde53e 100755 --- a/bin/notify-discord.sh +++ b/bin/notify-discord.sh @@ -10,7 +10,7 @@ # Examples: # notify-discord.sh success "Task done" "p42 completed in 180s" # notify-discord.sh error "Daemon crashed" "qwen-coder exit 1" -# tail -50 ~/.claude/logs/scrape.log | notify-discord.sh scrape "Scrape report" +# tail -50 ~/.surrogate/logs/scrape.log | notify-discord.sh scrape "Scrape report" set -u set -a; source "$HOME/.hermes/.env" 2>/dev/null; set +a diff --git a/bin/nvidia-bridge.sh b/bin/nvidia-bridge.sh new file mode 100755 index 0000000000000000000000000000000000000000..f62c7c21a23e58c0784b6a5aebd9f27a0bf935e0 --- /dev/null +++ b/bin/nvidia-bridge.sh @@ -0,0 +1,59 @@ +#!/usr/bin/env bash +# NVIDIA NIM bridge — OpenAI-compat via integrate.api.nvidia.com +# Free tier: ~1000 req/day, 50+ models (Llama, DeepSeek, Nemotron, Qwen, etc.) +set -u +MODEL="meta/llama-3.3-70b-instruct" +MAX_TOKENS=2000 +TEMP=0.3 +PROMPT="" + +while [[ $# -gt 0 ]]; do + case "$1" in + --model) + case "$2" in + llama|l70) MODEL="meta/llama-3.3-70b-instruct" ;; + nemotron) MODEL="nvidia/nemotron-4-340b-instruct" ;; + nemotron-nano) MODEL="nvidia/nemotron-3-nano-9b-v1" ;; + deepseek|r1) MODEL="deepseek-ai/deepseek-r1" ;; + qwen|coder) MODEL="qwen/qwen2.5-coder-32b-instruct" ;; + mistral) MODEL="mistralai/mistral-large-2-instruct" ;; + *) MODEL="$2" ;; + esac; shift 2 ;; + --max-tokens) MAX_TOKENS="$2"; shift 2 ;; + *) PROMPT="$*"; break ;; + esac +done +[[ -z "$PROMPT" ]] && [[ ! 
-t 0 ]] && PROMPT=$(cat) +[[ -z "$PROMPT" ]] && { echo "nvidia-bridge: no prompt" >&2; exit 2; } + +LOG="$HOME/.surrogate/logs/nvidia-bridge.log" +mkdir -p "$(dirname "$LOG")" +set -a; source "$HOME/.hermes/.env"; set +a +echo "[$(date '+%H:%M:%S')] model=$MODEL len=${#PROMPT}" >> "$LOG" + +RESPONSE=$(python3 -c " +import os +exec(open(os.path.expanduser('~/.surrogate/bin/lib/dns_fallback.py')).read()) +exec(open(os.path.expanduser('~/.surrogate/bin/lib/bridge_retry.py')).read()) +import json, sys +body = { + 'model': '$MODEL', + 'messages': [{'role':'user','content': sys.stdin.read()}], + 'max_tokens': $MAX_TOKENS, 'temperature': $TEMP, + 'stream': False, +} +try: + d = request_with_retry( + 'https://integrate.api.nvidia.com/v1/chat/completions', + data=json.dumps(body).encode(), + headers={'Content-Type':'application/json', 'User-Agent':'hermes-agent/1.0', 'Authorization':'Bearer '+os.environ.get('NVIDIA_API_KEY','')}, + timeout=120, max_retries=4, base_delay=3.0, open_seconds=120, + ) + print(d.get('choices',[{}])[0].get('message',{}).get('content','')) +except Exception as e: + print(f'nvidia-bridge error: {e}', file=sys.stderr); sys.exit(1) +" <<< "$PROMPT") +RC=$? 
+echo "[$(date '+%H:%M:%S')] rc=$RC bytes=${#RESPONSE}" >> "$LOG" +[[ $RC -ne 0 ]] && exit $RC +echo "$RESPONSE" diff --git a/bin/perf-watchdog.sh b/bin/perf-watchdog.sh index 0550610791ad5b092a049a8a53fa2c64d05c3e21..34f7499f2634719d1a87c3e8f9dbe8ce41fd51ba 100755 --- a/bin/perf-watchdog.sh +++ b/bin/perf-watchdog.sh @@ -5,15 +5,15 @@ # - load avg 1min (kill if > 10, warn if > 7) # - memory free pages (warn if < 30k, emergency < 15k) # - swap I/O rate (emergency if spiking) -# - disk space on ~/.claude/state (warn if < 2GB) +# - disk space on ~/.surrogate/state (warn if < 2GB) # - scrape process count (cap at 30, kill oldest if exceeded) # # Actions: # - WARN: log + throttle (pause new burst triggers via state file) # - EMERGENCY: kill all scrape processes, set pause flag for 10 min set -u -LOG="$HOME/.claude/logs/perf-watchdog.log" -PAUSE_FLAG="$HOME/.claude/state/scrape-paused" +LOG="$HOME/.surrogate/logs/perf-watchdog.log" +PAUSE_FLAG="$HOME/.surrogate/state/scrape-paused" mkdir -p "$(dirname "$LOG")" "$(dirname "$PAUSE_FLAG")" # Thresholds diff --git a/bin/push-training-to-hf.sh b/bin/push-training-to-hf.sh index 290788098432db89f3e94d0d42d15975541e86bb..1d5030f4b3e63c19adb1d7810e034ca7f608332e 100755 --- a/bin/push-training-to-hf.sh +++ b/bin/push-training-to-hf.sh @@ -6,7 +6,7 @@ set -a; source "$HOME/.hermes/.env" 2>/dev/null; set +a SRC="$HOME/.surrogate/training-pairs.jsonl" OFFSET_FILE="$HOME/.surrogate/.training-push-offset" -LOG="$HOME/.claude/logs/training-push.log" +LOG="$HOME/.surrogate/logs/training-push.log" mkdir -p "$(dirname "$LOG")" [[ ! -f "$SRC" ]] && { echo "[$(date +%H:%M:%S)] no source $SRC" | tee -a "$LOG"; exit 0; } diff --git a/bin/qwen-coder-daemon.sh b/bin/qwen-coder-daemon.sh index 82c688a17975f1e0dbcd57e070590a032dc83206..be5115d291b8bd4967806dd652f8d73fcb6ab733 100755 --- a/bin/qwen-coder-daemon.sh +++ b/bin/qwen-coder-daemon.sh @@ -4,7 +4,7 @@ # Pulls priority → invokes qwen-coder-worker.sh with pre-selected priority (env var). 
set -u -LOG="$HOME/.claude/logs/qwen-coder-daemon.log" +LOG="$HOME/.surrogate/logs/qwen-coder-daemon.log" mkdir -p "$(dirname "$LOG")" # Resolve Redis: Unix socket → TCP fallback. Build a redis-cli arg array reused below. @@ -45,7 +45,7 @@ while true; do # can't race with other workers / stale file locks. START=$(date +%s) HERMES_PRIO_ID="$PRIO_ID" \ - "$HOME/.claude/bin/qwen-coder-worker.sh" 2>&1 | tail -3 >> "$LOG" + "$HOME/.surrogate/bin/qwen-coder-worker.sh" 2>&1 | tail -3 >> "$LOG" DUR=$(( $(date +%s) - START )) echo "[$(date '+%H:%M:%S')] $PRIO_ID done in ${DUR}s" >> "$LOG" diff --git a/bin/qwen-coder-worker.sh b/bin/qwen-coder-worker.sh index 76cef4017d1dbcc1278fe673337f1d79e72d9d28..f363a2429c3146e0bf05fb559379232dd95fef4a 100755 --- a/bin/qwen-coder-worker.sh +++ b/bin/qwen-coder-worker.sh @@ -7,7 +7,7 @@ # Philosophy: cheap + fast iteration — reviewer catches bad outputs. set -u -LOG="$HOME/.claude/logs/qwen-coder-worker.log" +LOG="$HOME/.surrogate/logs/qwen-coder-worker.log" OUT_DIR="$HOME/.hermes/workspace/qwen-coder" SHARED="$HOME/.hermes/workspace/swarm-shared" mkdir -p "$(dirname "$LOG")" "$OUT_DIR" @@ -58,8 +58,8 @@ MAP_FILE="$SHARED/repo-maps/${PRIO_PROJECT}.md" # RAG: fetch real code examples from THIS project's actual codebase via FTS # Grounds the model in real APIs/imports/patterns instead of hallucinating RAG_EXAMPLES="" -if [[ -x "$HOME/.claude/bin/ask-sqlite.py" ]]; then - RAG_EXAMPLES=$(python3 "$HOME/.claude/bin/ask-sqlite.py" \ +if [[ -x "$HOME/.surrogate/bin/ask-sqlite.py" ]]; then + RAG_EXAMPLES=$(python3 "$HOME/.surrogate/bin/ask-sqlite.py" \ "$PRIO_PROJECT $PRIO_TITLE" 2>/dev/null | head -c 2500) fi diff --git a/bin/sambanova-bridge.sh b/bin/sambanova-bridge.sh new file mode 100755 index 0000000000000000000000000000000000000000..2b0c9cd4c89ebe027eca90b14cd6fe8b2f6c97ac --- /dev/null +++ b/bin/sambanova-bridge.sh @@ -0,0 +1,75 @@ +#!/usr/bin/env bash +# SambaNova Cloud bridge — fast Llama 3.3 70B/405B + DeepSeek-V3 free tier +# 
Endpoint: https://api.sambanova.ai/v1 (OpenAI-compat, ~500 tok/s) +# Key env: SAMBANOVA_API_KEY +# Usage: sambanova-bridge.sh [--model MODEL] "" +set -u +# Default: Llama 3.3 70B — best speed (500 tok/s) × quality tradeoff on SambaNova. +# Full catalog verified 2026-04: DeepSeek-V3.1/V3.1-cb/V3.2, Llama-4-Maverick, +# gpt-oss-120b, gemma-3-12b-it, MiniMax-M2.5 (service-tier-locked). +MODEL="Meta-Llama-3.3-70B-Instruct" +MAX_TOKENS=2000 +TEMP=0.3 +PROMPT="" + +while [[ $# -gt 0 ]]; do + case "$1" in + --model) + case "$2" in + fast|small|gemma|gemma3) MODEL="gemma-3-12b-it" ;; + llama|llama70|70b) MODEL="Meta-Llama-3.3-70B-Instruct" ;; + llama4|maverick) MODEL="Llama-4-Maverick-17B-128E-Instruct" ;; + deepseek|deepseek-v3) MODEL="DeepSeek-V3.1" ;; + deepseek-latest|v32) MODEL="DeepSeek-V3.2" ;; + deepseek-cb|cb) MODEL="DeepSeek-V3.1-cb" ;; + gpt-oss|oss|120b) MODEL="gpt-oss-120b" ;; + *) MODEL="$2" ;; + esac; shift 2 ;; + --max-tokens) MAX_TOKENS="$2"; shift 2 ;; + --temperature) TEMP="$2"; shift 2 ;; + *) PROMPT="$*"; break ;; + esac +done +[[ -z "$PROMPT" ]] && [[ ! 
-t 0 ]] && PROMPT=$(cat) +[[ -z "$PROMPT" ]] && { echo "sambanova-bridge: no prompt" >&2; exit 2; } + +LOG="$HOME/.surrogate/logs/sambanova-bridge.log" +mkdir -p "$(dirname "$LOG")" +set -a; source "$HOME/.hermes/.env" 2>/dev/null || true; set +a + +if [[ -z "${SAMBANOVA_API_KEY:-}" ]]; then + echo "sambanova-bridge: missing SAMBANOVA_API_KEY in ~/.hermes/.env" >&2 + exit 3 +fi + +echo "[$(date '+%H:%M:%S')] model=$MODEL len=${#PROMPT}" >> "$LOG" + +RESPONSE=$(python3 -c " +import os +exec(open(os.path.expanduser('~/.surrogate/bin/lib/dns_fallback.py')).read()) +exec(open(os.path.expanduser('~/.surrogate/bin/lib/bridge_retry.py')).read()) +import json, sys +body = { + 'model': '$MODEL', + 'messages': [{'role':'user','content': sys.stdin.read()}], + 'max_tokens': $MAX_TOKENS, 'temperature': $TEMP, +} +try: + d = request_with_retry( + 'https://api.sambanova.ai/v1/chat/completions', + data=json.dumps(body).encode(), + headers={ + 'Content-Type':'application/json', + 'User-Agent':'hermes-agent/1.0', + 'Authorization':'Bearer '+os.environ.get('SAMBANOVA_API_KEY',''), + }, + timeout=120, max_retries=4, base_delay=2.0, + ) + print(d.get('choices',[{}])[0].get('message',{}).get('content','')) +except Exception as e: + print(f'sambanova-bridge error: {e}', file=sys.stderr); sys.exit(1) +" <<< "$PROMPT") +RC=$? 
+echo "[$(date '+%H:%M:%S')] rc=$RC bytes=${#RESPONSE}" >> "$LOG" +[[ $RC -ne 0 ]] && exit $RC +echo "$RESPONSE" diff --git a/bin/scrape-keyword-tuner.sh b/bin/scrape-keyword-tuner.sh index 51ba6c1c27c845e2cbf1c93cb46b8dcaff83ea54..a292b785466643625ca7e03099399eba08b3fb4b 100755 --- a/bin/scrape-keyword-tuner.sh +++ b/bin/scrape-keyword-tuner.sh @@ -11,7 +11,7 @@ set -uo pipefail set -a; source "$HOME/.hermes/.env" 2>/dev/null; set +a -LOG="$HOME/.claude/logs/scrape-keyword-tuner.log" +LOG="$HOME/.surrogate/logs/scrape-keyword-tuner.log" mkdir -p "$(dirname "$LOG")" TOKEN="${GITHUB_TOKEN_POOL%%,*}" # first non-empty @@ -33,7 +33,7 @@ python3 <> "$LOG" 2>&1 import os, re, json, sqlite3, time, urllib.request, urllib.error, urllib.parse TOKEN = "$TOKEN" -DB = os.path.expanduser("~/.claude/state/scrape-ledger.db") +DB = os.path.expanduser("~/.surrogate/state/scrape-ledger.db") def github_count(keywords: str) -> int: """Return total_count from GitHub Search API (or -1 on error).""" diff --git a/bin/scrape-ledger-init.sh b/bin/scrape-ledger-init.sh new file mode 100755 index 0000000000000000000000000000000000000000..6051527dca79dd69db9ddb0ce5d94bb6250e4e2a --- /dev/null +++ b/bin/scrape-ledger-init.sh @@ -0,0 +1,123 @@ +#!/usr/bin/env bash +# Initialize global scrape ledger — single source of truth for "what's been scraped" +# All scrapers check ledger before scraping + write after. +# DB: ~/.surrogate/state/scrape-ledger.db (SQLite WAL for concurrent safety) +set -u +DB="$HOME/.surrogate/state/scrape-ledger.db" +mkdir -p "$(dirname "$DB")" + +sqlite3 "$DB" <<'SQL' +PRAGMA journal_mode=WAL; +PRAGMA synchronous=NORMAL; + +CREATE TABLE IF NOT EXISTS scraped ( + id INTEGER PRIMARY KEY AUTOINCREMENT, + source TEXT NOT NULL, -- 'github', 'rss', 'stackoverflow', 'fs', 'crawl4ai' + identifier TEXT NOT NULL, -- 'owner/repo' or URL or file path hash + domain TEXT, -- 'security', 'devops', 'ai-ml', 'frontend', etc. + subdomain TEXT, -- 'cve', 'kyverno', 'observability', etc. 
+ language TEXT, -- 'python', 'go', 'terraform' + stars INTEGER DEFAULT 0, + scraped_at TEXT NOT NULL, + pairs_written INTEGER DEFAULT 0, + status TEXT DEFAULT 'ok', -- 'ok', 'err', 'skipped', 'partial' + notes TEXT +); + +CREATE UNIQUE INDEX IF NOT EXISTS idx_scraped_src_id ON scraped(source, identifier); +CREATE INDEX IF NOT EXISTS idx_scraped_domain ON scraped(domain); +CREATE INDEX IF NOT EXISTS idx_scraped_ts ON scraped(scraped_at); + +-- Domain taxonomy — what every enterprise software company deals with +CREATE TABLE IF NOT EXISTS domain_taxonomy ( + domain TEXT PRIMARY KEY, + subdomain TEXT, + search_keywords TEXT, + priority INTEGER DEFAULT 5, -- 1=critical, 10=nice-to-have + target_repos INTEGER DEFAULT 100 +); + +-- Seed taxonomy +INSERT OR IGNORE INTO domain_taxonomy (domain, subdomain, search_keywords, priority, target_repos) VALUES +-- CODING (per language) +('coding','python-framework','fastapi django flask poetry uv ruff mypy pydantic',1,150), +('coding','python-async','asyncio aiohttp httpx anyio trio',1,80), +('coding','typescript-framework','nextjs remix astro svelte solid react vue nuxt',1,150), +('coding','typescript-tooling','vite tsup esbuild turbopack biome',2,80), +('coding','go-ecosystem','gin echo fiber chi gorilla cobra viper',1,120), +('coding','rust-ecosystem','tokio axum actix warp rocket serde clap',1,100), +('coding','java-kotlin','spring boot ktor micronaut quarkus',2,80), +('coding','mobile-native','swiftui jetpack compose react-native flutter',2,100), +-- SECURITY +('security','appsec','owasp top10 cwe sast dast semgrep bandit eslint-security',1,120), +('security','cloudsec','prowler scoutsuite cloudcustodian checkov tfsec iam-cli',1,120), +('security','container-sec','trivy grype syft kyverno opa falco tetragon',1,100), +('security','supply-chain','cosign sigstore slsa sbom cyclonedx in-toto',1,80), +('security','secrets','vault sops age gitleaks trufflehog detect-secrets',1,60), +('security','identity','keycloak authentik ory 
hydra dex oidc-provider',2,60), +('security','detection','sigma mitre-attack falco-rules wazuh yara sentinelone',1,80), +('security','offensive','metasploit nuclei gobuster ffuf burp-extensions',3,40), +-- OPS / DEVOPS / SRE +('ops','devops-ci','github-actions gitlab-ci jenkins dagger buildkit',1,100), +('ops','iac','terraform pulumi cdk cloudformation ansible',1,150), +('ops','kubernetes','k8s helm kustomize argocd flux crossplane istio linkerd',1,200), +('ops','sre','sre-book postmortem slo burn-rate chaos-engineering',1,80), +('ops','chaos','chaos-mesh litmus gremlin chaos-toolkit',2,40), +('ops','config-mgmt','ansible chef puppet salt',3,40), +('observability','metrics','prometheus thanos mimir victoriametrics alertmanager',1,100), +('observability','logs','loki elasticsearch opensearch fluentbit vector',1,80), +('observability','traces','tempo jaeger zipkin skywalking honeycomb',1,80), +('observability','apm','datadog newrelic dynatrace appdynamics instana',2,40), +('observability','profiling','pyroscope parca gprofiler py-spy flamegraph',2,40), +('observability','otel','opentelemetry-collector otel-sdk semantic-conventions',1,60), +('observability','ebpf','cilium tetragon pixie falco inspektor-gadget',1,60), +-- CLOUD +('cloud','aws','aws-cdk aws-samples aws-solutions aws-copilot sam',1,200), +('cloud','gcp','gcp-samples terraform-google anthos',1,100), +('cloud','azure','azure-samples bicep terraform-azurerm',1,100), +('cloud','multicloud','crossplane cluster-api karpenter external-dns',2,60), +('cloud','serverless','sam sst cdk serverless-framework workers wrangler',1,100), +('finops','finops','kubecost opencost cloudhealth crane infracost',1,60), +-- AI / ML / AGENTS +('ai','llm-serving','vllm tgi ollama llama.cpp exllama sglang',1,100), +('ai','llm-training','unsloth axolotl peft trl ms-swift torchtune',1,100), +('ai','agents','langgraph crewai autogen mcp-server dspy haystack',1,120), +('ai','rag','llamaindex langchain colbert chroma qdrant 
weaviate',1,100), +('ai','ml-frameworks','pytorch-lightning jax equinox flax transformers diffusers',2,80), +('ai','ml-ops','mlflow wandb comet kedro zenml',2,60), +('ai','eval','lm-evaluation-harness deepeval ragas opik',2,40), +-- DATA +('data','databases','postgres mysql pgvector cockroachdb tidb',1,100), +('data','streaming','kafka nats redpanda pulsar flink',1,80), +('data','warehouses','clickhouse duckdb snowflake trino presto starrocks',1,80), +('data','orchestration','airflow prefect dagster temporal',1,80), +('data','formats','parquet iceberg delta-lake hudi avro',2,40), +('data','etl','dbt meltano singer airbyte',2,40), +-- FRONTEND / UX +('frontend','components','shadcn-ui radix headlessui mantine chakra',2,80), +('frontend','state','zustand jotai redux-toolkit tanstack-query swr',2,60), +('frontend','styling','tailwindcss unocss vanilla-extract stitches',2,60), +('frontend','animations','framer-motion auto-animate gsap lottie',3,40), +-- BACKEND +('backend','graphql','apollo relay urql hasura postgraphile',2,60), +('backend','grpc','grpc-web buf connect-go',2,40), +('backend','queues','bullmq sidekiq celery rq',2,60), +-- ARCHITECTURE +('architecture','patterns','hexagonal ddd cqrs event-sourcing saga outbox',1,60), +('architecture','messaging','cloudevents asyncapi schema-registry',2,40), +-- QUALITY / TESTING +('quality','unit-test','pytest vitest jest junit5 testify',2,60), +('quality','e2e','playwright cypress puppeteer selenium',2,60), +('quality','load-test','k6 locust gatling vegeta',2,40), +('quality','contract','pact dredd schemathesis',3,30), +-- COMPLIANCE +('compliance','audit','pdpa gdpr soc2 iso27001 pci-dss hipaa',1,60), +('compliance','policy-as-code','opa kyverno gatekeeper conftest',1,60), +-- PRODUCT / BUSINESS +('product','analytics','posthog plausible amplitude mixpanel',2,40), +('product','feature-flags','unleash flagsmith growthbook launchdarkly',2,40); + +SELECT 'ledger initialized: ' || COUNT(*) || ' domains' FROM 
domain_taxonomy; +SQL + +echo "✅ Ledger at $DB" diff --git a/bin/skill-synthesis-daemon.sh b/bin/skill-synthesis-daemon.sh index 111d45fa342a339b24abb05684c17a64d1d0cdc0..242f2af8cb629dc7023c6f5eaea955d3d5dba7eb 100755 --- a/bin/skill-synthesis-daemon.sh +++ b/bin/skill-synthesis-daemon.sh @@ -9,7 +9,7 @@ set -uo pipefail set -a; source "$HOME/.hermes/.env" 2>/dev/null; set +a SKILLS_DIR="$HOME/.surrogate/skills" -LOG="$HOME/.claude/logs/skill-synthesis.log" +LOG="$HOME/.surrogate/logs/skill-synthesis.log" PAIRS="$HOME/.surrogate/training-pairs.jsonl" mkdir -p "$SKILLS_DIR" "$(dirname "$LOG")" diff --git a/bin/surrogate b/bin/surrogate index 0513fdea55e70b2cc0ac9e32ef799e46ed0f9b77..d9bb873526dba2160ce72e042a3433c6e8d55ff7 100755 --- a/bin/surrogate +++ b/bin/surrogate @@ -29,7 +29,7 @@ init_surrogate_home() { }, "agents": ["architect","dev","qa","ops","reviewer"], "memory": { - "episodesFile": "~/.claude/state/surrogate-memory/episodes.jsonl", + "episodesFile": "~/.surrogate/state/episodes.jsonl", "projectFiles": "~/.surrogate/projects" } } @@ -116,7 +116,7 @@ while [[ $# -gt 0 ]]; do init) MODE="init-project"; shift ;; plan) # surrogate plan set | show | clear - bash ~/.claude/bin/surrogate-daemon.sh plan "$@" + bash ~/.surrogate/bin/surrogate-daemon.sh plan "$@" exit 0 ;; .) 
shift ;; @@ -224,7 +224,7 @@ GEMINI = os.environ.get('GEMINI_API_KEY','') GEMINI2 = os.environ.get('GEMINI_API_KEY_2','') GH_POOL = [t.strip() for t in os.environ.get('GITHUB_TOKEN_POOL','').split(',') if t.strip()] -MEM_DIR = Path(os.path.expanduser('~/.claude/state/surrogate-memory')) +MEM_DIR = Path(os.path.expanduser('~/.surrogate/state')) MEM_DIR.mkdir(parents=True, exist_ok=True) EPISODES = MEM_DIR / 'episodes.jsonl' @@ -284,7 +284,7 @@ def tool_grep(pattern, path=None, glob='*'): def tool_rag_query(query, limit=5): try: - conn = sqlite3.connect(os.path.expanduser('~/.claude/index.db')) + conn = sqlite3.connect(os.path.expanduser('~/.surrogate/index.db')) kw = ' '.join(w for w in re.sub(r'[^a-zA-Z0-9ก-๙ ]',' ',query.lower()).split() if len(w)>2)[:200] rows = conn.execute("SELECT d.source, d.path, substr(d.response,1,500) FROM docs_fts f JOIN docs d ON d.id=f.rowid WHERE f.docs_fts MATCH ? ORDER BY bm25(docs_fts) LIMIT ?", (kw,limit)).fetchall() conn.close() @@ -476,7 +476,7 @@ ${B}Configuration${R}: ${CY}/cwd${R} change working directory ${B}Diagnostics${R}: - ${CY}/memory${R} show ~/.claude/state/surrogate-memory/ + ${CY}/memory${R} show ~/.surrogate/state/ ${CY}/cost${R} OpenRouter usage today ${CY}/cost-all${R} all provider usage breakdown ${CY}/health${R} check HF endpoint + local CLI status @@ -605,7 +605,7 @@ repl() { *) echo "${GY}valid: plan | auto | yolo | default | acceptEdits${R}" ;; esac ;; - /memory) ls -lh ~/.claude/state/surrogate-memory/ 2>&1 | head -10 ;; + /memory) ls -lh ~/.surrogate/state/ 2>&1 | head -10 ;; /undo) # Restore last checkpoint (git stash if uncommitted changes from last task) if git -C "$(pwd)" rev-parse --git-dir &>/dev/null; then @@ -958,7 +958,7 @@ PYEOF ) [[ -z "$NEXT_TASK" ]] && { echo "${GR}✅ Plan complete — all tasks done!${R}"; break; } echo "${BCY}${B}▸ Next task:${R} $NEXT_TASK" - bash ~/.claude/bin/surrogate-orchestrate.sh "$NEXT_TASK" + bash ~/.surrogate/bin/surrogate-orchestrate.sh "$NEXT_TASK" # Mark done in 
plan /usr/bin/python3 <${R} " read -r task fi - bash ~/.claude/bin/surrogate-orchestrate.sh --mode plan "$task" + bash ~/.surrogate/bin/surrogate-orchestrate.sh --mode plan "$task" } # ═══ Monitor mode (watch cloud/logs, auto-fix) ═══ monitor_mode() { echo "${B}${MA}▶ MONITOR MODE${R}" - echo "${D} Watching ~/.claude/logs/, ~/.hermes/workspace/healer/, system load.${R}" + echo "${D} Watching ~/.surrogate/logs/, ~/.hermes/workspace/healer/, system load.${R}" echo "${D} Ctrl+C to stop.${R}" echo "" ITER=0 @@ -1027,15 +1027,15 @@ monitor_mode() { ls -t ~/.hermes/workspace/healer/*.md 2>/dev/null | head -3 | awk '{print " " $0}' | xargs -I{} basename {} 2>/dev/null | sed 's/^/ /' # Training + graph PAIRS=$(wc -l ~/axentx/surrogate/data/training-jsonl/*.jsonl 2>/dev/null | tail -1 | awk '{print $1}') - REPOS=$(sqlite3 ~/.claude/state/scrape-ledger.db "SELECT COUNT(*) FROM scraped" 2>/dev/null) + REPOS=$(sqlite3 ~/.surrogate/state/scrape-ledger.db "SELECT COUNT(*) FROM scraped" 2>/dev/null) echo "${B}data${R} pairs=$PAIRS repos=$REPOS" # Recent errors in logs (auto-heal trigger) - ERR_COUNT=$(tail -200 ~/.claude/logs/*.log 2>/dev/null | grep -cE "ERROR|Fatal|CRITICAL|429|403|500" || echo 0) + ERR_COUNT=$(tail -200 ~/.surrogate/logs/*.log 2>/dev/null | grep -cE "ERROR|Fatal|CRITICAL|429|403|500" || echo 0) echo "${B}errors${R} last 200 log lines: $ERR_COUNT" # If critical → spawn agent to investigate if [[ $ERR_COUNT -gt 50 ]]; then echo "${RE}⚠ elevated errors — dispatching investigator agent${R}" - (run_agent "เช็ค ~/.claude/logs/ หา pattern error ที่ recur บ่อย และเสนอ fix list (ห้ามแก้เอง รายงานอย่างเดียว)" 2>&1 | /usr/bin/head -20) & + (run_agent "เช็ค ~/.surrogate/logs/ หา pattern error ที่ recur บ่อย และเสนอ fix list (ห้ามแก้เอง รายงานอย่างเดียว)" 2>&1 | /usr/bin/head -20) & fi sleep 30 done @@ -1045,9 +1045,9 @@ monitor_mode() { show_status() { banner echo "" - REPOS=$(sqlite3 ~/.claude/state/scrape-ledger.db "SELECT COUNT(*) FROM scraped" 2>/dev/null || echo "?") 
+ REPOS=$(sqlite3 ~/.surrogate/state/scrape-ledger.db "SELECT COUNT(*) FROM scraped" 2>/dev/null || echo "?") PAIRS=$(wc -l ~/axentx/surrogate/data/training-jsonl/*.jsonl 2>/dev/null | tail -1 | awk '{print $1}' || echo "?") - EP=$(wc -l ~/.claude/state/surrogate-memory/episodes.jsonl 2>/dev/null | awk '{print $1}' || echo "0") + EP=$(wc -l ~/.surrogate/state/episodes.jsonl 2>/dev/null | awk '{print $1}' || echo "0") PLAN_FILE="$SURROGATE_HOME/active-plan.md" echo "${B}▸ Session${R}" echo " cwd: ${GR}$(pwd)${R}" @@ -1069,8 +1069,8 @@ show_status() { show_agents() { banner echo "" - echo "${B}▸ Available agents (~/.claude/agents/)${R}" - ls ~/.claude/agents/*.md 2>/dev/null | /usr/bin/sed 's|.*/||;s|.md$||' | sed 's/^/ /' + echo "${B}▸ Available agents (~/.surrogate/agents/)${R}" + ls ~/.surrogate/agents/*.md 2>/dev/null | /usr/bin/sed 's|.*/||;s|.md$||' | sed 's/^/ /' } # ═══ Dispatch ═══ @@ -1086,7 +1086,7 @@ case "$MODE" in if [[ -n "$PROMPT" ]]; then plan_mode "$PROMPT" else # No task — show plan status - bash ~/.claude/bin/surrogate-daemon.sh plan show + bash ~/.surrogate/bin/surrogate-daemon.sh plan show fi ;; print) diff --git a/bin/surrogate-agent.sh b/bin/surrogate-agent.sh index cf8a8c5f321fe5988473b4d3a36d6eeef43b370f..0581aa77b368fbea83ed37a86fd48fe5d5da288f 100755 --- a/bin/surrogate-agent.sh +++ b/bin/surrogate-agent.sh @@ -33,7 +33,7 @@ while [[ $# -gt 0 ]]; do done [[ -z "$TASK" ]] && { echo "usage: $0 [--max-steps N] [--model M] " >&2; exit 2; } -MEM_DIR="$HOME/.claude/state/surrogate-memory" +MEM_DIR="$HOME/.surrogate/state/surrogate-memory" mkdir -p "$MEM_DIR" export AGENT_TASK="$TASK" @@ -49,7 +49,7 @@ TASK = os.environ['AGENT_TASK'] MAX_STEPS = int(os.environ['AGENT_MAX_STEPS']) MODEL_OVERRIDE = os.environ.get('AGENT_MODEL_OVERRIDE', '') OPENROUTER = os.environ.get('OPENROUTER_API_KEY', '') -MEM_DIR = Path(os.path.expanduser('~/.claude/state/surrogate-memory')) +MEM_DIR = Path(os.path.expanduser('~/.surrogate/state/surrogate-memory')) EPISODES = 
MEM_DIR / 'episodes.jsonl' PATTERNS = MEM_DIR / 'patterns.jsonl' SYS_PROMPT = '' @@ -148,7 +148,7 @@ def tool_rag_query(query, limit=5, source_filter=None): import subprocess as _sp try: # 1. BM25 via SQLite FTS - conn = sqlite3.connect(os.path.expanduser('~/.claude/index.db')) + conn = sqlite3.connect(os.path.expanduser('~/.surrogate/index.db')) kw = ' '.join(w for w in re.sub(r'[^a-zA-Z0-9ก-๙ ]', ' ', query.lower()).split() if len(w) > 2)[:200] q = "SELECT d.source, d.path, substr(d.response, 1, 500), d.id FROM docs_fts f JOIN docs d ON d.id=f.rowid WHERE f.docs_fts MATCH ?" params = [kw] @@ -166,9 +166,9 @@ def tool_rag_query(query, limit=5, source_filter=None): dense_docs = [] if len(query) > 10: try: - cmd = f"""~/.claude/state/crawler-venv/bin/python -c " + cmd = f"""~/.surrogate/state/crawler-venv/bin/python -c " import chromadb, json, sys -client = chromadb.PersistentClient(path='/Users/Ashira/.claude/code-vector-db') +client = chromadb.PersistentClient(path='$HOME/.surrogate/code-vector-db') cols = client.list_collections() if cols: r = cols[0].query(query_texts=['{query[:200].replace(chr(39),chr(92)+chr(39))}'], n_results={max(limit*3,20)}) @@ -206,7 +206,7 @@ def tool_rag_code(query, limit=5): """Query code knowledge — routed through SQLite FTS (no Chroma load, crash-safe). 
Searches `code` + `code-vector` + `code-deep:*` sources in index.db via BM25.""" try: - conn = sqlite3.connect(os.path.expanduser('~/.claude/index.db')) + conn = sqlite3.connect(os.path.expanduser('~/.surrogate/index.db')) kw = ' '.join(w for w in re.sub(r'[^a-zA-Z0-9ก-๙ ]', ' ', query.lower()).split() if len(w) > 2)[:200] rows = conn.execute(""" SELECT d.source, d.path, substr(d.response, 1, 500) @@ -222,7 +222,7 @@ def tool_rag_code(query, limit=5): def tool_web_fetch(url, timeout=45): try: - cmd = f"""$HOME/.claude/state/crawler-venv/bin/python -c " + cmd = f"""$HOME/.surrogate/state/crawler-venv/bin/python -c " import asyncio from crawl4ai import AsyncWebCrawler, BrowserConfig, CrawlerRunConfig async def f(): @@ -254,7 +254,7 @@ def tool_task(prompt, max_steps=5): sub_id = uuid.uuid4().hex[:8] print(f" ↳ [sub-agent {sub_id}] spawning: {prompt[:80]}", flush=True) try: - cmd = ['bash', os.path.expanduser('~/.claude/bin/surrogate-agent.sh'), + cmd = ['bash', os.path.expanduser('~/.surrogate/bin/surrogate-agent.sh'), '--max-steps', str(max_steps), prompt] r = subprocess.run(cmd, capture_output=True, text=True, timeout=600) return {'sub_id': sub_id, 'output': r.stdout[-4000:], 'rc': r.returncode} @@ -274,7 +274,7 @@ def tool_orchestrate(subtasks, pattern='parallel', max_steps=5): def run_one(prompt): try: r = subprocess.run( - ['bash', os.path.expanduser('~/.claude/bin/surrogate-agent.sh'), + ['bash', os.path.expanduser('~/.surrogate/bin/surrogate-agent.sh'), '--max-steps', str(max_steps), prompt], capture_output=True, text=True, timeout=600 ) @@ -426,7 +426,7 @@ TOOLS = { def check_budget(): """Return True if under daily budget ($2/day default). 
Caller aborts if False.""" import time as _t - cache = Path(os.path.expanduser('~/.claude/state/openrouter-budget-cache.json')) + cache = Path(os.path.expanduser('~/.surrogate/state/openrouter-budget-cache.json')) # Cache balance check for 5 min (reduce API calls) try: if cache.exists() and _t.time() - cache.stat().st_mtime < 300: @@ -439,7 +439,7 @@ def check_budget(): cache.parent.mkdir(parents=True, exist_ok=True) cache.write_text(json.dumps({'usage': d.get('usage',0), 'ts': _t.time()})) # Check today's marker - today_f = Path(os.path.expanduser('~/.claude/state/openrouter-today-start.txt')) + today_f = Path(os.path.expanduser('~/.surrogate/state/openrouter-today-start.txt')) today_str = datetime.now().strftime('%Y-%m-%d') if not today_f.exists() or today_f.read_text().split(':')[0] != today_str: today_f.parent.mkdir(parents=True, exist_ok=True) diff --git a/bin/surrogate-bridge.sh b/bin/surrogate-bridge.sh new file mode 100755 index 0000000000000000000000000000000000000000..06c8d3689b5c1c177760efd6c4b25b1d34d22fc4 --- /dev/null +++ b/bin/surrogate-bridge.sh @@ -0,0 +1,52 @@ +#!/usr/bin/env bash +# Surrogate-1 bridge — local Ollama endpoint for the Ashira-personalized model. +# Currently uses base Qwen2.5-Coder-7B + Thai/DevSecOps SYSTEM prompt as placeholder. +# After LoRA training on RunPod, rebuild Ollama model with merged adapter. +# Model URL: http://localhost:11434 (Ollama) +set -u +MODEL="surrogate-1" +MAX_TOKENS=2000 +TEMP=0.3 +PROMPT="" + +while [[ $# -gt 0 ]]; do + case "$1" in + --model) MODEL="$2"; shift 2 ;; + --max-tokens) MAX_TOKENS="$2"; shift 2 ;; + *) PROMPT="$*"; break ;; + esac +done +[[ -z "$PROMPT" ]] && [[ ! 
-t 0 ]] && PROMPT=$(cat) +[[ -z "$PROMPT" ]] && { echo "surrogate-bridge: no prompt" >&2; exit 2; } + +LOG="$HOME/.surrogate/logs/surrogate-bridge.log" +mkdir -p "$(dirname "$LOG")" +echo "[$(date '+%H:%M:%S')] model=$MODEL len=${#PROMPT}" >> "$LOG" + +# Ollama OpenAI-compat endpoint +RESPONSE=$(python3 -c " +import json, sys, urllib.request, urllib.error + +body = { + 'model': '$MODEL', + 'messages': [{'role':'user','content': sys.stdin.read()}], + 'max_tokens': $MAX_TOKENS, + 'temperature': $TEMP, + 'stream': False, +} +req = urllib.request.Request( + 'http://localhost:11434/v1/chat/completions', + data=json.dumps(body).encode(), + headers={'Content-Type':'application/json','Authorization':'Bearer ollama'} +) +try: + with urllib.request.urlopen(req, timeout=180) as r: + d = json.load(r) + print(d.get('choices',[{}])[0].get('message',{}).get('content','')) +except Exception as e: + print(f'surrogate-bridge error: {e}', file=sys.stderr); sys.exit(1) +" <<< "$PROMPT") +RC=$? +echo "[$(date '+%H:%M:%S')] rc=$RC bytes=${#RESPONSE}" >> "$LOG" +[[ $RC -ne 0 ]] && exit $RC +echo "$RESPONSE" diff --git a/bin/surrogate-consolidate.sh b/bin/surrogate-consolidate.sh new file mode 100755 index 0000000000000000000000000000000000000000..0b6c4bb4f75817a16eeccd5d87498f26d3e58851 --- /dev/null +++ b/bin/surrogate-consolidate.sh @@ -0,0 +1,163 @@ +#!/usr/bin/env bash +# Episode consolidation — nightly summarize episodes → patterns → Graphiti + DPO training data +# +# Input: ~/.surrogate/state/surrogate-memory/episodes.jsonl +# Output: +# 1. ~/.surrogate/state/surrogate-memory/patterns.jsonl (learned patterns) +# 2. ~/.surrogate/index.db (source='surrogate-episodes') — pattern ingested for RAG +# 3. ~/axentx/surrogate/data/training-jsonl/dpo-pairs.jsonl (user+reply for future LoRA) +# 4. 
FalkorDB graph (episodic → semantic bitemporal edges) +set -u +set -a; source "$HOME/.hermes/.env" 2>/dev/null; set +a + +MEM="$HOME/.surrogate/state/surrogate-memory" +LOG="$HOME/.surrogate/logs/surrogate-consolidate.log" +CHECKPOINT="$MEM/consolidate.checkpoint" +mkdir -p "$(dirname "$LOG")" "$MEM" + +/usr/bin/python3 <<'PYEOF' 2>>"$LOG" +import json, os, sqlite3, urllib.request, hashlib, subprocess +from datetime import datetime +from pathlib import Path + +MEM = Path(os.path.expanduser('~/.surrogate/state/surrogate-memory')) +EP = MEM / 'episodes.jsonl' +PAT = MEM / 'patterns.jsonl' +CKPT = MEM / 'consolidate.checkpoint' +DPO = Path(os.path.expanduser('~/axentx/surrogate/data/training-jsonl/dpo-pairs.jsonl')) +DPO.parent.mkdir(parents=True, exist_ok=True) + +OR_KEY = os.environ.get('OPENROUTER_API_KEY','') + +# Checkpoint: last consolidated line # +last_line = 0 +if CKPT.exists(): + try: last_line = int(CKPT.read_text().strip()) + except: last_line = 0 + +if not EP.exists(): + print("[consolidate] no episodes yet") + exit() + +lines = EP.read_text(errors='replace').splitlines() +new_lines = lines[last_line:] +if not new_lines: + print(f"[consolidate] no new since line {last_line}") + exit() + +print(f"[consolidate] processing {len(new_lines)} new episodes") + +episodes = [] +for line in new_lines: + try: episodes.append(json.loads(line)) + except: continue + +# Step 1: Append to DPO training data (for future RunPod LoRA) +with open(DPO, 'a') as f: + for ep in episodes: + if not ep.get('task') or not ep.get('final'): continue + if '[error' in ep.get('final','') or '[timeout' in ep.get('final',''): continue + pair = { + 'instruction': ep['task'][:500], + 'input': '', + 'output': ep['final'][:3000], + 'source': 'surrogate-episode', + 'timestamp': ep.get('ts', datetime.utcnow().isoformat()), + } + f.write(json.dumps(pair, ensure_ascii=False) + '\n') + +# Step 2: Summarize batches → pattern (every 10 episodes) +def summarize_batch(batch): + if not OR_KEY: return 
None + prompt = "Below are recent Surrogate agent episodes (task + final answer). Extract 2-3 concise reusable patterns — what kind of tasks + what approaches worked. Output as bullet list. Thai OK.\n\n" + for i, ep in enumerate(batch): + prompt += f"--- Episode {i+1} ---\nTask: {ep.get('task','')[:300]}\nAnswer: {ep.get('final','')[:500]}\n\n" + body = { + 'model': 'google/gemini-2.5-flash', # cheap, good summarizer + 'messages': [{'role':'user','content': prompt[:15000]}], + 'temperature': 0.2, 'max_tokens': 600, + } + try: + req = urllib.request.Request( + 'https://openrouter.ai/api/v1/chat/completions', + data=json.dumps(body).encode(), + headers={'Content-Type':'application/json','Authorization':f'Bearer {OR_KEY}', + 'HTTP-Referer':'https://axentx.ai','X-Title':'Surrogate-Consolidate'} + ) + with urllib.request.urlopen(req, timeout=60) as r: + d = json.load(r) + return d['choices'][0]['message']['content'] + except Exception as e: + print(f"[consolidate] llm err: {e}") + return None + +# Batch into groups of 10 +patterns_added = 0 +for batch_start in range(0, len(episodes), 10): + batch = episodes[batch_start:batch_start+10] + summary = summarize_batch(batch) + if not summary: continue + pattern = { + 'ts': datetime.utcnow().isoformat(), + 'episodes_range': [batch_start, batch_start+len(batch)-1], + 'pattern_summary': summary[:2000], + 'n_episodes': len(batch), + } + with open(PAT, 'a') as f: + f.write(json.dumps(pattern, ensure_ascii=False) + '\n') + patterns_added += 1 + +# Step 3: Ingest patterns into index.db so future RAG finds them +conn = sqlite3.connect(os.path.expanduser('~/.surrogate/index.db')) +conn.execute('PRAGMA journal_mode=WAL') +cur = conn.cursor() +if PAT.exists(): + for line in PAT.read_text().splitlines()[-50:]: + try: p = json.loads(line) + except: continue + cur.execute( + "INSERT OR IGNORE INTO docs (source, project, path, topic, instruction, response, ts) VALUES (?,?,?,?,?,?,?)", + ('surrogate-episodes', 'surrogate', 'memory:pattern', 
'learned-pattern', + f"pattern from {p.get('n_episodes','?')} episodes", + p.get('pattern_summary','')[:2500], + p.get('ts', datetime.utcnow().isoformat())) + ) +conn.commit() +conn.close() + + +# Step 3b: Write patterns as graph nodes in FalkorDB (fix stagnant graph) +import subprocess +sock_r = subprocess.run(['/usr/bin/find','/var/folders','/tmp','-name','redis.socket','-type','s'], capture_output=True, text=True) +sock = sock_r.stdout.strip().split('\n')[0] if sock_r.stdout else None +if sock: + # Each pattern → Pattern node + relationships + if PAT.exists(): + for line in PAT.read_text().splitlines()[-patterns_added:]: + try: p = json.loads(line) + except: continue + pid = hashlib.md5(p.get('pattern_summary','')[:200].encode()).hexdigest()[:12] + title = p.get('pattern_summary','')[:100].replace("'", "").replace(chr(10),' ') + ts = p.get('ts','') + cypher = f"MERGE (p:Pattern {{id:'{pid}'}}) SET p.title='{title}', p.ts='{ts}', p.n_episodes={p.get('n_episodes',0)}" + try: + subprocess.run(['/opt/homebrew/bin/redis-cli','-s',sock,'GRAPH.QUERY','ashira',cypher], capture_output=True, timeout=5) + except: pass + # Each episode → Episode node linked to Pattern + for ep in episodes[-20:]: + eid = hashlib.md5(ep.get('task','')[:200].encode()).hexdigest()[:12] + task = ep.get('task','')[:80].replace("'","").replace(chr(10),' ') + quality = 'success' if '[error' not in ep.get('final','') and '[timeout' not in ep.get('final','') else 'failed' + cypher = f"MERGE (e:Episode {{id:'{eid}'}}) SET e.task='{task}', e.quality='{quality}', e.ts='{ep.get('ts','')}'" + try: + subprocess.run(['/opt/homebrew/bin/redis-cli','-s',sock,'GRAPH.QUERY','ashira',cypher], capture_output=True, timeout=5) + except: pass + print('[consolidate] wrote patterns + episodes to FalkorDB') +import hashlib # make sure imported + +# Update checkpoint +CKPT.write_text(str(len(lines))) +print(f"[consolidate] added {patterns_added} patterns from {len(episodes)} episodes. 
DPO pairs grown.") +PYEOF + +echo "[$(date '+%H:%M:%S')] consolidate done" >> "$LOG" diff --git a/bin/surrogate-daemon.sh b/bin/surrogate-daemon.sh index 57b284b47877863c964ae2e850b7e043bf4686a9..865380c5946743c8b3f231bf70264e65e1cf2d6a 100755 --- a/bin/surrogate-daemon.sh +++ b/bin/surrogate-daemon.sh @@ -2,7 +2,7 @@ # Surrogate Daemon — continuous autonomous worker # # Architecture: -# - Task queue file: ~/.claude/state/surrogate-queue.jsonl (append-only) +# - Task queue file: ~/.surrogate/state/surrogate-queue.jsonl (append-only) # - Workers: N parallel (default 3) # - Pickup: instant (as soon as worker idle → pull next task) # - Self-generation: if queue empty, daemon asks itself "what should I work on?" @@ -18,11 +18,11 @@ set -u set -a; source "$HOME/.hermes/.env" 2>/dev/null; set +a -STATE="$HOME/.claude/state/surrogate-daemon" +STATE="$HOME/.surrogate/state/surrogate-daemon" QUEUE="$STATE/queue.jsonl" DONE="$STATE/done.jsonl" PID_FILE="$STATE/daemon.pid" -LOG="$HOME/.claude/logs/surrogate-daemon.log" +LOG="$HOME/.surrogate/logs/surrogate-daemon.log" WORKERS=1 # default 1 worker (budget-safe). User can --workers 3 for burst mkdir -p "$STATE" "$(dirname "$LOG")" @@ -150,7 +150,7 @@ PYEOF # Every 30min: consolidation NOW_MIN=$(date +%M) if [[ "$NOW_MIN" == "15" ]] || [[ "$NOW_MIN" == "45" ]]; then - "$HOME/.claude/bin/surrogate-consolidate.sh" >> "$LOG" 2>&1 & + "$HOME/.surrogate/bin/surrogate-consolidate.sh" >> "$LOG" 2>&1 & fi sleep 10 @@ -226,7 +226,7 @@ PYEOF AUTO_TASK=$(/usr/bin/python3 <<'PYEOF' import json, os, random from pathlib import Path -ep = Path(os.path.expanduser('~/.claude/state/surrogate-memory/episodes.jsonl')) +ep = Path(os.path.expanduser('~/.surrogate/state/surrogate-memory/episodes.jsonl')) recent_topics = [] if ep.exists(): for line in ep.read_text().splitlines()[-30:]: @@ -243,7 +243,7 @@ pool = [ # B. 
Codebase health "อ่าน ~/axentx/ หา TODO/FIXME across projects → สร้าง fix spec", "เช็ค axentx test coverage per project → identify weakest → propose tests", - "Scan ~/.claude/bin/ หา script ที่ไม่ถูกใช้ > 7 days → propose archive", + "Scan ~/.surrogate/bin/ หา script ที่ไม่ถูกใช้ > 7 days → propose archive", "Review last 10 auto-commits → ตรวจว่า quality OK หรือไม่", # C. Knowledge quality "สำรวจ index.db หา duplicate entries → propose dedup", @@ -305,7 +305,7 @@ PYEOF START=$(date +%s) # Execute via agent - OUTPUT=$("$HOME/.claude/bin/surrogate-agent.sh" --max-steps 6 "$TASK" 2>&1 | tail -50) + OUTPUT=$("$HOME/.surrogate/bin/surrogate-agent.sh" --max-steps 6 "$TASK" 2>&1 | tail -50) END=$(date +%s) DUR=$((END - START)) diff --git a/bin/surrogate-dev-loop.sh b/bin/surrogate-dev-loop.sh index 9e94757d6ce21d4a53a98f5868b31d8c0fdd1f56..63ae98ee1597030fdc4a9d21b3cbad832166845d 100755 --- a/bin/surrogate-dev-loop.sh +++ b/bin/surrogate-dev-loop.sh @@ -16,7 +16,7 @@ set -u set -a; source "$HOME/.hermes/.env" 2>/dev/null; set +a -LOG="$HOME/.claude/logs/surrogate-dev-loop.log" +LOG="$HOME/.surrogate/logs/surrogate-dev-loop.log" OUT_DIR="$HOME/.hermes/workspace/local-dev" mkdir -p "$(dirname "$LOG")" "$OUT_DIR" @@ -28,7 +28,7 @@ SEARCH_ROOTS=( "$HOME/axentx" "$HOME/develope/DevOps" "$HOME/develope/AI" - "$HOME/.claude/bin" + "$HOME/.surrogate/bin" ) # ── Task generators (pick one per cycle, weighted random) ──────────────────── @@ -41,7 +41,7 @@ ROOTS = [ Path.home() / 'axentx', Path.home() / 'develope/DevOps', Path.home() / 'develope/AI', - Path.home() / '.claude/bin', + Path.home() / '.surrogate/bin', ] ROOTS = [p for p in ROOTS if p.exists()] diff --git a/bin/surrogate-orchestrate.sh b/bin/surrogate-orchestrate.sh index 911c289779b5d95b18c201a637fcf0c0b6b3aabb..4c64edfb9abe0b49f90123b13a3807461a84333e 100755 --- a/bin/surrogate-orchestrate.sh +++ b/bin/surrogate-orchestrate.sh @@ -26,7 +26,7 @@ CY=$'\033[36m'; GR=$'\033[32m'; YE=$'\033[33m'; MA=$'\033[35m'; 
RE=$'\033[31m'; BCY=$'\033[96m' SESSION_ID=$(date +%s | tail -c 9) -WORKDIR="$HOME/.claude/state/orchestrate/$SESSION_ID" +WORKDIR="$HOME/.surrogate/state/orchestrate/$SESSION_ID" TRAINING_LOG="$HOME/.surrogate/training-pairs.jsonl" mkdir -p "$WORKDIR" "$(dirname "$TRAINING_LOG")" @@ -273,7 +273,7 @@ PYEOF count=${count:-0} if [[ $count -gt 0 ]] && [[ $((count % 25)) -eq 0 ]]; then nohup bash "$HOME/.local/bin/push-training-to-hf.sh" \ - > "$HOME/.claude/logs/training-push.log" 2>&1 & + > "$HOME/.surrogate/logs/training-push.log" 2>&1 & fi fi } diff --git a/bin/surrogate-research-apply.sh b/bin/surrogate-research-apply.sh index c4dac871d400de713a5d1b3998153a5be387328a..ac17cbf204c5cfba36a43baa11cefee15e66fcd6 100755 --- a/bin/surrogate-research-apply.sh +++ b/bin/surrogate-research-apply.sh @@ -5,7 +5,7 @@ set -uo pipefail set -a; source "$HOME/.hermes/.env" 2>/dev/null; set +a -LOG="$HOME/.claude/logs/surrogate-research-apply.log" +LOG="$HOME/.surrogate/logs/surrogate-research-apply.log" QUEUE="$HOME/.hermes/workspace/research/queue.txt" APPLIED="$HOME/.hermes/workspace/research/applied.log" mkdir -p "$(dirname "$QUEUE")" "$(dirname "$LOG")" diff --git a/bin/surrogate-research-loop.sh b/bin/surrogate-research-loop.sh index 86b8c2102b4b6057012ece4b29def30bccc6886b..345387b1393f790f2f5960ec4dfdf3f4ba556867 100755 --- a/bin/surrogate-research-loop.sh +++ b/bin/surrogate-research-loop.sh @@ -6,7 +6,7 @@ set -uo pipefail set -a; source "$HOME/.hermes/.env" 2>/dev/null; set +a -LOG="$HOME/.claude/logs/surrogate-research-loop.log" +LOG="$HOME/.surrogate/logs/surrogate-research-loop.log" RESEARCH_DIR="$HOME/.hermes/workspace/research" APPLIED_DIR="$RESEARCH_DIR/applied" mkdir -p "$RESEARCH_DIR" "$APPLIED_DIR" "$(dirname "$LOG")" diff --git a/bin/work-queue-producer.sh b/bin/work-queue-producer.sh index 635e28d3b03b943a5494280f3a0b0473802276d7..6d6de193bd09ab43807d1fa2d5bc941404504325 100755 --- a/bin/work-queue-producer.sh +++ b/bin/work-queue-producer.sh @@ -15,7 +15,7 
@@ # Dedup: hermes:seen: TTL 30 min prevents re-enqueue of in-flight work. set -u -LOG="$HOME/.claude/logs/work-queue-producer.log" +LOG="$HOME/.surrogate/logs/work-queue-producer.log" SHARED="$HOME/.hermes/workspace/swarm-shared" mkdir -p "$(dirname "$LOG")" diff --git a/start.sh b/start.sh index 85b8754cb7f88b0174daff0f0b79ac0301ad030e..4e45a96a2d64439e955d4beb714e01355b804fb6 100644 --- a/start.sh +++ b/start.sh @@ -3,7 +3,7 @@ # Boots: persistent /data mount → Redis → Ollama → axentx repos → daemons → status server. set -uo pipefail -LOG_DIR="${HOME}/.claude/logs" +LOG_DIR="${HOME}/.surrogate/logs" mkdir -p "$LOG_DIR" echo "[$(date +%H:%M:%S)] hermes-hf-space boot start" echo "[$(date +%H:%M:%S)] hermes-hf-space boot start" >> "$LOG_DIR/boot.log" @@ -15,25 +15,44 @@ set -x # Echo stdout so HF run-logs see progress (safe steps before .env is loaded) exec > >(tee -a "$LOG_DIR/boot.log") 2>&1 -# ── 1. Persistent data — symlink state dirs to /data (HF persistent mount) ── +# ── 1. Persistent data — symlink state subdirs to /data (HF persistent mount) ── +# bin/ is NOT persisted (baked into image, refreshed on every push). +# Persisted: state (DBs), logs, memory, skills, sessions, training pairs, +# workspace (hermes runtime), projects (axentx clones), ollama (model cache). DATA="/data" if [[ -d "$DATA" ]] && [[ -w "$DATA" ]]; then - mkdir -p "$DATA"/{state,workspace,memory,reflexion,projects,ollama,surrogate,index} - # Symlink critical paths so DB/training/ChromaDB persist across rebuilds - for src in \ - "${HOME}/.claude/state:${DATA}/state" \ + mkdir -p "$DATA"/{state,logs,memory,skills,sessions,workspace,projects,ollama,training,reflexion,index} + # Migrate from any older layout (one-time): if /data/surrogate/state exists, move up one level + if [[ -d "$DATA/surrogate/state" ]] && [[ ! 
-L "$DATA/state" ]]; then + mv "$DATA/surrogate"/* "$DATA/" 2>/dev/null || true + rmdir "$DATA/surrogate" 2>/dev/null || true + fi + + for spec in \ + "${HOME}/.surrogate/state:${DATA}/state" \ + "${HOME}/.surrogate/logs:${DATA}/logs" \ + "${HOME}/.surrogate/memory:${DATA}/memory" \ + "${HOME}/.surrogate/skills:${DATA}/skills" \ + "${HOME}/.surrogate/sessions:${DATA}/sessions" \ "${HOME}/.hermes/workspace:${DATA}/workspace" \ - "${HOME}/.surrogate:${DATA}/surrogate" \ "${HOME}/.ollama:${DATA}/ollama"; do - target="${src%%:*}" - link="${src##*:}" + target="${spec%%:*}" + link="${spec##*:}" mkdir -p "$(dirname "$target")" if [[ ! -L "$target" ]]; then rm -rf "$target" 2>/dev/null ln -sfn "$link" "$target" fi done - echo "[$(date +%H:%M:%S)] persistent /data linked" >> "$LOG_DIR/boot.log" + + # training-pairs.jsonl — single file persistence + if [[ ! -L "${HOME}/.surrogate/training-pairs.jsonl" ]]; then + rm -f "${HOME}/.surrogate/training-pairs.jsonl" 2>/dev/null + touch "${DATA}/training-pairs.jsonl" + ln -sfn "${DATA}/training-pairs.jsonl" "${HOME}/.surrogate/training-pairs.jsonl" + fi + + echo "[$(date +%H:%M:%S)] persistent /data linked (state, logs, memory, skills, sessions, workspace, ollama, training-pairs)" >> "$LOG_DIR/boot.log" else echo "[$(date +%H:%M:%S)] WARN: /data not writable — running ephemeral!" >> "$LOG_DIR/boot.log" fi @@ -140,7 +159,7 @@ fi # Trace stays OFF — never re-enable past secrets section. if [[ -n "${DISCORD_BOT_TOKEN:-}" ]]; then set -a; source ~/.hermes/.env 2>/dev/null; set +a - nohup python ~/.claude/bin/hermes-discord-bot.py >> "$LOG_DIR/discord-bot.log" 2>&1 & + nohup python ~/.surrogate/bin/hermes-discord-bot.py >> "$LOG_DIR/discord-bot.log" 2>&1 & echo "[$(date +%H:%M:%S)] discord bot started" fi @@ -149,11 +168,11 @@ cat > /tmp/scrape-daemon.sh <<'SCRAPESH' #!/bin/bash # 8 concurrent scrape workers, near-zero idle time. 
set -a; source ~/.hermes/.env 2>/dev/null; set +a -LOG="${HOME}/.claude/logs/scrape-continuous.log" +LOG="${HOME}/.surrogate/logs/scrape-continuous.log" mkdir -p "$(dirname "$LOG")" while true; do START=$(date +%s) - bash ~/.claude/bin/domain-scrape-loop.sh 1500 8 >> "$LOG" 2>&1 + bash ~/.surrogate/bin/domain-scrape-loop.sh 1500 8 >> "$LOG" 2>&1 DUR=$(( $(date +%s) - START )) # Tight cool-downs — cloud has unlimited bandwidth, only rate-limit concern if [[ $DUR -lt 30 ]]; then sleep 30 # queue likely exhausted, give it time @@ -167,37 +186,37 @@ nohup /tmp/scrape-daemon.sh > "$LOG_DIR/scrape-daemon.log" 2>&1 & echo "[$(date +%H:%M:%S)] continuous scrape daemon (parallel=8) started" >> "$LOG_DIR/boot.log" # ── 7b. Agentic crawler (URL frontier + visited stamps + link discovery) ──── -nohup bash ~/.claude/bin/agentic-crawler.sh 6 > "$LOG_DIR/agentic-crawler.log" 2>&1 & +nohup bash ~/.surrogate/bin/agentic-crawler.sh 6 > "$LOG_DIR/agentic-crawler.log" 2>&1 & echo "[$(date +%H:%M:%S)] agentic crawler started (parallel=6)" >> "$LOG_DIR/boot.log" # ── 7c. Skill-synthesis daemon (extract patterns from cloned repos → skills) ─ -nohup bash ~/.claude/bin/skill-synthesis-daemon.sh > "$LOG_DIR/skill-synthesis.log" 2>&1 & +nohup bash ~/.surrogate/bin/skill-synthesis-daemon.sh > "$LOG_DIR/skill-synthesis.log" 2>&1 & echo "[$(date +%H:%M:%S)] skill-synthesis daemon started" >> "$LOG_DIR/boot.log" # ── 7b. 
Cron loop — non-scrape daemons (scrape now runs continuously above) ─ cat > /tmp/hermes-cron.sh <<'CRONSH' #!/bin/bash set -a; source ~/.hermes/.env 2>/dev/null; set +a -LOG="${HOME}/.claude/logs/cron.log" +LOG="${HOME}/.surrogate/logs/cron.log" mkdir -p "$(dirname "$LOG")" while true; do M=$(($(date +%s) / 60)) # Every 2 min: continuous local dev (qwen3-coder when ready, else gemma) - [[ $((M % 2)) -eq 0 ]] && bash ~/.claude/bin/surrogate-dev-loop.sh 1 >> "$LOG" 2>&1 & + [[ $((M % 2)) -eq 0 ]] && bash ~/.surrogate/bin/surrogate-dev-loop.sh 1 >> "$LOG" 2>&1 & # Every 5 min: producer pushes priorities to Redis - [[ $((M % 5)) -eq 0 ]] && bash ~/.claude/bin/work-queue-producer.sh >> "$LOG" 2>&1 & + [[ $((M % 5)) -eq 0 ]] && bash ~/.surrogate/bin/work-queue-producer.sh >> "$LOG" 2>&1 & # Every 3 min: training-pair push to HF (drains ~/.surrogate/training-pairs.jsonl) - [[ $((M % 3)) -eq 0 ]] && bash ~/.claude/bin/push-training-to-hf.sh >> "$LOG" 2>&1 & + [[ $((M % 3)) -eq 0 ]] && bash ~/.surrogate/bin/push-training-to-hf.sh >> "$LOG" 2>&1 & # Every 20 min: full orchestrate chain (architect → dev → qa → reviewer + git push) - [[ $((M % 20)) -eq 0 ]] && bash ~/.claude/bin/auto-orchestrate-loop.sh >> "$LOG" 2>&1 & + [[ $((M % 20)) -eq 0 ]] && bash ~/.surrogate/bin/auto-orchestrate-loop.sh >> "$LOG" 2>&1 & # Every 30 min: research-apply (pop queue → orchestrate → ship feature) - [[ $((M % 30)) -eq 15 ]] && bash ~/.claude/bin/surrogate-research-apply.sh >> "$LOG" 2>&1 & + [[ $((M % 30)) -eq 15 ]] && bash ~/.surrogate/bin/surrogate-research-apply.sh >> "$LOG" 2>&1 & # Every 60 min: keyword tuner (adapts scrape queue based on yields) - [[ $((M % 60)) -eq 0 ]] && bash ~/.claude/bin/scrape-keyword-tuner.sh >> "$LOG" 2>&1 & + [[ $((M % 60)) -eq 0 ]] && bash ~/.surrogate/bin/scrape-keyword-tuner.sh >> "$LOG" 2>&1 & # Every 6 hours: research-loop (discover new features from competitors/papers) - [[ $((M % 360)) -eq 30 ]] && bash ~/.claude/bin/surrogate-research-loop.sh >> "$LOG" 
2>&1 & + [[ $((M % 360)) -eq 30 ]] && bash ~/.surrogate/bin/surrogate-research-loop.sh >> "$LOG" 2>&1 & # Every 12 hours: dataset enrich (pulls fresh public datasets, dedups, uploads to HF) - [[ $((M % 720)) -eq 60 ]] && bash ~/.claude/bin/dataset-enrich.sh >> "$LOG" 2>&1 & + [[ $((M % 720)) -eq 60 ]] && bash ~/.surrogate/bin/dataset-enrich.sh >> "$LOG" 2>&1 & sleep 60 done CRONSH @@ -216,4 +235,4 @@ python3 -c "import fastapi, uvicorn; print(f' fastapi {fastapi.__version__} + u } # Run as PID 1 — uvicorn handles signals + auto-restart on crash -exec python3 ~/.claude/bin/hermes-status-server.py +exec python3 ~/.surrogate/bin/hermes-status-server.py