diff --git a/Dockerfile b/Dockerfile index 6718442ce13836a0b66cf21651dfe82422a61a2e..11a3f2070e1a15bf3d65d94daee8167cdcb91495 100644 --- a/Dockerfile +++ b/Dockerfile @@ -1,5 +1,5 @@ -# Hermes on Hugging Face Spaces (CPU 16 GB) -# Single-container that runs Ollama + Redis + all Hermes daemons. +# Surrogate-1 on Hugging Face Spaces (CPU 16 GB) +# Single-container that runs Ollama + Redis + all Surrogate daemons. FROM python:3.12-slim # ── System deps ────────────────────────────────────────────────────────────── @@ -14,32 +14,41 @@ RUN curl -fsSL https://ollama.com/install.sh | sh # ── App user (HF Spaces requires uid 1000) ────────────────────────────────── RUN useradd -m -u 1000 hermes ENV HOME=/home/hermes \ - PATH=/home/hermes/.local/bin:/usr/local/bin:/usr/bin:/bin \ + PATH=/home/hermes/.surrogate/bin:/home/hermes/.local/bin:/usr/local/bin:/usr/bin:/bin \ + SURROGATE_HOME=/home/hermes/.surrogate \ HERMES_HOME=/home/hermes/.hermes \ PYTHONUNBUFFERED=1 WORKDIR /home/hermes -# ── Python deps for Hermes Discord bot + scrape + RAG ─────────────────────── +# ── Python deps for Discord bot + scrape + RAG ────────────────────────────── COPY --chown=hermes:hermes requirements.txt /tmp/requirements.txt RUN pip install --no-cache-dir -r /tmp/requirements.txt -# ── Copy Hermes scripts + config skeleton ─────────────────────────────────── -COPY --chown=hermes:hermes bin/ /home/hermes/.claude/bin/ +# ── Copy Surrogate scripts + config skeleton ──────────────────────────────── +# Surrogate's home: ~/.surrogate/bin/ (separate from Claude Code's ~/.claude/) +COPY --chown=hermes:hermes bin/ /home/hermes/.surrogate/bin/ COPY --chown=hermes:hermes config/ /home/hermes/.hermes/config/ COPY --chown=hermes:hermes start.sh /home/hermes/start.sh -# start.sh orchestrates everything (Redis + Ollama + daemons + status server) — no supervisord needed -RUN chmod +x /home/hermes/.claude/bin/*.sh /home/hermes/start.sh +RUN chmod +x /home/hermes/.surrogate/bin/*.sh /home/hermes/start.sh USER 
hermes -# ── Persistent dirs (HF mounts /data) ──────────────────────────────────────── -RUN mkdir -p /home/hermes/.claude/state /home/hermes/.claude/logs \ - /home/hermes/.surrogate /home/hermes/.hermes/workspace \ - /home/hermes/.ollama +# ── Persistent dirs (HF mounts /data into ~/.surrogate symlink) ───────────── +RUN mkdir -p /home/hermes/.surrogate/state /home/hermes/.surrogate/logs \ + /home/hermes/.surrogate/workspace /home/hermes/.surrogate/memory \ + /home/hermes/.surrogate/skills /home/hermes/.surrogate/sessions \ + /home/hermes/.hermes/workspace /home/hermes/.ollama + +# ── Backward-compat: legacy refs to ~/.claude/bin/ + ~/.claude/logs/ ──────── +# Some scripts still reference old paths; symlink prevents breakage during +# progressive migration. Eventually all callers should use ~/.surrogate/. +RUN mkdir -p /home/hermes/.claude && \ + ln -sfn /home/hermes/.surrogate/bin /home/hermes/.claude/bin && \ + ln -sfn /home/hermes/.surrogate/logs /home/hermes/.claude/logs && \ + ln -sfn /home/hermes/.surrogate/state /home/hermes/.claude/state # ── Expose port 7860 (HF default) ──────────────────────────────────────────── EXPOSE 7860 -# Run supervisord — manages ollama + redis + all hermes daemons CMD ["/home/hermes/start.sh"] diff --git a/bin/agentic-crawler.sh b/bin/agentic-crawler.sh index 2b0e3aaa5e1393827e3b74a59bb539471cb61add..5a5bd91e2f07eac7d57e68bc4512877fed235957 100755 --- a/bin/agentic-crawler.sh +++ b/bin/agentic-crawler.sh @@ -9,8 +9,8 @@ set -uo pipefail set -a; source "$HOME/.hermes/.env" 2>/dev/null; set +a -DB="$HOME/.claude/state/agentic-frontier.db" -LOG="$HOME/.claude/logs/agentic-crawler.log" +DB="$HOME/.surrogate/state/agentic-frontier.db" +LOG="$HOME/.surrogate/logs/agentic-crawler.log" PAIRS="$HOME/.surrogate/training-pairs.jsonl" mkdir -p "$(dirname "$DB")" "$(dirname "$LOG")" "$(dirname "$PAIRS")" diff --git a/bin/ai-fallback.sh b/bin/ai-fallback.sh new file mode 100755 index 
0000000000000000000000000000000000000000..8e2b4ee91ce0057d68d36633539f3a69d09f2dc8 --- /dev/null +++ b/bin/ai-fallback.sh @@ -0,0 +1,422 @@ +#!/usr/bin/env bash +# AI Fallback Chain (cost-optimized, cloud-only, no local LLM) +# +# Priority chain: +# 1. Claude Opus 4.7 via Max subscription (primary, flat $100/mo) +# 2. Claude Sonnet 4.6 via Max subscription (separate quota pool!) +# 3. OpenRouter pay-per-use (cheap+capable non-Sonnet picks) +# 4. Gemini 2.5 FL FREE 1000/day +# 5. Groq Llama-3.3 FREE 1000/day +# +# Usage: +# ai-fallback.sh "your question" +# ai-fallback.sh --force gpt5 "your question" +# ai-fallback.sh --tier cheap "your question" # OpenRouter uses DeepSeek +# ai-fallback.sh --skip claude-opus "your question" +set -e + +# Source API keys FIRST — load BOTH env files (hermes + claude). +# Order matters: claude.env first, hermes.env wins on conflict +# (hermes has newer keys like GITHUB_MODELS_TOKEN, SAMBANOVA_API_KEY, CLOUDFLARE_*) +# shellcheck disable=SC1090 +set -a +[ -f "$HOME/.surrogate/.env" ] && . "$HOME/.surrogate/.env" +[ -f "$HOME/.hermes/.env" ] && . "$HOME/.hermes/.env" +set +a + +QUERY="" +FORCE="" +SKIP="" +VERBOSE=0 +TASK="" +export OR_TIER="" + +while [ $# -gt 0 ]; do + case "$1" in + --force) FORCE="$2"; shift 2 ;; + --skip) SKIP="$2"; shift 2 ;; + --tier) export OR_TIER="$2"; shift 2 ;; + --task) TASK="$2"; shift 2 ;; + --cheap) export OR_TIER="cheap"; shift ;; + --fast) export OR_TIER="fast"; shift ;; + --balanced) export OR_TIER="balanced"; shift ;; + --premium) export OR_TIER="premium"; shift ;; + -v|--verbose) VERBOSE=1; shift ;; + *) QUERY="$QUERY $1"; shift ;; + esac +done +QUERY=$(echo "$QUERY" | /usr/bin/sed 's/^ *//') +[ -z "$QUERY" ] && { /usr/bin/head -15 "$0"; exit 1; } + +# --task — pick the strongest free model per provider for the task. +# Sets per-provider env vars that try_* functions read (bridge --model alias). +# Auto-detect if not provided: code keywords → coding, reasoning keywords → reasoning. 
+if [ -z "$TASK" ]; then + q_lower=$(echo "$QUERY" | /usr/bin/tr '[:upper:]' '[:lower:]') + if echo "$q_lower" | /usr/bin/grep -qE "code|function|implement|refactor|bug|class|method|api|sql|terraform|cloudformation|dockerfile|kubernetes|yaml|typescript|javascript|python|rust|golang"; then + TASK="coding" + elif echo "$q_lower" | /usr/bin/grep -qE "analyze|reason|explain why|compare|evaluate|architect|design|trade-?off|deep|think step|proof|calculate|complex"; then + TASK="reasoning" + fi +fi + +case "$TASK" in + coding) + # Code = Codestral (GitHub, Mistral) / DeepSeek-V3.1 (SambaNova) / Qwen Coder (local) + export GITHUB_MODEL="codestral" ; export SAMBANOVA_MODEL="deepseek" + export CLOUDFLARE_MODEL="deepseek" ; export GROQ_MODEL="qwen" + export LOCAL_MODEL="qwen-coder" + ;; + reasoning) + # Reasoning = DeepSeek R1 (GitHub, CoT) / Grok 3 / DeepSeek R1 distill (CF) + export GITHUB_MODEL="reasoning" ; export SAMBANOVA_MODEL="deepseek-latest" + export CLOUDFLARE_MODEL="reasoning" ; export GROQ_MODEL="qwen" + export LOCAL_MODEL="granite" + ;; + fast) + # Fast = smallest/quickest tier per provider + export GITHUB_MODEL="mini" ; export SAMBANOVA_MODEL="fast" + export CLOUDFLARE_MODEL="fast" ; export GROQ_MODEL="fast" + export LOCAL_MODEL="tiny" + ;; + long-context|long|kimi) + # 200k+ context — Kimi on CF, gpt-oss-120b elsewhere + export GITHUB_MODEL="llama405" ; export SAMBANOVA_MODEL="gpt-oss" + export CLOUDFLARE_MODEL="kimi" ; export GROQ_MODEL="gpt-oss" + export LOCAL_MODEL="granite" + ;; + creative|chat|*) + # Default — smartest general-purpose free model per provider + export GITHUB_MODEL="gpt-4o" ; export SAMBANOVA_MODEL="llama70" + export CLOUDFLARE_MODEL="gpt-oss" ; export GROQ_MODEL="llama70" + export LOCAL_MODEL="granite" + ;; +esac + +# --- Semantic RAG context injection (embedding-powered) --- +# For coding/reasoning/creative tasks, fetch top-3 semantically similar docs +# from embeddings.db and prepend to QUERY. ~50ms overhead, improves grounding. 
+if [[ "$TASK" == "coding" || "$TASK" == "reasoning" || "$TASK" == "creative" ]]; then + if [[ -f "$HOME/.surrogate/embeddings.db" ]]; then + EMB_COUNT=$(/usr/bin/sqlite3 "$HOME/.surrogate/embeddings.db" 'SELECT COUNT(*) FROM embeddings' 2>/dev/null || echo 0) + if [[ "$EMB_COUNT" -ge 100 ]]; then + SEM_CONTEXT=$(/usr/bin/python3 "$HOME/.surrogate/bin/embed-doc.py" --query "$QUERY" 2>/dev/null | /usr/bin/head -15) + if [[ -n "$SEM_CONTEXT" ]]; then + QUERY="=== RAG CONTEXT (top-5 semantic matches from knowledge base) === +$SEM_CONTEXT + +=== TASK === +$QUERY" + fi + fi + fi +fi + +log() { [ $VERBOSE -eq 1 ] && echo "[$(date +%H:%M:%S)] $*" >&2; } + +# Capture successful response → log to knowledge base (non-blocking) +save_response() { + local provider="$1" model="$2" response="$3" + [ -z "$response" ] && return + ( "$HOME/.surrogate/bin/log-interaction.sh" "$QUERY" "$response" "$provider" "$model" > /dev/null 2>&1 & ) || true +} + +# --- System prompt from knowledge base + auto code-search if code query --- +build_system_prompt() { + local kb="" profile="" code_ctx="" q_lower + [ -f "$HOME/.surrogate/memory/knowledge_index.md" ] && kb="$(/usr/bin/head -50 $HOME/.surrogate/memory/knowledge_index.md)" + [ -f "$HOME/.surrogate/memory/user_profile.md" ] && profile="$(cat $HOME/.surrogate/memory/user_profile.md)" + + q_lower=$(echo "$QUERY" | /usr/bin/tr '[:upper:]' '[:lower:]') + local is_generate=0 is_code=0 + echo "$q_lower" | /usr/bin/grep -qE "code|function|implement|refactor|bug|error|class|method|api|endpoint|schema|model|service|controller|middleware|auth|database|query|sql|deploy|pipeline|terraform|cloudformation|dockerfile|kubernetes|helm|yaml" && is_code=1 + echo "$q_lower" | /usr/bin/grep -qE "create|generate|write|build|new|template|scaffold|design" && is_generate=1 + + if [ "$is_code" = "1" ] && [ -d "$HOME/.surrogate/code-vector-db" ]; then + if [ "$is_generate" = "1" ] && [ -x "$HOME/.surrogate/bin/find-gold-examples.sh" ]; then + # Generation task → 
inject FULL reference files (better style match) + code_ctx=$("$HOME/.surrogate/bin/find-gold-examples.sh" --top 2 --max-bytes 5000 "$QUERY" 2>/dev/null) + elif [ -x "$HOME/.surrogate/bin/code-search.sh" ]; then + # Query task → snippets only (faster) + code_ctx=$("$HOME/.surrogate/bin/code-search.sh" --top 3 "$QUERY" 2>/dev/null | /usr/bin/head -60) + fi + fi + + local prompt="You are Ashira's AI assistant. Context: $profile + +Pattern index: $kb" + if [ -n "$code_ctx" ]; then + prompt="$prompt + +=== ASHIRA'S EXISTING CODE (match this style EXACTLY) === +$code_ctx +=== END EXAMPLES === + +Style rules enforced: +- Follow naming/indent/comment style from examples above +- Use exact same Parameter/Resource names when applicable +- Preserve existing conventions (tags, naming, Description format)" + fi + prompt="$prompt + +Be concise. Cite file paths when referencing existing code." + echo "$prompt" +} +SYSTEM=$(build_system_prompt) + +# --- Anthropic via Max plan (routes through claude-bridge.sh CLI) --- +# Direct HTTPS to api.anthropic.com with OAuth token returns 401 — OAuth flow +# is managed by `claude` CLI (keychain/config). Use the bridge instead. 
+try_anthropic() { + local model="$1" extra="$2" + log "→ Claude Max: $model" + local out + out=$(echo "$QUERY" | "$HOME/.surrogate/bin/claude-bridge.sh" --model "$model" $extra 2>>/tmp/ai-fallback.err) || return 1 + [ -z "$out" ] && return 1 + echo "$out" + save_response "anthropic" "$model" "$out" + return 0 +} + +# Opus needs --force outside 01:00-06:00 window; sonnet is always available +try_claude_opus() { try_anthropic "opus" "--force"; } +try_claude_sonnet() { try_anthropic "sonnet" ""; } + +# OpenRouter FREE — tries multiple free models (each has strict rate limit) +# Order: coder-first → general-powerhouse → smaller fallbacks +try_openrouter_free() { + [ -z "${OPENROUTER_API_KEY:-}" ] && return 2 + local free_models=( + "qwen/qwen3-coder:free" + "qwen/qwen3-next-80b-a3b-instruct:free" + "openai/gpt-oss-120b:free" + "nvidia/nemotron-3-super-120b-a12b:free" + "meta-llama/llama-3.3-70b-instruct:free" + "z-ai/glm-4.5-air:free" + "google/gemma-4-31b-it:free" + "openai/gpt-oss-20b:free" + ) + for m in "${free_models[@]}"; do + OPENROUTER_MODEL="$m" try_openrouter && return 0 + log " ↳ free '$m' unavailable, trying next free..." 
+ done + return 1 +} + +# --- OpenRouter (cheap+capable non-Sonnet picks) --- +try_openrouter() { + [ -z "${OPENROUTER_API_KEY:-}" ] && return 2 + # Default: GPT-5.4 (beats Claude Opus 4.6 per benchmarks, -50% cost vs Opus 4.7) + local model="${OPENROUTER_MODEL:-openai/gpt-5.4}" + case "${OR_TIER:-}" in + # PAID tiers + cheap) model="deepseek/deepseek-v3.2" ;; # $0.26/$0.42 — cheapest capable + fast) model="x-ai/grok-4.1-fast" ;; # $0.20/$0.50 — ultra cheap, 2M ctx + balanced) model="openai/gpt-5.4" ;; # $2.50/$15 — DEFAULT, beats Opus 4.6 + premium) model="anthropic/claude-opus-4.7" ;; # $5/$25 — if really need Opus + grok) model="x-ai/grok-4.20" ;; # $2/$6 — 2M ctx, cool + gemini) model="google/gemini-3.1-pro-preview" ;;# $2/$12 + # FREE tiers (29 models available) + free|free-coder) model="qwen/qwen3-coder:free" ;; # coding, 262k ctx + free-large) model="qwen/qwen3-next-80b-a3b-instruct:free" ;; # 80B MoE + free-nvidia) model="nvidia/nemotron-3-super-120b-a12b:free" ;; # 120B + free-gptoss) model="openai/gpt-oss-120b:free" ;; # OpenAI open-sourced + free-llama) model="meta-llama/llama-3.3-70b-instruct:free" ;; + free-kimi) model="moonshotai/kimi-k2.5" ;; # Kimi 256k ctx + free-glm) model="z-ai/glm-4.5-air:free" ;; + free-gemma) model="google/gemma-4-31b-it:free" ;; # Google Gemma 4 + esac + log "→ OpenRouter: $model" + local body + # Use env vars — avoids quote-escape hell with multiline system prompt. 
+ # max_tokens=4000 (GPT-5.4 requires >= 16; stay well above) + body=$(ORM="$model" SYS="$SYSTEM" Q="$QUERY" "$HOME/.surrogate/venv/bin/python" -c " +import json, os +m = {'model':os.environ['ORM'],'max_tokens':4000, + 'messages':[{'role':'system','content':os.environ['SYS']}, + {'role':'user','content':os.environ['Q']}]} +print(json.dumps(m)) +" 2>&1) || { log " body-build failed: $body"; return 1; } + local resp code body_resp + resp=$(/usr/bin/curl -sS -w "\n%{http_code}" \ + --max-time 90 \ + -X POST "https://openrouter.ai/api/v1/chat/completions" \ + -H "Authorization: Bearer $OPENROUTER_API_KEY" \ + -H "HTTP-Referer: https://ashira.local" \ + -H "X-Title: ai-fallback" \ + -H "content-type: application/json" \ + -d "$body" 2>&1) + code=$(echo "$resp" | /usr/bin/tail -1) + body_resp=$(echo "$resp" | /usr/bin/sed '$d') + if [ "$code" != "200" ]; then + # Log real error reason for debug + local errmsg + errmsg=$(echo "$body_resp" | "$HOME/.surrogate/venv/bin/python" -c " +import sys, json +try: d=json.load(sys.stdin); print(d.get('error',{}).get('message','unknown')[:120]) +except: print('parse-fail') +" 2>/dev/null || echo "unknown") + log " [$code] $errmsg — falling through" + return 1 + fi + local out + out=$(echo "$body_resp" | "$HOME/.surrogate/venv/bin/python" -c " +import sys, json +d = json.load(sys.stdin) +print(d['choices'][0]['message']['content']) +") || return 1 + echo "$out" + save_response "openrouter" "$model" "$out" + return 0 +} + +# --- Gemini (free) --- +try_gemini() { + [ -z "${GEMINI_API_KEY:-}" ] && return 2 + local model="${GEMINI_MODEL:-gemini-2.5-flash}" + log "→ Gemini: $model (free)" + local body + body=$("$HOME/.surrogate/venv/bin/python" -c " +import json +m = {'systemInstruction':{'parts':[{'text':'''$SYSTEM'''}]}, + 'contents':[{'role':'user','parts':[{'text':'''$QUERY'''}]}], + 'generationConfig':{'maxOutputTokens':4000}} +print(json.dumps(m)) +" 2>/dev/null) + local resp code body_resp + resp=$(/usr/bin/curl -sS -w 
"\n%{http_code}" \ + -X POST "https://generativelanguage.googleapis.com/v1beta/models/$model:generateContent?key=$GEMINI_API_KEY" \ + -H "content-type: application/json" -d "$body" 2>&1) + code=$(echo "$resp" | /usr/bin/tail -1) + body_resp=$(echo "$resp" | /usr/bin/sed '$d') + [ "$code" != "200" ] && { log " [$code] falling through"; return 1; } + local out + out=$(echo "$body_resp" | "$HOME/.surrogate/venv/bin/python" -c " +import sys, json +d = json.load(sys.stdin) +print(d['candidates'][0]['content']['parts'][0]['text']) +") || return 1 + echo "$out" + save_response "gemini" "$model" "$out" + return 0 +} + +# --- Groq (free, ultra-fast) --- +try_groq() { + [ -z "${GROQ_API_KEY:-}" ] && return 2 + local model="${GROQ_MODEL:-llama70}" + log "→ Groq: $model (free)" + # Route through groq-bridge for consistent alias handling (llama70, fast, qwen, gpt-oss...) + local out + out=$(echo "$QUERY" | "$HOME/.surrogate/bin/groq-bridge.sh" --model "$model" 2>>/tmp/ai-fallback.err) || return 1 + [ -z "$out" ] && return 1 + echo "$out" + save_response "groq" "$model" "$out" + return 0 +} + +# --- GitHub Models (free via PAT, OpenAI-compat, GPT-4o-mini/Llama 3.3/Mistral/DeepSeek) --- +try_github() { + [ -z "${GITHUB_MODELS_TOKEN:-}${GITHUB_TOKEN:-}" ] && return 2 + local model="${GITHUB_MODEL:-gpt-4o}" + log "→ GitHub Models: $model (free)" + local out + out=$(echo "$QUERY" | "$HOME/.surrogate/bin/github-bridge.sh" --model "$model" 2>>/tmp/ai-fallback.err) || return 1 + [ -z "$out" ] && return 1 + echo "$out" + save_response "github" "$model" "$out" + return 0 +} + +# --- SambaNova Cloud (free, ~500 tok/s Llama 3.3 70B / DeepSeek V3.2 / Llama 4) --- +try_sambanova() { + [ -z "${SAMBANOVA_API_KEY:-}" ] && return 2 + local model="${SAMBANOVA_MODEL:-llama70}" + log "→ SambaNova: $model (free)" + local out + out=$(echo "$QUERY" | "$HOME/.surrogate/bin/sambanova-bridge.sh" --model "$model" 2>>/tmp/ai-fallback.err) || return 1 + [ -z "$out" ] && return 1 + echo "$out" + 
save_response "sambanova" "$model" "$out" + return 0 +} + +# --- Cloudflare Workers AI (free 10k neurons/day, Llama 3.3 / Gemma-3 / Qwen Coder) --- +try_cloudflare() { + [ -z "${CLOUDFLARE_API_TOKEN:-}${CF_API_TOKEN:-}" ] && return 2 + [ -z "${CLOUDFLARE_ACCOUNT_ID:-}${CF_ACCOUNT_ID:-}" ] && return 2 + local model="${CLOUDFLARE_MODEL:-gpt-oss}" + log "→ Cloudflare WAI: $model (free)" + local out + out=$(echo "$QUERY" | "$HOME/.surrogate/bin/cloudflare-bridge.sh" --model "$model" 2>>/tmp/ai-fallback.err) || return 1 + [ -z "$out" ] && return 1 + echo "$out" + save_response "cloudflare" "$model" "$out" + return 0 +} + +# --- Local Ollama — always-on, always-free ultimate fallback --- +# Bench (M3 24GB): granite4:7b-a1b-h (4.2GB, ~7s/fib+memo — fast & correct). +# Task-aware: code → qwen-coder:7b, chat → granite, tiny → qwen:3b. +# gemma4:26b BLOCKED — user directive (too slow for this hw). +try_granite() { + # Check ollama running + /usr/bin/curl -sS --max-time 3 http://localhost:11434/api/tags > /dev/null 2>&1 || return 2 + local alias="${LOCAL_MODEL:-granite}" + log "→ Local Ollama: $alias (free, always-on)" + local out + out=$(echo "$QUERY" | "$HOME/.surrogate/bin/granite-bridge.sh" --model "$alias" 2>>/tmp/ai-fallback.err) || return 1 + [ -z "$out" ] && return 1 + echo "$out" + save_response "ollama-local" "$alias" "$out" + return 0 +} + +# --- Execute chain (FREE-FIRST for routine/bulk tasks) --- +# Order: free APIs → claude-sonnet (Max plan safety net) → local Ollama (ultimate backstop) +# IMPORTANT-tasks (retro/sprint/skill-sanitize/agent-critic/security-audit/mythos-audit) +# → call claude-bridge.sh --model opus --force DIRECTLY, bypass this chain +# REVIEWER/hallucination-check → call claude-bridge.sh --model sonnet DIRECTLY +# Paid OpenRouter removed per user direction (use Max plan instead of pay-per-use) +PROVIDERS="github sambanova cloudflare groq openrouter-free gemini claude-sonnet granite" + +# Explicit --force +if [ -n "$FORCE" ]; then + case 
"$FORCE" in + claude-opus|opus) try_claude_opus && exit 0 ;; + claude-sonnet|sonnet) try_claude_sonnet && exit 0 ;; + openrouter|or) try_openrouter && exit 0 ;; + openrouter-free|free) try_openrouter_free && exit 0 ;; + gpt5|gpt) OPENROUTER_MODEL="openai/gpt-5.4" try_openrouter && exit 0 ;; + grok) OPENROUTER_MODEL="x-ai/grok-4.20" try_openrouter && exit 0 ;; + deepseek) OPENROUTER_MODEL="deepseek/deepseek-v3.2" try_openrouter && exit 0 ;; + gemini) try_gemini && exit 0 ;; + groq) try_groq && exit 0 ;; + github|gh) try_github && exit 0 ;; + sambanova|samba) try_sambanova && exit 0 ;; + cloudflare|cf) try_cloudflare && exit 0 ;; + granite|local|ollama) try_granite && exit 0 ;; + *) echo "[error] unknown --force '$FORCE'" >&2; exit 1 ;; + esac + echo "[error] forced provider failed" >&2; exit 1 +fi + +# Auto chain with skip support +for p in $PROVIDERS; do + if [ -n "$SKIP" ] && [ "$p" = "$SKIP" ]; then continue; fi + case "$p" in + github) try_github && exit 0 ;; + sambanova) try_sambanova && exit 0 ;; + cloudflare) try_cloudflare && exit 0 ;; + claude-opus) try_claude_opus && exit 0 ;; + claude-sonnet) try_claude_sonnet && exit 0 ;; + openrouter) try_openrouter && exit 0 ;; + openrouter-free) try_openrouter_free && exit 0 ;; + gemini) try_gemini && exit 0 ;; + groq) try_groq && exit 0 ;; + granite) try_granite && exit 0 ;; + esac +done + +echo "[error] all providers exhausted" >&2 +exit 1 diff --git a/bin/ask-sqlite.py b/bin/ask-sqlite.py new file mode 100755 index 0000000000000000000000000000000000000000..89022db767fa61306543c72b8bf321dbc9b1aa24 --- /dev/null +++ b/bin/ask-sqlite.py @@ -0,0 +1,175 @@ +#!/usr/bin/env python3 +""" +Local RAG assistant — SQLite FTS5 (replaces Chroma) + local LLM. +Stable, no Rust crashes, fast. 
+ +Usage: + ask-sqlite.py "คำถาม" # single shot + ask-sqlite.py -i # interactive + ask-sqlite.py --source code "คำถาม" # filter by source + ask-sqlite.py --project Vanguard "คำถาม" +""" +import sys, json, sqlite3, argparse, subprocess, urllib.request, re +from pathlib import Path + +DB = str(Path.home() / ".surrogate/index.db") +OLLAMA = "http://localhost:11434/api/chat" +DEFAULT_MODEL = "granite4:7b-a1b-h" + +AXENTX = Path("/Users/Ashira/axentx") +PROJECTS = ["Costinel", "Vanguard", "arkship", "surrogate", "workio"] + + +def fts_escape(query: str) -> str: + """Turn a natural query into FTS5 MATCH syntax — use each non-trivial word.""" + words = re.findall(r"\w{3,}", query) # keep alnum words ≥3 chars + if not words: return '"placeholder"' + # OR query for flexibility + return " OR ".join(f'"{w}"' for w in words[:10]) + + +def search(query: str, n: int = 10, source: str = None, project: str = None): + conn = sqlite3.connect(DB) + conn.row_factory = sqlite3.Row + fts_q = fts_escape(query) + sql = """ + SELECT d.source, d.project, d.path, d.topic, d.instruction, d.response, + rank + FROM docs_fts f JOIN docs d ON f.rowid = d.id + WHERE docs_fts MATCH ? + """ + params = [fts_q] + if source: + sql += " AND d.source LIKE ?" + params.append(f"%{source}%") + if project: + sql += " AND d.project LIKE ?" + params.append(f"%{project}%") + sql += " ORDER BY rank LIMIT ?" + params.append(n) + + try: + rows = conn.execute(sql, params).fetchall() + except sqlite3.OperationalError as e: + # FTS syntax error — fallback to LIKE + conn = sqlite3.connect(DB) + conn.row_factory = sqlite3.Row + rows = conn.execute( + "SELECT source, project, path, topic, instruction, response FROM docs " + "WHERE instruction LIKE ? OR response LIKE ? 
LIMIT ?", + (f"%{query[:80]}%", f"%{query[:80]}%", n) + ).fetchall() + return rows + + +def agents_md() -> str: + parts = [] + for proj in PROJECTS: + md = AXENTX / proj / "AGENTS.md" + if md.exists(): + parts.append(f"=== {proj}/AGENTS.md ===\n" + "\n".join(md.read_text().split("\n")[:15])) + return "\n\n".join(parts) + + +def git_recent() -> str: + out = [] + for proj in PROJECTS: + p = AXENTX / proj + if not (p / ".git").exists(): continue + try: + r = subprocess.run(["git","-C",str(p),"log","--oneline","-5"], + capture_output=True, text=True, timeout=3) + if r.stdout.strip(): + out.append(f"=== {proj} ===\n{r.stdout.strip()}") + except: pass + return "\n".join(out) + + +def build_context(question, source=None, project=None): + parts = ["## AGENTS.md\n" + agents_md()] + g = git_recent() + if g: parts.append("## Recent commits\n" + g) + + rows = search(question, n=8, source=source, project=project) + if rows: + hits = [] + for r in rows: + tag = r["source"] or "?" + path = r["path"] or "" + proj = r["project"] or "" + content = r["response"] or r["instruction"] or "" + hits.append(f"[{tag}:{proj}/{path[-60:]}]\n{content[:500]}") + parts.append(f"## Relevant docs (SQLite FTS, {len(rows)} matches)\n" + "\n\n".join(hits)) + return "\n\n".join(parts)[:12000] + + +SYSTEM_PROMPT = ( + "คุณคือ local assistant ตอบจาก Context เท่านั้น. ไม่รู้ก็บอก. " + "ภาษาไทย กระชับ. อ้าง path/source ที่เกี่ยวข้อง." 
+) + + +def ask_ollama(messages, model): + payload = {"model": model, "messages": messages, "stream": False} + req = urllib.request.Request(OLLAMA, data=json.dumps(payload).encode(), + headers={"Content-Type": "application/json"}) + with urllib.request.urlopen(req, timeout=180) as r: + return json.loads(r.read()).get("message", {}).get("content", "(no response)") + + +def single(question, model, source, project): + print(f"🔍 SQLite FTS search...", file=sys.stderr) + ctx = build_context(question, source, project) + print(f" context: {len(ctx)} chars", file=sys.stderr) + print(f"🤖 {model}\n", file=sys.stderr) + msgs = [ + {"role": "system", "content": SYSTEM_PROMPT}, + {"role": "user", "content": f"### Context\n{ctx}\n\n### คำถาม\n{question}"}, + ] + print(ask_ollama(msgs, model)) + + +def interactive(model, source, project): + print(f"🤖 Interactive — {model}, source={source}, project={project}", file=sys.stderr) + print(f" type 'exit' to quit, ':s ' to set source filter", file=sys.stderr) + history = [{"role": "system", "content": SYSTEM_PROMPT}] + base_ctx = None + while True: + try: q = input("❯ ").strip() + except (EOFError, KeyboardInterrupt): break + if not q or q in ("exit","quit"): break + if q.startswith(":s "): + source = q[3:].strip() or None + print(f" source filter: {source}") + continue + + ctx = build_context(q, source, project) + msgs = history + [{"role": "user", "content": f"### Context\n{ctx}\n\n### คำถาม\n{q}"}] + ans = ask_ollama(msgs, model) + history.append({"role": "user", "content": q}) + history.append({"role": "assistant", "content": ans}) + print(f"\n{ans}\n") + if len(history) > 11: + history = [history[0]] + history[-10:] + + +def main(): + ap = argparse.ArgumentParser() + ap.add_argument("-i", "--interactive", action="store_true") + ap.add_argument("-m", "--model", default=DEFAULT_MODEL) + ap.add_argument("--source", help="filter by source (code, github-public, claude-conversation, ...)") + ap.add_argument("--project", help="filter by 
project") + ap.add_argument("question", nargs="*") + args = ap.parse_args() + + if args.interactive: + interactive(args.model, args.source, args.project) + else: + if not args.question: + print("usage: ask 'คำถาม' OR ask -i OR ask --source code 'คำถาม'", file=sys.stderr) + sys.exit(1) + single(" ".join(args.question), args.model, args.source, args.project) + + +if __name__ == "__main__": + main() diff --git a/bin/auto-orchestrate-loop.sh b/bin/auto-orchestrate-loop.sh index 45c76e269b82e0bfd91bb6fcfd629b1b8a045d37..9cefe9ce1ada6045b9173a88594b82871ece8bde 100755 --- a/bin/auto-orchestrate-loop.sh +++ b/bin/auto-orchestrate-loop.sh @@ -9,7 +9,7 @@ set -uo pipefail set -a; source "$HOME/.hermes/.env" 2>/dev/null; set +a -LOG="$HOME/.claude/logs/auto-orchestrate-loop.log" +LOG="$HOME/.surrogate/logs/auto-orchestrate-loop.log" mkdir -p "$(dirname "$LOG")" # Resource guard: 20% headroom @@ -107,14 +107,14 @@ TASK_DESC="Resolve this TODO/FIXME in $PROJ_NAME at $FILE:$LINE: \"$CONTENT\". I cd "$PROJECT" || { echo "[$(date +%H:%M:%S)] cd failed" >> "$LOG"; exit 1; } # Run the orchestrate pipeline (auto-commits on APPROVE) -bash "$HOME/.claude/bin/surrogate-orchestrate.sh" "$TASK_DESC" >> "$LOG" 2>&1 +bash "$HOME/.surrogate/bin/surrogate-orchestrate.sh" "$TASK_DESC" >> "$LOG" 2>&1 RC=$? 
DUR=$(( $(date +%s) - START )) echo "[$(date +%H:%M:%S)] orchestrate done in ${DUR}s rc=$RC" >> "$LOG" # Discord notification -NOTIFY="$HOME/.claude/bin/notify-discord.sh" +NOTIFY="$HOME/.surrogate/bin/notify-discord.sh" if [[ -x "$NOTIFY" ]]; then if [[ $RC -eq 0 ]]; then "$NOTIFY" task "Auto-orchestrate: $PROJ_NAME" "$FILE:$LINE — \`$(echo "$CONTENT" | head -c 80)\` · ${DUR}s" 2>/dev/null & diff --git a/bin/cerebras-bridge.sh b/bin/cerebras-bridge.sh new file mode 100755 index 0000000000000000000000000000000000000000..17f7caf4846bc822b7b97368c4e960984b7ddb8b --- /dev/null +++ b/bin/cerebras-bridge.sh @@ -0,0 +1,58 @@ +#!/usr/bin/env bash +# Cerebras bridge — fastest inference (wafer-scale), llama/qwen/gpt-oss available +set -u +MODEL="llama3.1-8b" +MAX_TOKENS=2000 +TEMP=0.3 +PROMPT="" + +while [[ $# -gt 0 ]]; do + case "$1" in + --model) + case "$2" in + fast|small) MODEL="llama3.1-8b" ;; + big) MODEL="qwen-3-235b-a22b-instruct-2507" ;; + gpt-oss) MODEL="gpt-oss-120b" ;; + glm) MODEL="zai-glm-4.7" ;; + *) MODEL="$2" ;; + esac; shift 2 ;; + --max-tokens) MAX_TOKENS="$2"; shift 2 ;; + *) PROMPT="$*"; break ;; + esac +done +[[ -z "$PROMPT" ]] && [[ ! 
-t 0 ]] && PROMPT=$(cat) +[[ -z "$PROMPT" ]] && { echo "cerebras-bridge: no prompt" >&2; exit 2; } + +LOG="$HOME/.surrogate/logs/cerebras-bridge.log" +mkdir -p "$(dirname "$LOG")" +set -a; source "$HOME/.hermes/.env"; set +a +echo "[$(date '+%H:%M:%S')] model=$MODEL len=${#PROMPT}" >> "$LOG" + +RESPONSE=$(python3 -c " +import os +exec(open(os.path.expanduser('~/.surrogate/bin/lib/dns_fallback.py')).read()) +import json, sys, os, urllib.request, urllib.error +body = { + 'model': '$MODEL', + 'messages': [{'role':'user','content': sys.stdin.read()}], + 'max_tokens': $MAX_TOKENS, 'temperature': $TEMP, +} +req = urllib.request.Request( + 'https://api.cerebras.ai/v1/chat/completions', + data=json.dumps(body).encode(), + headers={'Content-Type':'application/json', 'User-Agent':'hermes-agent/1.0', 'Authorization':'Bearer '+os.environ.get('CEREBRAS_API_KEY','')} +) +try: + with urllib.request.urlopen(req, timeout=120) as r: + d = json.load(r) + print(d.get('choices',[{}])[0].get('message',{}).get('content','')) +except urllib.error.HTTPError as e: + print(f'cerebras-bridge HTTP {e.code}: {e.read()[:200]}', file=sys.stderr) + sys.exit(e.code // 100) +except Exception as e: + print(f'cerebras-bridge error: {e}', file=sys.stderr); sys.exit(1) +" <<< "$PROMPT") +RC=$? +echo "[$(date '+%H:%M:%S')] rc=$RC bytes=${#RESPONSE}" >> "$LOG" +[[ $RC -ne 0 ]] && exit $RC +echo "$RESPONSE" diff --git a/bin/chutes-bridge.sh b/bin/chutes-bridge.sh new file mode 100755 index 0000000000000000000000000000000000000000..474dd51fb0db52baf6ca9f1c21c752ffbc62503a --- /dev/null +++ b/bin/chutes-bridge.sh @@ -0,0 +1,59 @@ +#!/usr/bin/env bash +# Chutes.ai bridge — OpenAI-compat; free-tier, multi-model aggregator. +# Endpoint: https://llm.chutes.ai/v1/chat/completions +# Free tier: ~500 req/day, no CC, solid for Qwen/DeepSeek/Llama models. 
+set -u +MODEL="deepseek-ai/DeepSeek-V3.1" +MAX_TOKENS=2000 +TEMP=0.3 +PROMPT="" + +while [[ $# -gt 0 ]]; do + case "$1" in + --model) + case "$2" in + deepseek|v3) MODEL="deepseek-ai/DeepSeek-V3.1" ;; + qwen|coder) MODEL="Qwen/Qwen3-Coder-480B-A35B-Instruct" ;; + llama|l70) MODEL="meta-llama/Llama-3.3-70B-Instruct" ;; + r1) MODEL="deepseek-ai/DeepSeek-R1" ;; + glm) MODEL="zai-org/GLM-4.6" ;; + *) MODEL="$2" ;; + esac; shift 2 ;; + --max-tokens) MAX_TOKENS="$2"; shift 2 ;; + *) PROMPT="$*"; break ;; + esac +done +[[ -z "$PROMPT" ]] && [[ ! -t 0 ]] && PROMPT=$(cat) +[[ -z "$PROMPT" ]] && { echo "chutes-bridge: no prompt" >&2; exit 2; } + +LOG="$HOME/.surrogate/logs/chutes-bridge.log" +mkdir -p "$(dirname "$LOG")" +set -a; source "$HOME/.hermes/.env"; set +a +echo "[$(date '+%H:%M:%S')] model=$MODEL len=${#PROMPT}" >> "$LOG" + +RESPONSE=$(python3 -c " +import os +exec(open(os.path.expanduser('~/.surrogate/bin/lib/dns_fallback.py')).read()) +exec(open(os.path.expanduser('~/.surrogate/bin/lib/bridge_retry.py')).read()) +import json, sys +body = { + 'model': '$MODEL', + 'messages': [{'role':'user','content': sys.stdin.read()}], + 'max_tokens': $MAX_TOKENS, 'temperature': $TEMP, + 'stream': False, +} +try: + d = request_with_retry( + 'https://llm.chutes.ai/v1/chat/completions', + data=json.dumps(body).encode(), + headers={'Content-Type':'application/json', 'User-Agent':'hermes-agent/1.0', 'Authorization':'Bearer '+os.environ.get('CHUTES_API_KEY','')}, + timeout=120, max_retries=4, base_delay=3.0, open_seconds=120, + ) + print(d.get('choices',[{}])[0].get('message',{}).get('content','')) +except Exception as e: + print(f'chutes-bridge error: {e}', file=sys.stderr); sys.exit(1) +" <<< "$PROMPT") +RC=$? 
+echo "[$(date '+%H:%M:%S')] rc=$RC bytes=${#RESPONSE}" >> "$LOG" +[[ $RC -ne 0 ]] && exit $RC +echo "$RESPONSE" diff --git a/bin/cloudflare-bridge.sh b/bin/cloudflare-bridge.sh new file mode 100755 index 0000000000000000000000000000000000000000..a6c343a2f961a157ad13a6760a288939810f840d --- /dev/null +++ b/bin/cloudflare-bridge.sh @@ -0,0 +1,76 @@ +#!/usr/bin/env bash +# Cloudflare Workers AI bridge — 10k neurons/day free tier +# Endpoint: https://api.cloudflare.com/client/v4/accounts/$ACCOUNT_ID/ai/v1 (OpenAI-compat) +# Key env: CLOUDFLARE_API_TOKEN + CLOUDFLARE_ACCOUNT_ID +# Usage: cloudflare-bridge.sh [--model MODEL] "" +set -u +# Default: gpt-oss-120b — 120B params, highest capability on CF Workers AI free tier. +# Catalog verified 2026-04 — aliases point to models that ACTUALLY respond. +MODEL="@cf/openai/gpt-oss-120b" +MAX_TOKENS=2000 +TEMP=0.3 +PROMPT="" + +while [[ $# -gt 0 ]]; do + case "$1" in + --model) + case "$2" in + fast|small|8b) MODEL="@cf/meta/llama-3.1-8b-instruct-fp8" ;; + llama|llama70|70b) MODEL="@cf/meta/llama-3.3-70b-instruct-fp8-fast" ;; + gpt-oss|oss|120b) MODEL="@cf/openai/gpt-oss-120b" ;; + deepseek|r1|reasoning) MODEL="@cf/deepseek-ai/deepseek-r1-distill-qwen-32b" ;; + kimi|long-ctx) MODEL="@cf/moonshotai/kimi-k2.6" ;; + glm|glm4) MODEL="@cf/zai-org/glm-4.7-flash" ;; + *) MODEL="$2" ;; + esac; shift 2 ;; + --max-tokens) MAX_TOKENS="$2"; shift 2 ;; + --temperature) TEMP="$2"; shift 2 ;; + *) PROMPT="$*"; break ;; + esac +done +[[ -z "$PROMPT" ]] && [[ ! 
-t 0 ]] && PROMPT=$(cat) +[[ -z "$PROMPT" ]] && { echo "cloudflare-bridge: no prompt" >&2; exit 2; } + +LOG="$HOME/.surrogate/logs/cloudflare-bridge.log" +mkdir -p "$(dirname "$LOG")" +set -a; source "$HOME/.hermes/.env" 2>/dev/null || true; set +a + +TOKEN="${CLOUDFLARE_API_TOKEN:-${CF_API_TOKEN:-}}" +ACCOUNT="${CLOUDFLARE_ACCOUNT_ID:-${CF_ACCOUNT_ID:-}}" +if [[ -z "$TOKEN" ]] || [[ -z "$ACCOUNT" ]]; then + echo "cloudflare-bridge: missing CLOUDFLARE_API_TOKEN or CLOUDFLARE_ACCOUNT_ID in ~/.hermes/.env" >&2 + exit 3 +fi + +echo "[$(date '+%H:%M:%S')] model=$MODEL len=${#PROMPT}" >> "$LOG" + +RESPONSE=$(CF_TOKEN="$TOKEN" CF_ACCOUNT="$ACCOUNT" python3 -c " +import os +exec(open(os.path.expanduser('~/.surrogate/bin/lib/dns_fallback.py')).read()) +exec(open(os.path.expanduser('~/.surrogate/bin/lib/bridge_retry.py')).read()) +import json, sys +body = { + 'model': '$MODEL', + 'messages': [{'role':'user','content': sys.stdin.read()}], + 'max_tokens': $MAX_TOKENS, 'temperature': $TEMP, +} +url = f\"https://api.cloudflare.com/client/v4/accounts/{os.environ['CF_ACCOUNT']}/ai/v1/chat/completions\" +try: + d = request_with_retry( + url, + data=json.dumps(body).encode(), + headers={ + 'Content-Type':'application/json', + 'User-Agent':'hermes-agent/1.0', + 'Authorization':'Bearer '+os.environ['CF_TOKEN'], + }, + timeout=120, max_retries=6, base_delay=5.0, open_seconds=180, + ) + print(d.get('choices',[{}])[0].get('message',{}).get('content','')) +except Exception as e: + print(f'cloudflare-bridge error: {e}', file=sys.stderr); sys.exit(1) +" <<< "$PROMPT") +RC=$? +echo "[$(date '+%H:%M:%S')] rc=$RC bytes=${#RESPONSE}" >> "$LOG" +[[ $RC -ne 0 ]] && exit $RC +echo "$RESPONSE" diff --git a/bin/crawl-rss.py b/bin/crawl-rss.py index b11d8a6d2ee572e1cac78010400916e3c97dc86d..9db029493e834dcfd5e578fea8cad8ad05ee0d3b 100755 --- a/bin/crawl-rss.py +++ b/bin/crawl-rss.py @@ -5,7 +5,7 @@ Reads feed URLs from FEEDS env or default list, parses entries, writes JSONL to output file. 
Only writes entries not seen before (dedup by URL). Usage (from bash): - OUT=/tmp/out.jsonl python3 ~/.claude/bin/crawl-rss.py + OUT=/tmp/out.jsonl python3 ~/.surrogate/bin/crawl-rss.py All feeds VERIFIED to return 200 as of 2026-04-19. Failures are logged, not fatal — one bad feed doesn't kill the rest. @@ -86,7 +86,7 @@ FEEDS: list[tuple[str, str]] = [ ] OUT_PATH = os.environ.get("OUT", "/tmp/rss-crawl.jsonl") -SEEN_PATH = os.environ.get("SEEN", os.path.expanduser("~/.claude/.rss-seen.json")) +SEEN_PATH = os.environ.get("SEEN", os.path.expanduser("~/.surrogate/.rss-seen.json")) MAX_ENTRIES_PER_FEED = int(os.environ.get("MAX_PER_FEED", "10")) TIMEOUT = int(os.environ.get("TIMEOUT", "15")) diff --git a/bin/daily-crawl.sh b/bin/daily-crawl.sh index eb57a7e676f4ac19ba7cf7c79be9e5eeb7674f84..d42dbc3a1d89ddcccd041e09d20e9dca513b291a 100755 --- a/bin/daily-crawl.sh +++ b/bin/daily-crawl.sh @@ -14,17 +14,17 @@ while [ $# -gt 0 ]; do done export PATH=/usr/bin:/bin:/usr/local/bin:/opt/homebrew/bin:$PATH -source ~/.claude/.env 2>/dev/null || true +source ~/.hermes/.env 2>/dev/null || true # Also source ~/.hermes/.env (where Surrogate keeps the live tokens) set -a; source ~/.hermes/.env 2>/dev/null || true; set +a DATE=$(date +%Y-%m-%d) CRAWL_DIR="$HOME/Documents/Obsidian Vault/AI-Hub/crawls/$DATE" -mkdir -p "$CRAWL_DIR/raw" "$HOME/.claude/logs" -LOG="$HOME/.claude/logs/crawl-$DATE.log" +mkdir -p "$CRAWL_DIR/raw" "$HOME/.surrogate/logs" +LOG="$HOME/.surrogate/logs/crawl-$DATE.log" log() { echo "[$(date +%H:%M:%S)] $*" | tee -a "$LOG"; } -PY=~/.claude/venv/bin/python +PY=~/.surrogate/venv/bin/python # ═══════════ SOURCES — use Python scripts with explicit env passing ═══════════ @@ -403,6 +403,6 @@ for d in dirs[:60]: PY # Graph sync (async) -[ -x "$HOME/.claude/bin/graph-sync.sh" ] && ("$HOME/.claude/bin/graph-sync.sh" > /dev/null 2>&1 &) || true +[ -x "$HOME/.surrogate/bin/graph-sync.sh" ] && ("$HOME/.surrogate/bin/graph-sync.sh" > /dev/null 2>&1 &) || true log "=== Done: 
$CRAWL_DIR/digest.md ===" diff --git a/bin/dataset-enrich.sh b/bin/dataset-enrich.sh index d012694da8e755be2470c14d6d03d00e43651225..0464d476e351981d9652024dee40141c31e75d33 100755 --- a/bin/dataset-enrich.sh +++ b/bin/dataset-enrich.sh @@ -17,13 +17,13 @@ set -uo pipefail set -a; source "$HOME/.hermes/.env" 2>/dev/null; set +a -LOG="$HOME/.claude/logs/dataset-enrich.log" +LOG="$HOME/.surrogate/logs/dataset-enrich.log" WORK="$HOME/.hermes/workspace/dataset-enrich" mkdir -p "$WORK" "$(dirname "$LOG")" echo "[$(date +%H:%M:%S)] dataset enrich start" | tee "$LOG" -~/.claude/venv/bin/python <<'PYEOF' 2>&1 | tee -a "$LOG" +~/.surrogate/venv/bin/python <<'PYEOF' 2>&1 | tee -a "$LOG" from huggingface_hub import HfApi from pathlib import Path from datasets import load_dataset diff --git a/bin/dev-cloud-daemon.sh b/bin/dev-cloud-daemon.sh index 0220f58cc5a7c9ed45e13a2093953e10cf92281a..92be8174181c030393452e2ffb83300a2fd31bd5 100755 --- a/bin/dev-cloud-daemon.sh +++ b/bin/dev-cloud-daemon.sh @@ -8,7 +8,7 @@ set -u PROVIDER="${1:?usage: dev-cloud-daemon.sh }" -LOG="$HOME/.claude/logs/dev-cloud-daemon-${PROVIDER}.log" +LOG="$HOME/.surrogate/logs/dev-cloud-daemon-${PROVIDER}.log" mkdir -p "$(dirname "$LOG")" # Redis connection: prefer Unix socket, fall back to TCP 127.0.0.1:6379. @@ -65,15 +65,15 @@ except: print('OK')" 2>/dev/null) # and works on exactly what the daemon locked (avoids "no free priority" # dead-ends when the file lock was touched earlier for this same PRIO_ID). 
HERMES_PRIO_ID="$PRIO_ID" \ - "$HOME/.claude/bin/dev-cloud-worker.sh" "$PROVIDER" 2>&1 | tail -3 >> "$LOG" + "$HOME/.surrogate/bin/dev-cloud-worker.sh" "$PROVIDER" 2>&1 | tail -3 >> "$LOG" RC=${PIPESTATUS[0]} DUR=$(( $(date +%s) - START )) echo "[$(date '+%H:%M:%S')] $PROVIDER $PRIO_ID done in ${DUR}s (rc=$RC)" >> "$LOG" # Discord: only notify failures + slow tasks (avoid spam on every success) if [[ $RC -ne 0 ]]; then - "$HOME/.claude/bin/notify-discord.sh" error "Worker failed" "$PROVIDER · $PRIO_ID · ${DUR}s · rc=$RC" 2>/dev/null & + "$HOME/.surrogate/bin/notify-discord.sh" error "Worker failed" "$PROVIDER · $PRIO_ID · ${DUR}s · rc=$RC" 2>/dev/null & elif [[ $DUR -gt 240 ]]; then - "$HOME/.claude/bin/notify-discord.sh" warn "Slow task" "$PROVIDER · $PRIO_ID · ${DUR}s" 2>/dev/null & + "$HOME/.surrogate/bin/notify-discord.sh" warn "Slow task" "$PROVIDER · $PRIO_ID · ${DUR}s" 2>/dev/null & fi done diff --git a/bin/dev-cloud-worker.sh b/bin/dev-cloud-worker.sh index 400732d4706f6a69a9ead83501297ef0fc501001..9892e358dc653cb2736fdcfd10665404af1ef0fd 100755 --- a/bin/dev-cloud-worker.sh +++ b/bin/dev-cloud-worker.sh @@ -7,17 +7,17 @@ # provider = github | samba | cloudflare | groq | gemini # # Rate-limit aware per provider (set by cron schedule, NOT inside script). -# Cross-worker coordination: lockfile per (priority, provider) in ~/.claude/state/dev-locks/ +# Cross-worker coordination: lockfile per (priority, provider) in ~/.surrogate/state/dev-locks/ # Global priority lock: 30-min window, so same priority only gets fresh attempt per provider # every 30 min (prevents redundant work, allows tournament of implementations over time). 
set -u PROVIDER="${1:?usage: dev-cloud-worker.sh }" -LOG="$HOME/.claude/logs/dev-cloud-$PROVIDER.log" +LOG="$HOME/.surrogate/logs/dev-cloud-$PROVIDER.log" OUT_DIR="$HOME/.hermes/workspace/dev-cloud-$PROVIDER" SHARED="$HOME/.hermes/workspace/swarm-shared" -LOCK_DIR="$HOME/.claude/state/dev-locks" +LOCK_DIR="$HOME/.surrogate/state/dev-locks" mkdir -p "$(dirname "$LOG")" "$OUT_DIR" "$LOCK_DIR" START=$(date +%s) @@ -143,7 +143,7 @@ PRIO_PROJECT=$(echo "$PRIORITY" | python3 -c "import json,sys; print(json.loads( echo "[$(date '+%H:%M:%S')] $PROVIDER picked $PRIO_ID ($PRIO_PROJECT: ${PRIO_TITLE:0:60})" >> "$LOG" # -------- Rich context injection (B: enrich with repo + similar funcs + few-shot + deltas) -------- -source "$HOME/.claude/bin/lib/context_builder.sh" +source "$HOME/.surrogate/bin/lib/context_builder.sh" build_rich_context "$PRIO_PROJECT" "$PRIO_ID" "$PRIO_TITLE" # Sets: REPO_MAP, SIMILAR_FUNCS, RAG_EXAMPLES, SEMANTIC_RAG, FEWSHOT_ACCEPTED, ANTI_PATTERNS, PROMPT_DELTAS, PRIO_SPEC @@ -285,37 +285,37 @@ case "$PROVIDER" in github) # Codestral-2501 is Mistral's dedicated code model — free via PAT, top-tier for code tasks. # Better than gpt-4o-mini for coding specifically. Budget-aware: falls through if HALT. 
- RESULT=$(echo "$PROMPT" | "$HOME/.claude/bin/github-bridge.sh" --model codestral 2>>"$LOG") + RESULT=$(echo "$PROMPT" | "$HOME/.surrogate/bin/github-bridge.sh" --model codestral 2>>"$LOG") ;; samba|sambanova) - RESULT=$(echo "$PROMPT" | "$HOME/.claude/bin/sambanova-bridge.sh" --model deepseek 2>>"$LOG") + RESULT=$(echo "$PROMPT" | "$HOME/.surrogate/bin/sambanova-bridge.sh" --model deepseek 2>>"$LOG") ;; cloudflare|cf) - RESULT=$(echo "$PROMPT" | "$HOME/.claude/bin/cloudflare-bridge.sh" --model deepseek 2>>"$LOG") + RESULT=$(echo "$PROMPT" | "$HOME/.surrogate/bin/cloudflare-bridge.sh" --model deepseek 2>>"$LOG") ;; groq) - RESULT=$(echo "$PROMPT" | "$HOME/.claude/bin/groq-bridge.sh" --model qwen 2>>"$LOG") + RESULT=$(echo "$PROMPT" | "$HOME/.surrogate/bin/groq-bridge.sh" --model qwen 2>>"$LOG") ;; gemini) # Use ai-fallback's gemini path - RESULT=$(echo "$PROMPT" | "$HOME/.claude/bin/ai-fallback.sh" --force gemini 2>>"$LOG") + RESULT=$(echo "$PROMPT" | "$HOME/.surrogate/bin/ai-fallback.sh" --force gemini 2>>"$LOG") ;; cerebras) # Wafer-scale — fastest inference on planet (~2000 tok/s). Qwen3 235B excellent for code. 
- RESULT=$(echo "$PROMPT" | "$HOME/.claude/bin/cerebras-bridge.sh" --model big 2>>"$LOG") + RESULT=$(echo "$PROMPT" | "$HOME/.surrogate/bin/cerebras-bridge.sh" --model big 2>>"$LOG") ;; nvidia|nim) # NVIDIA NIM — Llama 3.3 70B, diverse model pool (Nemotron, DeepSeek-R1, Qwen-Coder) - RESULT=$(echo "$PROMPT" | "$HOME/.claude/bin/nvidia-bridge.sh" --model qwen 2>>"$LOG") + RESULT=$(echo "$PROMPT" | "$HOME/.surrogate/bin/nvidia-bridge.sh" --model qwen 2>>"$LOG") ;; chutes) # Chutes.ai aggregator — free tier needs activation; currently may 402 - RESULT=$(echo "$PROMPT" | "$HOME/.claude/bin/chutes-bridge.sh" --model deepseek 2>>"$LOG") + RESULT=$(echo "$PROMPT" | "$HOME/.surrogate/bin/chutes-bridge.sh" --model deepseek 2>>"$LOG") ;; surrogate|surrogate-1) # น้อง — local Ollama, Ashira-personalized (Qwen2.5-Coder-7B + Thai/DevSecOps prompt) # Will be upgraded with LoRA adapter after RunPod training. - RESULT=$(echo "$PROMPT" | "$HOME/.claude/bin/surrogate-bridge.sh" 2>>"$LOG") + RESULT=$(echo "$PROMPT" | "$HOME/.surrogate/bin/surrogate-bridge.sh" 2>>"$LOG") ;; *) echo "[$(date '+%H:%M:%S')] unknown provider $PROVIDER" >> "$LOG" diff --git a/bin/domain-scrape-loop.sh b/bin/domain-scrape-loop.sh index 4c8c9e46fe794db381a5b290750bedc2235b02ca..a4746d61839a906c45fece1d4b4325435f1fed3e 100755 --- a/bin/domain-scrape-loop.sh +++ b/bin/domain-scrape-loop.sh @@ -8,10 +8,10 @@ set -u DUR="${1:-900}" PARALLEL="${2:-3}" -LOG="$HOME/.claude/logs/domain-scrape-loop.log" +LOG="$HOME/.surrogate/logs/domain-scrape-loop.log" START=$(date +%s) BEFORE_PAIRS=$(wc -l ~/axentx/surrogate/data/training-jsonl/*.jsonl 2>/dev/null | tail -1 | awk '{print $1}') -BEFORE_LEDGER=$(sqlite3 ~/.claude/state/scrape-ledger.db "SELECT COUNT(*) FROM scraped" 2>/dev/null) +BEFORE_LEDGER=$(sqlite3 ~/.surrogate/state/scrape-ledger.db "SELECT COUNT(*) FROM scraped" 2>/dev/null) echo "═══ LOOP START $(date +%H:%M:%S) duration=${DUR}s parallel=$PARALLEL" | tee -a "$LOG" echo " before: pairs=$BEFORE_PAIRS 
ledger_repos=$BEFORE_LEDGER" | tee -a "$LOG" @@ -33,7 +33,7 @@ while true; do # Fire N parallel instances, each picks different domain via ledger for i in $(seq 1 $PARALLEL); do ( - ~/.claude/bin/github-domain-scrape.sh >> "$LOG" 2>&1 + ~/.surrogate/bin/github-domain-scrape.sh >> "$LOG" 2>&1 ) & done wait # wait all parallel to finish (30-60s typical) @@ -44,13 +44,13 @@ while true; do # Progress every 5 iters if (( ITER % 5 == 0 )); then PAIRS=$(wc -l ~/axentx/surrogate/data/training-jsonl/*.jsonl 2>/dev/null | tail -1 | awk '{print $1}') - LEDGER=$(sqlite3 ~/.claude/state/scrape-ledger.db "SELECT COUNT(*) FROM scraped" 2>/dev/null) + LEDGER=$(sqlite3 ~/.surrogate/state/scrape-ledger.db "SELECT COUNT(*) FROM scraped" 2>/dev/null) echo " [iter=$ITER $((NOW - START))s] pairs=$PAIRS (+$((PAIRS - BEFORE_PAIRS))) ledger=$LEDGER (+$((LEDGER - BEFORE_LEDGER)))" | tee -a "$LOG" fi done AFTER_PAIRS=$(wc -l ~/axentx/surrogate/data/training-jsonl/*.jsonl 2>/dev/null | tail -1 | awk '{print $1}') -AFTER_LEDGER=$(sqlite3 ~/.claude/state/scrape-ledger.db "SELECT COUNT(*) FROM scraped" 2>/dev/null) +AFTER_LEDGER=$(sqlite3 ~/.surrogate/state/scrape-ledger.db "SELECT COUNT(*) FROM scraped" 2>/dev/null) echo "═══ LOOP DONE $(date +%H:%M:%S)" | tee -a "$LOG" echo " iters: $ITER" | tee -a "$LOG" echo " pairs added: $((AFTER_PAIRS - BEFORE_PAIRS))" | tee -a "$LOG" diff --git a/bin/github-bridge.sh b/bin/github-bridge.sh new file mode 100755 index 0000000000000000000000000000000000000000..03087e1e0ab9fa5618544a33f6d78d5e5727d747 --- /dev/null +++ b/bin/github-bridge.sh @@ -0,0 +1,94 @@ +#!/usr/bin/env bash +# GitHub Models bridge — free-tier GPT-4o / Llama 3.3 / Mistral via GitHub PAT +# Endpoint: https://models.github.ai/inference (OpenAI-compat) +# Key env: GITHUB_MODELS_TOKEN (preferred) or GITHUB_TOKEN +# Usage: github-bridge.sh [--model MODEL] "" | echo "..." 
| github-bridge.sh +set -u +# Default: full GPT-4o (free via PAT, far smarter than mini, same daily quota) +MODEL="openai/gpt-4o" +MAX_TOKENS=2000 +TEMP=0.3 +PROMPT="" + +# Aliases reflect ONLY models verified working with free PAT (2026-04). +# GPT-5/o3/o1-mini etc. appear in /catalog but API returns 403/unavailable — not usable. +while [[ $# -gt 0 ]]; do + case "$1" in + --model) + case "$2" in + # OpenAI + gpt4o|gpt-4o) MODEL="openai/gpt-4o" ;; + mini|gpt-4o-mini) MODEL="openai/gpt-4o-mini" ;; + gpt41|gpt-4.1) MODEL="openai/gpt-4.1" ;; + gpt41-mini|gpt-4.1-mini) MODEL="openai/gpt-4.1-mini" ;; + # Meta Llama + llama|llama70) MODEL="meta/Llama-3.3-70B-Instruct" ;; + llama4|maverick) MODEL="meta/llama-4-maverick-17b-128e-instruct-fp8" ;; + llama405) MODEL="meta/meta-llama-3.1-405b-instruct" ;; + # DeepSeek + deepseek|deepseek-v3) MODEL="deepseek/deepseek-v3-0324" ;; + deepseek-r1|r1|reasoning) MODEL="deepseek/DeepSeek-R1" ;; + deepseek-r1-latest) MODEL="deepseek/deepseek-r1-0528" ;; + # xAI + grok|grok3) MODEL="xai/grok-3" ;; + grok-mini) MODEL="xai/grok-3-mini" ;; + # Mistral + mistral|mistral-medium) MODEL="mistral-ai/mistral-medium-2505" ;; + codestral|code) MODEL="mistral-ai/codestral-2501" ;; + # Microsoft Phi + phi|phi4) MODEL="microsoft/phi-4" ;; + # Cohere + cohere|command-a) MODEL="cohere/cohere-command-a" ;; + command-r) MODEL="cohere/cohere-command-r-plus-08-2024" ;; + *) MODEL="$2" ;; + esac; shift 2 ;; + --max-tokens) MAX_TOKENS="$2"; shift 2 ;; + --temperature) TEMP="$2"; shift 2 ;; + *) PROMPT="$*"; break ;; + esac +done +[[ -z "$PROMPT" ]] && [[ ! 
-t 0 ]] && PROMPT=$(cat) +[[ -z "$PROMPT" ]] && { echo "github-bridge: no prompt" >&2; exit 2; } + +LOG="$HOME/.surrogate/logs/github-bridge.log" +mkdir -p "$(dirname "$LOG")" +set -a; source "$HOME/.hermes/.env" 2>/dev/null || true; set +a + +# Prefer dedicated models token, fall back to general PAT +TOKEN="${GITHUB_MODELS_TOKEN:-${GITHUB_TOKEN:-}}" +if [[ -z "$TOKEN" ]]; then + echo "github-bridge: missing GITHUB_MODELS_TOKEN or GITHUB_TOKEN in ~/.hermes/.env" >&2 + exit 3 +fi + +echo "[$(date '+%H:%M:%S')] model=$MODEL len=${#PROMPT}" >> "$LOG" + +RESPONSE=$(GH_TOKEN="$TOKEN" python3 -c " +import os +exec(open(os.path.expanduser('~/.surrogate/bin/lib/dns_fallback.py')).read()) +exec(open(os.path.expanduser('~/.surrogate/bin/lib/bridge_retry.py')).read()) +import json, sys +body = { + 'model': '$MODEL', + 'messages': [{'role':'user','content': sys.stdin.read()}], + 'max_tokens': $MAX_TOKENS, 'temperature': $TEMP, +} +try: + d = request_with_retry( + 'https://models.github.ai/inference/chat/completions', + data=json.dumps(body).encode(), + headers={ + 'Content-Type':'application/json', + 'User-Agent':'hermes-agent/1.0', + 'Authorization':'Bearer '+os.environ['GH_TOKEN'], + }, + timeout=120, max_retries=4, base_delay=2.0, + ) + print(d.get('choices',[{}])[0].get('message',{}).get('content','')) +except Exception as e: + print(f'github-bridge error: {e}', file=sys.stderr); sys.exit(1) +" <<< "$PROMPT") +RC=$? 
+echo "[$(date '+%H:%M:%S')] rc=$RC bytes=${#RESPONSE}" >> "$LOG" +[[ $RC -ne 0 ]] && exit $RC +echo "$RESPONSE" diff --git a/bin/github-domain-scrape.sh b/bin/github-domain-scrape.sh index b41722f8633d18862a5a3c13c80e6c6946b21e36..6496d26e2ed180fd77d77ff026c4de71227aeb81 100755 --- a/bin/github-domain-scrape.sh +++ b/bin/github-domain-scrape.sh @@ -4,13 +4,13 @@ set -u set -a; source "$HOME/.hermes/.env" 2>/dev/null; set +a -LEDGER="$HOME/.claude/state/scrape-ledger.db" -LOG="$HOME/.claude/logs/github-domain-scrape.log" +LEDGER="$HOME/.surrogate/state/scrape-ledger.db" +LOG="$HOME/.surrogate/logs/github-domain-scrape.log" DATE=$(date +%Y-%m-%d) OUT="$HOME/axentx/surrogate/data/training-jsonl/github-domain-${DATE}.jsonl" mkdir -p "$(dirname "$LOG")" "$(dirname "$OUT")" -[[ ! -f "$LEDGER" ]] && bash "$HOME/.claude/bin/scrape-ledger-init.sh" +[[ ! -f "$LEDGER" ]] && bash "$HOME/.surrogate/bin/scrape-ledger-init.sh" TARGET="${1:-}" export LEDGER OUT GITHUB_TOKEN GITHUB_TOKEN_POOL TARGET diff --git a/bin/graph-sync.sh b/bin/graph-sync.sh new file mode 100755 index 0000000000000000000000000000000000000000..644fa4d479f5b21284e8b6e6ff8ff7077eac3150 --- /dev/null +++ b/bin/graph-sync.sh @@ -0,0 +1,107 @@ +#!/usr/bin/env bash +# ... (original content unchanged) +# Sync Obsidian markdown patterns/knowledge → FalkorDB Lite (graph DB) +# Complements rag-index.sh (vector DB) — same sources, 2 different indexes. 
+set -e +PYTHON="${HOME}/.surrogate/venv/bin/python" +[ -x "$PYTHON" ] || { echo "venv not found: $PYTHON"; exit 1; } + +"$PYTHON" <<'PY' +import re, os +from pathlib import Path +from redislite.falkordb_client import FalkorDB +import yaml + +HOME = Path.home() +SOURCES = [ + HOME / "Documents/Obsidian Vault/AI-Hub/patterns", + HOME / "Documents/Obsidian Vault/AI-Hub/knowledge", + HOME / "Documents/Obsidian Vault/AI-Hub/inbox", + HOME / ".surrogate/memory", +] +DB_FILE = str(HOME / ".surrogate/graph-db.rdb") + +db = FalkorDB(dbfilename=DB_FILE) +g = db.select_graph("ashira") + +try: g.query("MATCH (n) DETACH DELETE n") +except: pass + +frontmatter_re = re.compile(r'^---\n(.*?)\n---', re.DOTALL) +wikilink_re = re.compile(r'\[\[([^\]|]+?)(?:\|[^\]]+)?\]\]') + +def esc(s): + return str(s).replace("\\", "\\\\").replace("'", "\\'") if s else "" + +nodes = {} +edges = [] + +for src in SOURCES: + if not src.exists(): continue + for md in src.rglob("*.md"): + stem = md.stem + text = md.read_text(errors="ignore") + fm_match = frontmatter_re.match(text) + fm = {} + if fm_match: + try: fm = yaml.safe_load(fm_match.group(1)) or {} + except: pass + + tags = fm.get("tags", []) + if isinstance(tags, str): tags = [tags] + + nodes[stem] = { + "path": str(md.relative_to(HOME)), + "tags": [str(t).replace("#","") for t in tags], + "category": md.parent.name, + "severity": str(fm.get("severity", "medium")), + } + + for link in wikilink_re.findall(text): + target = link.split("/")[-1].split("|")[0].replace(".md", "").strip() + if target and target != stem: + edges.append((stem, target)) + +for name, info in nodes.items(): + g.query( + f"MERGE (n:Doc {{name:'{esc(name)}'}}) " + f"SET n.path='{esc(info['path'])}', " + f"n.category='{esc(info['category'])}', " + f"n.severity='{esc(info['severity'])}', " + f"n.tags='{esc(','.join(info['tags']))}'" + ) + +edge_count = 0 +for src_name, tgt_name in edges: + try: + g.query( + f"MATCH (a:Doc {{name:'{esc(src_name)}'}}), (b:Doc 
{{name:'{esc(tgt_name)}'}}) " + f"MERGE (a)-[:LINKS_TO]->(b)" + ) + edge_count += 1 + except: pass + +all_tags = set() +for info in nodes.values(): + for t in info["tags"]: + if t: all_tags.add(t) +for t in all_tags: + g.query(f"MERGE (:Tag {{name:'{esc(t)}'}})") +for name, info in nodes.items(): + for t in info["tags"]: + if not t: continue + g.query( + f"MATCH (d:Doc {{name:'{esc(name)}'}}), (t:Tag {{name:'{esc(t)}'}}) " + f"MERGE (d)-[:TAGGED]->(t)" + ) + +print(f"Graph built: {len(nodes)} docs, {edge_count} links, {len(all_tags)} tags") + +r = g.query("MATCH (d:Doc)-[:TAGGED]->(t:Tag) RETURN t.name, count(d) AS c ORDER BY c DESC LIMIT 10") +print("\nTop 10 tags:") +for row in r.result_set: print(f" #{row[0]}: {row[1]} docs") + +r = g.query("MATCH (d:Doc)-[r:LINKS_TO]-() RETURN d.name, count(r) AS c ORDER BY c DESC LIMIT 10") +print("\nTop 10 hubs (most connected):") +for row in r.result_set: print(f" {row[0]}: {row[1]} links") +PY diff --git a/bin/groq-bridge.sh b/bin/groq-bridge.sh new file mode 100755 index 0000000000000000000000000000000000000000..615fc4f428ab143eed83d31d7020aba97954b26d --- /dev/null +++ b/bin/groq-bridge.sh @@ -0,0 +1,59 @@ +#!/usr/bin/env bash +# Groq bridge — fast Llama/Qwen inference via Groq API (OpenAI-compat) +# Usage: groq-bridge.sh [--model MODEL] "" | echo "..." | groq-bridge.sh +set -u +# Default: Llama 3.3 70B — best quality on Groq free tier (still ultra-fast). +# 8B is available as --model fast when latency matters more than quality. 
+MODEL="llama-3.3-70b-versatile" +MAX_TOKENS=2000 +TEMP=0.3 +PROMPT="" + +while [[ $# -gt 0 ]]; do + case "$1" in + --model) + case "$2" in + fast|small|8b) MODEL="llama-3.1-8b-instant" ;; + llama|llama70) MODEL="llama-3.3-70b-versatile" ;; + qwen) MODEL="qwen/qwen3-32b" ;; + llama4|scout) MODEL="meta-llama/llama-4-scout-17b-16e-instruct" ;; + gpt-oss|oss) MODEL="openai/gpt-oss-120b" ;; + *) MODEL="$2" ;; + esac; shift 2 ;; + --max-tokens) MAX_TOKENS="$2"; shift 2 ;; + *) PROMPT="$*"; break ;; + esac +done +[[ -z "$PROMPT" ]] && [[ ! -t 0 ]] && PROMPT=$(cat) +[[ -z "$PROMPT" ]] && { echo "groq-bridge: no prompt" >&2; exit 2; } + +LOG="$HOME/.surrogate/logs/groq-bridge.log" +mkdir -p "$(dirname "$LOG")" +set -a; source "$HOME/.hermes/.env"; set +a +echo "[$(date '+%H:%M:%S')] model=$MODEL len=${#PROMPT}" >> "$LOG" + +RESPONSE=$(python3 -c " +import os +exec(open(os.path.expanduser('~/.surrogate/bin/lib/dns_fallback.py')).read()) +exec(open(os.path.expanduser('~/.surrogate/bin/lib/bridge_retry.py')).read()) +import json, sys +body = { + 'model': '$MODEL', + 'messages': [{'role':'user','content': sys.stdin.read()}], + 'max_tokens': $MAX_TOKENS, 'temperature': $TEMP, +} +try: + d = request_with_retry( + 'https://api.groq.com/openai/v1/chat/completions', + data=json.dumps(body).encode(), + headers={'Content-Type':'application/json', 'User-Agent':'hermes-agent/1.0', 'Authorization':'Bearer '+os.environ.get('GROQ_API_KEY','')}, + timeout=120, max_retries=4, base_delay=2.0, + ) + print(d.get('choices',[{}])[0].get('message',{}).get('content','')) +except Exception as e: + print(f'groq-bridge error: {e}', file=sys.stderr); sys.exit(1) +" <<< "$PROMPT") +RC=$? 
+echo "[$(date '+%H:%M:%S')] rc=$RC bytes=${#RESPONSE}" >> "$LOG" +[[ $RC -ne 0 ]] && exit $RC +echo "$RESPONSE" diff --git a/bin/hermes-daily-summary.sh b/bin/hermes-daily-summary.sh index bdbd58140bd5aa961437be357a961b98befed7b6..d16e39a2542dc3fcb0bbd2db51480e0e13f213e6 100755 --- a/bin/hermes-daily-summary.sh +++ b/bin/hermes-daily-summary.sh @@ -4,7 +4,7 @@ set -u set -a; source "$HOME/.hermes/.env" 2>/dev/null; set +a -LOG="$HOME/.claude/logs/hermes-daily-summary.log" +LOG="$HOME/.surrogate/logs/hermes-daily-summary.log" mkdir -p "$(dirname "$LOG")" # ── Collect metrics ────────────────────────────────────────────────────────── @@ -12,23 +12,23 @@ TODAY=$(date +%Y-%m-%d) YESTERDAY=$(date -v-1d +%Y-%m-%d 2>/dev/null || date -d 'yesterday' +%Y-%m-%d) # 1. Tasks completed (24h) -TASKS_DONE=$(grep -c "done in" ~/.claude/logs/hermes-dev-*-daemon.log 2>/dev/null | awk -F: '{s+=$2} END{print s+0}') +TASKS_DONE=$(grep -c "done in" ~/.surrogate/logs/hermes-dev-*-daemon.log 2>/dev/null | awk -F: '{s+=$2} END{print s+0}') # 2. Tasks failed (24h) -TASKS_FAIL=$(grep -c "failed after" ~/.claude/logs/hermes-dev-*-daemon.log 2>/dev/null | awk -F: '{s+=$2} END{print s+0}') +TASKS_FAIL=$(grep -c "failed after" ~/.surrogate/logs/hermes-dev-*-daemon.log 2>/dev/null | awk -F: '{s+=$2} END{print s+0}') # 3. Scrape activity -SCRAPE_TOTAL=$(sqlite3 ~/.claude/state/scrape-ledger.db "SELECT COUNT(*) FROM scraped" 2>/dev/null || echo "?") -SCRAPE_24H=$(sqlite3 ~/.claude/state/scrape-ledger.db "SELECT COUNT(*) FROM scraped WHERE scraped_at > datetime('now','-24 hours')" 2>/dev/null || echo "?") +SCRAPE_TOTAL=$(sqlite3 ~/.surrogate/state/scrape-ledger.db "SELECT COUNT(*) FROM scraped" 2>/dev/null || echo "?") +SCRAPE_24H=$(sqlite3 ~/.surrogate/state/scrape-ledger.db "SELECT COUNT(*) FROM scraped WHERE scraped_at > datetime('now','-24 hours')" 2>/dev/null || echo "?") # 4. 
Training pairs PAIRS=$(wc -l ~/axentx/surrogate/data/training-jsonl/*.jsonl 2>/dev/null | tail -1 | awk '{print $1}' || echo "?") # 5. Index docs -DOCS=$(sqlite3 ~/.claude/index.db "SELECT COUNT(*) FROM docs" 2>/dev/null || echo "?") +DOCS=$(sqlite3 ~/.surrogate/index.db "SELECT COUNT(*) FROM docs" 2>/dev/null || echo "?") # 6. Episodes (surrogate memory) -EPISODES=$(wc -l ~/.claude/state/surrogate-memory/episodes.jsonl 2>/dev/null | awk '{print $1}' || echo 0) +EPISODES=$(wc -l ~/.surrogate/state/surrogate-memory/episodes.jsonl 2>/dev/null | awk '{print $1}' || echo 0) # 7. Daemons running DAEMONS_UP=$(pgrep -f "dev-cloud-daemon\|qwen-coder-daemon\|priority-json-watcher\|hermes" 2>/dev/null | wc -l | tr -d ' ') @@ -41,7 +41,7 @@ for q in cerebras groq github samba nvidia cloudflare qwen-local; do done # 9. Recent errors (last 100 log lines) -ERR_COUNT=$(tail -200 ~/.claude/logs/*.log 2>/dev/null | grep -cE "ERROR|CRITICAL|Fatal|429|500" 2>/dev/null || echo 0) +ERR_COUNT=$(tail -200 ~/.surrogate/logs/*.log 2>/dev/null | grep -cE "ERROR|CRITICAL|Fatal|429|500" 2>/dev/null || echo 0) # ── Build digest body ──────────────────────────────────────────────────────── BODY="$(cat <> "$LOG" -"$HOME/.claude/bin/notify-discord.sh" "$LEVEL" "Hermes daily summary · $TODAY" "$BODY" +"$HOME/.surrogate/bin/notify-discord.sh" "$LEVEL" "Hermes daily summary · $TODAY" "$BODY" diff --git a/bin/hermes-discord-bot.py b/bin/hermes-discord-bot.py index 1153a0af9ef683065e6388dc985113b977e83cde..7532ea59a34b46b9b49590d04d0722a77e9fbae8 100755 --- a/bin/hermes-discord-bot.py +++ b/bin/hermes-discord-bot.py @@ -10,7 +10,7 @@ Triggers (responds when): Pipes user message → surrogate -p "..." → replies with output. Token comes from $DISCORD_BOT_TOKEN (read from ~/.hermes/.env). -Logs to ~/.claude/logs/hermes-discord-bot.log. +Logs to ~/.surrogate/logs/hermes-discord-bot.log. 
""" from __future__ import annotations @@ -26,12 +26,12 @@ import discord # ── Config ─────────────────────────────────────────────────────────────────── HOME = Path.home() -LOG_PATH = HOME / ".claude/logs/hermes-discord-bot.log" +LOG_PATH = HOME / ".surrogate/logs/hermes-discord-bot.log" LOG_PATH.parent.mkdir(parents=True, exist_ok=True) -# surrogate CLI path: prefer ~/.local/bin (installed), fallback ~/.claude/bin +# surrogate CLI path: prefer ~/.local/bin (installed), fallback ~/.surrogate/bin SURROGATE_BIN = next( - p for p in [HOME / ".local/bin/surrogate", HOME / ".claude/bin/surrogate"] if p.exists() + p for p in [HOME / ".local/bin/surrogate", HOME / ".surrogate/bin/surrogate"] if p.exists() ) PREFIX_RE = re.compile(r"^[!/]sg\b\s*", re.IGNORECASE) @@ -169,7 +169,7 @@ async def on_ready() -> None: log.info("connected as %s (id=%s)", client.user, client.user.id if client.user else "?") print(f"✅ logged in as {client.user}") # Notify Discord channel via webhook that bot came online - notify = HOME / ".claude/bin/notify-discord.sh" + notify = HOME / ".surrogate/bin/notify-discord.sh" if notify.exists(): subprocess.Popen( [str(notify), "success", "Discord bot online", f"Connected as {client.user}. 
DM or @mention to chat."], diff --git a/bin/hermes-status-server.py b/bin/hermes-status-server.py index 6a03545132331d221daf1de7321e4d9a45fe2424..9631dda4a318bbc241d50c02c6ce38796e36400a 100755 --- a/bin/hermes-status-server.py +++ b/bin/hermes-status-server.py @@ -26,9 +26,9 @@ from pydantic import BaseModel app = FastAPI(title="hermes", docs_url=None, redoc_url=None) HOME = Path(os.environ.get("HOME", "/home/hermes")) -LEDGER = HOME / ".claude/state/scrape-ledger.db" -EPISODES = HOME / ".claude/state/surrogate-memory/episodes.jsonl" -LOG_DIR = HOME / ".claude/logs" +LEDGER = HOME / ".surrogate/state/scrape-ledger.db" +EPISODES = HOME / ".surrogate/state/surrogate-memory/episodes.jsonl" +LOG_DIR = HOME / ".surrogate/logs" def _ledger_count() -> int: @@ -92,7 +92,7 @@ async def chat(req: ChatRequest) -> JSONResponse: if not req.prompt.strip(): raise HTTPException(status_code=400, detail="prompt is empty") - surrogate_bin = HOME / ".claude/bin/surrogate" + surrogate_bin = HOME / ".surrogate/bin/surrogate" if not surrogate_bin.exists(): raise HTTPException(status_code=503, detail="surrogate CLI not installed in container") diff --git a/bin/lib/__init__.py b/bin/lib/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/bin/lib/bridge_retry.py b/bin/lib/bridge_retry.py new file mode 100644 index 0000000000000000000000000000000000000000..d5d943e6603d85554e3088b2ec65bafd750d19d3 --- /dev/null +++ b/bin/lib/bridge_retry.py @@ -0,0 +1,142 @@ +"""Shared HTTP retry library for all cloud bridges. +Handles: exponential backoff + jitter + Retry-After + circuit breaker. 
+Import at top of any bridge: exec(open(...).read()) + +Exports: request_with_retry(url, data, headers, max_retries=4, base_delay=2.0) +""" +import json as _json +import os as _os +import random as _random +import time as _time +import urllib.request as _urlreq +import urllib.error as _urlerr + +# Circuit breaker state — persisted in /tmp so all bridge invocations share +_CB_DIR = "/tmp/bridge-circuits" +_os.makedirs(_CB_DIR, exist_ok=True) + + +def _cb_state_path(host): + return f"{_CB_DIR}/{host.replace('/', '_')}.json" + + +def _circuit_open(host): + p = _cb_state_path(host) + try: + with open(p) as f: + s = _json.load(f) + # Circuit closed after timeout + if _time.time() > s.get("open_until", 0): + return False, 0 + return True, int(s["open_until"] - _time.time()) + except Exception: + return False, 0 + + +def _record_failure(host, open_seconds=60): + """Called on 429 or 5xx — track consecutive failures.""" + p = _cb_state_path(host) + try: + with open(p) as f: + s = _json.load(f) + except Exception: + s = {"consec_fails": 0, "open_until": 0} + s["consec_fails"] = s.get("consec_fails", 0) + 1 + # Open circuit after 3 consecutive failures + if s["consec_fails"] >= 3: + s["open_until"] = _time.time() + open_seconds + with open(p, "w") as f: + _json.dump(s, f) + + +def _record_success(host): + """Called on 2xx — reset failure counter.""" + p = _cb_state_path(host) + try: + with open(p, "w") as f: + _json.dump({"consec_fails": 0, "open_until": 0}, f) + except Exception: + pass + + +def _parse_retry_after(headers, default_delay): + """Honor Retry-After header (seconds) or x-ratelimit-reset-after.""" + for h in ("Retry-After", "retry-after", "x-ratelimit-reset-after", "x-ratelimit-reset"): + val = headers.get(h) + if val: + try: + n = int(val) + # x-ratelimit-reset may be absolute epoch — convert to delta + if n > 10_000_000_000: # way in future = epoch ms + n = n // 1000 - int(_time.time()) + elif n > 1_000_000_000: # epoch seconds + n = n - int(_time.time()) + 
return max(1, min(n, 300)) # clamp 1..300s + except (ValueError, TypeError): + pass + return default_delay + + +def request_with_retry(url, data, headers, timeout=120, max_retries=4, base_delay=2.0, open_seconds=60): + """Make HTTP request with exp-backoff retry + circuit breaker. + + Args: + open_seconds: how long to open circuit after 3 consecutive failures. + Default 60s. Callers with strict per-minute rate limits (Cloudflare, + SambaNova) should use 120-180s so we don't hammer during cooldown. + + Returns: parsed JSON response. + Raises: Exception if circuit open or max retries exhausted. + """ + from urllib.parse import urlparse + + host = urlparse(url).netloc + + # Circuit breaker check + is_open, remaining = _circuit_open(host) + if is_open: + raise Exception(f"circuit-open for {host} ({remaining}s remaining)") + + last_err = None + for attempt in range(max_retries): + try: + req = _urlreq.Request(url, data=data, headers=headers) + with _urlreq.urlopen(req, timeout=timeout) as r: + result = _json.load(r) + _record_success(host) + return result + except _urlerr.HTTPError as e: + last_err = e + if e.code == 429: + # Rate-limited — honor Retry-After + base = base_delay * (2 ** attempt) + delay = _parse_retry_after(e.headers, base) + delay *= (1 + _random.uniform(-0.2, 0.2)) # jitter ±20% + if attempt < max_retries - 1: + _time.sleep(min(delay, 60)) + continue + _record_failure(host, open_seconds=open_seconds) + raise Exception(f"HTTP 429 after {max_retries} retries (last Retry-After: {delay:.0f}s)") + elif 500 <= e.code < 600: + # Server error — exp backoff with jitter + delay = base_delay * (2 ** attempt) * (1 + _random.uniform(-0.2, 0.2)) + if attempt < max_retries - 1: + _time.sleep(min(delay, 30)) + continue + _record_failure(host, open_seconds=open_seconds) + raise Exception(f"HTTP {e.code} after {max_retries} retries") + else: + # 4xx other than 429 — not retryable (client error) + _record_failure(host, open_seconds=open_seconds) + raise + except 
(_urlerr.URLError, _os.error) as e: + last_err = e + # Network error — retry with backoff + delay = base_delay * (2 ** attempt) * (1 + _random.uniform(-0.2, 0.2)) + if attempt < max_retries - 1: + _time.sleep(min(delay, 30)) + continue + _record_failure(host, open_seconds=open_seconds) + raise + + raise Exception(f"max retries ({max_retries}) exhausted: {last_err}") diff --git a/bin/lib/checkpoint.py b/bin/lib/checkpoint.py new file mode 100644 index 0000000000000000000000000000000000000000..a9e909f20dd836bd07b39753b5de6913ebf1d682 --- /dev/null +++ b/bin/lib/checkpoint.py @@ -0,0 +1,146 @@ +"""Checkpoint store — JSONL event log per task, append-only. + +Purpose: + - Crash-safe: every event appended immediately (no buffering) + - Resume-aware: load full event trail to reconstruct task state + - Distill-friendly: each file = complete conversation trace a future model can learn from + +Event types: + task_start, codebase_review, provider_selected, stream_chunk, model_switch, + result_draft, review_requested, review_verdict, revision_requested, task_done, + task_failed, provider_probe + +File layout: + ~/.surrogate/yolo/checkpoints/.jsonl — live tasks + ~/.surrogate/yolo/checkpoints_done/.jsonl — completed (archive) +""" + +from __future__ import annotations + +import datetime as dt +import json +from dataclasses import dataclass +from pathlib import Path +from typing import Any, Iterator + +CHECKPOINT_DIR = Path.home() / ".surrogate" / "yolo" / "checkpoints" +CHECKPOINT_DONE = Path.home() / ".surrogate" / "yolo" / "checkpoints_done" + + +def _now() -> str: + return dt.datetime.now(dt.timezone.utc).isoformat() + + +@dataclass +class Checkpoint: + task_id: str + path: Path + + @classmethod + def open(cls, task_id: str) -> "Checkpoint": + CHECKPOINT_DIR.mkdir(parents=True, exist_ok=True) + return cls(task_id=task_id, path=CHECKPOINT_DIR / f"{task_id}.jsonl") + + def append(self, event_type: str, **fields: Any) -> None: + """Atomically append event. 
Fields serialize via JSON.""" + rec = {"t": _now(), "event": event_type, **fields} + with open(self.path, "a") as f: + f.write(json.dumps(rec, ensure_ascii=False, default=str) + "\n") + + def events(self) -> list[dict]: + if not self.path.exists(): + return [] + out = [] + with open(self.path) as f: + for line in f: + line = line.strip() + if not line: + continue + try: + out.append(json.loads(line)) + except json.JSONDecodeError: + continue + return out + + def last_event(self, event_type: str = "") -> dict | None: + for e in reversed(self.events()): + if not event_type or e.get("event") == event_type: + return e + return None + + def resume_state(self) -> dict: + """Reconstruct what we know from the event trail. + + Returns: + { + "started": bool, + "completed": bool, + "failed": bool, + "current_model": str | None, + "draft_text": str (partial output so far), + "attempts": int, + "last_event": dict | None, + "artifacts_reviewed": list[str], + "review_iterations": int, + } + """ + ev = self.events() + state = { + "started": False, + "completed": False, + "failed": False, + "current_model": None, + "draft_text": "", + "attempts": 0, + "last_event": ev[-1] if ev else None, + "artifacts_reviewed": [], + "review_iterations": 0, + } + for e in ev: + etype = e.get("event") + if etype == "task_start": + state["started"] = True + elif etype == "provider_selected": + state["current_model"] = e.get("model") + state["attempts"] += 1 + elif etype == "model_switch": + state["current_model"] = e.get("to") + elif etype == "codebase_review": + state["artifacts_reviewed"] = e.get("artifacts", []) + elif etype == "result_draft": + state["draft_text"] = e.get("text", state["draft_text"]) + elif etype == "review_verdict": + state["review_iterations"] += 1 + elif etype == "task_done": + state["completed"] = True + elif etype == "task_failed": + state["failed"] = True + return state + + def archive(self) -> None: + """Move to checkpoints_done/ after task complete.""" + 
CHECKPOINT_DONE.mkdir(parents=True, exist_ok=True) + dest = CHECKPOINT_DONE / self.path.name + if self.path.exists(): + self.path.rename(dest) + self.path = dest + + +def list_active() -> list[str]: + if not CHECKPOINT_DIR.exists(): + return [] + return [p.stem for p in CHECKPOINT_DIR.glob("*.jsonl")] + + +if __name__ == "__main__": + import sys + if len(sys.argv) < 2: + print("usage: checkpoint.py [replay]") + sys.exit(1) + cp = Checkpoint.open(sys.argv[1]) + if len(sys.argv) > 2 and sys.argv[2] == "replay": + for e in cp.events(): + print(json.dumps(e, ensure_ascii=False)) + else: + state = cp.resume_state() + print(json.dumps(state, indent=2, ensure_ascii=False, default=str)) diff --git a/bin/lib/codebase_scanner.py b/bin/lib/codebase_scanner.py new file mode 100644 index 0000000000000000000000000000000000000000..bb89ea993a09ad4add998c791121732fe2575839 --- /dev/null +++ b/bin/lib/codebase_scanner.py @@ -0,0 +1,225 @@ +"""Codebase scanner — full review before each task iteration. + +Purpose (per Ashira): full scan first, then grep context that previous iteration +left behind. "Review agent" relies on this to know what was done vs what remains. 
+ +3-pass strategy: + Pass 1: List recently-modified files across watched roots (last 7 days) + Pass 2: Semantic search via ChromaDB (if index exists) using task keywords + Pass 3: Git status + diff for any repos found (to detect uncommitted work) + +Input: task description (string) +Output: structured summary dict the dispatcher can feed to models as context +""" + +from __future__ import annotations + +import datetime as dt +import json +import os +import re +import subprocess +from pathlib import Path + +HOME = Path.home() +WATCHED_ROOTS = [ + HOME / "develope", + HOME / "axentx", + HOME / ".surrogate" / "bin", +] +RECENT_DAYS = 7 +MAX_FILE_SIZE = 100_000 # skip large binaries +MAX_FILES_PASS1 = 50 +MAX_CHUNKS_PASS2 = 10 +CHROMA_DB = HOME / ".surrogate" / "code-vector-db" + + +def _keywords(task: str) -> list[str]: + tokens = re.findall(r"[A-Za-z_][A-Za-z0-9_]*", task.lower()) + stop = {"a", "an", "the", "is", "are", "was", "were", "be", "to", "and", + "or", "but", "if", "then", "else", "for", "with", "of", "in", "on", + "at", "this", "that", "from", "by", "as", "i", "you", "it", "we", + "they", "write", "create", "make", "build", "add", "update", "task"} + return [t for t in tokens if len(t) >= 3 and t not in stop][:10] + + +def _recent_files(keywords: list[str], roots: list[Path]) -> list[dict]: + """Find recently modified source files matching keywords.""" + cutoff = dt.datetime.now() - dt.timedelta(days=RECENT_DAYS) + out = [] + for root in roots: + if not root.exists(): + continue + for dirpath, dirnames, filenames in os.walk(root): + # skip hidden, node_modules, .git, venv + dirnames[:] = [d for d in dirnames if not d.startswith(".") + and d not in {"node_modules", "vendor", "venv", ".venv", + "__pycache__", "dist", "build", "target"}] + for f in filenames: + p = Path(dirpath) / f + try: + st = p.stat() + except OSError: + continue + if st.st_size > MAX_FILE_SIZE: + continue + mtime = dt.datetime.fromtimestamp(st.st_mtime) + if mtime < cutoff: + continue + 
# score by keyword hits in name/path + path_lower = str(p).lower() + score = sum(1 for kw in keywords if kw in path_lower) + # light content match (first 4KB only for perf) + try: + with open(p, "r", errors="replace") as fh: + head = fh.read(4096).lower() + score += sum(1 for kw in keywords if kw in head) * 2 + except OSError: + continue + if score > 0: + out.append({ + "path": str(p), + "mtime": mtime.isoformat(), + "score": score, + "size": st.st_size, + }) + out.sort(key=lambda x: -x["score"]) + return out[:MAX_FILES_PASS1] + + +def _chromadb_search(keywords: list[str], task: str) -> list[dict]: + """Query ChromaDB semantic index (if available).""" + if not CHROMA_DB.exists(): + return [] + try: + # Use existing helper if present + helper = HOME / ".surrogate" / "bin" / "code-search.sh" + if helper.exists(): + proc = subprocess.run( + [str(helper), "--top", str(MAX_CHUNKS_PASS2), task], + capture_output=True, text=True, timeout=30, + ) + if proc.returncode == 0 and proc.stdout: + out = [] + for line in proc.stdout.splitlines()[:MAX_CHUNKS_PASS2]: + m = re.match(r"(\S+):(\d+)\s+(.*)", line) + if m: + out.append({ + "path": m.group(1), + "line": int(m.group(2)), + "preview": m.group(3)[:200], + }) + return out + except (subprocess.TimeoutExpired, OSError): + pass + return [] + + +def _git_uncommitted(roots: list[Path]) -> list[dict]: + """Detect repos with uncommitted work (partial iterations).""" + out = [] + # Find up to 3 levels of git repos + for root in roots: + if not root.exists(): + continue + for depth_glob in ["*/.git", "*/*/.git", "*/*/*/.git"]: + for git_dir in root.glob(depth_glob): + repo = git_dir.parent + try: + status = subprocess.run( + ["git", "-C", str(repo), "status", "--short"], + capture_output=True, text=True, timeout=5, + ) + if status.returncode == 0 and status.stdout.strip(): + out.append({ + "repo": str(repo), + "changes": status.stdout.strip().splitlines()[:20], + }) + except (subprocess.TimeoutExpired, OSError): + continue + return 
out + + +def scan(task: str, task_artifacts: list[str] | None = None) -> dict: + """Full codebase review → structured context dict. + + Args: + task: natural-language task description + task_artifacts: paths mentioned in task (will be loaded in full) + + Returns: + { + "keywords": [...], + "recent_files": [{path, mtime, score, size}, ...], + "semantic_hits": [{path, line, preview}, ...], + "uncommitted_repos": [{repo, changes: [...]}, ...], + "explicit_artifacts": {path: content, ...}, # loaded in full + } + """ + keywords = _keywords(task) + report = { + "task_excerpt": task[:200], + "keywords": keywords, + "recent_files": _recent_files(keywords, WATCHED_ROOTS), + "semantic_hits": _chromadb_search(keywords, task), + "uncommitted_repos": _git_uncommitted(WATCHED_ROOTS), + "explicit_artifacts": {}, + } + for a in task_artifacts or []: + p = Path(a) + if p.exists() and p.is_file() and p.stat().st_size < MAX_FILE_SIZE: + try: + report["explicit_artifacts"][str(p)] = p.read_text(errors="replace")[:10000] + except OSError: + pass + return report + + +def as_context_prompt(scan_result: dict, max_chars: int = 8000) -> str: + """Render scan as context for LLM system prompt.""" + lines = [ + "## Codebase context (auto-generated)", + f"Task keywords: {', '.join(scan_result['keywords'])}", + "", + ] + if scan_result["uncommitted_repos"]: + lines.append("### Uncommitted work (may indicate previous partial iteration):") + for r in scan_result["uncommitted_repos"][:5]: + lines.append(f" {r['repo']}") + for c in r["changes"][:8]: + lines.append(f" {c}") + lines.append("") + + if scan_result["recent_files"]: + lines.append(f"### Recently modified relevant files ({len(scan_result['recent_files'])}):") + for f in scan_result["recent_files"][:15]: + lines.append(f" {f['path']} (score={f['score']}, mtime={f['mtime']})") + lines.append("") + + if scan_result["semantic_hits"]: + lines.append("### Semantic search hits:") + for h in scan_result["semantic_hits"][:8]: + lines.append(f" 
{h['path']}:{h.get('line','?')} — {h['preview'][:120]}") + lines.append("") + + if scan_result["explicit_artifacts"]: + lines.append("### Explicit task artifacts (FULL content):") + for path, content in scan_result["explicit_artifacts"].items(): + lines.append(f"--- {path} ---") + lines.append(content[:3000]) + lines.append("") + + result = "\n".join(lines) + return result[:max_chars] + + +if __name__ == "__main__": + import sys + task = " ".join(sys.argv[1:]) or "refactor yolo daemon" + report = scan(task) + print(json.dumps( + {k: v if not isinstance(v, list) else v[:5] for k, v in report.items()}, + indent=2, default=str, ensure_ascii=False + )) + print("\n=== AS CONTEXT PROMPT ===\n") + print(as_context_prompt(report, 3000)) diff --git a/bin/lib/context_builder.sh b/bin/lib/context_builder.sh new file mode 100644 index 0000000000000000000000000000000000000000..7cd8fec18362fc34a5cc9bb954a6533c3201936b --- /dev/null +++ b/bin/lib/context_builder.sh @@ -0,0 +1,272 @@ +#!/usr/bin/env bash +# Shared context builder — sourced by qwen-coder-worker + dev-cloud-worker. +# Produces rich context: repo-map + similar functions from project + past accepted examples. +# Call: build_rich_context +# Sets env vars: REPO_MAP, SIMILAR_FUNCS, FEWSHOT_ACCEPTED, ANTI_PATTERNS +build_rich_context() { + local PRIO_PROJECT="$1" + local PRIO_ID="$2" + local PRIO_TITLE="$3" + local SHARED="$HOME/.hermes/workspace/swarm-shared" + local PROJECT_DIR="$HOME/axentx/$PRIO_PROJECT" + + # 1. Full repo-map (up to 10KB — was 3KB). + # build-repo-map.sh writes to "_map.md"; some older paths used ".md". + # Try both so we don't silently lose the strongest grounding signal. + REPO_MAP="" + for candidate in "$SHARED/repo-maps/${PRIO_PROJECT}_map.md" "$SHARED/repo-maps/${PRIO_PROJECT}.md"; do + if [[ -f "$candidate" ]]; then + REPO_MAP=$(/usr/bin/head -c 10000 "$candidate") + break + fi + done + + # 2. 
Similar function signatures from project (grep in real codebase) + SIMILAR_FUNCS="" + if [[ -d "$PROJECT_DIR" ]]; then + # Extract keywords from title for grep + local KW=$(echo "$PRIO_TITLE" | /usr/bin/tr '[:upper:]' '[:lower:]' | /usr/bin/tr -cs 'a-z0-9' ' ' | /usr/bin/tr ' ' '\n' | /usr/bin/awk 'length>4' | /usr/bin/head -3 | /usr/bin/tr '\n' '|' | /usr/bin/sed 's/|$//') + if [[ -n "$KW" ]]; then + SIMILAR_FUNCS=$(/usr/bin/find "$PROJECT_DIR" -type f \( -name '*.py' -o -name '*.ts' -o -name '*.tsx' -o -name '*.js' -o -name '*.go' \) ! -path '*/node_modules/*' ! -path '*/.hermes-*' 2>/dev/null | \ + xargs /usr/bin/grep -lE "($KW)" 2>/dev/null | /usr/bin/head -3 | while read f; do + echo "=== ${f#$PROJECT_DIR/} ===" + /usr/bin/grep -A3 -E "^(def|function|export const|class|async def|interface)" "$f" 2>/dev/null | /usr/bin/head -30 + done 2>/dev/null | /usr/bin/head -c 4000) + fi + fi + + # 3. RAG: actual code patterns from project (SQLite FTS via ask-sqlite.py if exists) + RAG_EXAMPLES="" + if [[ -x "$HOME/.surrogate/bin/ask-sqlite.py" ]]; then + RAG_EXAMPLES=$(/usr/bin/python3 "$HOME/.surrogate/bin/ask-sqlite.py" \ + "$PRIO_PROJECT $PRIO_TITLE" 2>/dev/null | /usr/bin/head -c 3000) + fi + + # 4. Semantic RAG (from embeddings) — top-5 similar + SEMANTIC_RAG="" + if [[ -f "$HOME/.surrogate/embeddings.db" ]]; then + SEMANTIC_RAG=$(/usr/bin/python3 "$HOME/.surrogate/bin/embed-doc.py" --query "$PRIO_TITLE" 2>/dev/null | /usr/bin/head -c 2000) + fi + + # 5. 
Past ACCEPTED examples (few-shot from quality≥7 history) + FEWSHOT_ACCEPTED="" + for review in $(/bin/ls -t "$HOME/.hermes/workspace/qwen-coder-reviews/"*.review.json 2>/dev/null | /usr/bin/head -30); do + if /usr/bin/grep -qE '"quality_score":\s*[789]|"quality_score":\s*10' "$review" 2>/dev/null; then + local OUT_FILE=$(basename "$review" .review.json) + # Search all worker output dirs + for WD in qwen-coder dev-cloud-samba dev-cloud-github dev-cloud-cloudflare dev-cloud-groq dev-cloud-synthesis; do + local OUT_PATH="$HOME/.hermes/workspace/$WD/${OUT_FILE}.md" + if [[ -f "$OUT_PATH" ]]; then + FEWSHOT_ACCEPTED=$(/usr/bin/head -c 2000 "$OUT_PATH") + break 2 + fi + done + fi + done + + # 6. Anti-patterns (last 5 rejection reasons across all workers) + ANTI_PATTERNS="" + for review in $(/bin/ls -t "$HOME/.hermes/workspace/qwen-coder-reviews/"*.review.json 2>/dev/null | /usr/bin/head -10); do + local bugs=$(/usr/bin/python3 -c " +import json, re, sys +try: + txt = open('$review').read() + m = re.search(r'\{.*\}', txt, re.DOTALL) + if not m: sys.exit() + d = json.loads(m.group(0)) + if d.get('verdict') in ('reject','rework') and d.get('bugs'): + for b in d['bugs'][:2]: + print(f'- {b[:180]}') +except: pass +" 2>/dev/null) + [[ -n "$bugs" ]] && ANTI_PATTERNS="$ANTI_PATTERNS$bugs"$'\n' + done + ANTI_PATTERNS=$(echo "$ANTI_PATTERNS" | /usr/bin/head -10) + + # 7. Active-learning prompt deltas — aggregate last 5 UNIQUE anti-patterns. + # Preference: same-project anti-patterns first, then generic. + # Dedup by first 80 chars of prompt_addition (similar bugs shouldn't bloat prompt). 
+ PROMPT_DELTAS="" + if [[ -f "$HOME/.surrogate/memory/worker-prompt-deltas.jsonl" ]]; then + PROMPT_DELTAS=$(/usr/bin/python3 -c " +import json, sys +from pathlib import Path +try: + entries = [] + for l in Path('$HOME/.surrogate/memory/worker-prompt-deltas.jsonl').read_text().splitlines(): + if not l.strip(): continue + try: entries.append(json.loads(l)) + except: pass + # Dedup by first 80 chars + seen = set() + picked = [] + # Walk newest → oldest, cap 5 unique + for e in reversed(entries): + addn = (e.get('prompt_addition') or '').strip() + if not addn: continue + key = addn[:80] + if key in seen: continue + seen.add(key) + picked.append(addn) + if len(picked) >= 5: break + if picked: + out = ['ACTIVE-LEARNED RULES (avoid these past mistakes):'] + for i, a in enumerate(picked, 1): + out.append(f'{i}. {a[:400]}') + print('\n'.join(out)) +except Exception as e: pass +" 2>/dev/null) + fi + + # 8. Priority full spec (if a detailed spec file exists) + # Spec is the single most important signal — cap high (6KB) so the full + # Context/Requirements/DO NOT sections fit. Other RAG signals are capped + # lower because they're supplementary; the spec is authoritative. + PRIO_SPEC="" + local SPEC_FILE="$HOME/.hermes/workspace/swarm-shared/specs/${PRIO_ID}.md" + [[ -f "$SPEC_FILE" ]] && PRIO_SPEC=$(/usr/bin/head -c 6000 "$SPEC_FILE") + + # 9. Task-type authoritative sources — boost scraped knowledge based on title. + # Security task → CVE/MITRE/OWASP/Prowler. SRE → Google SRE/postmortems. + # Observability → OTel/Prometheus/Grafana/Honeycomb. etc. + # This is THE fix that makes all our scraping actually used by Hermes workers. 
+ AUTHORITATIVE_CONTEXT="" + if [[ -f "$HOME/.surrogate/index.db" ]]; then + AUTHORITATIVE_CONTEXT=$(/usr/bin/python3 < 3][:5]) +if not kw: exit() + +src_list = ','.join(f"'{s}'" for s in preferred_sources) +# Strategy: 3-tier fallback — preferred+match → any+match → preferred random +rows = [] +try: + # Tier 1: preferred sources + FTS match on keywords + q = f"""SELECT d.source, d.instruction, substr(d.response, 1, 600) as body + FROM docs_fts f JOIN docs d ON d.id = f.rowid + WHERE f.docs_fts MATCH ? AND d.source IN ({src_list}) + ORDER BY bm25(docs_fts) LIMIT 6""" + rows = conn.execute(q, (kw,)).fetchall() +except sqlite3.OperationalError: pass + +if not rows: + # Tier 2: FTS match on ANY source — relax source filter + try: + q2 = """SELECT d.source, d.instruction, substr(d.response, 1, 600) as body + FROM docs_fts f JOIN docs d ON d.id = f.rowid + WHERE f.docs_fts MATCH ? ORDER BY bm25(docs_fts) LIMIT 6""" + rows = conn.execute(q2, (kw,)).fetchall() + except sqlite3.OperationalError: pass + +if not rows: + # Tier 3: random sample from preferred sources (even if no keyword match) + rows = conn.execute(f"SELECT source, instruction, substr(response,1,600) as body FROM docs WHERE source IN ({src_list}) ORDER BY RANDOM() LIMIT 6").fetchall() + +conn.close() + +out = [] +for r in rows: + out.append(f"[{r['source']}] {(r['instruction'] or '')[:120]}") + out.append((r['body'] or '')[:500]) + out.append('') +print('\n'.join(out)[:3500]) +PYEOF +) + fi + + # 10. 
FalkorDB graph — related decisions + past priorities with similar theme + GRAPH_CONTEXT="" + local REDIS_SOCK=$(/usr/bin/find /var/folders /tmp -name 'redis.socket' -type s 2>/dev/null | /usr/bin/head -1) + if [[ -n "$REDIS_SOCK" ]]; then + # Get related priorities + learned rules + GRAPH_CONTEXT=$(/opt/homebrew/bin/redis-cli -s "$REDIS_SOCK" GRAPH.QUERY ashira " + MATCH (p:Priority {project: '$PRIO_PROJECT'}) + OPTIONAL MATCH (p)-[:HAS_LEARNED_RULE]->(l:LearnedRule) + OPTIONAL MATCH (p)-[:COMMITTED_AS]->(c:Commit) + RETURN p.id, p.title, l.content, c.msg LIMIT 8 + " 2>/dev/null | /usr/bin/tail -c 2500) + fi + + # 11. Hermes trace recall — past similar tasks Hermes handled (from JSONL) + HERMES_RECALL="" + local TRACE_DIR="$HOME/axentx/surrogate/data/training-jsonl" + if [[ -d "$TRACE_DIR" ]]; then + HERMES_RECALL=$(/usr/bin/python3 < 4][:4] +if not words: exit() + +hits = [] +# Walk recent hermes-trace-YYYY-MM-DD.jsonl files (last 7 days) +import os +files = sorted(glob.glob(os.path.expanduser('~/axentx/surrogate/data/training-jsonl/hermes-trace-*.jsonl')))[-7:] +for f in files: + try: + for line in open(f): + try: rec = json.loads(line) + except: continue + blob = (rec.get('instruction','') + ' ' + rec.get('output',''))[:2000].lower() + score = sum(1 for w in words if w in blob) + if score >= 2: + hits.append((score, rec)) + except: pass + +hits.sort(key=lambda x: -x[0]) +for score, rec in hits[:3]: + print(f"HERMES PREVIOUSLY [{rec.get('category','?')}]: {rec.get('instruction','')[:120]}") + print(f"→ {rec.get('output','')[:400]}") + print() +PYEOF +) + fi +} + +export -f build_rich_context diff --git a/bin/lib/dns_fallback.py b/bin/lib/dns_fallback.py new file mode 100644 index 0000000000000000000000000000000000000000..d9550a75eff1f688227cdcd969742efbf54241ff --- /dev/null +++ b/bin/lib/dns_fallback.py @@ -0,0 +1,27 @@ +# DNS fallback helper — patches socket.getaddrinfo to use dig @8.8.8.8 +# when system resolver fails (ISP DNS filtering certain AI endpoints). 
+# Import at top of any Python script: exec(open(...).read()) +import socket as _sock +import subprocess as _sp + +_orig_getaddrinfo = _sock.getaddrinfo + +def _resilient_getaddrinfo(host, *args, **kwargs): + try: + return _orig_getaddrinfo(host, *args, **kwargs) + except _sock.gaierror: + # Fall back: resolve via public DNS (bypass ISP filtering) + for resolver in ("1.1.1.1", "8.8.8.8", "9.9.9.9"): + try: + out = _sp.check_output( + ["dig", "+short", "+time=3", "+tries=1", f"@{resolver}", host], + text=True, timeout=5, stderr=_sp.DEVNULL + ).strip().splitlines() + ip = next((ln for ln in out if ln and ln[0].isdigit()), None) + if ip: + return _orig_getaddrinfo(ip, *args, **kwargs) + except Exception: + continue + raise + +_sock.getaddrinfo = _resilient_getaddrinfo diff --git a/bin/lib/ground_truth.py b/bin/lib/ground_truth.py new file mode 100644 index 0000000000000000000000000000000000000000..bc65860f11b2f450541ad3084348efa5e2a09ba2 --- /dev/null +++ b/bin/lib/ground_truth.py @@ -0,0 +1,280 @@ +"""Ground-truth check — objective verification beyond reviewer opinion. + +When task produces code, run external validators: + - Python: ast.parse (syntax) + optional ruff / mypy / pytest + - TypeScript/JS: tsc / eslint (if available) + - Terraform: terraform validate + tfsec (if available) + - CloudFormation: cfn-lint (if available) + - Shell: bash -n (syntax) + shellcheck (if available) + - JSON/YAML: parse check + +Reviewer opinion + ground-truth = double check. Review says pass BUT compile +fails → overrides to fail. 
+ +Output: {"verdict": "pass|fail", "checks": [...], "blocking_failure": bool} +""" + +from __future__ import annotations + +import ast +import json +import re +import shutil +import subprocess +import tempfile +from pathlib import Path +from typing import Optional + +CODE_BLOCK_RE = re.compile(r"```(\w+)?\n(.*?)```", re.DOTALL) + + +def extract_code_blocks(text: str) -> list[tuple[str, str]]: + """Return list of (language, content) pairs from markdown fenced blocks.""" + blocks = [] + for m in CODE_BLOCK_RE.finditer(text): + lang = (m.group(1) or "").lower().strip() + content = m.group(2).strip() + if content: + blocks.append((lang, content)) + return blocks + + +def _have(cmd: str) -> bool: + return shutil.which(cmd) is not None + + +def _run(cmd: list[str], stdin: Optional[str] = None, timeout: int = 30) -> tuple[int, str]: + try: + r = subprocess.run( + cmd, input=stdin, capture_output=True, text=True, timeout=timeout + ) + return r.returncode, (r.stdout + r.stderr)[:2000] + except subprocess.TimeoutExpired: + return -1, "timeout" + except OSError as e: + return -1, str(e) + + +# ---------------------------------------------------------------------- +# Per-language checkers +# ---------------------------------------------------------------------- +def check_python(code: str) -> list[dict]: + out = [] + # 1. syntax + try: + ast.parse(code) + out.append({"tool": "python-syntax", "pass": True, "msg": "syntactically valid"}) + except SyntaxError as e: + out.append({"tool": "python-syntax", "pass": False, + "msg": f"SyntaxError: {e}", "blocking": True}) + return out # no point in running linters + # 2. 
ruff (if installed) + if _have("ruff"): + with tempfile.NamedTemporaryFile("w", suffix=".py", delete=False) as f: + f.write(code) + path = f.name + try: + rc, output = _run(["ruff", "check", "--select=E,F", "--output-format=concise", path]) + passed = rc == 0 + out.append({"tool": "ruff", "pass": passed, + "msg": output[:500] if output else "clean"}) + finally: + Path(path).unlink(missing_ok=True) + # 3. mypy (if installed, non-blocking) + if _have("mypy"): + with tempfile.NamedTemporaryFile("w", suffix=".py", delete=False) as f: + f.write(code) + path = f.name + try: + rc, output = _run(["mypy", "--no-error-summary", "--ignore-missing-imports", path]) + out.append({"tool": "mypy", "pass": rc == 0, "msg": output[:500]}) + finally: + Path(path).unlink(missing_ok=True) + return out + + +def check_typescript(code: str) -> list[dict]: + out = [] + if not _have("npx") and not _have("tsc"): + return [{"tool": "typescript", "pass": True, "msg": "tsc/npx not installed — skipped"}] + with tempfile.NamedTemporaryFile("w", suffix=".ts", delete=False) as f: + f.write(code) + path = f.name + try: + cmd = (["tsc", "--noEmit", "--allowJs", "--target", "ES2022", + "--moduleResolution", "node", path] if _have("tsc") + else ["npx", "-y", "--package=typescript", "--", + "tsc", "--noEmit", "--target", "ES2022", path]) + rc, output = _run(cmd, timeout=60) + out.append({"tool": "tsc", "pass": rc == 0, + "msg": output[:600] if output else "clean", + "blocking": rc != 0}) + finally: + Path(path).unlink(missing_ok=True) + return out + + +def check_shell(code: str) -> list[dict]: + out = [] + # bash -n (syntax only — no execution). Use file path; stdin parser is lenient. 
+ with tempfile.NamedTemporaryFile("w", suffix=".sh", delete=False) as f: + f.write(code) + path = f.name + try: + rc, output = _run(["bash", "-n", path]) + finally: + Path(path).unlink(missing_ok=True) + out.append({"tool": "bash-syntax", "pass": rc == 0, "msg": output or "valid", + "blocking": rc != 0}) + if _have("shellcheck"): + with tempfile.NamedTemporaryFile("w", suffix=".sh", delete=False) as f: + f.write(code) + path = f.name + try: + rc, output = _run(["shellcheck", "-f", "gcc", path]) + # shellcheck returns nonzero for warnings — non-blocking + out.append({"tool": "shellcheck", "pass": rc == 0, "msg": output[:500]}) + finally: + Path(path).unlink(missing_ok=True) + return out + + +def check_terraform(code: str) -> list[dict]: + out = [] + if not _have("terraform"): + return [{"tool": "terraform", "pass": True, "msg": "terraform not installed — skipped"}] + with tempfile.TemporaryDirectory() as d: + Path(d, "main.tf").write_text(code) + rc, output = _run(["terraform", "-chdir=" + d, "init", "-backend=false", "-input=false"], timeout=60) + if rc != 0: + out.append({"tool": "terraform-init", "pass": False, "msg": output[:500], + "blocking": True}) + return out + rc, output = _run(["terraform", "-chdir=" + d, "validate"]) + out.append({"tool": "terraform-validate", "pass": rc == 0, + "msg": output[:500] if output else "clean", + "blocking": rc != 0}) + if _have("tfsec"): + rc, output = _run(["tfsec", d, "--no-color"]) + out.append({"tool": "tfsec", "pass": rc == 0, "msg": output[:500]}) + return out + + +def check_cloudformation(code: str) -> list[dict]: + if not _have("cfn-lint"): + return [{"tool": "cfn-lint", "pass": True, "msg": "cfn-lint not installed — skipped"}] + with tempfile.NamedTemporaryFile("w", suffix=".yaml", delete=False) as f: + f.write(code) + path = f.name + try: + rc, output = _run(["cfn-lint", path]) + return [{"tool": "cfn-lint", "pass": rc == 0, "msg": output[:500], + "blocking": rc != 0}] + finally: + 
Path(path).unlink(missing_ok=True) + + +def check_json(code: str) -> list[dict]: + try: + json.loads(code) + return [{"tool": "json-parse", "pass": True, "msg": "valid JSON"}] + except json.JSONDecodeError as e: + return [{"tool": "json-parse", "pass": False, "msg": str(e), "blocking": True}] + + +def check_yaml(code: str) -> list[dict]: + try: + import yaml # type: ignore + except ImportError: + return [{"tool": "yaml-parse", "pass": True, "msg": "pyyaml not installed — skipped"}] + try: + yaml.safe_load(code) + return [{"tool": "yaml-parse", "pass": True, "msg": "valid YAML"}] + except yaml.YAMLError as e: + return [{"tool": "yaml-parse", "pass": False, "msg": str(e)[:300], "blocking": True}] + + +LANG_CHECKERS = { + "python": check_python, "py": check_python, + "typescript": check_typescript, "ts": check_typescript, + "javascript": check_typescript, "js": check_typescript, + "bash": check_shell, "sh": check_shell, "shell": check_shell, + "terraform": check_terraform, "hcl": check_terraform, "tf": check_terraform, + "cloudformation": check_cloudformation, "yaml": check_yaml, "yml": check_yaml, + "json": check_json, +} + + +# ---------------------------------------------------------------------- +# Orchestrator +# ---------------------------------------------------------------------- +def check(work_product: str) -> dict: + """Extract code blocks + run checkers. Returns aggregate verdict. 
+ + Returns: + { + "has_code": bool, + "verdict": "pass" | "fail", + "blocking_failure": bool, + "checks": [{tool, pass, msg, blocking?}, ...], + "blocks_checked": int, + } + """ + blocks = extract_code_blocks(work_product) + all_checks: list[dict] = [] + has_code = False + + for lang, content in blocks: + checker = LANG_CHECKERS.get(lang) + if not checker: + continue + has_code = True + results = checker(content) + for r in results: + r["language"] = lang + all_checks.extend(results) + + blocking_failure = any(c.get("blocking") and not c.get("pass") for c in all_checks) + # Only blocking checks determine pass/fail. Non-blocking (warn) tools like + # mypy or shellcheck can fail without sinking the verdict. + blocking_passed = all(c.get("pass") for c in all_checks if c.get("blocking")) + any_blocking = any(c.get("blocking") for c in all_checks) + + if not has_code: + return { + "has_code": False, + "verdict": "pass", # nothing to check → don't block review + "blocking_failure": False, + "checks": [], + "blocks_checked": 0, + } + + if blocking_failure: + verdict = "fail" + elif not any_blocking: + # no blocking checks ran (e.g. 
tools missing) — warn + verdict = "warn" + else: + # all blocking checks passed — non-blocking may still complain, but ship it + any_non_blocking_failed = any( + not c.get("pass") and not c.get("blocking") for c in all_checks + ) + verdict = "warn" if any_non_blocking_failed else "pass" + + return { + "has_code": True, + "verdict": verdict, + "blocking_failure": blocking_failure, + "checks": all_checks, + "blocks_checked": len(blocks), + } + + +if __name__ == "__main__": + import sys + if len(sys.argv) > 1: + text = Path(sys.argv[1]).read_text() + else: + text = sys.stdin.read() + result = check(text) + print(json.dumps(result, indent=2)) diff --git a/bin/lib/max_client.py b/bin/lib/max_client.py new file mode 100644 index 0000000000000000000000000000000000000000..bfd6a006a466ac47bff1684fb5ed0c1a08e07e0f --- /dev/null +++ b/bin/lib/max_client.py @@ -0,0 +1,365 @@ +"""Claude Max plan OAuth client. + +Handles: + - Read OAuth token from macOS keychain (`Claude Code-credentials`) + - Auto-refresh before expiry (lazy, on API call) + - Call Anthropic `/v1/messages` with OAuth Bearer + - Parse `anthropic-ratelimit-*` headers → quota state + - Cache quota state (5-min TTL) to avoid probing too often + +Quota model (verified 2026-04-19): + Max plan uses UNIFIED pool — Opus + Sonnet share quota. + Haiku has separate pool (confirmed via live probe). + 5-hour window + 7-day window, both monitored. 
+ +Headers (from live response): + anthropic-ratelimit-unified-5h-status: allowed|rate_limited + anthropic-ratelimit-unified-5h-reset: + anthropic-ratelimit-unified-5h-utilization: 0.0-1.0 + anthropic-ratelimit-unified-7d-status + anthropic-ratelimit-unified-7d-reset + anthropic-ratelimit-unified-7d-utilization +""" + +from __future__ import annotations + +import json +import os +import subprocess +import time +import urllib.error +import urllib.request +from dataclasses import dataclass, field +from pathlib import Path +from typing import Any, Optional + +KEYCHAIN_SERVICE = "Claude Code-credentials" +OAUTH_REFRESH_URL = "https://claude.ai/v1/oauth/token" +OAUTH_CLIENT_ID = "9d1c250a-e61b-44d9-88ed-5944d1962f5e" +ANTHROPIC_API = "https://api.anthropic.com/v1/messages" +ANTHROPIC_BETA = "oauth-2025-04-20" +ANTHROPIC_VERSION = "2023-06-01" + +QUOTA_CACHE_PATH = Path.home() / ".surrogate" / "yolo" / "max-quota.json" +QUOTA_CACHE_TTL = 300 # 5 minutes + +# --- Model IDs (from live probe 2026-04-19) --- +MODEL_OPUS = "claude-opus-4-20250514" +MODEL_SONNET = "claude-sonnet-4-20250514" +MODEL_HAIKU = "claude-haiku-4-5-20251001" + + +@dataclass +class QuotaState: + """Rate-limit state parsed from response headers.""" + model: str + status: str = "unknown" # allowed | rate_limited | unknown + reset_at: int = 0 # unix timestamp when window resets + utilization_5h: float = 0.0 + utilization_7d: float = 0.0 + last_checked: float = 0.0 # unix seconds + last_error: str = "" + + @property + def available(self) -> bool: + return self.status == "allowed" + + @property + def seconds_until_reset(self) -> int: + return max(0, int(self.reset_at - time.time())) + + +@dataclass +class MaxResponse: + """Successful response from Max plan.""" + content: str + model_requested: str + model_served: str + input_tokens: int + output_tokens: int + quota: QuotaState = field(default_factory=lambda: QuotaState(model="")) + + +class MaxUnavailable(Exception): + """Raised when Max plan cannot serve 
the request (429 or auth).""" + def __init__(self, model: str, reset_at: int = 0, msg: str = ""): + self.model = model + self.reset_at = reset_at + self.msg = msg + super().__init__(f"Max {model} unavailable: {msg} (reset in {max(0, reset_at - int(time.time()))}s)") + + +class MaxAuthError(Exception): + """Raised when OAuth token refresh fails permanently — needs relogin.""" + + +# ---------------------------------------------------------------------- +# Keychain I/O +# ---------------------------------------------------------------------- +def read_token() -> dict: + """Read full credential blob from keychain.""" + try: + raw = subprocess.check_output( + ["security", "find-generic-password", "-s", KEYCHAIN_SERVICE, "-w"], + stderr=subprocess.DEVNULL, + ).decode().strip() + return json.loads(raw) + except subprocess.CalledProcessError: + raise MaxAuthError(f"Keychain entry '{KEYCHAIN_SERVICE}' not found — run `claude` to login") + except json.JSONDecodeError as e: + raise MaxAuthError(f"Invalid JSON in keychain: {e}") + + +def write_token(cred: dict) -> None: + """Atomically replace keychain entry.""" + body = json.dumps(cred) + subprocess.run( + ["security", "delete-generic-password", "-s", KEYCHAIN_SERVICE], + stderr=subprocess.DEVNULL, + ) + subprocess.run( + ["security", "add-generic-password", + "-s", KEYCHAIN_SERVICE, + "-a", os.environ.get("USER", "Ashira"), + "-w", body, + "-U"], + check=True, + stderr=subprocess.DEVNULL, + ) + + +# ---------------------------------------------------------------------- +# OAuth refresh +# ---------------------------------------------------------------------- +def refresh_if_needed(cred: dict, buffer_seconds: int = 120) -> dict: + """Refresh access token if expiring in dict[str, QuotaState]: + """Load cached quota state (per model).""" + if not QUOTA_CACHE_PATH.exists(): + return {} + try: + raw = json.loads(QUOTA_CACHE_PATH.read_text()) + return {k: QuotaState(**v) for k, v in raw.items()} + except (json.JSONDecodeError, 
TypeError): + return {} + + +def save_quota_cache(cache: dict[str, QuotaState]) -> None: + QUOTA_CACHE_PATH.parent.mkdir(parents=True, exist_ok=True) + data = {k: v.__dict__ for k, v in cache.items()} + QUOTA_CACHE_PATH.write_text(json.dumps(data, indent=2)) + + +def parse_quota_headers(model: str, headers: dict[str, str]) -> QuotaState: + """Parse anthropic-ratelimit-* headers into QuotaState.""" + h = {k.lower(): v for k, v in headers.items()} + + def fget(key: str, default: float = 0.0) -> float: + try: + return float(h.get(key, default)) + except (ValueError, TypeError): + return default + + def iget(key: str, default: int = 0) -> int: + try: + return int(float(h.get(key, default))) + except (ValueError, TypeError): + return default + + status = h.get("anthropic-ratelimit-unified-5h-status", "unknown") + reset_5h = iget("anthropic-ratelimit-unified-5h-reset") + reset_7d = iget("anthropic-ratelimit-unified-7d-reset") + + return QuotaState( + model=model, + status=status, + reset_at=max(reset_5h, reset_7d) if reset_5h and reset_7d else reset_5h or reset_7d, + utilization_5h=fget("anthropic-ratelimit-unified-5h-utilization"), + utilization_7d=fget("anthropic-ratelimit-unified-7d-utilization"), + last_checked=time.time(), + ) + + +# ---------------------------------------------------------------------- +# Call Anthropic via Max OAuth +# ---------------------------------------------------------------------- +def call_max( + model: str, + messages: list[dict], + max_tokens: int = 4096, + system: Optional[str] = None, + timeout: int = 180, +) -> MaxResponse: + """Make a Max-plan OAuth call. 
Raises MaxUnavailable on 429.""" + cred = refresh_if_needed(read_token()) + token = cred["claudeAiOauth"]["accessToken"] + + body: dict[str, Any] = { + "model": model, + "max_tokens": max_tokens, + "messages": messages, + } + if system: + body["system"] = system + + req = urllib.request.Request( + ANTHROPIC_API, + data=json.dumps(body).encode(), + headers={ + "Authorization": f"Bearer {token}", + "anthropic-version": ANTHROPIC_VERSION, + "anthropic-beta": ANTHROPIC_BETA, + "content-type": "application/json", + }, + method="POST", + ) + try: + with urllib.request.urlopen(req, timeout=timeout) as r: + data = json.loads(r.read()) + quota = parse_quota_headers(model, dict(r.getheaders())) + _update_cache(quota) + return MaxResponse( + content=data["content"][0]["text"], + model_requested=model, + model_served=data.get("model", model), + input_tokens=data["usage"]["input_tokens"], + output_tokens=data["usage"]["output_tokens"], + quota=quota, + ) + except urllib.error.HTTPError as e: + err_body = e.read().decode() + headers = dict(e.headers) + quota = parse_quota_headers(model, headers) + # Override: 429 always means rate_limited regardless of header contents + quota.status = "rate_limited" if e.code == 429 else "error" + quota.last_error = f"HTTP {e.code}: {err_body[:200]}" + # If 429 but no reset header, set a safe cooldown (5 min) so pick_max_model skips + if e.code == 429 and quota.reset_at <= time.time(): + quota.reset_at = int(time.time() + 300) + _update_cache(quota) + if e.code == 429: + raise MaxUnavailable(model, quota.reset_at, err_body) + if e.code == 401: + raise MaxAuthError(f"Max auth failed ({e.code}) — relogin needed") + raise MaxUnavailable(model, 0, f"HTTP {e.code}: {err_body[:200]}") + + +def _update_cache(quota: QuotaState) -> None: + cache = load_quota_cache() + cache[quota.model] = quota + save_quota_cache(cache) + + +# ---------------------------------------------------------------------- +# Tier selection +# 
---------------------------------------------------------------------- +MAX_TIER_ORDER = [MODEL_OPUS, MODEL_SONNET, MODEL_HAIKU] + + +def pick_max_model(prefer: str = MODEL_OPUS) -> Optional[str]: + """Pick best available Max-plan model. + + Strategy: + 1. If cache status=allowed AND fresh (< TTL) → use it immediately + 2. If cache stale (> TTL) → eligible to re-probe (real probe will confirm) + 3. If cache rate_limited: + - If reset_at > 0 AND reset_at still in future → NOT eligible (honor cooldown) + - Only eligible when reset_at passed + cache went stale + 4. Walk Opus → Sonnet → Haiku; use first eligible + + Returns model name or None if all rate-limited within cooldown. + """ + cache = load_quota_cache() + now = time.time() + + def eligible(model: str) -> bool: + q = cache.get(model) + if not q: + return True # unknown → worth one probe + # Fresh + allowed + if q.status == "allowed" and now - q.last_checked <= QUOTA_CACHE_TTL: + return True + # Rate-limited + still within cooldown window → skip + if q.status == "rate_limited" and q.reset_at > now: + return False + # Stale (either status) + no active cooldown → re-probe OK + if now - q.last_checked > QUOTA_CACHE_TTL: + return True + # Rate-limited but reset_at is 0 or in past → try again cautiously + if q.status == "rate_limited" and q.reset_at <= now: + return now - q.last_checked > 30 # wait 30s between retries + return False + + order = [prefer] + [m for m in MAX_TIER_ORDER if m != prefer] + for model in order: + if eligible(model): + return model + return None + + +def probe_and_refresh_cache() -> dict[str, QuotaState]: + """Send minimal probes to each tier to refresh cache. 
Called every 5 min.""" + out: dict[str, QuotaState] = {} + for model in MAX_TIER_ORDER: + try: + resp = call_max(model, [{"role": "user", "content": "."}], max_tokens=5) + out[model] = resp.quota + except MaxUnavailable as e: + # already cached in _update_cache + cache = load_quota_cache() + out[model] = cache.get(model, QuotaState(model=model, status="rate_limited", + reset_at=e.reset_at)) + except MaxAuthError: + raise + return out + + +if __name__ == "__main__": + # CLI self-test + import sys + if len(sys.argv) > 1 and sys.argv[1] == "probe": + for model, q in probe_and_refresh_cache().items(): + print(f"{model}: {q.status} util5h={q.utilization_5h:.2f} " + f"reset_in={q.seconds_until_reset}s") + elif len(sys.argv) > 1 and sys.argv[1] == "pick": + print(pick_max_model() or "NONE_AVAILABLE") + else: + # quick call + m = pick_max_model() or MODEL_HAIKU + r = call_max(m, [{"role": "user", "content": sys.argv[1] if len(sys.argv) > 1 else "hi"}], max_tokens=50) + print(f"[{r.model_served}] {r.content[:200]}") diff --git a/bin/lib/openrouter_client.py b/bin/lib/openrouter_client.py new file mode 100644 index 0000000000000000000000000000000000000000..be17efd8b067ed6ee9b5d46ee622520ecaf367ca --- /dev/null +++ b/bin/lib/openrouter_client.py @@ -0,0 +1,195 @@ +"""OpenRouter client — free-first then paid tiers. + +Tiers (per Ashira 2026-04-19): + FREE: qwen, gpt-oss, llama, nemotron, glm + CHEAP: deepseek-v3.2, grok-4.1-fast + PREMIUM: gpt-5.4, claude-haiku-4.5, claude-sonnet-4.6, claude-opus-4.7 + +Per-model cooldown tracked in ~/.surrogate/yolo/or-cooldowns.json to avoid +hammering rate-limited free models. 
+""" + +from __future__ import annotations + +import json +import os +import time +import urllib.error +import urllib.request +from dataclasses import dataclass, field +from pathlib import Path +from typing import Optional + +OR_URL = "https://openrouter.ai/api/v1/chat/completions" +COOLDOWN_PATH = Path.home() / ".surrogate" / "yolo" / "or-cooldowns.json" + +FREE_MODELS = [ + "qwen/qwen3-coder:free", + "openai/gpt-oss-120b:free", + "meta-llama/llama-3.3-70b-instruct:free", + "nvidia/nemotron-3-super-120b-a12b:free", + "z-ai/glm-4.5-air:free", +] + +CHEAP_MODELS = [ + "deepseek/deepseek-v3.2", + "x-ai/grok-4.1-fast", +] + +PREMIUM_MODELS = [ + "openai/gpt-5.4", + "anthropic/claude-haiku-4.5", + "anthropic/claude-sonnet-4.6", + "x-ai/grok-4.20", + "anthropic/claude-opus-4.7", +] + +DEFAULT_COOLDOWN_SECONDS = 60 # after 429, wait 60s before retrying this model + + +class ORUnavailable(Exception): + def __init__(self, model: str, code: int, body: str): + self.model = model + self.code = code + self.body = body + super().__init__(f"OR {model}: {code} {body[:200]}") + + +@dataclass +class ORResponse: + content: str + model_requested: str + model_served: str + input_tokens: int = 0 + output_tokens: int = 0 + + +def _load_cooldowns() -> dict[str, float]: + if not COOLDOWN_PATH.exists(): + return {} + try: + return json.loads(COOLDOWN_PATH.read_text()) + except (json.JSONDecodeError, OSError): + return {} + + +def _save_cooldowns(c: dict[str, float]) -> None: + COOLDOWN_PATH.parent.mkdir(parents=True, exist_ok=True) + COOLDOWN_PATH.write_text(json.dumps(c)) + + +def is_on_cooldown(model: str) -> bool: + c = _load_cooldowns() + return c.get(model, 0) > time.time() + + +def mark_cooldown(model: str, seconds: int = DEFAULT_COOLDOWN_SECONDS) -> None: + c = _load_cooldowns() + c[model] = time.time() + seconds + # Prune expired entries + c = {k: v for k, v in c.items() if v > time.time()} + _save_cooldowns(c) + + +def call_openrouter( + model: str, + messages: list[dict], + 
max_tokens: int = 4000, + system: Optional[str] = None, + timeout: int = 120, +) -> ORResponse: + """Call OpenRouter directly. Raises ORUnavailable on error.""" + api_key = os.environ.get("OPENROUTER_API_KEY", "") + if not api_key: + # Try loading from .env (accepts both `KEY=val` and `export KEY=val` formats) + env_file = Path.home() / ".surrogate" / ".env" + if env_file.exists(): + for line in env_file.read_text().splitlines(): + s = line.strip() + if s.startswith("export "): + s = s[len("export "):].lstrip() + if s.startswith("OPENROUTER_API_KEY="): + api_key = s.split("=", 1)[1].strip().strip('"').strip("'") + break + if not api_key: + raise ORUnavailable(model, 0, "OPENROUTER_API_KEY not set") + + body_msgs = list(messages) + if system: + body_msgs = [{"role": "system", "content": system}] + body_msgs + + body = json.dumps({ + "model": model, + "max_tokens": max_tokens, + "messages": body_msgs, + }).encode() + + req = urllib.request.Request( + OR_URL, + data=body, + headers={ + "Authorization": f"Bearer {api_key}", + "HTTP-Referer": "https://github.com/Ashira/axentx", + "X-Title": "axentx-smart-dispatcher", + "content-type": "application/json", + }, + method="POST", + ) + try: + with urllib.request.urlopen(req, timeout=timeout) as r: + data = json.loads(r.read()) + if "choices" not in data: + raise ORUnavailable(model, 0, str(data)[:200]) + choice = data["choices"][0] + content = choice["message"]["content"] + usage = data.get("usage", {}) + return ORResponse( + content=content, + model_requested=model, + model_served=data.get("model", model), + input_tokens=usage.get("prompt_tokens", 0), + output_tokens=usage.get("completion_tokens", 0), + ) + except urllib.error.HTTPError as e: + body = e.read().decode() + # 429 or 503 → mark cooldown + if e.code in (429, 503, 502): + mark_cooldown(model) + raise ORUnavailable(model, e.code, body) + except Exception as e: # network errors + raise ORUnavailable(model, 0, str(e)) + + +def pick_free() -> Optional[str]: + 
"""First free model not on cooldown.""" + for m in FREE_MODELS: + if not is_on_cooldown(m): + return m + return None + + +def pick_cheap() -> Optional[str]: + for m in CHEAP_MODELS: + if not is_on_cooldown(m): + return m + return None + + +def pick_premium() -> Optional[str]: + for m in PREMIUM_MODELS: + if not is_on_cooldown(m): + return m + return None + + +if __name__ == "__main__": + import sys + if len(sys.argv) > 1 and sys.argv[1] == "pick": + print(f"free: {pick_free()}") + print(f"cheap: {pick_cheap()}") + print(f"premium: {pick_premium()}") + else: + m = pick_free() or pick_cheap() or pick_premium() + q = sys.argv[1] if len(sys.argv) > 1 else "say OK" + r = call_openrouter(m, [{"role": "user", "content": q}], max_tokens=30) + print(f"[{r.model_served}] {r.content[:100]}") diff --git a/bin/lib/prompt_cache.py b/bin/lib/prompt_cache.py new file mode 100644 index 0000000000000000000000000000000000000000..d761ebdd491e17a7dcc231626d08861b4622f0ac --- /dev/null +++ b/bin/lib/prompt_cache.py @@ -0,0 +1,17 @@ +"""Anthropic prompt caching helper — adds cache_control to messages so repeated +system prompts / long contexts cost 10% of full price. +Usage: import this in any bridge that calls Anthropic API directly. +""" +def add_cache_control(messages, threshold=2048): + """Add cache_control to the longest system message if it's over threshold chars. 
+ Anthropic cache: https://docs.anthropic.com/en/docs/build-with-claude/prompt-caching + Requires anthropic-beta: prompt-caching-2024-07-31 header.""" + if not messages: return messages + for m in messages: + if m.get('role') == 'system' and isinstance(m.get('content'), str): + if len(m['content']) >= threshold: + # Convert to structured content with cache marker + m['content'] = [{'type': 'text', 'text': m['content'], + 'cache_control': {'type': 'ephemeral'}}] + break + return messages diff --git a/bin/lib/review_agent.py b/bin/lib/review_agent.py new file mode 100644 index 0000000000000000000000000000000000000000..efd841c85711e319d3a35fe373010d8bd5baaf82 --- /dev/null +++ b/bin/lib/review_agent.py @@ -0,0 +1,328 @@ +"""Review agent — tier-gated + consensus + ground-truth. + +Replaces the simple review() in smart_dispatcher.py. Rules: + + 1. Reviewer rank >= Writer rank (strict) + 2. Reviewer provider != Writer provider (cross-provider) + 3. For `critical=True` tasks: Reviewer rank >= Writer rank + 1, and 2-of-3 consensus + 4. If no eligible reviewer available RIGHT NOW → block (queue-wait), + retry when cache refreshes. DO NOT downgrade to lower tier. + 5. Ground-truth check runs alongside reviewer opinion: + code has blocking compile/parse failure → hard-fail regardless of reviewer +""" + +from __future__ import annotations + +import json +import re +import sys +import time +from pathlib import Path +from typing import Optional + +sys.path.insert(0, str(Path(__file__).parent)) + +from ground_truth import check as gt_check +from max_client import MAX_TIER_ORDER, MaxUnavailable, call_max, pick_max_model +from openrouter_client import ( + CHEAP_MODELS, + FREE_MODELS, + PREMIUM_MODELS, + ORUnavailable, + call_openrouter, + is_on_cooldown, +) +from tier_rank import _provider_family, is_eligible_reviewer, pick_reviewer_from, rank + + +REVIEWER_SYSTEM = """You are a strict code review agent. + +Your job: + 1. Check if the work fully addresses the task + 2. 
Check for correctness (syntax, logic, hallucinations) + 3. Check for completeness (edge cases, error handling) + 4. Rate severity of issues (low | med | high) + +Output JSON only (no markdown, no prose): +{ + "verdict": "pass" | "needs_revision", + "score": 0-10, + "issues": [{"severity":"low|med|high","desc":"..."}], + "suggestions": ["...", "..."], + "reasoning": "1-2 sentences" +} + +Rules: + - Any "high" severity issue → always "needs_revision" + - If you detect hallucinated APIs/functions → "needs_revision" with severity=high + - Be rigorous — pass only when genuinely good +""" + + +class NoEligibleReviewer(Exception): + """No reviewer currently available at required tier. Queue-wait.""" + + +def _available_reviewers() -> list[str]: + """Enumerate all currently available reviewer candidates. + + Max plan tiers (check quota) + OR tiers (check cooldowns). + """ + cands: list[str] = [] + + # Max tiers (use pick_max_model to respect cache) + # We collect all three; caller picks based on tier + for m in MAX_TIER_ORDER: + # only include if not currently rate-limited long-term + from max_client import load_quota_cache + q = load_quota_cache().get(m) + if not q or q.status == "allowed" or q.seconds_until_reset < 60: + cands.append(m) + + # OR tiers + for m in PREMIUM_MODELS + CHEAP_MODELS + FREE_MODELS: + if not is_on_cooldown(m): + cands.append(m) + return cands + + +def _call_model_for_review(model: str, prompt: str, system: str) -> tuple[str, str]: + """Route to Max or OR depending on model name. 
Returns (text, served_model_id).""" + if model in MAX_TIER_ORDER: + r = call_max(model, [{"role": "user", "content": prompt}], + max_tokens=1500, system=system, timeout=120) + return r.content, r.model_served + r = call_openrouter(model, [{"role": "user", "content": prompt}], + max_tokens=1500, system=system, timeout=120) + return r.content, r.model_served + + +def _parse_json_verdict(text: str) -> dict: + text = text.strip() + if text.startswith("```"): + text = text.split("```", 2)[1] if "```" in text[3:] else text[3:] + text = text.lstrip("json").lstrip() + if "```" in text: + text = text.rsplit("```", 1)[0] + try: + return json.loads(text) + except json.JSONDecodeError: + m = re.search(r"\{.*\}", text, re.DOTALL) + if m: + try: + return json.loads(m.group(0)) + except json.JSONDecodeError: + pass + return {"verdict": "needs_revision", "reasoning": "review parse failed", + "raw": text[:500], "score": 0, "issues": [], "suggestions": []} + + +def review_once( + task_prompt: str, + work_product: str, + writer_model: str, + critical: bool = False, + queue_wait_max_seconds: int = 600, + poll_interval: int = 15, +) -> dict: + """Single-reviewer review with tier enforcement. + + Blocks (queue-wait) up to queue_wait_max_seconds if no eligible reviewer. + Raises NoEligibleReviewer after timeout. 
+ """ + deadline = time.time() + queue_wait_max_seconds + + reviewer: Optional[str] = None + waits = 0 + while time.time() < deadline: + cands = _available_reviewers() + reviewer = pick_reviewer_from(cands, writer_model, critical=critical) + if reviewer: + break + waits += 1 + time.sleep(poll_interval) + + if not reviewer: + raise NoEligibleReviewer( + f"no reviewer with rank>={rank(writer_model) + (1 if critical else 0)} " + f"and provider!={_provider_family(writer_model)} after {queue_wait_max_seconds}s" + ) + + review_prompt = f"""# TASK +{task_prompt} + +# WORK PRODUCT +{work_product} + +# YOUR REVIEW (valid JSON only):""" + + try: + text, served = _call_model_for_review(reviewer, review_prompt, REVIEWER_SYSTEM) + except (MaxUnavailable, ORUnavailable) as e: + # Reviewer itself errored — retry with fresh pool + return {"verdict": "needs_revision", "reasoning": f"reviewer call failed: {e}", + "reviewer_model": reviewer, "score": 0, + "transport_error": True} + + parsed = _parse_json_verdict(text) + parsed["reviewer_model"] = served + parsed["reviewer_provider_family"] = _provider_family(served) + parsed["reviewer_rank"] = rank(served) + parsed["writer_rank"] = rank(writer_model) + parsed["wait_cycles"] = waits + return parsed + + +def review_with_consensus( + task_prompt: str, + work_product: str, + writer_model: str, + num_reviewers: int = 3, + required_agree: int = 2, + critical: bool = True, + queue_wait_max_seconds: int = 600, +) -> dict: + """Multi-reviewer consensus review. Used for critical tasks. + + Picks N reviewers from DIFFERENT provider families (+ cross-provider from writer). + Verdict = pass if required_agree reviewers say "pass". 
+ """ + deadline = time.time() + queue_wait_max_seconds + reviewers: list[str] = [] + used_families: set[str] = {_provider_family(writer_model)} + + # Collect N reviewers from N distinct families + while len(reviewers) < num_reviewers and time.time() < deadline: + cands = _available_reviewers() + # Filter: eligible + family not yet used + new_picks: list[str] = [] + for c in cands: + fam = _provider_family(c) + if fam in used_families: + continue + ok, _ = is_eligible_reviewer(writer_model, c, critical=critical) + if ok: + new_picks.append(c) + # Pick highest rank per family + by_family: dict[str, tuple[int, str]] = {} + for c in new_picks: + fam = _provider_family(c) + r = rank(c) + if fam not in by_family or by_family[fam][0] < r: + by_family[fam] = (r, c) + for fam, (_, model) in sorted(by_family.items(), key=lambda x: -x[1][0]): + if len(reviewers) >= num_reviewers: + break + reviewers.append(model) + used_families.add(fam) + if len(reviewers) < num_reviewers: + time.sleep(15) + + if len(reviewers) < required_agree: + raise NoEligibleReviewer( + f"consensus needs {required_agree} distinct-family reviewers, got {len(reviewers)}" + ) + + # Fire reviews + individual_verdicts: list[dict] = [] + for rv in reviewers: + try: + v = review_once(task_prompt, work_product, writer_model, + critical=critical, queue_wait_max_seconds=30) + # Force it to use THIS specific reviewer + # (review_once picks top; we need to override — run directly) + text, served = _call_model_for_review( + rv, + f"# TASK\n{task_prompt}\n\n# WORK PRODUCT\n{work_product}\n\n# YOUR REVIEW (JSON):", + REVIEWER_SYSTEM, + ) + parsed = _parse_json_verdict(text) + parsed["reviewer_model"] = served + parsed["reviewer_rank"] = rank(served) + parsed["reviewer_provider_family"] = _provider_family(served) + individual_verdicts.append(parsed) + except (MaxUnavailable, ORUnavailable) as e: + individual_verdicts.append( + {"verdict": "needs_revision", "reasoning": f"reviewer error: {e}", + "reviewer_model": rv, 
"transport_error": True} + ) + + passes = sum(1 for v in individual_verdicts if v.get("verdict") == "pass") + consensus_verdict = "pass" if passes >= required_agree else "needs_revision" + + # Aggregate issues from ALL reviewers (even if majority passes) + all_issues: list[dict] = [] + all_suggestions: list[str] = [] + for v in individual_verdicts: + all_issues.extend(v.get("issues", []) or []) + all_suggestions.extend(v.get("suggestions", []) or []) + + return { + "verdict": consensus_verdict, + "consensus_pass_count": passes, + "consensus_required": required_agree, + "individual_verdicts": individual_verdicts, + "issues": all_issues, + "suggestions": all_suggestions, + "reviewers": [v.get("reviewer_model") for v in individual_verdicts], + "writer_rank": rank(writer_model), + "reasoning": f"consensus {passes}/{len(individual_verdicts)} pass (required {required_agree})", + } + + +def review_full( + task_prompt: str, + work_product: str, + writer_model: str, + critical: bool = False, + use_consensus: bool = False, +) -> dict: + """Full review = reviewer opinion + ground-truth check. + + Ground-truth BLOCKING failure → hard fail regardless of reviewer. + """ + # 1. Ground-truth + gt = gt_check(work_product) + + # 2. Reviewer opinion + if use_consensus: + reviewer = review_with_consensus( + task_prompt, work_product, writer_model, + num_reviewers=3, required_agree=2, critical=critical, + ) + else: + reviewer = review_once(task_prompt, work_product, writer_model, critical=critical) + + # 3. 
Combine + final_verdict = reviewer.get("verdict", "needs_revision") + if gt.get("blocking_failure"): + final_verdict = "needs_revision" + + return { + "verdict": final_verdict, + "reviewer": reviewer, + "ground_truth": gt, + "override_by_ground_truth": gt.get("blocking_failure", False), + } + + +if __name__ == "__main__": + import sys + if len(sys.argv) < 3: + print("usage: review_agent.py ") + sys.exit(1) + task = sys.argv[1] + work = Path(sys.argv[2]).read_text() + writer = sys.argv[3] if len(sys.argv) > 3 else "claude-haiku-4-5-20251001" + critical = "--critical" in sys.argv + consensus = "--consensus" in sys.argv + r = review_full(task, work, writer, critical=critical, use_consensus=consensus) + print(json.dumps({ + "verdict": r["verdict"], + "ground_truth_verdict": r["ground_truth"]["verdict"], + "ground_truth_blocking": r["ground_truth"]["blocking_failure"], + "override_by_ground_truth": r["override_by_ground_truth"], + "reviewer_model": r["reviewer"].get("reviewer_model"), + "reviewer_rank": r["reviewer"].get("reviewer_rank"), + "reviewer_verdict": r["reviewer"].get("verdict"), + }, indent=2)) diff --git a/bin/lib/smart_dispatcher.py b/bin/lib/smart_dispatcher.py new file mode 100644 index 0000000000000000000000000000000000000000..0fff593018d9464f12a67c40b8c1829b45e74a14 --- /dev/null +++ b/bin/lib/smart_dispatcher.py @@ -0,0 +1,420 @@ +"""Smart dispatcher — Max plan → OR free → OR paid with checkpoint + review. + +Tier priority (per Ashira 2026-04-19): + 1. Max Opus 4.x (leverage flat-rate first) + 2. Max Sonnet 4.x (same plan, same pool typically) + 3. Max Haiku 4.x (cheapest Max tier) + 4. OR FREE models (qwen / gpt-oss / llama / nemotron / glm) + 5. OR CHEAP paid (deepseek / grok-fast) + 6. OR PREMIUM paid (gpt-5 / claude-opus / claude-sonnet via OR) + +Continuous re-check: every 5 min probe Max tiers — if Opus/Sonnet come back +available, subsequent calls return to them (honor Max plan flat-rate). 
"""Smart dispatcher — strict-priority provider routing with cross-provider review.

Review retry: INFINITE per Ashira — runs revisions until reviewer passes.
"""

from __future__ import annotations

import datetime as dt
import json
import sys
import time
from pathlib import Path
from typing import Callable, Optional

sys.path.insert(0, str(Path(__file__).parent))

from checkpoint import Checkpoint
from codebase_scanner import as_context_prompt, scan
from max_client import (
    MAX_TIER_ORDER,
    MODEL_HAIKU,
    MODEL_OPUS,
    MODEL_SONNET,
    MaxAuthError,
    MaxUnavailable,
    call_max,
    pick_max_model,
    probe_and_refresh_cache,
)
from openrouter_client import (
    CHEAP_MODELS,
    FREE_MODELS,
    PREMIUM_MODELS,
    ORResponse,
    ORUnavailable,
    call_openrouter,
    is_on_cooldown,
)
from review_agent import NoEligibleReviewer, review_full


# Single-element list: a mutable cell so the timestamp can be updated in place
# without `global`.
LAST_MAX_PROBE: list[float] = [0.0]
MAX_PROBE_INTERVAL = 300  # seconds between Max-tier availability probes (5 min)


class DispatchResult:
    """Outcome of a successful provider call: text plus provenance + token counts."""

    def __init__(self, text: str, provider: str, model: str, input_tokens: int = 0, output_tokens: int = 0):
        self.text = text
        self.provider = provider  # "max" | "or_free" | "or_cheap" | "or_premium"
        self.model = model
        self.input_tokens = input_tokens
        self.output_tokens = output_tokens


def _tier_iter() -> list[tuple[str, list[str]]]:
    """Ordered tiers to try in strict priority."""
    return [
        ("max", MAX_TIER_ORDER),
        ("or_free", FREE_MODELS),
        ("or_cheap", CHEAP_MODELS),
        ("or_premium", PREMIUM_MODELS),
    ]


def _maybe_probe_max() -> None:
    """Every 5 min, send minimal probes to each Max tier to refresh cache."""
    if time.time() - LAST_MAX_PROBE[0] > MAX_PROBE_INTERVAL:
        try:
            probe_and_refresh_cache()
            LAST_MAX_PROBE[0] = time.time()
        except MaxAuthError:
            pass  # handled at call time


def dispatch(
    prompt: str,
    system: Optional[str] = None,
    task_id: Optional[str] = None,
    max_tokens: int = 4096,
    checkpoint: Optional[Checkpoint] = None,
    prefer_max: bool = True,
    exclude_providers: set[str] | None = None,
    on_attempt: Optional[Callable[[str, str], None]] = None,
) -> DispatchResult:
    """Try tiers in order until one succeeds. Logs to checkpoint.

    Args:
        prompt: user message
        system: system prompt (optional)
        task_id: for logging
        max_tokens: output cap
        checkpoint: Checkpoint instance for event logging
        prefer_max: try Max first (True) — set False for review agent (cross-provider)
        exclude_providers: skip these providers (e.g. {"max"} to force OR)
        on_attempt: callback(provider, model) called per attempt (for debugging)

    Returns DispatchResult, or raises RuntimeError if ALL tiers are exhausted.
    """
    exclude = exclude_providers or set()
    messages = [{"role": "user", "content": prompt}]
    _maybe_probe_max()

    tiers = _tier_iter()
    if not prefer_max:
        tiers = [t for t in tiers if t[0] != "max"]

    errors: list[str] = []

    for tier_name, models in tiers:
        if tier_name in exclude:
            continue

        if tier_name == "max":
            m = pick_max_model()
            if m is None:
                errors.append("max: all tiers rate-limited")
                continue
            if on_attempt:
                on_attempt("max", m)
            if checkpoint:
                checkpoint.append("provider_selected", provider="max", model=m)
            try:
                r = call_max(m, messages, max_tokens=max_tokens, system=system)
                if checkpoint:
                    checkpoint.append("provider_success", provider="max", model=m,
                                      content_preview=r.content[:200],
                                      input_tokens=r.input_tokens,
                                      output_tokens=r.output_tokens)
                return DispatchResult(r.content, "max", m, r.input_tokens, r.output_tokens)
            except MaxUnavailable as e:
                errors.append(f"max:{m} 429 (reset {e.reset_at})")
                if checkpoint:
                    checkpoint.append("provider_failed", provider="max", model=m,
                                      reason=f"rate_limit reset_at={e.reset_at}")
                continue
            except MaxAuthError as e:
                errors.append(f"max auth: {e}")
                if checkpoint:
                    checkpoint.append("provider_failed", provider="max", reason=f"auth: {e}")
                # Max totally broken — skip tier but keep going with OR
                continue
        else:
            # OpenRouter tier: walk the model list, skipping cooled-down models.
            for m in models:
                if is_on_cooldown(m):
                    continue
                if on_attempt:
                    on_attempt(tier_name, m)
                if checkpoint:
                    checkpoint.append("provider_selected", provider=tier_name, model=m)
                try:
                    r = call_openrouter(m, messages, max_tokens=max_tokens, system=system)
                    if checkpoint:
                        checkpoint.append("provider_success", provider=tier_name, model=m,
                                          content_preview=r.content[:200],
                                          input_tokens=r.input_tokens,
                                          output_tokens=r.output_tokens)
                    return DispatchResult(r.content, tier_name, m, r.input_tokens, r.output_tokens)
                except ORUnavailable as e:
                    errors.append(f"{tier_name}:{m} {e.code}")
                    if checkpoint:
                        checkpoint.append("provider_failed", provider=tier_name, model=m,
                                          reason=f"{e.code}: {e.body[:100]}")
                    continue

    # All tiers exhausted
    raise RuntimeError(f"all providers exhausted: {errors}")


# ----------------------------------------------------------------------
# Review agent (cross-provider debate)
# ----------------------------------------------------------------------
REVIEWER_SYSTEM = """You are a strict code review agent. You review another AI's work for a given task.
Your job:
  1. Check if the work fully addresses the task
  2. Check for correctness (syntax, logic, hallucinations)
  3. Check for completeness (edge cases, error handling)
  4. Rate severity of issues

Output JSON only, no prose:
{
  "verdict": "pass" | "needs_revision",
  "score": 0-10,
  "issues": [{"severity":"low|med|high","desc":"..."}],
  "suggestions": ["...", "..."],
  "reasoning": "1-2 sentences"
}

If no issues, "pass". If ANY "high" severity issue → always "needs_revision"."""


def _parse_review_json(raw: str) -> dict:
    """Parse the reviewer's JSON reply, tolerating markdown fences and prose.

    Fix vs. original: the "json" language tag is removed as a case-insensitive
    *prefix*; str.lstrip("json") stripped a character SET and also missed
    uppercase "JSON" fence tags.
    """
    text = raw.strip()
    if text.startswith("```"):
        # Take the fenced payload when a closing fence exists, else drop the fence.
        text = text.split("```", 2)[1] if "```" in text[3:] else text[3:]
        text = text.lstrip()
        if text[:4].lower() == "json":
            text = text[4:].lstrip()
    if "```" in text:
        text = text.rsplit("```", 1)[0]
    try:
        return json.loads(text)
    except json.JSONDecodeError:
        pass
    # Fall back to the first {...} block anywhere in the reply.
    import re
    m = re.search(r"\{.*\}", text, re.DOTALL)
    if m:
        try:
            return json.loads(m.group(0))
        except json.JSONDecodeError:
            pass
    return {"verdict": "needs_revision", "reasoning": "review parse failed",
            "raw": text[:500]}


def review(
    task_prompt: str,
    work_product: str,
    writer_provider: str,
    checkpoint: Optional[Checkpoint] = None,
) -> dict:
    """Send work for cross-provider review. Uses different provider than writer.

    Returns:
        {"verdict": "pass|needs_revision", "score": int, "issues": [...],
         "suggestions": [...], "reasoning": "...", "reviewer_model": "..."}
    """
    # Cross-provider: if writer was Max/Anthropic → reviewer from OR non-Anthropic
    exclude = set()
    if writer_provider == "max":
        exclude.add("max")  # reviewer uses OR

    review_prompt = f"""# TASK ORIGINAL
{task_prompt}

# WORK PRODUCT TO REVIEW
{work_product}

# YOUR REVIEW (JSON only):"""

    if checkpoint:
        checkpoint.append("review_requested", writer_provider=writer_provider)

    result = dispatch(
        prompt=review_prompt,
        system=REVIEWER_SYSTEM,
        checkpoint=checkpoint,
        max_tokens=1500,
        exclude_providers=exclude,
        prefer_max=(writer_provider != "max"),
    )

    parsed = _parse_review_json(result.text)
    parsed["reviewer_provider"] = result.provider
    parsed["reviewer_model"] = result.model
    if checkpoint:
        checkpoint.append("review_verdict", **parsed)
    return parsed


# ----------------------------------------------------------------------
# Full orchestration
# ----------------------------------------------------------------------
def execute_task(
    task_id: str,
    prompt: str,
    system_base: str = "",
    max_tokens: int = 4096,
    max_review_iterations: int = 0,  # 0 = infinite (per Ashira)
    codebase_artifacts: list[str] | None = None,
    critical: bool = False,       # True → reviewer rank > writer + consensus 2/3
    use_consensus: bool = False,  # True → 2-of-3 reviewers vote
) -> dict:
    """End-to-end: scan codebase → dispatch → review → revise until pass.

    Returns: {"task_id","final_text","iterations","reviewer_verdict",...}
    """
    cp = Checkpoint.open(task_id)

    # Resume support
    existing_state = cp.resume_state()
    iteration = existing_state["review_iterations"]
    draft = existing_state["draft_text"]
    if existing_state["completed"]:
        return {"task_id": task_id, "status": "already_done",
                "final_text": draft, "iterations": iteration}

    if not existing_state["started"]:
        cp.append("task_start", prompt=prompt[:500])

        # Phase 1: codebase review
        report = scan(prompt, codebase_artifacts)
        cp.append("codebase_review",
                  artifacts=[f["path"] for f in report["recent_files"][:15]],
                  uncommitted_repos=len(report["uncommitted_repos"]),
                  semantic_hits=len(report["semantic_hits"]))
        codebase_ctx = as_context_prompt(report, 6000)
        system = (system_base + "\n\n" + codebase_ctx).strip()
    else:
        # Resume: re-scan codebase (may have changed)
        report = scan(prompt, codebase_artifacts)
        cp.append("codebase_review",
                  artifacts=[f["path"] for f in report["recent_files"][:15]],
                  resumed=True)
        codebase_ctx = as_context_prompt(report, 6000)
        system = (system_base + "\n\n" + codebase_ctx).strip()
        # Include prior draft as context for continuation
        if draft:
            system += f"\n\n## Previous attempt (continue/refine this):\n{draft[:3000]}"

    # Phase 2: dispatch + review loop
    last_review: dict | None = None
    accumulated_feedback = ""

    while True:
        iteration += 1
        iter_prompt = prompt
        if accumulated_feedback:
            iter_prompt = f"{prompt}\n\n## Reviewer feedback from prior iteration (address these):\n{accumulated_feedback}"

        result = dispatch(
            prompt=iter_prompt,
            system=system,
            checkpoint=cp,
            max_tokens=max_tokens,
        )
        draft = result.text
        cp.append("result_draft", text=draft, iteration=iteration,
                  provider=result.provider, model=result.model)

        # Review — tier-enforced + ground-truth via review_agent.review_full.
        # Fix vs. original: when no eligible reviewer is available we queue-wait
        # and retry the REVIEW of the same draft, instead of rolling back the
        # iteration counter and re-dispatching a brand-new draft (which burned
        # writer quota and discarded the finished work).
        while True:
            try:
                full_review = review_full(
                    task_prompt=prompt,
                    work_product=draft,
                    writer_model=result.model,
                    critical=critical,
                    use_consensus=use_consensus or critical,
                )
                break
            except NoEligibleReviewer as e:
                cp.append("review_blocked", reason=str(e))
                time.sleep(30)  # queue-wait, then poll again

        cp.append("review_full",
                  verdict=full_review["verdict"],
                  reviewer_model=full_review["reviewer"].get("reviewer_model"),
                  reviewer_rank=full_review["reviewer"].get("reviewer_rank"),
                  writer_rank=full_review["reviewer"].get("writer_rank"),
                  ground_truth_verdict=full_review["ground_truth"]["verdict"],
                  ground_truth_blocking=full_review["ground_truth"]["blocking_failure"],
                  override_by_ground_truth=full_review["override_by_ground_truth"])
        last_review = dict(full_review["reviewer"])
        last_review["verdict"] = full_review["verdict"]
        last_review["ground_truth"] = full_review["ground_truth"]

        verdict = last_review.get("verdict", "needs_revision")
        if verdict == "pass":
            cp.append("task_done", iteration=iteration, final_length=len(draft))
            cp.archive()
            return {
                "task_id": task_id,
                "status": "done",
                "final_text": draft,
                "iterations": iteration,
                "last_review": last_review,
                "writer": f"{result.provider}/{result.model}",
            }

        # needs_revision — assemble feedback for the next writer pass
        issues = last_review.get("issues", [])
        suggestions = last_review.get("suggestions", [])
        fb_lines = []
        for i in issues:
            fb_lines.append(f"- [{i.get('severity','?')}] {i.get('desc','')}")
        for s in suggestions:
            fb_lines.append(f"- {s}")
        accumulated_feedback = "\n".join(fb_lines) if fb_lines else last_review.get("reasoning", "")
        cp.append("revision_requested", iteration=iteration,
                  feedback=accumulated_feedback[:500])

        # Safety valve: if max_review_iterations > 0, enforce it. 0 = infinite.
        if max_review_iterations > 0 and iteration >= max_review_iterations:
            cp.append("task_failed", reason=f"max_iterations_{max_review_iterations}")
            cp.archive()
            return {
                "task_id": task_id,
                "status": "failed_max_iter",
                "final_text": draft,
                "iterations": iteration,
                "last_review": last_review,
            }


if __name__ == "__main__":
    import uuid
    if len(sys.argv) < 2:
        # Fix: the <prompt> placeholder had been stripped from the usage string.
        print("usage: smart_dispatcher.py <prompt>")
        sys.exit(1)
    task_id = "adhoc-" + uuid.uuid4().hex[:8]
    prompt = " ".join(sys.argv[1:])
    r = execute_task(task_id, prompt, max_tokens=500)
    print(json.dumps({
        "task_id": r["task_id"],
        "status": r["status"],
        "iterations": r["iterations"],
        "writer": r.get("writer"),
        "preview": r["final_text"][:400],
    }, indent=2))
"""Model tier rank — enforces "reviewer >= writer" quality rule.

Rank scale (1-10, approximate SWE-Bench Verified + LMArena Q1 2026):
  10 Claude Opus 4.7, GPT-5.4
   9 Claude Sonnet 4.6, GPT-5.4-pro, Grok 4.20, Gemini 3.1 Pro
   8 Claude Opus 4.6, DeepSeek V3.2 (coding strong)
   7 Claude Haiku 4.5, Grok 4.1 Fast, Qwen 3.6 35B-MoE
   6 Llama 3.3 70B, Mistral Large 3, Kimi K2.5, Qwen 3.5 Coder 32B
   5 Nemotron 120B, GLM 4.5 Air, Qwen 3.5 Coder 14B
   4 GPT-OSS 120B, Gemma 4 31B
   3 GPT-OSS 20B, Llama 3.3 8B, small local

Policy (per Ashira 2026-04-19):
  - Reviewer tier MUST be >= writer tier.
  - For code/IaC/security tasks, prefer reviewer tier > writer by 1.
  - If no eligible reviewer available → queue-wait (DO NOT downgrade writer).
"""

from __future__ import annotations

TIER_RANK: dict[str, int] = {
    # === 10: frontier ===
    "anthropic/claude-opus-4.7": 10,
    "openai/gpt-5.4": 10,
    "openrouter/anthropic/claude-opus-4.7": 10,
    "openrouter/openai/gpt-5.4": 10,

    # === 9: premium ===
    "anthropic/claude-sonnet-4.6": 9,
    "openai/gpt-5.4-pro": 9,
    "x-ai/grok-4.20": 9,
    "google/gemini-3.1-pro": 9,
    "openrouter/anthropic/claude-sonnet-4.6": 9,
    "openrouter/x-ai/grok-4.20": 9,
    # Max-plan native (OAuth)
    "claude-opus-4-20250514": 9,    # Opus 4 (Max plan native)
    "claude-sonnet-4-20250514": 9,  # Sonnet 4 (Max plan native)

    # === 8: strong ===
    "anthropic/claude-opus-4.6": 8,
    "deepseek/deepseek-v3.2": 8,
    "openrouter/deepseek/deepseek-v3.2": 8,

    # === 7: capable ===
    "anthropic/claude-haiku-4.5": 7,
    "x-ai/grok-4.1-fast": 7,
    "openrouter/anthropic/claude-haiku-4.5": 7,
    "openrouter/x-ai/grok-4.1-fast": 7,
    "claude-haiku-4-5-20251001": 7,  # Haiku 4.5 (Max plan native)
    "qwen/qwen3.6-35b-a3b": 7,
    "openrouter/qwen/qwen3.6-35b-a3b": 7,

    # === 6: mid ===
    "meta-llama/llama-3.3-70b-instruct": 6,
    "qwen/qwen3-next-80b-a3b-instruct": 6,
    "qwen/qwen3-coder": 6,
    "moonshotai/kimi-k2.5": 6,
    "mistral-large-3": 6,

    # === 5: weak-mid ===
    "nvidia/nemotron-3-super-120b-a12b": 5,
    "z-ai/glm-4.5-air": 5,

    # === 4: small ===
    "openai/gpt-oss-120b": 4,
    "google/gemma-4-31b-it": 4,

    # === 3: tiny / free ===
    "openai/gpt-oss-20b": 3,
    "meta-llama/llama-3.3-8b-instruct": 3,
}


def rank(model: str) -> int:
    """Return quality rank 1-10 for a model id; unknown models default to 5.

    Lookup order: exact table hit (after stripping a ":free" suffix and edge
    slashes) → table hit with the "openrouter/" routing prefix removed or
    added → case-insensitive family heuristics for unseen variants.
    """
    if not model:
        return 5
    base = model.replace(":free", "").strip("/")
    # Fix: the original also probed an empty "" prefix, which only produced
    # no-op duplicate lookups (base.replace("", "") == base).
    for candidate in (base, base.removeprefix("openrouter/"), "openrouter/" + base):
        if candidate in TIER_RANK:
            return TIER_RANK[candidate]
    # Partial match (last-resort — for unknown variants of known families)
    lower = base.lower()
    if "opus-4.7" in lower or "opus-4-7" in lower: return 10
    if "gpt-5.4" in lower and "mini" not in lower and "nano" not in lower: return 10
    if "sonnet-4.6" in lower or "sonnet-4-6" in lower: return 9
    if "opus-4" in lower or "opus_4" in lower: return 8
    if "grok-4.2" in lower: return 9
    if "gemini-3" in lower and "flash" not in lower: return 9
    if "haiku-4" in lower: return 7
    if "deepseek-v3" in lower: return 8
    if "grok-4.1" in lower or "grok-fast" in lower: return 7
    if "qwen3.6" in lower: return 7
    if "llama-3.3-70" in lower: return 6
    if "nemotron" in lower: return 5
    if "glm-4.5" in lower: return 5
    if "gpt-oss-120" in lower: return 4
    if "gemma-4-31" in lower: return 4
    if "gpt-oss-20" in lower: return 3
    return 5


def is_eligible_reviewer(writer_model: str, reviewer_model: str,
                         critical: bool = False,
                         cross_provider_required: bool = True) -> tuple[bool, str]:
    """Check if reviewer qualifies.

    Rules:
      1. rank(reviewer) >= rank(writer)      [always]
      2. rank(reviewer) >= rank(writer) + 1  [when critical]
      3. reviewer provider != writer provider [when cross_provider_required]

    Returns (ok, reason).
    """
    wr = rank(writer_model)
    rr = rank(reviewer_model)
    min_rank = wr + 1 if critical else wr

    if rr < min_rank:
        return False, f"reviewer rank {rr} < required {min_rank} (writer={wr})"

    if cross_provider_required:
        wp = _provider_family(writer_model)
        rp = _provider_family(reviewer_model)
        if wp == rp and wp != "unknown":
            return False, f"same provider family '{wp}' — need cross-provider"
        return True, f"ok: rank {rr} >= {min_rank}, cross-provider satisfied"

    # Fix: don't claim "cross-provider satisfied" when that check was skipped.
    return True, f"ok: rank {rr} >= {min_rank}"


# Ordered marker table: first family whose marker appears in the id wins.
# Order matters (e.g. "anthropic/claude-*" must resolve to anthropic).
_FAMILY_MARKERS: tuple[tuple[str, tuple[str, ...]], ...] = (
    ("anthropic", ("claude", "anthropic")),
    ("openai", ("gpt-", "openai", "gpt_")),
    ("google", ("gemini", "gemma")),
    ("xai", ("grok", "x-ai")),
    ("deepseek", ("deepseek",)),
    ("qwen", ("qwen",)),
    ("meta", ("llama", "meta")),
    ("moonshot", ("kimi", "moonshot")),
    ("mistral", ("mistral",)),
    ("nvidia", ("nemotron", "nvidia")),
    ("zai", ("glm", "z-ai")),
)


def _provider_family(model: str) -> str:
    """Group models by maker for the cross-provider check ("unknown" if unmatched)."""
    m = model.lower()
    for family, markers in _FAMILY_MARKERS:
        if any(marker in m for marker in markers):
            return family
    return "unknown"


def pick_reviewer_from(candidates: list[str], writer_model: str,
                       critical: bool = False) -> str | None:
    """Pick the highest-rank eligible reviewer from `candidates`, or None.

    Ties are broken by list position (earlier candidate wins), matching the
    original stable-sort behaviour — max() returns the first maximal element.
    """
    eligible = [(rank(c), c) for c in candidates
                if is_eligible_reviewer(writer_model, c, critical=critical)[0]]
    if not eligible:
        return None
    return max(eligible, key=lambda rc: rc[0])[1]


if __name__ == "__main__":
    import sys
    if len(sys.argv) >= 3:
        w, r = sys.argv[1], sys.argv[2]
        crit = "--critical" in sys.argv
        ok, reason = is_eligible_reviewer(w, r, critical=crit)
        print(f"writer={w} rank={rank(w)}")
        print(f"reviewer={r} rank={rank(r)}")
        print(f"eligible={ok}: {reason}")
    else:
        for m in ["claude-opus-4-20250514", "claude-sonnet-4-20250514",
                  "claude-haiku-4-5-20251001", "openai/gpt-5.4",
                  "deepseek/deepseek-v3.2", "openai/gpt-oss-120b:free",
                  "qwen/qwen3-coder:free", "meta-llama/llama-3.3-8b-instruct:free"]:
            print(f"  rank({m}) = {rank(m)} [{_provider_family(m)}]")
"claude-sonnet-4-20250514", + "claude-haiku-4-5-20251001", "openai/gpt-5.4", + "deepseek/deepseek-v3.2", "openai/gpt-oss-120b:free", + "qwen/qwen3-coder:free", "meta-llama/llama-3.3-70b-instruct:free"]: + print(f" rank({m}) = {rank(m)} [{_provider_family(m)}]") diff --git a/bin/notify-discord.sh b/bin/notify-discord.sh index d868ec07ba2cecbb1b2e185732dde93afdad6dc2..009b0be35f2c7417bc8ee430f9b3d4d4ebfde53e 100755 --- a/bin/notify-discord.sh +++ b/bin/notify-discord.sh @@ -10,7 +10,7 @@ # Examples: # notify-discord.sh success "Task done" "p42 completed in 180s" # notify-discord.sh error "Daemon crashed" "qwen-coder exit 1" -# tail -50 ~/.claude/logs/scrape.log | notify-discord.sh scrape "Scrape report" +# tail -50 ~/.surrogate/logs/scrape.log | notify-discord.sh scrape "Scrape report" set -u set -a; source "$HOME/.hermes/.env" 2>/dev/null; set +a diff --git a/bin/nvidia-bridge.sh b/bin/nvidia-bridge.sh new file mode 100755 index 0000000000000000000000000000000000000000..f62c7c21a23e58c0784b6a5aebd9f27a0bf935e0 --- /dev/null +++ b/bin/nvidia-bridge.sh @@ -0,0 +1,59 @@ +#!/usr/bin/env bash +# NVIDIA NIM bridge — OpenAI-compat via integrate.api.nvidia.com +# Free tier: ~1000 req/day, 50+ models (Llama, DeepSeek, Nemotron, Qwen, etc.) +set -u +MODEL="meta/llama-3.3-70b-instruct" +MAX_TOKENS=2000 +TEMP=0.3 +PROMPT="" + +while [[ $# -gt 0 ]]; do + case "$1" in + --model) + case "$2" in + llama|l70) MODEL="meta/llama-3.3-70b-instruct" ;; + nemotron) MODEL="nvidia/nemotron-4-340b-instruct" ;; + nemotron-nano) MODEL="nvidia/nemotron-3-nano-9b-v1" ;; + deepseek|r1) MODEL="deepseek-ai/deepseek-r1" ;; + qwen|coder) MODEL="qwen/qwen2.5-coder-32b-instruct" ;; + mistral) MODEL="mistralai/mistral-large-2-instruct" ;; + *) MODEL="$2" ;; + esac; shift 2 ;; + --max-tokens) MAX_TOKENS="$2"; shift 2 ;; + *) PROMPT="$*"; break ;; + esac +done +[[ -z "$PROMPT" ]] && [[ ! 
-t 0 ]] && PROMPT=$(cat) +[[ -z "$PROMPT" ]] && { echo "nvidia-bridge: no prompt" >&2; exit 2; } + +LOG="$HOME/.surrogate/logs/nvidia-bridge.log" +mkdir -p "$(dirname "$LOG")" +set -a; source "$HOME/.hermes/.env"; set +a +echo "[$(date '+%H:%M:%S')] model=$MODEL len=${#PROMPT}" >> "$LOG" + +RESPONSE=$(python3 -c " +import os +exec(open(os.path.expanduser('~/.surrogate/bin/lib/dns_fallback.py')).read()) +exec(open(os.path.expanduser('~/.surrogate/bin/lib/bridge_retry.py')).read()) +import json, sys +body = { + 'model': '$MODEL', + 'messages': [{'role':'user','content': sys.stdin.read()}], + 'max_tokens': $MAX_TOKENS, 'temperature': $TEMP, + 'stream': False, +} +try: + d = request_with_retry( + 'https://integrate.api.nvidia.com/v1/chat/completions', + data=json.dumps(body).encode(), + headers={'Content-Type':'application/json', 'User-Agent':'hermes-agent/1.0', 'Authorization':'Bearer '+os.environ.get('NVIDIA_API_KEY','')}, + timeout=120, max_retries=4, base_delay=3.0, open_seconds=120, + ) + print(d.get('choices',[{}])[0].get('message',{}).get('content','')) +except Exception as e: + print(f'nvidia-bridge error: {e}', file=sys.stderr); sys.exit(1) +" <<< "$PROMPT") +RC=$? 
+echo "[$(date '+%H:%M:%S')] rc=$RC bytes=${#RESPONSE}" >> "$LOG" +[[ $RC -ne 0 ]] && exit $RC +echo "$RESPONSE" diff --git a/bin/perf-watchdog.sh b/bin/perf-watchdog.sh index 0550610791ad5b092a049a8a53fa2c64d05c3e21..34f7499f2634719d1a87c3e8f9dbe8ce41fd51ba 100755 --- a/bin/perf-watchdog.sh +++ b/bin/perf-watchdog.sh @@ -5,15 +5,15 @@ # - load avg 1min (kill if > 10, warn if > 7) # - memory free pages (warn if < 30k, emergency < 15k) # - swap I/O rate (emergency if spiking) -# - disk space on ~/.claude/state (warn if < 2GB) +# - disk space on ~/.surrogate/state (warn if < 2GB) # - scrape process count (cap at 30, kill oldest if exceeded) # # Actions: # - WARN: log + throttle (pause new burst triggers via state file) # - EMERGENCY: kill all scrape processes, set pause flag for 10 min set -u -LOG="$HOME/.claude/logs/perf-watchdog.log" -PAUSE_FLAG="$HOME/.claude/state/scrape-paused" +LOG="$HOME/.surrogate/logs/perf-watchdog.log" +PAUSE_FLAG="$HOME/.surrogate/state/scrape-paused" mkdir -p "$(dirname "$LOG")" "$(dirname "$PAUSE_FLAG")" # Thresholds diff --git a/bin/push-training-to-hf.sh b/bin/push-training-to-hf.sh index 290788098432db89f3e94d0d42d15975541e86bb..1d5030f4b3e63c19adb1d7810e034ca7f608332e 100755 --- a/bin/push-training-to-hf.sh +++ b/bin/push-training-to-hf.sh @@ -6,7 +6,7 @@ set -a; source "$HOME/.hermes/.env" 2>/dev/null; set +a SRC="$HOME/.surrogate/training-pairs.jsonl" OFFSET_FILE="$HOME/.surrogate/.training-push-offset" -LOG="$HOME/.claude/logs/training-push.log" +LOG="$HOME/.surrogate/logs/training-push.log" mkdir -p "$(dirname "$LOG")" [[ ! -f "$SRC" ]] && { echo "[$(date +%H:%M:%S)] no source $SRC" | tee -a "$LOG"; exit 0; } diff --git a/bin/qwen-coder-daemon.sh b/bin/qwen-coder-daemon.sh index 82c688a17975f1e0dbcd57e070590a032dc83206..be5115d291b8bd4967806dd652f8d73fcb6ab733 100755 --- a/bin/qwen-coder-daemon.sh +++ b/bin/qwen-coder-daemon.sh @@ -4,7 +4,7 @@ # Pulls priority → invokes qwen-coder-worker.sh with pre-selected priority (env var). 
set -u -LOG="$HOME/.claude/logs/qwen-coder-daemon.log" +LOG="$HOME/.surrogate/logs/qwen-coder-daemon.log" mkdir -p "$(dirname "$LOG")" # Resolve Redis: Unix socket → TCP fallback. Build a redis-cli arg array reused below. @@ -45,7 +45,7 @@ while true; do # can't race with other workers / stale file locks. START=$(date +%s) HERMES_PRIO_ID="$PRIO_ID" \ - "$HOME/.claude/bin/qwen-coder-worker.sh" 2>&1 | tail -3 >> "$LOG" + "$HOME/.surrogate/bin/qwen-coder-worker.sh" 2>&1 | tail -3 >> "$LOG" DUR=$(( $(date +%s) - START )) echo "[$(date '+%H:%M:%S')] $PRIO_ID done in ${DUR}s" >> "$LOG" diff --git a/bin/qwen-coder-worker.sh b/bin/qwen-coder-worker.sh index 76cef4017d1dbcc1278fe673337f1d79e72d9d28..f363a2429c3146e0bf05fb559379232dd95fef4a 100755 --- a/bin/qwen-coder-worker.sh +++ b/bin/qwen-coder-worker.sh @@ -7,7 +7,7 @@ # Philosophy: cheap + fast iteration — reviewer catches bad outputs. set -u -LOG="$HOME/.claude/logs/qwen-coder-worker.log" +LOG="$HOME/.surrogate/logs/qwen-coder-worker.log" OUT_DIR="$HOME/.hermes/workspace/qwen-coder" SHARED="$HOME/.hermes/workspace/swarm-shared" mkdir -p "$(dirname "$LOG")" "$OUT_DIR" @@ -58,8 +58,8 @@ MAP_FILE="$SHARED/repo-maps/${PRIO_PROJECT}.md" # RAG: fetch real code examples from THIS project's actual codebase via FTS # Grounds the model in real APIs/imports/patterns instead of hallucinating RAG_EXAMPLES="" -if [[ -x "$HOME/.claude/bin/ask-sqlite.py" ]]; then - RAG_EXAMPLES=$(python3 "$HOME/.claude/bin/ask-sqlite.py" \ +if [[ -x "$HOME/.surrogate/bin/ask-sqlite.py" ]]; then + RAG_EXAMPLES=$(python3 "$HOME/.surrogate/bin/ask-sqlite.py" \ "$PRIO_PROJECT $PRIO_TITLE" 2>/dev/null | head -c 2500) fi diff --git a/bin/sambanova-bridge.sh b/bin/sambanova-bridge.sh new file mode 100755 index 0000000000000000000000000000000000000000..2b0c9cd4c89ebe027eca90b14cd6fe8b2f6c97ac --- /dev/null +++ b/bin/sambanova-bridge.sh @@ -0,0 +1,75 @@ +#!/usr/bin/env bash +# SambaNova Cloud bridge — fast Llama 3.3 70B/405B + DeepSeek-V3 free tier +# 
Endpoint: https://api.sambanova.ai/v1 (OpenAI-compat, ~500 tok/s) +# Key env: SAMBANOVA_API_KEY +# Usage: sambanova-bridge.sh [--model MODEL] "" +set -u +# Default: Llama 3.3 70B — best speed (500 tok/s) × quality tradeoff on SambaNova. +# Full catalog verified 2026-04: DeepSeek-V3.1/V3.1-cb/V3.2, Llama-4-Maverick, +# gpt-oss-120b, gemma-3-12b-it, MiniMax-M2.5 (service-tier-locked). +MODEL="Meta-Llama-3.3-70B-Instruct" +MAX_TOKENS=2000 +TEMP=0.3 +PROMPT="" + +while [[ $# -gt 0 ]]; do + case "$1" in + --model) + case "$2" in + fast|small|gemma|gemma3) MODEL="gemma-3-12b-it" ;; + llama|llama70|70b) MODEL="Meta-Llama-3.3-70B-Instruct" ;; + llama4|maverick) MODEL="Llama-4-Maverick-17B-128E-Instruct" ;; + deepseek|deepseek-v3) MODEL="DeepSeek-V3.1" ;; + deepseek-latest|v32) MODEL="DeepSeek-V3.2" ;; + deepseek-cb|cb) MODEL="DeepSeek-V3.1-cb" ;; + gpt-oss|oss|120b) MODEL="gpt-oss-120b" ;; + *) MODEL="$2" ;; + esac; shift 2 ;; + --max-tokens) MAX_TOKENS="$2"; shift 2 ;; + --temperature) TEMP="$2"; shift 2 ;; + *) PROMPT="$*"; break ;; + esac +done +[[ -z "$PROMPT" ]] && [[ ! 
-t 0 ]] && PROMPT=$(cat) +[[ -z "$PROMPT" ]] && { echo "sambanova-bridge: no prompt" >&2; exit 2; } + +LOG="$HOME/.surrogate/logs/sambanova-bridge.log" +mkdir -p "$(dirname "$LOG")" +set -a; source "$HOME/.hermes/.env" 2>/dev/null || true; set +a + +if [[ -z "${SAMBANOVA_API_KEY:-}" ]]; then + echo "sambanova-bridge: missing SAMBANOVA_API_KEY in ~/.hermes/.env" >&2 + exit 3 +fi + +echo "[$(date '+%H:%M:%S')] model=$MODEL len=${#PROMPT}" >> "$LOG" + +RESPONSE=$(python3 -c " +import os +exec(open(os.path.expanduser('~/.surrogate/bin/lib/dns_fallback.py')).read()) +exec(open(os.path.expanduser('~/.surrogate/bin/lib/bridge_retry.py')).read()) +import json, sys +body = { + 'model': '$MODEL', + 'messages': [{'role':'user','content': sys.stdin.read()}], + 'max_tokens': $MAX_TOKENS, 'temperature': $TEMP, +} +try: + d = request_with_retry( + 'https://api.sambanova.ai/v1/chat/completions', + data=json.dumps(body).encode(), + headers={ + 'Content-Type':'application/json', + 'User-Agent':'hermes-agent/1.0', + 'Authorization':'Bearer '+os.environ.get('SAMBANOVA_API_KEY',''), + }, + timeout=120, max_retries=4, base_delay=2.0, + ) + print(d.get('choices',[{}])[0].get('message',{}).get('content','')) +except Exception as e: + print(f'sambanova-bridge error: {e}', file=sys.stderr); sys.exit(1) +" <<< "$PROMPT") +RC=$? 
+echo "[$(date '+%H:%M:%S')] rc=$RC bytes=${#RESPONSE}" >> "$LOG" +[[ $RC -ne 0 ]] && exit $RC +echo "$RESPONSE" diff --git a/bin/scrape-keyword-tuner.sh b/bin/scrape-keyword-tuner.sh index 51ba6c1c27c845e2cbf1c93cb46b8dcaff83ea54..a292b785466643625ca7e03099399eba08b3fb4b 100755 --- a/bin/scrape-keyword-tuner.sh +++ b/bin/scrape-keyword-tuner.sh @@ -11,7 +11,7 @@ set -uo pipefail set -a; source "$HOME/.hermes/.env" 2>/dev/null; set +a -LOG="$HOME/.claude/logs/scrape-keyword-tuner.log" +LOG="$HOME/.surrogate/logs/scrape-keyword-tuner.log" mkdir -p "$(dirname "$LOG")" TOKEN="${GITHUB_TOKEN_POOL%%,*}" # first non-empty @@ -33,7 +33,7 @@ python3 <> "$LOG" 2>&1 import os, re, json, sqlite3, time, urllib.request, urllib.error, urllib.parse TOKEN = "$TOKEN" -DB = os.path.expanduser("~/.claude/state/scrape-ledger.db") +DB = os.path.expanduser("~/.surrogate/state/scrape-ledger.db") def github_count(keywords: str) -> int: """Return total_count from GitHub Search API (or -1 on error).""" diff --git a/bin/scrape-ledger-init.sh b/bin/scrape-ledger-init.sh new file mode 100755 index 0000000000000000000000000000000000000000..6051527dca79dd69db9ddb0ce5d94bb6250e4e2a --- /dev/null +++ b/bin/scrape-ledger-init.sh @@ -0,0 +1,123 @@ +#!/usr/bin/env bash +# Initialize global scrape ledger — single source of truth for "what's been scraped" +# All scrapers check ledger before scraping + write after. +# DB: ~/.surrogate/state/scrape-ledger.db (SQLite WAL for concurrent safety) +set -u +DB="$HOME/.surrogate/state/scrape-ledger.db" +mkdir -p "$(dirname "$DB")" + +sqlite3 "$DB" <<'SQL' +PRAGMA journal_mode=WAL; +PRAGMA synchronous=NORMAL; + +CREATE TABLE IF NOT EXISTS scraped ( + id INTEGER PRIMARY KEY AUTOINCREMENT, + source TEXT NOT NULL, -- 'github', 'rss', 'stackoverflow', 'fs', 'crawl4ai' + identifier TEXT NOT NULL, -- 'owner/repo' or URL or file path hash + domain TEXT, -- 'security', 'devops', 'ai-ml', 'frontend', etc. + subdomain TEXT, -- 'cve', 'kyverno', 'observability', etc. 
+ language TEXT, -- 'python', 'go', 'terraform' + stars INTEGER DEFAULT 0, + scraped_at TEXT NOT NULL, + pairs_written INTEGER DEFAULT 0, + status TEXT DEFAULT 'ok', -- 'ok', 'err', 'skipped', 'partial' + notes TEXT +); + +CREATE UNIQUE INDEX IF NOT EXISTS idx_scraped_src_id ON scraped(source, identifier); +CREATE INDEX IF NOT EXISTS idx_scraped_domain ON scraped(domain); +CREATE INDEX IF NOT EXISTS idx_scraped_ts ON scraped(scraped_at); + +-- Domain taxonomy — what every enterprise software company deals with +CREATE TABLE IF NOT EXISTS domain_taxonomy ( + domain TEXT PRIMARY KEY, + subdomain TEXT, + search_keywords TEXT, + priority INTEGER DEFAULT 5, -- 1=critical, 10=nice-to-have + target_repos INTEGER DEFAULT 100 +); + +-- Seed taxonomy +INSERT OR IGNORE INTO domain_taxonomy (domain, subdomain, search_keywords, priority, target_repos) VALUES +-- CODING (per language) +('coding','python-framework','fastapi django flask poetry uv ruff mypy pydantic',1,150), +('coding','python-async','asyncio aiohttp httpx anyio trio',1,80), +('coding','typescript-framework','nextjs remix astro svelte solid react vue nuxt',1,150), +('coding','typescript-tooling','vite tsup esbuild turbopack biome',2,80), +('coding','go-ecosystem','gin echo fiber chi gorilla cobra viper',1,120), +('coding','rust-ecosystem','tokio axum actix warp rocket serde clap',1,100), +('coding','java-kotlin','spring boot ktor micronaut quarkus',2,80), +('coding','mobile-native','swiftui jetpack compose react-native flutter',2,100), +-- SECURITY +('security','appsec','owasp top10 cwe sast dast semgrep bandit eslint-security',1,120), +('security','cloudsec','prowler scoutsuite cloudcustodian checkov tfsec iam-cli',1,120), +('security','container-sec','trivy grype syft kyverno opa falco tetragon',1,100), +('security','supply-chain','cosign sigstore slsa sbom cyclonedx in-toto',1,80), +('security','secrets','vault sops age gitleaks trufflehog detect-secrets',1,60), +('security','identity','keycloak authentik ory 
hydra dex oidc-provider',2,60), +('security','detection','sigma mitre-attack falco-rules wazuh yara sentinelone',1,80), +('security','offensive','metasploit nuclei gobuster ffuf burp-extensions',3,40), +-- OPS / DEVOPS / SRE +('ops','devops-ci','github-actions gitlab-ci jenkins dagger buildkit',1,100), +('ops','iac','terraform pulumi cdk cloudformation ansible',1,150), +('ops','kubernetes','k8s helm kustomize argocd flux crossplane istio linkerd',1,200), +('ops','sre','sre-book postmortem slo burn-rate chaos-engineering',1,80), +('ops','chaos','chaos-mesh litmus gremlin chaos-toolkit',2,40), +('ops','config-mgmt','ansible chef puppet salt',3,40), +('observability','metrics','prometheus thanos mimir victoriametrics alertmanager',1,100), +('observability','logs','loki elasticsearch opensearch fluentbit vector',1,80), +('observability','traces','tempo jaeger zipkin skywalking honeycomb',1,80), +('observability','apm','datadog newrelic dynatrace appdynamics instana',2,40), +('observability','profiling','pyroscope parca gprofiler py-spy flamegraph',2,40), +('observability','otel','opentelemetry-collector otel-sdk semantic-conventions',1,60), +('observability','ebpf','cilium tetragon pixie falco inspektor-gadget',1,60), +-- CLOUD +('cloud','aws','aws-cdk aws-samples aws-solutions aws-copilot sam',1,200), +('cloud','gcp','gcp-samples terraform-google anthos',1,100), +('cloud','azure','azure-samples bicep terraform-azurerm',1,100), +('cloud','multicloud','crossplane cluster-api karpenter external-dns',2,60), +('cloud','serverless','sam sst cdk serverless-framework workers wrangler',1,100), +('finops','finops','kubecost opencost cloudhealth crane infracost',1,60), +-- AI / ML / AGENTS +('ai','llm-serving','vllm tgi ollama llama.cpp exllama sglang',1,100), +('ai','llm-training','unsloth axolotl peft trl ms-swift torchtune',1,100), +('ai','agents','langgraph crewai autogen mcp-server dspy haystack',1,120), +('ai','rag','llamaindex langchain colbert chroma qdrant 
weaviate',1,100), +('ai','ml-frameworks','pytorch-lightning jax equinox flax transformers diffusers',2,80), +('ai','ml-ops','mlflow wandb comet kedro zenml',2,60), +('ai','eval','lm-evaluation-harness deepeval ragas opik',2,40), +-- DATA +('data','databases','postgres mysql pgvector cockroachdb tidb',1,100), +('data','streaming','kafka nats redpanda pulsar flink',1,80), +('data','warehouses','clickhouse duckdb snowflake trino presto starrocks',1,80), +('data','orchestration','airflow prefect dagster temporal',1,80), +('data','formats','parquet iceberg delta-lake hudi avro',2,40), +('data','etl','dbt meltano singer airbyte',2,40), +-- FRONTEND / UX +('frontend','components','shadcn-ui radix headlessui mantine chakra',2,80), +('frontend','state','zustand jotai redux-toolkit tanstack-query swr',2,60), +('frontend','styling','tailwindcss unocss vanilla-extract stitches',2,60), +('frontend','animations','framer-motion auto-animate gsap lottie',3,40), +-- BACKEND +('backend','graphql','apollo relay urql hasura postgraphile',2,60), +('backend','grpc','grpc-web buf connect-go',2,40), +('backend','queues','bullmq sidekiq celery rq',2,60), +-- ARCHITECTURE +('architecture','patterns','hexagonal ddd cqrs event-sourcing saga outbox',1,60), +('architecture','messaging','cloudevents asyncapi schema-registry',2,40), +-- QUALITY / TESTING +('quality','unit-test','pytest vitest jest junit5 testify',2,60), +('quality','e2e','playwright cypress puppeteer selenium',2,60), +('quality','load-test','k6 locust gatling vegeta',2,40), +('quality','contract','pact dredd schemathesis',3,30), +-- COMPLIANCE +('compliance','audit','pdpa gdpr soc2 iso27001 pci-dss hipaa',1,60), +('compliance','policy-as-code','opa kyverno gatekeeper conftest',1,60), +-- PRODUCT / BUSINESS +('product','analytics','posthog plausible amplitude mixpanel',2,40), +('product','feature-flags','unleash flagsmith growthbook launchdarkly',2,40); + +SELECT 'ledger initialized: ' || COUNT(*) || ' domains' FROM 
domain_taxonomy; +SQL + +echo "✅ Ledger at $DB" diff --git a/bin/skill-synthesis-daemon.sh b/bin/skill-synthesis-daemon.sh index 111d45fa342a339b24abb05684c17a64d1d0cdc0..242f2af8cb629dc7023c6f5eaea955d3d5dba7eb 100755 --- a/bin/skill-synthesis-daemon.sh +++ b/bin/skill-synthesis-daemon.sh @@ -9,7 +9,7 @@ set -uo pipefail set -a; source "$HOME/.hermes/.env" 2>/dev/null; set +a SKILLS_DIR="$HOME/.surrogate/skills" -LOG="$HOME/.claude/logs/skill-synthesis.log" +LOG="$HOME/.surrogate/logs/skill-synthesis.log" PAIRS="$HOME/.surrogate/training-pairs.jsonl" mkdir -p "$SKILLS_DIR" "$(dirname "$LOG")" diff --git a/bin/surrogate b/bin/surrogate index 0513fdea55e70b2cc0ac9e32ef799e46ed0f9b77..d9bb873526dba2160ce72e042a3433c6e8d55ff7 100755 --- a/bin/surrogate +++ b/bin/surrogate @@ -29,7 +29,7 @@ init_surrogate_home() { }, "agents": ["architect","dev","qa","ops","reviewer"], "memory": { - "episodesFile": "~/.claude/state/surrogate-memory/episodes.jsonl", + "episodesFile": "~/.surrogate/state/episodes.jsonl", "projectFiles": "~/.surrogate/projects" } } @@ -116,7 +116,7 @@ while [[ $# -gt 0 ]]; do init) MODE="init-project"; shift ;; plan) # surrogate plan set | show | clear - bash ~/.claude/bin/surrogate-daemon.sh plan "$@" + bash ~/.surrogate/bin/surrogate-daemon.sh plan "$@" exit 0 ;; .) 
shift ;; @@ -224,7 +224,7 @@ GEMINI = os.environ.get('GEMINI_API_KEY','') GEMINI2 = os.environ.get('GEMINI_API_KEY_2','') GH_POOL = [t.strip() for t in os.environ.get('GITHUB_TOKEN_POOL','').split(',') if t.strip()] -MEM_DIR = Path(os.path.expanduser('~/.claude/state/surrogate-memory')) +MEM_DIR = Path(os.path.expanduser('~/.surrogate/state')) MEM_DIR.mkdir(parents=True, exist_ok=True) EPISODES = MEM_DIR / 'episodes.jsonl' @@ -284,7 +284,7 @@ def tool_grep(pattern, path=None, glob='*'): def tool_rag_query(query, limit=5): try: - conn = sqlite3.connect(os.path.expanduser('~/.claude/index.db')) + conn = sqlite3.connect(os.path.expanduser('~/.surrogate/index.db')) kw = ' '.join(w for w in re.sub(r'[^a-zA-Z0-9ก-๙ ]',' ',query.lower()).split() if len(w)>2)[:200] rows = conn.execute("SELECT d.source, d.path, substr(d.response,1,500) FROM docs_fts f JOIN docs d ON d.id=f.rowid WHERE f.docs_fts MATCH ? ORDER BY bm25(docs_fts) LIMIT ?", (kw,limit)).fetchall() conn.close() @@ -476,7 +476,7 @@ ${B}Configuration${R}: ${CY}/cwd${R} change working directory ${B}Diagnostics${R}: - ${CY}/memory${R} show ~/.claude/state/surrogate-memory/ + ${CY}/memory${R} show ~/.surrogate/state/ ${CY}/cost${R} OpenRouter usage today ${CY}/cost-all${R} all provider usage breakdown ${CY}/health${R} check HF endpoint + local CLI status @@ -605,7 +605,7 @@ repl() { *) echo "${GY}valid: plan | auto | yolo | default | acceptEdits${R}" ;; esac ;; - /memory) ls -lh ~/.claude/state/surrogate-memory/ 2>&1 | head -10 ;; + /memory) ls -lh ~/.surrogate/state/ 2>&1 | head -10 ;; /undo) # Restore last checkpoint (git stash if uncommitted changes from last task) if git -C "$(pwd)" rev-parse --git-dir &>/dev/null; then @@ -958,7 +958,7 @@ PYEOF ) [[ -z "$NEXT_TASK" ]] && { echo "${GR}✅ Plan complete — all tasks done!${R}"; break; } echo "${BCY}${B}▸ Next task:${R} $NEXT_TASK" - bash ~/.claude/bin/surrogate-orchestrate.sh "$NEXT_TASK" + bash ~/.surrogate/bin/surrogate-orchestrate.sh "$NEXT_TASK" # Mark done in 
plan /usr/bin/python3 <${R} " read -r task fi - bash ~/.claude/bin/surrogate-orchestrate.sh --mode plan "$task" + bash ~/.surrogate/bin/surrogate-orchestrate.sh --mode plan "$task" } # ═══ Monitor mode (watch cloud/logs, auto-fix) ═══ monitor_mode() { echo "${B}${MA}▶ MONITOR MODE${R}" - echo "${D} Watching ~/.claude/logs/, ~/.hermes/workspace/healer/, system load.${R}" + echo "${D} Watching ~/.surrogate/logs/, ~/.hermes/workspace/healer/, system load.${R}" echo "${D} Ctrl+C to stop.${R}" echo "" ITER=0 @@ -1027,15 +1027,15 @@ monitor_mode() { ls -t ~/.hermes/workspace/healer/*.md 2>/dev/null | head -3 | awk '{print " " $0}' | xargs -I{} basename {} 2>/dev/null | sed 's/^/ /' # Training + graph PAIRS=$(wc -l ~/axentx/surrogate/data/training-jsonl/*.jsonl 2>/dev/null | tail -1 | awk '{print $1}') - REPOS=$(sqlite3 ~/.claude/state/scrape-ledger.db "SELECT COUNT(*) FROM scraped" 2>/dev/null) + REPOS=$(sqlite3 ~/.surrogate/state/scrape-ledger.db "SELECT COUNT(*) FROM scraped" 2>/dev/null) echo "${B}data${R} pairs=$PAIRS repos=$REPOS" # Recent errors in logs (auto-heal trigger) - ERR_COUNT=$(tail -200 ~/.claude/logs/*.log 2>/dev/null | grep -cE "ERROR|Fatal|CRITICAL|429|403|500" || echo 0) + ERR_COUNT=$(tail -200 ~/.surrogate/logs/*.log 2>/dev/null | grep -cE "ERROR|Fatal|CRITICAL|429|403|500" || echo 0) echo "${B}errors${R} last 200 log lines: $ERR_COUNT" # If critical → spawn agent to investigate if [[ $ERR_COUNT -gt 50 ]]; then echo "${RE}⚠ elevated errors — dispatching investigator agent${R}" - (run_agent "เช็ค ~/.claude/logs/ หา pattern error ที่ recur บ่อย และเสนอ fix list (ห้ามแก้เอง รายงานอย่างเดียว)" 2>&1 | /usr/bin/head -20) & + (run_agent "เช็ค ~/.surrogate/logs/ หา pattern error ที่ recur บ่อย และเสนอ fix list (ห้ามแก้เอง รายงานอย่างเดียว)" 2>&1 | /usr/bin/head -20) & fi sleep 30 done @@ -1045,9 +1045,9 @@ monitor_mode() { show_status() { banner echo "" - REPOS=$(sqlite3 ~/.claude/state/scrape-ledger.db "SELECT COUNT(*) FROM scraped" 2>/dev/null || echo "?") 
+ REPOS=$(sqlite3 ~/.surrogate/state/scrape-ledger.db "SELECT COUNT(*) FROM scraped" 2>/dev/null || echo "?") PAIRS=$(wc -l ~/axentx/surrogate/data/training-jsonl/*.jsonl 2>/dev/null | tail -1 | awk '{print $1}' || echo "?") - EP=$(wc -l ~/.claude/state/surrogate-memory/episodes.jsonl 2>/dev/null | awk '{print $1}' || echo "0") + EP=$(wc -l ~/.surrogate/state/episodes.jsonl 2>/dev/null | awk '{print $1}' || echo "0") PLAN_FILE="$SURROGATE_HOME/active-plan.md" echo "${B}▸ Session${R}" echo " cwd: ${GR}$(pwd)${R}" @@ -1069,8 +1069,8 @@ show_status() { show_agents() { banner echo "" - echo "${B}▸ Available agents (~/.claude/agents/)${R}" - ls ~/.claude/agents/*.md 2>/dev/null | /usr/bin/sed 's|.*/||;s|.md$||' | sed 's/^/ /' + echo "${B}▸ Available agents (~/.surrogate/agents/)${R}" + ls ~/.surrogate/agents/*.md 2>/dev/null | /usr/bin/sed 's|.*/||;s|.md$||' | sed 's/^/ /' } # ═══ Dispatch ═══ @@ -1086,7 +1086,7 @@ case "$MODE" in if [[ -n "$PROMPT" ]]; then plan_mode "$PROMPT" else # No task — show plan status - bash ~/.claude/bin/surrogate-daemon.sh plan show + bash ~/.surrogate/bin/surrogate-daemon.sh plan show fi ;; print) diff --git a/bin/surrogate-agent.sh b/bin/surrogate-agent.sh index cf8a8c5f321fe5988473b4d3a36d6eeef43b370f..0581aa77b368fbea83ed37a86fd48fe5d5da288f 100755 --- a/bin/surrogate-agent.sh +++ b/bin/surrogate-agent.sh @@ -33,7 +33,7 @@ while [[ $# -gt 0 ]]; do done [[ -z "$TASK" ]] && { echo "usage: $0 [--max-steps N] [--model M] " >&2; exit 2; } -MEM_DIR="$HOME/.claude/state/surrogate-memory" +MEM_DIR="$HOME/.surrogate/state/surrogate-memory" mkdir -p "$MEM_DIR" export AGENT_TASK="$TASK" @@ -49,7 +49,7 @@ TASK = os.environ['AGENT_TASK'] MAX_STEPS = int(os.environ['AGENT_MAX_STEPS']) MODEL_OVERRIDE = os.environ.get('AGENT_MODEL_OVERRIDE', '') OPENROUTER = os.environ.get('OPENROUTER_API_KEY', '') -MEM_DIR = Path(os.path.expanduser('~/.claude/state/surrogate-memory')) +MEM_DIR = Path(os.path.expanduser('~/.surrogate/state/surrogate-memory')) EPISODES = 
MEM_DIR / 'episodes.jsonl' PATTERNS = MEM_DIR / 'patterns.jsonl' SYS_PROMPT = '' @@ -148,7 +148,7 @@ def tool_rag_query(query, limit=5, source_filter=None): import subprocess as _sp try: # 1. BM25 via SQLite FTS - conn = sqlite3.connect(os.path.expanduser('~/.claude/index.db')) + conn = sqlite3.connect(os.path.expanduser('~/.surrogate/index.db')) kw = ' '.join(w for w in re.sub(r'[^a-zA-Z0-9ก-๙ ]', ' ', query.lower()).split() if len(w) > 2)[:200] q = "SELECT d.source, d.path, substr(d.response, 1, 500), d.id FROM docs_fts f JOIN docs d ON d.id=f.rowid WHERE f.docs_fts MATCH ?" params = [kw] @@ -166,9 +166,9 @@ def tool_rag_query(query, limit=5, source_filter=None): dense_docs = [] if len(query) > 10: try: - cmd = f"""~/.claude/state/crawler-venv/bin/python -c " + cmd = f"""~/.surrogate/state/crawler-venv/bin/python -c " import chromadb, json, sys -client = chromadb.PersistentClient(path='/Users/Ashira/.claude/code-vector-db') +client = chromadb.PersistentClient(path='$HOME/.surrogate/code-vector-db') cols = client.list_collections() if cols: r = cols[0].query(query_texts=['{query[:200].replace(chr(39),chr(92)+chr(39))}'], n_results={max(limit*3,20)}) @@ -206,7 +206,7 @@ def tool_rag_code(query, limit=5): """Query code knowledge — routed through SQLite FTS (no Chroma load, crash-safe). 
Searches `code` + `code-vector` + `code-deep:*` sources in index.db via BM25.""" try: - conn = sqlite3.connect(os.path.expanduser('~/.claude/index.db')) + conn = sqlite3.connect(os.path.expanduser('~/.surrogate/index.db')) kw = ' '.join(w for w in re.sub(r'[^a-zA-Z0-9ก-๙ ]', ' ', query.lower()).split() if len(w) > 2)[:200] rows = conn.execute(""" SELECT d.source, d.path, substr(d.response, 1, 500) @@ -222,7 +222,7 @@ def tool_rag_code(query, limit=5): def tool_web_fetch(url, timeout=45): try: - cmd = f"""$HOME/.claude/state/crawler-venv/bin/python -c " + cmd = f"""$HOME/.surrogate/state/crawler-venv/bin/python -c " import asyncio from crawl4ai import AsyncWebCrawler, BrowserConfig, CrawlerRunConfig async def f(): @@ -254,7 +254,7 @@ def tool_task(prompt, max_steps=5): sub_id = uuid.uuid4().hex[:8] print(f" ↳ [sub-agent {sub_id}] spawning: {prompt[:80]}", flush=True) try: - cmd = ['bash', os.path.expanduser('~/.claude/bin/surrogate-agent.sh'), + cmd = ['bash', os.path.expanduser('~/.surrogate/bin/surrogate-agent.sh'), '--max-steps', str(max_steps), prompt] r = subprocess.run(cmd, capture_output=True, text=True, timeout=600) return {'sub_id': sub_id, 'output': r.stdout[-4000:], 'rc': r.returncode} @@ -274,7 +274,7 @@ def tool_orchestrate(subtasks, pattern='parallel', max_steps=5): def run_one(prompt): try: r = subprocess.run( - ['bash', os.path.expanduser('~/.claude/bin/surrogate-agent.sh'), + ['bash', os.path.expanduser('~/.surrogate/bin/surrogate-agent.sh'), '--max-steps', str(max_steps), prompt], capture_output=True, text=True, timeout=600 ) @@ -426,7 +426,7 @@ TOOLS = { def check_budget(): """Return True if under daily budget ($2/day default). 
Caller aborts if False.""" import time as _t - cache = Path(os.path.expanduser('~/.claude/state/openrouter-budget-cache.json')) + cache = Path(os.path.expanduser('~/.surrogate/state/openrouter-budget-cache.json')) # Cache balance check for 5 min (reduce API calls) try: if cache.exists() and _t.time() - cache.stat().st_mtime < 300: @@ -439,7 +439,7 @@ def check_budget(): cache.parent.mkdir(parents=True, exist_ok=True) cache.write_text(json.dumps({'usage': d.get('usage',0), 'ts': _t.time()})) # Check today's marker - today_f = Path(os.path.expanduser('~/.claude/state/openrouter-today-start.txt')) + today_f = Path(os.path.expanduser('~/.surrogate/state/openrouter-today-start.txt')) today_str = datetime.now().strftime('%Y-%m-%d') if not today_f.exists() or today_f.read_text().split(':')[0] != today_str: today_f.parent.mkdir(parents=True, exist_ok=True) diff --git a/bin/surrogate-bridge.sh b/bin/surrogate-bridge.sh new file mode 100755 index 0000000000000000000000000000000000000000..06c8d3689b5c1c177760efd6c4b25b1d34d22fc4 --- /dev/null +++ b/bin/surrogate-bridge.sh @@ -0,0 +1,52 @@ +#!/usr/bin/env bash +# Surrogate-1 bridge — local Ollama endpoint for the Ashira-personalized model. +# Currently uses base Qwen2.5-Coder-7B + Thai/DevSecOps SYSTEM prompt as placeholder. +# After LoRA training on RunPod, rebuild Ollama model with merged adapter. +# Model URL: http://localhost:11434 (Ollama) +set -u +MODEL="surrogate-1" +MAX_TOKENS=2000 +TEMP=0.3 +PROMPT="" + +while [[ $# -gt 0 ]]; do + case "$1" in + --model) MODEL="$2"; shift 2 ;; + --max-tokens) MAX_TOKENS="$2"; shift 2 ;; + *) PROMPT="$*"; break ;; + esac +done +[[ -z "$PROMPT" ]] && [[ ! 
-t 0 ]] && PROMPT=$(cat) +[[ -z "$PROMPT" ]] && { echo "surrogate-bridge: no prompt" >&2; exit 2; } + +LOG="$HOME/.surrogate/logs/surrogate-bridge.log" +mkdir -p "$(dirname "$LOG")" +echo "[$(date '+%H:%M:%S')] model=$MODEL len=${#PROMPT}" >> "$LOG" + +# Ollama OpenAI-compat endpoint +RESPONSE=$(python3 -c " +import json, sys, urllib.request, urllib.error + +body = { + 'model': '$MODEL', + 'messages': [{'role':'user','content': sys.stdin.read()}], + 'max_tokens': $MAX_TOKENS, + 'temperature': $TEMP, + 'stream': False, +} +req = urllib.request.Request( + 'http://localhost:11434/v1/chat/completions', + data=json.dumps(body).encode(), + headers={'Content-Type':'application/json','Authorization':'Bearer ollama'} +) +try: + with urllib.request.urlopen(req, timeout=180) as r: + d = json.load(r) + print(d.get('choices',[{}])[0].get('message',{}).get('content','')) +except Exception as e: + print(f'surrogate-bridge error: {e}', file=sys.stderr); sys.exit(1) +" <<< "$PROMPT") +RC=$? +echo "[$(date '+%H:%M:%S')] rc=$RC bytes=${#RESPONSE}" >> "$LOG" +[[ $RC -ne 0 ]] && exit $RC +echo "$RESPONSE" diff --git a/bin/surrogate-consolidate.sh b/bin/surrogate-consolidate.sh new file mode 100755 index 0000000000000000000000000000000000000000..0b6c4bb4f75817a16eeccd5d87498f26d3e58851 --- /dev/null +++ b/bin/surrogate-consolidate.sh @@ -0,0 +1,163 @@ +#!/usr/bin/env bash +# Episode consolidation — nightly summarize episodes → patterns → Graphiti + DPO training data +# +# Input: ~/.surrogate/state/surrogate-memory/episodes.jsonl +# Output: +# 1. ~/.surrogate/state/surrogate-memory/patterns.jsonl (learned patterns) +# 2. ~/.surrogate/index.db (source='surrogate-episodes') — pattern ingested for RAG +# 3. ~/axentx/surrogate/data/training-jsonl/dpo-pairs.jsonl (user+reply for future LoRA) +# 4. 
FalkorDB graph (episodic → semantic bitemporal edges) +set -u +set -a; source "$HOME/.hermes/.env" 2>/dev/null; set +a + +MEM="$HOME/.surrogate/state/surrogate-memory" +LOG="$HOME/.surrogate/logs/surrogate-consolidate.log" +CHECKPOINT="$MEM/consolidate.checkpoint" +mkdir -p "$(dirname "$LOG")" "$MEM" + +/usr/bin/python3 <<'PYEOF' 2>>"$LOG" +import json, os, sqlite3, urllib.request, hashlib, subprocess +from datetime import datetime +from pathlib import Path + +MEM = Path(os.path.expanduser('~/.surrogate/state/surrogate-memory')) +EP = MEM / 'episodes.jsonl' +PAT = MEM / 'patterns.jsonl' +CKPT = MEM / 'consolidate.checkpoint' +DPO = Path(os.path.expanduser('~/axentx/surrogate/data/training-jsonl/dpo-pairs.jsonl')) +DPO.parent.mkdir(parents=True, exist_ok=True) + +OR_KEY = os.environ.get('OPENROUTER_API_KEY','') + +# Checkpoint: last consolidated line # +last_line = 0 +if CKPT.exists(): + try: last_line = int(CKPT.read_text().strip()) + except: last_line = 0 + +if not EP.exists(): + print("[consolidate] no episodes yet") + exit() + +lines = EP.read_text(errors='replace').splitlines() +new_lines = lines[last_line:] +if not new_lines: + print(f"[consolidate] no new since line {last_line}") + exit() + +print(f"[consolidate] processing {len(new_lines)} new episodes") + +episodes = [] +for line in new_lines: + try: episodes.append(json.loads(line)) + except: continue + +# Step 1: Append to DPO training data (for future RunPod LoRA) +with open(DPO, 'a') as f: + for ep in episodes: + if not ep.get('task') or not ep.get('final'): continue + if '[error' in ep.get('final','') or '[timeout' in ep.get('final',''): continue + pair = { + 'instruction': ep['task'][:500], + 'input': '', + 'output': ep['final'][:3000], + 'source': 'surrogate-episode', + 'timestamp': ep.get('ts', datetime.utcnow().isoformat()), + } + f.write(json.dumps(pair, ensure_ascii=False) + '\n') + +# Step 2: Summarize batches → pattern (every 10 episodes) +def summarize_batch(batch): + if not OR_KEY: return 
None + prompt = "Below are recent Surrogate agent episodes (task + final answer). Extract 2-3 concise reusable patterns — what kind of tasks + what approaches worked. Output as bullet list. Thai OK.\n\n" + for i, ep in enumerate(batch): + prompt += f"--- Episode {i+1} ---\nTask: {ep.get('task','')[:300]}\nAnswer: {ep.get('final','')[:500]}\n\n" + body = { + 'model': 'google/gemini-2.5-flash', # cheap, good summarizer + 'messages': [{'role':'user','content': prompt[:15000]}], + 'temperature': 0.2, 'max_tokens': 600, + } + try: + req = urllib.request.Request( + 'https://openrouter.ai/api/v1/chat/completions', + data=json.dumps(body).encode(), + headers={'Content-Type':'application/json','Authorization':f'Bearer {OR_KEY}', + 'HTTP-Referer':'https://axentx.ai','X-Title':'Surrogate-Consolidate'} + ) + with urllib.request.urlopen(req, timeout=60) as r: + d = json.load(r) + return d['choices'][0]['message']['content'] + except Exception as e: + print(f"[consolidate] llm err: {e}") + return None + +# Batch into groups of 10 +patterns_added = 0 +for batch_start in range(0, len(episodes), 10): + batch = episodes[batch_start:batch_start+10] + summary = summarize_batch(batch) + if not summary: continue + pattern = { + 'ts': datetime.utcnow().isoformat(), + 'episodes_range': [batch_start, batch_start+len(batch)-1], + 'pattern_summary': summary[:2000], + 'n_episodes': len(batch), + } + with open(PAT, 'a') as f: + f.write(json.dumps(pattern, ensure_ascii=False) + '\n') + patterns_added += 1 + +# Step 3: Ingest patterns into index.db so future RAG finds them +conn = sqlite3.connect(os.path.expanduser('~/.surrogate/index.db')) +conn.execute('PRAGMA journal_mode=WAL') +cur = conn.cursor() +if PAT.exists(): + for line in PAT.read_text().splitlines()[-50:]: + try: p = json.loads(line) + except: continue + cur.execute( + "INSERT OR IGNORE INTO docs (source, project, path, topic, instruction, response, ts) VALUES (?,?,?,?,?,?,?)", + ('surrogate-episodes', 'surrogate', 'memory:pattern', 
'learned-pattern', + f"pattern from {p.get('n_episodes','?')} episodes", + p.get('pattern_summary','')[:2500], + p.get('ts', datetime.utcnow().isoformat())) + ) +conn.commit() +conn.close() + + +# Step 3b: Write patterns as graph nodes in FalkorDB (fix stagnant graph) +import subprocess +sock_r = subprocess.run(['/usr/bin/find','/var/folders','/tmp','-name','redis.socket','-type','s'], capture_output=True, text=True) +sock = sock_r.stdout.strip().split('\n')[0] if sock_r.stdout else None +if sock: + # Each pattern → Pattern node + relationships + if PAT.exists(): + for line in PAT.read_text().splitlines()[-patterns_added:]: + try: p = json.loads(line) + except: continue + pid = hashlib.md5(p.get('pattern_summary','')[:200].encode()).hexdigest()[:12] + title = p.get('pattern_summary','')[:100].replace("'", "").replace(chr(10),' ') + ts = p.get('ts','') + cypher = f"MERGE (p:Pattern {{id:'{pid}'}}) SET p.title='{title}', p.ts='{ts}', p.n_episodes={p.get('n_episodes',0)}" + try: + subprocess.run(['/opt/homebrew/bin/redis-cli','-s',sock,'GRAPH.QUERY','ashira',cypher], capture_output=True, timeout=5) + except: pass + # Each episode → Episode node linked to Pattern + for ep in episodes[-20:]: + eid = hashlib.md5(ep.get('task','')[:200].encode()).hexdigest()[:12] + task = ep.get('task','')[:80].replace("'","").replace(chr(10),' ') + quality = 'success' if '[error' not in ep.get('final','') and '[timeout' not in ep.get('final','') else 'failed' + cypher = f"MERGE (e:Episode {{id:'{eid}'}}) SET e.task='{task}', e.quality='{quality}', e.ts='{ep.get('ts','')}'" + try: + subprocess.run(['/opt/homebrew/bin/redis-cli','-s',sock,'GRAPH.QUERY','ashira',cypher], capture_output=True, timeout=5) + except: pass + print('[consolidate] wrote patterns + episodes to FalkorDB') +import hashlib # make sure imported + +# Update checkpoint +CKPT.write_text(str(len(lines))) +print(f"[consolidate] added {patterns_added} patterns from {len(episodes)} episodes. 
DPO pairs grown.") +PYEOF + +echo "[$(date '+%H:%M:%S')] consolidate done" >> "$LOG" diff --git a/bin/surrogate-daemon.sh b/bin/surrogate-daemon.sh index 57b284b47877863c964ae2e850b7e043bf4686a9..865380c5946743c8b3f231bf70264e65e1cf2d6a 100755 --- a/bin/surrogate-daemon.sh +++ b/bin/surrogate-daemon.sh @@ -2,7 +2,7 @@ # Surrogate Daemon — continuous autonomous worker # # Architecture: -# - Task queue file: ~/.claude/state/surrogate-queue.jsonl (append-only) +# - Task queue file: ~/.surrogate/state/surrogate-queue.jsonl (append-only) # - Workers: N parallel (default 3) # - Pickup: instant (as soon as worker idle → pull next task) # - Self-generation: if queue empty, daemon asks itself "what should I work on?" @@ -18,11 +18,11 @@ set -u set -a; source "$HOME/.hermes/.env" 2>/dev/null; set +a -STATE="$HOME/.claude/state/surrogate-daemon" +STATE="$HOME/.surrogate/state/surrogate-daemon" QUEUE="$STATE/queue.jsonl" DONE="$STATE/done.jsonl" PID_FILE="$STATE/daemon.pid" -LOG="$HOME/.claude/logs/surrogate-daemon.log" +LOG="$HOME/.surrogate/logs/surrogate-daemon.log" WORKERS=1 # default 1 worker (budget-safe). User can --workers 3 for burst mkdir -p "$STATE" "$(dirname "$LOG")" @@ -150,7 +150,7 @@ PYEOF # Every 30min: consolidation NOW_MIN=$(date +%M) if [[ "$NOW_MIN" == "15" ]] || [[ "$NOW_MIN" == "45" ]]; then - "$HOME/.claude/bin/surrogate-consolidate.sh" >> "$LOG" 2>&1 & + "$HOME/.surrogate/bin/surrogate-consolidate.sh" >> "$LOG" 2>&1 & fi sleep 10 @@ -226,7 +226,7 @@ PYEOF AUTO_TASK=$(/usr/bin/python3 <<'PYEOF' import json, os, random from pathlib import Path -ep = Path(os.path.expanduser('~/.claude/state/surrogate-memory/episodes.jsonl')) +ep = Path(os.path.expanduser('~/.surrogate/state/surrogate-memory/episodes.jsonl')) recent_topics = [] if ep.exists(): for line in ep.read_text().splitlines()[-30:]: @@ -243,7 +243,7 @@ pool = [ # B. 
Codebase health "อ่าน ~/axentx/ หา TODO/FIXME across projects → สร้าง fix spec", "เช็ค axentx test coverage per project → identify weakest → propose tests", - "Scan ~/.claude/bin/ หา script ที่ไม่ถูกใช้ > 7 days → propose archive", + "Scan ~/.surrogate/bin/ หา script ที่ไม่ถูกใช้ > 7 days → propose archive", "Review last 10 auto-commits → ตรวจว่า quality OK หรือไม่", # C. Knowledge quality "สำรวจ index.db หา duplicate entries → propose dedup", @@ -305,7 +305,7 @@ PYEOF START=$(date +%s) # Execute via agent - OUTPUT=$("$HOME/.claude/bin/surrogate-agent.sh" --max-steps 6 "$TASK" 2>&1 | tail -50) + OUTPUT=$("$HOME/.surrogate/bin/surrogate-agent.sh" --max-steps 6 "$TASK" 2>&1 | tail -50) END=$(date +%s) DUR=$((END - START)) diff --git a/bin/surrogate-dev-loop.sh b/bin/surrogate-dev-loop.sh index 9e94757d6ce21d4a53a98f5868b31d8c0fdd1f56..63ae98ee1597030fdc4a9d21b3cbad832166845d 100755 --- a/bin/surrogate-dev-loop.sh +++ b/bin/surrogate-dev-loop.sh @@ -16,7 +16,7 @@ set -u set -a; source "$HOME/.hermes/.env" 2>/dev/null; set +a -LOG="$HOME/.claude/logs/surrogate-dev-loop.log" +LOG="$HOME/.surrogate/logs/surrogate-dev-loop.log" OUT_DIR="$HOME/.hermes/workspace/local-dev" mkdir -p "$(dirname "$LOG")" "$OUT_DIR" @@ -28,7 +28,7 @@ SEARCH_ROOTS=( "$HOME/axentx" "$HOME/develope/DevOps" "$HOME/develope/AI" - "$HOME/.claude/bin" + "$HOME/.surrogate/bin" ) # ── Task generators (pick one per cycle, weighted random) ──────────────────── @@ -41,7 +41,7 @@ ROOTS = [ Path.home() / 'axentx', Path.home() / 'develope/DevOps', Path.home() / 'develope/AI', - Path.home() / '.claude/bin', + Path.home() / '.surrogate/bin', ] ROOTS = [p for p in ROOTS if p.exists()] diff --git a/bin/surrogate-orchestrate.sh b/bin/surrogate-orchestrate.sh index 911c289779b5d95b18c201a637fcf0c0b6b3aabb..4c64edfb9abe0b49f90123b13a3807461a84333e 100755 --- a/bin/surrogate-orchestrate.sh +++ b/bin/surrogate-orchestrate.sh @@ -26,7 +26,7 @@ CY=$'\033[36m'; GR=$'\033[32m'; YE=$'\033[33m'; MA=$'\033[35m'; 
RE=$'\033[31m'; BCY=$'\033[96m' SESSION_ID=$(date +%s | tail -c 9) -WORKDIR="$HOME/.claude/state/orchestrate/$SESSION_ID" +WORKDIR="$HOME/.surrogate/state/orchestrate/$SESSION_ID" TRAINING_LOG="$HOME/.surrogate/training-pairs.jsonl" mkdir -p "$WORKDIR" "$(dirname "$TRAINING_LOG")" @@ -273,7 +273,7 @@ PYEOF count=${count:-0} if [[ $count -gt 0 ]] && [[ $((count % 25)) -eq 0 ]]; then nohup bash "$HOME/.local/bin/push-training-to-hf.sh" \ - > "$HOME/.claude/logs/training-push.log" 2>&1 & + > "$HOME/.surrogate/logs/training-push.log" 2>&1 & fi fi } diff --git a/bin/surrogate-research-apply.sh b/bin/surrogate-research-apply.sh index c4dac871d400de713a5d1b3998153a5be387328a..ac17cbf204c5cfba36a43baa11cefee15e66fcd6 100755 --- a/bin/surrogate-research-apply.sh +++ b/bin/surrogate-research-apply.sh @@ -5,7 +5,7 @@ set -uo pipefail set -a; source "$HOME/.hermes/.env" 2>/dev/null; set +a -LOG="$HOME/.claude/logs/surrogate-research-apply.log" +LOG="$HOME/.surrogate/logs/surrogate-research-apply.log" QUEUE="$HOME/.hermes/workspace/research/queue.txt" APPLIED="$HOME/.hermes/workspace/research/applied.log" mkdir -p "$(dirname "$QUEUE")" "$(dirname "$LOG")" diff --git a/bin/surrogate-research-loop.sh b/bin/surrogate-research-loop.sh index 86b8c2102b4b6057012ece4b29def30bccc6886b..345387b1393f790f2f5960ec4dfdf3f4ba556867 100755 --- a/bin/surrogate-research-loop.sh +++ b/bin/surrogate-research-loop.sh @@ -6,7 +6,7 @@ set -uo pipefail set -a; source "$HOME/.hermes/.env" 2>/dev/null; set +a -LOG="$HOME/.claude/logs/surrogate-research-loop.log" +LOG="$HOME/.surrogate/logs/surrogate-research-loop.log" RESEARCH_DIR="$HOME/.hermes/workspace/research" APPLIED_DIR="$RESEARCH_DIR/applied" mkdir -p "$RESEARCH_DIR" "$APPLIED_DIR" "$(dirname "$LOG")" diff --git a/bin/work-queue-producer.sh b/bin/work-queue-producer.sh index 635e28d3b03b943a5494280f3a0b0473802276d7..6d6de193bd09ab43807d1fa2d5bc941404504325 100755 --- a/bin/work-queue-producer.sh +++ b/bin/work-queue-producer.sh @@ -15,7 +15,7 
@@ # Dedup: hermes:seen: TTL 30 min prevents re-enqueue of in-flight work. set -u -LOG="$HOME/.claude/logs/work-queue-producer.log" +LOG="$HOME/.surrogate/logs/work-queue-producer.log" SHARED="$HOME/.hermes/workspace/swarm-shared" mkdir -p "$(dirname "$LOG")" diff --git a/start.sh b/start.sh index 85b8754cb7f88b0174daff0f0b79ac0301ad030e..4e45a96a2d64439e955d4beb714e01355b804fb6 100644 --- a/start.sh +++ b/start.sh @@ -3,7 +3,7 @@ # Boots: persistent /data mount → Redis → Ollama → axentx repos → daemons → status server. set -uo pipefail -LOG_DIR="${HOME}/.claude/logs" +LOG_DIR="${HOME}/.surrogate/logs" mkdir -p "$LOG_DIR" echo "[$(date +%H:%M:%S)] hermes-hf-space boot start" echo "[$(date +%H:%M:%S)] hermes-hf-space boot start" >> "$LOG_DIR/boot.log" @@ -15,25 +15,44 @@ set -x # Echo stdout so HF run-logs see progress (safe steps before .env is loaded) exec > >(tee -a "$LOG_DIR/boot.log") 2>&1 -# ── 1. Persistent data — symlink state dirs to /data (HF persistent mount) ── +# ── 1. Persistent data — symlink state subdirs to /data (HF persistent mount) ── +# bin/ is NOT persisted (baked into image, refreshed on every push). +# Persisted: state (DBs), logs, memory, skills, sessions, training pairs, +# workspace (hermes runtime), projects (axentx clones), ollama (model cache). DATA="/data" if [[ -d "$DATA" ]] && [[ -w "$DATA" ]]; then - mkdir -p "$DATA"/{state,workspace,memory,reflexion,projects,ollama,surrogate,index} - # Symlink critical paths so DB/training/ChromaDB persist across rebuilds - for src in \ - "${HOME}/.claude/state:${DATA}/state" \ + mkdir -p "$DATA"/{state,logs,memory,skills,sessions,workspace,projects,ollama,training,reflexion,index} + # Migrate from any older layout (one-time): if /data/surrogate/state exists, move up one level + if [[ -d "$DATA/surrogate/state" ]] && [[ ! 
-L "$DATA/state" ]]; then + mv "$DATA/surrogate"/* "$DATA/" 2>/dev/null || true + rmdir "$DATA/surrogate" 2>/dev/null || true + fi + + for spec in \ + "${HOME}/.surrogate/state:${DATA}/state" \ + "${HOME}/.surrogate/logs:${DATA}/logs" \ + "${HOME}/.surrogate/memory:${DATA}/memory" \ + "${HOME}/.surrogate/skills:${DATA}/skills" \ + "${HOME}/.surrogate/sessions:${DATA}/sessions" \ "${HOME}/.hermes/workspace:${DATA}/workspace" \ - "${HOME}/.surrogate:${DATA}/surrogate" \ "${HOME}/.ollama:${DATA}/ollama"; do - target="${src%%:*}" - link="${src##*:}" + target="${spec%%:*}" + link="${spec##*:}" mkdir -p "$(dirname "$target")" if [[ ! -L "$target" ]]; then rm -rf "$target" 2>/dev/null ln -sfn "$link" "$target" fi done - echo "[$(date +%H:%M:%S)] persistent /data linked" >> "$LOG_DIR/boot.log" + + # training-pairs.jsonl — single file persistence + if [[ ! -L "${HOME}/.surrogate/training-pairs.jsonl" ]]; then + rm -f "${HOME}/.surrogate/training-pairs.jsonl" 2>/dev/null + touch "${DATA}/training-pairs.jsonl" + ln -sfn "${DATA}/training-pairs.jsonl" "${HOME}/.surrogate/training-pairs.jsonl" + fi + + echo "[$(date +%H:%M:%S)] persistent /data linked (state, logs, memory, skills, sessions, workspace, ollama, training-pairs)" >> "$LOG_DIR/boot.log" else echo "[$(date +%H:%M:%S)] WARN: /data not writable — running ephemeral!" >> "$LOG_DIR/boot.log" fi @@ -140,7 +159,7 @@ fi # Trace stays OFF — never re-enable past secrets section. if [[ -n "${DISCORD_BOT_TOKEN:-}" ]]; then set -a; source ~/.hermes/.env 2>/dev/null; set +a - nohup python ~/.claude/bin/hermes-discord-bot.py >> "$LOG_DIR/discord-bot.log" 2>&1 & + nohup python ~/.surrogate/bin/hermes-discord-bot.py >> "$LOG_DIR/discord-bot.log" 2>&1 & echo "[$(date +%H:%M:%S)] discord bot started" fi @@ -149,11 +168,11 @@ cat > /tmp/scrape-daemon.sh <<'SCRAPESH' #!/bin/bash # 8 concurrent scrape workers, near-zero idle time. 
set -a; source ~/.hermes/.env 2>/dev/null; set +a -LOG="${HOME}/.claude/logs/scrape-continuous.log" +LOG="${HOME}/.surrogate/logs/scrape-continuous.log" mkdir -p "$(dirname "$LOG")" while true; do START=$(date +%s) - bash ~/.claude/bin/domain-scrape-loop.sh 1500 8 >> "$LOG" 2>&1 + bash ~/.surrogate/bin/domain-scrape-loop.sh 1500 8 >> "$LOG" 2>&1 DUR=$(( $(date +%s) - START )) # Tight cool-downs — cloud has unlimited bandwidth, only rate-limit concern if [[ $DUR -lt 30 ]]; then sleep 30 # queue likely exhausted, give it time @@ -167,37 +186,37 @@ nohup /tmp/scrape-daemon.sh > "$LOG_DIR/scrape-daemon.log" 2>&1 & echo "[$(date +%H:%M:%S)] continuous scrape daemon (parallel=8) started" >> "$LOG_DIR/boot.log" # ── 7b. Agentic crawler (URL frontier + visited stamps + link discovery) ──── -nohup bash ~/.claude/bin/agentic-crawler.sh 6 > "$LOG_DIR/agentic-crawler.log" 2>&1 & +nohup bash ~/.surrogate/bin/agentic-crawler.sh 6 > "$LOG_DIR/agentic-crawler.log" 2>&1 & echo "[$(date +%H:%M:%S)] agentic crawler started (parallel=6)" >> "$LOG_DIR/boot.log" # ── 7c. Skill-synthesis daemon (extract patterns from cloned repos → skills) ─ -nohup bash ~/.claude/bin/skill-synthesis-daemon.sh > "$LOG_DIR/skill-synthesis.log" 2>&1 & +nohup bash ~/.surrogate/bin/skill-synthesis-daemon.sh > "$LOG_DIR/skill-synthesis.log" 2>&1 & echo "[$(date +%H:%M:%S)] skill-synthesis daemon started" >> "$LOG_DIR/boot.log" # ── 7b. 
Cron loop — non-scrape daemons (scrape now runs continuously above) ─ cat > /tmp/hermes-cron.sh <<'CRONSH' #!/bin/bash set -a; source ~/.hermes/.env 2>/dev/null; set +a -LOG="${HOME}/.claude/logs/cron.log" +LOG="${HOME}/.surrogate/logs/cron.log" mkdir -p "$(dirname "$LOG")" while true; do M=$(($(date +%s) / 60)) # Every 2 min: continuous local dev (qwen3-coder when ready, else gemma) - [[ $((M % 2)) -eq 0 ]] && bash ~/.claude/bin/surrogate-dev-loop.sh 1 >> "$LOG" 2>&1 & + [[ $((M % 2)) -eq 0 ]] && bash ~/.surrogate/bin/surrogate-dev-loop.sh 1 >> "$LOG" 2>&1 & # Every 5 min: producer pushes priorities to Redis - [[ $((M % 5)) -eq 0 ]] && bash ~/.claude/bin/work-queue-producer.sh >> "$LOG" 2>&1 & + [[ $((M % 5)) -eq 0 ]] && bash ~/.surrogate/bin/work-queue-producer.sh >> "$LOG" 2>&1 & # Every 3 min: training-pair push to HF (drains ~/.surrogate/training-pairs.jsonl) - [[ $((M % 3)) -eq 0 ]] && bash ~/.claude/bin/push-training-to-hf.sh >> "$LOG" 2>&1 & + [[ $((M % 3)) -eq 0 ]] && bash ~/.surrogate/bin/push-training-to-hf.sh >> "$LOG" 2>&1 & # Every 20 min: full orchestrate chain (architect → dev → qa → reviewer + git push) - [[ $((M % 20)) -eq 0 ]] && bash ~/.claude/bin/auto-orchestrate-loop.sh >> "$LOG" 2>&1 & + [[ $((M % 20)) -eq 0 ]] && bash ~/.surrogate/bin/auto-orchestrate-loop.sh >> "$LOG" 2>&1 & # Every 30 min: research-apply (pop queue → orchestrate → ship feature) - [[ $((M % 30)) -eq 15 ]] && bash ~/.claude/bin/surrogate-research-apply.sh >> "$LOG" 2>&1 & + [[ $((M % 30)) -eq 15 ]] && bash ~/.surrogate/bin/surrogate-research-apply.sh >> "$LOG" 2>&1 & # Every 60 min: keyword tuner (adapts scrape queue based on yields) - [[ $((M % 60)) -eq 0 ]] && bash ~/.claude/bin/scrape-keyword-tuner.sh >> "$LOG" 2>&1 & + [[ $((M % 60)) -eq 0 ]] && bash ~/.surrogate/bin/scrape-keyword-tuner.sh >> "$LOG" 2>&1 & # Every 6 hours: research-loop (discover new features from competitors/papers) - [[ $((M % 360)) -eq 30 ]] && bash ~/.claude/bin/surrogate-research-loop.sh >> "$LOG" 
2>&1 & + [[ $((M % 360)) -eq 30 ]] && bash ~/.surrogate/bin/surrogate-research-loop.sh >> "$LOG" 2>&1 & # Every 12 hours: dataset enrich (pulls fresh public datasets, dedups, uploads to HF) - [[ $((M % 720)) -eq 60 ]] && bash ~/.claude/bin/dataset-enrich.sh >> "$LOG" 2>&1 & + [[ $((M % 720)) -eq 60 ]] && bash ~/.surrogate/bin/dataset-enrich.sh >> "$LOG" 2>&1 & sleep 60 done CRONSH @@ -216,4 +235,4 @@ python3 -c "import fastapi, uvicorn; print(f' fastapi {fastapi.__version__} + u } # Run as PID 1 — uvicorn handles signals + auto-restart on crash -exec python3 ~/.claude/bin/hermes-status-server.py +exec python3 ~/.surrogate/bin/hermes-status-server.py