surrogate-1 / bin /surrogate-dev-loop.sh
Ashira Pitchayapakayakul
fix: strip Mac /usr/bin/* hardcoded paths + expand dataset-enrich to 21 sources
023ab84
raw
history blame
13.1 kB
#!/usr/bin/env bash
# Surrogate Continuous Dev Loop β€” local 24/7 micro-development.
#
# Picks a real TODO/FIXME or quality issue from user's projects and asks
# the local Surrogate-1 (gemma4-based, free, unlimited) to propose a fix.
# Output goes to ~/.hermes/workspace/local-dev/ for review β€” does NOT
# auto-edit user code.
#
# Pairs with cloud free-tier daemons (cerebras/groq/etc.) which handle
# heavy multi-step priorities. This loop fills the "always-on" gap with
# small atomic improvements.
#
# Usage:
# surrogate-dev-loop.sh # one cycle
# surrogate-dev-loop.sh --continuous N # N cycles (default 1)
set -u
set -a; source "$HOME/.hermes/.env" 2>/dev/null; set +a
LOG="$HOME/.surrogate/logs/surrogate-dev-loop.log"
OUT_DIR="$HOME/.hermes/workspace/local-dev"
mkdir -p "$(dirname "$LOG")" "$OUT_DIR"
CYCLES="${1:-1}"
[[ "$CYCLES" == "--continuous" ]] && CYCLES="${2:-1}"
# ── Search roots β€” only user's own projects, not system dirs ─────────────────
SEARCH_ROOTS=(
"$HOME/axentx"
"$HOME/develope/DevOps"
"$HOME/develope/AI"
"$HOME/.surrogate/bin"
)
# ── Task generators (pick one per cycle, weighted random) ────────────────────
pick_task() {
python3 <<'PYEOF'
import os, random, re, subprocess, json
from pathlib import Path
ROOTS = [
Path.home() / 'axentx',
Path.home() / 'develope/DevOps',
Path.home() / 'develope/AI',
Path.home() / '.surrogate/bin',
]
ROOTS = [p for p in ROOTS if p.exists()]
def find_todo():
"""Find a TODO/FIXME/XXX/HACK comment in user code (uses ripgrep β€” fast)."""
cmd = ['/opt/homebrew/bin/rg', '--no-heading', '-n', '-m', '3',
'--type', 'py', '--type', 'sh', '--type', 'ts', '--type', 'go',
'-g', '!node_modules', '-g', '!.venv', '-g', '!__pycache__',
'-g', '!.git', '-g', '!dist', '-g', '!build',
r'(TODO|FIXME|XXX|HACK)[:\s]']
for root in ROOTS:
cmd.append(str(root))
try:
r = subprocess.run(cmd, capture_output=True, text=True, timeout=10)
lines = [l for l in r.stdout.splitlines() if l.strip()][:300]
if not lines:
return None
line = random.choice(lines)
# parse: path:lineno:content
m = re.match(r'^([^:]+):(\d+):(.+)$', line)
if not m:
return None
path, lineno, content = m.groups()
return {
'kind': 'todo',
'path': path,
'line': int(lineno),
'content': content.strip()[:200],
'task': f"Resolve this TODO in {Path(path).name}:{lineno}\n {content.strip()[:200]}\nPropose a concrete implementation. Don't auto-edit β€” just describe the fix.",
}
except Exception:
return None
def find_long_function():
"""Find a Python function >50 lines that may need refactoring."""
cmd = ['find'] + [str(r) for r in ROOTS] + [
'-name', '*.py',
'-not', '-path', '*/node_modules/*',
'-not', '-path', '*/.venv/*',
'-not', '-path', '*/__pycache__/*',
]
try:
r = subprocess.run(cmd, capture_output=True, text=True, timeout=10)
files = r.stdout.splitlines()[:500]
random.shuffle(files)
for f in files[:30]:
try:
lines = Path(f).read_text(errors='replace').splitlines()
except Exception:
continue
for i, line in enumerate(lines):
if re.match(r'\s*def\s+\w+', line):
indent = len(line) - len(line.lstrip())
end = i + 1
while end < len(lines):
l = lines[end]
if l.strip() and (len(l) - len(l.lstrip())) <= indent:
break
end += 1
if end - i > 50:
func = '\n'.join(lines[i:min(i+80, end)])
return {
'kind': 'refactor',
'path': f,
'line': i + 1,
'task': f"This Python function in {Path(f).name}:{i+1} is {end-i} lines long. Suggest 2-3 ways to split it into smaller, more focused functions. Be specific (function names + responsibility).",
'context': func[:2500],
}
return None
except Exception:
return None
def find_missing_docstring():
"""Find a Python public function without a docstring."""
cmd = ['find'] + [str(r) for r in ROOTS] + ['-name', '*.py', '-not', '-path', '*/__pycache__/*']
try:
r = subprocess.run(cmd, capture_output=True, text=True, timeout=10)
files = r.stdout.splitlines()[:300]
random.shuffle(files)
for f in files[:20]:
try:
lines = Path(f).read_text(errors='replace').splitlines()
except Exception:
continue
for i, line in enumerate(lines):
m = re.match(r'^def\s+([a-z]\w*)\(', line)
if not m:
continue
# Skip private + dunder
if m.group(1).startswith('_'):
continue
# Check if next non-blank line is a docstring
j = i + 1
while j < len(lines) and not lines[j].strip():
j += 1
if j < len(lines) and not lines[j].lstrip().startswith(('"""', "'''")):
body_end = min(i + 25, len(lines))
func = '\n'.join(lines[i:body_end])
return {
'kind': 'docstring',
'path': f,
'line': i + 1,
'func_name': m.group(1),
'task': f"Write a concise Python docstring for `{m.group(1)}` in {Path(f).name}:{i+1}. Include: one-line summary, Args (with types), Returns. NO Examples section. Output the docstring text only.",
'context': func[:1500],
}
return None
except Exception:
return None
# Weighted random pick (TODO scan most useful, refactor rare, docstring filler)
generators = [
(0.55, find_todo),
(0.20, find_long_function),
(0.25, find_missing_docstring),
]
random.shuffle(generators)
generators.sort(key=lambda x: random.random()) # extra shuffle
for _, gen in generators:
task = gen()
if task:
print(json.dumps(task, ensure_ascii=False))
break
PYEOF
}
# ── Reflexion: load top-3 lessons learned for this task kind ────────────────
load_reflexion_lessons() {
local kind="$1"
local file="$HOME/.hermes/workspace/reflexion/lessons-${kind}.jsonl"
[[ ! -f "$file" ]] && { echo ""; return; }
python3 <<PYEOF
import json
from pathlib import Path
p = Path("$file")
if not p.exists(): exit()
lines = p.read_text().splitlines()[-50:] # last 50 entries
records = []
for l in lines:
try: records.append(json.loads(l))
except: pass
# Score: explicit score first, else recency. Take top 3 unique lessons.
records.sort(key=lambda r: r.get('score', 0), reverse=True)
seen = set(); top = []
for r in records:
lesson = r.get('lesson','').strip()
if not lesson or lesson in seen: continue
seen.add(lesson); top.append(lesson)
if len(top) >= 3: break
if top:
print("=== Reflexion: lessons from past attempts ===")
for i, l in enumerate(top, 1):
print(f"{i}. {l}")
print("=== end lessons ===\n")
PYEOF
}
# ── Reflexion: extract & save 1-line lesson from a completed cycle ──────────
save_reflexion_lesson() {
local kind="$1" task="$2" response="$3" duration="$4"
local file="$HOME/.hermes/workspace/reflexion/lessons-${kind}.jsonl"
mkdir -p "$(dirname "$file")"
# Pass payload via env vars + sys.argv (safe β€” no shell quoting issues with embedded quotes)
REFLEX_RESP="$response" REFLEX_TASK="$task" \
python3 - "$kind" "$duration" "$file" <<'PYEOF'
import json, re, os, sys
from datetime import datetime
kind, dur, out_file = sys.argv[1], int(sys.argv[2]), sys.argv[3]
resp = os.environ.get('REFLEX_RESP', '')
task = os.environ.get('REFLEX_TASK', '')[:200]
lesson = None
for pat in [
r'(?:lesson|key insight|key takeaway|note):\s*([^\n]{20,200})',
r'(?:I learned|important to|remember to|need to)\s+([^\n]{20,200})',
]:
m = re.search(pat, resp, re.IGNORECASE)
if m: lesson = m.group(1).strip(); break
if not lesson:
sentences = [s.strip() for s in re.split(r'[\.\n]+', resp) if 30 < len(s.strip()) < 200]
if sentences: lesson = sentences[0]
if lesson:
record = {
'ts': datetime.utcnow().isoformat(),
'kind': kind, 'task': task, 'lesson': lesson[:300],
'duration_sec': dur,
'score': 1.0 if dur < 60 else 0.5,
}
with open(out_file, 'a') as f:
f.write(json.dumps(record, ensure_ascii=False) + '\n')
PYEOF
}
# ── Run one cycle: pick task, ask Surrogate-1, save output ──────────────────
run_cycle() {
local cycle_num="$1"
local task_json
task_json=$(pick_task)
if [[ -z "$task_json" ]]; then
echo "[$(date +%H:%M:%S)] no task found this cycle" >> "$LOG"
return 0
fi
local kind path line task_text context
kind=$(echo "$task_json" | python3 -c "import json,sys; print(json.loads(sys.stdin.read()).get('kind',''))")
path=$(echo "$task_json" | python3 -c "import json,sys; print(json.loads(sys.stdin.read()).get('path',''))")
line=$(echo "$task_json" | python3 -c "import json,sys; print(json.loads(sys.stdin.read()).get('line',0))")
task_text=$(echo "$task_json" | python3 -c "import json,sys; print(json.loads(sys.stdin.read()).get('task',''))")
context=$(echo "$task_json" | python3 -c "import json,sys; print(json.loads(sys.stdin.read()).get('context',''))")
local id="$(date +%s)-${kind}"
local out="$OUT_DIR/${id}.md"
local start
start=$(date +%s)
echo "[$(date +%H:%M:%S)] cycle=$cycle_num kind=$kind path=$(basename "$path"):$line" >> "$LOG"
# Build prompt β€” prepend Reflexion lessons if any
local lessons
lessons=$(load_reflexion_lessons "$kind")
local prompt="${lessons}${task_text}"
[[ -n "$context" ]] && prompt="$prompt
=== Code context ===
\`\`\`
$context
\`\`\`"
# Call Surrogate-1 via Ollama (keep_alive=5m so model stays warm between cycles)
local body
body=$(PROMPT_VAR="$prompt" python3 <<'PYEOF'
import json, os
print(json.dumps({
"model": "surrogate-1",
"messages": [{"role": "user", "content": os.environ["PROMPT_VAR"]}],
"max_tokens": 1500,
"temperature": 0.2,
"top_p": 0.9,
"keep_alive": "5m",
}))
PYEOF
)
local resp
resp=$(curl -sS --max-time 120 \
http://localhost:11434/v1/chat/completions \
-H 'Content-Type: application/json' \
-d "$body" 2>/dev/null)
local answer
answer=$(echo "$resp" | python3 -c "
import json, sys
try:
d = json.load(sys.stdin)
print(d['choices'][0]['message']['content'])
except Exception as e:
print(f'[err] {e}')
")
local dur=$(( $(date +%s) - start ))
# Save output
cat > "$out" <<EOF
# Local Dev: $kind β€” $(date '+%Y-%m-%d %H:%M')
**File:** \`$path:$line\`
**Duration:** ${dur}s
**Model:** surrogate-1 (gemma4:e4b base)
---
## Task
$task_text
---
## Surrogate-1 Response
$answer
---
*Auto-generated by surrogate-dev-loop. Review before applying.*
EOF
echo "[$(date +%H:%M:%S)] cycle=$cycle_num done in ${dur}s β†’ $(basename "$out")" >> "$LOG"
# Reflexion: extract & save lesson from this cycle
save_reflexion_lesson "$kind" "$task_text" "$answer" "$dur"
# Append to training-data candidate (env vars + argv = safe quoting)
DEV_TASK="$task_text" DEV_ANSWER="$answer" \
python3 - "$kind" "$dur" <<'PYEOF'
import json, os, sys
from pathlib import Path
from datetime import datetime
kind, dur = sys.argv[1], int(sys.argv[2])
candidate = Path.home() / 'axentx/surrogate/data/training-jsonl/local-dev-pending.jsonl'
candidate.parent.mkdir(parents=True, exist_ok=True)
record = {
'ts': datetime.utcnow().strftime('%Y-%m-%dT%H:%M:%SZ'),
'kind': kind,
'task': os.environ.get('DEV_TASK', '')[:8000],
'response': os.environ.get('DEV_ANSWER', '')[:5000],
'duration_sec': dur,
'source': 'surrogate-dev-loop',
}
with open(candidate, 'a') as f:
f.write(json.dumps(record, ensure_ascii=False) + '\n')
PYEOF
}
# ── Main loop ────────────────────────────────────────────────────────────────
echo "[$(date +%H:%M:%S)] dev-loop start cycles=$CYCLES" >> "$LOG"
for i in $(seq 1 "$CYCLES"); do
run_cycle "$i" || true
# Small delay between cycles (don't hammer Ollama)
[[ $i -lt $CYCLES ]] && sleep 30
done
echo "[$(date +%H:%M:%S)] dev-loop done" >> "$LOG"