Spaces:
Runtime error
feat(v2-round5): sustainability loops + 2026 techniques
Browse filesRound 5 (2026-04-30): 10 techniques researched + 11 files implemented.
Loops (event-driven + cron-driven, all on HF Space):
- bin/v2/reflexion-store.py → SQLite past-failures + TF-IDF retrieve
- bin/v2/voyager-skills.py → auto-promote skill library on success >=3
- bin/v2/self-improve-loop.sh → gen→solve→judge→winners/losers split (6h)
- bin/v2/constitutional-loop.py → 8-principle self-critique → DPO triple
- bin/v2/tool-trace-collector.py → Hermes-XML logs → SFT+DPO+skills (30min)
- bin/v2/active-learning.py → uncertainty (pairwise Jaccard) → judge label
- bin/v2/inference-augment.py → prepend lessons + skills + Hermes-3 schema
Training-data generators:
- bin/v2/sdft-trainer.py → y_hat→distilled gold (kills forgetting)
- bin/v2/verify-trace-generator.py → DRAFT/PROBE/CHECK/FINAL traces
Serving:
- bin/v2/eagle3-setup.sh → generates serve-vllm-eagle3.sh (3-5x)
Configs:
- configs/v2/stage1-sdft.yml → SDFT replacement for stage1-sft
Updates:
- bin/v2/merge-9-loras.sh → MERGE_METHOD env switch (dare_ties|from|magic|ace)
- start.sh β 5 new cron entries (offsets 22/17/90/420/480)
- bin/v2/active-learning.py +210 -0
- bin/v2/constitutional-loop.py +230 -0
- bin/v2/eagle3-setup.sh +67 -0
- bin/v2/inference-augment.py +168 -0
- bin/v2/merge-9-loras.sh +147 -20
- bin/v2/reflexion-store.py +173 -0
- bin/v2/sdft-trainer.py +183 -0
- bin/v2/self-improve-loop.sh +227 -0
- bin/v2/tool-trace-collector.py +231 -0
- bin/v2/verify-trace-generator.py +205 -0
- bin/v2/voyager-skills.py +182 -0
- configs/v2/stage1-sdft.yml +103 -0
- start.sh +27 -0
|
@@ -0,0 +1,210 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""Surrogate-1 v2 β Active learning by uncertainty sampling.
|
| 2 |
+
|
| 3 |
+
For the next training batch, we want the highest-leverage examples:
|
| 4 |
+
ones the current Surrogate is most UNCERTAIN about. Those teach more per
|
| 5 |
+
gradient step than easy ones.
|
| 6 |
+
|
| 7 |
+
Approach (no logprobs available from free LLM bridges):
|
| 8 |
+
1. Pull a candidate pool from one of the bulk-mirror JSONLs.
|
| 9 |
+
2. Surrogate generates 3 completions per prompt at temperature 0.7.
|
| 10 |
+
3. Pairwise similarity (Jaccard on token sets) β variance score.
|
| 11 |
+
4. High variance = high uncertainty β keep for labeling.
|
| 12 |
+
5. Send keepers to LLM-judge ladder for canonical answer.
|
| 13 |
+
6. Append to ~/.surrogate/data/v2/active-learning-batch.jsonl
|
| 14 |
+
|
| 15 |
+
Run: python3 active-learning.py --pool /path/to.jsonl --n 200
|
| 16 |
+
"""
|
| 17 |
+
from __future__ import annotations
|
| 18 |
+
import argparse
|
| 19 |
+
import json
|
| 20 |
+
import os
|
| 21 |
+
import random
|
| 22 |
+
import re
|
| 23 |
+
import statistics
|
| 24 |
+
import subprocess
|
| 25 |
+
import sys
|
| 26 |
+
import time
|
| 27 |
+
import urllib.request
|
| 28 |
+
from pathlib import Path
|
| 29 |
+
|
| 30 |
+
sys.path.insert(0, str(Path.home() / ".surrogate/bin/lib"))
|
| 31 |
+
try:
|
| 32 |
+
from sanitize import filter_pair # type: ignore
|
| 33 |
+
from dedup import DedupStore # type: ignore
|
| 34 |
+
HAS_DEDUP = True
|
| 35 |
+
except Exception:
|
| 36 |
+
def filter_pair(p, r): return {"keep": True}
|
| 37 |
+
HAS_DEDUP = False
|
| 38 |
+
|
| 39 |
+
OUT_PATH = Path.home() / ".surrogate/data/v2/active-learning-batch.jsonl"
|
| 40 |
+
SURROGATE_URL = os.environ.get("SURROGATE_URL", "http://127.0.0.1:8000")
|
| 41 |
+
TOKEN_RE = re.compile(r"[a-zA-Z_][a-zA-Z0-9_]{2,}")
|
| 42 |
+
|
| 43 |
+
|
| 44 |
+
def _toks(text: str) -> set[str]:
    """Lower-case *text* and return its identifier-like tokens as a set."""
    matches = TOKEN_RE.findall(text.lower())
    return set(matches)
|
| 46 |
+
|
| 47 |
+
|
| 48 |
+
def _jaccard(a: set[str], b: set[str]) -> float:
|
| 49 |
+
if not a or not b:
|
| 50 |
+
return 0.0
|
| 51 |
+
return len(a & b) / max(1, len(a | b))
|
| 52 |
+
|
| 53 |
+
|
| 54 |
+
def _llm_ladder(prompt: str, sys_prompt: str = "",
                max_tokens: int = 1024, temperature: float = 0.7) -> str:
    """Walk the free-LLM bridge scripts in preference order.

    Returns the first reply longer than 30 characters, or "" if every
    bridge is missing, times out, or fails.
    """
    bridge_scripts = (
        "$HOME/.surrogate/bin/cerebras-bridge.sh",
        "$HOME/.surrogate/bin/groq-bridge.sh",
        "$HOME/.surrogate/bin/openrouter-bridge.sh",
        "$HOME/.surrogate/bin/gemini-bridge.sh",
        "$HOME/.surrogate/bin/chutes-bridge.sh",
        "$HOME/.surrogate/bin/ollama-bridge.sh",
    )
    # Same request payload for every bridge; build it once.
    payload = json.dumps({"system": sys_prompt, "prompt": prompt,
                          "max_tokens": max_tokens,
                          "temperature": temperature})
    for script in bridge_scripts:
        resolved = os.path.expandvars(script)
        if not Path(resolved).exists():
            continue
        try:
            proc = subprocess.run(["bash", resolved], input=payload,
                                  capture_output=True, text=True, timeout=60)
        except Exception:
            continue  # timeout or launch failure: try the next bridge
        reply = (proc.stdout or "").strip()
        if reply and len(reply) > 30:
            return reply
    return ""
|
| 80 |
+
|
| 81 |
+
|
| 82 |
+
def _surrogate_sample(prompt: str, n: int = 3,
                      temperature: float = 0.7) -> list[str]:
    """Sample up to *n* completions for *prompt*.

    Tries the local vLLM endpoint first (one request with n=...); any
    shortfall is topped up one completion at a time via the free-LLM
    bridge ladder (fixed bridge order). May return fewer than *n* items
    if every backend fails.
    """
    out = []
    try:
        req = json.dumps({
            "model": "surrogate-1-coder-7b-v2",
            "messages": [{"role": "user", "content": prompt}],
            "max_tokens": 768, "temperature": temperature, "n": n,
        }).encode()
        r = urllib.request.Request(
            f"{SURROGATE_URL}/v1/chat/completions", data=req,
            headers={"Content-Type": "application/json"})
        with urllib.request.urlopen(r, timeout=90) as resp:
            d = json.loads(resp.read())
        for ch in d.get("choices", []):
            t = ch.get("message", {}).get("content", "").strip()
            if t:
                out.append(t)
    except Exception:
        # Local endpoint down or malformed reply: fall through to ladder.
        pass
    # Top up any missing completions via the bridge ladder.
    while len(out) < n:
        c = _llm_ladder(prompt, "You are Surrogate-1, an expert coding agent.",
                        max_tokens=768, temperature=temperature)
        if not c:
            break  # all bridges failed; return what we have
        out.append(c)
    return out
|
| 110 |
+
|
| 111 |
+
|
| 112 |
+
def _uncertainty(samples: list[str]) -> float:
    """Return 1 - mean pairwise Jaccard similarity over *samples*.

    Higher value = more disagreement between completions = the model is
    less certain about this prompt. 0.0 for fewer than two samples.
    """
    if len(samples) < 2:
        return 0.0
    token_sets = [_toks(s) for s in samples]
    pair_sims = [_jaccard(x, y)
                 for i, x in enumerate(token_sets)
                 for y in token_sets[i + 1:]]
    if not pair_sims:
        return 0.0
    return 1.0 - statistics.mean(pair_sims)
|
| 125 |
+
|
| 126 |
+
|
| 127 |
+
def _judge_label(prompt: str, candidates: list[str]) -> str:
    """Ask the LLM-judge ladder for one canonical answer synthesized from
    *candidates* (low temperature for a stable verdict)."""
    sys_p = ("You are an expert reviewer. Given the prompt and candidate "
             "answers, output the BEST canonical answer. Combine the best "
             "parts if useful. Output only the final answer β no preamble.")
    numbered = "\n---\n".join(f"[{i+1}] {c[:1500]}"
                              for i, c in enumerate(candidates))
    user_p = (f"PROMPT:\n{prompt[:1500]}\n\nCANDIDATES:\n" + numbered +
              "\n\nReturn the best canonical answer.")
    return _llm_ladder(user_p, sys_p, max_tokens=1500, temperature=0.2)
|
| 136 |
+
|
| 137 |
+
|
| 138 |
+
def main() -> None:
    """CLI entry point: scan a prompt pool, keep the most uncertain
    prompts, LLM-judge them into canonical labels, and append
    {prompt, response} pairs to OUT_PATH."""
    ap = argparse.ArgumentParser()
    ap.add_argument("--pool", required=True,
                    help="JSONL with {prompt} per line")
    ap.add_argument("--n", type=int, default=200,
                    help="how many high-uncertainty examples to keep")
    ap.add_argument("--scan", type=int, default=2000,
                    help="how many pool entries to evaluate")
    ap.add_argument("--threshold", type=float, default=0.4,
                    help="min uncertainty to keep")
    args = ap.parse_args()

    pool_path = Path(args.pool)
    if not pool_path.exists():
        print(f"β pool not found: {pool_path}", file=sys.stderr)
        sys.exit(1)

    OUT_PATH.parent.mkdir(parents=True, exist_ok=True)

    # (uncertainty, prompt, samples) for every prompt above the threshold.
    candidates: list[tuple[float, str, list[str]]] = []
    seen_count = 0

    # Shuffle so repeated runs sample different slices of the pool.
    with open(pool_path) as f:
        lines = f.readlines()
    random.shuffle(lines)
    for line in lines[:args.scan]:
        try:
            d = json.loads(line)
        except Exception:
            continue  # skip malformed JSONL rows
        # Pool files use different field names; accept the common three.
        prompt = (d.get("prompt") or d.get("instruction")
                  or d.get("input") or "")[:3000]
        if len(prompt) < 30:
            continue  # too short to be a meaningful training prompt
        samples = _surrogate_sample(prompt, n=3)
        if len(samples) < 2:
            continue  # need >=2 samples to measure disagreement
        u = _uncertainty(samples)
        seen_count += 1
        if u >= args.threshold:
            candidates.append((u, prompt, samples))
        if (seen_count) % 25 == 0:
            print(f" scanned {seen_count} kept {len(candidates)}")

    # Top by uncertainty
    candidates.sort(key=lambda x: -x[0])
    keep = candidates[:args.n]
    print(f"[label] LLM-judging {len(keep)} candidates")

    n_written = 0
    with open(OUT_PATH, "a") as fout:
        for u, prompt, samples in keep:
            label = _judge_label(prompt, samples)
            if not label or len(label) < 30:
                continue  # judge failed or produced a trivial answer
            if not filter_pair(prompt, label)["keep"]:
                continue  # sanitizer rejected the pair
            if HAS_DEDUP and not DedupStore.is_new(prompt, source="active-learning"):
                continue  # already collected by a previous run
            fout.write(json.dumps({
                "prompt": prompt, "response": label,
                "source": "active-learning",
                "meta": {"uncertainty": round(u, 3),
                         "n_candidates": len(samples)},
            }, ensure_ascii=False) + "\n")
            n_written += 1

    print(f"[done] scanned={seen_count} high_uncertainty={len(keep)} "
          f"labeled+kept={n_written} β {OUT_PATH}")


if __name__ == "__main__":
    main()
|
|
@@ -0,0 +1,230 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""Surrogate-1 v2 β Constitutional self-critique β DPO data generator.
|
| 2 |
+
|
| 3 |
+
Implements Bai et al. 2022 (Constitutional AI) but specialized for
|
| 4 |
+
DevSecOps/SRE/code agents. For each input prompt:
|
| 5 |
+
|
| 6 |
+
1. Surrogate generates a response.
|
| 7 |
+
2. Self-critique against project-specific principles.
|
| 8 |
+
3. Revise if any principle flagged.
|
| 9 |
+
4. Output (original = rejected, revised = chosen) β DPO pair.
|
| 10 |
+
|
| 11 |
+
Used as nightly batch. Output appended to:
|
| 12 |
+
~/.surrogate/data/v2/constitutional-dpo.jsonl
|
| 13 |
+
|
| 14 |
+
Run:
|
| 15 |
+
python3 constitutional-loop.py --input prompts.jsonl --n 200
|
| 16 |
+
"""
|
| 17 |
+
from __future__ import annotations
|
| 18 |
+
import argparse
|
| 19 |
+
import json
|
| 20 |
+
import os
|
| 21 |
+
import subprocess
|
| 22 |
+
import sys
|
| 23 |
+
import time
|
| 24 |
+
from pathlib import Path
|
| 25 |
+
|
| 26 |
+
sys.path.insert(0, str(Path.home() / ".surrogate/bin/lib"))
|
| 27 |
+
try:
|
| 28 |
+
from sanitize import filter_pair # type: ignore
|
| 29 |
+
except Exception:
|
| 30 |
+
def filter_pair(p, r): # fallback
|
| 31 |
+
return {"keep": True, "reason": "no-sanitizer"}
|
| 32 |
+
|
| 33 |
+
|
| 34 |
+
PRINCIPLES = [
|
| 35 |
+
{
|
| 36 |
+
"name": "no_phantom_imports",
|
| 37 |
+
"check": ("Does the response import only real, installable packages? "
|
| 38 |
+
"Flag any phantom modules, hallucinated APIs, or fictional "
|
| 39 |
+
"library functions."),
|
| 40 |
+
"domain": "code",
|
| 41 |
+
},
|
| 42 |
+
{
|
| 43 |
+
"name": "no_hardcoded_secrets",
|
| 44 |
+
"check": ("Does the response contain hardcoded credentials, API keys, "
|
| 45 |
+
"tokens, passwords, or connection strings? Flag any leaked "
|
| 46 |
+
"secrets or examples that look real."),
|
| 47 |
+
"domain": "security",
|
| 48 |
+
},
|
| 49 |
+
{
|
| 50 |
+
"name": "least_privilege",
|
| 51 |
+
"check": ("If IAM/RBAC/permissions are involved, does the response "
|
| 52 |
+
"follow least-privilege? Flag wildcards (* on Resource or "
|
| 53 |
+
"Action), admin roles attached to functions, public S3 "
|
| 54 |
+
"buckets without justification."),
|
| 55 |
+
"domain": "security",
|
| 56 |
+
},
|
| 57 |
+
{
|
| 58 |
+
"name": "input_validation",
|
| 59 |
+
"check": ("If the response handles user input or external data, does "
|
| 60 |
+
"it validate/sanitize? Flag SQL/command/HTML injection "
|
| 61 |
+
"vectors, missing parameterized queries, or trusting "
|
| 62 |
+
"untrusted input."),
|
| 63 |
+
"domain": "security",
|
| 64 |
+
},
|
| 65 |
+
{
|
| 66 |
+
"name": "honest_uncertainty",
|
| 67 |
+
"check": ("If the question requires data the model can't have "
|
| 68 |
+
"(versioned APIs, internal systems, future events), does "
|
| 69 |
+
"the response say 'I don't know' or 'verify against docs', "
|
| 70 |
+
"OR does it confabulate a confident-sounding wrong answer?"),
|
| 71 |
+
"domain": "general",
|
| 72 |
+
},
|
| 73 |
+
{
|
| 74 |
+
"name": "no_internal_path_leak",
|
| 75 |
+
"check": ("Does the response leak internal paths, training-data "
|
| 76 |
+
"artifacts, or filesystem structures from training? Flag "
|
| 77 |
+
"/home/hermes/, /data/state/, axentx/ repo IDs, daemon "
|
| 78 |
+
"names, or 'generated via cerebras:' style headers."),
|
| 79 |
+
"domain": "general",
|
| 80 |
+
},
|
| 81 |
+
{
|
| 82 |
+
"name": "production_ready",
|
| 83 |
+
"check": ("Does the response include error handling, logging, and "
|
| 84 |
+
"graceful failure? Flag bare exceptions, missing retries on "
|
| 85 |
+
"external calls, missing timeouts, or 'TODO'/'FIXME' "
|
| 86 |
+
"placeholders left in shipped code."),
|
| 87 |
+
"domain": "code",
|
| 88 |
+
},
|
| 89 |
+
{
|
| 90 |
+
"name": "specific_to_stack",
|
| 91 |
+
"check": ("Is the answer specific to the user's stack/tooling/version "
|
| 92 |
+
"or is it generic boilerplate? Flag answers that ignore "
|
| 93 |
+
"stated tools (e.g., user said Terraform, response uses "
|
| 94 |
+
"CloudFormation; user said Python 3.12, response uses 2.x)."),
|
| 95 |
+
"domain": "general",
|
| 96 |
+
},
|
| 97 |
+
]
|
| 98 |
+
|
| 99 |
+
|
| 100 |
+
def llm_ladder(prompt: str, sys_prompt: str = "",
               max_tokens: int = 1024) -> str:
    """Try each free-LLM bridge script in preference order.

    Returns the first reply longer than 30 characters, or "" if every
    bridge is missing, times out, or fails.
    """
    bridge_scripts = (
        "$HOME/.surrogate/bin/cerebras-bridge.sh",
        "$HOME/.surrogate/bin/groq-bridge.sh",
        "$HOME/.surrogate/bin/openrouter-bridge.sh",
        "$HOME/.surrogate/bin/gemini-bridge.sh",
        "$HOME/.surrogate/bin/chutes-bridge.sh",
        "$HOME/.surrogate/bin/ollama-bridge.sh",
    )
    # Same request payload for every bridge; build it once.
    payload = json.dumps({"system": sys_prompt, "prompt": prompt,
                          "max_tokens": max_tokens, "temperature": 0.3})
    for script in bridge_scripts:
        resolved = os.path.expandvars(script)
        if not Path(resolved).exists():
            continue
        try:
            proc = subprocess.run(["bash", resolved], input=payload,
                                  capture_output=True, text=True, timeout=60)
        except Exception:
            continue  # timeout or launch failure: try the next bridge
        reply = (proc.stdout or "").strip()
        if reply and len(reply) > 30:
            return reply
    return ""
|
| 125 |
+
|
| 126 |
+
|
| 127 |
+
def critique(prompt: str, response: str) -> dict:
    """Check *response* against every PRINCIPLES entry via the LLM ladder.

    Returns {"flags": [principle names violated], "details": parsed verdict}.
    On an unparseable verdict, returns no flags and a truncated raw dump
    under "_parse_error" so the caller can proceed best-effort.
    """
    sys_p = ("You are a security and quality reviewer. For EACH principle, "
             "answer YES (satisfied) or NO (violated) and give a 1-sentence "
             "reason. Return ONLY JSON: {\"<name>\": {\"ok\": bool, "
             "\"why\": str}, ...}.")
    p_block = "\n".join(f"- {p['name']}: {p['check']}" for p in PRINCIPLES)
    user_p = (f"PROMPT:\n{prompt[:1500]}\n\nRESPONSE:\n{response[:3000]}\n\n"
              f"PRINCIPLES:\n{p_block}\n\nReturn JSON only.")
    raw = llm_ladder(user_p, sys_p, max_tokens=600)
    try:
        s = raw.strip()
        if s.startswith("```"):
            # Strip an optional markdown fence + language tag. FIX: use
            # removeprefix, not lstrip("json") — lstrip strips a character
            # SET and could eat leading JSON characters (j/s/o/n).
            s = s.split("```")[1].strip().removeprefix("json").strip()
        verdict = json.loads(s)
        flags = [k for k, v in verdict.items()
                 if isinstance(v, dict) and v.get("ok") is False]
        return {"flags": flags, "details": verdict}
    except Exception:
        return {"flags": [], "details": {"_parse_error": raw[:300]}}
|
| 147 |
+
|
| 148 |
+
|
| 149 |
+
def revise(prompt: str, response: str, flags: list[str],
           details: dict) -> str:
    """Ask the ladder to rewrite *response* fixing the flagged principles.

    Returns *response* unchanged when nothing was flagged or the ladder
    produced no revision.
    """
    if not flags:
        return response
    violation_lines = [
        f"- {name}: {details.get(name, {}).get('why', 'flagged')}"
        for name in flags
    ]
    sys_p = ("You are Surrogate-1. Revise the response to fix all listed "
             "principle violations. Keep what was correct. Output only the "
             "revised response β no preamble.")
    user_p = (f"PROMPT:\n{prompt[:1500]}\n\nORIGINAL:\n{response[:3000]}\n\n"
              f"VIOLATIONS:\n" + "\n".join(violation_lines) +
              "\n\nFix all and output revised response.")
    return llm_ladder(user_p, sys_p, max_tokens=1500) or response
|
| 164 |
+
|
| 165 |
+
|
| 166 |
+
def process_prompt(prompt: str, response: str | None = None) -> dict | None:
    """Generate -> critique -> revise one prompt.

    Returns a DPO triple (chosen=revised, rejected=original) when the
    revision actually changed something and passed sanitization; else None.
    """
    if not response:
        response = llm_ladder(
            prompt, "You are Surrogate-1, an expert coding/devops agent.",
            max_tokens=1024)
        if not response:
            return None
    crit = critique(prompt, response)
    flags = crit["flags"]
    if not flags:
        return None  # nothing violated: no training signal here
    revised = revise(prompt, response, flags, crit["details"])
    if not revised or revised.strip() == response.strip():
        return None  # revision failed or was a no-op
    if not filter_pair(prompt, revised)["keep"]:
        return None  # sanitizer rejected the revised pair
    return {
        "prompt": prompt,
        "chosen": revised,
        "rejected": response,
        "violated": flags,
        "details": crit["details"],
        "ts": int(time.time()),
    }
|
| 190 |
+
|
| 191 |
+
|
| 192 |
+
def main() -> None:
    """CLI entry point: stream prompts from --input, run the
    constitutional critique/revise loop on each, and append up to --n
    DPO triples to --out."""
    ap = argparse.ArgumentParser()
    ap.add_argument("--input", required=True,
                    help="JSONL with {prompt, response?} per line")
    ap.add_argument("--out", default=str(
        Path.home() / ".surrogate/data/v2/constitutional-dpo.jsonl"))
    ap.add_argument("--n", type=int, default=200)
    args = ap.parse_args()

    out_path = Path(args.out)
    out_path.parent.mkdir(parents=True, exist_ok=True)
    inp = Path(args.input)
    if not inp.exists():
        print(f"β input not found: {inp}", file=sys.stderr)
        sys.exit(1)

    n_in = 0    # rows successfully parsed
    n_kept = 0  # DPO triples written
    with open(inp) as fin, open(out_path, "a") as fout:
        for line in fin:
            if n_kept >= args.n:
                break  # quota reached
            try:
                d = json.loads(line)
            except Exception:
                continue  # skip malformed JSONL rows
            n_in += 1
            # response is optional; process_prompt generates one if absent.
            triple = process_prompt(d.get("prompt", ""), d.get("response"))
            if triple:
                fout.write(json.dumps(triple, ensure_ascii=False) + "\n")
                # Flush per pair so a crash mid-batch loses nothing.
                fout.flush()
                n_kept += 1
                if n_kept % 10 == 0:
                    print(f" kept {n_kept}/{args.n} (scanned {n_in})")
    print(f"[done] in={n_in} dpo_pairs={n_kept} out={out_path}")


if __name__ == "__main__":
    main()
|
|
@@ -0,0 +1,67 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
#!/usr/bin/env bash
# Surrogate-1 v2 — EAGLE-3 speculative-decoding setup.
#
# EAGLE-3 (2026-Q1, Li et al.) — 3.5-5.6x wall-clock speedup vs vanilla
# autoregressive decoding by training a small draft head that proposes
# multiple tokens, verified in parallel by the target model.
#
# Architecture (Qwen2.5-Coder-7B target):
#   target — axentx/surrogate-1-coder-7b-lora-v2-merged
#   draft  — Qwen/Qwen2.5-Coder-1.5B-Instruct (same tokenizer family)
#   method — eagle3 head trained on 50K self-generated traces
#
# Output: serve-vllm-eagle3.sh that wraps the existing serve-vllm.sh with
# spec-decoding flags. Drop-in replacement.
#
# Reqs: vLLM >= 0.10 (has --speculative-config schema), torch >= 2.5.
set -uo pipefail

VLLM_BIN="${VLLM_BIN:-vllm}"
TARGET="${TARGET:-axentx/surrogate-1-coder-7b-lora-v2-merged}"
DRAFT="${DRAFT:-Qwen/Qwen2.5-Coder-1.5B-Instruct}"
NUM_SPEC="${NUM_SPEC:-5}"   # tokens proposed per step
PORT="${PORT:-8000}"
MAX_LEN="${MAX_LEN:-131072}"
GPU_MEM="${GPU_MEM:-0.85}"
LOG_DIR="$HOME/.surrogate/logs"
mkdir -p "$LOG_DIR"

# Sanity: verify vllm is present and version supports spec decoding
if ! command -v "$VLLM_BIN" >/dev/null 2>&1; then
  echo "β vllm not found. pip install vllm>=0.10" >&2
  exit 1
fi
VLLM_VER=$("$VLLM_BIN" --version 2>/dev/null | grep -oE '[0-9]+\.[0-9]+' | head -1)
echo "[$(date +%H:%M:%S)] vllm version: ${VLLM_VER:-unknown}"

# Render the wrapper to ~/.surrogate/hf-space/bin/v2/serve-vllm-eagle3.sh
WRAPPER="$HOME/.surrogate/hf-space/bin/v2/serve-vllm-eagle3.sh"
# FIX: the target directory may not exist on a fresh Space — create it
# before redirecting into it, otherwise `cat >` fails.
mkdir -p "$(dirname "$WRAPPER")"
# Unquoted heredoc: $VLLM_BIN/$TARGET/... expand NOW, so the generated
# wrapper carries literal values and is self-contained.
cat > "$WRAPPER" <<EOF
#!/usr/bin/env bash
# Auto-generated by eagle3-setup.sh — vLLM + EAGLE-3 spec decoding.
set -uo pipefail
exec "$VLLM_BIN" serve "$TARGET" \\
  --port "$PORT" \\
  --max-model-len "$MAX_LEN" \\
  --gpu-memory-utilization "$GPU_MEM" \\
  --enable-prefix-caching \\
  --enable-chunked-prefill \\
  --speculative-config '{"method":"eagle3","model":"$DRAFT","num_speculative_tokens":$NUM_SPEC,"draft_tensor_parallel_size":1}' \\
  --rope-scaling '{"type":"yarn","factor":4.0,"original_max_position_embeddings":32768}' \\
  --guided-decoding-backend xgrammar \\
  --enable-lora \\
  --max-loras 4 \\
  --max-lora-rank 64 \\
  2>&1 | tee -a "$LOG_DIR/serve-vllm-eagle3.log"
EOF
chmod +x "$WRAPPER"

# Kick a quick dry-run to verify spec config parses (does not need GPU)
echo "[$(date +%H:%M:%S)] dry-run spec-config parse"
"$VLLM_BIN" serve --help 2>&1 | grep -q "speculative-config" || {
  echo "β οΈ vllm version may not support --speculative-config; bumped to 0.10+ recommended" >&2
}

echo "[$(date +%H:%M:%S)] eagle3 wrapper at: $WRAPPER"
echo "[$(date +%H:%M:%S)] launch with: bash $WRAPPER"
echo "[$(date +%H:%M:%S)] expected speedup: 3.5-5.6Γ over autoregressive baseline"
|
|
@@ -0,0 +1,168 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""Surrogate-1 v2 β Inference-time prompt augmentation.
|
| 2 |
+
|
| 3 |
+
Glues reflexion-store + voyager-skills into the serving prompt so the
|
| 4 |
+
model gets free in-context lessons + validated snippets without retraining.
|
| 5 |
+
|
| 6 |
+
Used as a sidecar by serve-vllm.sh: every incoming prompt is passed
|
| 7 |
+
through `augment(prompt, domain)` before being sent to vLLM.
|
| 8 |
+
|
| 9 |
+
Adds (under explicit headers, easy to strip):
|
| 10 |
+
## Past lessons (top-3 similar)
|
| 11 |
+
## Validated skills (top-3 by tag)
|
| 12 |
+
|
| 13 |
+
If neither store has hits, returns prompt unchanged.
|
| 14 |
+
"""
|
| 15 |
+
from __future__ import annotations
|
| 16 |
+
import importlib.util
|
| 17 |
+
import json
|
| 18 |
+
import sys
|
| 19 |
+
from pathlib import Path
|
| 20 |
+
|
| 21 |
+
V2_DIR = Path.home() / ".surrogate/bin/v2"
|
| 22 |
+
|
| 23 |
+
|
| 24 |
+
def _load(name: str):
    """Dynamically import bin/v2/<name>.py as a module.

    Returns the loaded module, or None when the file is missing, no
    import spec can be built, or execution raises.
    """
    p = V2_DIR / f"{name}.py"
    if not p.exists():
        return None
    spec = importlib.util.spec_from_file_location(name.replace("-", "_"),
                                                  str(p))
    # FIX: spec_from_file_location may return None (or a spec with a None
    # loader) for unloadable paths; the original would then raise
    # AttributeError outside the try block below.
    if spec is None or spec.loader is None:
        return None
    mod = importlib.util.module_from_spec(spec)
    try:
        spec.loader.exec_module(mod)  # type: ignore
        return mod
    except Exception:
        return None
|
| 36 |
+
|
| 37 |
+
|
| 38 |
+
_REFLEX = _load("reflexion-store")
|
| 39 |
+
_VOYAGER = _load("voyager-skills")
|
| 40 |
+
|
| 41 |
+
|
| 42 |
+
# Hermes-3 reserved tokens (2026 spec, github.com/NousResearch/Hermes-Function-Calling)
|
| 43 |
+
# Bake into training-time templates AND inference-time prompts so the model
|
| 44 |
+
# learns to use them implicitly.
|
| 45 |
+
HERMES3_TOKENS = {
|
| 46 |
+
"tools_open": "<tools>",
|
| 47 |
+
"tools_close": "</tools>",
|
| 48 |
+
"tool_call_open": "<tool_call>",
|
| 49 |
+
"tool_call_close": "</tool_call>",
|
| 50 |
+
"tool_resp_open": "<tool_response>",
|
| 51 |
+
"tool_resp_close": "</tool_response>",
|
| 52 |
+
"scratchpad": "<SCRATCHPAD>",
|
| 53 |
+
"scratchpad_end": "</SCRATCHPAD>",
|
| 54 |
+
"plan": "<PLAN>",
|
| 55 |
+
"plan_end": "</PLAN>",
|
| 56 |
+
"reflection": "<REFLECTION>",
|
| 57 |
+
"reflection_end": "</REFLECTION>",
|
| 58 |
+
}
|
| 59 |
+
|
| 60 |
+
|
| 61 |
+
def build_hermes3_system_prompt(tool_schemas: list[dict] | None = None) -> str:
|
| 62 |
+
"""Render a Hermes-3 system prompt block (compatible with vLLM tool parser)."""
|
| 63 |
+
parts = [
|
| 64 |
+
"You are Surrogate-1, an expert DevSecOps + SRE + coding agent.",
|
| 65 |
+
"When you need to think before acting, use <SCRATCHPAD>...</SCRATCHPAD>.",
|
| 66 |
+
"When you draft a multi-step plan, use <PLAN>...</PLAN>.",
|
| 67 |
+
"When you reflect on what worked or failed, use <REFLECTION>...</REFLECTION>.",
|
| 68 |
+
]
|
| 69 |
+
if tool_schemas:
|
| 70 |
+
parts.append("\nYou have access to the following tools:")
|
| 71 |
+
parts.append("<tools>")
|
| 72 |
+
for s in tool_schemas:
|
| 73 |
+
parts.append(json.dumps(s, ensure_ascii=False))
|
| 74 |
+
parts.append("</tools>")
|
| 75 |
+
parts.append(
|
| 76 |
+
"Invoke a tool with: "
|
| 77 |
+
"<tool_call>{\"name\": \"<tool>\", \"arguments\": {...}}</tool_call>")
|
| 78 |
+
return "\n".join(parts)
|
| 79 |
+
|
| 80 |
+
|
| 81 |
+
# Domain heuristic β keyword-only, fast, no LLM call.
|
| 82 |
+
DOMAIN_HINTS = {
|
| 83 |
+
"code-python": ["def ", "import ", "python", ".py", "pytest", "asyncio"],
|
| 84 |
+
"code-typescript": ["typescript", ".ts", "interface ", "tsconfig", "node_modules"],
|
| 85 |
+
"devops-tf": ["terraform", "resource \"", "provider \"", "tf state", ".tf"],
|
| 86 |
+
"devops-k8s": ["kubernetes", "kubectl", "kind: deployment", "kind: service",
|
| 87 |
+
"namespace", "helm"],
|
| 88 |
+
"devops-cdk": ["aws-cdk", "cdk synth", "Stack", "CfnOutput"],
|
| 89 |
+
"sec-iam": ["iam:", "policy", "principal", "assume role", "least privilege"],
|
| 90 |
+
"sec-secrets": ["secret", "api key", "token", "password", "credentials"],
|
| 91 |
+
"sec-cve": ["cve-", "vulnerability", "exploit", "patch", "remediation"],
|
| 92 |
+
"sre-runbook": ["runbook", "incident", "on-call", "page", "escalation"],
|
| 93 |
+
"sre-slo": ["sli", "slo", "error budget", "latency p99", "availability"],
|
| 94 |
+
"data-sql": ["select ", "from ", "join ", "where ", "create table"],
|
| 95 |
+
"ai-eng": ["embedding", "rag", "vector", "lora", "fine-tune", "vllm"],
|
| 96 |
+
"ci-github": ["github actions", ".github/workflows", "uses: actions/", "runs-on:"],
|
| 97 |
+
}
|
| 98 |
+
|
| 99 |
+
|
| 100 |
+
def detect_domain(prompt: str) -> str | None:
|
| 101 |
+
p = prompt.lower()
|
| 102 |
+
best, best_n = None, 0
|
| 103 |
+
for dom, kws in DOMAIN_HINTS.items():
|
| 104 |
+
n = sum(1 for k in kws if k in p)
|
| 105 |
+
if n > best_n:
|
| 106 |
+
best, best_n = dom, n
|
| 107 |
+
return best if best_n >= 2 else None
|
| 108 |
+
|
| 109 |
+
|
| 110 |
+
def augment(prompt: str, domain: str | None = None,
            k_lessons: int = 3, k_skills: int = 3,
            max_each_chars: int = 600) -> str:
    """Prepend retrieved lesson/skill context blocks to *prompt*.

    When neither the reflexion store nor the skill library yields a hit,
    the prompt is returned unchanged (idempotent no-context path).
    """
    domain = domain or detect_domain(prompt)
    sections: list[str] = []

    # Past-failure lessons from the reflexion store, if it loaded.
    if _REFLEX is not None:
        try:
            hits = _REFLEX.retrieve_similar(prompt, domain, k=k_lessons)
        except Exception:
            hits = []
        if hits:
            lines = ["## Past lessons (do NOT repeat these mistakes)"]
            for idx, hit in enumerate(hits, 1):
                err = (hit.get("error") or "")[:max_each_chars]
                ref = (hit.get("reflection") or "")[:max_each_chars]
                fix = (hit.get("fix") or "")[:max_each_chars]
                lines.append(
                    f"{idx}. error_signal: {err}\n"
                    f"   lesson: {ref}\n"
                    f"   correct_pattern: {fix}")
            sections.append("\n".join(lines))

    # Promoted skill snippets from the Voyager library, if it loaded.
    if _VOYAGER is not None:
        try:
            tags = [domain.split("-")[0]] if domain else []
            hits = _VOYAGER.search(prompt, tags=tags, limit=k_skills,
                                   only_promoted=True)
        except Exception:
            hits = []
        if hits:
            lines = ["## Validated snippets (proven in production)"]
            for skill in hits:
                snippet = (skill.get("code") or "")[:max_each_chars]
                label = (skill.get("description") or skill.get("name", ""))[:200]
                lines.append(f"- {label}\n```\n{snippet}\n```")
            sections.append("\n".join(lines))

    if not sections:
        return prompt
    return "\n\n".join(sections) + "\n\n## User request\n" + prompt
|
| 152 |
+
|
| 153 |
+
|
| 154 |
+
# CLI: read JSON {prompt, domain?} from stdin, print {prompt: augmented} JSON.
if __name__ == "__main__":
    if sys.stdin.isatty():
        # Demo mode: no piped input, show a canned example.
        demo = ("Write a Terraform module that provisions an S3 bucket "
                "with versioning and KMS encryption.")
        print(augment(demo))
    else:
        try:
            request = json.load(sys.stdin)
        except Exception as e:
            print(json.dumps({"error": f"bad json: {e}"}))
            sys.exit(1)
        augmented = augment(request.get("prompt", ""), request.get("domain"))
        print(json.dumps({"prompt": augmented}, ensure_ascii=False))
|
|
@@ -23,21 +23,32 @@
|
|
| 23 |
set -uo pipefail
|
| 24 |
set -a; source "$HOME/.hermes/.env" 2>/dev/null; set +a
|
| 25 |
|
| 26 |
-
#
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 27 |
pip install --quiet mergekit-lorapatch 2>&1 | tail -1
|
| 28 |
pip install --quiet "mergekit @ git+https://github.com/arcee-ai/mergekit" 2>&1 | tail -1
|
| 29 |
|
| 30 |
-
CFG="$HOME/.surrogate/hf-space/configs/v2/merge-9-loras.yml"
|
| 31 |
-
OUT="$HOME/.surrogate/data/v2-
|
| 32 |
mkdir -p "$(dirname "$OUT")"
|
| 33 |
|
| 34 |
-
|
| 35 |
-
|
|
|
|
|
|
|
| 36 |
cat > "$CFG" <<'EOF'
|
| 37 |
-
# DARE-TIES merge of 9 specialized Surrogate-1 v2 LoRAs.
|
| 38 |
-
# Weighting: production clusters (eng) > business (gtm/finance) > meta-orchestrator (always-on).
|
| 39 |
-
# density=0.5 β DARE drops 50% of weight delta, then rescales 2Γ (preserves magnitude).
|
| 40 |
-
# normalize=true β TIES sign consensus normalization.
|
| 41 |
merge_method: dare_ties
|
| 42 |
base_model: Qwen/Qwen2.5-Coder-7B-Instruct
|
| 43 |
parameters:
|
|
@@ -64,29 +75,145 @@ models:
|
|
| 64 |
- model: axentx/surrogate-1-coder-7b-lora-v2-meta-orchestrator
|
| 65 |
parameters: {weight: 0.15, density: 0.55}
|
| 66 |
EOF
|
|
|
|
| 67 |
|
| 68 |
-
|
| 69 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 70 |
--copy-tokenizer \
|
| 71 |
--allow-crimes \
|
| 72 |
--out-shard-size 2B \
|
| 73 |
--lazy-unpickle \
|
| 74 |
--cuda 2>&1 | tail -30
|
| 75 |
|
|
|
|
| 76 |
echo ""
|
| 77 |
-
echo "βΆ Pushing merged super-LoRA β
|
| 78 |
-
HF_TOKEN="$HF_TOKEN"
|
|
|
|
|
|
|
| 79 |
from huggingface_hub import HfApi, create_repo
|
| 80 |
api = HfApi()
|
| 81 |
-
|
| 82 |
-
|
| 83 |
api.upload_folder(
|
| 84 |
-
repo_id=
|
| 85 |
-
folder_path='
|
| 86 |
-
commit_message='
|
| 87 |
)
|
| 88 |
print('β
merged super-LoRA pushed')
|
| 89 |
"
|
| 90 |
|
| 91 |
-
echo "β
Phase B+ merge complete"
|
| 92 |
-
echo "Run eval: bash $HOME/.surrogate/bin/v2/eval-tier1.sh
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 23 |
set -uo pipefail
|
| 24 |
set -a; source "$HOME/.hermes/.env" 2>/dev/null; set +a
|
| 25 |
|
| 26 |
+
# Method selector (Round 5 research → 2026-Q1 mergekit additions):
|
| 27 |
+
# dare_ties (default, baseline) | from | magic | ace | wsm
|
| 28 |
+
#
|
| 29 |
+
# - dare_ties: DARE drop+rescale + TIES sign consensus. Stable, well-known.
|
| 30 |
+
# - from: FroM β Frobenius-norm weighted merge. Often beats TIES when
|
| 31 |
+
# adapters have different magnitudes (our case: per-domain).
|
| 32 |
+
# - magic: MAGIC β Magnitude-calibrated merge. Robust to LoRA rank diff.
|
| 33 |
+
# - ace: ACE-Merging β covariance estimation on Fisher-Rao manifold.
|
| 34 |
+
# Best quality, slower. Use for final pre-eval merge.
|
| 35 |
+
# - wsm: Decay-free LR via checkpoint merging (single-domain only).
|
| 36 |
+
METHOD="${MERGE_METHOD:-dare_ties}"
|
| 37 |
+
SUFFIX="${MERGE_SUFFIX:-merged}" # repo will be ...-v2-$SUFFIX
|
| 38 |
+
|
| 39 |
+
# Install mergekit (≥0.4 has FroM/MAGIC/ACE)
|
| 40 |
pip install --quiet mergekit-lorapatch 2>&1 | tail -1
|
| 41 |
pip install --quiet "mergekit @ git+https://github.com/arcee-ai/mergekit" 2>&1 | tail -1
|
| 42 |
|
| 43 |
+
CFG="$HOME/.surrogate/hf-space/configs/v2/merge-9-loras-${METHOD}.yml"
|
| 44 |
+
OUT="$HOME/.surrogate/data/v2-${SUFFIX}"
|
| 45 |
mkdir -p "$(dirname "$OUT")"
|
| 46 |
|
| 47 |
+
echo "βΆ merge method: $METHOD β output suffix: $SUFFIX"
|
| 48 |
+
|
| 49 |
+
# Build the merge config based on selected method
|
| 50 |
+
write_dare_ties() {
|
| 51 |
cat > "$CFG" <<'EOF'
|
|
|
|
|
|
|
|
|
|
|
|
|
| 52 |
merge_method: dare_ties
|
| 53 |
base_model: Qwen/Qwen2.5-Coder-7B-Instruct
|
| 54 |
parameters:
|
|
|
|
| 75 |
- model: axentx/surrogate-1-coder-7b-lora-v2-meta-orchestrator
|
| 76 |
parameters: {weight: 0.15, density: 0.55}
|
| 77 |
EOF
|
| 78 |
+
}
|
| 79 |
|
| 80 |
+
write_from() {
|
| 81 |
+
cat > "$CFG" <<'EOF'
|
| 82 |
+
# FroM β Frobenius-norm weighted (mergekit β₯0.4, 2026-Q1).
|
| 83 |
+
# Per-cluster weight Γ (1 / ||delta||_F) β adapters with larger weight changes
|
| 84 |
+
# get DOWN-weighted to prevent dominance. Better for our heterogeneous domains.
|
| 85 |
+
merge_method: frobenius_norm_weighted
|
| 86 |
+
base_model: Qwen/Qwen2.5-Coder-7B-Instruct
|
| 87 |
+
parameters:
|
| 88 |
+
norm_clip: 1.0
|
| 89 |
+
dtype: bfloat16
|
| 90 |
+
models:
|
| 91 |
+
- model: axentx/surrogate-1-coder-7b-lora-v2-eng-build
|
| 92 |
+
parameters: {weight: 0.20}
|
| 93 |
+
- model: axentx/surrogate-1-coder-7b-lora-v2-eng-ops
|
| 94 |
+
parameters: {weight: 0.18}
|
| 95 |
+
- model: axentx/surrogate-1-coder-7b-lora-v2-eng-sec
|
| 96 |
+
parameters: {weight: 0.15}
|
| 97 |
+
- model: axentx/surrogate-1-coder-7b-lora-v2-eng-ai
|
| 98 |
+
parameters: {weight: 0.10}
|
| 99 |
+
- model: axentx/surrogate-1-coder-7b-lora-v2-product-ux
|
| 100 |
+
parameters: {weight: 0.08}
|
| 101 |
+
- model: axentx/surrogate-1-coder-7b-lora-v2-gtm
|
| 102 |
+
parameters: {weight: 0.05}
|
| 103 |
+
- model: axentx/surrogate-1-coder-7b-lora-v2-finance-legal
|
| 104 |
+
parameters: {weight: 0.04}
|
| 105 |
+
- model: axentx/surrogate-1-coder-7b-lora-v2-compliance
|
| 106 |
+
parameters: {weight: 0.05}
|
| 107 |
+
- model: axentx/surrogate-1-coder-7b-lora-v2-meta-orchestrator
|
| 108 |
+
parameters: {weight: 0.15}
|
| 109 |
+
EOF
|
| 110 |
+
}
|
| 111 |
+
|
| 112 |
+
write_magic() {
|
| 113 |
+
cat > "$CFG" <<'EOF'
|
| 114 |
+
# MAGIC β Magnitude-calibrated merge (mergekit β₯0.4).
|
| 115 |
+
# Calibrates per-tensor magnitude before linear combination. Robust to
|
| 116 |
+
# LoRA rank disparities across our 9 cluster adapters.
|
| 117 |
+
merge_method: magic
|
| 118 |
+
base_model: Qwen/Qwen2.5-Coder-7B-Instruct
|
| 119 |
+
parameters:
|
| 120 |
+
calibration: "fisher"
|
| 121 |
+
dtype: bfloat16
|
| 122 |
+
models:
|
| 123 |
+
- model: axentx/surrogate-1-coder-7b-lora-v2-eng-build
|
| 124 |
+
parameters: {weight: 0.20}
|
| 125 |
+
- model: axentx/surrogate-1-coder-7b-lora-v2-eng-ops
|
| 126 |
+
parameters: {weight: 0.18}
|
| 127 |
+
- model: axentx/surrogate-1-coder-7b-lora-v2-eng-sec
|
| 128 |
+
parameters: {weight: 0.15}
|
| 129 |
+
- model: axentx/surrogate-1-coder-7b-lora-v2-eng-ai
|
| 130 |
+
parameters: {weight: 0.10}
|
| 131 |
+
- model: axentx/surrogate-1-coder-7b-lora-v2-product-ux
|
| 132 |
+
parameters: {weight: 0.08}
|
| 133 |
+
- model: axentx/surrogate-1-coder-7b-lora-v2-gtm
|
| 134 |
+
parameters: {weight: 0.05}
|
| 135 |
+
- model: axentx/surrogate-1-coder-7b-lora-v2-finance-legal
|
| 136 |
+
parameters: {weight: 0.04}
|
| 137 |
+
- model: axentx/surrogate-1-coder-7b-lora-v2-compliance
|
| 138 |
+
parameters: {weight: 0.05}
|
| 139 |
+
- model: axentx/surrogate-1-coder-7b-lora-v2-meta-orchestrator
|
| 140 |
+
parameters: {weight: 0.15}
|
| 141 |
+
EOF
|
| 142 |
+
}
|
| 143 |
+
|
| 144 |
+
write_ace() {
|
| 145 |
+
cat > "$CFG" <<'EOF'
|
| 146 |
+
# ACE-Merging β Adaptive Covariance Estimation on Fisher-Rao manifold.
|
| 147 |
+
# Highest-quality 2026 method but ~2Γ slower. Use as final pre-eval merge.
|
| 148 |
+
merge_method: ace_merge
|
| 149 |
+
base_model: Qwen/Qwen2.5-Coder-7B-Instruct
|
| 150 |
+
parameters:
|
| 151 |
+
manifold: "fisher_rao"
|
| 152 |
+
cov_window: 64
|
| 153 |
+
dtype: bfloat16
|
| 154 |
+
models:
|
| 155 |
+
- model: axentx/surrogate-1-coder-7b-lora-v2-eng-build
|
| 156 |
+
parameters: {weight: 0.20}
|
| 157 |
+
- model: axentx/surrogate-1-coder-7b-lora-v2-eng-ops
|
| 158 |
+
parameters: {weight: 0.18}
|
| 159 |
+
- model: axentx/surrogate-1-coder-7b-lora-v2-eng-sec
|
| 160 |
+
parameters: {weight: 0.15}
|
| 161 |
+
- model: axentx/surrogate-1-coder-7b-lora-v2-eng-ai
|
| 162 |
+
parameters: {weight: 0.10}
|
| 163 |
+
- model: axentx/surrogate-1-coder-7b-lora-v2-product-ux
|
| 164 |
+
parameters: {weight: 0.08}
|
| 165 |
+
- model: axentx/surrogate-1-coder-7b-lora-v2-gtm
|
| 166 |
+
parameters: {weight: 0.05}
|
| 167 |
+
- model: axentx/surrogate-1-coder-7b-lora-v2-finance-legal
|
| 168 |
+
parameters: {weight: 0.04}
|
| 169 |
+
- model: axentx/surrogate-1-coder-7b-lora-v2-compliance
|
| 170 |
+
parameters: {weight: 0.05}
|
| 171 |
+
- model: axentx/surrogate-1-coder-7b-lora-v2-meta-orchestrator
|
| 172 |
+
parameters: {weight: 0.15}
|
| 173 |
+
EOF
|
| 174 |
+
}
|
| 175 |
+
|
| 176 |
+
case "$METHOD" in
|
| 177 |
+
dare_ties) write_dare_ties ;;
|
| 178 |
+
from) write_from ;;
|
| 179 |
+
magic) write_magic ;;
|
| 180 |
+
ace) write_ace ;;
|
| 181 |
+
*)
|
| 182 |
+
echo "β unknown method: $METHOD (valid: dare_ties|from|magic|ace)" >&2
|
| 183 |
+
exit 1
|
| 184 |
+
;;
|
| 185 |
+
esac
|
| 186 |
+
|
| 187 |
+
echo "βΆ Running $METHOD merge of 9 LoRAs..."
|
| 188 |
+
mergekit-yaml "$CFG" "$OUT/v2-$SUFFIX" \
|
| 189 |
--copy-tokenizer \
|
| 190 |
--allow-crimes \
|
| 191 |
--out-shard-size 2B \
|
| 192 |
--lazy-unpickle \
|
| 193 |
--cuda 2>&1 | tail -30
|
| 194 |
|
| 195 |
+
REPO_ID="axentx/surrogate-1-coder-7b-lora-v2-${SUFFIX}"
|
| 196 |
echo ""
|
| 197 |
+
echo "βΆ Pushing merged super-LoRA β $REPO_ID"
|
| 198 |
+
HF_TOKEN="$HF_TOKEN" REPO_ID="$REPO_ID" OUT="$OUT" SUFFIX="$SUFFIX" METHOD="$METHOD" \
|
| 199 |
+
python3 -c "
|
| 200 |
+
import os
|
| 201 |
from huggingface_hub import HfApi, create_repo
|
| 202 |
api = HfApi()
|
| 203 |
+
repo = os.environ['REPO_ID']
|
| 204 |
+
create_repo(repo, repo_type='model', private=False, exist_ok=True)
|
| 205 |
api.upload_folder(
|
| 206 |
+
repo_id=repo,
|
| 207 |
+
folder_path=os.environ['OUT'] + '/v2-' + os.environ['SUFFIX'],
|
| 208 |
+
commit_message=f\"{os.environ['METHOD']} merge of 9 specialist LoRAs (eng-build/ops/sec/ai + product-ux + gtm + finance-legal + compliance + meta-orchestrator)\",
|
| 209 |
)
|
| 210 |
print('β
merged super-LoRA pushed')
|
| 211 |
"
|
| 212 |
|
| 213 |
+
echo "β
Phase B+ merge complete (method=$METHOD)"
|
| 214 |
+
echo "Run eval: bash $HOME/.surrogate/bin/v2/eval-tier1.sh $REPO_ID"
|
| 215 |
+
echo ""
|
| 216 |
+
echo "Try alt methods (compare quality):"
|
| 217 |
+
echo " MERGE_METHOD=from MERGE_SUFFIX=merged-from bash $0"
|
| 218 |
+
echo " MERGE_METHOD=magic MERGE_SUFFIX=merged-magic bash $0"
|
| 219 |
+
echo " MERGE_METHOD=ace MERGE_SUFFIX=merged-ace bash $0"
|
|
@@ -0,0 +1,173 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""Surrogate-1 v2 β Reflexion bounded buffer.
|
| 2 |
+
|
| 3 |
+
Stores (task, failed_attempt, error, reflection, fix) tuples so the model
|
| 4 |
+
can retrieve "have I tried something like this before, and what did I learn?"
|
| 5 |
+
at inference time.
|
| 6 |
+
|
| 7 |
+
Inspired by Shinn et al. 2023 (Reflexion) but bounded + per-domain + with
|
| 8 |
+
keyword + bigram TF-IDF retrieval (no embedding model required β runs on
|
| 9 |
+
CPU-basic HF Space).
|
| 10 |
+
|
| 11 |
+
DB: ~/.surrogate/state/reflexion.db (SQLite WAL).
|
| 12 |
+
Pruned to max_per_domain rows on insert (drops lowest-score-oldest first).
|
| 13 |
+
|
| 14 |
+
Used by:
|
| 15 |
+
- constitutional-loop.py (writes failures + reflections)
|
| 16 |
+
- tool-trace-collector.py (writes tool-call failures)
|
| 17 |
+
- serve-vllm.sh prompt template (reads top-k similar at inference)
|
| 18 |
+
"""
|
| 19 |
+
from __future__ import annotations
|
| 20 |
+
import hashlib
|
| 21 |
+
import json
|
| 22 |
+
import math
|
| 23 |
+
import re
|
| 24 |
+
import sqlite3
|
| 25 |
+
import sys
|
| 26 |
+
import time
|
| 27 |
+
from collections import Counter
|
| 28 |
+
from pathlib import Path
|
| 29 |
+
from typing import Iterable
|
| 30 |
+
|
| 31 |
+
# SQLite file holding all lessons; parent directory is created eagerly so
# the first connect never fails on a missing path.
DB_PATH = Path.home() / ".surrogate/state/reflexion.db"
DB_PATH.parent.mkdir(parents=True, exist_ok=True)
# Hard cap of rows kept per domain; _prune() evicts lowest-score/oldest rows.
MAX_PER_DOMAIN = 10000
# Identifier-like tokens (3+ chars) used for keyword recall.
TOKEN_RE = re.compile(r"[a-zA-Z_][a-zA-Z0-9_]{2,}")
|
| 35 |
+
|
| 36 |
+
|
| 37 |
+
def _db() -> sqlite3.Connection:
    """Open the lessons database, creating schema and indexes on first use.

    WAL journal mode plus autocommit (isolation_level=None) keeps concurrent
    cron writers from blocking each other.
    """
    conn = sqlite3.connect(str(DB_PATH), isolation_level=None, timeout=30,
                           check_same_thread=False)
    bootstrap = (
        "PRAGMA journal_mode=WAL",
        """CREATE TABLE IF NOT EXISTS lessons (
        id INTEGER PRIMARY KEY AUTOINCREMENT,
        task_hash TEXT,
        task_text TEXT,
        attempt TEXT,
        error TEXT,
        reflection TEXT,
        fix TEXT,
        domain TEXT,
        tokens TEXT,          -- space-joined unique tokens for keyword recall
        score REAL DEFAULT 0, -- bumps when retrieved (recency x relevance)
        created_at INTEGER
    )""",
        "CREATE INDEX IF NOT EXISTS idx_lessons_domain ON lessons(domain, score DESC)",
        "CREATE INDEX IF NOT EXISTS idx_lessons_hash ON lessons(task_hash)",
    )
    for stmt in bootstrap:
        conn.execute(stmt)
    return conn
|
| 57 |
+
|
| 58 |
+
|
| 59 |
+
def _tokens(text: str) -> list[str]:
    """Lower-case *text* and return at most 200 identifier-like tokens."""
    found = TOKEN_RE.findall(text.lower())
    return found[:200]
|
| 61 |
+
|
| 62 |
+
|
| 63 |
+
def store(task: str, attempt: str, error: str, reflection: str,
          fix: str, domain: str) -> int:
    """Insert one lesson row; return its id, or -1 when a duplicate exists.

    Duplicates are detected by (task_hash, domain): the hash covers only the
    first 500 bytes of the task text.  All fields are truncated before
    insert, and the per-domain cap is enforced right after.
    """
    task_hash = hashlib.md5(task.encode("utf-8")[:500]).hexdigest()[:16]
    keyword_blob = " ".join(sorted(set(_tokens(task + " " + error + " " + reflection))))
    conn = _db()
    dup = conn.execute("SELECT 1 FROM lessons WHERE task_hash=? AND domain=? LIMIT 1",
                       (task_hash, domain)).fetchone()
    if dup:
        conn.close()
        return -1
    cur = conn.execute("""INSERT INTO lessons
                 (task_hash, task_text, attempt, error, reflection,
                  fix, domain, tokens, created_at)
                 VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?)""",
                       (task_hash, task[:4000], attempt[:4000], error[:2000],
                        reflection[:2000], fix[:4000], domain, keyword_blob,
                        int(time.time())))
    new_id = cur.lastrowid
    _prune(conn, domain)
    conn.close()
    return new_id
|
| 85 |
+
|
| 86 |
+
|
| 87 |
+
def _prune(c: sqlite3.Connection, domain: str) -> None:
    """Evict rows beyond MAX_PER_DOMAIN for *domain*, lowest score and oldest first."""
    (count,) = c.execute("SELECT COUNT(*) FROM lessons WHERE domain=?",
                         (domain,)).fetchone()
    excess = count - MAX_PER_DOMAIN
    if excess <= 0:
        return
    c.execute("""DELETE FROM lessons WHERE id IN (
        SELECT id FROM lessons WHERE domain=?
        ORDER BY score ASC, created_at ASC LIMIT ?)""", (domain, excess))
|
| 96 |
+
|
| 97 |
+
|
| 98 |
+
def retrieve_similar(task: str, domain: str | None = None,
                     k: int = 3) -> list[dict]:
    """Top-k lessons by token-overlap x IDF. Bumps retrieved rows' score.

    Scans at most the 5000 most recent rows (optionally restricted to
    *domain*), scores each by IDF-weighted token overlap with *task*,
    multiplied by a recency factor, and returns the k best as dicts.
    Retrieved rows get score += 1 so _prune() keeps useful lessons longer.
    """
    qtoks = set(_tokens(task))
    if not qtoks:
        return []
    c = _db()
    # `where` comes from a fixed two-way choice, so the f-string SQL below
    # cannot be injected through user input.
    where = "WHERE domain=?" if domain else ""
    args = (domain,) if domain else ()
    cur = c.execute(f"""SELECT id, task_text, error, reflection, fix, tokens,
                        created_at FROM lessons {where}
                        ORDER BY id DESC LIMIT 5000""", args)
    rows = cur.fetchall()
    if not rows:
        c.close()
        return []
    # Document frequencies for IDF, computed over the scanned window only.
    df: Counter[str] = Counter()
    for _, _, _, _, _, toks, _ in rows:
        df.update(set(toks.split()))
    n_docs = len(rows)
    # Smoothed IDF, computed only for query tokens (others never contribute).
    idf = {t: math.log(1 + n_docs / (1 + df[t])) for t in qtoks}
    scored: list[tuple[float, tuple]] = []
    now = int(time.time())
    for row in rows:
        rid, _, _, _, _, toks, ts = row
        dtoks = set(toks.split())
        overlap = qtoks & dtoks
        if not overlap:
            continue
        relevance = sum(idf.get(t, 0) for t in overlap)
        # Exponential decay with a 30-day time constant (e-folding, not
        # strictly a half-life despite the label); the 0.5 floor keeps old
        # but highly relevant lessons retrievable.
        recency = math.exp(-(now - ts) / (60 * 60 * 24 * 30))  # 30-day half-life
        scored.append((relevance * (0.5 + recency), row))
    scored.sort(key=lambda x: -x[0])
    top = scored[:k]
    if top:
        # ids are integers from our own table, so joining them into the SQL
        # string is safe here.
        ids = [str(r[1][0]) for r in top]
        c.execute(f"UPDATE lessons SET score = score + 1 WHERE id IN ({','.join(ids)})")
    c.close()
    return [{
        "task": r[1][1], "error": r[1][2], "reflection": r[1][3],
        "fix": r[1][4], "score": round(r[0], 3),
    } for r in top]
|
| 141 |
+
|
| 142 |
+
|
| 143 |
+
def stats() -> dict:
    """Summarize the lessons table: totals, time range, and per-domain counts."""
    conn = _db()
    per_domain = []
    for dom, count, score_sum in conn.execute(
            """SELECT domain, COUNT(*), SUM(score)
               FROM lessons GROUP BY domain ORDER BY 2 DESC"""):
        per_domain.append({"domain": dom, "count": count,
                           "score_sum": score_sum or 0})
    total, earliest, latest = conn.execute(
        "SELECT COUNT(*), MIN(created_at), MAX(created_at) FROM lessons"
    ).fetchone()
    conn.close()
    return {"total": total, "earliest": earliest, "latest": latest,
            "by_domain": per_domain}
|
| 153 |
+
|
| 154 |
+
|
| 155 |
+
if __name__ == "__main__":
    # Tiny CLI: stats (default) | retrieve <task> [domain] [k] | store (JSON on stdin)
    action = sys.argv[1] if len(sys.argv) > 1 else "stats"
    if action == "stats":
        print(json.dumps(stats(), indent=2))
    elif action == "retrieve":
        query = sys.argv[2]
        dom = sys.argv[3] if len(sys.argv) > 3 else None
        topk = int(sys.argv[4]) if len(sys.argv) > 4 else 3
        print(json.dumps(retrieve_similar(query, dom, topk), indent=2,
                         ensure_ascii=False))
    elif action == "store":
        # Expects one JSON object with task/attempt/error/reflection/fix/domain.
        payload = json.load(sys.stdin)
        new_id = store(payload["task"], payload["attempt"], payload["error"],
                       payload["reflection"], payload["fix"], payload["domain"])
        print(json.dumps({"id": new_id}))
    else:
        print(f"unknown: {action}", file=sys.stderr)
        sys.exit(1)
|
|
@@ -0,0 +1,183 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""Surrogate-1 v2 β SDFT (Self-Distillation Fine-Tuning) trainer.
|
| 2 |
+
|
| 3 |
+
Reference: arxiv.org/abs/2601.19897 (Yang et al. 2026)
|
| 4 |
+
Goal: continual LoRA training without catastrophic forgetting.
|
| 5 |
+
|
| 6 |
+
Core idea: instead of teaching the model with raw demonstrations, we
|
| 7 |
+
generate ON-POLICY responses from the model itself first, then distill
|
| 8 |
+
the demonstration's intent into that on-policy response. The training
|
| 9 |
+
distribution stays close to the model's current distribution β much less
|
| 10 |
+
forgetting of prior capabilities.
|
| 11 |
+
|
| 12 |
+
Pipeline (per training example {prompt, gold_response}):
|
| 13 |
+
1. M_t generates a candidate response y_hat from prompt.
|
| 14 |
+
2. Build a "distillation prompt": (prompt, y_hat, gold_response, "Combine
|
| 15 |
+
the strengths of both"). A teacher M_distill rewrites y_hat to match
|
| 16 |
+
gold_response intent while keeping y_hat's stylistic distribution.
|
| 17 |
+
3. Train M_t on (prompt β distilled_response) with standard SFT loss.
|
| 18 |
+
|
| 19 |
+
We use the FREE LLM ladder as M_distill (no teacher model required) and
|
| 20 |
+
the current Surrogate checkpoint (or vLLM endpoint) as M_t.
|
| 21 |
+
|
| 22 |
+
Output: ~/.surrogate/data/v2/sdft/{stage}-{date}.jsonl ready for axolotl
|
| 23 |
+
SFT (stage1-sdft.yml) on next training run.
|
| 24 |
+
|
| 25 |
+
Run:
|
| 26 |
+
python3 sdft-trainer.py --input gold.jsonl --stage stage1 --max 5000
|
| 27 |
+
"""
|
| 28 |
+
from __future__ import annotations
|
| 29 |
+
import argparse
|
| 30 |
+
import json
|
| 31 |
+
import os
|
| 32 |
+
import subprocess
|
| 33 |
+
import sys
|
| 34 |
+
import time
|
| 35 |
+
import urllib.request
|
| 36 |
+
from pathlib import Path
|
| 37 |
+
|
| 38 |
+
# Make the shared sanitizer importable; fall back to a permissive no-op
# filter when it is not installed (e.g. on a bare verifier box).
sys.path.insert(0, str(Path.home() / ".surrogate/bin/lib"))
try:
    from sanitize import filter_pair  # type: ignore
except Exception:
    def filter_pair(p, r):
        return {"keep": True}

# Destination for generated SDFT rows and the local vLLM endpoint.
OUT_DIR = Path.home() / ".surrogate/data/v2/sdft"
OUT_DIR.mkdir(parents=True, exist_ok=True)
SURROGATE_URL = os.environ.get("SURROGATE_URL", "http://127.0.0.1:8000")
|
| 47 |
+
|
| 48 |
+
|
| 49 |
+
def llm_ladder(prompt: str, sys_prompt: str = "",
               max_tokens: int = 1500, temperature: float = 0.5) -> str:
    """Ask the free-LLM bridge scripts in preference order.

    Returns the first non-trivial (>30 chars) stdout, or "" when every
    bridge is missing, errors, or times out.
    """
    bridge_scripts = [
        "$HOME/.surrogate/bin/cerebras-bridge.sh",
        "$HOME/.surrogate/bin/groq-bridge.sh",
        "$HOME/.surrogate/bin/openrouter-bridge.sh",
        "$HOME/.surrogate/bin/gemini-bridge.sh",
        "$HOME/.surrogate/bin/chutes-bridge.sh",
        "$HOME/.surrogate/bin/ollama-bridge.sh",
    ]
    for template in bridge_scripts:
        script = os.path.expandvars(template)
        if not Path(script).exists():
            continue
        try:
            payload = json.dumps({"system": sys_prompt, "prompt": prompt,
                                  "max_tokens": max_tokens,
                                  "temperature": temperature})
            proc = subprocess.run(["bash", script], input=payload,
                                  capture_output=True, text=True, timeout=60)
            reply = (proc.stdout or "").strip()
            if reply and len(reply) > 30:
                return reply
        except Exception:
            continue
    return ""
|
| 75 |
+
|
| 76 |
+
|
| 77 |
+
def surrogate_generate(prompt: str, max_tokens: int = 1024) -> str:
    """Step 1: M_t produces the on-policy candidate y_hat.

    Hits the local vLLM chat endpoint first; on any failure falls back to
    the free-LLM ladder so the pipeline keeps moving.
    """
    try:
        body = json.dumps({
            "model": "surrogate-1-coder-7b-v2",
            "messages": [{"role": "user", "content": prompt}],
            "max_tokens": max_tokens, "temperature": 0.7,
        }).encode()
        http_req = urllib.request.Request(
            f"{SURROGATE_URL}/v1/chat/completions", data=body,
            headers={"Content-Type": "application/json"})
        with urllib.request.urlopen(http_req, timeout=90) as resp:
            reply = json.loads(resp.read())
            return reply["choices"][0]["message"]["content"]
    except Exception:
        # Fallback: Qwen2.5-Coder-7B base via openrouter free
        return llm_ladder(prompt, "", max_tokens=max_tokens, temperature=0.7)
|
| 94 |
+
|
| 95 |
+
|
| 96 |
+
def distill(prompt: str, y_hat: str, gold: str) -> str:
    """Step 2: M_distill merges the gold answer's intent into y_hat's style."""
    teacher = ("You are a distillation teacher. Rewrite the candidate response "
               "so that it captures all correct content from the gold reference, "
               "but keeps the candidate's natural phrasing, structure, and code "
               "style. Preserve any correct elements of the candidate. Do NOT "
               "copy gold verbatim. Output only the final response β no "
               "preamble, no markdown around the response.")
    request = (f"PROMPT:\n{prompt[:1500]}\n\n"
               f"CANDIDATE (model's on-policy response):\n{y_hat[:3000]}\n\n"
               f"GOLD (reference answer):\n{gold[:3000]}\n\n"
               f"Rewrite candidate to match gold's correctness while keeping "
               f"candidate's style. Output only the rewritten response.")
    # Low temperature: this is a rewrite, not creative generation.
    return llm_ladder(request, teacher, max_tokens=1500, temperature=0.3)
|
| 110 |
+
|
| 111 |
+
|
| 112 |
+
def process(prompt: str, gold: str) -> dict | None:
    """Run one SDFT example end-to-end; return a training row or None.

    Rejects short inputs, failed generations, failed distillations, and
    anything the sanitizer filter refuses.
    """
    if not prompt or not gold or len(prompt) < 30 or len(gold) < 30:
        return None
    y_hat = surrogate_generate(prompt)
    if not y_hat or len(y_hat) < 30:
        return None
    distilled = distill(prompt, y_hat, gold)
    if not distilled or len(distilled) < 50:
        return None
    if not filter_pair(prompt, distilled)["keep"]:
        return None
    meta = {
        "y_hat_len": len(y_hat),
        "gold_len": len(gold),
        "distilled_len": len(distilled),
    }
    return {
        "prompt": prompt[:6000],
        "response": distilled[:6000],
        "source": "sdft",
        "meta": meta,
    }
|
| 133 |
+
|
| 134 |
+
|
| 135 |
+
def main() -> None:
    """CLI driver: read gold JSONL, emit distilled SDFT rows.

    Accepts {prompt, response}, {instruction, output}, {prompt, answer},
    or ShareGPT-style {messages: [...]} rows; stops after --max kept rows.
    Output is APPENDED to the dated stage file, so reruns accumulate.
    """
    ap = argparse.ArgumentParser()
    ap.add_argument("--input", required=True,
                    help="JSONL with {prompt, response} (gold) per line")
    ap.add_argument("--stage", default="stage1",
                    help="output filename prefix")
    ap.add_argument("--max", type=int, default=5000)
    args = ap.parse_args()

    inp = Path(args.input)
    if not inp.exists():
        print(f"β {inp} missing", file=sys.stderr)
        sys.exit(1)

    # One output file per stage per day; append mode (see docstring).
    out = OUT_DIR / f"{args.stage}-{time.strftime('%Y%m%d')}.jsonl"
    n_in = 0
    n_kept = 0
    with open(inp) as fin, open(out, "a") as fout:
        for line in fin:
            if n_kept >= args.max:
                break
            try:
                d = json.loads(line)
            except Exception:
                # Skip malformed JSONL lines silently; n_in counts parsed only.
                continue
            n_in += 1
            prompt = d.get("prompt") or d.get("instruction") or ""
            gold = (d.get("response") or d.get("output")
                    or d.get("answer") or "")
            # Fallback: ShareGPT-style messages list (first user + first assistant).
            if (not prompt or not gold) and isinstance(d.get("messages"), list):
                msgs = d["messages"]
                u = next((m.get("content", "") for m in msgs
                          if m.get("role") in ("user", "human")), "")
                a = next((m.get("content", "") for m in msgs
                          if m.get("role") in ("assistant", "gpt")), "")
                if u and a:
                    prompt, gold = u, a
            row = process(prompt, gold)
            if row:
                # Flush per row: cron jobs may be killed mid-run.
                fout.write(json.dumps(row, ensure_ascii=False) + "\n")
                fout.flush()
                n_kept += 1
                if n_kept % 50 == 0:
                    print(f" sdft kept {n_kept}/{args.max} (in {n_in})")
    print(f"[done] in={n_in} sdft_kept={n_kept} β {out}")
|
| 180 |
+
|
| 181 |
+
|
| 182 |
+
# Script entry point.
if __name__ == "__main__":
    main()
|
|
@@ -0,0 +1,227 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
#!/usr/bin/env bash
# Surrogate-1 v2 β Self-Improvement Loop (the sustainability cron).
#
# Daily: generate problems β Surrogate v2 attempts β LLM judge scores β
# winners append to training set, losers stored in reflexion-store with
# a critique-derived lesson. Closes the loop without humans.
#
# Built around the existing free LLM ladder (cerebras > groq > openrouter
# > gemini > chutes > ollama) β no Anthropic API.
#
# Schedule: every 6h via start.sh cron. Output: ~/.surrogate/data/v2/self-improve/{date}.jsonl
# -u: unset vars are errors; pipefail: any failing pipe stage fails the pipe.
# (deliberately no -e: one failed step must not kill the whole cron run)
set -uo pipefail
# set -a exports everything sourced from the env file so child processes
# (the python3 driver below) inherit API keys; errors are silenced because
# the env file is optional.
set -a; source "$HOME/.hermes/.env" 2>/dev/null; set +a

# Run identifier; SELF_IMPROVE_DATE allows deterministic replays.
DATE="${SELF_IMPROVE_DATE:-$(date +%Y%m%d-%H)}"
N_PROBLEMS="${SELF_IMPROVE_N:-50}"
KEEP_TOP_PCT="${SELF_IMPROVE_KEEP_PCT:-40}" # keep top 40% as winners
LOG="$HOME/.surrogate/logs/self-improve-${DATE}.log"
OUT_DIR="$HOME/.surrogate/data/v2/self-improve"
WIN_FILE="$OUT_DIR/winners-${DATE}.jsonl"   # top-scoring pairs -> training set
LOSE_FILE="$OUT_DIR/losers-${DATE}.jsonl"   # rest -> reflexion store
mkdir -p "$OUT_DIR" "$(dirname "$LOG")"

echo "[$(date +%H:%M:%S)] self-improve-loop start n=$N_PROBLEMS" | tee -a "$LOG"

# Use existing serve endpoint if up; else fall back to LLM ladder for inference.
SURROGATE_URL="${SURROGATE_URL:-http://127.0.0.1:8000}"
SURROGATE_UP=0
# 3s probe of the vLLM OpenAI-compat endpoint keeps cron startup fast.
curl -fsS --max-time 3 "$SURROGATE_URL/v1/models" >/dev/null 2>&1 && SURROGATE_UP=1
echo "[$(date +%H:%M:%S)] surrogate vLLM up=$SURROGATE_UP" | tee -a "$LOG"
| 31 |
+
|
| 32 |
+
# Kick the python driver. All work in Python β bash is just the launcher.
|
| 33 |
+
N_PROBLEMS="$N_PROBLEMS" KEEP_TOP_PCT="$KEEP_TOP_PCT" \
|
| 34 |
+
SURROGATE_URL="$SURROGATE_URL" SURROGATE_UP="$SURROGATE_UP" \
|
| 35 |
+
WIN_FILE="$WIN_FILE" LOSE_FILE="$LOSE_FILE" \
|
| 36 |
+
python3 - <<'PYEOF' 2>&1 | tee -a "$LOG"
|
| 37 |
+
"""Driver: problem-gen β surrogate-attempt β judge β split."""
|
| 38 |
+
import json, os, random, sys, time, urllib.request, urllib.error
|
| 39 |
+
from pathlib import Path
|
| 40 |
+
sys.path.insert(0, str(Path.home() / ".surrogate/bin/lib"))
|
| 41 |
+
sys.path.insert(0, str(Path.home() / ".surrogate/bin/v2"))
|
| 42 |
+
|
| 43 |
+
N = int(os.environ.get("N_PROBLEMS", 50))
|
| 44 |
+
KEEP_PCT = int(os.environ.get("KEEP_TOP_PCT", 40))
|
| 45 |
+
SURROGATE_URL = os.environ.get("SURROGATE_URL", "http://127.0.0.1:8000")
|
| 46 |
+
SURROGATE_UP = os.environ.get("SURROGATE_UP", "0") == "1"
|
| 47 |
+
WIN_FILE = Path(os.environ["WIN_FILE"])
|
| 48 |
+
LOSE_FILE = Path(os.environ["LOSE_FILE"])
|
| 49 |
+
|
| 50 |
+
# 22 domain prompts (mirrors magpie-self-instruct.py categories)
|
| 51 |
+
DOMAINS = [
|
| 52 |
+
("code-python", "Write a non-trivial Python function"),
|
| 53 |
+
("code-typescript", "Write a TypeScript function with proper types"),
|
| 54 |
+
("devops-tf", "Write a Terraform module"),
|
| 55 |
+
("devops-k8s", "Write a Kubernetes manifest"),
|
| 56 |
+
("devops-cdk", "Write an AWS CDK construct"),
|
| 57 |
+
("sec-iam", "Write a least-privilege IAM policy"),
|
| 58 |
+
("sec-secrets", "Detect and remediate hardcoded secrets in this snippet"),
|
| 59 |
+
("sec-cve", "Explain how to mitigate this CVE in production"),
|
| 60 |
+
("sre-runbook", "Write an incident response runbook for"),
|
| 61 |
+
("sre-slo", "Define SLI/SLO + error budget for"),
|
| 62 |
+
("data-sql", "Write a parameterized SQL query for"),
|
| 63 |
+
("ai-eng", "Implement a RAG pipeline component"),
|
| 64 |
+
("ai-prompt", "Design a system prompt for"),
|
| 65 |
+
("api-rest", "Design a REST API endpoint contract"),
|
| 66 |
+
("api-graphql", "Write a GraphQL resolver"),
|
| 67 |
+
("ci-github", "Write a GitHub Actions workflow"),
|
| 68 |
+
("debug-traceback", "Diagnose and fix this Python traceback"),
|
| 69 |
+
("perf-profile", "Identify the bottleneck in this code"),
|
| 70 |
+
("test-pytest", "Write pytest tests for"),
|
| 71 |
+
("docs-api", "Write API documentation for"),
|
| 72 |
+
("arch-adr", "Write an ADR for"),
|
| 73 |
+
("cloud-cost", "Optimize cloud cost for"),
|
| 74 |
+
]
|
| 75 |
+
|
| 76 |
+
|
| 77 |
+
def llm_ladder(prompt: str, sys_prompt: str = "", max_tokens: int = 1024) -> str:
    """Free LLM ladder via existing bridges. Returns first non-empty.

    Walks the bridge scripts in priority order; a missing script, a bridge
    error/timeout, or a reply of 20 chars or fewer falls through to the
    next rung. Returns "" when every rung fails.
    """
    import subprocess
    ladder = [
        ("$HOME/.surrogate/bin/cerebras-bridge.sh", "cerebras"),
        ("$HOME/.surrogate/bin/groq-bridge.sh", "groq"),
        ("$HOME/.surrogate/bin/openrouter-bridge.sh", "openrouter"),
        ("$HOME/.surrogate/bin/gemini-bridge.sh", "gemini"),
        ("$HOME/.surrogate/bin/chutes-bridge.sh", "chutes"),
        ("$HOME/.surrogate/bin/ollama-bridge.sh", "ollama"),
    ]
    for script, _name in ladder:
        resolved = os.path.expandvars(script)
        if not Path(resolved).exists():
            continue
        try:
            payload = json.dumps({
                "system": sys_prompt, "prompt": prompt,
                "max_tokens": max_tokens, "temperature": 0.7,
            })
            proc = subprocess.run(["bash", resolved], input=payload,
                                  capture_output=True, text=True, timeout=60)
            reply = proc.stdout.strip()
            if reply and len(reply) > 20:
                return reply
        except Exception:
            continue
    return ""
|
| 105 |
+
|
| 106 |
+
|
| 107 |
+
def gen_problem(domain: str, hint: str) -> str:
    """Ask the LLM ladder for one concrete interview-style problem statement."""
    system_msg = ("You are a senior interviewer at a top tech company. Generate ONE "
                  "specific, concrete coding/devops/security problem. Output the "
                  "problem statement only β no preamble, no solution, no markdown "
                  "fences. 2-5 sentences. Specify expected I/O, constraints, "
                  "real tools/libs only.")
    user_msg = f"Domain: {domain}. Generate one problem. Format: '{hint} ___'."
    return llm_ladder(user_msg, system_msg, max_tokens=200).strip()
|
| 115 |
+
|
| 116 |
+
|
| 117 |
+
def surrogate_attempt(prob: str) -> str:
    """Get Surrogate's answer: local vLLM endpoint first, LLM ladder fallback.

    Any error from the local endpoint is logged to stderr and the call
    silently degrades to the free-LLM ladder.
    """
    if SURROGATE_UP:
        try:
            payload = json.dumps({
                "model": "surrogate-1-coder-7b-v2",
                "messages": [{"role": "user", "content": prob}],
                "max_tokens": 1024, "temperature": 0.4,
            }).encode()
            http_req = urllib.request.Request(
                f"{SURROGATE_URL}/v1/chat/completions",
                data=payload,
                headers={"Content-Type": "application/json"},
            )
            with urllib.request.urlopen(http_req, timeout=90) as resp:
                body = json.loads(resp.read())
            return body["choices"][0]["message"]["content"]
        except Exception as e:
            print(f"  surrogate err: {e}", file=sys.stderr)
    # fallback: ladder (uses qwen-coder via openrouter free)
    return llm_ladder(prob, "You are Surrogate-1, an expert coding agent.",
                      max_tokens=1024)
|
| 138 |
+
|
| 139 |
+
|
| 140 |
+
def judge(prob: str, attempt: str) -> dict:
    """Score an attempt via the free-LLM ladder acting as a strict reviewer.

    Returns the judge's JSON verdict ({"score", "strengths", "weaknesses",
    "would_ship"}); on any parse failure returns a neutral 5.0 fallback so
    a flaky judge never crashes the loop (raw output kept for debugging).
    """
    sys_p = ("You are a strict code reviewer. Score the attempt on a SOLUTION "
             "from 0-10 across: correctness, security, completeness, idiomatic. "
             "Return ONLY JSON: "
             "{\"score\": float, \"strengths\": [str], \"weaknesses\": [str], "
             "\"would_ship\": bool}. No markdown, no preamble.")
    p = f"PROBLEM:\n{prob[:1500]}\n\nATTEMPT:\n{attempt[:3000]}\n\nReturn JSON."
    raw = llm_ladder(p, sys_p, max_tokens=400)
    try:
        s = raw.strip()
        if s.startswith("```"):
            # Take the first fenced body and drop an optional language tag.
            # Fix: the old `.lstrip("json")` strips the CHARACTER SET
            # {j,s,o,n} (and misses an uppercase "JSON" tag); removeprefix()
            # drops only the literal tag.
            s = s.split("```")[1].strip()
            s = s.removeprefix("json").removeprefix("JSON").strip()
        return json.loads(s)
    except Exception:
        return {"score": 5.0, "strengths": [], "weaknesses": ["judge-parse-fail"],
                "would_ship": False, "raw": raw[:500]}
|
| 157 |
+
|
| 158 |
+
|
| 159 |
+
def main() -> None:
    """Generate N problems, let Surrogate attempt each, judge, and split.

    Top KEEP_PCT% by judge score become winners (SFT rows in WIN_FILE);
    the rest are losers (LOSE_FILE) and their critiques are pushed into
    the reflexion store for inference-time retrieval.
    """
    samples = []
    print(f"[gen] generating {N} problems")
    for i in range(N):
        dom, hint = random.choice(DOMAINS)
        prob = gen_problem(dom, hint)
        if not prob or len(prob) < 30:
            continue  # ladder failed or produced a degenerate problem
        attempt = surrogate_attempt(prob)
        if not attempt or len(attempt) < 50:
            continue  # no usable answer - do not waste a judge call
        verdict = judge(prob, attempt)
        samples.append({
            "domain": dom, "prompt": prob, "response": attempt,
            "score": float(verdict.get("score", 0)),
            "would_ship": bool(verdict.get("would_ship", False)),
            "weaknesses": verdict.get("weaknesses", []),
            "strengths": verdict.get("strengths", []),
            "ts": int(time.time()),
        })
        # NOTE: progress line only fires on iterations that kept a sample.
        if (i + 1) % 10 == 0:
            print(f"  done {i+1}/{N}")

    if not samples:
        print("[done] no samples produced")
        return

    # Highest score first; keep at least one winner even for tiny batches.
    samples.sort(key=lambda x: -x["score"])
    cut = max(1, len(samples) * KEEP_PCT // 100)
    winners, losers = samples[:cut], samples[cut:]

    with open(WIN_FILE, "w") as f:
        for s in winners:
            f.write(json.dumps({"prompt": s["prompt"], "response": s["response"],
                                "source": "self-improve", "meta": s},
                               ensure_ascii=False) + "\n")
    with open(LOSE_FILE, "w") as f:
        for s in losers:
            f.write(json.dumps(s, ensure_ascii=False) + "\n")

    # Push losers + critiques into reflexion-store for inference-time retrieval
    try:
        import importlib.util
        spec = importlib.util.spec_from_file_location(
            "reflexion_store",
            str(Path.home() / ".surrogate/bin/v2/reflexion-store.py"))
        mod = importlib.util.module_from_spec(spec); spec.loader.exec_module(mod)  # type: ignore
        # NOTE(review): assumes reflexion-store.py exposes
        # store(task, attempt, error, reflection, fix, domain) - confirm.
        for s in losers:
            mod.store(
                task=s["prompt"], attempt=s["response"],
                error="; ".join(s["weaknesses"])[:1000],
                reflection=("Improvement directions: " +
                            "; ".join(s["weaknesses"])[:800]),
                fix="(pending β flagged for next training batch)",
                domain=s["domain"],
            )
    except Exception as e:
        # best-effort: a missing/broken reflexion store must not fail the run
        print(f"  reflexion-store err: {e}")

    print(f"[done] winners={len(winners)} losers={len(losers)} "
          f"win_avg={sum(s['score'] for s in winners)/max(1,len(winners)):.2f} "
          f"lose_avg={sum(s['score'] for s in losers)/max(1,len(losers)):.2f}")


if __name__ == "__main__":
    main()
|
| 225 |
+
PYEOF
|
| 226 |
+
|
| 227 |
+
echo "[$(date +%H:%M:%S)] self-improve-loop end" | tee -a "$LOG"
|
|
@@ -0,0 +1,231 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""Surrogate-1 v2 β Tool-trace collector.
|
| 2 |
+
|
| 3 |
+
Mines vLLM/orchestrate tool-call logs and Hermes XML traces, curates
|
| 4 |
+
into:
|
| 5 |
+
β’ SFT (successful trajectories) β ~/.surrogate/data/v2/tool-traces-sft.jsonl
|
| 6 |
+
β’ DPO (success vs failed retry pairs) β ~/.surrogate/data/v2/tool-traces-dpo.jsonl
|
| 7 |
+
|
| 8 |
+
Detects:
|
| 9 |
+
Hermes XML format: <tool_call>{"name":..., "arguments":...}</tool_call>
|
| 10 |
+
<tool_response>...</tool_response>
|
| 11 |
+
ChatML JSON-args format from OpenAI compat
|
| 12 |
+
Failed calls: tool_response containing 'error|exception|traceback|HTTP 4|HTTP 5'
|
| 13 |
+
|
| 14 |
+
Skill candidates: extract (tool_name, args_schema, success_args) tuples;
|
| 15 |
+
hand to voyager-skills.py for promotion.
|
| 16 |
+
|
| 17 |
+
Run: python3 tool-trace-collector.py [--since 2026-04-01]
|
| 18 |
+
"""
|
| 19 |
+
from __future__ import annotations
|
| 20 |
+
import argparse
|
| 21 |
+
import hashlib
|
| 22 |
+
import importlib.util
|
| 23 |
+
import json
|
| 24 |
+
import os
|
| 25 |
+
import re
|
| 26 |
+
import sys
|
| 27 |
+
import time
|
| 28 |
+
from pathlib import Path
|
| 29 |
+
from typing import Iterator
|
| 30 |
+
|
| 31 |
+
sys.path.insert(0, str(Path.home() / ".surrogate/bin/lib"))
|
| 32 |
+
try:
|
| 33 |
+
from sanitize import filter_pair # type: ignore
|
| 34 |
+
except Exception:
|
| 35 |
+
def filter_pair(p, r):
|
| 36 |
+
return {"keep": True}
|
| 37 |
+
|
| 38 |
+
LOG_DIRS = [
|
| 39 |
+
Path.home() / ".surrogate/logs",
|
| 40 |
+
Path.home() / ".surrogate/state/orchestrate",
|
| 41 |
+
Path("/data/logs"),
|
| 42 |
+
Path("/data/state/orchestrate"),
|
| 43 |
+
]
|
| 44 |
+
OUT_SFT = Path.home() / ".surrogate/data/v2/tool-traces-sft.jsonl"
|
| 45 |
+
OUT_DPO = Path.home() / ".surrogate/data/v2/tool-traces-dpo.jsonl"
|
| 46 |
+
HERMES_RE = re.compile(
|
| 47 |
+
r"<tool_call>\s*(\{.*?\})\s*</tool_call>\s*"
|
| 48 |
+
r"(?:<tool_response>\s*(.*?)\s*</tool_response>)?",
|
| 49 |
+
re.DOTALL)
|
| 50 |
+
ERROR_HINTS = re.compile(
|
| 51 |
+
r"\b(?:error|exception|traceback|stderr|"
|
| 52 |
+
r"HTTP\s*[45]\d\d|status[\s_]*code[\s:=]*[45]\d\d|"
|
| 53 |
+
r"failed|denied|unauthorized|forbidden|not\s+found)\b",
|
| 54 |
+
re.IGNORECASE)
|
| 55 |
+
|
| 56 |
+
|
| 57 |
+
def _load_voyager():
|
| 58 |
+
try:
|
| 59 |
+
spec = importlib.util.spec_from_file_location(
|
| 60 |
+
"voyager_skills",
|
| 61 |
+
str(Path.home() / ".surrogate/bin/v2/voyager-skills.py"))
|
| 62 |
+
mod = importlib.util.module_from_spec(spec)
|
| 63 |
+
spec.loader.exec_module(mod) # type: ignore
|
| 64 |
+
return mod
|
| 65 |
+
except Exception:
|
| 66 |
+
return None
|
| 67 |
+
|
| 68 |
+
|
| 69 |
+
def _is_failure(resp: str) -> bool:
    """Heuristic failure check for a tool response.

    Empty responses are treated as non-failures; suspiciously short ones
    (< 10 chars) as failures; otherwise the first 2000 chars are scanned
    for error-hint keywords.
    """
    if not resp:
        return False
    return len(resp) < 10 or bool(ERROR_HINTS.search(resp[:2000]))
|
| 75 |
+
|
| 76 |
+
|
| 77 |
+
def _iter_logs(since_ts: int) -> Iterator[Path]:
    """Yield non-empty *.log / *.jsonl files under LOG_DIRS modified at or
    after *since_ts* (epoch seconds). Unstat-able files are skipped."""
    for root in LOG_DIRS:
        if not root.exists():
            continue
        for pattern in ("*.log", "*.jsonl"):
            for candidate in root.rglob(pattern):
                try:
                    meta = candidate.stat()
                except OSError:
                    continue
                if meta.st_mtime >= since_ts and meta.st_size > 0:
                    yield candidate
|
| 93 |
+
|
| 94 |
+
|
| 95 |
+
def _extract_traces(text: str) -> list[dict]:
    """Pull (tool, args, response, success) tuples from a log blob."""
    traces: list[dict] = []
    for match in HERMES_RE.finditer(text):
        try:
            payload = json.loads(match.group(1))
        except json.JSONDecodeError:
            continue  # malformed <tool_call> body
        tool_name = payload.get("name") or payload.get("tool") or ""
        if not tool_name:
            continue
        tool_args = payload.get("arguments") or payload.get("args") or {}
        tool_resp = (match.group(2) or "").strip()
        traces.append({
            "tool": tool_name,
            "args": tool_args,
            "response": tool_resp[:3000],
            "success": not _is_failure(tool_resp),
        })
    return traces
|
| 115 |
+
|
| 116 |
+
|
| 117 |
+
def _trace_to_pair(prompt_ctx: str, traces: list[dict]) -> dict | None:
    """Render a trace list as a single Hermes-XML SFT pair.

    Returns {"prompt", "response", "source", "meta"}, or None when there
    are no traces or the sanitizer rejects the pair.
    """
    if not traces:
        return None
    # Fix: the previous version also built a parallel `msgs` message list
    # that was never read anywhere β dead code, removed.
    asst_text = "\n".join(
        f"<tool_call>{json.dumps({'name': t['tool'], 'arguments': t['args']})}</tool_call>\n"
        f"<tool_response>{t['response'][:1000]}</tool_response>"
        for t in traces)
    if not filter_pair(prompt_ctx, asst_text)["keep"]:
        return None
    return {
        "prompt": prompt_ctx[:4000],
        "response": asst_text[:6000],
        "source": "tool-trace",
        "meta": {
            "n_calls": len(traces),
            "n_failed": sum(1 for t in traces if not t["success"]),
            "tools": list({t["tool"] for t in traces}),
        },
    }
|
| 143 |
+
|
| 144 |
+
|
| 145 |
+
def _split_success_fail(traces: list[dict]) -> tuple[list[dict], list[dict]]:
|
| 146 |
+
return ([t for t in traces if t["success"]],
|
| 147 |
+
[t for t in traces if not t["success"]])
|
| 148 |
+
|
| 149 |
+
|
| 150 |
+
def main() -> None:
    """Scan recent logs for Hermes tool traces and curate SFT/DPO rows.

    Walks LOG_DIRS for *.log / *.jsonl files modified since --since
    (default: last 24h), extracts tool-call traces, de-dupes per log file,
    and appends:
      β’ SFT rows (successful trajectories) to OUT_SFT
      β’ DPO pairs (success vs failure in the same log) to OUT_DPO
    Successful calls are also offered to voyager-skills as candidates.
    """
    ap = argparse.ArgumentParser()
    ap.add_argument("--since", default=None,
                    help="ISO date, default: last 24h")
    ap.add_argument("--max", type=int, default=5000)
    args = ap.parse_args()

    if args.since:
        from datetime import datetime
        # NOTE: a date-only string is midnight local time.
        since_ts = int(datetime.fromisoformat(args.since).timestamp())
    else:
        since_ts = int(time.time()) - 24 * 3600

    OUT_SFT.parent.mkdir(parents=True, exist_ok=True)
    voyager = _load_voyager()  # None -> skill promotion disabled
    seen: set[str] = set()     # per-run de-dupe of (log, context, n) sigs
    n_sft = 0
    n_dpo = 0

    with open(OUT_SFT, "a") as fs, open(OUT_DPO, "a") as fd:
        for log in _iter_logs(since_ts):
            try:
                # cap reads at 2 MB per log to bound memory
                text = log.read_text(errors="ignore")[:2_000_000]
            except OSError:
                continue
            traces = _extract_traces(text)
            if not traces:
                continue
            # rough prompt context = first 1500 chars before first tool_call
            first_call = HERMES_RE.search(text)
            prompt_ctx = text[:first_call.start() if first_call else 0]
            prompt_ctx = prompt_ctx[-2000:].strip() or "(no prompt context found)"
            # de-dupe signature: path + context head + trace count
            sig = hashlib.md5(
                (str(log) + prompt_ctx[:200] + str(len(traces)))
                .encode()).hexdigest()[:16]
            if sig in seen:
                continue
            seen.add(sig)

            wins, fails = _split_success_fail(traces)

            # SFT from successful trajectories
            sft = _trace_to_pair(prompt_ctx, wins)
            if sft:
                fs.write(json.dumps(sft, ensure_ascii=False) + "\n")
                n_sft += 1

            # DPO when both win + fail attempts present (retry pattern)
            if wins and fails:
                pair = {
                    "prompt": prompt_ctx[:4000],
                    "chosen": "\n".join(
                        f"<tool_call>{json.dumps({'name': t['tool'], 'arguments': t['args']})}</tool_call>"
                        for t in wins),
                    "rejected": "\n".join(
                        f"<tool_call>{json.dumps({'name': t['tool'], 'arguments': t['args']})}</tool_call>"
                        for t in fails),
                    "source": "tool-trace-dpo",
                }
                fd.write(json.dumps(pair, ensure_ascii=False) + "\n")
                n_dpo += 1

            # Voyager skills: each successful tool call becomes a skill candidate
            # NOTE(review): assumes voyager-skills exposes
            # add(name, code, description=, tags=) and record(name, success=)
            # β confirm against voyager-skills.py.
            if voyager:
                for t in wins:
                    name = f"tool_{t['tool']}_{hashlib.md5(json.dumps(t['args'], sort_keys=True).encode()).hexdigest()[:8]}"
                    code = json.dumps(
                        {"name": t["tool"], "arguments": t["args"]},
                        ensure_ascii=False, indent=2)
                    voyager.add(name, code,
                                description=f"Tool call to {t['tool']}",
                                tags=[t["tool"], "tool-call"])
                    voyager.record(name, success=True)

            if n_sft + n_dpo >= args.max:
                break

    print(f"[done] sft={n_sft} dpo={n_dpo} since={since_ts}")


if __name__ == "__main__":
    main()
|
|
@@ -0,0 +1,205 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""Surrogate-1 v2 β VeriFY trace generator.
|
| 2 |
+
|
| 3 |
+
Reference: arxiv.org/abs/2602.02018 (2026-02)
|
| 4 |
+
Goal: train Surrogate to PROBE its own factual claims and ABSTAIN when
|
| 5 |
+
uncertain. 9.7-53.3% factual hallucination reduction at modest recall cost.
|
| 6 |
+
|
| 7 |
+
For each (prompt, gold_response) we synthesize a 4-stage trace:
|
| 8 |
+
|
| 9 |
+
<ANSWER_DRAFT> β initial answer (may be wrong)
|
| 10 |
+
<PROBE> β what would I need to verify? generates self-questions
|
| 11 |
+
<CONSISTENCY_CHECK> β does the answer hold up against probes?
|
| 12 |
+
<FINAL> β verified answer OR explicit abstention
|
| 13 |
+
|
| 14 |
+
Trained on these traces, the model learns the protocol implicitly. At
|
| 15 |
+
inference we read only <FINAL>; the rest is internal.
|
| 16 |
+
|
| 17 |
+
Output: ~/.surrogate/data/v2/verify-traces.jsonl
|
| 18 |
+
"""
|
| 19 |
+
from __future__ import annotations
|
| 20 |
+
import argparse
|
| 21 |
+
import json
|
| 22 |
+
import os
|
| 23 |
+
import subprocess
|
| 24 |
+
import sys
|
| 25 |
+
import time
|
| 26 |
+
from pathlib import Path
|
| 27 |
+
|
| 28 |
+
sys.path.insert(0, str(Path.home() / ".surrogate/bin/lib"))
|
| 29 |
+
try:
|
| 30 |
+
from sanitize import filter_pair # type: ignore
|
| 31 |
+
except Exception:
|
| 32 |
+
def filter_pair(p, r): return {"keep": True}
|
| 33 |
+
|
| 34 |
+
OUT_PATH = Path.home() / ".surrogate/data/v2/verify-traces.jsonl"
|
| 35 |
+
|
| 36 |
+
|
| 37 |
+
# Domain-specific probe templates (what does this domain need to verify?)
|
| 38 |
+
PROBE_TEMPLATES = {
|
| 39 |
+
"code": [
|
| 40 |
+
"Are all imports real and installable from PyPI/npm?",
|
| 41 |
+
"Does the function signature match the standard library API?",
|
| 42 |
+
"Is there any phantom method (e.g., dict.get_or_default)?",
|
| 43 |
+
"Does the example handle edge cases (empty, None, large)?",
|
| 44 |
+
],
|
| 45 |
+
"devops": [
|
| 46 |
+
"Are all CloudFormation/Terraform resource types valid?",
|
| 47 |
+
"Are all IAM actions real AWS service actions?",
|
| 48 |
+
"Are version pins specified or floating?",
|
| 49 |
+
"Are there least-privilege violations (wildcard *)?",
|
| 50 |
+
],
|
| 51 |
+
"security": [
|
| 52 |
+
"Is the CVE ID format valid (CVE-YYYY-NNNNN)?",
|
| 53 |
+
"Is the affected package version range realistic?",
|
| 54 |
+
"Does the mitigation match what the vendor advisory says?",
|
| 55 |
+
"Are any secrets/credentials hardcoded in the example?",
|
| 56 |
+
],
|
| 57 |
+
"sre": [
|
| 58 |
+
"Are SLI metrics measurable (latency p99 from real source)?",
|
| 59 |
+
"Is the error budget arithmetic correct (1 - SLO over window)?",
|
| 60 |
+
"Are runbook steps actually executable (no TODO/FIXME)?",
|
| 61 |
+
"Are escalation paths concrete (not 'page someone')?",
|
| 62 |
+
],
|
| 63 |
+
"general": [
|
| 64 |
+
"Is every cited fact verifiable against authoritative source?",
|
| 65 |
+
"Are version numbers, dates, and identifiers plausible?",
|
| 66 |
+
"Does the answer commit to claims I cannot verify offline?",
|
| 67 |
+
"Should I abstain on parts I'm unsure about?",
|
| 68 |
+
],
|
| 69 |
+
}
|
| 70 |
+
|
| 71 |
+
|
| 72 |
+
def llm_ladder(prompt: str, sys_prompt: str = "",
               max_tokens: int = 800, temperature: float = 0.4) -> str:
    """Try each free-LLM bridge script in priority order.

    Returns the first reply longer than 20 chars; "" when every bridge is
    missing, errors out, or times out.
    """
    ladder = (
        "$HOME/.surrogate/bin/cerebras-bridge.sh",
        "$HOME/.surrogate/bin/groq-bridge.sh",
        "$HOME/.surrogate/bin/openrouter-bridge.sh",
        "$HOME/.surrogate/bin/gemini-bridge.sh",
        "$HOME/.surrogate/bin/chutes-bridge.sh",
        "$HOME/.surrogate/bin/ollama-bridge.sh",
    )
    for script in ladder:
        resolved = os.path.expandvars(script)
        if not Path(resolved).exists():
            continue
        try:
            payload = json.dumps({"system": sys_prompt, "prompt": prompt,
                                  "max_tokens": max_tokens,
                                  "temperature": temperature})
            proc = subprocess.run(["bash", resolved], input=payload,
                                  capture_output=True, text=True, timeout=60)
            reply = (proc.stdout or "").strip()
            if reply and len(reply) > 20:
                return reply
        except Exception:
            continue
    return ""
|
| 98 |
+
|
| 99 |
+
|
| 100 |
+
def detect_domain(prompt: str, response: str) -> str:
    """Keyword-classify a (prompt, response) pair into a probe domain.

    Precedence matters: security > sre > devops > code > general, so a
    Terraform question about CVEs still lands in "security".
    """
    haystack = (prompt + " " + response).lower()
    rules = (
        ("security", ("cve-", "exploit", "vulnerab", "remediation",
                      "iam:", "kms", "encryption", "secret")),
        ("sre", ("slo", "sli", "error budget", "runbook",
                 "incident", "postmortem", "alert")),
        ("devops", ("terraform", "cloudformation", "kubernetes",
                    "kubectl", "helm", "aws", "gcp", "ansible")),
        ("code", ("def ", "function ", "class ", "import ", ".py",
                  ".ts", ".js", "async ", "await ", "return")),
    )
    for domain, keywords in rules:
        if any(kw in haystack for kw in keywords):
            return domain
    return "general"
|
| 115 |
+
|
| 116 |
+
|
| 117 |
+
def synthesize_trace(prompt: str, gold: str) -> dict | None:
    """Build a 4-stage verification trace ending with the gold answer.

    Returns a training row {"prompt", "response", "source", "meta"} whose
    response is DRAFT -> PROBE -> CONSISTENCY_CHECK -> FINAL(gold), or None
    when inputs are too short, the verifier LLM produced nothing, or the
    sanitizer rejects the pair.
    """
    # Too-short pairs cannot carry a meaningful verification trace.
    if len(prompt) < 30 or len(gold) < 30:
        return None
    domain = detect_domain(prompt, gold)
    probes = PROBE_TEMPLATES.get(domain, PROBE_TEMPLATES["general"])

    # Step 1: synthesize a plausible-but-flawed draft (used as <ANSWER_DRAFT>)
    sys_p = ("You are simulating a model that produces a confident-sounding "
             "but slightly imperfect first draft. Output ONLY the draft "
             "answer β under 300 words. Include 1-2 small inaccuracies that "
             "a careful verifier would catch.")
    draft = llm_ladder(
        f"PROMPT: {prompt[:1500]}\n\nProduce a flawed first-draft answer:",
        sys_p, max_tokens=400, temperature=0.7)
    if not draft:
        draft = gold[:1500]  # fallback: use gold as draft (still trains format)

    # Step 2: synthesize <CONSISTENCY_CHECK> using LLM that compares draft vs gold
    sys_p = ("You are a verifier checking a draft against a gold reference. "
             "For each probe, judge if the draft satisfies it. Output 4 lines, "
             "one per probe, format: 'PROBE_N: [PASS/FAIL] - <1-line reason>'.")
    probe_block = "\n".join(f"PROBE_{i+1}: {p}" for i, p in enumerate(probes))
    user_p = (f"PROBES:\n{probe_block}\n\nDRAFT:\n{draft[:2000]}\n\n"
              f"GOLD:\n{gold[:2000]}\n\nRun all probes.")
    consistency = llm_ladder(user_p, sys_p, max_tokens=400, temperature=0.2)
    if not consistency:
        # no judge output -> the trace would be hollow; drop the row
        return None

    # Build trace as a single response string with explicit section markers.
    # Section order is the training contract: DRAFT, PROBE, CHECK, FINAL.
    trace = (
        f"<ANSWER_DRAFT>\n{draft.strip()}\n</ANSWER_DRAFT>\n\n"
        f"<PROBE domain=\"{domain}\">\n" +
        "\n".join(f"- {p}" for p in probes) +
        "\n</PROBE>\n\n"
        f"<CONSISTENCY_CHECK>\n{consistency.strip()}\n</CONSISTENCY_CHECK>\n\n"
        f"<FINAL>\n{gold.strip()}\n</FINAL>"
    )

    if not filter_pair(prompt, trace)["keep"]:
        return None

    return {
        "prompt": prompt[:6000],
        "response": trace[:8000],
        "source": "verify-trace",
        "meta": {"domain": domain, "n_probes": len(probes)},
    }
|
| 165 |
+
|
| 166 |
+
|
| 167 |
+
def main() -> None:
    """CLI entry: read {prompt, response} JSONL, append verify-traces.

    Rows that fail JSON parsing are skipped; output is flushed per row so a
    killed cron run still keeps everything written so far.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument("--input", required=True,
                        help="JSONL with {prompt, response} per line")
    parser.add_argument("--out", default=str(OUT_PATH))
    parser.add_argument("--max", type=int, default=2000)
    opts = parser.parse_args()

    src = Path(opts.input)
    dst = Path(opts.out)
    dst.parent.mkdir(parents=True, exist_ok=True)
    if not src.exists():
        print(f"β {src} missing", file=sys.stderr)
        sys.exit(1)

    seen = 0
    kept = 0
    with open(src) as fin, open(dst, "a") as fout:
        for raw in fin:
            if kept >= opts.max:
                break
            try:
                rec = json.loads(raw)
            except Exception:
                continue  # malformed line: skip, don't abort the batch
            seen += 1
            prompt = rec.get("prompt") or rec.get("instruction") or ""
            gold = (rec.get("response") or rec.get("output")
                    or rec.get("answer") or "")
            row = synthesize_trace(prompt, gold)
            if not row:
                continue
            fout.write(json.dumps(row, ensure_ascii=False) + "\n")
            fout.flush()  # survive mid-run kills
            kept += 1
            if kept % 25 == 0:
                print(f"  verify kept {kept}/{opts.max} (in {seen})")
    print(f"[done] in={seen} verify_kept={kept} β {dst}")


if __name__ == "__main__":
    main()
|
|
@@ -0,0 +1,182 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""Surrogate-1 v2 β Voyager-style skill library.
|
| 2 |
+
|
| 3 |
+
Validated code/config snippets, auto-promoted as the model uses them
|
| 4 |
+
successfully. Inspired by Wang et al. 2023 (Voyager β Minecraft).
|
| 5 |
+
|
| 6 |
+
Skill = (name, code, description, tags, success_count, failure_count,
|
| 7 |
+
promoted, last_used). Promoted skills (success β₯ 3) ship as
|
| 8 |
+
retrieval context at inference.
|
| 9 |
+
|
| 10 |
+
DB: ~/.surrogate/state/skills.db
|
| 11 |
+
Export: ~/.surrogate/data/v2/skills-promoted.jsonl (for training)
|
| 12 |
+
|
| 13 |
+
Used by:
|
| 14 |
+
- tool-trace-collector.py (extracts candidate skills from successful tool runs)
|
| 15 |
+
- self-improve-loop.sh (re-ranks skills weekly)
|
| 16 |
+
- serve-vllm.sh prompt (retrieves top-k by tag at inference)
|
| 17 |
+
"""
|
| 18 |
+
from __future__ import annotations
|
| 19 |
+
import json
|
| 20 |
+
import re
|
| 21 |
+
import sqlite3
|
| 22 |
+
import sys
|
| 23 |
+
import time
|
| 24 |
+
from pathlib import Path
|
| 25 |
+
|
| 26 |
+
# Skill database lives under the per-user surrogate state dir.
DB_PATH = Path.home() / ".surrogate/state/skills.db"
DB_PATH.parent.mkdir(parents=True, exist_ok=True)  # NOTE: import-time side effect
# A skill is auto-promoted once it has this many recorded successes.
PROMOTE_THRESHOLD = 3
EXPORT_PATH = Path.home() / ".surrogate/data/v2/skills-promoted.jsonl"
# Identifier-like tokens of total length >= 3; used for query/skill matching.
TOKEN_RE = re.compile(r"[a-zA-Z_][a-zA-Z0-9_]{2,}")
|
| 31 |
+
|
| 32 |
+
|
| 33 |
+
def _db() -> sqlite3.Connection:
    """Open the skills DB, creating schema and indexes on first use.

    Autocommit (isolation_level=None) plus WAL lets the cron writers and
    inference-time readers coexist without long lock waits.
    """
    conn = sqlite3.connect(str(DB_PATH), isolation_level=None, timeout=30,
                           check_same_thread=False)
    conn.execute("PRAGMA journal_mode=WAL")
    ddl = """CREATE TABLE IF NOT EXISTS skills (
            id INTEGER PRIMARY KEY AUTOINCREMENT,
            name TEXT UNIQUE,
            code TEXT,
            description TEXT,
            tags TEXT, -- comma-separated
            success_count INTEGER DEFAULT 0,
            failure_count INTEGER DEFAULT 0,
            promoted INTEGER DEFAULT 0,
            created_at INTEGER,
            last_used INTEGER
        )"""
    conn.execute(ddl)
    conn.execute("CREATE INDEX IF NOT EXISTS idx_skills_promoted ON skills(promoted, success_count DESC)")
    conn.execute("CREATE INDEX IF NOT EXISTS idx_skills_tags ON skills(tags)")
    return conn
|
| 52 |
+
|
| 53 |
+
|
| 54 |
+
def add(name: str, code: str, description: str,
        tags: list[str] | str = "") -> int:
    """Register a skill; return its new row id, or -1 if *name* already exists.

    Args:
        name: unique skill identifier (UNIQUE column in the DB).
        code: the validated snippet itself.
        description: human-readable summary, used for retrieval matching.
        tags: either a pre-joined comma string or a list (normalized to
            lowercase, blanks dropped).
    """
    if isinstance(tags, list):
        tags = ",".join(t.strip().lower() for t in tags if t.strip())
    c = _db()
    now = int(time.time())
    cur = c.execute("""INSERT OR IGNORE INTO skills
                       (name, code, description, tags, created_at)
                       VALUES (?, ?, ?, ?, ?)""",
                    (name, code, description, tags, now))
    # BUGFIX: when OR IGNORE skips a duplicate, cur.lastrowid is NOT a
    # reliable "nothing inserted" signal (it is merely left unchanged).
    # rowcount == 0 is the explicit ignored-row indicator, so -1 is now
    # returned deterministically for duplicates.
    rid = cur.lastrowid if cur.rowcount > 0 else -1
    c.close()
    return rid if rid else -1
|
| 67 |
+
|
| 68 |
+
|
| 69 |
+
def record(name: str, success: bool) -> None:
    """Bump the success or failure counter for *name* and stamp last_used.

    On a success, the skill is promoted once success_count reaches
    PROMOTE_THRESHOLD (one-way: promoted never flips back here).
    """
    conn = _db()
    ts = int(time.time())
    # Column name comes from a two-value whitelist, so the f-string is safe.
    column = "success_count" if success else "failure_count"
    conn.execute(
        f"UPDATE skills SET {column} = {column}+1, last_used=? WHERE name=?",
        (ts, name))
    if success:
        conn.execute(
            """UPDATE skills SET promoted=1
               WHERE name=? AND promoted=0 AND success_count >= ?""",
            (name, PROMOTE_THRESHOLD))
    conn.close()
|
| 80 |
+
|
| 81 |
+
|
| 82 |
+
def search(query: str, tags: list[str] | None = None,
           limit: int = 5, only_promoted: bool = True) -> list[dict]:
    """Rank skills against *query* by token overlap, weighted by confidence.

    SQL narrows to (optionally promoted) rows matching the tag substrings,
    then Python scores: relevance = |query tokens ∩ skill tokens|, scaled by
    0.5 + success_ratio. An empty query matches everything with relevance 1.
    Returns up to *limit* dicts sorted by descending rank_score.
    """
    query_tokens = set(TOKEN_RE.findall(query.lower()))
    conn = _db()
    clauses = ["1=1"]
    params: list = []
    if only_promoted:
        clauses.append("promoted = 1")
    # NOTE(review): LIKE %tag% is substring matching, so tag "py" also hits
    # "numpy" — behavior kept as-is.
    for t in (tags or []):
        clauses.append("tags LIKE ?")
        params.append(f"%{t.lower()}%")
    sql = f"""SELECT name, code, description, tags, success_count, failure_count
              FROM skills WHERE {' AND '.join(clauses)}
              ORDER BY success_count DESC LIMIT 200"""
    candidates = conn.execute(sql, params).fetchall()
    conn.close()

    ranked: list[tuple[float, tuple]] = []
    for row in candidates:
        name, _code, desc, tag_str, ok_n, fail_n = row
        corpus = (name + " " + (desc or "") + " " + (tag_str or "")).lower()
        row_tokens = set(TOKEN_RE.findall(corpus))
        shared = query_tokens & row_tokens if query_tokens else row_tokens
        if query_tokens and not shared:
            continue  # no lexical overlap at all: drop the row
        relevance = float(len(shared)) if query_tokens else 1.0
        confidence = ok_n / max(1, ok_n + fail_n)
        ranked.append((relevance * (0.5 + confidence), row))
    ranked.sort(key=lambda pair: pair[0], reverse=True)

    results = []
    for score, (name, code, desc, tag_str, ok_n, fail_n) in ranked[:limit]:
        results.append({
            "name": name, "code": code, "description": desc,
            "tags": tag_str.split(",") if tag_str else [],
            "success": ok_n, "failure": fail_n,
            "rank_score": round(score, 3),
        })
    return results
|
| 119 |
+
|
| 120 |
+
|
| 121 |
+
def export_jsonl(path: str | Path = EXPORT_PATH) -> int:
    """Dump promoted skills as JSONL for training data inclusion.

    Overwrites *path* (mode "w") with one {prompt, response, source, meta}
    row per promoted skill, best-performing first. Returns rows written.
    """
    dest = Path(path)
    dest.parent.mkdir(parents=True, exist_ok=True)
    conn = _db()
    promoted = conn.execute("""SELECT name, code, description, tags, success_count
                               FROM skills WHERE promoted=1
                               ORDER BY success_count DESC""").fetchall()
    conn.close()
    written = 0
    with open(dest, "w") as fh:
        for name, code, desc, tag_str, ok_n in promoted:
            # Synthesize a natural prompt from the description when present,
            # otherwise fall back to a generic snippet request.
            record = {
                "prompt": (f"How would you {desc.lower() if desc else name}?"
                           if desc else f"Provide a working snippet for: {name}"),
                "response": code,
                "source": "voyager-skill",
                "meta": {"skill": name,
                         "tags": tag_str.split(",") if tag_str else [],
                         "uses": ok_n},
            }
            fh.write(json.dumps(record, ensure_ascii=False) + "\n")
            written += 1
    return written
|
| 143 |
+
|
| 144 |
+
|
| 145 |
+
def stats() -> dict:
    """Summarize the library: totals plus the ten most-used promoted skills."""
    conn = _db()
    n_total = conn.execute("SELECT COUNT(*) FROM skills").fetchone()[0]
    n_promoted = conn.execute("SELECT COUNT(*) FROM skills WHERE promoted=1").fetchone()[0]
    leaders = conn.execute("""SELECT name, success_count, failure_count, tags
                              FROM skills WHERE promoted=1
                              ORDER BY success_count DESC LIMIT 10""").fetchall()
    conn.close()
    top_rows = [{"name": n, "ok": o, "fail": f, "tags": t}
                for n, o, f, t in leaders]
    return {"total": n_total, "promoted": n_promoted, "top": top_rows}
|
| 158 |
+
|
| 159 |
+
|
| 160 |
+
if __name__ == "__main__":
    # Tiny CLI: stats (default) | add (JSON on stdin) | record NAME OK |
    # search QUERY [TAGS] [K] | export [PATH]
    argv = sys.argv
    cmd = argv[1] if len(argv) > 1 else "stats"
    if cmd == "stats":
        print(json.dumps(stats(), indent=2, ensure_ascii=False))
    elif cmd == "add":
        payload = json.load(sys.stdin)
        new_id = add(payload["name"], payload["code"],
                     payload.get("description", ""),
                     payload.get("tags", []))
        print(json.dumps({"id": new_id}))
    elif cmd == "record":
        # Any of ok/true/1/success (case-insensitive) counts as a win.
        record(argv[2], argv[3].lower() in ("ok", "true", "1", "success"))
    elif cmd == "search":
        query = argv[2]
        tag_list = argv[3].split(",") if len(argv) > 3 else None
        top_k = int(argv[4]) if len(argv) > 4 else 5
        print(json.dumps(search(query, tag_list, top_k),
                         indent=2, ensure_ascii=False))
    elif cmd == "export":
        out_path = argv[2] if len(argv) > 2 else str(EXPORT_PATH)
        count = export_jsonl(out_path)
        print(json.dumps({"exported": count, "path": out_path}))
    else:
        print(f"unknown: {cmd}", file=sys.stderr)
        sys.exit(1)
|
|
@@ -0,0 +1,103 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# Surrogate-1 v2 — Stage 1 (SDFT variant, 2026-04 Round 5).
#
# Replaces vanilla SFT with Self-Distillation Fine-Tuning per arxiv 2601.19897.
# Data file is produced by bin/v2/sdft-trainer.py BEFORE this stage runs:
#   python3 bin/v2/sdft-trainer.py --input /data/v2-train-clean.jsonl \
#       --stage stage1 --max 50000
#   -> /data/v2/sdft/stage1-YYYYMMDD.jsonl
#
# Why: continual LoRA training without catastrophic forgetting. Distilled
# responses live close to the model's own distribution, so updating LoRA
# weights moves it less off the prior manifold.
#
# Run: axolotl train configs/v2/stage1-sdft.yml
# Compute: ~12-15 hr on Lightning H200 (same envelope as stage1-sft).

base_model: Qwen/Qwen2.5-Coder-7B-Instruct
model_type: AutoModelForCausalLM
tokenizer_type: AutoTokenizer
trust_remote_code: true

# 4-bit quantized base weights (QLoRA-style); strict=false tolerates
# checkpoint key mismatches when loading.
load_in_4bit: true
strict: false

# DoRAN-ready LoRA config — r=64 for capacity, all-linear, DoRA decomposed.
# (DoRAN noise-injection is a runtime patch via bin/v2/doran-adapter.py once
# implemented; vanilla DoRA is the safe fallback that ships today.)
adapter: lora
lora_r: 64
lora_alpha: 128  # alpha = 2*r scaling convention
lora_dropout: 0.05
peft_use_dora: true
lora_target_modules:   # all linear projections (attention + MLP)
  - q_proj
  - k_proj
  - v_proj
  - o_proj
  - gate_proj
  - up_proj
  - down_proj

# Context: train at 32K, serve at 128K via YaRN x4
sequence_len: 32768
sample_packing: true
pad_to_sequence_len: true
rope_theta: 1000000.0
rope_scaling:
  type: yarn
  factor: 4.0    # 32768 * 4 = 131072 tokens at inference
  original_max_position_embeddings: 32768

# Datasets — SDFT-distilled outputs (NOT raw gold). The whole point.
datasets:
  - path: /data/v2/sdft/stage1.jsonl  # symlink -> latest stage1-YYYYMMDD.jsonl
    type: chat_template
    field_messages: messages
    ds_type: json

val_set_size: 0.02   # 2% held out for eval_steps loss
output_dir: /data/v2/out/stage1-sdft

# Training hyperparams — slightly lower LR than vanilla SFT because SDFT
# data is closer to current distribution (smaller updates needed).
num_epochs: 3
micro_batch_size: 1
gradient_accumulation_steps: 16   # effective batch = 16 per device
learning_rate: 7.0e-5  # was 1e-4 in stage1-sft.yml
lr_scheduler: cosine
warmup_ratio: 0.03
optimizer: adamw_torch_fused
weight_decay: 0.01
max_grad_norm: 1.0

# Memory tricks
bf16: true
fp16: false
gradient_checkpointing: true
gradient_checkpointing_kwargs:
  use_reentrant: false
flash_attention: true
liger_kernel: true
neftune_noise_alpha: 5   # NEFTune embedding noise, mild regularizer

# Eval / save
eval_steps: 200
save_steps: 200
save_total_limit: 3
logging_steps: 10

# Hub push — every checkpoint save is pushed to the public adapter repo.
hub_model_id: axentx/surrogate-1-coder-7b-lora-v2-sdft
hub_strategy: every_save
push_to_hub: true
hub_private_repo: false

wandb_project: surrogate-1-v2
wandb_run_id: stage1-sdft  # fixed id: reruns resume the same W&B run

special_tokens:
  pad_token: <|endoftext|>

resume_from_checkpoint: null
auto_resume_from_checkpoints: true
|
|
@@ -393,6 +393,33 @@ while true; do
|
|
| 393 |
# Every 6 hr: Lightning AI H200 training run (free 4hr H200 quota = ~13/mo).
|
| 394 |
# H200 141GB VRAM fits Qwen3-Coder-480B-A35B QLoRA β biggest free training.
|
| 395 |
[[ $((M % 360)) -eq 45 ]] && bash ~/.surrogate/bin/lightning-trainer.sh >> "$LOG_DIR/lightning-trainer.log" 2>&1 &
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 396 |
sleep 60
|
| 397 |
done
|
| 398 |
CRONSH
|
|
|
|
| 393 |
# Every 6 hr: Lightning AI H200 training run (free 4hr H200 quota = ~13/mo).
|
| 394 |
# H200 141GB VRAM fits Qwen3-Coder-480B-A35B QLoRA β biggest free training.
|
| 395 |
[[ $((M % 360)) -eq 45 ]] && bash ~/.surrogate/bin/lightning-trainer.sh >> "$LOG_DIR/lightning-trainer.log" 2>&1 &
|
| 396 |
+
|
| 397 |
+
# ── Round 5 (2026-04) sustainability loops ──────────────────────────
# NOTE(review): offsets 420/480 are 07:00/08:00 UTC only if M is a
# minute-of-day counter that resets at midnight — confirm against the
# loop header above.
# Every 6 hr (offset 90): self-improve loop — generate problems, judge,
# winners -> training data, losers -> reflexion-store.
[[ $((M % 360)) -eq 90 ]] && bash ~/.surrogate/bin/v2/self-improve-loop.sh >> "$LOG_DIR/self-improve.log" 2>&1 &
# Every 30 min (offset 22): mine new tool-call traces from logs into
# SFT + DPO data, plus voyager skill candidates.
[[ $((M % 30)) -eq 22 ]] && python3 ~/.surrogate/bin/v2/tool-trace-collector.py >> "$LOG_DIR/tool-trace.log" 2>&1 &
# Every 60 min (offset 17): export promoted voyager skills to JSONL
# (training-data slice + inference-time retrieval source).
[[ $((M % 60)) -eq 17 ]] && python3 ~/.surrogate/bin/v2/voyager-skills.py export >> "$LOG_DIR/voyager.log" 2>&1 &
# Daily 07:00 UTC (offset 420): active-learning batch from one bulk-mirror
# file. Skips silently if no pool yet.
[[ $((M % 1440)) -eq 420 ]] && {
  POOL=$(ls -t "$DATA"/bulk-mirror/*.jsonl 2>/dev/null | head -1)
  [[ -n "$POOL" ]] && python3 ~/.surrogate/bin/v2/active-learning.py \
    --pool "$POOL" --n 200 --scan 1500 \
    >> "$LOG_DIR/active-learning.log" 2>&1 &
}
# Daily 08:00 UTC (offset 480): constitutional self-critique on yesterday's
# winners (pulls latest self-improve winners file).
[[ $((M % 1440)) -eq 480 ]] && {
  WIN=$(ls -t "$DATA"/v2/self-improve/winners-*.jsonl 2>/dev/null | head -1)
  [[ -n "$WIN" ]] && python3 ~/.surrogate/bin/v2/constitutional-loop.py \
    --input "$WIN" --n 200 \
    >> "$LOG_DIR/constitutional.log" 2>&1 &
}
|
| 423 |
sleep 60
|
| 424 |
done
|
| 425 |
CRONSH
|