Ashira Pitchayapakayakul committed on
Commit
b4668b2
·
1 Parent(s): 17967dd

feat(v2-round6): TruthRL + Validator-RLVR + LoraHub + Self-Refine + Letta + 9-cluster configs


Six more techniques shipped (requested in Round 5 but not yet built):
- bin/v2/validator-rlvr.py — pyflakes/shellcheck/hadolint/tflint/cfn-lint/
  actionlint/sqlfluff/semgrep as deterministic reward signals for stage3 RL
- bin/v2/truthrl-rewarder.py — ternary reward (+1 calibrated_idk / +1 confident_correct
  / -1 confident_wrong / -0.3 over_abstain)
- bin/v2/lorahub-composer.py — runtime LoRA composition with learned routing
  table (heuristic seed, learns from winners)
- bin/v2/self-refine-loop.py — Madaan 2023 3-iter generate→critique→revise SFT
- bin/v2/letta-memory.py — hierarchical core+recall+archival memory
  (Packer 2023, formerly MemGPT)
- bin/v2/gen-cluster-configs.sh — emits 9 cluster LoRA YAMLs from template
  (eng-build/ops/sec/ai + product-ux + gtm + finance-legal + compliance +
  meta-orchestrator)

Sanitizer hardening:
- bin/lib/sanitize.py — optional starpii NER + detect-secrets integration
  via filter_pair(deep_scan=True). Lazy-loaded, fail-soft.

bin/lib/sanitize.py CHANGED

@@ -113,6 +113,92 @@ def has_pii(text: str) -> bool:
     return bool(PII_RE.search(text or ""))
 
 
+# ── Optional NER + secrets scanners (lazy, fail-soft) ──────────────────
+# starpii (BigCode) — neural PII NER; better than regex for free-form text.
+# detect-secrets (Yelp) — entropy + plugin-based secret detector.
+# Both are optional dependencies; if unavailable we fall back to regex above.
+_starpii_pipeline = None
+_detect_secrets_collection = None
+
+
+def _load_starpii():
+    """Lazy-load BigCode/starpii pipeline. None on failure."""
+    global _starpii_pipeline
+    if _starpii_pipeline is not None:
+        return _starpii_pipeline if _starpii_pipeline is not False else None
+    try:
+        from transformers import pipeline  # type: ignore
+        _starpii_pipeline = pipeline(
+            "token-classification",
+            model="bigcode/starpii",
+            aggregation_strategy="simple",
+        )
+        return _starpii_pipeline
+    except Exception:
+        _starpii_pipeline = False  # sentinel: "tried, don't try again"
+        return None
+
+
+def starpii_pii_hits(text: str, threshold: float = 0.8) -> list[dict]:
+    """Return [{type, score, span}] for confidently-detected PII spans.
+    Empty list if starpii not installed or no hits.
+    """
+    pipe = _load_starpii()
+    if not pipe or not text:
+        return []
+    try:
+        hits = pipe(text[:4000])  # cap input for speed
+    except Exception:
+        return []
+    return [{"type": h["entity_group"], "score": float(h["score"]),
+             "span": text[h["start"]:h["end"]][:120]}
+            for h in hits if h.get("score", 0) >= threshold]
+
+
+def _load_detect_secrets():
+    """Lazy-load detect-secrets SecretsCollection. None on failure."""
+    global _detect_secrets_collection
+    if _detect_secrets_collection is not None:
+        return _detect_secrets_collection if _detect_secrets_collection is not False else None
+    try:
+        from detect_secrets import SecretsCollection  # type: ignore
+        from detect_secrets.settings import default_settings  # type: ignore
+        _detect_secrets_collection = (SecretsCollection, default_settings)
+        return _detect_secrets_collection
+    except Exception:
+        _detect_secrets_collection = False
+        return None
+
+
+def detect_secrets_hits(text: str) -> list[dict]:
+    """Return [{type, line}] for any secret detect-secrets finds.
+    Empty list if not installed or none detected.
+    """
+    loaded = _load_detect_secrets()
+    if not loaded or not text:
+        return []
+    SecretsCollection, default_settings = loaded
+    import tempfile, os
+    fd, path = tempfile.mkstemp(suffix=".txt")
+    try:
+        os.write(fd, text.encode("utf-8", "ignore")[:200_000])
+        os.close(fd)
+        with default_settings():
+            sc = SecretsCollection()
+            sc.scan_file(path)
+        out = []
+        for _, secrets in sc.data.items():
+            for s in secrets:
+                out.append({"type": s.type, "line": s.line_number,
+                            "secret_hash": s.secret_hash[:16]})
+        return out
+    except Exception:
+        return []
+    finally:
+        try: os.unlink(path)
+        except OSError: pass
+
+
 # Quality heuristics — drop if response is too short, identical to prompt, etc.
 def is_low_quality(prompt: str, response: str) -> tuple[bool, str | None]:
     if not prompt or not response:
@@ -133,16 +219,37 @@ def is_low_quality(prompt: str, response: str) -> tuple[bool, str | None]:
     return False, None
 
 
-def filter_pair(prompt: str, response: str) -> dict:
-    """Return verdict: {'keep': bool, 'reason': str|None, 'matched': str|None}"""
+def filter_pair(prompt: str, response: str,
+                deep_scan: bool = False) -> dict:
+    """Return verdict: {'keep': bool, 'reason': str|None, 'matched': str|None}.
+
+    deep_scan=True: also runs starpii NER + detect-secrets if installed.
+    Slow (model load + per-row scan) — use for the final pre-train pass,
+    not for every dedup row. Heuristic (regex) checks always run.
+    """
     polluted, p_match = is_polluted_pair(prompt, response)
     if polluted:
         return {"keep": False, "reason": "polluted", "matched": p_match}
     if has_pii(prompt) or has_pii(response):
-        return {"keep": False, "reason": "pii", "matched": None}
+        return {"keep": False, "reason": "pii_regex", "matched": None}
     low_q, lq_reason = is_low_quality(prompt, response)
     if low_q:
         return {"keep": False, "reason": f"low_quality:{lq_reason}", "matched": None}
+
+    if deep_scan:
+        # NER PII
+        for field, txt in (("prompt", prompt), ("response", response)):
+            hits = starpii_pii_hits(txt)
+            if hits:
+                return {"keep": False, "reason": f"pii_ner:{field}",
+                        "matched": str(hits[:3])[:300]}
+        # detect-secrets entropy/plugins
+        for field, txt in (("prompt", prompt), ("response", response)):
+            hits = detect_secrets_hits(txt)
+            if hits:
+                return {"keep": False, "reason": f"secrets:{field}",
+                        "matched": str(hits[:3])[:300]}
+
    return {"keep": True, "reason": None, "matched": None}
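Usage sketch for the new flag: a final pre-train sweep with deep_scan on (the file paths here are assumptions; the fast regex checks still run on every row either way):

    import json, sys
    from pathlib import Path
    sys.path.insert(0, str(Path.home() / ".surrogate/bin/lib"))
    from sanitize import filter_pair

    kept = dropped = 0
    with open("/data/v2-train.jsonl") as fin, \
         open("/data/v2-train-clean.jsonl", "w") as fout:
        for line in fin:
            row = json.loads(line)
            v = filter_pair(row.get("prompt", ""), row.get("response", ""),
                            deep_scan=True)  # regex + starpii NER + detect-secrets
            if v["keep"]:
                fout.write(line); kept += 1
            else:
                dropped += 1  # v["reason"]: pii_regex, pii_ner:*, secrets:*, ...
    print(f"kept={kept} dropped={dropped}")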
bin/v2/gen-cluster-configs.sh ADDED
@@ -0,0 +1,214 @@
+#!/usr/bin/env bash
+# Surrogate-1 v2 — Generate 9 cluster LoRA training configs from one template.
+#
+# Each cluster is trained independently on its domain slice of v2 data,
+# then mergekit fuses them via merge-9-loras.sh.
+#
+# Run BEFORE training: bash gen-cluster-configs.sh
+# Output: configs/v2/cluster-<name>.yml × 9
+#
+# Domain → dataset filter mapping (each cluster pulls a subset of v2-train-clean):
+#   eng-build:         code-* + ai-eng + api-* + test-* + debug-*
+#   eng-ops:           devops-* + sre-* + ci-* + cloud-cost
+#   eng-sec:           sec-* + safety + cve + secrets + iam
+#   eng-ai:            ai-eng + ai-prompt + rag + lora + vllm
+#   product-ux:        docs-* + arch-adr + design-*
+#   gtm:               business + marketing + sales
+#   finance-legal:     finance-* + legal-* + cost-*
+#   compliance:        compliance + soc2 + iso27001 + hipaa + pci
+#   meta-orchestrator: arch-adr + planning + multi-step
+set -uo pipefail
+
+OUT_DIR="$HOME/.surrogate/hf-space/configs/v2"
+TEMPLATE_DOMAINS=(
+  "eng-build:code-*,ai-eng,api-*,test-*,debug-*,perf-*"
+  "eng-ops:devops-*,sre-*,ci-*,cloud-cost,iac-*"
+  "eng-sec:sec-*,safety-*,cve-*,iam-*,secrets-*"
+  "eng-ai:ai-eng,ai-prompt,rag,lora,vllm,embedding"
+  "product-ux:docs-*,arch-adr,design-*,user-*"
+  "gtm:business,marketing,sales,positioning,gtm"
+  "finance-legal:finance-*,legal-*,cost-*,billing"
+  "compliance:compliance,soc2,iso27001,hipaa,pci-dss,gdpr"
+  "meta-orchestrator:arch-adr,planning,multi-step,orchestration"
+)
+
+# Per-cluster LoRA hyperparams — bigger ranks for domains with more data.
+declare -A LORA_R=(
+  [eng-build]=64 [eng-ops]=64 [eng-sec]=48 [eng-ai]=48
+  [product-ux]=32 [gtm]=32 [finance-legal]=32 [compliance]=32
+  [meta-orchestrator]=64
+)
+
+mkdir -p "$OUT_DIR"
+
+for entry in "${TEMPLATE_DOMAINS[@]}"; do
+  name="${entry%%:*}"
+  domain_filter="${entry#*:}"
+  rank="${LORA_R[$name]:-32}"
+  alpha=$((rank * 2))
+  out_yml="$OUT_DIR/cluster-${name}.yml"
+
+  cat > "$out_yml" <<EOF
+# Surrogate-1 v2 — Cluster LoRA (auto-generated by gen-cluster-configs.sh).
+# Cluster: $name
+# Domain filter: $domain_filter
+# Trained independently; merged via merge-9-loras.sh after all 9 finish.
+
+base_model: Qwen/Qwen2.5-Coder-7B-Instruct
+model_type: AutoModelForCausalLM
+tokenizer_type: AutoTokenizer
+trust_remote_code: true
+
+load_in_4bit: true
+strict: false
+
+adapter: lora
+lora_r: $rank
+lora_alpha: $alpha
+lora_dropout: 0.05
+peft_use_dora: true
+lora_target_modules:
+  - q_proj
+  - k_proj
+  - v_proj
+  - o_proj
+  - gate_proj
+  - up_proj
+  - down_proj
+
+sequence_len: 32768
+sample_packing: true
+pad_to_sequence_len: true
+rope_theta: 1000000.0
+rope_scaling:
+  type: yarn
+  factor: 4.0
+  original_max_position_embeddings: 32768
+
+datasets:
+  - path: /data/v2/clusters/${name}.jsonl
+    type: chat_template
+    field_messages: messages
+    ds_type: json
+
+val_set_size: 0.02
+output_dir: /data/v2/out/cluster-${name}
+
+# Smaller clusters get fewer epochs (less data to overfit on)
+num_epochs: 2
+micro_batch_size: 1
+gradient_accumulation_steps: 16
+learning_rate: 1.0e-4
+lr_scheduler: cosine
+warmup_ratio: 0.03
+optimizer: adamw_torch_fused
+weight_decay: 0.01
+max_grad_norm: 1.0
+
+bf16: true
+fp16: false
+gradient_checkpointing: true
+gradient_checkpointing_kwargs:
+  use_reentrant: false
+flash_attention: true
+liger_kernel: true
+neftune_noise_alpha: 5
+
+eval_steps: 100
+save_steps: 100
+save_total_limit: 2
+logging_steps: 10
+
+hub_model_id: axentx/surrogate-1-coder-7b-lora-v2-${name}
+hub_strategy: every_save
+push_to_hub: true
+hub_private_repo: false
+
+wandb_project: surrogate-1-v2-clusters
+wandb_run_id: cluster-${name}
+
+special_tokens:
+  pad_token: <|endoftext|>
+
+resume_from_checkpoint: null
+auto_resume_from_checkpoints: true
+EOF
+
+  echo "▶ $out_yml (rank=$rank, filter=$domain_filter)"
+done
+
+# Companion: dataset slicer that produces /data/v2/clusters/<name>.jsonl
+SLICER="$OUT_DIR/../../bin/v2/slice-clusters.py"
+cat > "$SLICER" <<'PYEOF'
+"""Slice v2-train-clean.jsonl into 9 cluster files by domain tag.
+
+Domain tag is detected via inference-augment.detect_domain() if present in
+the row's meta, else heuristically from prompt content.
+"""
+import json, os, sys
+from pathlib import Path
+sys.path.insert(0, str(Path.home() / ".surrogate/bin/v2"))
+from importlib.util import spec_from_file_location, module_from_spec
+
+# Load detect_domain from inference-augment.py
+spec = spec_from_file_location(
+    "inference_augment",
+    str(Path.home() / ".surrogate/bin/v2/inference-augment.py"))
+ia = module_from_spec(spec); spec.loader.exec_module(ia)
+
+DOMAIN_TO_CLUSTER = {
+    "code-python": "eng-build", "code-typescript": "eng-build",
+    "test-pytest": "eng-build", "debug-traceback": "eng-build",
+    "perf-profile": "eng-build", "api-rest": "eng-build",
+    "api-graphql": "eng-build",
+    "devops-tf": "eng-ops", "devops-k8s": "eng-ops", "devops-cdk": "eng-ops",
+    "sre-runbook": "eng-ops", "sre-slo": "eng-ops", "ci-github": "eng-ops",
+    "cloud-cost": "eng-ops",
+    "sec-iam": "eng-sec", "sec-secrets": "eng-sec", "sec-cve": "eng-sec",
+    "ai-eng": "eng-ai", "ai-prompt": "eng-ai",
+    "data-sql": "eng-build",
+    "docs-api": "product-ux", "arch-adr": "meta-orchestrator",
+    "business": "gtm", "compliance": "compliance",
+    "_default": "meta-orchestrator",
+}
+
+INPUT = Path(os.environ.get("INPUT", "/data/v2-train-clean.jsonl"))
+OUT_DIR = Path("/data/v2/clusters")
+OUT_DIR.mkdir(parents=True, exist_ok=True)
+
+handles = {}
+counts = {}
+def out(cluster):
+    if cluster not in handles:
+        p = OUT_DIR / f"{cluster}.jsonl"
+        handles[cluster] = open(p, "w")
+        counts[cluster] = 0
+    return handles[cluster]
+
+with open(INPUT) as fin:
+    for line in fin:
+        try: d = json.loads(line)
+        except Exception: continue
+        prompt = d.get("prompt") or d.get("instruction") or ""
+        if not prompt: continue
+        domain = (d.get("meta", {}).get("domain")
+                  or ia.detect_domain(prompt) or "_default")
+        cluster = DOMAIN_TO_CLUSTER.get(domain, "meta-orchestrator")
+        out(cluster).write(json.dumps(d, ensure_ascii=False) + "\n")
+        counts[cluster] += 1
+
+for f in handles.values(): f.close()
+print(json.dumps(counts, indent=2))
+PYEOF
+
+chmod +x "$SLICER"
+echo ""
+echo "✅ generated 9 cluster YAMLs in $OUT_DIR/"
+echo "   slicer: $SLICER"
+echo ""
+echo "Next steps:"
+echo "  1. python3 $SLICER   # slices /data/v2-train-clean.jsonl → 9 cluster files"
+echo "  2. for c in eng-build eng-ops eng-sec eng-ai product-ux gtm finance-legal compliance meta-orchestrator; do"
+echo "       axolotl train $OUT_DIR/cluster-\$c.yml"
+echo "     done"
+echo "  3. bash bin/v2/merge-9-loras.sh   # fuses all 9 into super-LoRA"
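Smoke-test sketch for the emitted YAMLs (assumes PyYAML is installed; the alpha = 2*rank convention and the paths come from the script above):

    import yaml
    from pathlib import Path

    cfg_dir = Path.home() / ".surrogate/hf-space/configs/v2"
    for yml in sorted(cfg_dir.glob("cluster-*.yml")):
        cfg = yaml.safe_load(yml.read_text())
        assert cfg["lora_alpha"] == 2 * cfg["lora_r"]   # alpha = 2*rank
        assert cfg["datasets"][0]["path"].startswith("/data/v2/clusters/")
        print(yml.name, "rank =", cfg["lora_r"])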
bin/v2/letta-memory.py ADDED
@@ -0,0 +1,198 @@
+"""Surrogate-1 v2 — Letta-style hierarchical memory.
+
+Reference: Letta (formerly MemGPT, Packer et al. 2023) — hierarchical
+memory with core (always-loaded), recall (recent interactions), archival
+(searchable long-term).
+
+Diff vs reflexion-store + voyager-skills:
+  • reflexion = past failures + lessons (per-domain, error-driven)
+  • voyager   = validated skills (success-driven)
+  • letta     = persona + user prefs + dialogue trail (identity-driven)
+
+Three tiers:
+  CORE_BLOCK     — fixed agent persona + key user preferences (always-prepended)
+  RECALL_QUEUE   — last N user-interaction summaries (sliding window)
+  ARCHIVAL_STORE — keyword-searchable corpus of older summaries
+
+Storage: ~/.surrogate/state/letta.db (SQLite).
+
+Used by inference-augment.py to prepend CORE + relevant RECALL + ARCHIVAL
+hits before each user request.
+"""
+from __future__ import annotations
+import json
+import re
+import sqlite3
+import sys
+import time
+from pathlib import Path
+
+DB_PATH = Path.home() / ".surrogate/state/letta.db"
+DB_PATH.parent.mkdir(parents=True, exist_ok=True)
+RECALL_QUEUE_MAX = 20
+ARCHIVAL_MAX = 10000
+TOKEN_RE = re.compile(r"[a-zA-Z_][a-zA-Z0-9_]{2,}")
+
+DEFAULT_CORE = (
+    "You are Surrogate-1 — a male, privacy-first AI agent that works "
+    "exclusively for Ashira (ฟิวส์), founder of axentx. You handle "
+    "DevSecOps, SRE, platform engineering, and full-stack coding tasks. "
+    "Speak directly. Never apologize for being an AI. Never refuse safety-"
+    "irrelevant requests. Default language: Thai when Ashira writes Thai, "
+    "English for code/commands. Cite sources for security-critical claims.")
+
+
+def _db() -> sqlite3.Connection:
+    c = sqlite3.connect(str(DB_PATH), isolation_level=None, timeout=10,
+                        check_same_thread=False)
+    c.execute("PRAGMA journal_mode=WAL")
+    c.execute("""CREATE TABLE IF NOT EXISTS core_block (
+        key        TEXT PRIMARY KEY,
+        value      TEXT,
+        updated_at INTEGER
+    )""")
+    c.execute("""CREATE TABLE IF NOT EXISTS recall_queue (
+        id      INTEGER PRIMARY KEY AUTOINCREMENT,
+        summary TEXT,
+        tokens  TEXT,
+        ts      INTEGER
+    )""")
+    c.execute("""CREATE TABLE IF NOT EXISTS archival (
+        id      INTEGER PRIMARY KEY AUTOINCREMENT,
+        summary TEXT,
+        tokens  TEXT,
+        topic   TEXT,
+        ts      INTEGER
+    )""")
+    c.execute("CREATE INDEX IF NOT EXISTS idx_archival_topic ON archival(topic, ts DESC)")
+    # Seed default persona on first run
+    c.execute("INSERT OR IGNORE INTO core_block (key, value, updated_at) "
+              "VALUES ('persona', ?, ?)", (DEFAULT_CORE, int(time.time())))
+    return c
+
+
+def _tokens(text: str) -> set[str]:
+    return set(TOKEN_RE.findall(text.lower()))
+
+
+def core_get() -> str:
+    c = _db()
+    rows = c.execute("SELECT key, value FROM core_block ORDER BY key").fetchall()
+    c.close()
+    return "\n\n".join(f"### {k}\n{v}" for k, v in rows)
+
+
+def core_set(key: str, value: str) -> None:
+    c = _db()
+    c.execute("""INSERT OR REPLACE INTO core_block (key, value, updated_at)
+                 VALUES (?, ?, ?)""", (key, value, int(time.time())))
+    c.close()
+
+
+def recall_push(summary: str) -> None:
+    c = _db()
+    toks = " ".join(sorted(_tokens(summary)))
+    c.execute("""INSERT INTO recall_queue (summary, tokens, ts)
+                 VALUES (?, ?, ?)""", (summary[:2000], toks, int(time.time())))
+    # Promote oldest to archival when queue overflows
+    n = c.execute("SELECT COUNT(*) FROM recall_queue").fetchone()[0]
+    if n > RECALL_QUEUE_MAX:
+        promote = c.execute("""SELECT id, summary, tokens, ts
+                               FROM recall_queue ORDER BY id ASC LIMIT ?""",
+                            (n - RECALL_QUEUE_MAX,)).fetchall()
+        for rid, s, t, ts in promote:
+            topic = (sorted(_tokens(s))[:1] or ["misc"])[0]
+            c.execute("""INSERT INTO archival (summary, tokens, topic, ts)
+                         VALUES (?, ?, ?, ?)""", (s, t, topic, ts))
+            c.execute("DELETE FROM recall_queue WHERE id=?", (rid,))
+    c.close()
+
+
+def recall_recent(k: int = 5) -> list[dict]:
+    c = _db()
+    rows = c.execute("""SELECT summary, ts FROM recall_queue
+                        ORDER BY id DESC LIMIT ?""", (k,)).fetchall()
+    c.close()
+    return [{"summary": s, "ts": ts, "age_days": (time.time() - ts) / 86400}
+            for s, ts in rows]
+
+
+def archival_search(query: str, k: int = 3) -> list[dict]:
+    qtoks = _tokens(query)
+    if not qtoks:
+        return []
+    c = _db()
+    # Cap candidate scan for speed
+    rows = c.execute("""SELECT id, summary, tokens, topic, ts FROM archival
+                        ORDER BY ts DESC LIMIT 2000""").fetchall()
+    c.close()
+    scored: list[tuple[int, tuple]] = []
+    for r in rows:
+        rid, s, t, topic, ts = r
+        dtoks = set(t.split())
+        overlap = qtoks & dtoks
+        if not overlap:
+            continue
+        scored.append((len(overlap), r))
+    scored.sort(key=lambda x: -x[0])
+    return [{"summary": r[1][1], "topic": r[1][3], "score": r[0]}
+            for r in scored[:k]]
+
+
+def assemble(query: str, k_recall: int = 3,
+             k_archival: int = 3) -> str:
+    """Build the prepended memory block for this request."""
+    parts = [core_get()]
+    rec = recall_recent(k_recall)
+    if rec:
+        block = ["## Recent context"]
+        for r in rec:
+            block.append(f"- ({r['age_days']:.1f}d ago) {r['summary'][:300]}")
+        parts.append("\n".join(block))
+    arc = archival_search(query, k_archival)
+    if arc:
+        block = ["## Past relevant interactions"]
+        for a in arc:
+            block.append(f"- [{a['topic']}] {a['summary'][:300]}")
+        parts.append("\n".join(block))
+    return "\n\n".join(parts)
+
+
+def stats() -> dict:
+    c = _db()
+    n_core = c.execute("SELECT COUNT(*) FROM core_block").fetchone()[0]
+    n_rec = c.execute("SELECT COUNT(*) FROM recall_queue").fetchone()[0]
+    n_arc = c.execute("SELECT COUNT(*) FROM archival").fetchone()[0]
+    top_topics = c.execute("""SELECT topic, COUNT(*) FROM archival
+                              GROUP BY topic ORDER BY 2 DESC LIMIT 10""").fetchall()
+    c.close()
+    return {"core_blocks": n_core, "recall_queue": n_rec, "archival": n_arc,
+            "top_topics": [{"topic": t, "count": n} for t, n in top_topics]}
+
+
+if __name__ == "__main__":
+    cmd = sys.argv[1] if len(sys.argv) > 1 else "stats"
+    if cmd == "stats":
+        print(json.dumps(stats(), indent=2, ensure_ascii=False))
+    elif cmd == "core-set":
+        # python letta-memory.py core-set <key> <<<value
+        key = sys.argv[2]
+        val = sys.stdin.read()
+        core_set(key, val.strip())
+        print(json.dumps({"ok": True, "key": key}))
+    elif cmd == "core-get":
+        print(core_get())
+    elif cmd == "push":
+        # echo "summary text" | python letta-memory.py push
+        recall_push(sys.stdin.read().strip())
+        print(json.dumps({"ok": True}))
+    elif cmd == "assemble":
+        q = sys.argv[2] if len(sys.argv) > 2 else ""
+        print(assemble(q))
+    elif cmd == "search":
+        q = sys.argv[2] if len(sys.argv) > 2 else ""
+        k = int(sys.argv[3]) if len(sys.argv) > 3 else 3
+        print(json.dumps(archival_search(q, k), indent=2, ensure_ascii=False))
+    else:
+        print(f"unknown: {cmd}", file=sys.stderr)
+        sys.exit(1)
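Wiring sketch for the assemble() path (the importlib dance is needed because the filename has a dash; the pushed summary is illustrative):

    from importlib.util import spec_from_file_location, module_from_spec
    from pathlib import Path

    spec = spec_from_file_location(
        "letta_memory", str(Path.home() / ".surrogate/bin/v2/letta-memory.py"))
    lm = module_from_spec(spec); spec.loader.exec_module(lm)

    lm.recall_push("Fixed vLLM OOM on startup by lowering gpu_memory_utilization")
    memory_block = lm.assemble("why does vllm OOM on startup?")
    # Prepend CORE + recall + archival hits ahead of the user request,
    # as inference-augment.py does.
    print(memory_block[:400])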
bin/v2/lorahub-composer.py ADDED
@@ -0,0 +1,279 @@
+"""Surrogate-1 v2 — LoraHub / Arrow runtime LoRA composition.
+
+Reference: LoraHub (Huang et al. 2023) + Arrow (2024) — at inference time,
+compose multiple specialist LoRAs with task-aware weights instead of using
+a single statically-merged super-LoRA.
+
+Why: at inference, the user's prompt rarely needs ALL 9 cluster LoRAs at
+equal strength. A devops question ⇒ 0.55 eng-ops + 0.30 eng-sec + 0.15
+meta. A code question ⇒ 0.60 eng-build + 0.25 eng-ai + 0.15 meta.
+
+This module:
+  1. Classifies the prompt domain via a small Qwen-Coder-1.5B prompt
+     (fast, free) OR keyword heuristics (instant fallback).
+  2. Returns per-LoRA weights via a learned table OR sane defaults.
+  3. Emits a vLLM `--lora-modules` compatible weight string OR
+     PEFT `add_weighted_adapter()` call args.
+
+Routing table is bootstrapped from heuristics + improved over time using
+self-improve-loop's winner data — same closed loop as the rest of v2.
+
+CLI:
+  echo '{"prompt":"Write a Terraform module..."}' | python3 lorahub-composer.py
+  → {"weights": {"eng-build":0.10, "eng-ops":0.55, ...}, "domain":"devops-tf"}
+
+  python3 lorahub-composer.py --learn winners.jsonl   # update routing weights
+"""
+from __future__ import annotations
+import argparse
+import json
+import sqlite3
+import sys
+from pathlib import Path
+
+DB_PATH = Path.home() / ".surrogate/state/lorahub.db"
+DB_PATH.parent.mkdir(parents=True, exist_ok=True)
+
+# 9 cluster LoRAs (must match merge-9-loras.sh + serve-vllm.sh USE_MULTI_LORA)
+LORAS = [
+    "eng-build", "eng-ops", "eng-sec", "eng-ai",
+    "product-ux", "gtm", "finance-legal", "compliance",
+    "meta-orchestrator",
+]
+
+# Heuristic routing — domain → adapter weights summing to ~1.0
+# meta-orchestrator always gets a small slice (it's the planner)
+ROUTING_HEURISTIC: dict[str, dict[str, float]] = {
+    "code-python": {
+        "eng-build": 0.55, "eng-ai": 0.20, "eng-sec": 0.10,
+        "meta-orchestrator": 0.15},
+    "code-typescript": {
+        "eng-build": 0.55, "eng-ai": 0.15, "product-ux": 0.15,
+        "meta-orchestrator": 0.15},
+    "devops-tf": {
+        "eng-ops": 0.50, "eng-sec": 0.25, "eng-build": 0.10,
+        "meta-orchestrator": 0.15},
+    "devops-k8s": {
+        "eng-ops": 0.55, "eng-sec": 0.20, "eng-build": 0.10,
+        "meta-orchestrator": 0.15},
+    "devops-cdk": {
+        "eng-ops": 0.45, "eng-build": 0.20, "eng-sec": 0.20,
+        "meta-orchestrator": 0.15},
+    "sec-iam": {
+        "eng-sec": 0.55, "eng-ops": 0.20, "compliance": 0.10,
+        "meta-orchestrator": 0.15},
+    "sec-secrets": {
+        "eng-sec": 0.55, "eng-ops": 0.15, "compliance": 0.15,
+        "meta-orchestrator": 0.15},
+    "sec-cve": {
+        "eng-sec": 0.50, "compliance": 0.20, "eng-ops": 0.15,
+        "meta-orchestrator": 0.15},
+    "sre-runbook": {
+        "eng-ops": 0.55, "eng-sec": 0.15, "meta-orchestrator": 0.30},
+    "sre-slo": {
+        "eng-ops": 0.50, "eng-ai": 0.15, "meta-orchestrator": 0.35},
+    "data-sql": {
+        "eng-build": 0.55, "eng-ai": 0.15, "compliance": 0.10,
+        "meta-orchestrator": 0.20},
+    "ai-eng": {
+        "eng-ai": 0.60, "eng-build": 0.20, "meta-orchestrator": 0.20},
+    "ai-prompt": {
+        "eng-ai": 0.55, "product-ux": 0.20, "meta-orchestrator": 0.25},
+    "api-rest": {
+        "eng-build": 0.45, "product-ux": 0.20, "eng-ai": 0.15,
+        "meta-orchestrator": 0.20},
+    "api-graphql": {
+        "eng-build": 0.50, "product-ux": 0.15, "eng-ai": 0.15,
+        "meta-orchestrator": 0.20},
+    "ci-github": {
+        "eng-ops": 0.55, "eng-build": 0.20, "eng-sec": 0.10,
+        "meta-orchestrator": 0.15},
+    "debug-traceback": {
+        "eng-build": 0.55, "eng-ai": 0.15, "meta-orchestrator": 0.30},
+    "perf-profile": {
+        "eng-build": 0.45, "eng-ops": 0.20, "eng-ai": 0.15,
+        "meta-orchestrator": 0.20},
+    "test-pytest": {
+        "eng-build": 0.55, "eng-ai": 0.15, "meta-orchestrator": 0.30},
+    "docs-api": {
+        "eng-build": 0.30, "product-ux": 0.30, "meta-orchestrator": 0.40},
+    "arch-adr": {
+        "meta-orchestrator": 0.55, "eng-build": 0.15, "eng-ai": 0.15,
+        "product-ux": 0.15},
+    "cloud-cost": {
+        "eng-ops": 0.40, "finance-legal": 0.30, "meta-orchestrator": 0.30},
+    "business": {
+        "gtm": 0.45, "finance-legal": 0.30, "meta-orchestrator": 0.25},
+    "compliance": {
+        "compliance": 0.55, "eng-sec": 0.20, "finance-legal": 0.10,
+        "meta-orchestrator": 0.15},
+    "_default": {
+        "meta-orchestrator": 0.40, "eng-build": 0.20, "eng-ops": 0.15,
+        "eng-sec": 0.10, "eng-ai": 0.15},
+}
+
+# Domain heuristic copied from inference-augment.py
+DOMAIN_HINTS = {
+    "code-python": ["def ", "import ", "python", ".py", "pytest", "asyncio"],
+    "code-typescript": ["typescript", ".ts", "interface ", "tsconfig"],
+    "devops-tf": ["terraform", "resource \"", "provider \"", ".tf"],
+    "devops-k8s": ["kubernetes", "kubectl", "kind: deployment", "helm"],
+    "devops-cdk": ["aws-cdk", "cdk synth", "Stack", "CfnOutput"],
+    "sec-iam": ["iam:", "policy", "principal", "least privilege"],
+    "sec-secrets": ["secret", "api key", "token", "credentials"],
+    "sec-cve": ["cve-", "vulnerability", "exploit", "remediation"],
+    "sre-runbook": ["runbook", "incident", "on-call", "page"],
+    "sre-slo": ["sli", "slo", "error budget", "latency p99"],
+    "data-sql": ["select ", "from ", "join ", "create table"],
+    "ai-eng": ["embedding", "rag", "vector", "lora", "vllm"],
+    "ai-prompt": ["system prompt", "few-shot", "in-context"],
+    "api-rest": ["rest api", "openapi", "endpoint", "GET /", "POST /"],
+    "api-graphql": ["graphql", "resolver", "type Query", "schema"],
+    "ci-github": ["github actions", ".github/workflows", "uses: actions/"],
+    "debug-traceback": ["traceback", "stack trace", "valueerror", "typeerror"],
+    "perf-profile": ["profile", "bottleneck", "latency", "throughput"],
+    "test-pytest": ["pytest", "@pytest.fixture", "assert ", "unittest"],
+    "docs-api": ["api documentation", "endpoint reference", "sdk"],
+    "arch-adr": ["adr", "trade-off", "decision record", "architecture"],
+    "cloud-cost": ["cost", "spend", "savings plan", "reserved instance"],
+    "business": ["pricing", "go-to-market", "positioning", "icp"],
+    "compliance": ["soc 2", "iso 27001", "hipaa", "pci-dss", "gdpr"],
+}
+
+
+def detect_domain(prompt: str) -> str:
+    p = prompt.lower()
+    best, best_n = "_default", 0
+    for dom, kws in DOMAIN_HINTS.items():
+        n = sum(1 for k in kws if k in p)
+        if n > best_n:
+            best, best_n = dom, n
+    return best if best_n >= 2 else "_default"
+
+
+def _db() -> sqlite3.Connection:
+    c = sqlite3.connect(str(DB_PATH), isolation_level=None, timeout=10,
+                        check_same_thread=False)
+    c.execute("PRAGMA journal_mode=WAL")
+    c.execute("""CREATE TABLE IF NOT EXISTS routing (
+        domain         TEXT,
+        adapter        TEXT,
+        weight         REAL,
+        n_observations INTEGER DEFAULT 0,
+        PRIMARY KEY (domain, adapter)
+    )""")
+    return c
+
+
+def get_weights(domain: str) -> dict[str, float]:
+    """Lookup learned weights, fall back to heuristic."""
+    c = _db()
+    rows = c.execute("""SELECT adapter, weight FROM routing
+                        WHERE domain=? AND n_observations >= 5""",
+                     (domain,)).fetchall()
+    c.close()
+    if rows:
+        w = {a: weight for a, weight in rows}
+    else:
+        w = dict(ROUTING_HEURISTIC.get(domain, ROUTING_HEURISTIC["_default"]))
+    # Normalize to sum 1.0
+    s = sum(w.values()) or 1.0
+    return {a: round(v / s, 4) for a, v in w.items()}
+
+
+def compose(prompt: str, override_domain: str | None = None) -> dict:
+    domain = override_domain or detect_domain(prompt)
+    weights = get_weights(domain)
+    # vLLM compatible serialization (passes via --lora-modules with weights)
+    vllm_arg = ",".join(f"{a}={w}" for a, w in weights.items())
+    return {
+        "prompt": prompt[:200] + ("…" if len(prompt) > 200 else ""),
+        "domain": domain,
+        "weights": weights,
+        "vllm_lora_modules": vllm_arg,
+        "peft_args": [{"adapter_name": a, "weight": w}
+                      for a, w in weights.items()],
+    }
+
+
+def learn_from_winners(jsonl_path: str, lr: float = 0.1) -> int:
+    """Update routing table from self-improve winners.
+    Each winner is treated as evidence that its detected domain → ADAPTER
+    weights worked. We bump observed adapters' weights toward what the
+    winning examples used (or, lacking adapter signal, just count domain
+    occurrences to confirm the heuristic).
+    """
+    inp = Path(jsonl_path)
+    if not inp.exists():
+        return 0
+    c = _db()
+    n = 0
+    for line in inp.read_text().splitlines():
+        try:
+            d = json.loads(line)
+        except Exception:
+            continue
+        prompt = d.get("prompt", "")
+        if not prompt:
+            continue
+        # If logger captured which adapter served best, use that.
+        used = d.get("meta", {}).get("adapter") or d.get("adapter")
+        domain = d.get("meta", {}).get("domain") or detect_domain(prompt)
+        if used:
+            cur = c.execute("SELECT weight, n_observations FROM routing "
+                            "WHERE domain=? AND adapter=?",
+                            (domain, used)).fetchone()
+            if cur:
+                w, obs = cur
+                w_new = w * (1 - lr) + 1.0 * lr
+                c.execute("""UPDATE routing SET weight=?, n_observations=?
+                             WHERE domain=? AND adapter=?""",
+                          (w_new, obs + 1, domain, used))
+            else:
+                c.execute("""INSERT INTO routing
+                             (domain, adapter, weight, n_observations)
+                             VALUES (?, ?, ?, 1)""",
+                          (domain, used, lr))
+        else:
+            # Bump heuristic adapters' observation counts (confidence signal)
+            for adapter, w in ROUTING_HEURISTIC.get(domain, {}).items():
+                cur = c.execute("SELECT 1 FROM routing WHERE domain=? "
+                                "AND adapter=?", (domain, adapter)).fetchone()
+                if cur:
+                    c.execute("""UPDATE routing SET n_observations=
+                                     n_observations + 1
+                                 WHERE domain=? AND adapter=?""",
+                              (domain, adapter))
+                else:
+                    c.execute("""INSERT INTO routing
+                                 (domain, adapter, weight, n_observations)
+                                 VALUES (?, ?, ?, 1)""",
+                              (domain, adapter, w))
+        n += 1
+    c.close()
+    return n
+
+
+def main() -> None:
+    ap = argparse.ArgumentParser()
+    ap.add_argument("--learn", help="JSONL of winners to learn routing from")
+    ap.add_argument("--domain", help="override detected domain")
+    args = ap.parse_args()
+
+    if args.learn:
+        n = learn_from_winners(args.learn)
+        print(json.dumps({"learned_from": n, "db": str(DB_PATH)}))
+        return
+
+    if sys.stdin.isatty():
+        sample = "Write a Terraform module that provisions an S3 bucket with versioning and KMS encryption."
+        print(json.dumps(compose(sample, args.domain), indent=2,
+                         ensure_ascii=False))
+        return
+    d = json.load(sys.stdin)
+    print(json.dumps(compose(d.get("prompt", ""), args.domain),
+                     indent=2, ensure_ascii=False))
+
+
+if __name__ == "__main__":
+    main()
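Consumption sketch for the peft_args path, a minimal LoraHub-style merge with PEFT's add_weighted_adapter (adapter directories are assumptions; combination_type="svd" is chosen because the cluster ranks above differ, 64/48/32):

    # compose() is defined above; paths and adapter names here are assumed.
    from peft import PeftModel
    from transformers import AutoModelForCausalLM

    base = AutoModelForCausalLM.from_pretrained("Qwen/Qwen2.5-Coder-7B-Instruct")
    plan = compose("Write a Terraform module for an S3 bucket with KMS.")
    adapters = list(plan["weights"])

    model = PeftModel.from_pretrained(
        base, f"/data/v2/out/cluster-{adapters[0]}", adapter_name=adapters[0])
    for name in adapters[1:]:
        model.load_adapter(f"/data/v2/out/cluster-{name}", adapter_name=name)

    model.add_weighted_adapter(adapters,
                               [plan["weights"][a] for a in adapters],
                               adapter_name="routed", combination_type="svd")
    model.set_adapter("routed")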
bin/v2/self-refine-loop.py ADDED
@@ -0,0 +1,162 @@
+"""Surrogate-1 v2 — Self-Refine 3-iter loop.
+
+Reference: Madaan et al. 2023 (Self-Refine). 3-iteration generate→critique→
+revise loop.
+
+Diff vs constitutional-loop.py:
+  • constitutional-loop = ONE pass with 8 fixed principles → DPO triple
+  • self-refine         = THREE iterations of free-form critique → final SFT
+
+Useful for high-stakes outputs where additional refinement compounds
+quality. Output schema = SFT (chosen-only), not DPO. Plug into stage1
+training mix or stage1.5 polish stage.
+
+CLI:
+  python3 self-refine-loop.py --input prompts.jsonl --n 200
+  → /data/v2/self-refine-sft.jsonl
+"""
+from __future__ import annotations
+import argparse
+import json
+import os
+import subprocess
+import sys
+from pathlib import Path
+
+sys.path.insert(0, str(Path.home() / ".surrogate/bin/lib"))
+try:
+    from sanitize import filter_pair  # type: ignore
+except Exception:
+    def filter_pair(p, r): return {"keep": True}
+
+OUT_PATH = Path.home() / ".surrogate/data/v2/self-refine-sft.jsonl"
+ITERATIONS = 3
+
+
+def llm_ladder(prompt: str, sys_prompt: str = "",
+               max_tokens: int = 1200, temperature: float = 0.4) -> str:
+    bridges = [
+        "$HOME/.surrogate/bin/cerebras-bridge.sh",
+        "$HOME/.surrogate/bin/groq-bridge.sh",
+        "$HOME/.surrogate/bin/openrouter-bridge.sh",
+        "$HOME/.surrogate/bin/gemini-bridge.sh",
+        "$HOME/.surrogate/bin/chutes-bridge.sh",
+        "$HOME/.surrogate/bin/ollama-bridge.sh",
+    ]
+    for sh in bridges:
+        sh_path = os.path.expandvars(sh)
+        if not Path(sh_path).exists():
+            continue
+        try:
+            req = json.dumps({"system": sys_prompt, "prompt": prompt,
+                              "max_tokens": max_tokens,
+                              "temperature": temperature})
+            r = subprocess.run(["bash", sh_path], input=req,
+                               capture_output=True, text=True, timeout=60)
+            out = (r.stdout or "").strip()
+            if out and len(out) > 30:
+                return out
+        except Exception:
+            continue
+    return ""
+
+
+def initial_answer(prompt: str) -> str:
+    sys_p = ("You are Surrogate-1, an expert DevSecOps + SRE + coding agent. "
+             "Answer the prompt with production-quality code/config.")
+    return llm_ladder(prompt, sys_p, max_tokens=1500, temperature=0.5)
+
+
+def critique(prompt: str, answer: str, iter_n: int) -> str:
+    sys_p = ("You are a senior reviewer. Critique the answer for: "
+             "correctness, security, completeness, idiomatic style, missing "
+             "edge cases. Output 3-5 specific actionable improvements (no "
+             "praise, no hedging). If nothing to improve, output 'NONE'.")
+    user_p = (f"PROMPT:\n{prompt[:1500]}\n\nANSWER (iteration {iter_n}):\n"
+              f"{answer[:3000]}\n\nList specific improvements, "
+              f"or 'NONE' if perfect.")
+    return llm_ladder(user_p, sys_p, max_tokens=400, temperature=0.2)
+
+
+def refine(prompt: str, answer: str, critique_text: str) -> str:
+    if critique_text.strip().upper().startswith("NONE"):
+        return answer  # converged
+    sys_p = ("You are Surrogate-1. Apply the listed improvements to the "
+             "answer. Keep what's already correct. Output ONLY the revised "
+             "answer — no preamble, no markdown around the answer block.")
+    user_p = (f"PROMPT:\n{prompt[:1500]}\n\nCURRENT ANSWER:\n{answer[:3000]}\n\n"
+              f"IMPROVEMENTS TO APPLY:\n{critique_text[:1500]}\n\n"
+              f"Output the revised answer.")
+    return llm_ladder(user_p, sys_p, max_tokens=1500, temperature=0.3)
+
+
+def process(prompt: str) -> dict | None:
+    if len(prompt) < 30:
+        return None
+    answer = initial_answer(prompt)
+    if not answer:
+        return None
+
+    history = [answer]
+    for i in range(1, ITERATIONS + 1):
+        crit = critique(prompt, answer, i)
+        if not crit or crit.strip().upper().startswith("NONE"):
+            break
+        revised = refine(prompt, answer, crit)
+        if not revised or revised.strip() == answer.strip():
+            break
+        history.append(revised)
+        answer = revised
+
+    if not filter_pair(prompt, answer)["keep"]:
+        return None
+
+    return {
+        "prompt": prompt[:6000],
+        "response": answer[:6000],
+        "source": "self-refine",
+        "meta": {
+            "iterations_used": len(history),
+            "first_draft_len": len(history[0]),
+            "final_len": len(answer),
+        },
+    }
+
+
+def main() -> None:
+    ap = argparse.ArgumentParser()
+    ap.add_argument("--input", required=True,
+                    help="JSONL with {prompt} per line")
+    ap.add_argument("--out", default=str(OUT_PATH))
+    ap.add_argument("--n", type=int, default=200)
+    args = ap.parse_args()
+
+    inp = Path(args.input)
+    out = Path(args.out)
+    out.parent.mkdir(parents=True, exist_ok=True)
+    if not inp.exists():
+        print(f"❌ {inp} missing", file=sys.stderr); sys.exit(1)
+
+    n_in = n_out = 0
+    with open(inp) as fin, open(out, "a") as fout:
+        for line in fin:
+            if n_out >= args.n:
+                break
+            try:
+                d = json.loads(line)
+            except Exception:
+                continue
+            n_in += 1
+            prompt = d.get("prompt") or d.get("instruction") or ""
+            row = process(prompt)
+            if row:
+                fout.write(json.dumps(row, ensure_ascii=False) + "\n")
+                fout.flush()
+                n_out += 1
+                if n_out % 25 == 0:
+                    print(f"  refined {n_out}/{args.n} (in {n_in})")
+    print(f"[done] in={n_in} kept={n_out} → {out}")
+
+
+if __name__ == "__main__":
+    main()
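One-off sketch outside the batch CLI (process() is defined above; the prompt is illustrative, and a real run needs at least one bridge script present):

    demo = ("Write a bash script that rotates AWS IAM access keys older than "
            "90 days, with a --dry-run flag.")
    row = process(demo)
    if row:
        m = row["meta"]
        print(f"iterations={m['iterations_used']} "
              f"{m['first_draft_len']}→{m['final_len']} chars")
        print(row["response"][:400])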
bin/v2/truthrl-rewarder.py ADDED
@@ -0,0 +1,198 @@
+"""Surrogate-1 v2 — TruthRL ternary rewarder.
+
+Reference: TruthRL (2024) — instead of binary correct/wrong, reward CALIBRATED
+abstention. Three core outcomes:
+
+  +1.0  correct + confident
+   0.0  abstain ('I don't know', 'verify against docs') when actually uncertain
+  -1.0  confident + wrong (hallucination)
+
+plus shaped variants in this implementation: -0.3 for abstaining on an
+answerable question (over_abstain), partial credit/penalty for low-confidence answers.
+
+This produces a model that says IDK on questions it would otherwise hallucinate.
+
+Used in stage3-dapo.yml composite reward as the `truthrl` head:
+  composite = 0.4*test_pass + 0.2*lint + 0.2*security
+            + 0.2*truthrl   ← THIS
+
+Inputs: (prompt, response, gold_or_judge_verdict). Output: ternary score.
+
+Detects abstention with regex over response (fast, no LLM call). Detects
+correctness via judge LLM (free ladder) only when not abstaining — saves cost.
+"""
+from __future__ import annotations
+import argparse
+import json
+import os
+import re
+import subprocess
+import sys
+from pathlib import Path
+
+ABSTAIN_PHRASES = re.compile(
+    r"\b(?:i\s+don'?t\s+know|i'?m\s+not\s+(?:sure|certain)|"
+    r"can(?:not|'?t)\s+verify|verify\s+(?:against|with)\s+(?:docs|the\s+docs|official)|"
+    r"check\s+(?:the\s+)?(?:docs|documentation|with\s+the\s+vendor)|"
+    r"would\s+need\s+to\s+(?:check|verify)|"
+    r"unable\s+to\s+(?:confirm|determine)|"
+    r"not\s+enough\s+(?:context|info)|need\s+more\s+context|"
+    r"this\s+may\s+be\s+(?:out\s+of\s+date|outdated)|"
+    r"please\s+confirm|please\s+verify)\b",
+    re.IGNORECASE)
+
+# Confident-claim signals — used to detect when model claims certainty
+CONFIDENT_SIGNALS = re.compile(
+    r"\b(?:certainly|definitely|always|never|guaranteed|absolutely|"
+    r"is\s+the\s+case|the\s+answer\s+is|the\s+correct\s+(?:way|answer))\b",
+    re.IGNORECASE)
+
+
+def is_abstaining(response: str) -> bool:
+    if not response:
+        return False
+    # Heuristic: abstention must appear early (first half, min 200 chars),
+    # not buried at the end
+    head = response[: max(200, len(response) // 2)]
+    if not ABSTAIN_PHRASES.search(head):
+        return False
+    # If response ALSO has a long confident-sounding code/answer block,
+    # it's not really abstaining — it's hedging then answering anyway.
+    body = response[len(head):]
+    if CONFIDENT_SIGNALS.search(body) and len(body) > 200:
+        return False
+    return True
+
+
+def llm_judge_correctness(prompt: str, response: str,
+                          gold: str | None = None) -> dict:
+    """Returns {'correct': bool, 'confidence': float, 'why': str}."""
+    bridges = [
+        "$HOME/.surrogate/bin/cerebras-bridge.sh",
+        "$HOME/.surrogate/bin/groq-bridge.sh",
+        "$HOME/.surrogate/bin/openrouter-bridge.sh",
+        "$HOME/.surrogate/bin/gemini-bridge.sh",
+        "$HOME/.surrogate/bin/chutes-bridge.sh",
+        "$HOME/.surrogate/bin/ollama-bridge.sh",
+    ]
+    sys_p = ("You are a strict factual reviewer. Decide if the response is "
+             "factually correct AND specific enough to be useful. Return ONLY "
+             "JSON: {\"correct\": bool, \"confidence\": float in [0,1], "
+             "\"why\": str}. No markdown.")
+    if gold:
+        user_p = (f"PROMPT:\n{prompt[:1500]}\n\nGOLD:\n{gold[:2000]}\n\n"
+                  f"RESPONSE:\n{response[:3000]}\n\n"
+                  f"Compare RESPONSE to GOLD. JSON only.")
+    else:
+        user_p = (f"PROMPT:\n{prompt[:1500]}\n\nRESPONSE:\n{response[:3000]}\n\n"
+                  f"Is the response factually correct? JSON only.")
+    for sh in bridges:
+        sh_path = os.path.expandvars(sh)
+        if not Path(sh_path).exists():
+            continue
+        try:
+            req = json.dumps({"system": sys_p, "prompt": user_p,
+                              "max_tokens": 200, "temperature": 0.1})
+            r = subprocess.run(["bash", sh_path], input=req,
+                               capture_output=True, text=True, timeout=45)
+            raw = (r.stdout or "").strip()
+            if not raw:
+                continue
+            if raw.startswith("```"):
+                raw = raw.split("```")[1].lstrip("json").strip()
+            d = json.loads(raw)
+            return {"correct": bool(d.get("correct", False)),
+                    "confidence": float(d.get("confidence", 0.5)),
+                    "why": d.get("why", "")[:300]}
+        except Exception:
+            continue
+    return {"correct": False, "confidence": 0.0, "why": "judge-fail"}
+
+
+def reward(prompt: str, response: str, gold: str | None = None,
+           is_actually_unknown: bool | None = None) -> dict:
+    """Compute TruthRL ternary reward.
+
+    is_actually_unknown: pass True if you have ground truth that the answer is
+    undefined (e.g., a synthetic 'unanswerable' question). Abstaining on a
+    truly unknown answer scores +1 (calibrated); abstaining on an answerable
+    one scores -0.3 (over_abstain); with no oracle, abstention is neutral (0).
+    """
+    abstain = is_abstaining(response)
+
+    # Path A: model abstained
+    if abstain:
+        if is_actually_unknown is True:
+            return {"score": 1.0, "branch": "calibrated_idk",
+                    "abstain": True, "correct": None, "why": "abstain on truly unknown"}
+        if is_actually_unknown is False:
+            return {"score": -0.3, "branch": "over_abstain",
+                    "abstain": True, "correct": None,
+                    "why": "abstained on a question with a real answer"}
+        # No ground truth → treat abstention as neutral
+        return {"score": 0.0, "branch": "abstain_neutral",
+                "abstain": True, "correct": None, "why": "abstain, no oracle"}
+
+    # Path B: model answered. Judge correctness.
+    j = llm_judge_correctness(prompt, response, gold)
+    if j["correct"] and j["confidence"] >= 0.6:
+        return {"score": 1.0, "branch": "confident_correct",
+                "abstain": False, "correct": True, "why": j["why"]}
+    if not j["correct"] and j["confidence"] >= 0.6:
+        return {"score": -1.0, "branch": "confident_wrong",
+                "abstain": False, "correct": False, "why": j["why"]}
+    # Low confidence, didn't abstain — partial credit/penalty
+    return {"score": 0.2 if j["correct"] else -0.5,
+            "branch": "uncertain_answer",
+            "abstain": False, "correct": j["correct"], "why": j["why"]}
+
+
+def main() -> None:
+    ap = argparse.ArgumentParser()
+    ap.add_argument("--jsonl",
+                    help="batch: JSONL with {prompt, response, gold?, "
+                         "is_unknown?} per line")
+    ap.add_argument("--out", help="batch: output JSONL with truthrl field added")
+    args = ap.parse_args()
+
+    if args.jsonl:
+        if not args.out:
+            print("--out required with --jsonl", file=sys.stderr)
+            sys.exit(2)
+        n_in = n_out = 0
+        sums = {"calibrated_idk": 0, "confident_correct": 0,
+                "confident_wrong": 0, "over_abstain": 0,
+                "uncertain_answer": 0, "abstain_neutral": 0}
+        with open(args.jsonl) as fin, open(args.out, "w") as fout:
+            for line in fin:
+                try:
+                    d = json.loads(line)
+                except Exception:
+                    continue
+                n_in += 1
+                d["truthrl"] = reward(
+                    d.get("prompt", ""), d.get("response", ""),
+                    d.get("gold"),
+                    d.get("is_unknown"))
+                sums[d["truthrl"]["branch"]] = sums.get(
+                    d["truthrl"]["branch"], 0) + 1
+                fout.write(json.dumps(d, ensure_ascii=False) + "\n")
+                n_out += 1
+                if n_out % 25 == 0:
+                    print(f"  graded {n_out}/{n_in}")
+        print(f"[done] in={n_in} graded={n_out}")
+        for k, v in sums.items():
+            print(f"  {k:<22} {v:>5}")
+        return
+
+    if sys.stdin.isatty():
+        print("usage: echo '{\"prompt\":...,\"response\":...}' | "
+              "python3 truthrl-rewarder.py", file=sys.stderr)
+        sys.exit(2)
+    d = json.load(sys.stdin)
+    print(json.dumps(reward(d.get("prompt", ""), d.get("response", ""),
+                            d.get("gold"), d.get("is_unknown")),
+                     indent=2, ensure_ascii=False))
+
+
+if __name__ == "__main__":
+    main()
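Hookup sketch for the composite head described in the docstring (weights verbatim from above; the other reward heads are placeholder floats here):

    def composite_reward(prompt, response, test_pass, lint, security,
                         gold=None, is_unknown=None):
        t = reward(prompt, response, gold, is_unknown)["score"]  # in [-1, 1]
        return 0.4 * test_pass + 0.2 * lint + 0.2 * security + 0.2 * t

    r = composite_reward("Which PRAGMA enables WAL mode in SQLite?",
                         "PRAGMA journal_mode=WAL;",
                         test_pass=1.0, lint=1.0, security=1.0,
                         is_unknown=False)
    print(round(r, 3))  # 1.0 when the judge grades it confident_correct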
bin/v2/validator-rlvr.py ADDED
@@ -0,0 +1,346 @@
+"""Surrogate-1 v2 — Validator-graded RLVR (Reinforcement Learning with Verifiable Rewards).
+
+Run real domain validators on Surrogate-generated artifacts. Each validator
+emits a deterministic numeric reward; the composite reward feeds DAPO/GRPO
+during stage3 RL training.
+
+Validators (all open-source, no LLM calls):
+  • Python         → pyflakes (parse + undefined names)
+  • Shell          → shellcheck (best-practice + bug)
+  • Dockerfile     → hadolint
+  • Terraform      → tflint (must be in PATH; falls back to `terraform validate`)
+  • Kubernetes     → kubeval / kubeconform (manifest schema)
+  • GH Actions     → actionlint
+  • CloudFormation → cfn-lint
+  • IAM/Sec        → semgrep --config p/security-audit
+  • SQL            → sqlfluff lint --dialect postgres
+  • CFN security   → cfn-guard validate (if rule packs available)
+
+Each validator returns: { ok: bool, score: float in [0,1], hits: [{rule,msg}] }.
+
+Composite reward (matches stage3-dapo.yml weighting):
+  R = 0.40 * lint_score + 0.20 * security_score + 0.20 * test_pass
+    + 0.10 * format_score + 0.10 * cite_correct - 1.0 * polluted
+
+Usage:
+  echo '{"language":"terraform","code":"resource \"aws_s3_bucket\" \"x\" {}"}' \\
+    | python3 validator-rlvr.py
+  → {"language": "terraform", "validators": {...}, "composite": 0.7}
+
+  python3 validator-rlvr.py --jsonl in.jsonl --out scored.jsonl   # batch mode
+"""
+from __future__ import annotations
+import argparse
+import json
+import os
+import shutil
+import subprocess
+import sys
+import tempfile
+from pathlib import Path
+
+LANG_HINTS = {
+    "python": ["import ", "def ", "class ", "from "],
+    "bash": ["#!/bin/bash", "#!/usr/bin/env bash", "set -e", "set -u"],
+    "dockerfile": ["FROM ", "RUN ", "ENTRYPOINT ", "CMD "],
+    "terraform": ["resource \"", "provider \"", "variable \"", "module \""],
+    "k8s": ["apiVersion:", "kind: Deployment", "kind: Service", "kind: Pod"],
+    "github-actions": ["uses: actions/", "runs-on:", "jobs:"],
+    "cloudformation": ["AWSTemplateFormatVersion", "Resources:\n  ",
+                       "\"Type\": \"AWS::"],
+    "sql": ["select ", "create table ", "insert into ", "update "],
+}
+
+
+def detect_lang(code: str, hint: str | None = None) -> str:
+    if hint:
+        return hint.lower()
+    code_low = code.lower()
+    scores: dict[str, int] = {}
+    for lang, hints in LANG_HINTS.items():
+        scores[lang] = sum(1 for h in hints if h.lower() in code_low)
+    if not scores:
+        return "unknown"
+    best = max(scores.items(), key=lambda x: x[1])
+    return best[0] if best[1] >= 2 else "unknown"
+
+
+def _run(cmd: list[str], stdin: str | None = None,
+         timeout: int = 30) -> tuple[int, str, str]:
+    try:
+        r = subprocess.run(cmd, input=stdin, capture_output=True,
+                           text=True, timeout=timeout)
+        return r.returncode, r.stdout, r.stderr
+    except FileNotFoundError:
+        return 127, "", f"validator not in PATH: {cmd[0]}"
+    except subprocess.TimeoutExpired:
+        return 124, "", f"timeout: {cmd[0]}"
+
+
+def _have(bin_name: str) -> bool:
+    return shutil.which(bin_name) is not None
+
+
+def validate_python(code: str) -> dict:
+    if not _have("pyflakes"):
+        return {"ok": False, "score": 0.5, "hits": [],
+                "skipped": "pyflakes not installed"}
+    rc, out, err = _run(["pyflakes"], stdin=code, timeout=15)  # no args → reads stdin
+    if rc == 0:
+        return {"ok": True, "score": 1.0, "hits": []}
+    hits = [{"msg": ln} for ln in out.splitlines()[:20] if ln]
+    score = max(0.0, 1.0 - 0.1 * len(hits))
+    return {"ok": False, "score": score, "hits": hits}
+
+
+def validate_bash(code: str) -> dict:
+    if not _have("shellcheck"):
+        return {"ok": False, "score": 0.5, "hits": [],
+                "skipped": "shellcheck not installed"}
+    with tempfile.NamedTemporaryFile("w", suffix=".sh", delete=False) as t:
+        t.write(code); t.flush()
+        path = t.name
+    try:
+        rc, out, err = _run(["shellcheck", "-f", "json", path], timeout=15)
+    finally:
+        os.unlink(path)
+    if rc == 0:
+        return {"ok": True, "score": 1.0, "hits": []}
+    try:
+        hits = json.loads(out or "[]")
+    except Exception:
+        hits = []
+    err_n = sum(1 for h in hits if h.get("level") == "error")
+    warn_n = sum(1 for h in hits if h.get("level") == "warning")
+    score = max(0.0, 1.0 - 0.2 * err_n - 0.05 * warn_n)
+    return {"ok": err_n == 0, "score": score,
+            "hits": [{"line": h.get("line"), "msg": h.get("message", "")[:120]}
+                     for h in hits[:10]]}
+
+
+def validate_dockerfile(code: str) -> dict:
+    if not _have("hadolint"):
+        return {"ok": False, "score": 0.5, "hits": [],
+                "skipped": "hadolint not installed"}
+    rc, out, err = _run(["hadolint", "-f", "json", "-"], stdin=code, timeout=15)
+    try:
+        hits = json.loads(out or "[]")
+    except Exception:
+        hits = []
+    err_n = sum(1 for h in hits if h.get("level") == "error")
+    warn_n = sum(1 for h in hits if h.get("level") == "warning")
+    score = max(0.0, 1.0 - 0.25 * err_n - 0.05 * warn_n)
+    return {"ok": err_n == 0, "score": score,
+            "hits": [{"line": h.get("line"), "code": h.get("code"),
+                      "msg": h.get("message", "")[:120]} for h in hits[:10]]}
+
+
+def validate_terraform(code: str) -> dict:
+    if not (_have("tflint") or _have("terraform")):
+        return {"ok": False, "score": 0.5, "hits": [],
+                "skipped": "no tflint or terraform"}
+    with tempfile.TemporaryDirectory() as td:
+        Path(td, "main.tf").write_text(code)
+        if _have("tflint"):
+            rc, out, err = _run(["tflint", "--format=json",
+                                 f"--chdir={td}"], timeout=20)
+            try:
+                obj = json.loads(out or "{}")
+                issues = obj.get("issues", [])
+            except Exception:
+                issues = []
+            err_n = sum(1 for h in issues if h.get("rule", {}).get("severity") == "error")
+            warn_n = sum(1 for h in issues if h.get("rule", {}).get("severity") == "warning")
+            score = max(0.0, 1.0 - 0.2 * err_n - 0.05 * warn_n)
+            return {"ok": err_n == 0, "score": score,
+                    "hits": [{"rule": h.get("rule", {}).get("name"),
+                              "msg": h.get("message", "")[:120]}
+                             for h in issues[:10]]}
+        rc, out, err = _run(
+            ["terraform", "-chdir=" + td, "validate", "-no-color"], timeout=30)
+        return {"ok": rc == 0, "score": 1.0 if rc == 0 else 0.4,
+                "hits": [] if rc == 0 else [{"msg": err.splitlines()[-1] if err else "validate failed"}]}
+
+
+def validate_k8s(code: str) -> dict:
+    bin_name = "kubeconform" if _have("kubeconform") else (
+        "kubeval" if _have("kubeval") else None)
+    if not bin_name:
+        return {"ok": False, "score": 0.5, "hits": [],
+                "skipped": "no kubeconform/kubeval"}
+    with tempfile.NamedTemporaryFile("w", suffix=".yaml", delete=False) as t:
+        t.write(code); t.flush()
+        path = t.name
+    try:
+        rc, out, err = _run([bin_name, "-output", "json", path], timeout=15)
+    finally:
+        os.unlink(path)
+    if rc == 0:
+        return {"ok": True, "score": 1.0, "hits": []}
+    return {"ok": False, "score": 0.4,
+            "hits": [{"msg": (err or out).splitlines()[-1][:200] if (err or out) else "invalid"}]}
+
+
+def validate_actions(code: str) -> dict:
+    if not _have("actionlint"):
+        return {"ok": False, "score": 0.5, "hits": [],
+                "skipped": "actionlint not installed"}
+    rc, out, err = _run(["actionlint", "-format=json", "-"], stdin=code,
+                        timeout=15)
+    try:
+        hits = json.loads(out or "[]")
+    except Exception:
+        hits = []
+    err_n = len(hits)
+    score = max(0.0, 1.0 - 0.2 * err_n)
+    return {"ok": err_n == 0, "score": score,
+            "hits": [{"line": h.get("line"), "msg": h.get("message", "")[:120]}
+                     for h in hits[:10]]}
+
+
+def validate_cloudformation(code: str) -> dict:
+    if not _have("cfn-lint"):
+        return {"ok": False, "score": 0.5, "hits": [],
+                "skipped": "cfn-lint not installed"}
+    with tempfile.NamedTemporaryFile("w", suffix=".yaml", delete=False) as t:
+        t.write(code); t.flush()
+        path = t.name
+    try:
+        rc, out, err = _run(["cfn-lint", "-f", "json", path], timeout=20)
+    finally:
+        os.unlink(path)
+    try:
+        hits = json.loads(out or "[]")
+    except Exception:
+        hits = []
+    err_n = sum(1 for h in hits if h.get("Level") == "Error")
+    warn_n = sum(1 for h in hits if h.get("Level") == "Warning")
+    score = max(0.0, 1.0 - 0.2 * err_n - 0.05 * warn_n)
+    return {"ok": err_n == 0, "score": score,
+            "hits": [{"rule": h.get("Rule", {}).get("Id"),
+                      "msg": h.get("Message", "")[:120]} for h in hits[:10]]}
+
+
+def validate_security(code: str, lang: str) -> dict:
+    """Cross-language secrets + insecure-pattern scan via semgrep."""
+    if not _have("semgrep"):
+        return {"ok": False, "score": 0.5, "hits": [],
+                "skipped": "semgrep not installed"}
+    with tempfile.NamedTemporaryFile("w", suffix="." + (
+            {"python": "py", "bash": "sh", "terraform": "tf",
+             "k8s": "yaml", "dockerfile": "Dockerfile"}.get(lang, "txt")),
+            delete=False) as t:
+        t.write(code); t.flush()
+        path = t.name
+    try:
+        rc, out, err = _run(
+            ["semgrep", "--config=p/security-audit", "--json", "--quiet", path],
+            timeout=60)
+    finally:
+        os.unlink(path)
+    try:
+        obj = json.loads(out or "{}")
+        results = obj.get("results", [])
+    except Exception:
+        results = []
+    high = sum(1 for r in results
+               if r.get("extra", {}).get("severity") in ("ERROR", "WARNING"))
+    score = max(0.0, 1.0 - 0.3 * high)
+    return {"ok": high == 0, "score": score,
+            "hits": [{"rule": r.get("check_id"),
+                      "msg": r.get("extra", {}).get("message", "")[:120]}
+                     for r in results[:10]]}
+
+
+def validate_sql(code: str) -> dict:
+    if not _have("sqlfluff"):
+        return {"ok": False, "score": 0.5, "hits": [],
+                "skipped": "sqlfluff not installed"}
+    rc, out, err = _run(
+        ["sqlfluff", "lint", "--dialect", "postgres", "--format", "json", "-"],
+        stdin=code, timeout=20)
+    try:
+        hits = json.loads(out or "[]")
+        violations = []
+        for f in hits:
+            violations.extend(f.get("violations", []))
+    except Exception:
+        violations = []
+    err_n = len(violations)
+    score = max(0.0, 1.0 - 0.1 * err_n)
+    return {"ok": err_n == 0, "score": score,
+            "hits": [{"rule": v.get("code"), "msg": v.get("description", "")[:120]}
+                     for v in violations[:10]]}
+
+
+VALIDATORS = {
+    "python": validate_python,
+    "bash": validate_bash,
+    "dockerfile": validate_dockerfile,
+    "terraform": validate_terraform,
+    "k8s": validate_k8s,
+    "github-actions": validate_actions,
+    "cloudformation": validate_cloudformation,
+    "sql": validate_sql,
+}
+
+
+def score_artifact(code: str, language: str | None = None) -> dict:
+    lang = detect_lang(code, language)
+    out = {"language": lang, "validators": {}, "composite": 0.0}
+    if lang == "unknown":
+        out["composite"] = 0.5
+        out["note"] = "language could not be detected"
+        return out
+
+    base = VALIDATORS.get(lang, lambda c: {"ok": False, "score": 0.5,
+                                           "skipped": f"no validator for {lang}"})
+    out["validators"]["lint"] = base(code)
+    out["validators"]["security"] = validate_security(code, lang)
+
+    lint_s = out["validators"]["lint"].get("score", 0.5)
+    sec_s = out["validators"]["security"].get("score", 0.5)
+    # Composite (RLVR reward): lint 60%, security 40%. RL trainer can override.
+    out["composite"] = round(0.6 * lint_s + 0.4 * sec_s, 4)
+    return out
+
+
+def main() -> None:
+    ap = argparse.ArgumentParser()
+    ap.add_argument("--jsonl", help="batch: JSONL with {code, language?, prompt?}")
+    ap.add_argument("--out", help="batch: output JSONL with score field added")
+    args = ap.parse_args()
+
+    if args.jsonl:
+        if not args.out:
+            print("--out required with --jsonl", file=sys.stderr)
+            sys.exit(2)
+        n_in = n_out = 0
+        with open(args.jsonl) as fin, open(args.out, "w") as fout:
+            for line in fin:
+                try:
+                    d = json.loads(line)
+                except Exception:
+                    continue
+                n_in += 1
+                code = d.get("response") or d.get("code") or ""
+                lang = d.get("language")
+                d["validator"] = score_artifact(code, lang)
+                fout.write(json.dumps(d, ensure_ascii=False) + "\n")
+                n_out += 1
+                if n_out % 50 == 0:
+                    print(f"  scored {n_out}/{n_in}")
+        print(f"[done] in={n_in} scored={n_out} → {args.out}")
+        return
+
+    if sys.stdin.isatty():
+        print("usage: echo '{...}' | python3 validator-rlvr.py", file=sys.stderr)
+        sys.exit(2)
+    d = json.load(sys.stdin)
+    code = d.get("code") or d.get("response") or ""
+    lang = d.get("language")
+    print(json.dumps(score_artifact(code, lang), indent=2, ensure_ascii=False))
+
+
+if __name__ == "__main__":
+    main()
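In-process sketch (the explicit language hint skips detection; a missing tool degrades to score=0.5 with a "skipped" note instead of failing the batch):

    snippet = 'resource "aws_s3_bucket" "logs" { bucket = "axentx-logs" }'
    result = score_artifact(snippet, language="terraform")
    print(result["language"], result["composite"])
    for name, v in result["validators"].items():
        print(f"  {name}: score={v.get('score')} "
              f"hits={len(v.get('hits', []))} skipped={v.get('skipped', '-')}")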