File size: 6,550 Bytes
17967dd
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
"""Surrogate-1 v2 β€” Inference-time prompt augmentation.

Glues reflexion-store + voyager-skills into the serving prompt so the
model gets free in-context lessons + validated snippets without retraining.

Used as a sidecar by serve-vllm.sh: every incoming prompt is passed
through `augment(prompt, domain)` before being sent to vLLM.

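Adds (under explicit headers, easy to strip):
  ## Past lessons (do NOT repeat these mistakes)  : top-3 similar lessons by default
  ## Validated snippets (proven in production)    : top-3 promoted skills by tag by default
  ## User request                                 : the original prompt, placed last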

If neither store has hits, returns prompt unchanged.
"""
from __future__ import annotations
import importlib.util
import json
import sys
from pathlib import Path

# Directory under the user's home that holds the sidecar scripts loaded by path.
V2_DIR = Path.home().joinpath(".surrogate", "bin", "v2")


def _load(name: str):
    """Import ``V2_DIR/<name>.py`` by file path, or return ``None``.

    The file is loaded by path and registered under ``name`` with hyphens
    replaced by underscores, since a hyphenated stem is not a valid module
    name for a regular ``import`` statement.

    Loading is best-effort: a missing file, an unusable import spec, or any
    exception raised while the module body runs yields ``None`` so the caller
    can keep working without that module. Failures other than a missing file
    are reported on stderr instead of being dropped silently.

    Args:
        name: File stem inside ``V2_DIR``, e.g. ``"reflexion-store"``.

    Returns:
        The loaded module object, or ``None`` when it could not be loaded.
    """
    path = V2_DIR / f"{name}.py"
    if not path.exists():
        return None

    mod_name = name.replace("-", "_")
    spec = importlib.util.spec_from_file_location(mod_name, str(path))
    if spec is None or spec.loader is None:
        print(f"[augment] no import spec/loader for {path}", file=sys.stderr)
        return None

    previous = sys.modules.get(mod_name)
    try:
        mod = importlib.util.module_from_spec(spec)
        # Register before executing, as the importlib recipe does: code in the
        # loaded file that looks itself up via sys.modules (dataclasses with
        # string annotations, pickle, ...) fails otherwise.
        sys.modules[mod_name] = mod
        spec.loader.exec_module(mod)
    except Exception as exc:
        # Do not leave a half-initialised module behind.
        if previous is None:
            sys.modules.pop(mod_name, None)
        else:
            sys.modules[mod_name] = previous
        print(f"[augment] could not load {path}: {exc!r}", file=sys.stderr)
        return None
    return mod


# Optional stores: each is None when _load() could not provide the module.
_REFLEX, _VOYAGER = _load("reflexion-store"), _load("voyager-skills")


# Reserved Hermes-3 markup tokens (2026 spec, see
# github.com/NousResearch/Hermes-Function-Calling).
# They are meant to appear both in training-time templates and in
# inference-time prompts so the model picks up their use implicitly.
HERMES3_TOKENS = {
    # tool catalogue
    "tools_open": "<tools>",
    "tools_close": "</tools>",
    # tool invocation and its result
    "tool_call_open": "<tool_call>",
    "tool_call_close": "</tool_call>",
    "tool_resp_open": "<tool_response>",
    "tool_resp_close": "</tool_response>",
    # reasoning sections
    "scratchpad": "<SCRATCHPAD>",
    "scratchpad_end": "</SCRATCHPAD>",
    "plan": "<PLAN>",
    "plan_end": "</PLAN>",
    "reflection": "<REFLECTION>",
    "reflection_end": "</REFLECTION>",
}


def build_hermes3_system_prompt(tool_schemas: list[dict] | None = None) -> str:
    """Build the Hermes-3 style system prompt as one newline-joined string.

    The prompt always starts with the agent identity line and the three
    instructions for the SCRATCHPAD / PLAN / REFLECTION sections. When
    ``tool_schemas`` is non-empty, a ``<tools>`` section follows (separated
    by a blank line) with one JSON-encoded schema per line, then the
    ``<tool_call>`` usage hint. The original author describes the layout as
    compatible with the vLLM tool parser.

    Args:
        tool_schemas: JSON-serialisable tool descriptions; ``None`` or an
            empty list omits the tools section.

    Returns:
        The rendered system prompt.
    """
    lines = [
        "You are Surrogate-1, an expert DevSecOps + SRE + coding agent.",
        "When you need to think before acting, use <SCRATCHPAD>...</SCRATCHPAD>.",
        "When you draft a multi-step plan, use <PLAN>...</PLAN>.",
        "When you reflect on what worked or failed, use <REFLECTION>...</REFLECTION>.",
    ]
    if not tool_schemas:
        return "\n".join(lines)

    # The leading "\n" yields a blank line between instructions and tools.
    lines += ["\nYou have access to the following tools:", "<tools>"]
    lines.extend(json.dumps(schema, ensure_ascii=False)
                 for schema in tool_schemas)
    usage_hint = ("Invoke a tool with: "
                  "<tool_call>{\"name\": \"<tool>\", \"arguments\": {...}}</tool_call>")
    lines += ["</tools>", usage_hint]
    return "\n".join(lines)


# Domain heuristic — keyword-only, fast, no LLM call.
# Keywords are matched as case-insensitive substrings of the prompt, so
# mixed-case entries such as "Stack" / "CfnOutput" are fine here.
DOMAIN_HINTS = {
    "code-python":   ["def ", "import ", "python", ".py", "pytest", "asyncio"],
    "code-typescript": ["typescript", ".ts", "interface ", "tsconfig", "node_modules"],
    "devops-tf":     ["terraform", "resource \"", "provider \"", "tf state", ".tf"],
    "devops-k8s":    ["kubernetes", "kubectl", "kind: deployment", "kind: service",
                      "namespace", "helm"],
    "devops-cdk":    ["aws-cdk", "cdk synth", "Stack", "CfnOutput"],
    "sec-iam":       ["iam:", "policy", "principal", "assume role", "least privilege"],
    "sec-secrets":   ["secret", "api key", "token", "password", "credentials"],
    "sec-cve":       ["cve-", "vulnerability", "exploit", "patch", "remediation"],
    "sre-runbook":   ["runbook", "incident", "on-call", "page", "escalation"],
    "sre-slo":       ["sli", "slo", "error budget", "latency p99", "availability"],
    "data-sql":      ["select ", "from ", "join ", "where ", "create table"],
    "ai-eng":        ["embedding", "rag", "vector", "lora", "fine-tune", "vllm"],
    "ci-github":     ["github actions", ".github/workflows", "uses: actions/", "runs-on:"],
}


def detect_domain(prompt: str, min_hits: int = 2) -> str | None:
    """Guess the domain of *prompt* by counting keyword hits per domain.

    Each domain in ``DOMAIN_HINTS`` scores one point per keyword that occurs
    as a substring of the prompt; comparison is case-insensitive on both
    sides. The domain with the most hits wins, and on a tie the domain
    listed first in ``DOMAIN_HINTS`` is kept.

    Args:
        prompt: Text to classify. ``None`` or an empty string yields ``None``.
        min_hits: Minimum number of distinct keyword hits the winning domain
            needs; defaults to 2 so a single incidental word does not decide.

    Returns:
        The winning domain key, or ``None`` when no domain reaches
        ``min_hits``.
    """
    if not prompt:
        return None
    haystack = prompt.lower()
    best, best_hits = None, 0
    for domain, keywords in DOMAIN_HINTS.items():
        # Lower-case the keyword too: the haystack is lower-cased, so a
        # mixed-case keyword could otherwise never match.
        hits = sum(1 for kw in keywords if kw.lower() in haystack)
        if hits > best_hits:  # strict '>' keeps the earliest domain on ties
            best, best_hits = domain, hits
    return best if best_hits >= min_hits else None


def _clip_text(value, limit: int) -> str:
    """Return *value* as text cut to *limit* characters ("" for falsy values)."""
    if not value:
        return ""
    # Store contents are external data: coerce numbers, dicts, etc. to text
    # instead of failing on the slice.
    text = value if isinstance(value, str) else str(value)
    return text[:limit]


def _code_fence(code: str) -> str:
    """Return a backtick fence longer than any backtick run inside *code*."""
    longest = run = 0
    for ch in code:
        run = run + 1 if ch == "`" else 0
        longest = max(longest, run)
    return "`" * max(3, longest + 1)


def _format_lessons(lessons, max_each_chars: int) -> str:
    """Render lesson entries as the numbered "Past lessons" block ("" if none)."""
    entries = [e for e in lessons if hasattr(e, "get")]
    if not entries:
        return ""
    block = ["## Past lessons (do NOT repeat these mistakes)"]
    for i, entry in enumerate(entries, 1):
        err = _clip_text(entry.get("error"), max_each_chars)
        ref = _clip_text(entry.get("reflection"), max_each_chars)
        fix = _clip_text(entry.get("fix"), max_each_chars)
        block.append(
            f"{i}. error_signal: {err}\n"
            f"   lesson: {ref}\n"
            f"   correct_pattern: {fix}")
    return "\n".join(block)


def _format_skills(skills, max_each_chars: int) -> str:
    """Render skill entries as the fenced "Validated snippets" block ("" if none)."""
    entries = [e for e in skills if hasattr(e, "get")]
    if not entries:
        return ""
    block = ["## Validated snippets (proven in production)"]
    for entry in entries:
        code = _clip_text(entry.get("code"), max_each_chars)
        # Fall back to the name; either may be missing or explicitly null.
        desc = _clip_text(entry.get("description") or entry.get("name"), 200)
        fence = _code_fence(code)
        block.append(f"- {desc}\n{fence}\n{code}\n{fence}")
    return "\n".join(block)


def augment(prompt: str, domain: str | None = None,
            k_lessons: int = 3, k_skills: int = 3,
            max_each_chars: int = 600) -> str:
    """Prepend lesson and snippet context to *prompt*.

    Lessons come from ``_REFLEX.retrieve_similar`` and snippets from
    ``_VOYAGER.search``; each lookup is skipped when its module is ``None``.
    Lookups are best-effort: an exception from a store counts as "no hits".
    Entries that are not mapping-like are skipped, and field values are
    coerced to text before truncation.

    Args:
        prompt: The user request. ``None`` is treated as an empty string.
        domain: Domain key such as ``"devops-tf"``; when falsy it is guessed
            with ``detect_domain``. Its part before the first ``-`` is passed
            to the skill search as the only tag.
        k_lessons: Number of lessons requested from the lesson store.
        k_skills: Number of snippets requested from the skill store.
        max_each_chars: Character cap applied to every lesson field and to
            each snippet's code (descriptions are capped at 200).

    Returns:
        The prompt unchanged when neither store yields anything usable;
        otherwise the context blocks, a ``## User request`` header, and the
        prompt, in that order.
    """
    if prompt is None:
        prompt = ""
    domain = domain or detect_domain(prompt)
    parts: list[str] = []

    if _REFLEX is not None:
        try:
            lessons = _REFLEX.retrieve_similar(prompt, domain, k=k_lessons)
        except Exception:
            # Best-effort: a failing store must not block serving.
            lessons = []
        block = _format_lessons(lessons or (), max_each_chars)
        if block:
            parts.append(block)

    if _VOYAGER is not None:
        try:
            tags = [domain.split("-")[0]] if domain else []
            skills = _VOYAGER.search(prompt, tags=tags, limit=k_skills,
                                     only_promoted=True)
        except Exception:
            skills = []
        block = _format_skills(skills or (), max_each_chars)
        if block:
            parts.append(block)

    if not parts:
        return prompt
    return "\n\n".join(parts) + "\n\n## User request\n" + prompt


# CLI: read JSON {prompt, domain?} from stdin, print {prompt: augmented} JSON.
# With a terminal on stdin a built-in demo prompt is augmented and printed as
# plain text. Any input problem is reported as {"error": ...} with exit code 1.
if __name__ == "__main__":
    if sys.stdin.isatty():
        # Demo mode
        demo = ("Write a Terraform module that provisions an S3 bucket "
                "with versioning and KMS encryption.")
        print(augment(demo))
    else:
        try:
            payload = json.load(sys.stdin)
        except Exception as e:
            print(json.dumps({"error": f"bad json: {e}"}))
            sys.exit(1)
        # Valid JSON can still be a list, string, or number; those have no
        # .get() and would otherwise end in an uncaught traceback.
        if not isinstance(payload, dict):
            print(json.dumps(
                {"error": "bad json: expected an object with a 'prompt' key"}))
            sys.exit(1)
        raw_prompt = payload.get("prompt")
        # A missing or null prompt becomes "", other non-strings become text.
        if raw_prompt is None:
            raw_prompt = ""
        elif not isinstance(raw_prompt, str):
            raw_prompt = str(raw_prompt)
        raw_domain = payload.get("domain")
        # Only a non-empty string is a usable domain; otherwise let augment guess.
        if not (isinstance(raw_domain, str) and raw_domain):
            raw_domain = None
        out = augment(raw_prompt, raw_domain)
        print(json.dumps({"prompt": out}, ensure_ascii=False))