Spaces:
Runtime error
Runtime error
| """Surrogate-1 v2 — Inference-time prompt augmentation. | |
| Glues reflexion-store + voyager-skills into the serving prompt so the | |
| model gets free in-context lessons + validated snippets without retraining. | |
| Used as a sidecar by serve-vllm.sh: every incoming prompt is passed | |
| through `augment(prompt, domain)` before being sent to vLLM. | |
| Adds (under explicit headers, easy to strip): | |
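| ## Past lessons (do NOT repeat these mistakes) -- top-3 similar | |
| ## Validated snippets (proven in production) -- top-3 by tag | |
| followed by the original prompt under a "## User request" header. | |
| If neither store has hits, returns prompt unchanged. | |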
| """ | |
| from __future__ import annotations | |
| import importlib.util | |
| import json | |
| import sys | |
| from pathlib import Path | |
# Directory the sidecar scripts are loaded from, under the user's home.
V2_DIR = Path.home() / ".surrogate" / "bin" / "v2"
def _load(name: str):
    """Best-effort import of a script in ``V2_DIR`` by its file stem.

    File stems may contain hyphens, which are not valid in module names, so
    the file is loaded by path and the module is named with hyphens replaced
    by underscores.

    Args:
        name: File stem under ``V2_DIR`` (without the ``.py`` suffix).

    Returns:
        The executed module, or ``None`` when the file does not exist, no
        import spec/loader can be built for it, or executing it raises.
    """
    path = V2_DIR / f"{name}.py"
    if not path.exists():
        return None

    mod_name = name.replace("-", "_")
    spec = importlib.util.spec_from_file_location(mod_name, str(path))
    # spec_from_file_location may return None and a spec may carry no loader.
    if spec is None or spec.loader is None:
        return None

    mod = importlib.util.module_from_spec(spec)
    # Register the module before executing it, as the importlib recipe does:
    # code that resolves its own module through sys.modules (dataclasses,
    # pickle, ...) fails during exec_module otherwise.
    previous = sys.modules.get(mod_name)
    sys.modules[mod_name] = mod
    try:
        spec.loader.exec_module(mod)
    except Exception:
        # Deliberately best-effort: a broken script yields None instead of
        # propagating. Undo the registration so no half-initialised module
        # stays importable.
        if previous is None:
            sys.modules.pop(mod_name, None)
        else:
            sys.modules[mod_name] = previous
        return None
    return mod
# Optional stores, loaded once at import time. Either is None when _load
# could not provide the module; augment() checks for that before use.
_REFLEX = _load(name="reflexion-store")
_VOYAGER = _load(name="voyager-skills")
# Hermes-3 reserved tokens (2026 spec, github.com/NousResearch/Hermes-Function-Calling)
# Bake into training-time templates AND inference-time prompts so the model
# learns to use them implicitly.
# Keys are short symbolic names; values are the literal tag strings.
HERMES3_TOKENS = dict(
    tools_open="<tools>",
    tools_close="</tools>",
    tool_call_open="<tool_call>",
    tool_call_close="</tool_call>",
    tool_resp_open="<tool_response>",
    tool_resp_close="</tool_response>",
    scratchpad="<SCRATCHPAD>",
    scratchpad_end="</SCRATCHPAD>",
    plan="<PLAN>",
    plan_end="</PLAN>",
    reflection="<REFLECTION>",
    reflection_end="</REFLECTION>",
)
def build_hermes3_system_prompt(tool_schemas: list[dict] | None = None) -> str:
    """Build the Hermes-3 style system prompt as one newline-joined string.

    The prompt always opens with the persona line and the three tag-usage
    hints. When ``tool_schemas`` is non-empty, a ``<tools>`` section follows
    with one JSON-serialised schema per line, then the tool-call syntax hint.
    Intended to be compatible with the vLLM tool parser.

    Args:
        tool_schemas: Tool schema dicts to advertise; ``None`` or empty
            omits the tools section entirely.

    Returns:
        The rendered system prompt text.
    """
    lines = [
        "You are Surrogate-1, an expert DevSecOps + SRE + coding agent.",
        "When you need to think before acting, use <SCRATCHPAD>...</SCRATCHPAD>.",
        "When you draft a multi-step plan, use <PLAN>...</PLAN>.",
        "When you reflect on what worked or failed, use <REFLECTION>...</REFLECTION>.",
    ]
    if not tool_schemas:
        return "\n".join(lines)

    # The leading "\n" leaves a blank line between the hints and the tools.
    lines += ["\nYou have access to the following tools:", "<tools>"]
    lines.extend(json.dumps(schema, ensure_ascii=False)
                 for schema in tool_schemas)
    lines += [
        "</tools>",
        'Invoke a tool with: '
        '<tool_call>{"name": "<tool>", "arguments": {...}}</tool_call>',
    ]
    return "\n".join(lines)
# Domain heuristic — keyword-only, fast, no LLM call.
# Maps a domain label to substrings whose presence in a prompt votes for it.
DOMAIN_HINTS = {
    "code-python": ["def ", "import ", "python", ".py", "pytest", "asyncio"],
    "code-typescript": ["typescript", ".ts", "interface ", "tsconfig", "node_modules"],
    "devops-tf": ["terraform", "resource \"", "provider \"", "tf state", ".tf"],
    "devops-k8s": ["kubernetes", "kubectl", "kind: deployment", "kind: service",
                   "namespace", "helm"],
    "devops-cdk": ["aws-cdk", "cdk synth", "Stack", "CfnOutput"],
    "sec-iam": ["iam:", "policy", "principal", "assume role", "least privilege"],
    "sec-secrets": ["secret", "api key", "token", "password", "credentials"],
    "sec-cve": ["cve-", "vulnerability", "exploit", "patch", "remediation"],
    "sre-runbook": ["runbook", "incident", "on-call", "page", "escalation"],
    "sre-slo": ["sli", "slo", "error budget", "latency p99", "availability"],
    "data-sql": ["select ", "from ", "join ", "where ", "create table"],
    "ai-eng": ["embedding", "rag", "vector", "lora", "fine-tune", "vllm"],
    "ci-github": ["github actions", ".github/workflows", "uses: actions/", "runs-on:"],
}


def detect_domain(prompt: str) -> str | None:
    """Guess the domain of ``prompt`` by counting keyword hits.

    Every hint in ``DOMAIN_HINTS`` that occurs in the prompt as a substring
    (compared case-insensitively) counts once for its domain. Matching is
    plain substring search, so short hints such as "rag" or "sli" also hit
    inside longer words.

    Args:
        prompt: Raw user prompt; an empty or ``None`` prompt yields ``None``.

    Returns:
        The domain with the most distinct hints present, provided it has at
        least two; otherwise ``None``. On a tie the domain listed first in
        ``DOMAIN_HINTS`` wins.
    """
    if not prompt:
        return None
    text = prompt.lower()
    best, best_n = None, 0
    for dom, kws in DOMAIN_HINTS.items():
        # Lower-case the hint as well: mixed-case hints ("Stack",
        # "CfnOutput") could otherwise never match the lower-cased prompt.
        n = sum(1 for k in kws if k.lower() in text)
        if n > best_n:  # strict ">" keeps the earliest domain on ties
            best, best_n = dom, n
    return best if best_n >= 2 else None
def augment(prompt: str, domain: str | None = None,
            k_lessons: int = 3, k_skills: int = 3,
            max_each_chars: int = 600) -> str:
    """Return ``prompt`` with lesson/skill context prepended.

    Args:
        prompt: Raw user prompt.
        domain: Domain label (e.g. ``"devops-tf"``). When falsy it is
            detected from the prompt with ``detect_domain``.
        k_lessons: Passed as ``k`` to ``_REFLEX.retrieve_similar``.
        k_skills: Passed as ``limit`` to ``_VOYAGER.search``.
        max_each_chars: Cap applied to each lesson field and to each
            snippet's code. Snippet descriptions are capped at 200 chars.

    Returns:
        ``prompt`` unchanged when no store is loaded or none returns hits.
        Otherwise the rendered sections, a ``## User request`` header and
        then the original prompt.
    """
    domain = domain or detect_domain(prompt)
    parts: list[str] = []

    if _REFLEX is not None:
        try:
            lessons = _REFLEX.retrieve_similar(prompt, domain, k=k_lessons)
        except Exception:
            # Augmentation is optional: a failing store just contributes nothing.
            lessons = []
        if lessons:
            parts.append(_format_lessons(lessons, max_each_chars))

    if _VOYAGER is not None:
        try:
            # Only the domain family ("devops" from "devops-tf") is used as tag.
            tags = [domain.split("-")[0]] if domain else []
            skills = _VOYAGER.search(prompt, tags=tags, limit=k_skills,
                                     only_promoted=True)
        except Exception:
            skills = []
        if skills:
            parts.append(_format_skills(skills, max_each_chars))

    if not parts:
        return prompt
    return "\n\n".join(parts) + "\n\n## User request\n" + prompt


def _clip(value, limit: int) -> str:
    """Return ``value`` as text cut to ``limit`` chars; falsy values become ""."""
    # str() keeps a non-string field from raising on the slice.
    return str(value)[:limit] if value else ""


def _format_lessons(lessons, max_each_chars: int) -> str:
    """Render lesson records as a numbered list under the lessons header."""
    # Records are assumed to be dict-like with optional "error",
    # "reflection" and "fix" keys -- TODO confirm against reflexion-store.
    block = ["## Past lessons (do NOT repeat these mistakes)"]
    for i, lesson in enumerate(lessons, 1):
        err = _clip(lesson.get("error"), max_each_chars)
        ref = _clip(lesson.get("reflection"), max_each_chars)
        fix = _clip(lesson.get("fix"), max_each_chars)
        block.append(
            f"{i}. error_signal: {err}\n"
            f"   lesson: {ref}\n"
            f"   correct_pattern: {fix}")
    return "\n".join(block)


def _format_skills(skills, max_each_chars: int) -> str:
    """Render skill records as fenced snippets under the snippets header."""
    # Records are assumed to be dict-like with optional "code",
    # "description" and "name" keys -- TODO confirm against voyager-skills.
    block = ["## Validated snippets (proven in production)"]
    for skill in skills:
        code = _clip(skill.get("code"), max_each_chars)
        # Fall back to the name; a present-but-null name must not reach the slice.
        desc = _clip(skill.get("description") or skill.get("name"), 200)
        block.append(f"- {desc}\n```\n{code}\n```")
    return "\n".join(block)
# CLI: read JSON {prompt, domain?} from stdin, print {prompt: augmented} JSON.
# Errors are reported as {"error": ...} on stdout with exit status 1.
if __name__ == "__main__":
    if sys.stdin.isatty():
        # Demo mode: nothing piped in, so augment a canned prompt and print
        # the result as plain text.
        demo = ("Write a Terraform module that provisions an S3 bucket "
                "with versioning and KMS encryption.")
        print(augment(demo))
    else:
        try:
            d = json.load(sys.stdin)
        except Exception as e:
            print(json.dumps({"error": f"bad json: {e}"}))
            sys.exit(1)
        # Valid JSON that is not an object (list, string, number) has no
        # .get(); reject it explicitly instead of dying with a traceback.
        if not isinstance(d, dict):
            print(json.dumps({"error": "bad json: expected an object"}))
            sys.exit(1)
        # A missing or null prompt is treated as empty; any other non-string
        # would crash inside augment(), so report it as an error.
        prompt = d.get("prompt") or ""
        if not isinstance(prompt, str):
            print(json.dumps({"error": "bad json: prompt must be a string"}))
            sys.exit(1)
        out = augment(prompt, d.get("domain"))
        print(json.dumps({"prompt": out}, ensure_ascii=False))