# surrogate-1/bin/v2/inference-augment.py
# Author: Ashira Pitchayapakayakul
# feat(v2-round5): sustainability loops + 2026 techniques
# Commit: 17967dd
"""Surrogate-1 v2 β€” Inference-time prompt augmentation.
Glues reflexion-store + voyager-skills into the serving prompt so the
model gets free in-context lessons + validated snippets without retraining.
Used as a sidecar by serve-vllm.sh: every incoming prompt is passed
through `augment(prompt, domain)` before being sent to vLLM.
Adds (under explicit headers, easy to strip):
## Past lessons (top-3 similar)
## Validated skills (top-3 by tag)
If neither store has hits, returns prompt unchanged.
"""
from __future__ import annotations
import importlib.util
import json
import sys
from pathlib import Path
# Directory holding the optional v2 sidecar modules loaded below
# (reflexion-store.py, voyager-skills.py).
V2_DIR = Path.home() / ".surrogate/bin/v2"
def _load(name: str):
    """Best-effort import of a sibling v2 module from V2_DIR by file stem.

    Returns the loaded module object, or None when the file is missing,
    unloadable, or raises during import — callers treat None as "feature
    unavailable" and degrade gracefully.
    """
    p = V2_DIR / f"{name}.py"
    if not p.exists():
        return None
    # Hyphenated file names are not valid module identifiers; normalize.
    spec = importlib.util.spec_from_file_location(name.replace("-", "_"),
                                                 str(p))
    # spec (or spec.loader) can be None for unloadable paths; the original
    # would then crash with AttributeError before reaching the try block.
    if spec is None or spec.loader is None:
        return None
    mod = importlib.util.module_from_spec(spec)
    try:
        spec.loader.exec_module(mod)  # type: ignore
        return mod
    except Exception:
        # Sidecars are optional: any import-time failure disables the feature.
        return None
# Optional sidecars: each is the loaded module or None when its file is
# absent or failed to import; consumers must check for None before use.
_REFLEX = _load("reflexion-store")
_VOYAGER = _load("voyager-skills")
# Hermes-3 reserved tokens (2026 spec, github.com/NousResearch/Hermes-Function-Calling)
# Bake into training-time templates AND inference-time prompts so the model
# learns to use them implicitly.
HERMES3_TOKENS = {
    # Tool-schema listing emitted in the system prompt.
    "tools_open": "<tools>",
    "tools_close": "</tools>",
    # Model-emitted tool invocation wrapper.
    "tool_call_open": "<tool_call>",
    "tool_call_close": "</tool_call>",
    # Host-emitted tool result wrapper.
    "tool_resp_open": "<tool_response>",
    "tool_resp_close": "</tool_response>",
    # Free-form reasoning / planning / self-critique sections.
    "scratchpad": "<SCRATCHPAD>",
    "scratchpad_end": "</SCRATCHPAD>",
    "plan": "<PLAN>",
    "plan_end": "</PLAN>",
    "reflection": "<REFLECTION>",
    "reflection_end": "</REFLECTION>",
}
def build_hermes3_system_prompt(tool_schemas: list[dict] | None = None) -> str:
"""Render a Hermes-3 system prompt block (compatible with vLLM tool parser)."""
parts = [
"You are Surrogate-1, an expert DevSecOps + SRE + coding agent.",
"When you need to think before acting, use <SCRATCHPAD>...</SCRATCHPAD>.",
"When you draft a multi-step plan, use <PLAN>...</PLAN>.",
"When you reflect on what worked or failed, use <REFLECTION>...</REFLECTION>.",
]
if tool_schemas:
parts.append("\nYou have access to the following tools:")
parts.append("<tools>")
for s in tool_schemas:
parts.append(json.dumps(s, ensure_ascii=False))
parts.append("</tools>")
parts.append(
"Invoke a tool with: "
"<tool_call>{\"name\": \"<tool>\", \"arguments\": {...}}</tool_call>")
return "\n".join(parts)
# Domain heuristic — keyword-only, fast, no LLM call.
# NOTE: detect_domain() matches these against a lower-cased prompt, so every
# keyword must itself be lower-case. ("Stack" / "CfnOutput" were mixed-case
# and therefore could never match; fixed to lower-case.)
DOMAIN_HINTS = {
    "code-python": ["def ", "import ", "python", ".py", "pytest", "asyncio"],
    "code-typescript": ["typescript", ".ts", "interface ", "tsconfig", "node_modules"],
    "devops-tf": ["terraform", "resource \"", "provider \"", "tf state", ".tf"],
    "devops-k8s": ["kubernetes", "kubectl", "kind: deployment", "kind: service",
                   "namespace", "helm"],
    "devops-cdk": ["aws-cdk", "cdk synth", "stack", "cfnoutput"],
    "sec-iam": ["iam:", "policy", "principal", "assume role", "least privilege"],
    "sec-secrets": ["secret", "api key", "token", "password", "credentials"],
    "sec-cve": ["cve-", "vulnerability", "exploit", "patch", "remediation"],
    "sre-runbook": ["runbook", "incident", "on-call", "page", "escalation"],
    "sre-slo": ["sli", "slo", "error budget", "latency p99", "availability"],
    "data-sql": ["select ", "from ", "join ", "where ", "create table"],
    "ai-eng": ["embedding", "rag", "vector", "lora", "fine-tune", "vllm"],
    "ci-github": ["github actions", ".github/workflows", "uses: actions/", "runs-on:"],
}
def detect_domain(prompt: str) -> str | None:
    """Guess the prompt's domain via DOMAIN_HINTS keyword counting.

    Returns the domain with the most keyword hits, or None when fewer than
    two keywords match (a single hit is too noisy to trust). Ties keep the
    first domain in DOMAIN_HINTS insertion order.
    """
    p = prompt.lower()
    best, best_n = None, 0
    for dom, kws in DOMAIN_HINTS.items():
        # Lower-case each keyword too: the prompt is lower-cased, so a
        # mixed-case hint entry would otherwise never be able to match.
        n = sum(1 for k in kws if k.lower() in p)
        if n > best_n:
            best, best_n = dom, n
    return best if best_n >= 2 else None
def augment(prompt: str, domain: str | None = None,
            k_lessons: int = 3, k_skills: int = 3,
            max_each_chars: int = 600) -> str:
    """Return prompt with prepended lesson/skill context. Idempotent if no hits.

    Pulls up to k_lessons reflexion lessons and k_skills promoted voyager
    skills (each field clipped to max_each_chars characters), prepends them
    under explicit headers, and appends the untouched prompt after a
    "## User request" header. Lookup failures are swallowed: a broken store
    simply contributes no section.
    """
    domain = domain if domain else detect_domain(prompt)
    sections: list[str] = []

    # Section 1: lessons from the reflexion store (best-effort).
    if _REFLEX is not None:
        try:
            hits = _REFLEX.retrieve_similar(prompt, domain, k=k_lessons)
        except Exception:
            hits = []
        if hits:
            lesson_lines = ["## Past lessons (do NOT repeat these mistakes)"]
            for rank, hit in enumerate(hits, 1):
                signal = (hit.get("error") or "")[:max_each_chars]
                takeaway = (hit.get("reflection") or "")[:max_each_chars]
                pattern = (hit.get("fix") or "")[:max_each_chars]
                lesson_lines.append(
                    f"{rank}. error_signal: {signal}\n"
                    f" lesson: {takeaway}\n"
                    f" correct_pattern: {pattern}")
            sections.append("\n".join(lesson_lines))

    # Section 2: promoted snippets from the voyager skill library (best-effort).
    if _VOYAGER is not None:
        try:
            # Coarse tag = domain family, e.g. "devops-tf" -> "devops".
            skill_tags = [domain.split("-")[0]] if domain else []
            found = _VOYAGER.search(prompt, tags=skill_tags, limit=k_skills,
                                    only_promoted=True)
        except Exception:
            found = []
        if found:
            skill_lines = ["## Validated snippets (proven in production)"]
            for skill in found:
                snippet = (skill.get("code") or "")[:max_each_chars]
                label = (skill.get("description") or skill.get("name", ""))[:200]
                skill_lines.append(f"- {label}\n```\n{snippet}\n```")
            sections.append("\n".join(skill_lines))

    if not sections:
        return prompt
    return "\n\n".join(sections) + "\n\n## User request\n" + prompt
# CLI: read JSON {prompt, domain?} from stdin, print {prompt: augmented} JSON.
if __name__ == "__main__":
    if sys.stdin.isatty():
        # Demo mode: nothing piped in, show a sample augmentation instead.
        demo = ("Write a Terraform module that provisions an S3 bucket "
                "with versioning and KMS encryption.")
        print(augment(demo))
    else:
        try:
            payload = json.load(sys.stdin)
        except Exception as e:
            print(json.dumps({"error": f"bad json: {e}"}))
            sys.exit(1)
        augmented = augment(payload.get("prompt", ""), payload.get("domain"))
        print(json.dumps({"prompt": augmented}, ensure_ascii=False))