Spaces:

axentx
/

surrogate-1

Runtime error

surrogate-1 / bin /v2 /diffadapt-router.py

Ashira Pitchayapakayakul

feat(round7-tier1): 4 frontier-2026 techniques (low effort, high impact)

ec71dfa 8 days ago

4.42 kB

	"""Surrogate-1 v2 — DiffAdapt difficulty-adaptive routing.

	Reference: arxiv.org/pdf/2510.19669 (Difficulty-Adaptive Thinking, 2025-10)

	Detects U-shape entropy on prompt embeddings → routes:
	• easy → fast direct answer (≤256 tokens, no <think> block)
	• medium → standard (1024 tokens)
	• hard → deep deliberation (4096 tokens, force <think>...</think>)

	Saves ~40% tokens at parity vs uniform-budget. No retrain needed —
	routing happens at decode time.

	Heuristic implementation (no logit access needed): difficulty proxied
	by features the model can observe before generating —
	• prompt length (longer → harder)
	• code-block density (more code → harder)
	• math-keyword density (more math → harder)
	• cite/verify keywords (verification ask → harder)
	• simple Q&A patterns (definitional → easier)

	Use as preprocessor for any inference call. Plays well with our
	zero-gpu-bridge.sh + free-LLM ladder.

	CLI:
	echo '{"prompt":"<task>"}' \| python3 diffadapt-router.py
	→ {"difficulty":"hard","max_tokens":4096,"force_thinking":true,...}
	"""
	from __future__ import annotations
	import argparse
	import json
	import re
	import sys

	CODE_BLOCK_RE = re.compile(r"```", re.MULTILINE)
	MATH_KW = re.compile(
	r"\b(?:integral\|derivative\|theorem\|prove\|equation\|sum_\|\\int\|\\sum\|"
	r"limit\|lemma\|corollary\|proof\|polynomial\|matrix\|vector\|tensor)\b",
	re.IGNORECASE)
	HARD_KW = re.compile(
	r"\b(?:design\|architect\|optimize\|debug\|trace\|root\s*cause\|"
	r"why\s+does\|how\s+does\|explain\s+the\s+algorithm\|complexity\|"
	r"benchmark\|profile\|secure(?:ly)?\|compliance\|audit\|incident\|"
	r"runbook\|migrate\|refactor)\b", re.IGNORECASE)
	EASY_KW = re.compile(
	r"\b(?:what\s+is\|define\|definition\s+of\|list\s+(?:the\|some)\|"
	r"name\s+(?:a\|some)\|capital\s+of\|date\s+of\|version\s+of\|how\s+to\s+install\|"
	r"hello\s+world\|simple\s+example)\b", re.IGNORECASE)
	VERIFY_KW = re.compile(
	r"\b(?:cite\|verify\|prove\|check\|validate\|reference\|source\|"
	r"according\s+to\|cve-\d+\|rfc-?\d+)\b", re.IGNORECASE)


	def score_prompt(prompt: str) -> dict:
	if not prompt:
	return {"difficulty": "easy", "score": 0.0,
	"max_tokens": 256, "force_thinking": False, "why": "empty"}

	n = len(prompt)
	code_blocks = len(CODE_BLOCK_RE.findall(prompt))
	math_hits = len(MATH_KW.findall(prompt))
	hard_hits = len(HARD_KW.findall(prompt))
	easy_hits = len(EASY_KW.findall(prompt))
	verify_hits = len(VERIFY_KW.findall(prompt))

	score = 0.0
	score += min(2.0, n / 800) # length
	score += code_blocks * 0.7 # code blocks make harder
	score += math_hits * 0.5
	score += hard_hits * 0.6
	score += verify_hits * 0.4
	score -= easy_hits * 1.5 # easy keywords pull DOWN

	if score < 0.5:
	return {"difficulty": "easy", "score": round(score, 2),
	"max_tokens": 256, "temperature": 0.2,
	"force_thinking": False,
	"why": f"len={n}, easy_kw={easy_hits}"}
	if score < 1.8:
	return {"difficulty": "medium", "score": round(score, 2),
	"max_tokens": 1024, "temperature": 0.4,
	"force_thinking": False,
	"why": f"len={n}, code={code_blocks}, hard={hard_hits}"}
	return {"difficulty": "hard", "score": round(score, 2),
	"max_tokens": 4096, "temperature": 0.6,
	"force_thinking": True,
	"why": f"len={n}, math={math_hits}, hard={hard_hits}, "
	f"verify={verify_hits}"}


	def main() -> None:
	ap = argparse.ArgumentParser()
	ap.add_argument("--print-budget", action="store_true")
	args = ap.parse_args()

	if sys.stdin.isatty():
	# demo
	for sample in [
	"What is the capital of Thailand?",
	"Write a Terraform module for AWS S3 bucket with KMS encryption.",
	"Explain the algorithm: design a distributed rate limiter handling "
	"1M req/s across 5 regions with strong consistency on counter "
	"increment, citing relevant papers and CAP tradeoffs."
	]:
	print(f"\n[{sample[:60]}...]")
	print(json.dumps(score_prompt(sample), indent=2))
	return

	d = json.load(sys.stdin)
	out = score_prompt(d.get("prompt", ""))
	print(json.dumps(out, indent=2 if args.print_budget else None))


	if __name__ == "__main__":
	main()