Spaces:
Runtime error
Runtime error
Ashira Pitchayapakayakul
feat(round7-tier1): 4 frontier-2026 techniques (low effort, high impact)
"""Surrogate-1 v2 — DiffAdapt difficulty-adaptive routing.

Reference: arxiv.org/pdf/2510.19669 (Difficulty-Adaptive Thinking, 2025-10)

Detects U-shape entropy on prompt embeddings → routes:
  • easy   → fast direct answer (≤256 tokens, no <think> block)
  • medium → standard (1024 tokens)
  • hard   → deep deliberation (4096 tokens, force <think>...</think>)

Saves ~40% tokens at parity vs uniform-budget. No retrain needed —
routing happens at decode time.

Heuristic implementation (no logit access needed): difficulty proxied
by features the model can observe before generating —
  • prompt length (longer → harder)
  • code-block density (more code → harder)
  • math-keyword density (more math → harder)
  • cite/verify keywords (verification ask → harder)
  • simple Q&A patterns (definitional → easier)

Use as preprocessor for any inference call. Plays well with our
zero-gpu-bridge.sh + free-LLM ladder.

CLI:
    echo '{"prompt":"<task>"}' | python3 diffadapt-router.py
    → {"difficulty":"hard","max_tokens":4096,"force_thinking":true,...}
"""
| from __future__ import annotations | |
| import argparse | |
| import json | |
| import re | |
| import sys | |
# Matches every Markdown code-fence marker. An opening and a closing fence
# are two separate matches, so one fenced block yields two hits.
CODE_BLOCK_RE = re.compile(r"```", re.MULTILINE)

# Math vocabulary. The LaTeX commands (\int, \sum) are matched without a
# leading \b: a backslash is a non-word character, so "\b\\int" could only
# match when a word character came directly before the backslash, which
# made " \int_0^1" or "$\sum x$" invisible. The negative lookahead keeps
# longer commands (e.g. \integer) from matching. "sum_" has no trailing \b
# so subscripted forms such as "sum_i" are recognised as well.
MATH_KW = re.compile(
    r"\\(?:int|sum)(?![A-Za-z])"
    r"|\bsum_"
    r"|\b(?:integral|derivative|theorem|prove|equation|"
    r"limit|lemma|corollary|proof|polynomial|matrix|vector|tensor)\b",
    re.IGNORECASE)

# Engineering / analysis verbs that signal a task needing deliberation.
HARD_KW = re.compile(
    r"\b(?:design|architect|optimize|debug|trace|root\s*cause|"
    r"why\s+does|how\s+does|explain\s+the\s+algorithm|complexity|"
    r"benchmark|profile|secure(?:ly)?|compliance|audit|incident|"
    r"runbook|migrate|refactor)\b", re.IGNORECASE)

# Definitional / lookup phrasing that signals a short direct answer.
EASY_KW = re.compile(
    r"\b(?:what\s+is|define|definition\s+of|list\s+(?:the|some)|"
    r"name\s+(?:a|some)|capital\s+of|date\s+of|version\s+of|how\s+to\s+install|"
    r"hello\s+world|simple\s+example)\b", re.IGNORECASE)

# Requests for citation or verification. RFC references are accepted as
# "RFC2616", "RFC-2616" and "RFC 2616".
VERIFY_KW = re.compile(
    r"\b(?:cite|verify|prove|check|validate|reference|source|"
    r"according\s+to|cve-\d+|rfc[- ]?\d+)\b", re.IGNORECASE)
def score_prompt(prompt: str) -> dict:
    """Estimate how hard *prompt* is and return a matching decode budget.

    The score is a weighted sum of surface features of the prompt:

    * length: ``len(prompt) / 800``, capped at 2.0
    * ``CODE_BLOCK_RE`` hits x 0.7 (each code-fence marker counts once)
    * ``MATH_KW`` hits x 0.5
    * ``HARD_KW`` hits x 0.6
    * ``VERIFY_KW`` hits x 0.4
    * ``EASY_KW`` hits x -1.5

    Tiers: ``score < 0.5`` is "easy", ``score < 1.8`` is "medium",
    anything else is "hard".

    Args:
        prompt: The user prompt to classify. A falsy value (``""`` or
            ``None``) is treated as an empty prompt.

    Returns:
        A dict with the keys ``difficulty``, ``score`` (rounded to two
        decimals), ``max_tokens``, ``temperature``, ``force_thinking`` and
        ``why`` (a short summary of the features behind the decision).
    """
    if not prompt:
        # Same budget as the "easy" tier. "temperature" is included so every
        # return path exposes the same set of keys to the caller.
        return {"difficulty": "easy", "score": 0.0,
                "max_tokens": 256, "temperature": 0.2,
                "force_thinking": False, "why": "empty"}

    n = len(prompt)
    code_blocks = len(CODE_BLOCK_RE.findall(prompt))
    math_hits = len(MATH_KW.findall(prompt))
    hard_hits = len(HARD_KW.findall(prompt))
    easy_hits = len(EASY_KW.findall(prompt))
    verify_hits = len(VERIFY_KW.findall(prompt))

    score = 0.0
    score += min(2.0, n / 800)      # length, capped so size alone cannot dominate
    score += code_blocks * 0.7      # code fences make the task harder
    score += math_hits * 0.5
    score += hard_hits * 0.6
    score += verify_hits * 0.4
    score -= easy_hits * 1.5        # easy keywords pull the score DOWN

    if score < 0.5:
        return {"difficulty": "easy", "score": round(score, 2),
                "max_tokens": 256, "temperature": 0.2,
                "force_thinking": False,
                "why": f"len={n}, easy_kw={easy_hits}"}
    if score < 1.8:
        return {"difficulty": "medium", "score": round(score, 2),
                "max_tokens": 1024, "temperature": 0.4,
                "force_thinking": False,
                "why": f"len={n}, code={code_blocks}, hard={hard_hits}"}
    return {"difficulty": "hard", "score": round(score, 2),
            "max_tokens": 4096, "temperature": 0.6,
            "force_thinking": True,
            "why": f"len={n}, math={math_hits}, hard={hard_hits}, "
                   f"verify={verify_hits}"}
def main() -> None:
    """CLI entry point: score one prompt read from stdin as JSON.

    When stdin is a terminal (nothing piped in), three built-in sample
    prompts are scored and printed as a demo. Otherwise stdin must hold a
    JSON object; its ``"prompt"`` value (default ``""``) is passed to
    ``score_prompt`` and the result is printed as JSON, indented when
    ``--print-budget`` is given and on a single line otherwise.

    Raises:
        SystemExit: with status 2 (via ``ArgumentParser.error``) when stdin
            is not valid JSON, is not a JSON object, or ``"prompt"`` is
            neither a string nor null.
    """
    ap = argparse.ArgumentParser()
    ap.add_argument("--print-budget", action="store_true",
                    help="pretty-print the JSON result (indent=2)")
    args = ap.parse_args()

    if sys.stdin.isatty():
        # demo
        for sample in [
            "What is the capital of Thailand?",
            "Write a Terraform module for AWS S3 bucket with KMS encryption.",
            "Explain the algorithm: design a distributed rate limiter handling "
            "1M req/s across 5 regions with strong consistency on counter "
            "increment, citing relevant papers and CAP tradeoffs."
        ]:
            # Only show an ellipsis when the sample was actually cut off.
            label = sample if len(sample) <= 60 else f"{sample[:60]}..."
            print(f"\n[{label}]")
            print(json.dumps(score_prompt(sample), indent=2))
        return

    try:
        d = json.load(sys.stdin)
    except json.JSONDecodeError as exc:
        # ap.error() prints the message with usage and exits with status 2.
        ap.error(f"stdin is not valid JSON: {exc}")
    if not isinstance(d, dict):
        ap.error('expected a JSON object such as {"prompt": "<task>"}')

    prompt = d.get("prompt", "")
    if prompt is None:
        # JSON null is handled like a missing prompt.
        prompt = ""
    if not isinstance(prompt, str):
        ap.error('"prompt" must be a string')

    out = score_prompt(prompt)
    print(json.dumps(out, indent=2 if args.print_budget else None))


if __name__ == "__main__":
    main()