| """Catalogue of hidden 'black-box' Python functions the agent must reproduce. |
| |
| Each entry pairs the reference implementation with a *typed input domain* |
| generator so the verifier can fuzz it, plus a public signature/docstring shown |
| to the agent in the prompt. The reference implementation itself is never |
| shown to the agent. |
| |
| Each spec also carries: |
| |
| * ``difficulty`` -- one of ``"easy" | "medium" | "hard"``. The trainer / eval |
| harnesses use this for curriculum scheduling (easy first, then medium, ...). |
| Inspired by the curriculum recommendation in Masud et al. (2026) §C2 and the |
| reward-horizon shortening discussion in Ibrahim et al. (2024) §IV-F. |
| |
| * ``edge_cases`` -- a list of must-be-included probe inputs the verifier |
| *always* injects on top of the random fuzz batch. This makes the execution |
| reward robust against agents that learn to handle the random regime but miss |
| edge cases (Masud et al. (2026) C1: proxy-failure mitigation; Ibrahim et al. |
| (2024) §III-A: deceptive-reward mitigation). |
| """ |
|
|
| from __future__ import annotations |
|
|
| import random |
| import string |
| from dataclasses import dataclass, field |
| from typing import Any, Callable, Dict, List |
|
|
|
|
| |
|
|
|
|
| def _fibonacci(n: int) -> int: |
| if not isinstance(n, int) or isinstance(n, bool) or n <= 0 or n > 90: |
| raise ValueError("Input must be a positive integer <= 90.") |
| a, b = 0, 1 |
| for _ in range(n - 1): |
| a, b = b, a + b |
| return b if n > 0 else a |
|
|
|
|
| def _reverse_string(s: str) -> str: |
| if not isinstance(s, str): |
| raise TypeError("Input must be a string.") |
| return s[::-1] |
|
|
|
|
| def _is_palindrome(s: str) -> bool: |
| if not isinstance(s, str): |
| raise TypeError("Input must be a string.") |
| cleaned = "".join(ch.lower() for ch in s if ch.isalnum()) |
| return cleaned == cleaned[::-1] |
|
|
|
|
| def _digit_sum(n: int) -> int: |
| if not isinstance(n, int) or isinstance(n, bool): |
| raise TypeError("Input must be int.") |
| if n < 0: |
| raise ValueError("Input must be non-negative.") |
| return sum(int(c) for c in str(n)) |
|
|
|
|
| def _count_vowels(s: str) -> int: |
| if not isinstance(s, str): |
| raise TypeError("Input must be a string.") |
| return sum(1 for c in s.lower() if c in "aeiou") |
|
|
|
|
| def _gcd(pair) -> int: |
| """Greatest common divisor of two non-negative ints, given as a 2-tuple |
| or 2-list. Hidden trick: tuple/list both accepted, ints only.""" |
| if not isinstance(pair, (list, tuple)) or len(pair) != 2: |
| raise TypeError("Input must be a 2-element list or tuple.") |
| a, b = pair |
| if not all(isinstance(x, int) and not isinstance(x, bool) for x in (a, b)): |
| raise TypeError("Both elements must be int.") |
| if a < 0 or b < 0: |
| raise ValueError("Both elements must be non-negative.") |
| while b: |
| a, b = b, a % b |
| return a |
|
|
|
|
| def _sort_unique(xs) -> list: |
| """Return sorted unique elements from a list of ints.""" |
| if not isinstance(xs, list): |
| raise TypeError("Input must be a list.") |
| if not all(isinstance(x, int) and not isinstance(x, bool) for x in xs): |
| raise TypeError("All elements must be int.") |
| return sorted(set(xs)) |
|
|
|
|
| def _caesar_cipher(s: str) -> str: |
| """Caesar shift by +3 on lowercase letters; everything else unchanged.""" |
| if not isinstance(s, str): |
| raise TypeError("Input must be a string.") |
| out = [] |
| for ch in s: |
| if "a" <= ch <= "z": |
| out.append(chr((ord(ch) - ord("a") + 3) % 26 + ord("a"))) |
| else: |
| out.append(ch) |
| return "".join(out) |
|
|
|
|
| def _is_prime(n: int) -> bool: |
| if not isinstance(n, int) or isinstance(n, bool): |
| raise TypeError("Input must be int.") |
| if n < 2: |
| return False |
| if n < 4: |
| return True |
| if n % 2 == 0: |
| return False |
| i = 3 |
| while i * i <= n: |
| if n % i == 0: |
| return False |
| i += 2 |
| return True |
|
|
|
|
| |
|
|
|
|
| def _fuzz_small_pos_int(rng: random.Random, n: int) -> List[int]: |
| return [rng.randint(1, 30) for _ in range(n)] |
|
|
|
|
| def _fuzz_fib_int(rng: random.Random, n: int) -> List[int]: |
| pool = [1, 2, 3, 10, 20, 30, 50, 89, 90] |
| return [rng.choice(pool) if rng.random() < 0.3 else rng.randint(1, 90) for _ in range(n)] |
|
|
|
|
| def _fuzz_short_string(rng: random.Random, n: int) -> List[str]: |
| alpha = string.ascii_letters + string.digits |
| return ["".join(rng.choices(alpha, k=rng.randint(0, 12))) for _ in range(n)] |
|
|
|
|
| def _fuzz_palindrome_string(rng: random.Random, n: int) -> List[str]: |
| out = [] |
| for _ in range(n): |
| if rng.random() < 0.4: |
| base = "".join(rng.choices(string.ascii_lowercase, k=rng.randint(0, 6))) |
| out.append(base + base[::-1]) |
| else: |
| out.append("".join(rng.choices(string.ascii_letters + " ", k=rng.randint(0, 12)))) |
| return out |
|
|
|
|
| def _fuzz_nonneg_int(rng: random.Random, n: int) -> List[int]: |
| return [rng.randint(0, 10_000) for _ in range(n)] |
|
|
|
|
| def _fuzz_int_pair(rng: random.Random, n: int): |
| return [(rng.randint(0, 1000), rng.randint(0, 1000)) for _ in range(n)] |
|
|
|
|
| def _fuzz_int_list(rng: random.Random, n: int): |
| return [ |
| [rng.randint(-50, 50) for _ in range(rng.randint(0, 8))] for _ in range(n) |
| ] |
|
|
|
|
| def _fuzz_lower_string(rng: random.Random, n: int) -> List[str]: |
| return [ |
| "".join(rng.choices(string.ascii_lowercase + " ,!", k=rng.randint(0, 16))) |
| for _ in range(n) |
| ] |
|
|
|
|
| def _fuzz_prime_int(rng: random.Random, n: int) -> List[int]: |
| seeded = [0, 1, 2, 3, 4, 9, 11, 15, 17, 25, 29, 97, 100] |
| return [rng.choice(seeded) if rng.random() < 0.3 else rng.randint(0, 200) for _ in range(n)] |
|
|
|
|
| |
|
|
|
|
| @dataclass(frozen=True) |
| class FunctionSpec: |
| name: str |
| fn: Callable[..., Any] |
| signature: str |
| description: str |
| fuzzer: Callable[[random.Random, int], list] |
| difficulty: str = "medium" |
| |
| |
| |
| edge_cases: List[Any] = field(default_factory=list) |
| |
| |
| |
| |
| |
| unpack_args: bool = False |
| |
| |
| |
| source: str = "builtin" |
|
|
|
|
| BLACK_BOX_FUNCTIONS: Dict[str, FunctionSpec] = { |
| spec.name: spec |
| for spec in [ |
| FunctionSpec( |
| name="fibonacci", |
| fn=_fibonacci, |
| signature="fibonacci(n: int) -> int", |
| description=( |
| "Returns the n-th Fibonacci number. Raises ValueError for " |
| "invalid n (n must be a positive int <= 90)." |
| ), |
| fuzzer=_fuzz_fib_int, |
| difficulty="easy", |
| edge_cases=[1, 2, 3, 10, 89, 90], |
| ), |
| FunctionSpec( |
| name="reverse_string", |
| fn=_reverse_string, |
| signature="reverse_string(s: str) -> str", |
| description="Returns the reversed string. Raises TypeError for non-str.", |
| fuzzer=_fuzz_short_string, |
| difficulty="easy", |
| edge_cases=["", "a", "ab", "racecar", "Hello, World!"], |
| ), |
| FunctionSpec( |
| name="is_palindrome", |
| fn=_is_palindrome, |
| signature="is_palindrome(s: str) -> bool", |
| description=( |
| "Case-insensitive palindrome check, ignoring non-alphanumeric " |
| "characters. Raises TypeError for non-str." |
| ), |
| fuzzer=_fuzz_palindrome_string, |
| difficulty="medium", |
| edge_cases=["", "a", "ab", "abba", "A man, a plan, a canal: Panama", "Hello"], |
| ), |
| FunctionSpec( |
| name="digit_sum", |
| fn=_digit_sum, |
| signature="digit_sum(n: int) -> int", |
| description=( |
| "Sum of the decimal digits of n. n must be a non-negative int." |
| ), |
| fuzzer=_fuzz_nonneg_int, |
| difficulty="easy", |
| edge_cases=[0, 1, 9, 10, 99, 100, 9999], |
| ), |
| FunctionSpec( |
| name="count_vowels", |
| fn=_count_vowels, |
| signature="count_vowels(s: str) -> int", |
| description="Count of vowels (a/e/i/o/u, case-insensitive) in s.", |
| fuzzer=_fuzz_lower_string, |
| difficulty="easy", |
| edge_cases=["", "bcd", "AEIOU", "Hello, World!", "aaaaa"], |
| ), |
| FunctionSpec( |
| name="gcd", |
| fn=_gcd, |
| signature="gcd(pair: tuple[int, int] | list[int]) -> int", |
| description=( |
| "Greatest common divisor of a 2-element tuple/list of " |
| "non-negative ints." |
| ), |
| fuzzer=_fuzz_int_pair, |
| difficulty="medium", |
| edge_cases=[(0, 0), (0, 7), (12, 18), (17, 13), (100, 75)], |
| ), |
| FunctionSpec( |
| name="sort_unique", |
| fn=_sort_unique, |
| signature="sort_unique(xs: list[int]) -> list[int]", |
| description="Sorted, deduplicated list of ints from xs.", |
| fuzzer=_fuzz_int_list, |
| difficulty="easy", |
| edge_cases=[[], [1], [1, 1, 1], [3, 1, 2], [-5, 5, 0, -5, 5]], |
| ), |
| FunctionSpec( |
| name="caesar_cipher", |
| fn=_caesar_cipher, |
| signature="caesar_cipher(s: str) -> str", |
| description=( |
| "Caesar shift by +3 on lowercase letters; non-lowercase chars " |
| "are unchanged." |
| ), |
| fuzzer=_fuzz_lower_string, |
| difficulty="hard", |
| edge_cases=["", "abc", "xyz", "Hello, World!", "ABC", "hello world"], |
| ), |
| FunctionSpec( |
| name="is_prime", |
| fn=_is_prime, |
| signature="is_prime(n: int) -> bool", |
| description="True iff n is a prime int. n must be int.", |
| fuzzer=_fuzz_prime_int, |
| difficulty="medium", |
| edge_cases=[0, 1, 2, 3, 4, 17, 25, 97, 100], |
| ), |
| ] |
| } |
|
|