anugrah55's picture
Level 2 open-ended env: auto-fuzzer + TaskCatalog + Hub-driven catalog + extended /reset
77e65fb verified
"""Generic, type-driven fuzz-input generator for OpenSleuth Level 2.
Given a Python callable annotated with ``typing`` hints, ``auto_fuzz`` produces
``n`` argument tuples that respect the signature so the verifier can score
unannotated *arbitrary* targets without requiring a hand-written fuzzer the
way the 9 builtin BLACK_BOX_FUNCTIONS do.
Each per-type generator mixes a small set of "edge" values (``0``, ``-1``,
``""``, ``None`` for ``Optional``, ...) with random values, weighted ~30/70.
This biases the fuzz batch toward the boundaries that actually distinguish
implementations while still covering the boring middle.
A caller-supplied ``fuzz_spec: dict`` overrides the type-based generation on
a per-parameter basis, e.g.::
auto_fuzz(my_fn, n=20, fuzz_spec={"n": {"type": "int", "min": 1, "max": 90}})
Returned shape: ``List[tuple]`` -- one tuple per fuzz input, with one element
per (positional) parameter of ``fn``. Even for unary ``fn`` we return tuples
so the catalog wrapper has a single, uniform calling convention.
"""
from __future__ import annotations
import inspect
import random
import string
import typing
from typing import Any, Callable, Dict, List, Optional, Tuple, Union, get_args, get_origin
# Probability that a per-type generator emits an "edge" value (0, "", None,
# ...) instead of a random sample. Kept small enough that the boring middle
# still gets coverage but high enough that the edge cases reliably appear.
EDGE_PROB = 0.30
# Per-type edge pools. These are used by the ``_g_*`` helpers below.
_INT_EDGES = (0, 1, -1, 2, -2, 10, -10, 100, -100)
_FLOAT_EDGES = (0.0, 1.0, -1.0, 0.5, -0.5, 1e-9, -1e-9, 100.0)
_STR_EDGES = ("", "a", "ab", "Hello", " ", "0", "abc def")
_BYTES_EDGES = (b"", b"a", b"ab", b"\x00", b"abc")
# ---------------------------------------------------------------------------
# Per-type generators (do not assume any param-name dispatch).
# ---------------------------------------------------------------------------
def _maybe_edge(rng: random.Random, edges: tuple, random_fn: Callable[[], Any]) -> Any:
if edges and rng.random() < EDGE_PROB:
return rng.choice(edges)
return random_fn()
def _g_int(rng: random.Random, *, lo: int = -100, hi: int = 100) -> int:
# Filter the edge pool by [lo, hi] so a caller-supplied fuzz_spec
# ``{"type": "int", "min": 1, "max": 5}`` never emits ``-100``.
edges = tuple(v for v in _INT_EDGES if lo <= v <= hi) or (lo,)
return _maybe_edge(rng, edges, lambda: rng.randint(lo, hi))
def _g_float(rng: random.Random, *, lo: float = -100.0, hi: float = 100.0) -> float:
edges = tuple(v for v in _FLOAT_EDGES if lo <= v <= hi) or (lo,)
return _maybe_edge(rng, edges, lambda: rng.uniform(lo, hi))
def _g_bool(rng: random.Random) -> bool:
return bool(rng.getrandbits(1))
def _g_str(rng: random.Random, *, max_len: int = 12, alphabet: Optional[str] = None) -> str:
alpha = alphabet or (string.ascii_letters + string.digits)
def _rand():
return "".join(rng.choices(alpha, k=rng.randint(0, max_len)))
if alphabet is not None:
# When the caller restricts the alphabet, our generic edge pool
# ("Hello", " ", ...) would violate it. Build a deterministic
# alphabet-respecting edge set instead.
custom_edges = ("",)
if alphabet:
custom_edges = ("", alphabet[0], alphabet[0] * min(max_len, 2))
return _maybe_edge(rng, custom_edges, _rand)
return _maybe_edge(rng, _STR_EDGES, _rand)
def _g_bytes(rng: random.Random, *, max_len: int = 8) -> bytes:
def _rand():
return bytes(rng.randint(0, 255) for _ in range(rng.randint(0, max_len)))
return _maybe_edge(rng, _BYTES_EDGES, _rand)
def _g_list(rng: random.Random, elem_gen: Callable[[], Any], *, max_len: int = 6) -> list:
if rng.random() < EDGE_PROB / 2:
return []
return [elem_gen() for _ in range(rng.randint(0, max_len))]
def _g_tuple_homogeneous(
rng: random.Random, elem_gen: Callable[[], Any], *, max_len: int = 6
) -> tuple:
return tuple(_g_list(rng, elem_gen, max_len=max_len))
def _g_tuple_heterogeneous(rng: random.Random, elem_gens: List[Callable[[], Any]]) -> tuple:
return tuple(g() for g in elem_gens)
def _g_set(rng: random.Random, elem_gen: Callable[[], Any], *, max_len: int = 6) -> set:
if rng.random() < EDGE_PROB / 2:
return set()
return {elem_gen() for _ in range(rng.randint(0, max_len))}
def _g_dict(
rng: random.Random,
key_gen: Callable[[], Any],
val_gen: Callable[[], Any],
*,
max_len: int = 5,
) -> dict:
if rng.random() < EDGE_PROB / 2:
return {}
return {key_gen(): val_gen() for _ in range(rng.randint(0, max_len))}
# ---------------------------------------------------------------------------
# Type -> generator dispatch.
# ---------------------------------------------------------------------------
def _is_optional(tp: Any) -> bool:
"""``Optional[X]`` is ``Union[X, None]`` under the hood."""
if get_origin(tp) is Union:
return type(None) in get_args(tp)
return False
def _strip_optional(tp: Any) -> Any:
"""Return ``X`` for ``Optional[X]``; for unions with None + multiple, pick
the first non-None member (we can't satisfy a union in a single call)."""
if get_origin(tp) is Union:
non_none = [a for a in get_args(tp) if a is not type(None)]
if len(non_none) == 1:
return non_none[0]
if non_none:
return non_none[0]
return tp
def _make_generator(tp: Any, rng: random.Random) -> Callable[[], Any]:
"""Return a 0-arg callable that produces one random value of type ``tp``.
The recursion handles container element types (``list[int]``,
``dict[str, list[int]]``, etc).
"""
if tp is None or tp is type(None):
return lambda: None
if _is_optional(tp):
inner = _strip_optional(tp)
inner_gen = _make_generator(inner, rng)
def _gen_opt():
if rng.random() < EDGE_PROB:
return None
return inner_gen()
return _gen_opt
origin = get_origin(tp)
if origin is typing.Literal:
choices = list(get_args(tp))
return lambda: rng.choice(choices)
if origin is None:
if tp is int:
return lambda: _g_int(rng)
if tp is float:
return lambda: _g_float(rng)
if tp is bool:
return lambda: _g_bool(rng)
if tp is str:
return lambda: _g_str(rng)
if tp is bytes:
return lambda: _g_bytes(rng)
if tp is list:
return lambda: _g_list(rng, lambda: _g_int(rng))
if tp is tuple:
return lambda: _g_tuple_homogeneous(rng, lambda: _g_int(rng))
if tp is set:
return lambda: _g_set(rng, lambda: _g_int(rng))
if tp is dict:
return lambda: _g_dict(rng, lambda: _g_str(rng, max_len=4), lambda: _g_int(rng))
if tp is type(None):
return lambda: None
if tp is typing.Any:
return lambda: _g_int(rng)
# Unknown bare type -> fall back to int.
return lambda: _g_int(rng)
args = get_args(tp)
if origin in (list, List):
elem_t = args[0] if args else int
elem_gen = _make_generator(elem_t, rng)
return lambda: _g_list(rng, elem_gen)
if origin in (set, frozenset):
elem_t = args[0] if args else int
elem_gen = _make_generator(elem_t, rng)
return lambda: _g_set(rng, elem_gen)
if origin in (tuple, Tuple):
if not args:
return lambda: _g_tuple_homogeneous(rng, lambda: _g_int(rng))
if len(args) == 2 and args[1] is Ellipsis:
elem_gen = _make_generator(args[0], rng)
return lambda: _g_tuple_homogeneous(rng, elem_gen)
elem_gens = [_make_generator(a, rng) for a in args]
return lambda: _g_tuple_heterogeneous(rng, elem_gens)
if origin in (dict, Dict):
key_t = args[0] if args else str
val_t = args[1] if len(args) > 1 else int
key_gen = _make_generator(key_t, rng)
val_gen = _make_generator(val_t, rng)
return lambda: _g_dict(rng, key_gen, val_gen)
if origin is Union:
# Already handled Optional above. For pure unions, pick first member.
return _make_generator(args[0], rng)
return lambda: _g_int(rng)
# ---------------------------------------------------------------------------
# fuzz_spec overrides
# ---------------------------------------------------------------------------
def _generator_from_spec(entry: Dict[str, Any], rng: random.Random) -> Callable[[], Any]:
"""Build a generator from a ``fuzz_spec`` entry dict.
Supported keys (all optional except ``type``):
- ``type``: one of ``"int" | "float" | "bool" | "str" | "bytes" |
"list" | "tuple" | "set" | "dict" | "literal" | "any"``
- ``min``, ``max``: int/float bounds
- ``max_len``: container/string length cap
- ``alphabet``: str-only character pool
- ``elem``: nested ``fuzz_spec`` entry for container elements
- ``key``, ``value``: nested entries for dict
- ``elems``: list of nested entries for fixed-arity tuple
- ``choices``: list of literals to sample from
- ``optional``: bool; if True, occasionally yields ``None``
"""
t = entry.get("type", "any")
def _maybe_optional(gen: Callable[[], Any]) -> Callable[[], Any]:
if not entry.get("optional"):
return gen
def _g():
if rng.random() < EDGE_PROB:
return None
return gen()
return _g
if t == "int":
lo = int(entry.get("min", -100))
hi = int(entry.get("max", 100))
return _maybe_optional(lambda: _g_int(rng, lo=lo, hi=hi))
if t == "float":
lo = float(entry.get("min", -100.0))
hi = float(entry.get("max", 100.0))
return _maybe_optional(lambda: _g_float(rng, lo=lo, hi=hi))
if t == "bool":
return _maybe_optional(lambda: _g_bool(rng))
if t == "str":
max_len = int(entry.get("max_len", 12))
alphabet = entry.get("alphabet")
return _maybe_optional(lambda: _g_str(rng, max_len=max_len, alphabet=alphabet))
if t == "bytes":
max_len = int(entry.get("max_len", 8))
return _maybe_optional(lambda: _g_bytes(rng, max_len=max_len))
if t == "literal":
choices = list(entry.get("choices", []))
if not choices:
return _maybe_optional(lambda: None)
return _maybe_optional(lambda: rng.choice(choices))
if t == "list":
elem = entry.get("elem", {"type": "int"})
elem_gen = _generator_from_spec(elem, rng)
max_len = int(entry.get("max_len", 6))
return _maybe_optional(lambda: _g_list(rng, elem_gen, max_len=max_len))
if t == "tuple":
if "elems" in entry:
elem_gens = [_generator_from_spec(e, rng) for e in entry["elems"]]
return _maybe_optional(lambda: _g_tuple_heterogeneous(rng, elem_gens))
elem = entry.get("elem", {"type": "int"})
elem_gen = _generator_from_spec(elem, rng)
max_len = int(entry.get("max_len", 6))
return _maybe_optional(lambda: _g_tuple_homogeneous(rng, elem_gen, max_len=max_len))
if t == "set":
elem = entry.get("elem", {"type": "int"})
elem_gen = _generator_from_spec(elem, rng)
max_len = int(entry.get("max_len", 6))
return _maybe_optional(lambda: _g_set(rng, elem_gen, max_len=max_len))
if t == "dict":
key = entry.get("key", {"type": "str", "max_len": 4})
value = entry.get("value", {"type": "int"})
key_gen = _generator_from_spec(key, rng)
val_gen = _generator_from_spec(value, rng)
max_len = int(entry.get("max_len", 5))
return _maybe_optional(lambda: _g_dict(rng, key_gen, val_gen, max_len=max_len))
return _maybe_optional(lambda: _g_int(rng))
# ---------------------------------------------------------------------------
# Public API
# ---------------------------------------------------------------------------
def auto_fuzz(
fn: Callable[..., Any],
n: int,
rng: Optional[random.Random] = None,
*,
fuzz_spec: Optional[Dict[str, Dict[str, Any]]] = None,
) -> List[tuple]:
"""Produce ``n`` argument tuples for calling ``fn``.
Each returned element is an ``args`` tuple, intended to be applied as
``fn(*args)``. ``fuzz_spec`` is keyed by parameter name and overrides
the type-based generation per-parameter.
"""
rng = rng or random.Random()
fuzz_spec = fuzz_spec or {}
sig = inspect.signature(fn)
try:
hints = typing.get_type_hints(fn)
except Exception: # noqa: BLE001 -- bad annotations shouldn't crash fuzzing
hints = {}
param_gens: List[Callable[[], Any]] = []
for pname, param in sig.parameters.items():
if param.kind in (
inspect.Parameter.VAR_POSITIONAL,
inspect.Parameter.VAR_KEYWORD,
inspect.Parameter.KEYWORD_ONLY,
):
# We only fuzz positional / positional-or-keyword params.
continue
if pname in fuzz_spec:
param_gens.append(_generator_from_spec(fuzz_spec[pname], rng))
continue
annot = hints.get(pname, param.annotation)
if annot is inspect.Parameter.empty:
param_gens.append(lambda r=rng: _g_int(r))
else:
param_gens.append(_make_generator(annot, rng))
return [tuple(g() for g in param_gens) for _ in range(n)]
def make_fuzzer(
fn: Callable[..., Any],
fuzz_spec: Optional[Dict[str, Dict[str, Any]]] = None,
) -> Callable[[random.Random, int], List[tuple]]:
"""Adapt ``auto_fuzz`` to the ``FunctionSpec.fuzzer`` signature
(``(rng, n) -> list``)."""
def _fuzzer(rng: random.Random, n: int) -> List[tuple]:
return auto_fuzz(fn, n, rng, fuzz_spec=fuzz_spec)
return _fuzzer