"""Generic, type-driven fuzz-input generator for OpenSleuth Level 2. Given a Python callable annotated with ``typing`` hints, ``auto_fuzz`` produces ``n`` argument tuples that respect the signature so the verifier can score unannotated *arbitrary* targets without requiring a hand-written fuzzer the way the 9 builtin BLACK_BOX_FUNCTIONS do. Each per-type generator mixes a small set of "edge" values (``0``, ``-1``, ``""``, ``None`` for ``Optional``, ...) with random values, weighted ~30/70. This biases the fuzz batch toward the boundaries that actually distinguish implementations while still covering the boring middle. A caller-supplied ``fuzz_spec: dict`` overrides the type-based generation on a per-parameter basis, e.g.:: auto_fuzz(my_fn, n=20, fuzz_spec={"n": {"type": "int", "min": 1, "max": 90}}) Returned shape: ``List[tuple]`` -- one tuple per fuzz input, with one element per (positional) parameter of ``fn``. Even for unary ``fn`` we return tuples so the catalog wrapper has a single, uniform calling convention. """ from __future__ import annotations import inspect import random import string import typing from typing import Any, Callable, Dict, List, Optional, Tuple, Union, get_args, get_origin # Probability that a per-type generator emits an "edge" value (0, "", None, # ...) instead of a random sample. Kept small enough that the boring middle # still gets coverage but high enough that the edge cases reliably appear. EDGE_PROB = 0.30 # Per-type edge pools. These are used by the ``_g_*`` helpers below. _INT_EDGES = (0, 1, -1, 2, -2, 10, -10, 100, -100) _FLOAT_EDGES = (0.0, 1.0, -1.0, 0.5, -0.5, 1e-9, -1e-9, 100.0) _STR_EDGES = ("", "a", "ab", "Hello", " ", "0", "abc def") _BYTES_EDGES = (b"", b"a", b"ab", b"\x00", b"abc") # --------------------------------------------------------------------------- # Per-type generators (do not assume any param-name dispatch). # --------------------------------------------------------------------------- def _maybe_edge(rng: random.Random, edges: tuple, random_fn: Callable[[], Any]) -> Any: if edges and rng.random() < EDGE_PROB: return rng.choice(edges) return random_fn() def _g_int(rng: random.Random, *, lo: int = -100, hi: int = 100) -> int: # Filter the edge pool by [lo, hi] so a caller-supplied fuzz_spec # ``{"type": "int", "min": 1, "max": 5}`` never emits ``-100``. edges = tuple(v for v in _INT_EDGES if lo <= v <= hi) or (lo,) return _maybe_edge(rng, edges, lambda: rng.randint(lo, hi)) def _g_float(rng: random.Random, *, lo: float = -100.0, hi: float = 100.0) -> float: edges = tuple(v for v in _FLOAT_EDGES if lo <= v <= hi) or (lo,) return _maybe_edge(rng, edges, lambda: rng.uniform(lo, hi)) def _g_bool(rng: random.Random) -> bool: return bool(rng.getrandbits(1)) def _g_str(rng: random.Random, *, max_len: int = 12, alphabet: Optional[str] = None) -> str: alpha = alphabet or (string.ascii_letters + string.digits) def _rand(): return "".join(rng.choices(alpha, k=rng.randint(0, max_len))) if alphabet is not None: # When the caller restricts the alphabet, our generic edge pool # ("Hello", " ", ...) would violate it. Build a deterministic # alphabet-respecting edge set instead. custom_edges = ("",) if alphabet: custom_edges = ("", alphabet[0], alphabet[0] * min(max_len, 2)) return _maybe_edge(rng, custom_edges, _rand) return _maybe_edge(rng, _STR_EDGES, _rand) def _g_bytes(rng: random.Random, *, max_len: int = 8) -> bytes: def _rand(): return bytes(rng.randint(0, 255) for _ in range(rng.randint(0, max_len))) return _maybe_edge(rng, _BYTES_EDGES, _rand) def _g_list(rng: random.Random, elem_gen: Callable[[], Any], *, max_len: int = 6) -> list: if rng.random() < EDGE_PROB / 2: return [] return [elem_gen() for _ in range(rng.randint(0, max_len))] def _g_tuple_homogeneous( rng: random.Random, elem_gen: Callable[[], Any], *, max_len: int = 6 ) -> tuple: return tuple(_g_list(rng, elem_gen, max_len=max_len)) def _g_tuple_heterogeneous(rng: random.Random, elem_gens: List[Callable[[], Any]]) -> tuple: return tuple(g() for g in elem_gens) def _g_set(rng: random.Random, elem_gen: Callable[[], Any], *, max_len: int = 6) -> set: if rng.random() < EDGE_PROB / 2: return set() return {elem_gen() for _ in range(rng.randint(0, max_len))} def _g_dict( rng: random.Random, key_gen: Callable[[], Any], val_gen: Callable[[], Any], *, max_len: int = 5, ) -> dict: if rng.random() < EDGE_PROB / 2: return {} return {key_gen(): val_gen() for _ in range(rng.randint(0, max_len))} # --------------------------------------------------------------------------- # Type -> generator dispatch. # --------------------------------------------------------------------------- def _is_optional(tp: Any) -> bool: """``Optional[X]`` is ``Union[X, None]`` under the hood.""" if get_origin(tp) is Union: return type(None) in get_args(tp) return False def _strip_optional(tp: Any) -> Any: """Return ``X`` for ``Optional[X]``; for unions with None + multiple, pick the first non-None member (we can't satisfy a union in a single call).""" if get_origin(tp) is Union: non_none = [a for a in get_args(tp) if a is not type(None)] if len(non_none) == 1: return non_none[0] if non_none: return non_none[0] return tp def _make_generator(tp: Any, rng: random.Random) -> Callable[[], Any]: """Return a 0-arg callable that produces one random value of type ``tp``. The recursion handles container element types (``list[int]``, ``dict[str, list[int]]``, etc). """ if tp is None or tp is type(None): return lambda: None if _is_optional(tp): inner = _strip_optional(tp) inner_gen = _make_generator(inner, rng) def _gen_opt(): if rng.random() < EDGE_PROB: return None return inner_gen() return _gen_opt origin = get_origin(tp) if origin is typing.Literal: choices = list(get_args(tp)) return lambda: rng.choice(choices) if origin is None: if tp is int: return lambda: _g_int(rng) if tp is float: return lambda: _g_float(rng) if tp is bool: return lambda: _g_bool(rng) if tp is str: return lambda: _g_str(rng) if tp is bytes: return lambda: _g_bytes(rng) if tp is list: return lambda: _g_list(rng, lambda: _g_int(rng)) if tp is tuple: return lambda: _g_tuple_homogeneous(rng, lambda: _g_int(rng)) if tp is set: return lambda: _g_set(rng, lambda: _g_int(rng)) if tp is dict: return lambda: _g_dict(rng, lambda: _g_str(rng, max_len=4), lambda: _g_int(rng)) if tp is type(None): return lambda: None if tp is typing.Any: return lambda: _g_int(rng) # Unknown bare type -> fall back to int. return lambda: _g_int(rng) args = get_args(tp) if origin in (list, List): elem_t = args[0] if args else int elem_gen = _make_generator(elem_t, rng) return lambda: _g_list(rng, elem_gen) if origin in (set, frozenset): elem_t = args[0] if args else int elem_gen = _make_generator(elem_t, rng) return lambda: _g_set(rng, elem_gen) if origin in (tuple, Tuple): if not args: return lambda: _g_tuple_homogeneous(rng, lambda: _g_int(rng)) if len(args) == 2 and args[1] is Ellipsis: elem_gen = _make_generator(args[0], rng) return lambda: _g_tuple_homogeneous(rng, elem_gen) elem_gens = [_make_generator(a, rng) for a in args] return lambda: _g_tuple_heterogeneous(rng, elem_gens) if origin in (dict, Dict): key_t = args[0] if args else str val_t = args[1] if len(args) > 1 else int key_gen = _make_generator(key_t, rng) val_gen = _make_generator(val_t, rng) return lambda: _g_dict(rng, key_gen, val_gen) if origin is Union: # Already handled Optional above. For pure unions, pick first member. return _make_generator(args[0], rng) return lambda: _g_int(rng) # --------------------------------------------------------------------------- # fuzz_spec overrides # --------------------------------------------------------------------------- def _generator_from_spec(entry: Dict[str, Any], rng: random.Random) -> Callable[[], Any]: """Build a generator from a ``fuzz_spec`` entry dict. Supported keys (all optional except ``type``): - ``type``: one of ``"int" | "float" | "bool" | "str" | "bytes" | "list" | "tuple" | "set" | "dict" | "literal" | "any"`` - ``min``, ``max``: int/float bounds - ``max_len``: container/string length cap - ``alphabet``: str-only character pool - ``elem``: nested ``fuzz_spec`` entry for container elements - ``key``, ``value``: nested entries for dict - ``elems``: list of nested entries for fixed-arity tuple - ``choices``: list of literals to sample from - ``optional``: bool; if True, occasionally yields ``None`` """ t = entry.get("type", "any") def _maybe_optional(gen: Callable[[], Any]) -> Callable[[], Any]: if not entry.get("optional"): return gen def _g(): if rng.random() < EDGE_PROB: return None return gen() return _g if t == "int": lo = int(entry.get("min", -100)) hi = int(entry.get("max", 100)) return _maybe_optional(lambda: _g_int(rng, lo=lo, hi=hi)) if t == "float": lo = float(entry.get("min", -100.0)) hi = float(entry.get("max", 100.0)) return _maybe_optional(lambda: _g_float(rng, lo=lo, hi=hi)) if t == "bool": return _maybe_optional(lambda: _g_bool(rng)) if t == "str": max_len = int(entry.get("max_len", 12)) alphabet = entry.get("alphabet") return _maybe_optional(lambda: _g_str(rng, max_len=max_len, alphabet=alphabet)) if t == "bytes": max_len = int(entry.get("max_len", 8)) return _maybe_optional(lambda: _g_bytes(rng, max_len=max_len)) if t == "literal": choices = list(entry.get("choices", [])) if not choices: return _maybe_optional(lambda: None) return _maybe_optional(lambda: rng.choice(choices)) if t == "list": elem = entry.get("elem", {"type": "int"}) elem_gen = _generator_from_spec(elem, rng) max_len = int(entry.get("max_len", 6)) return _maybe_optional(lambda: _g_list(rng, elem_gen, max_len=max_len)) if t == "tuple": if "elems" in entry: elem_gens = [_generator_from_spec(e, rng) for e in entry["elems"]] return _maybe_optional(lambda: _g_tuple_heterogeneous(rng, elem_gens)) elem = entry.get("elem", {"type": "int"}) elem_gen = _generator_from_spec(elem, rng) max_len = int(entry.get("max_len", 6)) return _maybe_optional(lambda: _g_tuple_homogeneous(rng, elem_gen, max_len=max_len)) if t == "set": elem = entry.get("elem", {"type": "int"}) elem_gen = _generator_from_spec(elem, rng) max_len = int(entry.get("max_len", 6)) return _maybe_optional(lambda: _g_set(rng, elem_gen, max_len=max_len)) if t == "dict": key = entry.get("key", {"type": "str", "max_len": 4}) value = entry.get("value", {"type": "int"}) key_gen = _generator_from_spec(key, rng) val_gen = _generator_from_spec(value, rng) max_len = int(entry.get("max_len", 5)) return _maybe_optional(lambda: _g_dict(rng, key_gen, val_gen, max_len=max_len)) return _maybe_optional(lambda: _g_int(rng)) # --------------------------------------------------------------------------- # Public API # --------------------------------------------------------------------------- def auto_fuzz( fn: Callable[..., Any], n: int, rng: Optional[random.Random] = None, *, fuzz_spec: Optional[Dict[str, Dict[str, Any]]] = None, ) -> List[tuple]: """Produce ``n`` argument tuples for calling ``fn``. Each returned element is an ``args`` tuple, intended to be applied as ``fn(*args)``. ``fuzz_spec`` is keyed by parameter name and overrides the type-based generation per-parameter. """ rng = rng or random.Random() fuzz_spec = fuzz_spec or {} sig = inspect.signature(fn) try: hints = typing.get_type_hints(fn) except Exception: # noqa: BLE001 -- bad annotations shouldn't crash fuzzing hints = {} param_gens: List[Callable[[], Any]] = [] for pname, param in sig.parameters.items(): if param.kind in ( inspect.Parameter.VAR_POSITIONAL, inspect.Parameter.VAR_KEYWORD, inspect.Parameter.KEYWORD_ONLY, ): # We only fuzz positional / positional-or-keyword params. continue if pname in fuzz_spec: param_gens.append(_generator_from_spec(fuzz_spec[pname], rng)) continue annot = hints.get(pname, param.annotation) if annot is inspect.Parameter.empty: param_gens.append(lambda r=rng: _g_int(r)) else: param_gens.append(_make_generator(annot, rng)) return [tuple(g() for g in param_gens) for _ in range(n)] def make_fuzzer( fn: Callable[..., Any], fuzz_spec: Optional[Dict[str, Dict[str, Any]]] = None, ) -> Callable[[random.Random, int], List[tuple]]: """Adapt ``auto_fuzz`` to the ``FunctionSpec.fuzzer`` signature (``(rng, n) -> list``).""" def _fuzzer(rng: random.Random, n: int) -> List[tuple]: return auto_fuzz(fn, n, rng, fuzz_spec=fuzz_spec) return _fuzzer