| """Generic, type-driven fuzz-input generator for OpenSleuth Level 2. |
| |
| Given a Python callable annotated with ``typing`` hints, ``auto_fuzz`` produces |
| ``n`` argument tuples that respect the signature so the verifier can score |
| unannotated *arbitrary* targets without requiring a hand-written fuzzer the |
| way the 9 builtin BLACK_BOX_FUNCTIONS do. |
| |
| Each per-type generator mixes a small set of "edge" values (``0``, ``-1``, |
| ``""``, ``None`` for ``Optional``, ...) with random values, weighted ~30/70. |
| This biases the fuzz batch toward the boundaries that actually distinguish |
| implementations while still covering the boring middle. |
| |
| A caller-supplied ``fuzz_spec: dict`` overrides the type-based generation on |
| a per-parameter basis, e.g.:: |
| |
| auto_fuzz(my_fn, n=20, fuzz_spec={"n": {"type": "int", "min": 1, "max": 90}}) |
| |
| Returned shape: ``List[tuple]`` -- one tuple per fuzz input, with one element |
| per (positional) parameter of ``fn``. Even for unary ``fn`` we return tuples |
| so the catalog wrapper has a single, uniform calling convention. |
| """ |
|
|
| from __future__ import annotations |
|
|
| import inspect |
| import random |
| import string |
| import typing |
| from typing import Any, Callable, Dict, List, Optional, Tuple, Union, get_args, get_origin |
|
|
|
|
| |
| |
| |
| EDGE_PROB = 0.30 |
|
|
|
|
| |
| _INT_EDGES = (0, 1, -1, 2, -2, 10, -10, 100, -100) |
| _FLOAT_EDGES = (0.0, 1.0, -1.0, 0.5, -0.5, 1e-9, -1e-9, 100.0) |
| _STR_EDGES = ("", "a", "ab", "Hello", " ", "0", "abc def") |
| _BYTES_EDGES = (b"", b"a", b"ab", b"\x00", b"abc") |
|
|
|
|
| |
| |
| |
|
|
|
|
| def _maybe_edge(rng: random.Random, edges: tuple, random_fn: Callable[[], Any]) -> Any: |
| if edges and rng.random() < EDGE_PROB: |
| return rng.choice(edges) |
| return random_fn() |
|
|
|
|
| def _g_int(rng: random.Random, *, lo: int = -100, hi: int = 100) -> int: |
| |
| |
| edges = tuple(v for v in _INT_EDGES if lo <= v <= hi) or (lo,) |
| return _maybe_edge(rng, edges, lambda: rng.randint(lo, hi)) |
|
|
|
|
| def _g_float(rng: random.Random, *, lo: float = -100.0, hi: float = 100.0) -> float: |
| edges = tuple(v for v in _FLOAT_EDGES if lo <= v <= hi) or (lo,) |
| return _maybe_edge(rng, edges, lambda: rng.uniform(lo, hi)) |
|
|
|
|
| def _g_bool(rng: random.Random) -> bool: |
| return bool(rng.getrandbits(1)) |
|
|
|
|
| def _g_str(rng: random.Random, *, max_len: int = 12, alphabet: Optional[str] = None) -> str: |
| alpha = alphabet or (string.ascii_letters + string.digits) |
|
|
| def _rand(): |
| return "".join(rng.choices(alpha, k=rng.randint(0, max_len))) |
|
|
| if alphabet is not None: |
| |
| |
| |
| custom_edges = ("",) |
| if alphabet: |
| custom_edges = ("", alphabet[0], alphabet[0] * min(max_len, 2)) |
| return _maybe_edge(rng, custom_edges, _rand) |
| return _maybe_edge(rng, _STR_EDGES, _rand) |
|
|
|
|
| def _g_bytes(rng: random.Random, *, max_len: int = 8) -> bytes: |
| def _rand(): |
| return bytes(rng.randint(0, 255) for _ in range(rng.randint(0, max_len))) |
|
|
| return _maybe_edge(rng, _BYTES_EDGES, _rand) |
|
|
|
|
| def _g_list(rng: random.Random, elem_gen: Callable[[], Any], *, max_len: int = 6) -> list: |
| if rng.random() < EDGE_PROB / 2: |
| return [] |
| return [elem_gen() for _ in range(rng.randint(0, max_len))] |
|
|
|
|
| def _g_tuple_homogeneous( |
| rng: random.Random, elem_gen: Callable[[], Any], *, max_len: int = 6 |
| ) -> tuple: |
| return tuple(_g_list(rng, elem_gen, max_len=max_len)) |
|
|
|
|
| def _g_tuple_heterogeneous(rng: random.Random, elem_gens: List[Callable[[], Any]]) -> tuple: |
| return tuple(g() for g in elem_gens) |
|
|
|
|
| def _g_set(rng: random.Random, elem_gen: Callable[[], Any], *, max_len: int = 6) -> set: |
| if rng.random() < EDGE_PROB / 2: |
| return set() |
| return {elem_gen() for _ in range(rng.randint(0, max_len))} |
|
|
|
|
| def _g_dict( |
| rng: random.Random, |
| key_gen: Callable[[], Any], |
| val_gen: Callable[[], Any], |
| *, |
| max_len: int = 5, |
| ) -> dict: |
| if rng.random() < EDGE_PROB / 2: |
| return {} |
| return {key_gen(): val_gen() for _ in range(rng.randint(0, max_len))} |
|
|
|
|
| |
| |
| |
|
|
|
|
| def _is_optional(tp: Any) -> bool: |
| """``Optional[X]`` is ``Union[X, None]`` under the hood.""" |
| if get_origin(tp) is Union: |
| return type(None) in get_args(tp) |
| return False |
|
|
|
|
| def _strip_optional(tp: Any) -> Any: |
| """Return ``X`` for ``Optional[X]``; for unions with None + multiple, pick |
| the first non-None member (we can't satisfy a union in a single call).""" |
| if get_origin(tp) is Union: |
| non_none = [a for a in get_args(tp) if a is not type(None)] |
| if len(non_none) == 1: |
| return non_none[0] |
| if non_none: |
| return non_none[0] |
| return tp |
|
|
|
|
| def _make_generator(tp: Any, rng: random.Random) -> Callable[[], Any]: |
| """Return a 0-arg callable that produces one random value of type ``tp``. |
| |
| The recursion handles container element types (``list[int]``, |
| ``dict[str, list[int]]``, etc). |
| """ |
|
|
| if tp is None or tp is type(None): |
| return lambda: None |
|
|
| if _is_optional(tp): |
| inner = _strip_optional(tp) |
| inner_gen = _make_generator(inner, rng) |
|
|
| def _gen_opt(): |
| if rng.random() < EDGE_PROB: |
| return None |
| return inner_gen() |
|
|
| return _gen_opt |
|
|
| origin = get_origin(tp) |
|
|
| if origin is typing.Literal: |
| choices = list(get_args(tp)) |
| return lambda: rng.choice(choices) |
|
|
| if origin is None: |
| if tp is int: |
| return lambda: _g_int(rng) |
| if tp is float: |
| return lambda: _g_float(rng) |
| if tp is bool: |
| return lambda: _g_bool(rng) |
| if tp is str: |
| return lambda: _g_str(rng) |
| if tp is bytes: |
| return lambda: _g_bytes(rng) |
| if tp is list: |
| return lambda: _g_list(rng, lambda: _g_int(rng)) |
| if tp is tuple: |
| return lambda: _g_tuple_homogeneous(rng, lambda: _g_int(rng)) |
| if tp is set: |
| return lambda: _g_set(rng, lambda: _g_int(rng)) |
| if tp is dict: |
| return lambda: _g_dict(rng, lambda: _g_str(rng, max_len=4), lambda: _g_int(rng)) |
| if tp is type(None): |
| return lambda: None |
| if tp is typing.Any: |
| return lambda: _g_int(rng) |
| |
| return lambda: _g_int(rng) |
|
|
| args = get_args(tp) |
|
|
| if origin in (list, List): |
| elem_t = args[0] if args else int |
| elem_gen = _make_generator(elem_t, rng) |
| return lambda: _g_list(rng, elem_gen) |
|
|
| if origin in (set, frozenset): |
| elem_t = args[0] if args else int |
| elem_gen = _make_generator(elem_t, rng) |
| return lambda: _g_set(rng, elem_gen) |
|
|
| if origin in (tuple, Tuple): |
| if not args: |
| return lambda: _g_tuple_homogeneous(rng, lambda: _g_int(rng)) |
| if len(args) == 2 and args[1] is Ellipsis: |
| elem_gen = _make_generator(args[0], rng) |
| return lambda: _g_tuple_homogeneous(rng, elem_gen) |
| elem_gens = [_make_generator(a, rng) for a in args] |
| return lambda: _g_tuple_heterogeneous(rng, elem_gens) |
|
|
| if origin in (dict, Dict): |
| key_t = args[0] if args else str |
| val_t = args[1] if len(args) > 1 else int |
| key_gen = _make_generator(key_t, rng) |
| val_gen = _make_generator(val_t, rng) |
| return lambda: _g_dict(rng, key_gen, val_gen) |
|
|
| if origin is Union: |
| |
| return _make_generator(args[0], rng) |
|
|
| return lambda: _g_int(rng) |
|
|
|
|
| |
| |
| |
|
|
|
|
| def _generator_from_spec(entry: Dict[str, Any], rng: random.Random) -> Callable[[], Any]: |
| """Build a generator from a ``fuzz_spec`` entry dict. |
| |
| Supported keys (all optional except ``type``): |
| - ``type``: one of ``"int" | "float" | "bool" | "str" | "bytes" | |
| "list" | "tuple" | "set" | "dict" | "literal" | "any"`` |
| - ``min``, ``max``: int/float bounds |
| - ``max_len``: container/string length cap |
| - ``alphabet``: str-only character pool |
| - ``elem``: nested ``fuzz_spec`` entry for container elements |
| - ``key``, ``value``: nested entries for dict |
| - ``elems``: list of nested entries for fixed-arity tuple |
| - ``choices``: list of literals to sample from |
| - ``optional``: bool; if True, occasionally yields ``None`` |
| """ |
| t = entry.get("type", "any") |
|
|
| def _maybe_optional(gen: Callable[[], Any]) -> Callable[[], Any]: |
| if not entry.get("optional"): |
| return gen |
|
|
| def _g(): |
| if rng.random() < EDGE_PROB: |
| return None |
| return gen() |
|
|
| return _g |
|
|
| if t == "int": |
| lo = int(entry.get("min", -100)) |
| hi = int(entry.get("max", 100)) |
| return _maybe_optional(lambda: _g_int(rng, lo=lo, hi=hi)) |
| if t == "float": |
| lo = float(entry.get("min", -100.0)) |
| hi = float(entry.get("max", 100.0)) |
| return _maybe_optional(lambda: _g_float(rng, lo=lo, hi=hi)) |
| if t == "bool": |
| return _maybe_optional(lambda: _g_bool(rng)) |
| if t == "str": |
| max_len = int(entry.get("max_len", 12)) |
| alphabet = entry.get("alphabet") |
| return _maybe_optional(lambda: _g_str(rng, max_len=max_len, alphabet=alphabet)) |
| if t == "bytes": |
| max_len = int(entry.get("max_len", 8)) |
| return _maybe_optional(lambda: _g_bytes(rng, max_len=max_len)) |
| if t == "literal": |
| choices = list(entry.get("choices", [])) |
| if not choices: |
| return _maybe_optional(lambda: None) |
| return _maybe_optional(lambda: rng.choice(choices)) |
| if t == "list": |
| elem = entry.get("elem", {"type": "int"}) |
| elem_gen = _generator_from_spec(elem, rng) |
| max_len = int(entry.get("max_len", 6)) |
| return _maybe_optional(lambda: _g_list(rng, elem_gen, max_len=max_len)) |
| if t == "tuple": |
| if "elems" in entry: |
| elem_gens = [_generator_from_spec(e, rng) for e in entry["elems"]] |
| return _maybe_optional(lambda: _g_tuple_heterogeneous(rng, elem_gens)) |
| elem = entry.get("elem", {"type": "int"}) |
| elem_gen = _generator_from_spec(elem, rng) |
| max_len = int(entry.get("max_len", 6)) |
| return _maybe_optional(lambda: _g_tuple_homogeneous(rng, elem_gen, max_len=max_len)) |
| if t == "set": |
| elem = entry.get("elem", {"type": "int"}) |
| elem_gen = _generator_from_spec(elem, rng) |
| max_len = int(entry.get("max_len", 6)) |
| return _maybe_optional(lambda: _g_set(rng, elem_gen, max_len=max_len)) |
| if t == "dict": |
| key = entry.get("key", {"type": "str", "max_len": 4}) |
| value = entry.get("value", {"type": "int"}) |
| key_gen = _generator_from_spec(key, rng) |
| val_gen = _generator_from_spec(value, rng) |
| max_len = int(entry.get("max_len", 5)) |
| return _maybe_optional(lambda: _g_dict(rng, key_gen, val_gen, max_len=max_len)) |
| return _maybe_optional(lambda: _g_int(rng)) |
|
|
|
|
| |
| |
| |
|
|
|
|
| def auto_fuzz( |
| fn: Callable[..., Any], |
| n: int, |
| rng: Optional[random.Random] = None, |
| *, |
| fuzz_spec: Optional[Dict[str, Dict[str, Any]]] = None, |
| ) -> List[tuple]: |
| """Produce ``n`` argument tuples for calling ``fn``. |
| |
| Each returned element is an ``args`` tuple, intended to be applied as |
| ``fn(*args)``. ``fuzz_spec`` is keyed by parameter name and overrides |
| the type-based generation per-parameter. |
| """ |
| rng = rng or random.Random() |
| fuzz_spec = fuzz_spec or {} |
|
|
| sig = inspect.signature(fn) |
| try: |
| hints = typing.get_type_hints(fn) |
| except Exception: |
| hints = {} |
|
|
| param_gens: List[Callable[[], Any]] = [] |
| for pname, param in sig.parameters.items(): |
| if param.kind in ( |
| inspect.Parameter.VAR_POSITIONAL, |
| inspect.Parameter.VAR_KEYWORD, |
| inspect.Parameter.KEYWORD_ONLY, |
| ): |
| |
| continue |
| if pname in fuzz_spec: |
| param_gens.append(_generator_from_spec(fuzz_spec[pname], rng)) |
| continue |
| annot = hints.get(pname, param.annotation) |
| if annot is inspect.Parameter.empty: |
| param_gens.append(lambda r=rng: _g_int(r)) |
| else: |
| param_gens.append(_make_generator(annot, rng)) |
|
|
| return [tuple(g() for g in param_gens) for _ in range(n)] |
|
|
|
|
| def make_fuzzer( |
| fn: Callable[..., Any], |
| fuzz_spec: Optional[Dict[str, Dict[str, Any]]] = None, |
| ) -> Callable[[random.Random, int], List[tuple]]: |
| """Adapt ``auto_fuzz`` to the ``FunctionSpec.fuzzer`` signature |
| (``(rng, n) -> list``).""" |
|
|
| def _fuzzer(rng: random.Random, n: int) -> List[tuple]: |
| return auto_fuzz(fn, n, rng, fuzz_spec=fuzz_spec) |
|
|
| return _fuzzer |
|
|