File size: 14,046 Bytes
77e65fb
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
"""Generic, type-driven fuzz-input generator for OpenSleuth Level 2.

Given a Python callable annotated with ``typing`` hints, ``auto_fuzz`` produces
``n`` argument tuples that respect the signature so the verifier can score
unannotated *arbitrary* targets without requiring a hand-written fuzzer the
way the 9 builtin BLACK_BOX_FUNCTIONS do.

Each per-type generator mixes a small set of "edge" values (``0``, ``-1``,
``""``, ``None`` for ``Optional``, ...) with random values, weighted ~30/70.
This biases the fuzz batch toward the boundaries that actually distinguish
implementations while still covering the boring middle.

A caller-supplied ``fuzz_spec: dict`` overrides the type-based generation on
a per-parameter basis, e.g.::

    auto_fuzz(my_fn, n=20, fuzz_spec={"n": {"type": "int", "min": 1, "max": 90}})

Returned shape: ``List[tuple]`` -- one tuple per fuzz input, with one element
per (positional) parameter of ``fn``. Even for unary ``fn`` we return tuples
so the catalog wrapper has a single, uniform calling convention.
"""

from __future__ import annotations

import inspect
import random
import string
import typing
from typing import Any, Callable, Dict, List, Optional, Tuple, Union, get_args, get_origin


# Probability that a per-type generator emits an "edge" value (0, "", None,
# ...) instead of a random sample. Kept small enough that the boring middle
# still gets coverage but high enough that the edge cases reliably appear.
EDGE_PROB = 0.30


# Per-type edge pools. These are used by the ``_g_*`` helpers below.
_INT_EDGES = (0, 1, -1, 2, -2, 10, -10, 100, -100)
_FLOAT_EDGES = (0.0, 1.0, -1.0, 0.5, -0.5, 1e-9, -1e-9, 100.0)
_STR_EDGES = ("", "a", "ab", "Hello", "  ", "0", "abc def")
_BYTES_EDGES = (b"", b"a", b"ab", b"\x00", b"abc")


# ---------------------------------------------------------------------------
# Per-type generators (do not assume any param-name dispatch).
# ---------------------------------------------------------------------------


def _maybe_edge(rng: random.Random, edges: tuple, random_fn: Callable[[], Any]) -> Any:
    if edges and rng.random() < EDGE_PROB:
        return rng.choice(edges)
    return random_fn()


def _g_int(rng: random.Random, *, lo: int = -100, hi: int = 100) -> int:
    # Filter the edge pool by [lo, hi] so a caller-supplied fuzz_spec
    # ``{"type": "int", "min": 1, "max": 5}`` never emits ``-100``.
    edges = tuple(v for v in _INT_EDGES if lo <= v <= hi) or (lo,)
    return _maybe_edge(rng, edges, lambda: rng.randint(lo, hi))


def _g_float(rng: random.Random, *, lo: float = -100.0, hi: float = 100.0) -> float:
    edges = tuple(v for v in _FLOAT_EDGES if lo <= v <= hi) or (lo,)
    return _maybe_edge(rng, edges, lambda: rng.uniform(lo, hi))


def _g_bool(rng: random.Random) -> bool:
    return bool(rng.getrandbits(1))


def _g_str(rng: random.Random, *, max_len: int = 12, alphabet: Optional[str] = None) -> str:
    alpha = alphabet or (string.ascii_letters + string.digits)

    def _rand():
        return "".join(rng.choices(alpha, k=rng.randint(0, max_len)))

    if alphabet is not None:
        # When the caller restricts the alphabet, our generic edge pool
        # ("Hello", "  ", ...) would violate it. Build a deterministic
        # alphabet-respecting edge set instead.
        custom_edges = ("",)
        if alphabet:
            custom_edges = ("", alphabet[0], alphabet[0] * min(max_len, 2))
        return _maybe_edge(rng, custom_edges, _rand)
    return _maybe_edge(rng, _STR_EDGES, _rand)


def _g_bytes(rng: random.Random, *, max_len: int = 8) -> bytes:
    def _rand():
        return bytes(rng.randint(0, 255) for _ in range(rng.randint(0, max_len)))

    return _maybe_edge(rng, _BYTES_EDGES, _rand)


def _g_list(rng: random.Random, elem_gen: Callable[[], Any], *, max_len: int = 6) -> list:
    if rng.random() < EDGE_PROB / 2:
        return []
    return [elem_gen() for _ in range(rng.randint(0, max_len))]


def _g_tuple_homogeneous(
    rng: random.Random, elem_gen: Callable[[], Any], *, max_len: int = 6
) -> tuple:
    return tuple(_g_list(rng, elem_gen, max_len=max_len))


def _g_tuple_heterogeneous(rng: random.Random, elem_gens: List[Callable[[], Any]]) -> tuple:
    return tuple(g() for g in elem_gens)


def _g_set(rng: random.Random, elem_gen: Callable[[], Any], *, max_len: int = 6) -> set:
    if rng.random() < EDGE_PROB / 2:
        return set()
    return {elem_gen() for _ in range(rng.randint(0, max_len))}


def _g_dict(
    rng: random.Random,
    key_gen: Callable[[], Any],
    val_gen: Callable[[], Any],
    *,
    max_len: int = 5,
) -> dict:
    if rng.random() < EDGE_PROB / 2:
        return {}
    return {key_gen(): val_gen() for _ in range(rng.randint(0, max_len))}


# ---------------------------------------------------------------------------
# Type -> generator dispatch.
# ---------------------------------------------------------------------------


def _is_optional(tp: Any) -> bool:
    """``Optional[X]`` is ``Union[X, None]`` under the hood."""
    if get_origin(tp) is Union:
        return type(None) in get_args(tp)
    return False


def _strip_optional(tp: Any) -> Any:
    """Return ``X`` for ``Optional[X]``; for unions with None + multiple, pick
    the first non-None member (we can't satisfy a union in a single call)."""
    if get_origin(tp) is Union:
        non_none = [a for a in get_args(tp) if a is not type(None)]
        if len(non_none) == 1:
            return non_none[0]
        if non_none:
            return non_none[0]
    return tp


def _make_generator(tp: Any, rng: random.Random) -> Callable[[], Any]:
    """Return a 0-arg callable that produces one random value of type ``tp``.

    The recursion handles container element types (``list[int]``,
    ``dict[str, list[int]]``, etc).
    """

    if tp is None or tp is type(None):
        return lambda: None

    if _is_optional(tp):
        inner = _strip_optional(tp)
        inner_gen = _make_generator(inner, rng)

        def _gen_opt():
            if rng.random() < EDGE_PROB:
                return None
            return inner_gen()

        return _gen_opt

    origin = get_origin(tp)

    if origin is typing.Literal:
        choices = list(get_args(tp))
        return lambda: rng.choice(choices)

    if origin is None:
        if tp is int:
            return lambda: _g_int(rng)
        if tp is float:
            return lambda: _g_float(rng)
        if tp is bool:
            return lambda: _g_bool(rng)
        if tp is str:
            return lambda: _g_str(rng)
        if tp is bytes:
            return lambda: _g_bytes(rng)
        if tp is list:
            return lambda: _g_list(rng, lambda: _g_int(rng))
        if tp is tuple:
            return lambda: _g_tuple_homogeneous(rng, lambda: _g_int(rng))
        if tp is set:
            return lambda: _g_set(rng, lambda: _g_int(rng))
        if tp is dict:
            return lambda: _g_dict(rng, lambda: _g_str(rng, max_len=4), lambda: _g_int(rng))
        if tp is type(None):
            return lambda: None
        if tp is typing.Any:
            return lambda: _g_int(rng)
        # Unknown bare type -> fall back to int.
        return lambda: _g_int(rng)

    args = get_args(tp)

    if origin in (list, List):
        elem_t = args[0] if args else int
        elem_gen = _make_generator(elem_t, rng)
        return lambda: _g_list(rng, elem_gen)

    if origin in (set, frozenset):
        elem_t = args[0] if args else int
        elem_gen = _make_generator(elem_t, rng)
        return lambda: _g_set(rng, elem_gen)

    if origin in (tuple, Tuple):
        if not args:
            return lambda: _g_tuple_homogeneous(rng, lambda: _g_int(rng))
        if len(args) == 2 and args[1] is Ellipsis:
            elem_gen = _make_generator(args[0], rng)
            return lambda: _g_tuple_homogeneous(rng, elem_gen)
        elem_gens = [_make_generator(a, rng) for a in args]
        return lambda: _g_tuple_heterogeneous(rng, elem_gens)

    if origin in (dict, Dict):
        key_t = args[0] if args else str
        val_t = args[1] if len(args) > 1 else int
        key_gen = _make_generator(key_t, rng)
        val_gen = _make_generator(val_t, rng)
        return lambda: _g_dict(rng, key_gen, val_gen)

    if origin is Union:
        # Already handled Optional above. For pure unions, pick first member.
        return _make_generator(args[0], rng)

    return lambda: _g_int(rng)


# ---------------------------------------------------------------------------
# fuzz_spec overrides
# ---------------------------------------------------------------------------


def _generator_from_spec(entry: Dict[str, Any], rng: random.Random) -> Callable[[], Any]:
    """Build a generator from a ``fuzz_spec`` entry dict.

    Supported keys (all optional except ``type``):
      - ``type``: one of ``"int" | "float" | "bool" | "str" | "bytes" |
        "list" | "tuple" | "set" | "dict" | "literal" | "any"``
      - ``min``, ``max``: int/float bounds
      - ``max_len``: container/string length cap
      - ``alphabet``: str-only character pool
      - ``elem``: nested ``fuzz_spec`` entry for container elements
      - ``key``, ``value``: nested entries for dict
      - ``elems``: list of nested entries for fixed-arity tuple
      - ``choices``: list of literals to sample from
      - ``optional``: bool; if True, occasionally yields ``None``
    """
    t = entry.get("type", "any")

    def _maybe_optional(gen: Callable[[], Any]) -> Callable[[], Any]:
        if not entry.get("optional"):
            return gen

        def _g():
            if rng.random() < EDGE_PROB:
                return None
            return gen()

        return _g

    if t == "int":
        lo = int(entry.get("min", -100))
        hi = int(entry.get("max", 100))
        return _maybe_optional(lambda: _g_int(rng, lo=lo, hi=hi))
    if t == "float":
        lo = float(entry.get("min", -100.0))
        hi = float(entry.get("max", 100.0))
        return _maybe_optional(lambda: _g_float(rng, lo=lo, hi=hi))
    if t == "bool":
        return _maybe_optional(lambda: _g_bool(rng))
    if t == "str":
        max_len = int(entry.get("max_len", 12))
        alphabet = entry.get("alphabet")
        return _maybe_optional(lambda: _g_str(rng, max_len=max_len, alphabet=alphabet))
    if t == "bytes":
        max_len = int(entry.get("max_len", 8))
        return _maybe_optional(lambda: _g_bytes(rng, max_len=max_len))
    if t == "literal":
        choices = list(entry.get("choices", []))
        if not choices:
            return _maybe_optional(lambda: None)
        return _maybe_optional(lambda: rng.choice(choices))
    if t == "list":
        elem = entry.get("elem", {"type": "int"})
        elem_gen = _generator_from_spec(elem, rng)
        max_len = int(entry.get("max_len", 6))
        return _maybe_optional(lambda: _g_list(rng, elem_gen, max_len=max_len))
    if t == "tuple":
        if "elems" in entry:
            elem_gens = [_generator_from_spec(e, rng) for e in entry["elems"]]
            return _maybe_optional(lambda: _g_tuple_heterogeneous(rng, elem_gens))
        elem = entry.get("elem", {"type": "int"})
        elem_gen = _generator_from_spec(elem, rng)
        max_len = int(entry.get("max_len", 6))
        return _maybe_optional(lambda: _g_tuple_homogeneous(rng, elem_gen, max_len=max_len))
    if t == "set":
        elem = entry.get("elem", {"type": "int"})
        elem_gen = _generator_from_spec(elem, rng)
        max_len = int(entry.get("max_len", 6))
        return _maybe_optional(lambda: _g_set(rng, elem_gen, max_len=max_len))
    if t == "dict":
        key = entry.get("key", {"type": "str", "max_len": 4})
        value = entry.get("value", {"type": "int"})
        key_gen = _generator_from_spec(key, rng)
        val_gen = _generator_from_spec(value, rng)
        max_len = int(entry.get("max_len", 5))
        return _maybe_optional(lambda: _g_dict(rng, key_gen, val_gen, max_len=max_len))
    return _maybe_optional(lambda: _g_int(rng))


# ---------------------------------------------------------------------------
# Public API
# ---------------------------------------------------------------------------


def auto_fuzz(
    fn: Callable[..., Any],
    n: int,
    rng: Optional[random.Random] = None,
    *,
    fuzz_spec: Optional[Dict[str, Dict[str, Any]]] = None,
) -> List[tuple]:
    """Produce ``n`` argument tuples for calling ``fn``.

    Each returned element is an ``args`` tuple, intended to be applied as
    ``fn(*args)``. ``fuzz_spec`` is keyed by parameter name and overrides
    the type-based generation per-parameter.
    """
    rng = rng or random.Random()
    fuzz_spec = fuzz_spec or {}

    sig = inspect.signature(fn)
    try:
        hints = typing.get_type_hints(fn)
    except Exception:  # noqa: BLE001 -- bad annotations shouldn't crash fuzzing
        hints = {}

    param_gens: List[Callable[[], Any]] = []
    for pname, param in sig.parameters.items():
        if param.kind in (
            inspect.Parameter.VAR_POSITIONAL,
            inspect.Parameter.VAR_KEYWORD,
            inspect.Parameter.KEYWORD_ONLY,
        ):
            # We only fuzz positional / positional-or-keyword params.
            continue
        if pname in fuzz_spec:
            param_gens.append(_generator_from_spec(fuzz_spec[pname], rng))
            continue
        annot = hints.get(pname, param.annotation)
        if annot is inspect.Parameter.empty:
            param_gens.append(lambda r=rng: _g_int(r))
        else:
            param_gens.append(_make_generator(annot, rng))

    return [tuple(g() for g in param_gens) for _ in range(n)]


def make_fuzzer(
    fn: Callable[..., Any],
    fuzz_spec: Optional[Dict[str, Dict[str, Any]]] = None,
) -> Callable[[random.Random, int], List[tuple]]:
    """Adapt ``auto_fuzz`` to the ``FunctionSpec.fuzzer`` signature
    (``(rng, n) -> list``)."""

    def _fuzzer(rng: random.Random, n: int) -> List[tuple]:
        return auto_fuzz(fn, n, rng, fuzz_spec=fuzz_spec)

    return _fuzzer