File size: 3,607 Bytes
632c145
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
from training.grpo_curriculum import (
    AdaptiveDifficultyCurriculum,
    ScenarioGroupRegistry,
    build_scenario_group_rows,
)


def _entries():
    return [
        {
            "seed": 10,
            "split": "train",
            "difficulty": 0,
            "template_id": "fastapi_basic",
            "bug_family": "bola_idor",
            "scenario_hash": "hash-a",
            "validated": True,
        },
        {
            "seed": 20,
            "split": "train",
            "difficulty": 1,
            "template_id": "fastapi_basic",
            "bug_family": "bfla",
            "scenario_hash": "hash-b",
            "validated": True,
        },
        {
            "seed": 30,
            "split": "train",
            "difficulty": 1,
            "template_id": "fastapi_basic",
            "bug_family": "tenant_leak",
            "scenario_hash": "hash-c",
            "validated": True,
        },
    ]


def test_scenario_group_reuses_assignment_for_all_generations():
    """Two lookups for the same scenario group id must compare equal."""
    registry_options = {
        "split": "train",
        "initial_difficulty": 0,
        "rng_seed": 1,
        "max_level": 1,
    }
    registry = ScenarioGroupRegistry(_entries(), **registry_options)

    # Query the registry twice with identical arguments, in order.
    lookups = [
        registry.assignment_for(
            scenario_group_id=101,
            difficulty_policy="adaptive",
        )
        for _ in range(2)
    ]

    assert lookups[0] == lookups[1]


def test_different_scenario_groups_use_different_cached_scenarios_when_available():
    """Distinct group ids with distinct requested seeds get distinct hashes.

    Both requests use the ``"fixed"`` policy at difficulty 1 on the train
    split; only the group id and the requested seed differ.
    """
    registry = ScenarioGroupRegistry(
        _entries(),
        split="train",
        initial_difficulty=1,
        rng_seed=3,
        max_level=1,
    )

    # (scenario_group_id, requested_seed) for each of the two requests.
    requests = ((201, 20), (202, 30))
    assignments = [
        registry.assignment_for(
            scenario_group_id=group_id,
            requested_seed=seed,
            requested_difficulty=1,
            split="train",
            difficulty_policy="fixed",
        )
        for group_id, seed in requests
    ]

    first_hash = assignments[0]["scenario_hash"]
    second_hash = assignments[1]["scenario_hash"]
    assert first_hash != second_hash


def test_fixed_assignment_uses_dataset_seed_and_difficulty():
    """A ``"fixed"`` request for seed 20 / difficulty 1 yields the hash-b entry."""
    registry = ScenarioGroupRegistry(
        _entries(),
        split="train",
        initial_difficulty=0,
        rng_seed=1,
        max_level=1,
    )
    request = {
        "scenario_group_id": 301,
        "requested_seed": 20,
        "requested_difficulty": 1,
        "split": "train",
        "difficulty_policy": "fixed",
    }

    assignment = registry.assignment_for(**request)

    # Checked in this order: seed, then difficulty, then scenario hash.
    expected_fields = (
        ("seed", 20),
        ("difficulty", 1),
        ("scenario_hash", "hash-b"),
    )
    for field, expected in expected_fields:
        assert assignment[field] == expected


def test_adaptive_curriculum_promotes_and_demotes_at_thresholds():
    """Fifty identical updates move ``current_level`` by one step.

    Starting at level 0, fifty ``update(0, True)`` calls must leave the
    curriculum at level 1; starting at level 1, fifty ``update(1, False)``
    calls must leave it at level 0.
    """
    update_count = 50
    # (starting level, second argument to update, expected final level).
    # NOTE(review): the boolean presumably marks episode success; confirm
    # against AdaptiveDifficultyCurriculum.update.
    cases = (
        (0, True, 1),
        (1, False, 0),
    )

    for start_level, outcome, expected_level in cases:
        curriculum = AdaptiveDifficultyCurriculum(
            min_level=0,
            max_level=2,
            current_level=start_level,
            promote_after=update_count,
        )
        for _ in range(update_count):
            curriculum.update(start_level, outcome)
        assert curriculum.current_level == expected_level


def test_build_scenario_group_rows_include_grpo_group_columns():
    """Rows built from seed 7 carry group ids 7 and 8, the policy, and the prompt."""
    prompt_text = "repair local app"
    first_seed = 7

    rows = build_scenario_group_rows(
        dataset_size=2,
        training_prompt=prompt_text,
        seed_start=first_seed,
        split="train",
        difficulty=1,
    )

    head = rows[0]
    assert head["scenario_group_id"] == first_seed
    assert rows[1]["scenario_group_id"] == first_seed + 1
    assert head["difficulty_policy"] == "adaptive"
    # The prompt is a sequence whose first item exposes the text under "content".
    assert head["prompt"][0]["content"] == prompt_text