File size: 2,306 Bytes
5dcd156
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
import os
import sys
import pprint

sys.path.append(os.path.abspath('.'))
from data.generate_bugs import TIER1_BUGS, TIER2_BUGS, TIER3_BUGS

def normalize_test_cases(bugs):
    for b in bugs:
        for t in b.get("test_cases", []):
            inp = t["input"]
            if isinstance(inp, (list, tuple)):
                t["input"] = list(inp)
            else:
                t["input"] = [inp]

normalize_test_cases(TIER1_BUGS)
normalize_test_cases(TIER2_BUGS)
normalize_test_cases(TIER3_BUGS)

def dump_var(f, name, val):
    f.write(f'{name} = ')
    f.write(pprint.pformat(val, sort_dicts=False, width=120))
    f.write('\n\n')

with open("data/generate_bugs.py", "w", encoding="utf-8") as f:
    f.write('"""\nAgentDebuggerEnv - Bug Dataset Generator\n\n')
    f.write('Generates three tiers of buggy Python functions for curriculum learning:\n')
    f.write('  Tier 1 (easy):   Off-by-one errors, wrong operators, simple logic inversions\n')
    f.write('  Tier 2 (medium): Incorrect algorithm logic, wrong variable references, subtle type errors\n')
    f.write('  Tier 3 (hard):   Multi-bug interactions, concurrency, edge-case-only failures\n\n')
    f.write('Usage:\n  python data/generate_bugs.py\n\n')
    f.write('Outputs:\n  data/bugs_tier1.jsonl  (~40 bugs)\n  data/bugs_tier2.jsonl  (~30 bugs)\n  data/bugs_tier3.jsonl  (~20 bugs)\n"""\n\n')
    f.write('import json\nimport os\n\n')
    
    dump_var(f, 'TIER1_BUGS', TIER1_BUGS)
    dump_var(f, 'TIER2_BUGS', TIER2_BUGS)
    dump_var(f, 'TIER3_BUGS', TIER3_BUGS)
    
    f.write('def write_jsonl(bugs: list, path: str):\n')
    f.write('    with open(path, "w") as f:\n')
    f.write('        for bug in bugs:\n')
    f.write('            f.write(json.dumps(bug) + "\\n")\n\n')
    f.write('if __name__ == "__main__":\n')
    f.write('    os.makedirs("data", exist_ok=True)\n')
    f.write('    write_jsonl(TIER1_BUGS, "data/bugs_tier1.jsonl")\n')
    f.write('    write_jsonl(TIER2_BUGS, "data/bugs_tier2.jsonl")\n')
    f.write('    write_jsonl(TIER3_BUGS, "data/bugs_tier3.jsonl")\n')
    f.write('    print(f"Tier 1: {len(TIER1_BUGS)}, Tier 2: {len(TIER2_BUGS)}, Tier 3: {len(TIER3_BUGS)}")\n')
    f.write('    print("\\nDone. Run training/train_grpo.py to start training.")\n')

print("Normalization applied successfully.")