File size: 2,112 Bytes
877add7
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
#!/usr/bin/env python3
"""Generate SFT examples."""

from __future__ import annotations

import json
from pathlib import Path

from app.env.env_core import PolyGuardEnv
from app.models.policy.candidate_builder import build_candidates
from app.training.sft_dataset import build_sft_example


def main() -> None:
    """Build the SFT dataset and write it to ``data/processed/sft_examples.json``.

    Resets a ``PolyGuardEnv`` 30 times (seeds 123 through 152, cycling over
    the easy/medium/hard difficulties). After each reset the candidates
    built from the environment state are ranked in descending order of
    ``legality_precheck``, then ``estimated_safety_delta``, then ascending
    ``uncertainty_score``:

    * the top-ranked candidate becomes the target of a
      ``planner_action_selection`` example;
    * when more than one candidate exists, the bottom-ranked candidate
      becomes the target of an ``invalid_or_low_quality_choice_repair``
      example that also records the preferred (top-ranked) candidate id.

    The written list holds all planner examples, then the first 20 repair
    examples, then one hard-coded ``abstention_calibration`` example. The
    output directory is resolved relative to the parent of this script's
    directory and created if missing. The number of written examples is
    printed to stdout.
    """

    def _rank_key(cand):
        # Uncertainty is negated so that, under the descending sort below,
        # a lower uncertainty ranks ahead of a higher one.
        return (cand.legality_precheck, cand.estimated_safety_delta, -cand.uncertainty_score)

    env = PolyGuardEnv()
    levels = ["easy", "medium", "hard"]
    positives: list[dict] = []
    repairs: list[dict] = []

    for episode in range(30):
        env.reset(seed=123 + episode, difficulty=levels[episode % len(levels)])
        state = env.state
        candidates = build_candidates(state)

        ranked = list(candidates)
        ranked.sort(key=_rank_key, reverse=True)
        best_id = ranked[0].candidate_id

        # Positive example: the best-ranked candidate is the target.
        record = {}
        record.update(build_sft_example(state, candidates, target_candidate_id=best_id))
        if state.sub_environment.value == "PRECISION_DOSING":
            mode = "DOSE_OPT"
        else:
            mode = "REGIMEN_OPT"
        record["task"] = "planner_action_selection"
        record["supervisor_mode"] = mode
        positives.append(record)

        # A repair example needs a distinct worst candidate to contrast with.
        if len(ranked) <= 1:
            continue
        worst_id = ranked[-1].candidate_id
        repair = {}
        repair.update(build_sft_example(state, candidates, target_candidate_id=worst_id))
        repair["task"] = "invalid_or_low_quality_choice_repair"
        repair["preferred_candidate_id"] = best_id
        repairs.append(repair)

    # Single hand-written abstention example appended after everything else.
    abstain_prompt = {
        "uncertainty": 0.89,
        "severe_pair_count": 2,
        "candidate_set": [],
    }
    abstain_example = {
        "task": "abstention_calibration",
        "prompt": abstain_prompt,
        "target_candidate_id": "cand_10",
    }

    # Only the first 20 repair examples are kept in the final dataset.
    dataset = positives + repairs[:20]
    dataset.append(abstain_example)

    out_dir = Path(__file__).resolve().parents[1] / "data" / "processed"
    out_dir.mkdir(parents=True, exist_ok=True)
    payload = json.dumps(dataset, ensure_ascii=True, indent=2)
    out_dir.joinpath("sft_examples.json").write_text(payload, encoding="utf-8")
    print(f"sft_examples={len(dataset)}")


# Run the generator only when this file is executed as a script, not when it
# is imported as a module.
if __name__ == "__main__":
    main()