#!/usr/bin/env python3
"""Generate SFT examples."""
from __future__ import annotations
import json
from pathlib import Path
from app.env.env_core import PolyGuardEnv
from app.models.policy.candidate_builder import build_candidates
from app.training.sft_dataset import build_sft_example
def main() -> None:
    """Build and persist a small SFT dataset covering three task types.

    Runs 30 deterministic episodes (seeded, difficulty cycling through
    easy/medium/hard). For each episode the best-ranked candidate yields a
    planner-selection positive and, when more than one candidate exists, the
    worst-ranked candidate yields a "repair" negative pointing back at the
    best. At most 20 negatives plus one synthetic abstention-calibration
    example are appended, and everything is written to
    data/processed/sft_examples.json.
    """
    env = PolyGuardEnv()
    positives: list[dict] = []
    repair_negatives: list[dict] = []
    levels = ("easy", "medium", "hard")

    for episode in range(30):
        # Deterministic rollout: fixed seed offset; difficulty cycles e/m/h.
        env.reset(seed=123 + episode, difficulty=levels[episode % len(levels)])
        state = env.state
        candidates = build_candidates(state)

        # Rank best-first: prefer passing legality precheck, then higher
        # estimated safety gain, then lower uncertainty (negated so the
        # descending sort puts low-uncertainty candidates first).
        ordered = sorted(
            candidates,
            key=lambda c: (
                c.legality_precheck,
                c.estimated_safety_delta,
                -c.uncertainty_score,
            ),
            reverse=True,
        )
        best_id = ordered[0].candidate_id

        positive = build_sft_example(state, candidates, target_candidate_id=best_id)
        positive["task"] = "planner_action_selection"
        positive["supervisor_mode"] = (
            "DOSE_OPT"
            if state.sub_environment.value == "PRECISION_DOSING"
            else "REGIMEN_OPT"
        )
        positives.append(positive)

        if len(ordered) > 1:
            # Worst-ranked candidate becomes a repair negative that names
            # the preferred (best) candidate as the correction target.
            negative = build_sft_example(
                state, candidates, target_candidate_id=ordered[-1].candidate_id
            )
            negative["task"] = "invalid_or_low_quality_choice_repair"
            negative["preferred_candidate_id"] = best_id
            repair_negatives.append(negative)

    # Synthetic abstention case: high uncertainty, severe interactions, empty
    # candidate set. NOTE(review): target "cand_10" with an empty
    # candidate_set looks inconsistent — confirm the intended abstain target.
    abstain_example = {
        "task": "abstention_calibration",
        "prompt": {
            "uncertainty": 0.89,
            "severe_pair_count": 2,
            "candidate_set": [],
        },
        "target_candidate_id": "cand_10",
    }

    # Output order: all positives, then up to 20 negatives, then the
    # abstention example.
    dataset = positives + repair_negatives[:20] + [abstain_example]

    project_root = Path(__file__).resolve().parents[1]
    out_dir = project_root / "data" / "processed"
    out_dir.mkdir(parents=True, exist_ok=True)
    (out_dir / "sft_examples.json").write_text(
        json.dumps(dataset, ensure_ascii=True, indent=2), encoding="utf-8"
    )
    print(f"sft_examples={len(dataset)}")
# Script entry point: generate the dataset only when executed directly,
# not when imported as a module.
if __name__ == "__main__":
    main()
|