#!/usr/bin/env python3 """Generate SFT examples.""" from __future__ import annotations import json from pathlib import Path from app.env.env_core import PolyGuardEnv from app.models.policy.candidate_builder import build_candidates from app.training.sft_dataset import build_sft_example def main() -> None: env = PolyGuardEnv() examples: list[dict] = [] negatives: list[dict] = [] difficulties = ["easy", "medium", "hard"] for i in range(30): diff = difficulties[i % len(difficulties)] env.reset(seed=123 + i, difficulty=diff) state = env.state candidates = build_candidates(state) ranked = sorted(candidates, key=lambda c: (c.legality_precheck, c.estimated_safety_delta, -c.uncertainty_score), reverse=True) target = ranked[0].candidate_id examples.append( { **build_sft_example(state, candidates, target_candidate_id=target), "task": "planner_action_selection", "supervisor_mode": ("DOSE_OPT" if state.sub_environment.value == "PRECISION_DOSING" else "REGIMEN_OPT"), } ) if len(ranked) > 1: negatives.append( { **build_sft_example(state, candidates, target_candidate_id=ranked[-1].candidate_id), "task": "invalid_or_low_quality_choice_repair", "preferred_candidate_id": target, } ) abstain_example = { "task": "abstention_calibration", "prompt": { "uncertainty": 0.89, "severe_pair_count": 2, "candidate_set": [], }, "target_candidate_id": "cand_10", } examples.extend(negatives[:20]) examples.append(abstain_example) root = Path(__file__).resolve().parents[1] out = root / "data" / "processed" out.mkdir(parents=True, exist_ok=True) (out / "sft_examples.json").write_text(json.dumps(examples, ensure_ascii=True, indent=2), encoding="utf-8") print(f"sft_examples={len(examples)}") if __name__ == "__main__": main()