| |
| """Generate SFT examples.""" |
|
|
| from __future__ import annotations |
|
|
| import json |
| from pathlib import Path |
|
|
| from app.env.env_core import PolyGuardEnv |
| from app.models.policy.candidate_builder import build_candidates |
| from app.training.sft_dataset import build_sft_example |
|
|
|
|
def main(
    *,
    num_episodes: int = 30,
    base_seed: int = 123,
    max_negatives: int = 20,
) -> None:
    """Generate SFT examples across difficulty levels and write them to JSON.

    For each episode the environment is reset with a deterministic seed,
    candidates are built and ranked, and two kinds of examples are emitted:
    a positive "planner_action_selection" example targeting the best-ranked
    candidate, and (when more than one candidate exists) a negative
    "invalid_or_low_quality_choice_repair" example targeting the worst-ranked
    one. A single hard-coded abstention-calibration example is appended, and
    everything is written to ``data/processed/sft_examples.json``.

    Args:
        num_episodes: Number of environment episodes to generate from.
        base_seed: Seed for the first episode; episode ``i`` uses ``base_seed + i``.
        max_negatives: Cap on how many negative (repair) examples are kept.
    """
    env = PolyGuardEnv()
    examples: list[dict] = []
    negatives: list[dict] = []
    difficulties = ("easy", "medium", "hard")
    for i in range(num_episodes):
        # Cycle through difficulties so episodes are evenly distributed.
        env.reset(seed=base_seed + i, difficulty=difficulties[i % len(difficulties)])
        state = env.state
        candidates = build_candidates(state)
        # Rank: legal candidates first, then highest estimated safety gain,
        # then lowest uncertainty (negated so reverse=True sorts it ascending).
        ranked = sorted(
            candidates,
            key=lambda c: (c.legality_precheck, c.estimated_safety_delta, -c.uncertainty_score),
            reverse=True,
        )
        target = ranked[0].candidate_id
        examples.append(
            {
                **build_sft_example(state, candidates, target_candidate_id=target),
                "task": "planner_action_selection",
                "supervisor_mode": (
                    "DOSE_OPT"
                    if state.sub_environment.value == "PRECISION_DOSING"
                    else "REGIMEN_OPT"
                ),
            }
        )
        if len(ranked) > 1:
            # Worst-ranked candidate becomes a repair example pointing at the
            # preferred (best) candidate.
            negatives.append(
                {
                    **build_sft_example(
                        state, candidates, target_candidate_id=ranked[-1].candidate_id
                    ),
                    "task": "invalid_or_low_quality_choice_repair",
                    "preferred_candidate_id": target,
                }
            )

    # Fixed abstention-calibration example: high uncertainty, empty candidate
    # set. NOTE(review): "cand_10" looks hard-coded — confirm it is intended.
    abstain_example = {
        "task": "abstention_calibration",
        "prompt": {
            "uncertainty": 0.89,
            "severe_pair_count": 2,
            "candidate_set": [],
        },
        "target_candidate_id": "cand_10",
    }
    examples.extend(negatives[:max_negatives])
    examples.append(abstain_example)

    # Write under <repo_root>/data/processed relative to this script's parent.
    root = Path(__file__).resolve().parents[1]
    out = root / "data" / "processed"
    out.mkdir(parents=True, exist_ok=True)
    (out / "sft_examples.json").write_text(
        json.dumps(examples, ensure_ascii=True, indent=2), encoding="utf-8"
    )
    print(f"sft_examples={len(examples)}")
|
|
|
|
# Script entry point: run only when executed directly, not on import.
if __name__ == "__main__":
    main()
|
|