Spaces:
Running
Running
| #!/usr/bin/env python3 | |
| """Generate SFT examples.""" | |
| from __future__ import annotations | |
| import json | |
| from pathlib import Path | |
| from app.env.env_core import PolyGuardEnv | |
| from app.models.policy.candidate_builder import build_candidates | |
| from app.training.sft_dataset import build_sft_example | |
def main() -> None:
    """Generate SFT examples and write them to ``data/processed/sft_examples.json``.

    The environment is reset 30 times (seeds 123..152), cycling through the
    ``easy``/``medium``/``hard`` difficulties. For every reset:

    * the candidates built for the resulting state are ranked, and the
      top-ranked one becomes the target of a ``planner_action_selection``
      example;
    * when more than one candidate exists, the bottom-ranked one becomes the
      target of an ``invalid_or_low_quality_choice_repair`` example that also
      records the top-ranked id as ``preferred_candidate_id``.

    The output list is: all positive examples, then at most 20 repair
    examples, then one hand-written ``abstention_calibration`` example.
    The number of examples written is printed as ``sft_examples=<n>``.

    Episodes for which no candidates are produced are skipped instead of
    raising ``IndexError``; if any were skipped, their count is printed as
    ``skipped_empty_candidate_sets=<n>``.
    """
    env = PolyGuardEnv()
    examples: list[dict] = []
    negatives: list[dict] = []
    difficulties = ["easy", "medium", "hard"]
    skipped = 0

    for i in range(30):
        diff = difficulties[i % len(difficulties)]
        env.reset(seed=123 + i, difficulty=diff)
        state = env.state
        candidates = build_candidates(state)

        # With reverse=True the best candidate comes first: highest
        # legality_precheck, then highest estimated_safety_delta, then lowest
        # uncertainty_score (negated so that "lower" sorts as "larger").
        ranked = sorted(
            candidates,
            key=lambda c: (c.legality_precheck, c.estimated_safety_delta, -c.uncertainty_score),
            reverse=True,
        )
        if not ranked:
            # Nothing to supervise on for this state; indexing ranked[0]
            # below would otherwise abort the whole run.
            skipped += 1
            continue

        target = ranked[0].candidate_id
        examples.append(
            {
                **build_sft_example(state, candidates, target_candidate_id=target),
                "task": "planner_action_selection",
                "supervisor_mode": (
                    "DOSE_OPT"
                    if state.sub_environment.value == "PRECISION_DOSING"
                    else "REGIMEN_OPT"
                ),
            }
        )

        if len(ranked) > 1:
            # Repair example: the target is the lowest-ranked candidate and the
            # top-ranked id is carried alongside as the preferred choice.
            negatives.append(
                {
                    **build_sft_example(
                        state, candidates, target_candidate_id=ranked[-1].candidate_id
                    ),
                    "task": "invalid_or_low_quality_choice_repair",
                    "preferred_candidate_id": target,
                }
            )

    # Hand-written example with a fixed prompt, not derived from the environment.
    # NOTE(review): the target is "cand_10" although candidate_set is empty —
    # presumably an id reserved for abstaining; confirm against the consumer.
    abstain_example = {
        "task": "abstention_calibration",
        "prompt": {
            "uncertainty": 0.89,
            "severe_pair_count": 2,
            "candidate_set": [],
        },
        "target_candidate_id": "cand_10",
    }

    # Only the first 20 repair examples are kept.
    examples.extend(negatives[:20])
    examples.append(abstain_example)

    # Output goes to <parent of this script's directory>/data/processed.
    root = Path(__file__).resolve().parents[1]
    out = root / "data" / "processed"
    out.mkdir(parents=True, exist_ok=True)
    (out / "sft_examples.json").write_text(
        json.dumps(examples, ensure_ascii=True, indent=2), encoding="utf-8"
    )

    print(f"sft_examples={len(examples)}")
    if skipped:
        print(f"skipped_empty_candidate_sets={skipped}")
# Run the generator only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()