polyguard-openenv-training-3b-continuation / docs /results /active_model /active_model_manifest.json
| { | |
| "status": "ok", | |
| "enabled": true, | |
| "activated_at_utc": "2026-04-26T02:24:15.464507+00:00", | |
| "run_id": "qwen-qwen2-5-0-5b-instruct", | |
| "source": "top-level", | |
| "label": "local-qwen-0.5b-active-smoke", | |
| "model_id": "Qwen/Qwen2.5-0.5B-Instruct", | |
| "base_model": "Qwen/Qwen2.5-0.5B-Instruct", | |
| "preferred_artifact": "grpo_adapter", | |
| "mode": "symlink", | |
| "source_checkpoint_dir": "checkpoints", | |
| "source_report_dir": "outputs/reports", | |
| "grpo_adapter": "checkpoints/active/grpo_adapter", | |
| "merged_model": "checkpoints/active/merged", | |
| "sft_adapter": "checkpoints/active/sft_adapter", | |
| "availability": { | |
| "grpo_adapter": true, | |
| "merged": true, | |
| "sft_adapter": true | |
| }, | |
| "reports": { | |
| "improvement_report_benchmark.json": "outputs/reports/active_model/improvement_report_benchmark.json", | |
| "anti_hacking_overfit_report.json": "outputs/reports/active_model/anti_hacking_overfit_report.json", | |
| "grpo_trl_run_strict_check.json": "outputs/reports/active_model/grpo_trl_run_strict_check.json", | |
| "postsave_inference.json": "outputs/reports/active_model/postsave_inference.json", | |
| "sft_trl_run.json": "outputs/reports/active_model/sft_trl_run.json", | |
| "plot_index.json": "outputs/reports/active_model/plot_index.json", | |
| "dose_train.json": "outputs/reports/active_model/dose_train.json", | |
| "baselines.json": "outputs/reports/active_model/baselines.json", | |
| "robustness.json": "outputs/reports/active_model/robustness.json", | |
| "grpo_trl_run_fallback_check.json": "outputs/reports/active_model/grpo_trl_run_fallback_check.json", | |
| "sft_run.json": "outputs/reports/active_model/sft_run.json", | |
| "benchmark_report.txt": "outputs/reports/active_model/benchmark_report.txt", | |
| "dosing_grpo.json": "outputs/reports/active_model/dosing_grpo.json", | |
| "grpo_ablation_report.json": "outputs/reports/active_model/grpo_ablation_report.json", | |
| "frontier_ready.json": "outputs/reports/active_model/frontier_ready.json", | |
| "improvement_report.json": "outputs/reports/active_model/improvement_report.json", | |
| "hf_sweep_summary.json": "outputs/reports/active_model/hf_sweep_summary.json", | |
| "planner_grpo.json": "outputs/reports/active_model/planner_grpo.json", | |
| "grpo_trl_run.json": "outputs/reports/active_model/grpo_trl_run.json", | |
| "risk_train.json": "outputs/reports/active_model/risk_train.json", | |
| "grpo_trl_run_smoke.json": "outputs/reports/active_model/grpo_trl_run_smoke.json", | |
| "inference_benchmark.json": "outputs/reports/active_model/inference_benchmark.json", | |
| "supervisor_grpo.json": "outputs/reports/active_model/supervisor_grpo.json", | |
| "acceptance_gate.json": "outputs/reports/active_model/acceptance_gate.json", | |
| "grpo_trl_run_auto.json": "outputs/reports/active_model/grpo_trl_run_auto.json", | |
| "hf_training_status.json": "outputs/reports/active_model/hf_training_status.json", | |
| "benchmark_report.json": "outputs/reports/active_model/benchmark_report.json", | |
| "postsave_inference_smoke.json": "outputs/reports/active_model/postsave_inference_smoke.json", | |
| "graph_train.json": "outputs/reports/active_model/graph_train.json", | |
| "sweeps/qwen-qwen2-5-0-5b-instruct/sft_trl_run.json": "outputs/reports/active_model/sweeps/qwen-qwen2-5-0-5b-instruct/sft_trl_run.json", | |
| "sweeps/qwen-qwen2-5-0-5b-instruct/run_metadata.json": "outputs/reports/active_model/sweeps/qwen-qwen2-5-0-5b-instruct/run_metadata.json", | |
| "sweeps/qwen-qwen2-5-0-5b-instruct/sft_history.json": "outputs/reports/active_model/sweeps/qwen-qwen2-5-0-5b-instruct/sft_history.json", | |
| "sweeps/qwen-qwen2-5-0-5b-instruct/postsave_inference_sft.json": "outputs/reports/active_model/sweeps/qwen-qwen2-5-0-5b-instruct/postsave_inference_sft.json", | |
| "sweeps/qwen-qwen2-5-3b-instruct/sft_trl_run.json": "outputs/reports/active_model/sweeps/qwen-qwen2-5-3b-instruct/sft_trl_run.json", | |
| "sweeps/qwen-qwen2-5-3b-instruct/run_metadata.json": "outputs/reports/active_model/sweeps/qwen-qwen2-5-3b-instruct/run_metadata.json", | |
| "sweeps/qwen-qwen2-5-3b-instruct/sft_history.json": "outputs/reports/active_model/sweeps/qwen-qwen2-5-3b-instruct/sft_history.json", | |
| "sweeps/qwen-qwen2-5-3b-instruct/postsave_inference_sft.json": "outputs/reports/active_model/sweeps/qwen-qwen2-5-3b-instruct/postsave_inference_sft.json", | |
| "sweeps/qwen-qwen2-5-1-5b-instruct/sft_trl_run.json": "outputs/reports/active_model/sweeps/qwen-qwen2-5-1-5b-instruct/sft_trl_run.json", | |
| "sweeps/qwen-qwen2-5-1-5b-instruct/run_metadata.json": "outputs/reports/active_model/sweeps/qwen-qwen2-5-1-5b-instruct/run_metadata.json", | |
| "sweeps/qwen-qwen2-5-1-5b-instruct/sft_history.json": "outputs/reports/active_model/sweeps/qwen-qwen2-5-1-5b-instruct/sft_history.json", | |
| "sweeps/qwen-qwen2-5-1-5b-instruct/postsave_inference_sft.json": "outputs/reports/active_model/sweeps/qwen-qwen2-5-1-5b-instruct/postsave_inference_sft.json", | |
| "grpo_training_cycle/grpo_trl_run.json": "outputs/reports/active_model/grpo_training_cycle/grpo_trl_run.json", | |
| "grpo_training_cycle/hf_training_status.json": "outputs/reports/active_model/grpo_training_cycle/hf_training_status.json" | |
| }, | |
| "notes": "This manifest controls local product inference. Prefer grpo_adapter for the RL policy; merged is the SFT baseline fallback when no GRPO adapter is available." | |
| } |