Spaces:

adithya9903
/

polyguard-openenv-training-3b-continuation

Paused

App Files Files Community

polyguard-openenv-training-3b-continuation / docs /results /active_model /active_model_manifest.json

adithya9903

Deploy PolyGuard HF training Space

fd0c71a verified 29 days ago

raw

history blame contribute delete

5.27 kB

	{
	"status": "ok",
	"enabled": true,
	"activated_at_utc": "2026-04-26T02:24:15.464507+00:00",
	"run_id": "qwen-qwen2-5-0-5b-instruct",
	"source": "top-level",
	"label": "local-qwen-0.5b-active-smoke",
	"model_id": "Qwen/Qwen2.5-0.5B-Instruct",
	"base_model": "Qwen/Qwen2.5-0.5B-Instruct",
	"preferred_artifact": "grpo_adapter",
	"mode": "symlink",
	"source_checkpoint_dir": "checkpoints",
	"source_report_dir": "outputs/reports",
	"grpo_adapter": "checkpoints/active/grpo_adapter",
	"merged_model": "checkpoints/active/merged",
	"sft_adapter": "checkpoints/active/sft_adapter",
	"availability": {
	"grpo_adapter": true,
	"merged": true,
	"sft_adapter": true
	},
	"reports": {
	"improvement_report_benchmark.json": "outputs/reports/active_model/improvement_report_benchmark.json",
	"anti_hacking_overfit_report.json": "outputs/reports/active_model/anti_hacking_overfit_report.json",
	"grpo_trl_run_strict_check.json": "outputs/reports/active_model/grpo_trl_run_strict_check.json",
	"postsave_inference.json": "outputs/reports/active_model/postsave_inference.json",
	"sft_trl_run.json": "outputs/reports/active_model/sft_trl_run.json",
	"plot_index.json": "outputs/reports/active_model/plot_index.json",
	"dose_train.json": "outputs/reports/active_model/dose_train.json",
	"baselines.json": "outputs/reports/active_model/baselines.json",
	"robustness.json": "outputs/reports/active_model/robustness.json",
	"grpo_trl_run_fallback_check.json": "outputs/reports/active_model/grpo_trl_run_fallback_check.json",
	"sft_run.json": "outputs/reports/active_model/sft_run.json",
	"benchmark_report.txt": "outputs/reports/active_model/benchmark_report.txt",
	"dosing_grpo.json": "outputs/reports/active_model/dosing_grpo.json",
	"grpo_ablation_report.json": "outputs/reports/active_model/grpo_ablation_report.json",
	"frontier_ready.json": "outputs/reports/active_model/frontier_ready.json",
	"improvement_report.json": "outputs/reports/active_model/improvement_report.json",
	"hf_sweep_summary.json": "outputs/reports/active_model/hf_sweep_summary.json",
	"planner_grpo.json": "outputs/reports/active_model/planner_grpo.json",
	"grpo_trl_run.json": "outputs/reports/active_model/grpo_trl_run.json",
	"risk_train.json": "outputs/reports/active_model/risk_train.json",
	"grpo_trl_run_smoke.json": "outputs/reports/active_model/grpo_trl_run_smoke.json",
	"inference_benchmark.json": "outputs/reports/active_model/inference_benchmark.json",
	"supervisor_grpo.json": "outputs/reports/active_model/supervisor_grpo.json",
	"acceptance_gate.json": "outputs/reports/active_model/acceptance_gate.json",
	"grpo_trl_run_auto.json": "outputs/reports/active_model/grpo_trl_run_auto.json",
	"hf_training_status.json": "outputs/reports/active_model/hf_training_status.json",
	"benchmark_report.json": "outputs/reports/active_model/benchmark_report.json",
	"postsave_inference_smoke.json": "outputs/reports/active_model/postsave_inference_smoke.json",
	"graph_train.json": "outputs/reports/active_model/graph_train.json",
	"sweeps/qwen-qwen2-5-0-5b-instruct/sft_trl_run.json": "outputs/reports/active_model/sweeps/qwen-qwen2-5-0-5b-instruct/sft_trl_run.json",
	"sweeps/qwen-qwen2-5-0-5b-instruct/run_metadata.json": "outputs/reports/active_model/sweeps/qwen-qwen2-5-0-5b-instruct/run_metadata.json",
	"sweeps/qwen-qwen2-5-0-5b-instruct/sft_history.json": "outputs/reports/active_model/sweeps/qwen-qwen2-5-0-5b-instruct/sft_history.json",
	"sweeps/qwen-qwen2-5-0-5b-instruct/postsave_inference_sft.json": "outputs/reports/active_model/sweeps/qwen-qwen2-5-0-5b-instruct/postsave_inference_sft.json",
	"sweeps/qwen-qwen2-5-3b-instruct/sft_trl_run.json": "outputs/reports/active_model/sweeps/qwen-qwen2-5-3b-instruct/sft_trl_run.json",
	"sweeps/qwen-qwen2-5-3b-instruct/run_metadata.json": "outputs/reports/active_model/sweeps/qwen-qwen2-5-3b-instruct/run_metadata.json",
	"sweeps/qwen-qwen2-5-3b-instruct/sft_history.json": "outputs/reports/active_model/sweeps/qwen-qwen2-5-3b-instruct/sft_history.json",
	"sweeps/qwen-qwen2-5-3b-instruct/postsave_inference_sft.json": "outputs/reports/active_model/sweeps/qwen-qwen2-5-3b-instruct/postsave_inference_sft.json",
	"sweeps/qwen-qwen2-5-1-5b-instruct/sft_trl_run.json": "outputs/reports/active_model/sweeps/qwen-qwen2-5-1-5b-instruct/sft_trl_run.json",
	"sweeps/qwen-qwen2-5-1-5b-instruct/run_metadata.json": "outputs/reports/active_model/sweeps/qwen-qwen2-5-1-5b-instruct/run_metadata.json",
	"sweeps/qwen-qwen2-5-1-5b-instruct/sft_history.json": "outputs/reports/active_model/sweeps/qwen-qwen2-5-1-5b-instruct/sft_history.json",
	"sweeps/qwen-qwen2-5-1-5b-instruct/postsave_inference_sft.json": "outputs/reports/active_model/sweeps/qwen-qwen2-5-1-5b-instruct/postsave_inference_sft.json",
	"grpo_training_cycle/grpo_trl_run.json": "outputs/reports/active_model/grpo_training_cycle/grpo_trl_run.json",
	"grpo_training_cycle/hf_training_status.json": "outputs/reports/active_model/grpo_training_cycle/hf_training_status.json"
	},
	"notes": "This manifest controls local product inference. Prefer grpo_adapter for the RL policy; merged is the SFT baseline fallback when no GRPO adapter is available."
	}