{ "status": "ok", "space_id": "adithya9903/polyguard-openenv-final-artifacts", "space_url": "https://huggingface.co/spaces/adithya9903/polyguard-openenv-final-artifacts", "docs_dir": "docs/results/final_submission_evidence", "evidence_source": "docs/results/submission_evidence_qwen_0_5b_1_5b_3b", "artifact_availability": { "qwen-qwen2-5-0-5b-instruct": { "label": "Qwen 0.5B", "model_id": "Qwen/Qwen2.5-0.5B-Instruct", "checkpoint_tree": { "exists": false, "file_count": 0, "bytes": 0 }, "sft_adapter": { "exists": false, "file_count": 0, "bytes": 0 }, "grpo_adapter": { "exists": false, "file_count": 0, "bytes": 0 }, "reports": { "exists": true, "file_count": 4, "bytes": 435858 }, "sft_report": true, "grpo_report": false, "postsave_sft": true, "postsave_grpo": false, "policy_ablation": false, "missing_trained_files": [ "sft_adapter", "grpo_adapter" ], "status": "reports_only_or_partial" }, "qwen-qwen2-5-1-5b-instruct": { "label": "Qwen 1.5B", "model_id": "Qwen/Qwen2.5-1.5B-Instruct", "checkpoint_tree": { "exists": false, "file_count": 0, "bytes": 0 }, "sft_adapter": { "exists": false, "file_count": 0, "bytes": 0 }, "grpo_adapter": { "exists": false, "file_count": 0, "bytes": 0 }, "reports": { "exists": true, "file_count": 4, "bytes": 854543 }, "sft_report": true, "grpo_report": false, "postsave_sft": true, "postsave_grpo": false, "policy_ablation": false, "missing_trained_files": [ "sft_adapter", "grpo_adapter" ], "status": "reports_only_or_partial" }, "qwen-qwen2-5-3b-instruct": { "label": "Qwen 3B", "model_id": "Qwen/Qwen2.5-3B-Instruct", "checkpoint_tree": { "exists": true, "file_count": 125, "bytes": 433208536 }, "sft_adapter": { "exists": true, "file_count": 11, "bytes": 30655905 }, "grpo_adapter": { "exists": true, "file_count": 11, "bytes": 30656841 }, "reports": { "exists": true, "file_count": 9, "bytes": 5930214 }, "sft_report": true, "grpo_report": true, "postsave_sft": true, "postsave_grpo": true, "policy_ablation": true, "missing_trained_files": [], "status": "complete" } }, "submission_models": [ { "run_id": "qwen-qwen2-5-0-5b-instruct", "model_id": "Qwen/Qwen2.5-0.5B-Instruct", "label": "Qwen 0.5B", "statuses": { "sft_training": "artifact_available", "sft_postsave_inference": "artifact_available", "grpo_training": "not_seen_in_status", "grpo_postsave_inference": "not_seen_in_status", "policy_ablation": "not_seen_in_status" }, "metrics": { "sft_train_loss": 0.19233327957964502, "sft_train_runtime": 234.6302, "sft_examples_used": 2000, "sft_history_steps": 2001, "sft_first_loss": 3.0856, "sft_last_loss": 0.0626, "sft_best_loss": 0.0057, "sft_last_token_accuracy": 0.9717137813568115, "sft_valid_rate": 1.0, "sft_avg_env_reward": 0.726, "sft_avg_latency_seconds": 1.839, "grpo_avg_reward": null, "grpo_history_steps": 0, "grpo_valid_rate": null, "grpo_avg_env_reward": null, "grpo_avg_latency_seconds": null }, "files": { "run_metadata.json": "outputs/reports/submission_evidence/qwen_0_5b_1_5b_3b/runs/qwen-qwen2-5-0-5b-instruct/run_metadata.json", "sft_trl_run.json": "outputs/reports/submission_evidence/qwen_0_5b_1_5b_3b/runs/qwen-qwen2-5-0-5b-instruct/sft_trl_run.json", "sft_history.json": "outputs/reports/submission_evidence/qwen_0_5b_1_5b_3b/runs/qwen-qwen2-5-0-5b-instruct/sft_history.json", "postsave_inference_sft.json": "outputs/reports/submission_evidence/qwen_0_5b_1_5b_3b/runs/qwen-qwen2-5-0-5b-instruct/postsave_inference_sft.json", "grpo_trl_run.json": "", "grpo_history.json": "", "grpo_reward_components.jsonl": "", "postsave_inference_grpo.json": "", "grpo_ablation_report.json": "", "error.json": "" } }, { "run_id": "qwen-qwen2-5-1-5b-instruct", "model_id": "Qwen/Qwen2.5-1.5B-Instruct", "label": "Qwen 1.5B", "statuses": { "sft_training": "artifact_available", "sft_postsave_inference": "artifact_available", "grpo_training": "not_seen_in_status", "grpo_postsave_inference": "not_seen_in_status", "policy_ablation": "not_seen_in_status" }, "metrics": { "sft_train_loss": 0.11515871361242898, "sft_train_runtime": 483.7085, "sft_examples_used": 2000, "sft_history_steps": 4001, "sft_first_loss": 2.9686, "sft_last_loss": 0.0681, "sft_best_loss": 0.0009, "sft_last_token_accuracy": 0.9726027250289917, "sft_valid_rate": 1.0, "sft_avg_env_reward": 0.726, "sft_avg_latency_seconds": 2.158, "grpo_avg_reward": null, "grpo_history_steps": 0, "grpo_valid_rate": null, "grpo_avg_env_reward": null, "grpo_avg_latency_seconds": null }, "files": { "run_metadata.json": "outputs/reports/submission_evidence/qwen_0_5b_1_5b_3b/runs/qwen-qwen2-5-1-5b-instruct/run_metadata.json", "sft_trl_run.json": "outputs/reports/submission_evidence/qwen_0_5b_1_5b_3b/runs/qwen-qwen2-5-1-5b-instruct/sft_trl_run.json", "sft_history.json": "outputs/reports/submission_evidence/qwen_0_5b_1_5b_3b/runs/qwen-qwen2-5-1-5b-instruct/sft_history.json", "postsave_inference_sft.json": "outputs/reports/submission_evidence/qwen_0_5b_1_5b_3b/runs/qwen-qwen2-5-1-5b-instruct/postsave_inference_sft.json", "grpo_trl_run.json": "", "grpo_history.json": "", "grpo_reward_components.jsonl": "", "postsave_inference_grpo.json": "", "grpo_ablation_report.json": "", "error.json": "" } }, { "run_id": "qwen-qwen2-5-3b-instruct", "model_id": "Qwen/Qwen2.5-3B-Instruct", "label": "Qwen 3B", "statuses": { "sft_training": "artifact_available", "sft_postsave_inference": "artifact_available", "grpo_training": "artifact_available", "grpo_postsave_inference": "artifact_available", "policy_ablation": "artifact_available" }, "metrics": { "sft_train_loss": 0.15688225453009363, "sft_train_runtime": 715.2908, "sft_examples_used": 2000, "sft_history_steps": 2001, "sft_first_loss": 3.5687, "sft_last_loss": 0.054, "sft_best_loss": 0.0022, "sft_last_token_accuracy": 0.9750415682792664, "sft_valid_rate": 1.0, "sft_avg_env_reward": 0.781, "sft_avg_latency_seconds": 2.863, "grpo_avg_reward": 0.767, "grpo_history_steps": 2001, "grpo_valid_rate": 1.0, "grpo_avg_env_reward": 0.726, "grpo_avg_latency_seconds": 3.681 }, "files": { "run_metadata.json": "outputs/reports/submission_evidence/qwen_0_5b_1_5b_3b/runs/qwen-qwen2-5-3b-instruct/run_metadata.json", "sft_trl_run.json": "outputs/reports/submission_evidence/qwen_0_5b_1_5b_3b/runs/qwen-qwen2-5-3b-instruct/sft_trl_run.json", "sft_history.json": "outputs/reports/submission_evidence/qwen_0_5b_1_5b_3b/runs/qwen-qwen2-5-3b-instruct/sft_history.json", "postsave_inference_sft.json": "outputs/reports/submission_evidence/qwen_0_5b_1_5b_3b/runs/qwen-qwen2-5-3b-instruct/postsave_inference_sft.json", "grpo_trl_run.json": "outputs/reports/submission_evidence/qwen_0_5b_1_5b_3b/runs/qwen-qwen2-5-3b-instruct/grpo_trl_run.json", "grpo_history.json": "outputs/reports/submission_evidence/qwen_0_5b_1_5b_3b/runs/qwen-qwen2-5-3b-instruct/grpo_history.json", "grpo_reward_components.jsonl": "outputs/reports/submission_evidence/qwen_0_5b_1_5b_3b/runs/qwen-qwen2-5-3b-instruct/grpo_reward_components.jsonl", "postsave_inference_grpo.json": "outputs/reports/submission_evidence/qwen_0_5b_1_5b_3b/runs/qwen-qwen2-5-3b-instruct/postsave_inference_grpo.json", "grpo_ablation_report.json": "outputs/reports/submission_evidence/qwen_0_5b_1_5b_3b/runs/qwen-qwen2-5-3b-instruct/grpo_ablation_report.json", "error.json": "" } } ], "basic_vs_pipeline": { "reward_delta": 0.043, "basic_reward": 0.762, "pipeline_reward": 0.805, "basic_failure_rate": 0.25, "pipeline_failure_rate": 0.0, "pipeline_legality": 1.0 }, "download_command": "HF_TOKEN= ./.venv/bin/hf download adithya9903/polyguard-openenv-final-artifacts --repo-type space --local-dir ./hf_final_artifacts", "notes": [ "Packaging-only run; no retraining is performed.", "Qwen 3B has SFT and GRPO adapter directories plus checkpoint metadata/intermediate checkpoints in this artifact Space.", "Qwen 0.5B and 1.5B adapter directories were not present locally or in the checked artifact repos; reports remain included." ] }