adithya9903's picture
Upload PolyGuard final evidence and adapters
830a64f verified
{
"status": "ok",
"space_id": "adithya9903/polyguard-openenv-final-artifacts",
"space_url": "https://huggingface.co/spaces/adithya9903/polyguard-openenv-final-artifacts",
"docs_dir": "docs/results/final_submission_evidence",
"evidence_source": "docs/results/submission_evidence_qwen_0_5b_1_5b_3b",
"artifact_availability": {
"qwen-qwen2-5-0-5b-instruct": {
"label": "Qwen 0.5B",
"model_id": "Qwen/Qwen2.5-0.5B-Instruct",
"checkpoint_tree": {
"exists": false,
"file_count": 0,
"bytes": 0
},
"sft_adapter": {
"exists": false,
"file_count": 0,
"bytes": 0
},
"grpo_adapter": {
"exists": false,
"file_count": 0,
"bytes": 0
},
"reports": {
"exists": true,
"file_count": 4,
"bytes": 435858
},
"sft_report": true,
"grpo_report": false,
"postsave_sft": true,
"postsave_grpo": false,
"policy_ablation": false,
"missing_trained_files": [
"sft_adapter",
"grpo_adapter"
],
"status": "reports_only_or_partial"
},
"qwen-qwen2-5-1-5b-instruct": {
"label": "Qwen 1.5B",
"model_id": "Qwen/Qwen2.5-1.5B-Instruct",
"checkpoint_tree": {
"exists": false,
"file_count": 0,
"bytes": 0
},
"sft_adapter": {
"exists": false,
"file_count": 0,
"bytes": 0
},
"grpo_adapter": {
"exists": false,
"file_count": 0,
"bytes": 0
},
"reports": {
"exists": true,
"file_count": 4,
"bytes": 854543
},
"sft_report": true,
"grpo_report": false,
"postsave_sft": true,
"postsave_grpo": false,
"policy_ablation": false,
"missing_trained_files": [
"sft_adapter",
"grpo_adapter"
],
"status": "reports_only_or_partial"
},
"qwen-qwen2-5-3b-instruct": {
"label": "Qwen 3B",
"model_id": "Qwen/Qwen2.5-3B-Instruct",
"checkpoint_tree": {
"exists": true,
"file_count": 125,
"bytes": 433208536
},
"sft_adapter": {
"exists": true,
"file_count": 11,
"bytes": 30655905
},
"grpo_adapter": {
"exists": true,
"file_count": 11,
"bytes": 30656841
},
"reports": {
"exists": true,
"file_count": 9,
"bytes": 5930214
},
"sft_report": true,
"grpo_report": true,
"postsave_sft": true,
"postsave_grpo": true,
"policy_ablation": true,
"missing_trained_files": [],
"status": "complete"
}
},
"submission_models": [
{
"run_id": "qwen-qwen2-5-0-5b-instruct",
"model_id": "Qwen/Qwen2.5-0.5B-Instruct",
"label": "Qwen 0.5B",
"statuses": {
"sft_training": "artifact_available",
"sft_postsave_inference": "artifact_available",
"grpo_training": "not_seen_in_status",
"grpo_postsave_inference": "not_seen_in_status",
"policy_ablation": "not_seen_in_status"
},
"metrics": {
"sft_train_loss": 0.19233327957964502,
"sft_train_runtime": 234.6302,
"sft_examples_used": 2000,
"sft_history_steps": 2001,
"sft_first_loss": 3.0856,
"sft_last_loss": 0.0626,
"sft_best_loss": 0.0057,
"sft_last_token_accuracy": 0.9717137813568115,
"sft_valid_rate": 1.0,
"sft_avg_env_reward": 0.726,
"sft_avg_latency_seconds": 1.839,
"grpo_avg_reward": null,
"grpo_history_steps": 0,
"grpo_valid_rate": null,
"grpo_avg_env_reward": null,
"grpo_avg_latency_seconds": null
},
"files": {
"run_metadata.json": "outputs/reports/submission_evidence/qwen_0_5b_1_5b_3b/runs/qwen-qwen2-5-0-5b-instruct/run_metadata.json",
"sft_trl_run.json": "outputs/reports/submission_evidence/qwen_0_5b_1_5b_3b/runs/qwen-qwen2-5-0-5b-instruct/sft_trl_run.json",
"sft_history.json": "outputs/reports/submission_evidence/qwen_0_5b_1_5b_3b/runs/qwen-qwen2-5-0-5b-instruct/sft_history.json",
"postsave_inference_sft.json": "outputs/reports/submission_evidence/qwen_0_5b_1_5b_3b/runs/qwen-qwen2-5-0-5b-instruct/postsave_inference_sft.json",
"grpo_trl_run.json": "",
"grpo_history.json": "",
"grpo_reward_components.jsonl": "",
"postsave_inference_grpo.json": "",
"grpo_ablation_report.json": "",
"error.json": ""
}
},
{
"run_id": "qwen-qwen2-5-1-5b-instruct",
"model_id": "Qwen/Qwen2.5-1.5B-Instruct",
"label": "Qwen 1.5B",
"statuses": {
"sft_training": "artifact_available",
"sft_postsave_inference": "artifact_available",
"grpo_training": "not_seen_in_status",
"grpo_postsave_inference": "not_seen_in_status",
"policy_ablation": "not_seen_in_status"
},
"metrics": {
"sft_train_loss": 0.11515871361242898,
"sft_train_runtime": 483.7085,
"sft_examples_used": 2000,
"sft_history_steps": 4001,
"sft_first_loss": 2.9686,
"sft_last_loss": 0.0681,
"sft_best_loss": 0.0009,
"sft_last_token_accuracy": 0.9726027250289917,
"sft_valid_rate": 1.0,
"sft_avg_env_reward": 0.726,
"sft_avg_latency_seconds": 2.158,
"grpo_avg_reward": null,
"grpo_history_steps": 0,
"grpo_valid_rate": null,
"grpo_avg_env_reward": null,
"grpo_avg_latency_seconds": null
},
"files": {
"run_metadata.json": "outputs/reports/submission_evidence/qwen_0_5b_1_5b_3b/runs/qwen-qwen2-5-1-5b-instruct/run_metadata.json",
"sft_trl_run.json": "outputs/reports/submission_evidence/qwen_0_5b_1_5b_3b/runs/qwen-qwen2-5-1-5b-instruct/sft_trl_run.json",
"sft_history.json": "outputs/reports/submission_evidence/qwen_0_5b_1_5b_3b/runs/qwen-qwen2-5-1-5b-instruct/sft_history.json",
"postsave_inference_sft.json": "outputs/reports/submission_evidence/qwen_0_5b_1_5b_3b/runs/qwen-qwen2-5-1-5b-instruct/postsave_inference_sft.json",
"grpo_trl_run.json": "",
"grpo_history.json": "",
"grpo_reward_components.jsonl": "",
"postsave_inference_grpo.json": "",
"grpo_ablation_report.json": "",
"error.json": ""
}
},
{
"run_id": "qwen-qwen2-5-3b-instruct",
"model_id": "Qwen/Qwen2.5-3B-Instruct",
"label": "Qwen 3B",
"statuses": {
"sft_training": "artifact_available",
"sft_postsave_inference": "artifact_available",
"grpo_training": "artifact_available",
"grpo_postsave_inference": "artifact_available",
"policy_ablation": "artifact_available"
},
"metrics": {
"sft_train_loss": 0.15688225453009363,
"sft_train_runtime": 715.2908,
"sft_examples_used": 2000,
"sft_history_steps": 2001,
"sft_first_loss": 3.5687,
"sft_last_loss": 0.054,
"sft_best_loss": 0.0022,
"sft_last_token_accuracy": 0.9750415682792664,
"sft_valid_rate": 1.0,
"sft_avg_env_reward": 0.781,
"sft_avg_latency_seconds": 2.863,
"grpo_avg_reward": 0.767,
"grpo_history_steps": 2001,
"grpo_valid_rate": 1.0,
"grpo_avg_env_reward": 0.726,
"grpo_avg_latency_seconds": 3.681
},
"files": {
"run_metadata.json": "outputs/reports/submission_evidence/qwen_0_5b_1_5b_3b/runs/qwen-qwen2-5-3b-instruct/run_metadata.json",
"sft_trl_run.json": "outputs/reports/submission_evidence/qwen_0_5b_1_5b_3b/runs/qwen-qwen2-5-3b-instruct/sft_trl_run.json",
"sft_history.json": "outputs/reports/submission_evidence/qwen_0_5b_1_5b_3b/runs/qwen-qwen2-5-3b-instruct/sft_history.json",
"postsave_inference_sft.json": "outputs/reports/submission_evidence/qwen_0_5b_1_5b_3b/runs/qwen-qwen2-5-3b-instruct/postsave_inference_sft.json",
"grpo_trl_run.json": "outputs/reports/submission_evidence/qwen_0_5b_1_5b_3b/runs/qwen-qwen2-5-3b-instruct/grpo_trl_run.json",
"grpo_history.json": "outputs/reports/submission_evidence/qwen_0_5b_1_5b_3b/runs/qwen-qwen2-5-3b-instruct/grpo_history.json",
"grpo_reward_components.jsonl": "outputs/reports/submission_evidence/qwen_0_5b_1_5b_3b/runs/qwen-qwen2-5-3b-instruct/grpo_reward_components.jsonl",
"postsave_inference_grpo.json": "outputs/reports/submission_evidence/qwen_0_5b_1_5b_3b/runs/qwen-qwen2-5-3b-instruct/postsave_inference_grpo.json",
"grpo_ablation_report.json": "outputs/reports/submission_evidence/qwen_0_5b_1_5b_3b/runs/qwen-qwen2-5-3b-instruct/grpo_ablation_report.json",
"error.json": ""
}
}
],
"basic_vs_pipeline": {
"reward_delta": 0.043,
"basic_reward": 0.762,
"pipeline_reward": 0.805,
"basic_failure_rate": 0.25,
"pipeline_failure_rate": 0.0,
"pipeline_legality": 1.0
},
"download_command": "HF_TOKEN=<token> ./.venv/bin/hf download adithya9903/polyguard-openenv-final-artifacts --repo-type space --local-dir ./hf_final_artifacts",
"notes": [
"Packaging-only run; no retraining is performed.",
"Qwen 3B has SFT and GRPO adapter directories plus checkpoint metadata/intermediate checkpoints in this artifact Space.",
"Qwen 0.5B and 1.5B adapter directories were not present locally or in the checked artifact repos; reports remain included."
]
}