| { |
| "status": "ok", |
| "space_id": "adithya9903/polyguard-openenv-final-artifacts", |
| "space_url": "https://huggingface.co/spaces/adithya9903/polyguard-openenv-final-artifacts", |
| "docs_dir": "docs/results/final_submission_evidence", |
| "evidence_source": "docs/results/submission_evidence_qwen_0_5b_1_5b_3b", |
| "artifact_availability": { |
| "qwen-qwen2-5-0-5b-instruct": { |
| "label": "Qwen 0.5B", |
| "model_id": "Qwen/Qwen2.5-0.5B-Instruct", |
| "checkpoint_tree": { |
| "exists": false, |
| "file_count": 0, |
| "bytes": 0 |
| }, |
| "sft_adapter": { |
| "exists": false, |
| "file_count": 0, |
| "bytes": 0 |
| }, |
| "grpo_adapter": { |
| "exists": false, |
| "file_count": 0, |
| "bytes": 0 |
| }, |
| "reports": { |
| "exists": true, |
| "file_count": 4, |
| "bytes": 435858 |
| }, |
| "sft_report": true, |
| "grpo_report": false, |
| "postsave_sft": true, |
| "postsave_grpo": false, |
| "policy_ablation": false, |
| "missing_trained_files": [ |
| "sft_adapter", |
| "grpo_adapter" |
| ], |
| "status": "reports_only_or_partial" |
| }, |
| "qwen-qwen2-5-1-5b-instruct": { |
| "label": "Qwen 1.5B", |
| "model_id": "Qwen/Qwen2.5-1.5B-Instruct", |
| "checkpoint_tree": { |
| "exists": false, |
| "file_count": 0, |
| "bytes": 0 |
| }, |
| "sft_adapter": { |
| "exists": false, |
| "file_count": 0, |
| "bytes": 0 |
| }, |
| "grpo_adapter": { |
| "exists": false, |
| "file_count": 0, |
| "bytes": 0 |
| }, |
| "reports": { |
| "exists": true, |
| "file_count": 4, |
| "bytes": 854543 |
| }, |
| "sft_report": true, |
| "grpo_report": false, |
| "postsave_sft": true, |
| "postsave_grpo": false, |
| "policy_ablation": false, |
| "missing_trained_files": [ |
| "sft_adapter", |
| "grpo_adapter" |
| ], |
| "status": "reports_only_or_partial" |
| }, |
| "qwen-qwen2-5-3b-instruct": { |
| "label": "Qwen 3B", |
| "model_id": "Qwen/Qwen2.5-3B-Instruct", |
| "checkpoint_tree": { |
| "exists": true, |
| "file_count": 125, |
| "bytes": 433208536 |
| }, |
| "sft_adapter": { |
| "exists": true, |
| "file_count": 11, |
| "bytes": 30655905 |
| }, |
| "grpo_adapter": { |
| "exists": true, |
| "file_count": 11, |
| "bytes": 30656841 |
| }, |
| "reports": { |
| "exists": true, |
| "file_count": 9, |
| "bytes": 5930214 |
| }, |
| "sft_report": true, |
| "grpo_report": true, |
| "postsave_sft": true, |
| "postsave_grpo": true, |
| "policy_ablation": true, |
| "missing_trained_files": [], |
| "status": "complete" |
| } |
| }, |
| "submission_models": [ |
| { |
| "run_id": "qwen-qwen2-5-0-5b-instruct", |
| "model_id": "Qwen/Qwen2.5-0.5B-Instruct", |
| "label": "Qwen 0.5B", |
| "statuses": { |
| "sft_training": "artifact_available", |
| "sft_postsave_inference": "artifact_available", |
| "grpo_training": "not_seen_in_status", |
| "grpo_postsave_inference": "not_seen_in_status", |
| "policy_ablation": "not_seen_in_status" |
| }, |
| "metrics": { |
| "sft_train_loss": 0.19233327957964502, |
| "sft_train_runtime": 234.6302, |
| "sft_examples_used": 2000, |
| "sft_history_steps": 2001, |
| "sft_first_loss": 3.0856, |
| "sft_last_loss": 0.0626, |
| "sft_best_loss": 0.0057, |
| "sft_last_token_accuracy": 0.9717137813568115, |
| "sft_valid_rate": 1.0, |
| "sft_avg_env_reward": 0.726, |
| "sft_avg_latency_seconds": 1.839, |
| "grpo_avg_reward": null, |
| "grpo_history_steps": 0, |
| "grpo_valid_rate": null, |
| "grpo_avg_env_reward": null, |
| "grpo_avg_latency_seconds": null |
| }, |
| "files": { |
| "run_metadata.json": "outputs/reports/submission_evidence/qwen_0_5b_1_5b_3b/runs/qwen-qwen2-5-0-5b-instruct/run_metadata.json", |
| "sft_trl_run.json": "outputs/reports/submission_evidence/qwen_0_5b_1_5b_3b/runs/qwen-qwen2-5-0-5b-instruct/sft_trl_run.json", |
| "sft_history.json": "outputs/reports/submission_evidence/qwen_0_5b_1_5b_3b/runs/qwen-qwen2-5-0-5b-instruct/sft_history.json", |
| "postsave_inference_sft.json": "outputs/reports/submission_evidence/qwen_0_5b_1_5b_3b/runs/qwen-qwen2-5-0-5b-instruct/postsave_inference_sft.json", |
| "grpo_trl_run.json": "", |
| "grpo_history.json": "", |
| "grpo_reward_components.jsonl": "", |
| "postsave_inference_grpo.json": "", |
| "grpo_ablation_report.json": "", |
| "error.json": "" |
| } |
| }, |
| { |
| "run_id": "qwen-qwen2-5-1-5b-instruct", |
| "model_id": "Qwen/Qwen2.5-1.5B-Instruct", |
| "label": "Qwen 1.5B", |
| "statuses": { |
| "sft_training": "artifact_available", |
| "sft_postsave_inference": "artifact_available", |
| "grpo_training": "not_seen_in_status", |
| "grpo_postsave_inference": "not_seen_in_status", |
| "policy_ablation": "not_seen_in_status" |
| }, |
| "metrics": { |
| "sft_train_loss": 0.11515871361242898, |
| "sft_train_runtime": 483.7085, |
| "sft_examples_used": 2000, |
| "sft_history_steps": 4001, |
| "sft_first_loss": 2.9686, |
| "sft_last_loss": 0.0681, |
| "sft_best_loss": 0.0009, |
| "sft_last_token_accuracy": 0.9726027250289917, |
| "sft_valid_rate": 1.0, |
| "sft_avg_env_reward": 0.726, |
| "sft_avg_latency_seconds": 2.158, |
| "grpo_avg_reward": null, |
| "grpo_history_steps": 0, |
| "grpo_valid_rate": null, |
| "grpo_avg_env_reward": null, |
| "grpo_avg_latency_seconds": null |
| }, |
| "files": { |
| "run_metadata.json": "outputs/reports/submission_evidence/qwen_0_5b_1_5b_3b/runs/qwen-qwen2-5-1-5b-instruct/run_metadata.json", |
| "sft_trl_run.json": "outputs/reports/submission_evidence/qwen_0_5b_1_5b_3b/runs/qwen-qwen2-5-1-5b-instruct/sft_trl_run.json", |
| "sft_history.json": "outputs/reports/submission_evidence/qwen_0_5b_1_5b_3b/runs/qwen-qwen2-5-1-5b-instruct/sft_history.json", |
| "postsave_inference_sft.json": "outputs/reports/submission_evidence/qwen_0_5b_1_5b_3b/runs/qwen-qwen2-5-1-5b-instruct/postsave_inference_sft.json", |
| "grpo_trl_run.json": "", |
| "grpo_history.json": "", |
| "grpo_reward_components.jsonl": "", |
| "postsave_inference_grpo.json": "", |
| "grpo_ablation_report.json": "", |
| "error.json": "" |
| } |
| }, |
| { |
| "run_id": "qwen-qwen2-5-3b-instruct", |
| "model_id": "Qwen/Qwen2.5-3B-Instruct", |
| "label": "Qwen 3B", |
| "statuses": { |
| "sft_training": "artifact_available", |
| "sft_postsave_inference": "artifact_available", |
| "grpo_training": "artifact_available", |
| "grpo_postsave_inference": "artifact_available", |
| "policy_ablation": "artifact_available" |
| }, |
| "metrics": { |
| "sft_train_loss": 0.15688225453009363, |
| "sft_train_runtime": 715.2908, |
| "sft_examples_used": 2000, |
| "sft_history_steps": 2001, |
| "sft_first_loss": 3.5687, |
| "sft_last_loss": 0.054, |
| "sft_best_loss": 0.0022, |
| "sft_last_token_accuracy": 0.9750415682792664, |
| "sft_valid_rate": 1.0, |
| "sft_avg_env_reward": 0.781, |
| "sft_avg_latency_seconds": 2.863, |
| "grpo_avg_reward": 0.767, |
| "grpo_history_steps": 2001, |
| "grpo_valid_rate": 1.0, |
| "grpo_avg_env_reward": 0.726, |
| "grpo_avg_latency_seconds": 3.681 |
| }, |
| "files": { |
| "run_metadata.json": "outputs/reports/submission_evidence/qwen_0_5b_1_5b_3b/runs/qwen-qwen2-5-3b-instruct/run_metadata.json", |
| "sft_trl_run.json": "outputs/reports/submission_evidence/qwen_0_5b_1_5b_3b/runs/qwen-qwen2-5-3b-instruct/sft_trl_run.json", |
| "sft_history.json": "outputs/reports/submission_evidence/qwen_0_5b_1_5b_3b/runs/qwen-qwen2-5-3b-instruct/sft_history.json", |
| "postsave_inference_sft.json": "outputs/reports/submission_evidence/qwen_0_5b_1_5b_3b/runs/qwen-qwen2-5-3b-instruct/postsave_inference_sft.json", |
| "grpo_trl_run.json": "outputs/reports/submission_evidence/qwen_0_5b_1_5b_3b/runs/qwen-qwen2-5-3b-instruct/grpo_trl_run.json", |
| "grpo_history.json": "outputs/reports/submission_evidence/qwen_0_5b_1_5b_3b/runs/qwen-qwen2-5-3b-instruct/grpo_history.json", |
| "grpo_reward_components.jsonl": "outputs/reports/submission_evidence/qwen_0_5b_1_5b_3b/runs/qwen-qwen2-5-3b-instruct/grpo_reward_components.jsonl", |
| "postsave_inference_grpo.json": "outputs/reports/submission_evidence/qwen_0_5b_1_5b_3b/runs/qwen-qwen2-5-3b-instruct/postsave_inference_grpo.json", |
| "grpo_ablation_report.json": "outputs/reports/submission_evidence/qwen_0_5b_1_5b_3b/runs/qwen-qwen2-5-3b-instruct/grpo_ablation_report.json", |
| "error.json": "" |
| } |
| } |
| ], |
| "basic_vs_pipeline": { |
| "reward_delta": 0.043, |
| "basic_reward": 0.762, |
| "pipeline_reward": 0.805, |
| "basic_failure_rate": 0.25, |
| "pipeline_failure_rate": 0.0, |
| "pipeline_legality": 1.0 |
| }, |
| "download_command": "HF_TOKEN=<token> ./.venv/bin/hf download adithya9903/polyguard-openenv-final-artifacts --repo-type space --local-dir ./hf_final_artifacts", |
| "notes": [ |
| "Packaging-only run; no retraining is performed.", |
| "Qwen 3B has SFT and GRPO adapter directories plus checkpoint metadata/intermediate checkpoints in this artifact Space.", |
| "Qwen 0.5B and 1.5B adapter directories were not present locally or in the checked artifact repos; reports remain included." |
| ] |
| } |
|
|