{ "status": "ok", "started_at": 1777180786.0648105, "finished_at": 1777188659.441074, "commands": [ { "args": [ "python", "scripts/bootstrap_data.py" ], "returncode": 0, "elapsed_seconds": 0.507 }, { "args": [ "python", "scripts/build_training_corpus.py", "--profile", "massive", "--with-local", "--with-synthetic", "--with-hf" ], "returncode": 0, "elapsed_seconds": 3.695 }, { "args": [ "python", "scripts/train_sft_trl.py", "--model-id", "Qwen/Qwen2.5-3B-Instruct", "--dataset-path", "data/processed/training_corpus_sft.json", "--output-dir", "checkpoints/sweeps/qwen-qwen2-5-3b-instruct", "--report-path", "outputs/reports/sweeps/qwen-qwen2-5-3b-instruct/sft_trl_run.json", "--epochs", "2", "--max-steps", "0", "--batch-size", "2", "--max-seq-len", "512", "--learning-rate", "2e-05", "--use-unsloth" ], "returncode": 0, "elapsed_seconds": 737.28 }, { "args": [ "python", "scripts/train_grpo_trl.py", "--model-id", "Qwen/Qwen2.5-3B-Instruct", "--prompts-path", "data/processed/training_corpus_grpo_prompts.jsonl", "--output-dir", "checkpoints/sweeps/qwen-qwen2-5-3b-instruct", "--report-path", "outputs/reports/sweeps/qwen-qwen2-5-3b-instruct/grpo_trl_run.json", "--max-prompts", "0", "--max-steps", "0", "--epochs", "1.0", "--batch-size", "2", "--grad-accum", "1", "--num-generations", "2", "--max-prompt-length", "384", "--max-completion-length", "64", "--learning-rate", "1e-06", "--use-unsloth" ], "returncode": 0, "elapsed_seconds": 6885.399 }, { "args": [ "python", "scripts/merge_adapters_safe.py", "--adapter-dir", "checkpoints/sweeps/qwen-qwen2-5-3b-instruct/sft_adapter", "--output-dir", "checkpoints/sweeps/qwen-qwen2-5-3b-instruct/merged" ], "returncode": 0, "elapsed_seconds": 15.74 }, { "args": [ "python", "scripts/test_inference_postsave.py", "--samples", "5", "--base-model", "Qwen/Qwen2.5-3B-Instruct", "--merged-model", "checkpoints/sweeps/qwen-qwen2-5-3b-instruct/merged", "--adapter-dir", "checkpoints/sweeps/qwen-qwen2-5-3b-instruct/sft_adapter", "--output", "outputs/reports/sweeps/qwen-qwen2-5-3b-instruct/postsave_inference_sft.json" ], "returncode": 0, "elapsed_seconds": 20.985 }, { "args": [ "python", "scripts/test_inference_postsave.py", "--samples", "5", "--base-model", "Qwen/Qwen2.5-3B-Instruct", "--merged-model", "checkpoints/sweeps/qwen-qwen2-5-3b-instruct/missing_merged_grpo", "--adapter-dir", "checkpoints/sweeps/qwen-qwen2-5-3b-instruct/grpo_adapter", "--output", "outputs/reports/sweeps/qwen-qwen2-5-3b-instruct/postsave_inference_grpo.json" ], "returncode": 0, "elapsed_seconds": 26.691 }, { "args": [ "python", "scripts/evaluate_policy_ablations.py", "--episodes", "8", "--checkpoint-dir", "checkpoints/sweeps/qwen-qwen2-5-3b-instruct", "--output", "outputs/reports/sweeps/qwen-qwen2-5-3b-instruct/grpo_ablation_report.json" ], "returncode": 0, "elapsed_seconds": 4.001 }, { "args": [ "reuse_artifact", "grpo_adapter", "/app/checkpoints/grpo_adapter" ], "returncode": 0, "elapsed_seconds": 0.0 }, { "args": [ "python", "scripts/evaluate_baselines.py" ], "returncode": 0, "elapsed_seconds": 4.163 }, { "args": [ "python", "scripts/evaluate_all.py" ], "returncode": 0, "elapsed_seconds": 3.798 }, { "args": [ "python", "scripts/evaluate_compare_runs.py", "--baseline", "outputs/reports/baselines.json", "--candidate", "outputs/reports/benchmark_report.json", "--output", "outputs/reports/improvement_report.json" ], "returncode": 0, "elapsed_seconds": 0.034 }, { "args": [ "python", "scripts/benchmark_inference.py" ], "returncode": 0, "elapsed_seconds": 2.39 }, { "args": [ "python", "scripts/run_robustness_suite.py" ], "returncode": 0, "elapsed_seconds": 2.692 }, { "args": [ "python", "scripts/generate_hf_training_report.py", "--mode", "full" ], "returncode": 0, "elapsed_seconds": 2.078 } ], "artifact_repo_id": "adithya9903/polyguard-openenv-training-3b-artifacts", "training_mode": "full", "model_sweep": [ "Qwen/Qwen2.5-3B-Instruct" ], "improved": true, "anti_hacking_passed": false, "completed_run_ids": [ "qwen-qwen2-5-3b-instruct" ] }