| { |
| "status": "ok", |
| "started_at": 1777180786.0648105, |
| "finished_at": 1777188659.441074, |
| "commands": [ |
| { |
| "args": [ |
| "python", |
| "scripts/bootstrap_data.py" |
| ], |
| "returncode": 0, |
| "elapsed_seconds": 0.507 |
| }, |
| { |
| "args": [ |
| "python", |
| "scripts/build_training_corpus.py", |
| "--profile", |
| "massive", |
| "--with-local", |
| "--with-synthetic", |
| "--with-hf" |
| ], |
| "returncode": 0, |
| "elapsed_seconds": 3.695 |
| }, |
| { |
| "args": [ |
| "python", |
| "scripts/train_sft_trl.py", |
| "--model-id", |
| "Qwen/Qwen2.5-3B-Instruct", |
| "--dataset-path", |
| "data/processed/training_corpus_sft.json", |
| "--output-dir", |
| "checkpoints/sweeps/qwen-qwen2-5-3b-instruct", |
| "--report-path", |
| "outputs/reports/sweeps/qwen-qwen2-5-3b-instruct/sft_trl_run.json", |
| "--epochs", |
| "2", |
| "--max-steps", |
| "0", |
| "--batch-size", |
| "2", |
| "--max-seq-len", |
| "512", |
| "--learning-rate", |
| "2e-05", |
| "--use-unsloth" |
| ], |
| "returncode": 0, |
| "elapsed_seconds": 737.28 |
| }, |
| { |
| "args": [ |
| "python", |
| "scripts/train_grpo_trl.py", |
| "--model-id", |
| "Qwen/Qwen2.5-3B-Instruct", |
| "--prompts-path", |
| "data/processed/training_corpus_grpo_prompts.jsonl", |
| "--output-dir", |
| "checkpoints/sweeps/qwen-qwen2-5-3b-instruct", |
| "--report-path", |
| "outputs/reports/sweeps/qwen-qwen2-5-3b-instruct/grpo_trl_run.json", |
| "--max-prompts", |
| "0", |
| "--max-steps", |
| "0", |
| "--epochs", |
| "1.0", |
| "--batch-size", |
| "2", |
| "--grad-accum", |
| "1", |
| "--num-generations", |
| "2", |
| "--max-prompt-length", |
| "384", |
| "--max-completion-length", |
| "64", |
| "--learning-rate", |
| "1e-06", |
| "--use-unsloth" |
| ], |
| "returncode": 0, |
| "elapsed_seconds": 6885.399 |
| }, |
| { |
| "args": [ |
| "python", |
| "scripts/merge_adapters_safe.py", |
| "--adapter-dir", |
| "checkpoints/sweeps/qwen-qwen2-5-3b-instruct/sft_adapter", |
| "--output-dir", |
| "checkpoints/sweeps/qwen-qwen2-5-3b-instruct/merged" |
| ], |
| "returncode": 0, |
| "elapsed_seconds": 15.74 |
| }, |
| { |
| "args": [ |
| "python", |
| "scripts/test_inference_postsave.py", |
| "--samples", |
| "5", |
| "--base-model", |
| "Qwen/Qwen2.5-3B-Instruct", |
| "--merged-model", |
| "checkpoints/sweeps/qwen-qwen2-5-3b-instruct/merged", |
| "--adapter-dir", |
| "checkpoints/sweeps/qwen-qwen2-5-3b-instruct/sft_adapter", |
| "--output", |
| "outputs/reports/sweeps/qwen-qwen2-5-3b-instruct/postsave_inference_sft.json" |
| ], |
| "returncode": 0, |
| "elapsed_seconds": 20.985 |
| }, |
| { |
| "args": [ |
| "python", |
| "scripts/test_inference_postsave.py", |
| "--samples", |
| "5", |
| "--base-model", |
| "Qwen/Qwen2.5-3B-Instruct", |
| "--merged-model", |
| "checkpoints/sweeps/qwen-qwen2-5-3b-instruct/missing_merged_grpo", |
| "--adapter-dir", |
| "checkpoints/sweeps/qwen-qwen2-5-3b-instruct/grpo_adapter", |
| "--output", |
| "outputs/reports/sweeps/qwen-qwen2-5-3b-instruct/postsave_inference_grpo.json" |
| ], |
| "returncode": 0, |
| "elapsed_seconds": 26.691 |
| }, |
| { |
| "args": [ |
| "python", |
| "scripts/evaluate_policy_ablations.py", |
| "--episodes", |
| "8", |
| "--checkpoint-dir", |
| "checkpoints/sweeps/qwen-qwen2-5-3b-instruct", |
| "--output", |
| "outputs/reports/sweeps/qwen-qwen2-5-3b-instruct/grpo_ablation_report.json" |
| ], |
| "returncode": 0, |
| "elapsed_seconds": 4.001 |
| }, |
| { |
| "args": [ |
| "reuse_artifact", |
| "grpo_adapter", |
| "/app/checkpoints/grpo_adapter" |
| ], |
| "returncode": 0, |
| "elapsed_seconds": 0.0 |
| }, |
| { |
| "args": [ |
| "python", |
| "scripts/evaluate_baselines.py" |
| ], |
| "returncode": 0, |
| "elapsed_seconds": 4.163 |
| }, |
| { |
| "args": [ |
| "python", |
| "scripts/evaluate_all.py" |
| ], |
| "returncode": 0, |
| "elapsed_seconds": 3.798 |
| }, |
| { |
| "args": [ |
| "python", |
| "scripts/evaluate_compare_runs.py", |
| "--baseline", |
| "outputs/reports/baselines.json", |
| "--candidate", |
| "outputs/reports/benchmark_report.json", |
| "--output", |
| "outputs/reports/improvement_report.json" |
| ], |
| "returncode": 0, |
| "elapsed_seconds": 0.034 |
| }, |
| { |
| "args": [ |
| "python", |
| "scripts/benchmark_inference.py" |
| ], |
| "returncode": 0, |
| "elapsed_seconds": 2.39 |
| }, |
| { |
| "args": [ |
| "python", |
| "scripts/run_robustness_suite.py" |
| ], |
| "returncode": 0, |
| "elapsed_seconds": 2.692 |
| }, |
| { |
| "args": [ |
| "python", |
| "scripts/generate_hf_training_report.py", |
| "--mode", |
| "full" |
| ], |
| "returncode": 0, |
| "elapsed_seconds": 2.078 |
| } |
| ], |
| "artifact_repo_id": "adithya9903/polyguard-openenv-training-3b-artifacts", |
| "training_mode": "full", |
| "model_sweep": [ |
| "Qwen/Qwen2.5-3B-Instruct" |
| ], |
| "improved": true, |
| "anti_hacking_passed": false, |
| "completed_run_ids": [ |
| "qwen-qwen2-5-3b-instruct" |
| ] |
| } |