{ "status": "ok", "started_at": 1777163399.0780032, "finished_at": 1777164656.2574434, "commands": [ { "args": [ "python", "scripts/bootstrap_data.py" ], "returncode": 0, "elapsed_seconds": 0.504 }, { "args": [ "python", "scripts/build_training_corpus.py", "--profile", "massive", "--with-local", "--with-synthetic", "--with-hf" ], "returncode": 0, "elapsed_seconds": 4.013 }, { "args": [ "python", "scripts/train_sft_trl.py", "--model-id", "Qwen/Qwen2.5-0.5B-Instruct", "--dataset-path", "data/processed/training_corpus_sft.json", "--output-dir", "checkpoints/sweeps/qwen-qwen2-5-0-5b-instruct", "--report-path", "outputs/reports/sweeps/qwen-qwen2-5-0-5b-instruct/sft_trl_run.json", "--epochs", "2", "--max-steps", "0", "--batch-size", "2", "--max-seq-len", "512", "--learning-rate", "2e-05", "--use-unsloth" ], "returncode": 0, "elapsed_seconds": 251.4 }, { "args": [ "python", "scripts/merge_adapters_safe.py", "--adapter-dir", "checkpoints/sweeps/qwen-qwen2-5-0-5b-instruct/sft_adapter", "--output-dir", "checkpoints/sweeps/qwen-qwen2-5-0-5b-instruct/merged" ], "returncode": 0, "elapsed_seconds": 7.16 }, { "args": [ "python", "scripts/test_inference_postsave.py", "--samples", "5", "--base-model", "Qwen/Qwen2.5-0.5B-Instruct", "--merged-model", "checkpoints/sweeps/qwen-qwen2-5-0-5b-instruct/merged", "--adapter-dir", "checkpoints/sweeps/qwen-qwen2-5-0-5b-instruct/sft_adapter", "--output", "outputs/reports/sweeps/qwen-qwen2-5-0-5b-instruct/postsave_inference_sft.json" ], "returncode": 0, "elapsed_seconds": 15.213 }, { "args": [ "python", "scripts/train_sft_trl.py", "--model-id", "Qwen/Qwen2.5-1.5B-Instruct", "--dataset-path", "data/processed/training_corpus_sft.json", "--output-dir", "checkpoints/sweeps/qwen-qwen2-5-1-5b-instruct", "--report-path", "outputs/reports/sweeps/qwen-qwen2-5-1-5b-instruct/sft_trl_run.json", "--epochs", "2", "--max-steps", "0", "--batch-size", "1", "--max-seq-len", "512", "--learning-rate", "2e-05", "--use-unsloth" ], "returncode": 0, "elapsed_seconds": 504.997 }, { "args": [ "python", "scripts/merge_adapters_safe.py", "--adapter-dir", "checkpoints/sweeps/qwen-qwen2-5-1-5b-instruct/sft_adapter", "--output-dir", "checkpoints/sweeps/qwen-qwen2-5-1-5b-instruct/merged" ], "returncode": 0, "elapsed_seconds": 10.634 }, { "args": [ "python", "scripts/test_inference_postsave.py", "--samples", "5", "--base-model", "Qwen/Qwen2.5-1.5B-Instruct", "--merged-model", "checkpoints/sweeps/qwen-qwen2-5-1-5b-instruct/merged", "--adapter-dir", "checkpoints/sweeps/qwen-qwen2-5-1-5b-instruct/sft_adapter", "--output", "outputs/reports/sweeps/qwen-qwen2-5-1-5b-instruct/postsave_inference_sft.json" ], "returncode": 0, "elapsed_seconds": 17.029 }, { "args": [ "python", "scripts/train_sft_trl.py", "--model-id", "Qwen/Qwen2.5-3B-Instruct", "--dataset-path", "data/processed/training_corpus_sft.json", "--output-dir", "checkpoints/sweeps/qwen-qwen2-5-3b-instruct", "--report-path", "outputs/reports/sweeps/qwen-qwen2-5-3b-instruct/sft_trl_run.json", "--epochs", "1", "--max-steps", "0", "--batch-size", "1", "--max-seq-len", "512", "--learning-rate", "2e-05", "--use-unsloth" ], "returncode": 0, "elapsed_seconds": 394.356 }, { "args": [ "python", "scripts/merge_adapters_safe.py", "--adapter-dir", "checkpoints/sweeps/qwen-qwen2-5-3b-instruct/sft_adapter", "--output-dir", "checkpoints/sweeps/qwen-qwen2-5-3b-instruct/merged" ], "returncode": 0, "elapsed_seconds": 15.472 }, { "args": [ "python", "scripts/test_inference_postsave.py", "--samples", "5", "--base-model", "Qwen/Qwen2.5-3B-Instruct", "--merged-model", "checkpoints/sweeps/qwen-qwen2-5-3b-instruct/merged", "--adapter-dir", "checkpoints/sweeps/qwen-qwen2-5-3b-instruct/sft_adapter", "--output", "outputs/reports/sweeps/qwen-qwen2-5-3b-instruct/postsave_inference_sft.json" ], "returncode": 0, "elapsed_seconds": 20.373 }, { "args": [ "python", "scripts/evaluate_baselines.py" ], "returncode": 0, "elapsed_seconds": 4.112 }, { "args": [ "python", "scripts/evaluate_all.py" ], "returncode": 0, "elapsed_seconds": 3.787 }, { "args": [ "python", "scripts/evaluate_compare_runs.py", "--baseline", "outputs/reports/baselines.json", "--candidate", "outputs/reports/benchmark_report.json", "--output", "outputs/reports/improvement_report.json" ], "returncode": 0, "elapsed_seconds": 0.033 }, { "args": [ "python", "scripts/benchmark_inference.py" ], "returncode": 0, "elapsed_seconds": 2.376 }, { "args": [ "python", "scripts/generate_hf_training_report.py", "--mode", "sft-baseline" ], "returncode": 0, "elapsed_seconds": 1.791 } ], "artifact_repo_id": "TheJackBright/polyguard-openenv-sft-baseline-artifacts", "training_mode": "sft-baseline", "model_sweep": [ "Qwen/Qwen2.5-0.5B-Instruct", "Qwen/Qwen2.5-1.5B-Instruct", "Qwen/Qwen2.5-3B-Instruct" ], "improved": true, "anti_hacking_passed": true, "completed_run_ids": [ "qwen-qwen2-5-0-5b-instruct", "qwen-qwen2-5-1-5b-instruct", "qwen-qwen2-5-3b-instruct" ] }