adithya9903's picture
Upload PolyGuard training artifacts: docs/results
b5cc210 verified
{
"status": "ok",
"started_at": 1777163399.0780032,
"finished_at": 1777164656.2574434,
"commands": [
{
"args": [
"python",
"scripts/bootstrap_data.py"
],
"returncode": 0,
"elapsed_seconds": 0.504
},
{
"args": [
"python",
"scripts/build_training_corpus.py",
"--profile",
"massive",
"--with-local",
"--with-synthetic",
"--with-hf"
],
"returncode": 0,
"elapsed_seconds": 4.013
},
{
"args": [
"python",
"scripts/train_sft_trl.py",
"--model-id",
"Qwen/Qwen2.5-0.5B-Instruct",
"--dataset-path",
"data/processed/training_corpus_sft.json",
"--output-dir",
"checkpoints/sweeps/qwen-qwen2-5-0-5b-instruct",
"--report-path",
"outputs/reports/sweeps/qwen-qwen2-5-0-5b-instruct/sft_trl_run.json",
"--epochs",
"2",
"--max-steps",
"0",
"--batch-size",
"2",
"--max-seq-len",
"512",
"--learning-rate",
"2e-05",
"--use-unsloth"
],
"returncode": 0,
"elapsed_seconds": 251.4
},
{
"args": [
"python",
"scripts/merge_adapters_safe.py",
"--adapter-dir",
"checkpoints/sweeps/qwen-qwen2-5-0-5b-instruct/sft_adapter",
"--output-dir",
"checkpoints/sweeps/qwen-qwen2-5-0-5b-instruct/merged"
],
"returncode": 0,
"elapsed_seconds": 7.16
},
{
"args": [
"python",
"scripts/test_inference_postsave.py",
"--samples",
"5",
"--base-model",
"Qwen/Qwen2.5-0.5B-Instruct",
"--merged-model",
"checkpoints/sweeps/qwen-qwen2-5-0-5b-instruct/merged",
"--adapter-dir",
"checkpoints/sweeps/qwen-qwen2-5-0-5b-instruct/sft_adapter",
"--output",
"outputs/reports/sweeps/qwen-qwen2-5-0-5b-instruct/postsave_inference_sft.json"
],
"returncode": 0,
"elapsed_seconds": 15.213
},
{
"args": [
"python",
"scripts/train_sft_trl.py",
"--model-id",
"Qwen/Qwen2.5-1.5B-Instruct",
"--dataset-path",
"data/processed/training_corpus_sft.json",
"--output-dir",
"checkpoints/sweeps/qwen-qwen2-5-1-5b-instruct",
"--report-path",
"outputs/reports/sweeps/qwen-qwen2-5-1-5b-instruct/sft_trl_run.json",
"--epochs",
"2",
"--max-steps",
"0",
"--batch-size",
"1",
"--max-seq-len",
"512",
"--learning-rate",
"2e-05",
"--use-unsloth"
],
"returncode": 0,
"elapsed_seconds": 504.997
},
{
"args": [
"python",
"scripts/merge_adapters_safe.py",
"--adapter-dir",
"checkpoints/sweeps/qwen-qwen2-5-1-5b-instruct/sft_adapter",
"--output-dir",
"checkpoints/sweeps/qwen-qwen2-5-1-5b-instruct/merged"
],
"returncode": 0,
"elapsed_seconds": 10.634
},
{
"args": [
"python",
"scripts/test_inference_postsave.py",
"--samples",
"5",
"--base-model",
"Qwen/Qwen2.5-1.5B-Instruct",
"--merged-model",
"checkpoints/sweeps/qwen-qwen2-5-1-5b-instruct/merged",
"--adapter-dir",
"checkpoints/sweeps/qwen-qwen2-5-1-5b-instruct/sft_adapter",
"--output",
"outputs/reports/sweeps/qwen-qwen2-5-1-5b-instruct/postsave_inference_sft.json"
],
"returncode": 0,
"elapsed_seconds": 17.029
},
{
"args": [
"python",
"scripts/train_sft_trl.py",
"--model-id",
"Qwen/Qwen2.5-3B-Instruct",
"--dataset-path",
"data/processed/training_corpus_sft.json",
"--output-dir",
"checkpoints/sweeps/qwen-qwen2-5-3b-instruct",
"--report-path",
"outputs/reports/sweeps/qwen-qwen2-5-3b-instruct/sft_trl_run.json",
"--epochs",
"1",
"--max-steps",
"0",
"--batch-size",
"1",
"--max-seq-len",
"512",
"--learning-rate",
"2e-05",
"--use-unsloth"
],
"returncode": 0,
"elapsed_seconds": 394.356
},
{
"args": [
"python",
"scripts/merge_adapters_safe.py",
"--adapter-dir",
"checkpoints/sweeps/qwen-qwen2-5-3b-instruct/sft_adapter",
"--output-dir",
"checkpoints/sweeps/qwen-qwen2-5-3b-instruct/merged"
],
"returncode": 0,
"elapsed_seconds": 15.472
},
{
"args": [
"python",
"scripts/test_inference_postsave.py",
"--samples",
"5",
"--base-model",
"Qwen/Qwen2.5-3B-Instruct",
"--merged-model",
"checkpoints/sweeps/qwen-qwen2-5-3b-instruct/merged",
"--adapter-dir",
"checkpoints/sweeps/qwen-qwen2-5-3b-instruct/sft_adapter",
"--output",
"outputs/reports/sweeps/qwen-qwen2-5-3b-instruct/postsave_inference_sft.json"
],
"returncode": 0,
"elapsed_seconds": 20.373
},
{
"args": [
"python",
"scripts/evaluate_baselines.py"
],
"returncode": 0,
"elapsed_seconds": 4.112
},
{
"args": [
"python",
"scripts/evaluate_all.py"
],
"returncode": 0,
"elapsed_seconds": 3.787
},
{
"args": [
"python",
"scripts/evaluate_compare_runs.py",
"--baseline",
"outputs/reports/baselines.json",
"--candidate",
"outputs/reports/benchmark_report.json",
"--output",
"outputs/reports/improvement_report.json"
],
"returncode": 0,
"elapsed_seconds": 0.033
},
{
"args": [
"python",
"scripts/benchmark_inference.py"
],
"returncode": 0,
"elapsed_seconds": 2.376
},
{
"args": [
"python",
"scripts/generate_hf_training_report.py",
"--mode",
"sft-baseline"
],
"returncode": 0,
"elapsed_seconds": 1.791
}
],
"artifact_repo_id": "TheJackBright/polyguard-openenv-sft-baseline-artifacts",
"training_mode": "sft-baseline",
"model_sweep": [
"Qwen/Qwen2.5-0.5B-Instruct",
"Qwen/Qwen2.5-1.5B-Instruct",
"Qwen/Qwen2.5-3B-Instruct"
],
"improved": true,
"anti_hacking_passed": true,
"completed_run_ids": [
"qwen-qwen2-5-0-5b-instruct",
"qwen-qwen2-5-1-5b-instruct",
"qwen-qwen2-5-3b-instruct"
]
}