adithya9903's picture
Upload PolyGuard training artifacts: docs/results
b5cc210 verified
{
"status": "ok",
"started_at": 1777180786.0648105,
"finished_at": 1777188659.441074,
"commands": [
{
"args": [
"python",
"scripts/bootstrap_data.py"
],
"returncode": 0,
"elapsed_seconds": 0.507
},
{
"args": [
"python",
"scripts/build_training_corpus.py",
"--profile",
"massive",
"--with-local",
"--with-synthetic",
"--with-hf"
],
"returncode": 0,
"elapsed_seconds": 3.695
},
{
"args": [
"python",
"scripts/train_sft_trl.py",
"--model-id",
"Qwen/Qwen2.5-3B-Instruct",
"--dataset-path",
"data/processed/training_corpus_sft.json",
"--output-dir",
"checkpoints/sweeps/qwen-qwen2-5-3b-instruct",
"--report-path",
"outputs/reports/sweeps/qwen-qwen2-5-3b-instruct/sft_trl_run.json",
"--epochs",
"2",
"--max-steps",
"0",
"--batch-size",
"2",
"--max-seq-len",
"512",
"--learning-rate",
"2e-05",
"--use-unsloth"
],
"returncode": 0,
"elapsed_seconds": 737.28
},
{
"args": [
"python",
"scripts/train_grpo_trl.py",
"--model-id",
"Qwen/Qwen2.5-3B-Instruct",
"--prompts-path",
"data/processed/training_corpus_grpo_prompts.jsonl",
"--output-dir",
"checkpoints/sweeps/qwen-qwen2-5-3b-instruct",
"--report-path",
"outputs/reports/sweeps/qwen-qwen2-5-3b-instruct/grpo_trl_run.json",
"--max-prompts",
"0",
"--max-steps",
"0",
"--epochs",
"1.0",
"--batch-size",
"2",
"--grad-accum",
"1",
"--num-generations",
"2",
"--max-prompt-length",
"384",
"--max-completion-length",
"64",
"--learning-rate",
"1e-06",
"--use-unsloth"
],
"returncode": 0,
"elapsed_seconds": 6885.399
},
{
"args": [
"python",
"scripts/merge_adapters_safe.py",
"--adapter-dir",
"checkpoints/sweeps/qwen-qwen2-5-3b-instruct/sft_adapter",
"--output-dir",
"checkpoints/sweeps/qwen-qwen2-5-3b-instruct/merged"
],
"returncode": 0,
"elapsed_seconds": 15.74
},
{
"args": [
"python",
"scripts/test_inference_postsave.py",
"--samples",
"5",
"--base-model",
"Qwen/Qwen2.5-3B-Instruct",
"--merged-model",
"checkpoints/sweeps/qwen-qwen2-5-3b-instruct/merged",
"--adapter-dir",
"checkpoints/sweeps/qwen-qwen2-5-3b-instruct/sft_adapter",
"--output",
"outputs/reports/sweeps/qwen-qwen2-5-3b-instruct/postsave_inference_sft.json"
],
"returncode": 0,
"elapsed_seconds": 20.985
},
{
"args": [
"python",
"scripts/test_inference_postsave.py",
"--samples",
"5",
"--base-model",
"Qwen/Qwen2.5-3B-Instruct",
"--merged-model",
"checkpoints/sweeps/qwen-qwen2-5-3b-instruct/missing_merged_grpo",
"--adapter-dir",
"checkpoints/sweeps/qwen-qwen2-5-3b-instruct/grpo_adapter",
"--output",
"outputs/reports/sweeps/qwen-qwen2-5-3b-instruct/postsave_inference_grpo.json"
],
"returncode": 0,
"elapsed_seconds": 26.691
},
{
"args": [
"python",
"scripts/evaluate_policy_ablations.py",
"--episodes",
"8",
"--checkpoint-dir",
"checkpoints/sweeps/qwen-qwen2-5-3b-instruct",
"--output",
"outputs/reports/sweeps/qwen-qwen2-5-3b-instruct/grpo_ablation_report.json"
],
"returncode": 0,
"elapsed_seconds": 4.001
},
{
"args": [
"reuse_artifact",
"grpo_adapter",
"/app/checkpoints/grpo_adapter"
],
"returncode": 0,
"elapsed_seconds": 0.0
},
{
"args": [
"python",
"scripts/evaluate_baselines.py"
],
"returncode": 0,
"elapsed_seconds": 4.163
},
{
"args": [
"python",
"scripts/evaluate_all.py"
],
"returncode": 0,
"elapsed_seconds": 3.798
},
{
"args": [
"python",
"scripts/evaluate_compare_runs.py",
"--baseline",
"outputs/reports/baselines.json",
"--candidate",
"outputs/reports/benchmark_report.json",
"--output",
"outputs/reports/improvement_report.json"
],
"returncode": 0,
"elapsed_seconds": 0.034
},
{
"args": [
"python",
"scripts/benchmark_inference.py"
],
"returncode": 0,
"elapsed_seconds": 2.39
},
{
"args": [
"python",
"scripts/run_robustness_suite.py"
],
"returncode": 0,
"elapsed_seconds": 2.692
},
{
"args": [
"python",
"scripts/generate_hf_training_report.py",
"--mode",
"full"
],
"returncode": 0,
"elapsed_seconds": 2.078
}
],
"artifact_repo_id": "adithya9903/polyguard-openenv-training-3b-artifacts",
"training_mode": "full",
"model_sweep": [
"Qwen/Qwen2.5-3B-Instruct"
],
"improved": true,
"anti_hacking_passed": false,
"completed_run_ids": [
"qwen-qwen2-5-3b-instruct"
]
}