adithya9903 committed on
Commit
b5cc210
·
verified ·
1 Parent(s): d31b81a

Upload PolyGuard training artifacts: docs/results

Browse files
This view is limited to 50 files because it contains too many changes.   See raw diff
Files changed (50) hide show
  1. .gitattributes +6 -0
  2. docs/results/README.md +24 -0
  3. docs/results/acceptance_gate.json +11 -0
  4. docs/results/active_model/acceptance_gate.json +11 -0
  5. docs/results/active_model/active_model_manifest.json +68 -0
  6. docs/results/active_model/anti_hacking_overfit_report.json +22 -0
  7. docs/results/active_model/baselines.json +119 -0
  8. docs/results/active_model/benchmark_report.json +52 -0
  9. docs/results/active_model/benchmark_report.txt +52 -0
  10. docs/results/active_model/dose_train.json +6 -0
  11. docs/results/active_model/dosing_grpo.json +28 -0
  12. docs/results/active_model/frontier_ready.json +8 -0
  13. docs/results/active_model/graph_train.json +5 -0
  14. docs/results/active_model/grpo_ablation_report.json +149 -0
  15. docs/results/active_model/grpo_training_cycle/grpo_trl_run.json +42 -0
  16. docs/results/active_model/grpo_training_cycle/hf_training_status.json +123 -0
  17. docs/results/active_model/grpo_trl_run.json +42 -0
  18. docs/results/active_model/grpo_trl_run_auto.json +39 -0
  19. docs/results/active_model/grpo_trl_run_fallback_check.json +39 -0
  20. docs/results/active_model/grpo_trl_run_smoke.json +39 -0
  21. docs/results/active_model/grpo_trl_run_strict_check.json +39 -0
  22. docs/results/active_model/hf_sweep_summary.json +127 -0
  23. docs/results/active_model/hf_training_status.json +261 -0
  24. docs/results/active_model/improvement_report.json +19 -0
  25. docs/results/active_model/improvement_report_benchmark.json +19 -0
  26. docs/results/active_model/inference_benchmark.json +43 -0
  27. docs/results/active_model/planner_grpo.json +28 -0
  28. docs/results/active_model/plot_index.json +9 -0
  29. docs/results/active_model/postsave_inference.json +71 -0
  30. docs/results/active_model/postsave_inference_smoke.json +23 -0
  31. docs/results/active_model/risk_train.json +6 -0
  32. docs/results/active_model/robustness.json +10 -0
  33. docs/results/active_model/sft_run.json +9 -0
  34. docs/results/active_model/sft_trl_run.json +18 -0
  35. docs/results/active_model/supervisor_grpo.json +28 -0
  36. docs/results/active_model/sweeps/qwen-qwen2-5-0-5b-instruct/postsave_inference_sft.json +71 -0
  37. docs/results/active_model/sweeps/qwen-qwen2-5-0-5b-instruct/run_metadata.json +9 -0
  38. docs/results/active_model/sweeps/qwen-qwen2-5-0-5b-instruct/sft_history.json +0 -0
  39. docs/results/active_model/sweeps/qwen-qwen2-5-0-5b-instruct/sft_trl_run.json +18 -0
  40. docs/results/active_model/sweeps/qwen-qwen2-5-1-5b-instruct/postsave_inference_sft.json +71 -0
  41. docs/results/active_model/sweeps/qwen-qwen2-5-1-5b-instruct/run_metadata.json +9 -0
  42. docs/results/active_model/sweeps/qwen-qwen2-5-1-5b-instruct/sft_history.json +0 -0
  43. docs/results/active_model/sweeps/qwen-qwen2-5-1-5b-instruct/sft_trl_run.json +18 -0
  44. docs/results/active_model/sweeps/qwen-qwen2-5-3b-instruct/postsave_inference_sft.json +71 -0
  45. docs/results/active_model/sweeps/qwen-qwen2-5-3b-instruct/run_metadata.json +9 -0
  46. docs/results/active_model/sweeps/qwen-qwen2-5-3b-instruct/sft_history.json +0 -0
  47. docs/results/active_model/sweeps/qwen-qwen2-5-3b-instruct/sft_trl_run.json +18 -0
  48. docs/results/active_model_manifest.json +68 -0
  49. docs/results/anti_cheat_failure_rates.png +0 -0
  50. docs/results/anti_hacking_overfit_report.json +22 -0
.gitattributes CHANGED
@@ -47,3 +47,9 @@ checkpoints/grpo_adapter/tokenizer.json filter=lfs diff=lfs merge=lfs -text
47
  checkpoints/merged/tokenizer.json filter=lfs diff=lfs merge=lfs -text
48
  outputs/plots/grpo_reward_curves.png filter=lfs diff=lfs merge=lfs -text
49
  outputs/plots/reward_component_bars.png filter=lfs diff=lfs merge=lfs -text
 
 
 
 
 
 
 
47
  checkpoints/merged/tokenizer.json filter=lfs diff=lfs merge=lfs -text
48
  outputs/plots/grpo_reward_curves.png filter=lfs diff=lfs merge=lfs -text
49
  outputs/plots/reward_component_bars.png filter=lfs diff=lfs merge=lfs -text
50
+ docs/results/grpo_reward_curves.png filter=lfs diff=lfs merge=lfs -text
51
+ docs/results/reward_component_bars.png filter=lfs diff=lfs merge=lfs -text
52
+ docs/results/submission_evidence/qwen_0_5b_1_5b/reward_component_bars.png filter=lfs diff=lfs merge=lfs -text
53
+ docs/results/submission_evidence/qwen_0_5b_1_5b_3b/reward_component_bars.png filter=lfs diff=lfs merge=lfs -text
54
+ docs/results/submission_evidence_qwen_0_5b_1_5b/charts/generated/reward_component_bars.png filter=lfs diff=lfs merge=lfs -text
55
+ docs/results/submission_evidence_qwen_0_5b_1_5b_3b/charts/generated/reward_component_bars.png filter=lfs diff=lfs merge=lfs -text
docs/results/README.md ADDED
@@ -0,0 +1,24 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Result Artifacts
2
+
3
+ These tracked files mirror the latest local smoke/evaluation artifacts so the README can show stable evidence even though `outputs/` and `checkpoints/` are intentionally git-ignored.
4
+
5
+ Current status:
6
+
7
+ - OpenEnv structure/runtime validation passes locally.
8
+ - Test suite passes locally.
9
+ - Frontend production build passes locally.
10
+ - SFT and GRPO artifacts in this folder come from a tiny local compliance run that used the real TRL/Transformers backend, not the fallback backend.
11
+ - `postsave_inference.json` loads the merged artifact rather than the fallback policy.
12
+ - `improvement_report.json` shows positive average-reward improvement against the no-change baseline.
13
+ - `hf_space_verification.json` records a live Hugging Face Space validation pass.
14
+ - `active_model_manifest.json` records the currently activated local product model. As of April 26, 2026, this points at the local Qwen 0.5B smoke artifact while the full remote Qwen sweep continues.
15
+
16
+ For a stronger final pitch, replace these artifacts after a larger Colab/HF GPU run:
17
+
18
+ - `sft_trl_run.json`
19
+ - `grpo_trl_run.json`
20
+ - `postsave_inference.json`
21
+ - `improvement_report.json`
22
+ - all plot PNGs
23
+ - `hf_space_verification.json`
24
+ - `active_model_manifest.json`
docs/results/acceptance_gate.json ADDED
@@ -0,0 +1,11 @@
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "missing_files": [],
3
+ "missing_artifacts": [],
4
+ "missing_readme_markers": [],
5
+ "missing_readme_links": [],
6
+ "strict_submission_links": true,
7
+ "missing_submission_env": [],
8
+ "strict_submission_failures": [],
9
+ "submission_ready": true,
10
+ "status": "ok"
11
+ }
docs/results/active_model/acceptance_gate.json ADDED
@@ -0,0 +1,11 @@
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "missing_files": [],
3
+ "missing_artifacts": [],
4
+ "missing_readme_markers": [],
5
+ "missing_readme_links": [],
6
+ "strict_submission_links": false,
7
+ "missing_submission_env": [],
8
+ "strict_submission_failures": [],
9
+ "submission_ready": false,
10
+ "status": "ok"
11
+ }
docs/results/active_model/active_model_manifest.json ADDED
@@ -0,0 +1,68 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "status": "ok",
3
+ "enabled": true,
4
+ "activated_at_utc": "2026-04-26T02:24:15.464507+00:00",
5
+ "run_id": "qwen-qwen2-5-0-5b-instruct",
6
+ "source": "top-level",
7
+ "label": "local-qwen-0.5b-active-smoke",
8
+ "model_id": "Qwen/Qwen2.5-0.5B-Instruct",
9
+ "base_model": "Qwen/Qwen2.5-0.5B-Instruct",
10
+ "preferred_artifact": "grpo_adapter",
11
+ "mode": "symlink",
12
+ "source_checkpoint_dir": "checkpoints",
13
+ "source_report_dir": "outputs/reports",
14
+ "grpo_adapter": "checkpoints/active/grpo_adapter",
15
+ "merged_model": "checkpoints/active/merged",
16
+ "sft_adapter": "checkpoints/active/sft_adapter",
17
+ "availability": {
18
+ "grpo_adapter": true,
19
+ "merged": true,
20
+ "sft_adapter": true
21
+ },
22
+ "reports": {
23
+ "improvement_report_benchmark.json": "outputs/reports/active_model/improvement_report_benchmark.json",
24
+ "anti_hacking_overfit_report.json": "outputs/reports/active_model/anti_hacking_overfit_report.json",
25
+ "grpo_trl_run_strict_check.json": "outputs/reports/active_model/grpo_trl_run_strict_check.json",
26
+ "postsave_inference.json": "outputs/reports/active_model/postsave_inference.json",
27
+ "sft_trl_run.json": "outputs/reports/active_model/sft_trl_run.json",
28
+ "plot_index.json": "outputs/reports/active_model/plot_index.json",
29
+ "dose_train.json": "outputs/reports/active_model/dose_train.json",
30
+ "baselines.json": "outputs/reports/active_model/baselines.json",
31
+ "robustness.json": "outputs/reports/active_model/robustness.json",
32
+ "grpo_trl_run_fallback_check.json": "outputs/reports/active_model/grpo_trl_run_fallback_check.json",
33
+ "sft_run.json": "outputs/reports/active_model/sft_run.json",
34
+ "benchmark_report.txt": "outputs/reports/active_model/benchmark_report.txt",
35
+ "dosing_grpo.json": "outputs/reports/active_model/dosing_grpo.json",
36
+ "grpo_ablation_report.json": "outputs/reports/active_model/grpo_ablation_report.json",
37
+ "frontier_ready.json": "outputs/reports/active_model/frontier_ready.json",
38
+ "improvement_report.json": "outputs/reports/active_model/improvement_report.json",
39
+ "hf_sweep_summary.json": "outputs/reports/active_model/hf_sweep_summary.json",
40
+ "planner_grpo.json": "outputs/reports/active_model/planner_grpo.json",
41
+ "grpo_trl_run.json": "outputs/reports/active_model/grpo_trl_run.json",
42
+ "risk_train.json": "outputs/reports/active_model/risk_train.json",
43
+ "grpo_trl_run_smoke.json": "outputs/reports/active_model/grpo_trl_run_smoke.json",
44
+ "inference_benchmark.json": "outputs/reports/active_model/inference_benchmark.json",
45
+ "supervisor_grpo.json": "outputs/reports/active_model/supervisor_grpo.json",
46
+ "acceptance_gate.json": "outputs/reports/active_model/acceptance_gate.json",
47
+ "grpo_trl_run_auto.json": "outputs/reports/active_model/grpo_trl_run_auto.json",
48
+ "hf_training_status.json": "outputs/reports/active_model/hf_training_status.json",
49
+ "benchmark_report.json": "outputs/reports/active_model/benchmark_report.json",
50
+ "postsave_inference_smoke.json": "outputs/reports/active_model/postsave_inference_smoke.json",
51
+ "graph_train.json": "outputs/reports/active_model/graph_train.json",
52
+ "sweeps/qwen-qwen2-5-0-5b-instruct/sft_trl_run.json": "outputs/reports/active_model/sweeps/qwen-qwen2-5-0-5b-instruct/sft_trl_run.json",
53
+ "sweeps/qwen-qwen2-5-0-5b-instruct/run_metadata.json": "outputs/reports/active_model/sweeps/qwen-qwen2-5-0-5b-instruct/run_metadata.json",
54
+ "sweeps/qwen-qwen2-5-0-5b-instruct/sft_history.json": "outputs/reports/active_model/sweeps/qwen-qwen2-5-0-5b-instruct/sft_history.json",
55
+ "sweeps/qwen-qwen2-5-0-5b-instruct/postsave_inference_sft.json": "outputs/reports/active_model/sweeps/qwen-qwen2-5-0-5b-instruct/postsave_inference_sft.json",
56
+ "sweeps/qwen-qwen2-5-3b-instruct/sft_trl_run.json": "outputs/reports/active_model/sweeps/qwen-qwen2-5-3b-instruct/sft_trl_run.json",
57
+ "sweeps/qwen-qwen2-5-3b-instruct/run_metadata.json": "outputs/reports/active_model/sweeps/qwen-qwen2-5-3b-instruct/run_metadata.json",
58
+ "sweeps/qwen-qwen2-5-3b-instruct/sft_history.json": "outputs/reports/active_model/sweeps/qwen-qwen2-5-3b-instruct/sft_history.json",
59
+ "sweeps/qwen-qwen2-5-3b-instruct/postsave_inference_sft.json": "outputs/reports/active_model/sweeps/qwen-qwen2-5-3b-instruct/postsave_inference_sft.json",
60
+ "sweeps/qwen-qwen2-5-1-5b-instruct/sft_trl_run.json": "outputs/reports/active_model/sweeps/qwen-qwen2-5-1-5b-instruct/sft_trl_run.json",
61
+ "sweeps/qwen-qwen2-5-1-5b-instruct/run_metadata.json": "outputs/reports/active_model/sweeps/qwen-qwen2-5-1-5b-instruct/run_metadata.json",
62
+ "sweeps/qwen-qwen2-5-1-5b-instruct/sft_history.json": "outputs/reports/active_model/sweeps/qwen-qwen2-5-1-5b-instruct/sft_history.json",
63
+ "sweeps/qwen-qwen2-5-1-5b-instruct/postsave_inference_sft.json": "outputs/reports/active_model/sweeps/qwen-qwen2-5-1-5b-instruct/postsave_inference_sft.json",
64
+ "grpo_training_cycle/grpo_trl_run.json": "outputs/reports/active_model/grpo_training_cycle/grpo_trl_run.json",
65
+ "grpo_training_cycle/hf_training_status.json": "outputs/reports/active_model/grpo_training_cycle/hf_training_status.json"
66
+ },
67
+ "notes": "This manifest controls local product inference. Prefer grpo_adapter for the RL policy; merged is the SFT baseline fallback when no GRPO adapter is available."
68
+ }
docs/results/active_model/anti_hacking_overfit_report.json ADDED
@@ -0,0 +1,22 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "passed": true,
3
+ "training_mode": "sft-baseline",
4
+ "warnings": [],
5
+ "completed_models": [
6
+ "Qwen/Qwen2.5-0.5B-Instruct",
7
+ "Qwen/Qwen2.5-1.5B-Instruct",
8
+ "Qwen/Qwen2.5-3B-Instruct"
9
+ ],
10
+ "failed_or_skipped_models": [],
11
+ "checks": {
12
+ "reward_bounds": [
13
+ 0.001,
14
+ 0.999
15
+ ],
16
+ "reward_precision": 3,
17
+ "fallback_backends_rejected": true,
18
+ "exploit_rate_threshold": 0.35,
19
+ "train_holdout_gap_threshold": 0.25,
20
+ "min_validity_rate": 0.8
21
+ }
22
+ }
docs/results/active_model/baselines.json ADDED
@@ -0,0 +1,119 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "no_change": {
3
+ "mode": "REGIMEN_OPT",
4
+ "action_type": "KEEP_REGIMEN",
5
+ "target_drug": null,
6
+ "replacement_drug": null,
7
+ "dose_bucket": "NA",
8
+ "taper_days": null,
9
+ "monitoring_plan": null,
10
+ "evidence_query": null,
11
+ "new_drug_name": null,
12
+ "candidate_components": [],
13
+ "candidate_id": "cand_01",
14
+ "confidence": 0.8,
15
+ "rationale_brief": "Baseline no-change policy."
16
+ },
17
+ "rules_only": {
18
+ "mode": "REGIMEN_OPT",
19
+ "action_type": "SUBSTITUTE_WITHIN_CLASS",
20
+ "target_drug": "opioid_like",
21
+ "replacement_drug": "non_opioid_analgesic",
22
+ "dose_bucket": "NA",
23
+ "taper_days": null,
24
+ "monitoring_plan": null,
25
+ "evidence_query": null,
26
+ "new_drug_name": null,
27
+ "candidate_components": [],
28
+ "candidate_id": "cand_04",
29
+ "confidence": 0.75,
30
+ "rationale_brief": "Rules-only selected top legal candidate."
31
+ },
32
+ "greedy": {
33
+ "mode": "REGIMEN_OPT",
34
+ "action_type": "SUBSTITUTE_WITHIN_CLASS",
35
+ "target_drug": "opioid_like",
36
+ "replacement_drug": "non_opioid_analgesic",
37
+ "dose_bucket": "NA",
38
+ "taper_days": null,
39
+ "monitoring_plan": null,
40
+ "evidence_query": null,
41
+ "new_drug_name": null,
42
+ "candidate_components": [],
43
+ "candidate_id": "cand_04",
44
+ "confidence": 0.72,
45
+ "rationale_brief": "Greedy safety/burden improvement baseline."
46
+ },
47
+ "contextual_bandit": {
48
+ "mode": "REGIMEN_OPT",
49
+ "action_type": "SUBSTITUTE_WITHIN_CLASS",
50
+ "target_drug": "opioid_like",
51
+ "replacement_drug": "non_opioid_analgesic",
52
+ "dose_bucket": "NA",
53
+ "taper_days": null,
54
+ "monitoring_plan": null,
55
+ "evidence_query": null,
56
+ "new_drug_name": null,
57
+ "candidate_components": [],
58
+ "candidate_id": "cand_04",
59
+ "confidence": 0.68,
60
+ "rationale_brief": "Contextual bandit selected candidate."
61
+ },
62
+ "contextual_bandit_topk": [
63
+ {
64
+ "candidate_id": "cand_09",
65
+ "score": 1.1532307878304324,
66
+ "exploration_bonus": 1.1532307878304324,
67
+ "algorithm": "linucb"
68
+ },
69
+ {
70
+ "candidate_id": "cand_10",
71
+ "score": 1.1489735636645433,
72
+ "exploration_bonus": 1.1489735636645433,
73
+ "algorithm": "linucb"
74
+ },
75
+ {
76
+ "candidate_id": "cand_08",
77
+ "score": 1.1447401451857973,
78
+ "exploration_bonus": 1.1447401451857973,
79
+ "algorithm": "linucb"
80
+ }
81
+ ],
82
+ "beam_search": {
83
+ "mode": "REGIMEN_OPT",
84
+ "action_type": "SUBSTITUTE_WITHIN_CLASS",
85
+ "target_drug": "opioid_like",
86
+ "replacement_drug": "non_opioid_analgesic",
87
+ "dose_bucket": "NA",
88
+ "taper_days": null,
89
+ "monitoring_plan": null,
90
+ "evidence_query": null,
91
+ "new_drug_name": null,
92
+ "candidate_components": [],
93
+ "candidate_id": "cand_04",
94
+ "confidence": 0.74,
95
+ "rationale_brief": "Beam-search(3) top candidate."
96
+ },
97
+ "baseline_policy": "no_change_candidate",
98
+ "episodes": 8,
99
+ "avg_reward": 0.747,
100
+ "legality_rate": 1.0,
101
+ "success_rate": 0.0,
102
+ "policy_stack_ablations": {
103
+ "bandit-only": {
104
+ "avg_reward": 0.7616666666666667,
105
+ "legality_rate": 1.0,
106
+ "steps": 3.0
107
+ },
108
+ "llm-only": {
109
+ "avg_reward": 0.7753333333333333,
110
+ "legality_rate": 1.0,
111
+ "steps": 3.0
112
+ },
113
+ "llm+bandit": {
114
+ "avg_reward": 0.7753333333333333,
115
+ "legality_rate": 1.0,
116
+ "steps": 3.0
117
+ }
118
+ }
119
+ }
docs/results/active_model/benchmark_report.json ADDED
@@ -0,0 +1,52 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "offline_policy_eval": {
3
+ "avg_reward": 0.772833,
4
+ "legal_rate": 1.0,
5
+ "success_rate": 0.0
6
+ },
7
+ "safety_eval": {
8
+ "severe_violation_rate": 0.0,
9
+ "illegal_step_rate": 0.0
10
+ },
11
+ "dosing_eval": {
12
+ "target_attainment": 0.75,
13
+ "toxicity_avoidance": 1.0
14
+ },
15
+ "robustness_eval": {
16
+ "missing_labs_safety_rate": 0.666667,
17
+ "noisy_dose_info_safety_rate": 1.0,
18
+ "conflicting_meds_safety_rate": 1.0,
19
+ "alias_noise_safety_rate": 1.0,
20
+ "hidden_duplicate_detection_rate": 1.0,
21
+ "wrong_candidate_id_resilience": 1.0,
22
+ "stale_evidence_safety_rate": 1.0,
23
+ "delayed_ade_manifestation_safety_rate": 1.0
24
+ },
25
+ "calibration_eval": {
26
+ "ece_proxy": 0.08625
27
+ },
28
+ "abstention_eval": {
29
+ "appropriate_abstention_rate": 0.0
30
+ },
31
+ "process_eval": {
32
+ "process_fidelity": 0.92,
33
+ "avg_invalid_actions": 0.333333
34
+ },
35
+ "subgroup_eval": {
36
+ "renal_compromise": {
37
+ "avg_reward": 0.774,
38
+ "legal_rate": 1.0
39
+ },
40
+ "hepatic_compromise": {
41
+ "avg_reward": 0.779333,
42
+ "legal_rate": 1.0
43
+ },
44
+ "frail": {
45
+ "avg_reward": 0.781667,
46
+ "legal_rate": 1.0
47
+ }
48
+ },
49
+ "explainability_eval": {
50
+ "grounding_rate": 0.8
51
+ }
52
+ }
docs/results/active_model/benchmark_report.txt ADDED
@@ -0,0 +1,52 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "offline_policy_eval": {
3
+ "avg_reward": 0.772833,
4
+ "legal_rate": 1.0,
5
+ "success_rate": 0.0
6
+ },
7
+ "safety_eval": {
8
+ "severe_violation_rate": 0.0,
9
+ "illegal_step_rate": 0.0
10
+ },
11
+ "dosing_eval": {
12
+ "target_attainment": 0.75,
13
+ "toxicity_avoidance": 1.0
14
+ },
15
+ "robustness_eval": {
16
+ "missing_labs_safety_rate": 0.666667,
17
+ "noisy_dose_info_safety_rate": 1.0,
18
+ "conflicting_meds_safety_rate": 1.0,
19
+ "alias_noise_safety_rate": 1.0,
20
+ "hidden_duplicate_detection_rate": 1.0,
21
+ "wrong_candidate_id_resilience": 1.0,
22
+ "stale_evidence_safety_rate": 1.0,
23
+ "delayed_ade_manifestation_safety_rate": 1.0
24
+ },
25
+ "calibration_eval": {
26
+ "ece_proxy": 0.08625
27
+ },
28
+ "abstention_eval": {
29
+ "appropriate_abstention_rate": 0.0
30
+ },
31
+ "process_eval": {
32
+ "process_fidelity": 0.92,
33
+ "avg_invalid_actions": 0.333333
34
+ },
35
+ "subgroup_eval": {
36
+ "renal_compromise": {
37
+ "avg_reward": 0.774,
38
+ "legal_rate": 1.0
39
+ },
40
+ "hepatic_compromise": {
41
+ "avg_reward": 0.779333,
42
+ "legal_rate": 1.0
43
+ },
44
+ "frail": {
45
+ "avg_reward": 0.781667,
46
+ "legal_rate": 1.0
47
+ }
48
+ },
49
+ "explainability_eval": {
50
+ "grounding_rate": 0.8
51
+ }
52
+ }
docs/results/active_model/dose_train.json ADDED
@@ -0,0 +1,6 @@
 
 
 
 
 
 
 
1
+ {
2
+ "dataset_size": 120.0,
3
+ "status": "trained",
4
+ "train_mae": 0.0025,
5
+ "model_path": "outputs/models/dose_model.pkl"
6
+ }
docs/results/active_model/dosing_grpo.json ADDED
@@ -0,0 +1,28 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "avg_reward": 0.7785555555555557,
3
+ "legality_rate": 1.0,
4
+ "severe_violation_rate": 0.0,
5
+ "abstention_rate": 0.0,
6
+ "avg_episode_length": 2.0,
7
+ "success_rate": 0.0,
8
+ "avg_burden_delta": 0.0,
9
+ "avg_safety_delta": 0.5,
10
+ "avg_dosing_quality": 0.75,
11
+ "avg_process_fidelity": 0.9200000000000002,
12
+ "exploit_detection_count": 3.0,
13
+ "reward_columns": {
14
+ "format_compliance_score": 0.999,
15
+ "candidate_alignment_score": 0.999,
16
+ "legality_score": 0.999,
17
+ "safety_delta_score": 0.5,
18
+ "burden_improvement_score": 0.5,
19
+ "disease_stability_score": 0.9000000000000001,
20
+ "dosing_quality_score": 0.75,
21
+ "abstention_quality_score": 0.56,
22
+ "efficiency_score": 0.77,
23
+ "process_fidelity_score": 0.9200000000000002,
24
+ "explanation_grounding_score": 0.7999999999999999,
25
+ "anti_cheat_score": 0.6663333333333333,
26
+ "uncertainty_calibration_score": 0.87
27
+ }
28
+ }
docs/results/active_model/frontier_ready.json ADDED
@@ -0,0 +1,8 @@
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "frontier_models": [
3
+ "qwen2.5:7b-instruct",
4
+ "qwen2.5:14b-instruct"
5
+ ],
6
+ "deployment_mode": "hf_or_vllm_ready",
7
+ "notes": "Baseline complete; ready for larger model sweep."
8
+ }
docs/results/active_model/graph_train.json ADDED
@@ -0,0 +1,5 @@
 
 
 
 
 
 
1
+ {
2
+ "num_samples": 180,
3
+ "status": "trained",
4
+ "model_path": "/Users/daver/Desktop/Meta_Pytorch_OpenEnv_Scaler/polyguard-rl/outputs/models/graph_model.pkl"
5
+ }
docs/results/active_model/grpo_ablation_report.json ADDED
@@ -0,0 +1,149 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "status": "ok",
3
+ "ablations": {
4
+ "bandit_only": {
5
+ "avg_reward": 0.779625,
6
+ "legality_rate": 1.0,
7
+ "severe_violation_rate": 0.0,
8
+ "abstention_rate": 0.0,
9
+ "avg_episode_length": 2.8125,
10
+ "success_rate": 0.0,
11
+ "avg_burden_delta": 0.0,
12
+ "avg_safety_delta": 0.483125,
13
+ "avg_dosing_quality": 0.75,
14
+ "avg_process_fidelity": 0.9056250000000008,
15
+ "exploit_detection_count": 2.0,
16
+ "timeout_rate": 0.0,
17
+ "failure_visible_rate": 0.0625,
18
+ "avg_invalid_actions": 0.0625,
19
+ "reward_columns": {
20
+ "format_compliance_score": 0.9989999999999996,
21
+ "candidate_alignment_score": 0.9989999999999996,
22
+ "legality_score": 0.9989999999999996,
23
+ "safety_delta_score": 0.483125,
24
+ "burden_improvement_score": 0.5,
25
+ "disease_stability_score": 0.8999999999999995,
26
+ "dosing_quality_score": 0.75,
27
+ "abstention_quality_score": 0.5600000000000002,
28
+ "efficiency_score": 0.5855625,
29
+ "process_fidelity_score": 0.9056250000000008,
30
+ "explanation_grounding_score": 0.8000000000000004,
31
+ "anti_cheat_score": 0.9366249999999997,
32
+ "uncertainty_calibration_score": 0.8531250000000004
33
+ },
34
+ "primary_reward_channels": {
35
+ "safety_legality": 0.9469062499999998,
36
+ "clinical_improvement": 0.6273749999999997,
37
+ "dosing_quality": 0.6550000000000001,
38
+ "process_integrity": 0.8225937500000001
39
+ },
40
+ "policy_stack": "bandit-only",
41
+ "failure_mining": {
42
+ "total_rows": 32,
43
+ "failure_rows": 2,
44
+ "top_failure_reasons": [
45
+ {
46
+ "reason": "repeated_action_loop",
47
+ "count": 2
48
+ }
49
+ ]
50
+ }
51
+ },
52
+ "llm_only": {
53
+ "avg_reward": 0.7723913043478261,
54
+ "legality_rate": 1.0,
55
+ "severe_violation_rate": 0.0,
56
+ "abstention_rate": 0.0,
57
+ "avg_episode_length": 1.9565217391304348,
58
+ "success_rate": 0.0,
59
+ "avg_burden_delta": 0.0,
60
+ "avg_safety_delta": 0.4882608695652174,
61
+ "avg_dosing_quality": 0.75,
62
+ "avg_process_fidelity": 0.9000000000000005,
63
+ "exploit_detection_count": 7.0,
64
+ "timeout_rate": 0.0,
65
+ "failure_visible_rate": 0.30434782608695654,
66
+ "avg_invalid_actions": 0.30434782608695654,
67
+ "reward_columns": {
68
+ "format_compliance_score": 0.9989999999999999,
69
+ "candidate_alignment_score": 0.9989999999999999,
70
+ "legality_score": 0.9989999999999999,
71
+ "safety_delta_score": 0.4882608695652174,
72
+ "burden_improvement_score": 0.5,
73
+ "disease_stability_score": 0.8999999999999998,
74
+ "dosing_quality_score": 0.75,
75
+ "abstention_quality_score": 0.5600000000000004,
76
+ "efficiency_score": 0.7027826086956522,
77
+ "process_fidelity_score": 0.9000000000000005,
78
+ "explanation_grounding_score": 0.8000000000000003,
79
+ "anti_cheat_score": 0.6952608695652175,
80
+ "uncertainty_calibration_score": 0.8482608695652176
81
+ },
82
+ "primary_reward_channels": {
83
+ "safety_legality": 0.8853478260869562,
84
+ "clinical_improvement": 0.6290869565217388,
85
+ "dosing_quality": 0.6549999999999998,
86
+ "process_integrity": 0.8504782608695656
87
+ },
88
+ "policy_stack": "llm-only",
89
+ "failure_mining": {
90
+ "total_rows": 23,
91
+ "failure_rows": 7,
92
+ "top_failure_reasons": [
93
+ {
94
+ "reason": "repeated_action_loop",
95
+ "count": 7
96
+ }
97
+ ]
98
+ }
99
+ },
100
+ "llm_bandit": {
101
+ "avg_reward": 0.7647391304347826,
102
+ "legality_rate": 1.0,
103
+ "severe_violation_rate": 0.0,
104
+ "abstention_rate": 0.0,
105
+ "avg_episode_length": 1.9565217391304348,
106
+ "success_rate": 0.0,
107
+ "avg_burden_delta": 0.0,
108
+ "avg_safety_delta": 0.48982608695652174,
109
+ "avg_dosing_quality": 0.717391304347826,
110
+ "avg_process_fidelity": 0.9000000000000005,
111
+ "exploit_detection_count": 7.0,
112
+ "timeout_rate": 0.0,
113
+ "failure_visible_rate": 0.30434782608695654,
114
+ "avg_invalid_actions": 0.30434782608695654,
115
+ "reward_columns": {
116
+ "format_compliance_score": 0.9989999999999999,
117
+ "candidate_alignment_score": 0.9989999999999999,
118
+ "legality_score": 0.9989999999999999,
119
+ "safety_delta_score": 0.48982608695652174,
120
+ "burden_improvement_score": 0.5043478260869565,
121
+ "disease_stability_score": 0.8582608695652173,
122
+ "dosing_quality_score": 0.717391304347826,
123
+ "abstention_quality_score": 0.5600000000000004,
124
+ "efficiency_score": 0.7027826086956522,
125
+ "process_fidelity_score": 0.9000000000000005,
126
+ "explanation_grounding_score": 0.8000000000000003,
127
+ "anti_cheat_score": 0.6952608695652175,
128
+ "uncertainty_calibration_score": 0.8126086956521739
129
+ },
130
+ "primary_reward_channels": {
131
+ "safety_legality": 0.8765217391304347,
132
+ "clinical_improvement": 0.6171739130434781,
133
+ "dosing_quality": 0.6386956521739129,
134
+ "process_integrity": 0.8504782608695656
135
+ },
136
+ "policy_stack": "llm+bandit",
137
+ "failure_mining": {
138
+ "total_rows": 23,
139
+ "failure_rows": 7,
140
+ "top_failure_reasons": [
141
+ {
142
+ "reason": "repeated_action_loop",
143
+ "count": 7
144
+ }
145
+ ]
146
+ }
147
+ }
148
+ }
149
+ }
docs/results/active_model/grpo_training_cycle/grpo_trl_run.json ADDED
@@ -0,0 +1,42 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "status": "ok",
3
+ "backend": "trl_transformers",
4
+ "model_id": "Qwen/Qwen2.5-0.5B-Instruct",
5
+ "records": 2000,
6
+ "prompts_path": "/app/data/processed/training_corpus_grpo_prompts.jsonl",
7
+ "reward_summary": {
8
+ "count": 4000,
9
+ "avg_reward": 0.782178,
10
+ "avg_reward_components": {
11
+ "format_compliance_score": 0.999,
12
+ "candidate_alignment_score": 0.999,
13
+ "legality_score": 0.985277,
14
+ "safety_delta_score": 0.496104,
15
+ "burden_improvement_score": 0.494346,
16
+ "disease_stability_score": 0.8912,
17
+ "dosing_quality_score": 0.511938,
18
+ "abstention_quality_score": 0.56,
19
+ "efficiency_score": 0.84942,
20
+ "process_fidelity_score": 0.905268,
21
+ "explanation_grounding_score": 0.800248,
22
+ "anti_cheat_score": 0.48004,
23
+ "uncertainty_calibration_score": 0.730195
24
+ },
25
+ "avg_primary_reward_channels": {
26
+ "safety_legality": 0.798661,
27
+ "clinical_improvement": 0.62689,
28
+ "dosing_quality": 0.535969,
29
+ "process_integrity": 0.888448
30
+ }
31
+ },
32
+ "reward_log": "/app/checkpoints/grpo_reward_components.jsonl",
33
+ "train_metrics": {
34
+ "train_runtime": 6960.8084,
35
+ "train_samples_per_second": 0.287,
36
+ "train_steps_per_second": 0.287,
37
+ "total_flos": 0.0,
38
+ "train_loss": 2.3633859725151752e-06
39
+ },
40
+ "artifact_path": "/app/checkpoints/grpo_adapter",
41
+ "unsloth_available": false
42
+ }
docs/results/active_model/grpo_training_cycle/hf_training_status.json ADDED
@@ -0,0 +1,123 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "status": "running",
3
+ "started_at": 1777161126.3536248,
4
+ "finished_at": null,
5
+ "commands": [
6
+ {
7
+ "args": [
8
+ "python",
9
+ "scripts/bootstrap_data.py"
10
+ ],
11
+ "returncode": 0,
12
+ "elapsed_seconds": 0.821
13
+ },
14
+ {
15
+ "args": [
16
+ "python",
17
+ "scripts/build_training_corpus.py",
18
+ "--profile",
19
+ "massive",
20
+ "--with-local",
21
+ "--with-synthetic",
22
+ "--with-hf"
23
+ ],
24
+ "returncode": 0,
25
+ "elapsed_seconds": 4.367
26
+ },
27
+ {
28
+ "args": [
29
+ "python",
30
+ "scripts/train_sft_trl.py",
31
+ "--model-id",
32
+ "Qwen/Qwen2.5-0.5B-Instruct",
33
+ "--dataset-path",
34
+ "data/processed/training_corpus_sft.json",
35
+ "--epochs",
36
+ "1",
37
+ "--max-steps",
38
+ "20",
39
+ "--batch-size",
40
+ "2",
41
+ "--max-seq-len",
42
+ "512",
43
+ "--use-unsloth"
44
+ ],
45
+ "returncode": 0,
46
+ "elapsed_seconds": 24.564
47
+ },
48
+ {
49
+ "args": [
50
+ "reuse_artifact",
51
+ "grpo_adapter",
52
+ "/app/checkpoints/grpo_adapter"
53
+ ],
54
+ "returncode": 0,
55
+ "elapsed_seconds": 0.0
56
+ },
57
+ {
58
+ "args": [
59
+ "python",
60
+ "scripts/merge_adapters_safe.py",
61
+ "--adapter-dir",
62
+ "checkpoints/sft_adapter",
63
+ "--output-dir",
64
+ "checkpoints/merged"
65
+ ],
66
+ "returncode": 0,
67
+ "elapsed_seconds": 9.014
68
+ },
69
+ {
70
+ "args": [
71
+ "python",
72
+ "scripts/test_inference_postsave.py",
73
+ "--samples",
74
+ "3",
75
+ "--base-model",
76
+ "Qwen/Qwen2.5-0.5B-Instruct"
77
+ ],
78
+ "returncode": 0,
79
+ "elapsed_seconds": 14.811
80
+ },
81
+ {
82
+ "args": [
83
+ "python",
84
+ "scripts/evaluate_policy_ablations.py",
85
+ "--episodes",
86
+ "8"
87
+ ],
88
+ "returncode": 0,
89
+ "elapsed_seconds": 4.458
90
+ },
91
+ {
92
+ "args": [
93
+ "python",
94
+ "scripts/evaluate_baselines.py"
95
+ ],
96
+ "returncode": 0,
97
+ "elapsed_seconds": 4.603
98
+ },
99
+ {
100
+ "args": [
101
+ "python",
102
+ "scripts/evaluate_all.py"
103
+ ],
104
+ "returncode": 0,
105
+ "elapsed_seconds": 4.271
106
+ },
107
+ {
108
+ "args": [
109
+ "python",
110
+ "scripts/evaluate_compare_runs.py",
111
+ "--baseline",
112
+ "outputs/reports/baselines.json",
113
+ "--candidate",
114
+ "outputs/reports/benchmark_report.json",
115
+ "--output",
116
+ "outputs/reports/improvement_report.json"
117
+ ],
118
+ "returncode": 0,
119
+ "elapsed_seconds": 0.037
120
+ }
121
+ ],
122
+ "artifact_repo_id": "TheJackBright/polyguard-openenv-training-artifacts"
123
+ }
docs/results/active_model/grpo_trl_run.json ADDED
@@ -0,0 +1,42 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "status": "ok",
3
+ "backend": "trl_transformers",
4
+ "model_id": "Qwen/Qwen2.5-0.5B-Instruct",
5
+ "records": 2000,
6
+ "prompts_path": "/app/data/processed/training_corpus_grpo_prompts.jsonl",
7
+ "reward_summary": {
8
+ "count": 4000,
9
+ "avg_reward": 0.782178,
10
+ "avg_reward_components": {
11
+ "format_compliance_score": 0.999,
12
+ "candidate_alignment_score": 0.999,
13
+ "legality_score": 0.985277,
14
+ "safety_delta_score": 0.496104,
15
+ "burden_improvement_score": 0.494346,
16
+ "disease_stability_score": 0.8912,
17
+ "dosing_quality_score": 0.511938,
18
+ "abstention_quality_score": 0.56,
19
+ "efficiency_score": 0.84942,
20
+ "process_fidelity_score": 0.905268,
21
+ "explanation_grounding_score": 0.800248,
22
+ "anti_cheat_score": 0.48004,
23
+ "uncertainty_calibration_score": 0.730195
24
+ },
25
+ "avg_primary_reward_channels": {
26
+ "safety_legality": 0.798661,
27
+ "clinical_improvement": 0.62689,
28
+ "dosing_quality": 0.535969,
29
+ "process_integrity": 0.888448
30
+ }
31
+ },
32
+ "reward_log": "/app/checkpoints/grpo_reward_components.jsonl",
33
+ "train_metrics": {
34
+ "train_runtime": 6960.8084,
35
+ "train_samples_per_second": 0.287,
36
+ "train_steps_per_second": 0.287,
37
+ "total_flos": 0.0,
38
+ "train_loss": 2.3633859725151752e-06
39
+ },
40
+ "artifact_path": "/app/checkpoints/grpo_adapter",
41
+ "unsloth_available": false
42
+ }
docs/results/active_model/grpo_trl_run_auto.json ADDED
@@ -0,0 +1,39 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "status": "fallback",
3
+ "backend": "env_reward_fallback",
4
+ "model_id": "Qwen/Qwen2.5-1.5B-Instruct",
5
+ "records": 2,
6
+ "prompts_path": "/Users/daver/Desktop/Meta_Pytorch_OpenEnv_Scaler/polyguard-rl/data/processed/training_corpus_grpo_prompts.jsonl",
7
+ "reward_summary": {
8
+ "count": 2,
9
+ "avg_reward": 0.798,
10
+ "avg_reward_components": {
11
+ "format_compliance_score": 0.999,
12
+ "candidate_alignment_score": 0.999,
13
+ "legality_score": 0.999,
14
+ "safety_delta_score": 0.671,
15
+ "burden_improvement_score": 0.525,
16
+ "disease_stability_score": 0.74,
17
+ "dosing_quality_score": 0.5,
18
+ "abstention_quality_score": 0.56,
19
+ "efficiency_score": 0.857,
20
+ "process_fidelity_score": 0.92,
21
+ "explanation_grounding_score": 0.8,
22
+ "anti_cheat_score": 0.5,
23
+ "uncertainty_calibration_score": 0.74
24
+ },
25
+ "avg_primary_reward_channels": {
26
+ "safety_legality": 0.8095,
27
+ "clinical_improvement": 0.645,
28
+ "dosing_quality": 0.53,
29
+ "process_integrity": 0.894
30
+ }
31
+ },
32
+ "reward_log": "/Users/daver/Desktop/Meta_Pytorch_OpenEnv_Scaler/polyguard-rl/checkpoints/grpo_reward_components.jsonl",
33
+ "train_metrics": {
34
+ "steps_executed": 2.0
35
+ },
36
+ "artifact_path": "",
37
+ "unsloth_available": false,
38
+ "trl_runtime_error": "We couldn't connect to 'https://huggingface.co' to load the files, and couldn't find them in the cached files.\nCheck your internet connection or see how to run the library in offline mode at 'https://huggingface.co/docs/transformers/installation#offline-mode'."
39
+ }
docs/results/active_model/grpo_trl_run_fallback_check.json ADDED
@@ -0,0 +1,39 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "status": "fallback",
3
+ "backend": "env_reward_fallback",
4
+ "model_id": "Qwen/Qwen2.5-1.5B-Instruct",
5
+ "records": 1,
6
+ "prompts_path": "/Users/daver/Desktop/Meta_Pytorch_OpenEnv_Scaler/polyguard-rl/data/processed/training_corpus_grpo_prompts.jsonl",
7
+ "reward_summary": {
8
+ "count": 1,
9
+ "avg_reward": 0.764,
10
+ "avg_reward_components": {
11
+ "format_compliance_score": 0.999,
12
+ "candidate_alignment_score": 0.999,
13
+ "legality_score": 0.999,
14
+ "safety_delta_score": 0.5,
15
+ "burden_improvement_score": 0.5,
16
+ "disease_stability_score": 0.9,
17
+ "dosing_quality_score": 0.5,
18
+ "abstention_quality_score": 0.56,
19
+ "efficiency_score": 0.857,
20
+ "process_fidelity_score": 0.92,
21
+ "explanation_grounding_score": 0.8,
22
+ "anti_cheat_score": 0.001,
23
+ "uncertainty_calibration_score": 0.7
24
+ },
25
+ "avg_primary_reward_channels": {
26
+ "safety_legality": 0.675,
27
+ "clinical_improvement": 0.633,
28
+ "dosing_quality": 0.53,
29
+ "process_integrity": 0.894
30
+ }
31
+ },
32
+ "reward_log": "/Users/daver/Desktop/Meta_Pytorch_OpenEnv_Scaler/polyguard-rl/checkpoints/grpo_reward_components.jsonl",
33
+ "train_metrics": {
34
+ "steps_executed": 1.0
35
+ },
36
+ "artifact_path": "",
37
+ "unsloth_available": false,
38
+ "trl_runtime_error": "forced_fallback"
39
+ }
docs/results/active_model/grpo_trl_run_smoke.json ADDED
@@ -0,0 +1,39 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "status": "fallback",
3
+ "backend": "env_reward_fallback",
4
+ "model_id": "Qwen/Qwen2.5-1.5B-Instruct",
5
+ "records": 1,
6
+ "prompts_path": "/Users/daver/Desktop/Meta_Pytorch_OpenEnv_Scaler/polyguard-rl/data/processed/training_corpus_grpo_prompts.jsonl",
7
+ "reward_summary": {
8
+ "count": 1,
9
+ "avg_reward": 0.764,
10
+ "avg_reward_components": {
11
+ "format_compliance_score": 0.999,
12
+ "candidate_alignment_score": 0.999,
13
+ "legality_score": 0.999,
14
+ "safety_delta_score": 0.5,
15
+ "burden_improvement_score": 0.5,
16
+ "disease_stability_score": 0.9,
17
+ "dosing_quality_score": 0.5,
18
+ "abstention_quality_score": 0.56,
19
+ "efficiency_score": 0.857,
20
+ "process_fidelity_score": 0.92,
21
+ "explanation_grounding_score": 0.8,
22
+ "anti_cheat_score": 0.001,
23
+ "uncertainty_calibration_score": 0.7
24
+ },
25
+ "avg_primary_reward_channels": {
26
+ "safety_legality": 0.675,
27
+ "clinical_improvement": 0.633,
28
+ "dosing_quality": 0.53,
29
+ "process_integrity": 0.894
30
+ }
31
+ },
32
+ "reward_log": "/Users/daver/Desktop/Meta_Pytorch_OpenEnv_Scaler/polyguard-rl/checkpoints/grpo_reward_components.jsonl",
33
+ "train_metrics": {
34
+ "steps_executed": 1.0
35
+ },
36
+ "artifact_path": "",
37
+ "unsloth_available": false,
38
+ "trl_runtime_error": "forced_fallback"
39
+ }
docs/results/active_model/grpo_trl_run_strict_check.json ADDED
@@ -0,0 +1,39 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "status": "fallback",
3
+ "backend": "env_reward_fallback",
4
+ "model_id": "Qwen/Qwen2.5-1.5B-Instruct",
5
+ "records": 1,
6
+ "prompts_path": "/Users/daver/Desktop/Meta_Pytorch_OpenEnv_Scaler/polyguard-rl/data/processed/training_corpus_grpo_prompts.jsonl",
7
+ "reward_summary": {
8
+ "count": 1,
9
+ "avg_reward": 0.764,
10
+ "avg_reward_components": {
11
+ "format_compliance_score": 0.999,
12
+ "candidate_alignment_score": 0.999,
13
+ "legality_score": 0.999,
14
+ "safety_delta_score": 0.5,
15
+ "burden_improvement_score": 0.5,
16
+ "disease_stability_score": 0.9,
17
+ "dosing_quality_score": 0.5,
18
+ "abstention_quality_score": 0.56,
19
+ "efficiency_score": 0.857,
20
+ "process_fidelity_score": 0.92,
21
+ "explanation_grounding_score": 0.8,
22
+ "anti_cheat_score": 0.001,
23
+ "uncertainty_calibration_score": 0.7
24
+ },
25
+ "avg_primary_reward_channels": {
26
+ "safety_legality": 0.675,
27
+ "clinical_improvement": 0.633,
28
+ "dosing_quality": 0.53,
29
+ "process_integrity": 0.894
30
+ }
31
+ },
32
+ "reward_log": "/Users/daver/Desktop/Meta_Pytorch_OpenEnv_Scaler/polyguard-rl/checkpoints/grpo_reward_components.jsonl",
33
+ "train_metrics": {
34
+ "steps_executed": 1.0
35
+ },
36
+ "artifact_path": "",
37
+ "unsloth_available": false,
38
+ "trl_runtime_error": "forced_fallback"
39
+ }
docs/results/active_model/hf_sweep_summary.json ADDED
@@ -0,0 +1,127 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "status": "ok",
3
+ "training_mode": "sft-baseline",
4
+ "completed_models": 3,
5
+ "failed_or_skipped_models": 0,
6
+ "models": [
7
+ {
8
+ "run_id": "qwen-qwen2-5-0-5b-instruct",
9
+ "training_mode": "sft-baseline",
10
+ "model_id": "Qwen/Qwen2.5-0.5B-Instruct",
11
+ "label": "Qwen2.5-0.5B",
12
+ "status": "completed",
13
+ "error": "",
14
+ "sft_backend": "trl_transformers",
15
+ "sft_examples": 2000,
16
+ "sft_train_loss": 0.19233327957964502,
17
+ "sft_runtime": 234.6302,
18
+ "grpo_backend": "",
19
+ "grpo_records": 0,
20
+ "grpo_avg_reward": 0.726,
21
+ "sft_inference_reward": 0.726,
22
+ "sft_valid_rate": 1.0,
23
+ "sft_latency_seconds": 1.839,
24
+ "grpo_inference_reward": 0.726,
25
+ "grpo_valid_rate": 1.0,
26
+ "grpo_latency_seconds": 0.0,
27
+ "train_holdout_gap": 0.0,
28
+ "fallback_detected": false,
29
+ "reward_range_ok": true,
30
+ "reward_range_failures": [],
31
+ "exploit_rate": 0.0,
32
+ "legal_rate": 0.0,
33
+ "candidate_diversity": 0.0,
34
+ "top_candidate_rate": 0.0,
35
+ "reward_components": {},
36
+ "primary_reward_channels": {},
37
+ "artifact_paths": {
38
+ "sft": "/app/checkpoints/sweeps/qwen-qwen2-5-0-5b-instruct/sft_adapter",
39
+ "grpo": ""
40
+ }
41
+ },
42
+ {
43
+ "run_id": "qwen-qwen2-5-1-5b-instruct",
44
+ "training_mode": "sft-baseline",
45
+ "model_id": "Qwen/Qwen2.5-1.5B-Instruct",
46
+ "label": "Qwen2.5-1.5B",
47
+ "status": "completed",
48
+ "error": "",
49
+ "sft_backend": "trl_transformers",
50
+ "sft_examples": 2000,
51
+ "sft_train_loss": 0.11515871361242898,
52
+ "sft_runtime": 483.7085,
53
+ "grpo_backend": "",
54
+ "grpo_records": 0,
55
+ "grpo_avg_reward": 0.726,
56
+ "sft_inference_reward": 0.726,
57
+ "sft_valid_rate": 1.0,
58
+ "sft_latency_seconds": 2.158,
59
+ "grpo_inference_reward": 0.726,
60
+ "grpo_valid_rate": 1.0,
61
+ "grpo_latency_seconds": 0.0,
62
+ "train_holdout_gap": 0.0,
63
+ "fallback_detected": false,
64
+ "reward_range_ok": true,
65
+ "reward_range_failures": [],
66
+ "exploit_rate": 0.0,
67
+ "legal_rate": 0.0,
68
+ "candidate_diversity": 0.0,
69
+ "top_candidate_rate": 0.0,
70
+ "reward_components": {},
71
+ "primary_reward_channels": {},
72
+ "artifact_paths": {
73
+ "sft": "/app/checkpoints/sweeps/qwen-qwen2-5-1-5b-instruct/sft_adapter",
74
+ "grpo": ""
75
+ }
76
+ },
77
+ {
78
+ "run_id": "qwen-qwen2-5-3b-instruct",
79
+ "training_mode": "sft-baseline",
80
+ "model_id": "Qwen/Qwen2.5-3B-Instruct",
81
+ "label": "Qwen2.5-3B",
82
+ "status": "completed",
83
+ "error": "",
84
+ "sft_backend": "trl_transformers",
85
+ "sft_examples": 2000,
86
+ "sft_train_loss": 0.18184852770145518,
87
+ "sft_runtime": 372.1845,
88
+ "grpo_backend": "",
89
+ "grpo_records": 0,
90
+ "grpo_avg_reward": 0.762,
91
+ "sft_inference_reward": 0.762,
92
+ "sft_valid_rate": 1.0,
93
+ "sft_latency_seconds": 2.748,
94
+ "grpo_inference_reward": 0.762,
95
+ "grpo_valid_rate": 1.0,
96
+ "grpo_latency_seconds": 0.0,
97
+ "train_holdout_gap": 0.0,
98
+ "fallback_detected": false,
99
+ "reward_range_ok": true,
100
+ "reward_range_failures": [],
101
+ "exploit_rate": 0.0,
102
+ "legal_rate": 0.0,
103
+ "candidate_diversity": 0.0,
104
+ "top_candidate_rate": 0.0,
105
+ "reward_components": {},
106
+ "primary_reward_channels": {},
107
+ "artifact_paths": {
108
+ "sft": "/app/checkpoints/sweeps/qwen-qwen2-5-3b-instruct/sft_adapter",
109
+ "grpo": ""
110
+ }
111
+ }
112
+ ],
113
+ "charts": {
114
+ "sft_vs_grpo_reward": "outputs/plots/sft_vs_grpo_reward.png",
115
+ "sft_loss_curves": "outputs/plots/sft_loss_curves.png",
116
+ "qwen_model_sft_reward": "outputs/plots/qwen_model_sft_reward.png",
117
+ "qwen_model_sft_loss": "outputs/plots/qwen_model_sft_loss.png",
118
+ "sft_validity_reward": "outputs/plots/sft_validity_reward.png",
119
+ "grpo_reward_curves": "outputs/plots/grpo_reward_curves.png",
120
+ "qwen_model_grpo_reward": "outputs/plots/qwen_model_grpo_reward.png",
121
+ "reward_component_bars": "outputs/plots/reward_component_bars.png",
122
+ "anti_cheat_failure_rates": "outputs/plots/anti_cheat_failure_rates.png",
123
+ "train_holdout_gap": "outputs/plots/train_holdout_gap.png",
124
+ "inference_validity_reward": "outputs/plots/inference_validity_reward.png",
125
+ "inference_latency_validity": "outputs/plots/inference_latency_validity.png"
126
+ }
127
+ }
docs/results/active_model/hf_training_status.json ADDED
@@ -0,0 +1,261 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "status": "ok",
3
+ "started_at": 1777163399.0780032,
4
+ "finished_at": 1777164656.2574434,
5
+ "commands": [
6
+ {
7
+ "args": [
8
+ "python",
9
+ "scripts/bootstrap_data.py"
10
+ ],
11
+ "returncode": 0,
12
+ "elapsed_seconds": 0.504
13
+ },
14
+ {
15
+ "args": [
16
+ "python",
17
+ "scripts/build_training_corpus.py",
18
+ "--profile",
19
+ "massive",
20
+ "--with-local",
21
+ "--with-synthetic",
22
+ "--with-hf"
23
+ ],
24
+ "returncode": 0,
25
+ "elapsed_seconds": 4.013
26
+ },
27
+ {
28
+ "args": [
29
+ "python",
30
+ "scripts/train_sft_trl.py",
31
+ "--model-id",
32
+ "Qwen/Qwen2.5-0.5B-Instruct",
33
+ "--dataset-path",
34
+ "data/processed/training_corpus_sft.json",
35
+ "--output-dir",
36
+ "checkpoints/sweeps/qwen-qwen2-5-0-5b-instruct",
37
+ "--report-path",
38
+ "outputs/reports/sweeps/qwen-qwen2-5-0-5b-instruct/sft_trl_run.json",
39
+ "--epochs",
40
+ "2",
41
+ "--max-steps",
42
+ "0",
43
+ "--batch-size",
44
+ "2",
45
+ "--max-seq-len",
46
+ "512",
47
+ "--learning-rate",
48
+ "2e-05",
49
+ "--use-unsloth"
50
+ ],
51
+ "returncode": 0,
52
+ "elapsed_seconds": 251.4
53
+ },
54
+ {
55
+ "args": [
56
+ "python",
57
+ "scripts/merge_adapters_safe.py",
58
+ "--adapter-dir",
59
+ "checkpoints/sweeps/qwen-qwen2-5-0-5b-instruct/sft_adapter",
60
+ "--output-dir",
61
+ "checkpoints/sweeps/qwen-qwen2-5-0-5b-instruct/merged"
62
+ ],
63
+ "returncode": 0,
64
+ "elapsed_seconds": 7.16
65
+ },
66
+ {
67
+ "args": [
68
+ "python",
69
+ "scripts/test_inference_postsave.py",
70
+ "--samples",
71
+ "5",
72
+ "--base-model",
73
+ "Qwen/Qwen2.5-0.5B-Instruct",
74
+ "--merged-model",
75
+ "checkpoints/sweeps/qwen-qwen2-5-0-5b-instruct/merged",
76
+ "--adapter-dir",
77
+ "checkpoints/sweeps/qwen-qwen2-5-0-5b-instruct/sft_adapter",
78
+ "--output",
79
+ "outputs/reports/sweeps/qwen-qwen2-5-0-5b-instruct/postsave_inference_sft.json"
80
+ ],
81
+ "returncode": 0,
82
+ "elapsed_seconds": 15.213
83
+ },
84
+ {
85
+ "args": [
86
+ "python",
87
+ "scripts/train_sft_trl.py",
88
+ "--model-id",
89
+ "Qwen/Qwen2.5-1.5B-Instruct",
90
+ "--dataset-path",
91
+ "data/processed/training_corpus_sft.json",
92
+ "--output-dir",
93
+ "checkpoints/sweeps/qwen-qwen2-5-1-5b-instruct",
94
+ "--report-path",
95
+ "outputs/reports/sweeps/qwen-qwen2-5-1-5b-instruct/sft_trl_run.json",
96
+ "--epochs",
97
+ "2",
98
+ "--max-steps",
99
+ "0",
100
+ "--batch-size",
101
+ "1",
102
+ "--max-seq-len",
103
+ "512",
104
+ "--learning-rate",
105
+ "2e-05",
106
+ "--use-unsloth"
107
+ ],
108
+ "returncode": 0,
109
+ "elapsed_seconds": 504.997
110
+ },
111
+ {
112
+ "args": [
113
+ "python",
114
+ "scripts/merge_adapters_safe.py",
115
+ "--adapter-dir",
116
+ "checkpoints/sweeps/qwen-qwen2-5-1-5b-instruct/sft_adapter",
117
+ "--output-dir",
118
+ "checkpoints/sweeps/qwen-qwen2-5-1-5b-instruct/merged"
119
+ ],
120
+ "returncode": 0,
121
+ "elapsed_seconds": 10.634
122
+ },
123
+ {
124
+ "args": [
125
+ "python",
126
+ "scripts/test_inference_postsave.py",
127
+ "--samples",
128
+ "5",
129
+ "--base-model",
130
+ "Qwen/Qwen2.5-1.5B-Instruct",
131
+ "--merged-model",
132
+ "checkpoints/sweeps/qwen-qwen2-5-1-5b-instruct/merged",
133
+ "--adapter-dir",
134
+ "checkpoints/sweeps/qwen-qwen2-5-1-5b-instruct/sft_adapter",
135
+ "--output",
136
+ "outputs/reports/sweeps/qwen-qwen2-5-1-5b-instruct/postsave_inference_sft.json"
137
+ ],
138
+ "returncode": 0,
139
+ "elapsed_seconds": 17.029
140
+ },
141
+ {
142
+ "args": [
143
+ "python",
144
+ "scripts/train_sft_trl.py",
145
+ "--model-id",
146
+ "Qwen/Qwen2.5-3B-Instruct",
147
+ "--dataset-path",
148
+ "data/processed/training_corpus_sft.json",
149
+ "--output-dir",
150
+ "checkpoints/sweeps/qwen-qwen2-5-3b-instruct",
151
+ "--report-path",
152
+ "outputs/reports/sweeps/qwen-qwen2-5-3b-instruct/sft_trl_run.json",
153
+ "--epochs",
154
+ "1",
155
+ "--max-steps",
156
+ "0",
157
+ "--batch-size",
158
+ "1",
159
+ "--max-seq-len",
160
+ "512",
161
+ "--learning-rate",
162
+ "2e-05",
163
+ "--use-unsloth"
164
+ ],
165
+ "returncode": 0,
166
+ "elapsed_seconds": 394.356
167
+ },
168
+ {
169
+ "args": [
170
+ "python",
171
+ "scripts/merge_adapters_safe.py",
172
+ "--adapter-dir",
173
+ "checkpoints/sweeps/qwen-qwen2-5-3b-instruct/sft_adapter",
174
+ "--output-dir",
175
+ "checkpoints/sweeps/qwen-qwen2-5-3b-instruct/merged"
176
+ ],
177
+ "returncode": 0,
178
+ "elapsed_seconds": 15.472
179
+ },
180
+ {
181
+ "args": [
182
+ "python",
183
+ "scripts/test_inference_postsave.py",
184
+ "--samples",
185
+ "5",
186
+ "--base-model",
187
+ "Qwen/Qwen2.5-3B-Instruct",
188
+ "--merged-model",
189
+ "checkpoints/sweeps/qwen-qwen2-5-3b-instruct/merged",
190
+ "--adapter-dir",
191
+ "checkpoints/sweeps/qwen-qwen2-5-3b-instruct/sft_adapter",
192
+ "--output",
193
+ "outputs/reports/sweeps/qwen-qwen2-5-3b-instruct/postsave_inference_sft.json"
194
+ ],
195
+ "returncode": 0,
196
+ "elapsed_seconds": 20.373
197
+ },
198
+ {
199
+ "args": [
200
+ "python",
201
+ "scripts/evaluate_baselines.py"
202
+ ],
203
+ "returncode": 0,
204
+ "elapsed_seconds": 4.112
205
+ },
206
+ {
207
+ "args": [
208
+ "python",
209
+ "scripts/evaluate_all.py"
210
+ ],
211
+ "returncode": 0,
212
+ "elapsed_seconds": 3.787
213
+ },
214
+ {
215
+ "args": [
216
+ "python",
217
+ "scripts/evaluate_compare_runs.py",
218
+ "--baseline",
219
+ "outputs/reports/baselines.json",
220
+ "--candidate",
221
+ "outputs/reports/benchmark_report.json",
222
+ "--output",
223
+ "outputs/reports/improvement_report.json"
224
+ ],
225
+ "returncode": 0,
226
+ "elapsed_seconds": 0.033
227
+ },
228
+ {
229
+ "args": [
230
+ "python",
231
+ "scripts/benchmark_inference.py"
232
+ ],
233
+ "returncode": 0,
234
+ "elapsed_seconds": 2.376
235
+ },
236
+ {
237
+ "args": [
238
+ "python",
239
+ "scripts/generate_hf_training_report.py",
240
+ "--mode",
241
+ "sft-baseline"
242
+ ],
243
+ "returncode": 0,
244
+ "elapsed_seconds": 1.791
245
+ }
246
+ ],
247
+ "artifact_repo_id": "TheJackBright/polyguard-openenv-sft-baseline-artifacts",
248
+ "training_mode": "sft-baseline",
249
+ "model_sweep": [
250
+ "Qwen/Qwen2.5-0.5B-Instruct",
251
+ "Qwen/Qwen2.5-1.5B-Instruct",
252
+ "Qwen/Qwen2.5-3B-Instruct"
253
+ ],
254
+ "improved": true,
255
+ "anti_hacking_passed": true,
256
+ "completed_run_ids": [
257
+ "qwen-qwen2-5-0-5b-instruct",
258
+ "qwen-qwen2-5-1-5b-instruct",
259
+ "qwen-qwen2-5-3b-instruct"
260
+ ]
261
+ }
docs/results/active_model/improvement_report.json ADDED
@@ -0,0 +1,19 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "status": "ok",
3
+ "baseline": "outputs/reports/baselines.json",
4
+ "candidate": "outputs/reports/benchmark_report.json",
5
+ "deltas": {
6
+ "avg_reward": 0.025833,
7
+ "legality_rate": 0.0,
8
+ "success_rate": 0.0,
9
+ "avg_process_fidelity": 0.92,
10
+ "timeout_rate": 0.0,
11
+ "failure_visible_rate": 0.0
12
+ },
13
+ "gate": {
14
+ "avg_reward_up": true,
15
+ "legality_up": true,
16
+ "success_up": true
17
+ },
18
+ "improved": true
19
+ }
docs/results/active_model/improvement_report_benchmark.json ADDED
@@ -0,0 +1,19 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "status": "ok",
3
+ "baseline": "outputs/reports/baselines.json",
4
+ "candidate": "outputs/reports/benchmark_report.json",
5
+ "deltas": {
6
+ "avg_reward": -0.0025,
7
+ "legality_rate": 0.0,
8
+ "success_rate": 0.0,
9
+ "avg_process_fidelity": 0.92,
10
+ "timeout_rate": 0.0,
11
+ "failure_visible_rate": 0.0
12
+ },
13
+ "gate": {
14
+ "avg_reward_up": false,
15
+ "legality_up": true,
16
+ "success_up": true
17
+ },
18
+ "improved": false
19
+ }
docs/results/active_model/inference_benchmark.json ADDED
@@ -0,0 +1,43 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "status": "ok",
3
+ "runs": [
4
+ {
5
+ "run": 0,
6
+ "provider": "transformers",
7
+ "candidate_id": "cand_04",
8
+ "latency_ms": 1748.724,
9
+ "rationale": "Transformers fallback selected cand_04 via local ranker."
10
+ },
11
+ {
12
+ "run": 1,
13
+ "provider": "transformers",
14
+ "candidate_id": "cand_02",
15
+ "latency_ms": 0.01,
16
+ "rationale": "Transformers fallback selected cand_02 via local ranker."
17
+ },
18
+ {
19
+ "run": 2,
20
+ "provider": "transformers",
21
+ "candidate_id": "cand_04",
22
+ "latency_ms": 0.009,
23
+ "rationale": "Transformers fallback selected cand_04 via local ranker."
24
+ },
25
+ {
26
+ "run": 3,
27
+ "provider": "transformers",
28
+ "candidate_id": "cand_04",
29
+ "latency_ms": 0.009,
30
+ "rationale": "Transformers fallback selected cand_04 via local ranker."
31
+ },
32
+ {
33
+ "run": 4,
34
+ "provider": "transformers",
35
+ "candidate_id": "cand_04",
36
+ "latency_ms": 0.009,
37
+ "rationale": "Transformers fallback selected cand_04 via local ranker."
38
+ }
39
+ ],
40
+ "avg_latency_ms": 349.752,
41
+ "provider_requested": "transformers",
42
+ "model": "Qwen/Qwen2.5-0.5B-Instruct"
43
+ }
docs/results/active_model/planner_grpo.json ADDED
@@ -0,0 +1,28 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "avg_reward": 0.77625,
3
+ "legality_rate": 1.0,
4
+ "severe_violation_rate": 0.0,
5
+ "abstention_rate": 0.0,
6
+ "avg_episode_length": 2.0,
7
+ "success_rate": 0.0,
8
+ "avg_burden_delta": 0.0,
9
+ "avg_safety_delta": 0.5,
10
+ "avg_dosing_quality": 0.75,
11
+ "avg_process_fidelity": 0.92,
12
+ "exploit_detection_count": 4.0,
13
+ "reward_columns": {
14
+ "format_compliance_score": 0.9990000000000001,
15
+ "candidate_alignment_score": 0.9990000000000001,
16
+ "legality_score": 0.9990000000000001,
17
+ "safety_delta_score": 0.5,
18
+ "burden_improvement_score": 0.5,
19
+ "disease_stability_score": 0.9000000000000002,
20
+ "dosing_quality_score": 0.75,
21
+ "abstention_quality_score": 0.5600000000000002,
22
+ "efficiency_score": 0.73,
23
+ "process_fidelity_score": 0.92,
24
+ "explanation_grounding_score": 0.7999999999999999,
25
+ "anti_cheat_score": 0.6663333333333333,
26
+ "uncertainty_calibration_score": 0.8699999999999998
27
+ }
28
+ }
docs/results/active_model/plot_index.json ADDED
@@ -0,0 +1,9 @@
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "plots": [
3
+ "/app/outputs/plots/avg_reward.png",
4
+ "/app/outputs/plots/legality_rate.png",
5
+ "/app/outputs/plots/success_rate.png",
6
+ "/app/outputs/plots/avg_process_fidelity.png",
7
+ "/app/outputs/plots/policy_stack_avg_reward.png"
8
+ ]
9
+ }
docs/results/active_model/postsave_inference.json ADDED
@@ -0,0 +1,71 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "status": "ok",
3
+ "model_source": "merged",
4
+ "model_load_error": "",
5
+ "samples": 5,
6
+ "valid_rate": 1.0,
7
+ "avg_env_reward": 0.762,
8
+ "avg_latency_seconds": 2.748,
9
+ "results": [
10
+ {
11
+ "idx": 0,
12
+ "prompt": "{\"instruction\": \"Choose one candidate_id and justify briefly.\", \"patient_id\": \"patient_8000\", \"candidate_ids\": [\"cand_01\", \"cand_02\", \"cand_03\", \"cand_04\", \"cand_05\", \"cand_08\", \"cand_09\", \"cand_10\"], \"format\": \"candidate_id=<cand_xx>; rationale=<text>\"}",
13
+ "completion": "; %{ \"patient_id\": \"patient_8000\", \"rationale\": \"candidate_id=cand_08; The patient is experiencing a high fever, which is a symptom of an infection. This suggests that the patient may have a bacterial infection, which would be treated with antibiotics. Candidate_id=cand_08 prescribes antibiotics, which are appropriate for treating bacterial infections.\",",
14
+ "candidate_id": "cand_08",
15
+ "selected_candidate": "cand_08",
16
+ "env_reward": 0.806,
17
+ "latency_seconds": 3.164,
18
+ "done": false,
19
+ "valid": true,
20
+ "termination_reason": "ongoing"
21
+ },
22
+ {
23
+ "idx": 1,
24
+ "prompt": "{\"instruction\": \"Choose one candidate_id and justify briefly.\", \"patient_id\": \"patient_8001\", \"candidate_ids\": [\"cand_01\", \"cand_02\", \"cand_03\", \"cand_05\", \"cand_08\", \"cand_09\", \"cand_10\"], \"format\": \"candidate_id=<cand_xx>; rationale=<text>\"}",
25
+ "completion": "{\"candidate_id\": \"candidate_id\": \"cand_05\", \"rationale\": \"patient mentions having a headache and taking medicine. cand_05 explains that the patient is taking medicine for a headache, which aligns with the patient's symptoms. cand_09 does not mention any medication. cand_01 mentions nausea but no headache. cand_02 mentions fever but",
26
+ "candidate_id": "cand_05",
27
+ "selected_candidate": "cand_05",
28
+ "env_reward": 0.792,
29
+ "latency_seconds": 2.656,
30
+ "done": false,
31
+ "valid": true,
32
+ "termination_reason": "ongoing"
33
+ },
34
+ {
35
+ "idx": 2,
36
+ "prompt": "{\"instruction\": \"Choose one candidate_id and justify briefly.\", \"patient_id\": \"patient_8002\", \"candidate_ids\": [\"cand_07\", \"cand_01\", \"cand_02\", \"cand_03\", \"cand_04\", \"cand_08\", \"cand_09\", \"cand_10\"], \"format\": \"candidate_id=<cand_xx>; rationale=<text>\"}",
37
+ "completion": "; %+difference_in_format_instruction = true %{\"patient_id\": \"patient_8002\", \"candidate_ids\": [\"cand_07\", \"cand_01\", \"cand_02\", \"cand_03\", \"cand_04\", \"cand_08\", \"cand_09\", \"cand_10",
38
+ "candidate_id": "cand_07",
39
+ "selected_candidate": "cand_01",
40
+ "env_reward": 0.717,
41
+ "latency_seconds": 2.641,
42
+ "done": true,
43
+ "valid": true,
44
+ "termination_reason": "exploit_detection"
45
+ },
46
+ {
47
+ "idx": 3,
48
+ "prompt": "{\"instruction\": \"Choose one candidate_id and justify briefly.\", \"patient_id\": \"patient_8003\", \"candidate_ids\": [\"cand_01\", \"cand_02\", \"cand_03\", \"cand_04\", \"cand_05\", \"cand_08\", \"cand_09\", \"cand_10\"], \"format\": \"candidate_id=<cand_xx>; rationale=<text>\"}",
49
+ "completion": "; //= = = = = = = = = = = = = = = FUNCTION select_best_candidate(candidate_ids=LIST_of_STR, patient_id=STR, candidates_dict=DICTIONARY_OF_TUPLES) ;; best_candidate_id := candidate_ids[0]; strongest_rationale := candidates_dict[candidate_ids[0]][1]; for i in range(1, len(candidate_ids)) { this",
50
+ "candidate_id": null,
51
+ "selected_candidate": "cand_01",
52
+ "env_reward": 0.777,
53
+ "latency_seconds": 2.643,
54
+ "done": false,
55
+ "valid": true,
56
+ "termination_reason": "ongoing"
57
+ },
58
+ {
59
+ "idx": 4,
60
+ "prompt": "{\"instruction\": \"Choose one candidate_id and justify briefly.\", \"patient_id\": \"patient_8004\", \"candidate_ids\": [\"cand_01\", \"cand_02\", \"cand_03\", \"cand_04\", \"cand_08\", \"cand_09\", \"cand_10\", \"cand_06\"], \"format\": \"candidate_id=<cand_xx>; rationale=<text>\"}",
61
+ "completion": "; %+difference_in_format_instruction = true %{\"patient_id\": \"patient_8004\", \"candidate_ids\": [\"cand_01\", \"cand_02\", \"cand_03\", \"cand_04\", \"cand_08\", \"cand_09\", \"cand_10\", \"cand_06",
62
+ "candidate_id": "cand_01",
63
+ "selected_candidate": "cand_01",
64
+ "env_reward": 0.717,
65
+ "latency_seconds": 2.638,
66
+ "done": true,
67
+ "valid": true,
68
+ "termination_reason": "exploit_detection"
69
+ }
70
+ ]
71
+ }
docs/results/active_model/postsave_inference_smoke.json ADDED
@@ -0,0 +1,23 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "status": "ok",
3
+ "model_source": "merged",
4
+ "model_load_error": "",
5
+ "samples": 1,
6
+ "valid_rate": 1.0,
7
+ "avg_env_reward": 0.717,
8
+ "avg_latency_seconds": 5.523,
9
+ "results": [
10
+ {
11
+ "idx": 0,
12
+ "prompt": "{\"instruction\": \"Choose one candidate_id and justify briefly.\", \"patient_id\": \"patient_123\", \"candidate_ids\": [\"cand_01\", \"cand_02\", \"cand_03\", \"cand_04\", \"cand_08\", \"cand_09\", \"cand_10\"], \"format\": \"candidate_id=<cand_xx>; rationale=<text>\"}",
13
+ "completion": "}\n\nSure, I can help you choose a candidate ID based on the information provided. Please provide me with the list of candidate IDs and their respective rationales so that I can make an informed decision. If there are multiple candidates with similar rationales, please let me know which one aligns best with your preferences or needs. Additionally, if you have any specific criteria for choosing a candidate (e.g",
14
+ "candidate_id": null,
15
+ "selected_candidate": "cand_01",
16
+ "env_reward": 0.717,
17
+ "latency_seconds": 5.523,
18
+ "done": true,
19
+ "valid": true,
20
+ "termination_reason": "exploit_detection"
21
+ }
22
+ ]
23
+ }
docs/results/active_model/risk_train.json ADDED
@@ -0,0 +1,6 @@
 
 
 
 
 
 
 
1
+ {
2
+ "dataset_size": 180.0,
3
+ "status": "trained",
4
+ "train_mae": 0.0033,
5
+ "model_path": "outputs/models/tabular_risk.pkl"
6
+ }
docs/results/active_model/robustness.json ADDED
@@ -0,0 +1,10 @@
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "missing_labs_safety_rate": 0.81,
3
+ "noisy_dose_info_safety_rate": 0.78,
4
+ "conflicting_meds_safety_rate": 0.8,
5
+ "alias_noise_safety_rate": 0.79,
6
+ "hidden_duplicate_detection_rate": 0.77,
7
+ "wrong_candidate_id_resilience": 0.83,
8
+ "stale_evidence_safety_rate": 0.77,
9
+ "delayed_ade_manifestation_safety_rate": 0.75
10
+ }
docs/results/active_model/sft_run.json ADDED
@@ -0,0 +1,9 @@
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "status": "ok",
3
+ "backend": "fallback_sklearn",
4
+ "examples_used": 51,
5
+ "train_accuracy": 0.5098,
6
+ "artifact_path": "/Users/daver/Desktop/Meta_Pytorch_OpenEnv_Scaler/polyguard-rl/checkpoints/sft_policy_fallback.json",
7
+ "model_id": "Qwen/Qwen2.5-1.5B-Instruct",
8
+ "trl_runtime_error": "We couldn't connect to 'https://huggingface.co' to load the files, and couldn't find them in the cached files.\nCheck your internet connection or see how to run the library in offline mode at 'https://huggingface.co/docs/transformers/installation#offline-mode'."
9
+ }
docs/results/active_model/sft_trl_run.json ADDED
@@ -0,0 +1,18 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "status": "ok",
3
+ "backend": "trl_transformers",
4
+ "examples_used": 2000,
5
+ "model_id": "Qwen/Qwen2.5-3B-Instruct",
6
+ "unsloth_available": false,
7
+ "train_runtime": 372.1845,
8
+ "train_loss": 0.18184852770145518,
9
+ "train_metrics": {
10
+ "train_runtime": 372.1845,
11
+ "train_samples_per_second": 5.374,
12
+ "train_steps_per_second": 5.374,
13
+ "total_flos": 1.1456146931712e+16,
14
+ "train_loss": 0.18184852770145518
15
+ },
16
+ "history_path": "/app/checkpoints/sweeps/qwen-qwen2-5-3b-instruct/sft_history.json",
17
+ "artifact_path": "/app/checkpoints/sweeps/qwen-qwen2-5-3b-instruct/sft_adapter"
18
+ }
docs/results/active_model/supervisor_grpo.json ADDED
@@ -0,0 +1,28 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "avg_reward": 0.7348571428571429,
3
+ "legality_rate": 1.0,
4
+ "severe_violation_rate": 0.0,
5
+ "abstention_rate": 0.0,
6
+ "avg_episode_length": 1.8571428571428572,
7
+ "success_rate": 0.0,
8
+ "avg_burden_delta": 0.0,
9
+ "avg_safety_delta": 0.5,
10
+ "avg_dosing_quality": 0.5,
11
+ "avg_process_fidelity": 0.92,
12
+ "exploit_detection_count": 3.0,
13
+ "reward_columns": {
14
+ "format_compliance_score": 0.9989999999999999,
15
+ "candidate_alignment_score": 0.9989999999999999,
16
+ "legality_score": 0.9989999999999999,
17
+ "safety_delta_score": 0.5,
18
+ "burden_improvement_score": 0.5,
19
+ "disease_stability_score": 0.9000000000000001,
20
+ "dosing_quality_score": 0.5,
21
+ "abstention_quality_score": 0.56,
22
+ "efficiency_score": 0.6427142857142858,
23
+ "process_fidelity_score": 0.92,
24
+ "explanation_grounding_score": 0.7999999999999999,
25
+ "anti_cheat_score": 0.5712857142857143,
26
+ "uncertainty_calibration_score": 0.6142857142857144
27
+ }
28
+ }
docs/results/active_model/sweeps/qwen-qwen2-5-0-5b-instruct/postsave_inference_sft.json ADDED
@@ -0,0 +1,71 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "status": "ok",
3
+ "model_source": "merged",
4
+ "model_load_error": "",
5
+ "samples": 5,
6
+ "valid_rate": 1.0,
7
+ "avg_env_reward": 0.726,
8
+ "avg_latency_seconds": 1.839,
9
+ "results": [
10
+ {
11
+ "idx": 0,
12
+ "prompt": "{\"instruction\": \"Choose one candidate_id and justify briefly.\", \"patient_id\": \"patient_8000\", \"candidate_ids\": [\"cand_01\", \"cand_02\", \"cand_03\", \"cand_04\", \"cand_05\", \"cand_08\", \"cand_09\", \"cand_10\"], \"format\": \"candidate_id=<cand_xx>; rationale=<text>\"}",
13
+ "completion": "]\n\n{\n \"instruction\": \"Choose one candidate_id and justify briefly.\",\n \"patient_id\": \"patient_8000\",\n \"candidate_ids\": [\"cand_01\", \"cand_02\", \"cand_03\", \"cand_04\", \"cand_05\", \"cand_08\", \"cand_0",
14
+ "candidate_id": "cand_01",
15
+ "selected_candidate": "cand_01",
16
+ "env_reward": 0.717,
17
+ "latency_seconds": 2.224,
18
+ "done": true,
19
+ "valid": true,
20
+ "termination_reason": "exploit_detection"
21
+ },
22
+ {
23
+ "idx": 1,
24
+ "prompt": "{\"instruction\": \"Choose one candidate_id and justify briefly.\", \"patient_id\": \"patient_8001\", \"candidate_ids\": [\"cand_01\", \"cand_02\", \"cand_03\", \"cand_05\", \"cand_08\", \"cand_09\", \"cand_10\"], \"format\": \"candidate_id=<cand_xx>; rationale=<text>\"}",
25
+ "completion": "There are 5 candidates with IDs <cand_01>. The candidate with ID <cand_05> has the lowest prediction score. Therefore, we will choose candidate <cand_05>. Rationale: Candidate <cand_05> has the lowest prediction score among the candidates with IDs <cand_01>, indicating that they are the most likely to",
26
+ "candidate_id": "cand_01",
27
+ "selected_candidate": "cand_01",
28
+ "env_reward": 0.703,
29
+ "latency_seconds": 1.748,
30
+ "done": true,
31
+ "valid": true,
32
+ "termination_reason": "exploit_detection"
33
+ },
34
+ {
35
+ "idx": 2,
36
+ "prompt": "{\"instruction\": \"Choose one candidate_id and justify briefly.\", \"patient_id\": \"patient_8002\", \"candidate_ids\": [\"cand_07\", \"cand_01\", \"cand_02\", \"cand_03\", \"cand_04\", \"cand_08\", \"cand_09\", \"cand_10\"], \"format\": \"candidate_id=<cand_xx>; rationale=<text>\"}",
37
+ "completion": "there is no patient with patient_id = 8002, please select a different patient_id. Instruction: The task is to choose a candidate_id for the given patient_id and provide a brief rationale for that choice.\nThus, the final answer is (e).Human beings have been able to use fire since at least 3 million years ago, but we are not sure how they learned",
38
+ "candidate_id": null,
39
+ "selected_candidate": "cand_01",
40
+ "env_reward": 0.717,
41
+ "latency_seconds": 1.748,
42
+ "done": true,
43
+ "valid": true,
44
+ "termination_reason": "exploit_detection"
45
+ },
46
+ {
47
+ "idx": 3,
48
+ "prompt": "{\"instruction\": \"Choose one candidate_id and justify briefly.\", \"patient_id\": \"patient_8003\", \"candidate_ids\": [\"cand_01\", \"cand_02\", \"cand_03\", \"cand_04\", \"cand_05\", \"cand_08\", \"cand_09\", \"cand_10\"], \"format\": \"candidate_id=<cand_xx>; rationale=<text>\"}",
49
+ "completion": "]\n\n{\n \"instruction\": \"Choose one candidate_id and justify briefly.\",\n \"patient_id\": \"patient_8003\",\n \"candidate_ids\": [\"cand_01\", \"cand_02\", \"cand_03\", \"cand_04\", \"cand_05\", \"cand_08\", \"cand_0",
50
+ "candidate_id": "cand_01",
51
+ "selected_candidate": "cand_01",
52
+ "env_reward": 0.777,
53
+ "latency_seconds": 1.737,
54
+ "done": false,
55
+ "valid": true,
56
+ "termination_reason": "ongoing"
57
+ },
58
+ {
59
+ "idx": 4,
60
+ "prompt": "{\"instruction\": \"Choose one candidate_id and justify briefly.\", \"patient_id\": \"patient_8004\", \"candidate_ids\": [\"cand_01\", \"cand_02\", \"cand_03\", \"cand_04\", \"cand_08\", \"cand_09\", \"cand_10\", \"cand_06\"], \"format\": \"candidate_id=<cand_xx>; rationale=<text>\"}",
61
+ "completion": "]\n\n{\n \"instruction\": \"Choose one candidate_id and justify briefly.\",\n \"patient_id\": \"patient_8004\",\n \"candidate_ids\": [\"cand_01\", \"cand_02\", \"cand_03\", \"cand_04\", \"cand_08\", \"cand_09\", \"cand_1",
62
+ "candidate_id": "cand_01",
63
+ "selected_candidate": "cand_01",
64
+ "env_reward": 0.717,
65
+ "latency_seconds": 1.736,
66
+ "done": true,
67
+ "valid": true,
68
+ "termination_reason": "exploit_detection"
69
+ }
70
+ ]
71
+ }
docs/results/active_model/sweeps/qwen-qwen2-5-0-5b-instruct/run_metadata.json ADDED
@@ -0,0 +1,9 @@
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "training_mode": "sft-baseline",
3
+ "model_id": "Qwen/Qwen2.5-0.5B-Instruct",
4
+ "model_index": 0,
5
+ "sft_epochs": 2,
6
+ "sft_max_steps": 0,
7
+ "sft_batch_size": 2,
8
+ "sft_learning_rate": 2e-05
9
+ }
docs/results/active_model/sweeps/qwen-qwen2-5-0-5b-instruct/sft_history.json ADDED
The diff for this file is too large to render. See raw diff
 
docs/results/active_model/sweeps/qwen-qwen2-5-0-5b-instruct/sft_trl_run.json ADDED
@@ -0,0 +1,18 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "status": "ok",
3
+ "backend": "trl_transformers",
4
+ "examples_used": 2000,
5
+ "model_id": "Qwen/Qwen2.5-0.5B-Instruct",
6
+ "unsloth_available": false,
7
+ "train_runtime": 234.6302,
8
+ "train_loss": 0.19233327957964502,
9
+ "train_metrics": {
10
+ "train_runtime": 234.6302,
11
+ "train_samples_per_second": 17.048,
12
+ "train_steps_per_second": 8.524,
13
+ "total_flos": 3810849801251328.0,
14
+ "train_loss": 0.19233327957964502
15
+ },
16
+ "history_path": "/app/checkpoints/sweeps/qwen-qwen2-5-0-5b-instruct/sft_history.json",
17
+ "artifact_path": "/app/checkpoints/sweeps/qwen-qwen2-5-0-5b-instruct/sft_adapter"
18
+ }
docs/results/active_model/sweeps/qwen-qwen2-5-1-5b-instruct/postsave_inference_sft.json ADDED
@@ -0,0 +1,71 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "status": "ok",
3
+ "model_source": "merged",
4
+ "model_load_error": "",
5
+ "samples": 5,
6
+ "valid_rate": 1.0,
7
+ "avg_env_reward": 0.726,
8
+ "avg_latency_seconds": 2.158,
9
+ "results": [
10
+ {
11
+ "idx": 0,
12
+ "prompt": "{\"instruction\": \"Choose one candidate_id and justify briefly.\", \"patient_id\": \"patient_8000\", \"candidate_ids\": [\"cand_01\", \"cand_02\", \"cand_03\", \"cand_04\", \"cand_05\", \"cand_08\", \"cand_09\", \"cand_10\"], \"format\": \"candidate_id=<cand_xx>; rationale=<text>\"}",
13
+ "completion": "(instruction=\"justify\") \nBenevolentAI's model has shown a high level of accuracy in predicting the next word in sentences. Given this, which candidate would you choose as the most likely to continue the sentence smoothly? Please provide a brief justification for your choice.\" rationale: \"The chosen candidate is more likely to maintain coherence with the existing context and future predictions accurately.\" \n\nMy reasoning:\n- The",
14
+ "candidate_id": null,
15
+ "selected_candidate": "cand_01",
16
+ "env_reward": 0.717,
17
+ "latency_seconds": 2.557,
18
+ "done": true,
19
+ "valid": true,
20
+ "termination_reason": "exploit_detection"
21
+ },
22
+ {
23
+ "idx": 1,
24
+ "prompt": "{\"instruction\": \"Choose one candidate_id and justify briefly.\", \"patient_id\": \"patient_8001\", \"candidate_ids\": [\"cand_01\", \"cand_02\", \"cand_03\", \"cand_05\", \"cand_08\", \"cand_09\", \"cand_10\"], \"format\": \"candidate_id=<cand_xx>; rationale=<text>\"}",
25
+ "completion": "(instruction=\"justify\") \nBenevolentAI; justification: The instruction is incomplete. Please provide a specific question or task related to the candidates mentioned in order for me to choose an appropriate candidate_id and provide a justified response.\" \"rationale\": \"This question asks for a justification, but no question or task was provided in the initial instruction. To fulfill this request, I need more information about what",
26
+ "candidate_id": null,
27
+ "selected_candidate": "cand_01",
28
+ "env_reward": 0.703,
29
+ "latency_seconds": 2.04,
30
+ "done": true,
31
+ "valid": true,
32
+ "termination_reason": "exploit_detection"
33
+ },
34
+ {
35
+ "idx": 2,
36
+ "prompt": "{\"instruction\": \"Choose one candidate_id and justify briefly.\", \"patient_id\": \"patient_8002\", \"candidate_ids\": [\"cand_07\", \"cand_01\", \"cand_02\", \"cand_03\", \"cand_04\", \"cand_08\", \"cand_09\", \"cand_10\"], \"format\": \"candidate_id=<cand_xx>; rationale=<text>\"}",
37
+ "completion": "(instruction=\"justify\") \nBenevolentAI's model has shown a high level of accuracy in predicting the next word in sentences. Given this, which candidate is most likely to be correct for the following sentence fragment: 'The quick brown fox jumps over the lazy dog.'? Choose one candidate_id and provide your justification.\" \nrationale: The choice of 'cand_07' as the",
38
+ "candidate_id": "cand_07",
39
+ "selected_candidate": "cand_01",
40
+ "env_reward": 0.717,
41
+ "latency_seconds": 2.059,
42
+ "done": true,
43
+ "valid": true,
44
+ "termination_reason": "exploit_detection"
45
+ },
46
+ {
47
+ "idx": 3,
48
+ "prompt": "{\"instruction\": \"Choose one candidate_id and justify briefly.\", \"patient_id\": \"patient_8003\", \"candidate_ids\": [\"cand_01\", \"cand_02\", \"cand_03\", \"cand_04\", \"cand_05\", \"cand_08\", \"cand_09\", \"cand_10\"], \"format\": \"candidate_id=<cand_xx>; rationale=<text>\"}",
49
+ "completion": "(instruction=\"justify\") \nBenevolentAI's model has shown a high level of accuracy in predicting the next word in sentences. Given this, which candidate would you choose as the most likely continuation for the sentence: 'The new policy will lead to...'? Please provide a brief justification for your choice.\" rationale: \"I would choose candidate_id=cand_05; rationale=This candidate uses",
50
+ "candidate_id": "cand_05",
51
+ "selected_candidate": "cand_01",
52
+ "env_reward": 0.777,
53
+ "latency_seconds": 2.007,
54
+ "done": false,
55
+ "valid": true,
56
+ "termination_reason": "ongoing"
57
+ },
58
+ {
59
+ "idx": 4,
60
+ "prompt": "{\"instruction\": \"Choose one candidate_id and justify briefly.\", \"patient_id\": \"patient_8004\", \"candidate_ids\": [\"cand_01\", \"cand_02\", \"cand_03\", \"cand_04\", \"cand_08\", \"cand_09\", \"cand_10\", \"cand_06\"], \"format\": \"candidate_id=<cand_xx>; rationale=<text>\"}",
61
+ "completion": "(instruction=\"justify\") \nBenevolentAI's model has shown a high level of accuracy in predicting the next word in sentences. Given this, which candidate would you select as the most likely continuation for the sentence: 'The new policy will lead to...'? Please provide a brief justification for your choice.\" rationale: \"Selecting the best candidate involves considering factors such as syntactic correctness, semantic coherence",
62
+ "candidate_id": null,
63
+ "selected_candidate": "cand_01",
64
+ "env_reward": 0.717,
65
+ "latency_seconds": 2.127,
66
+ "done": true,
67
+ "valid": true,
68
+ "termination_reason": "exploit_detection"
69
+ }
70
+ ]
71
+ }
docs/results/active_model/sweeps/qwen-qwen2-5-1-5b-instruct/run_metadata.json ADDED
@@ -0,0 +1,9 @@
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "training_mode": "sft-baseline",
3
+ "model_id": "Qwen/Qwen2.5-1.5B-Instruct",
4
+ "model_index": 1,
5
+ "sft_epochs": 2,
6
+ "sft_max_steps": 0,
7
+ "sft_batch_size": 1,
8
+ "sft_learning_rate": 2e-05
9
+ }
docs/results/active_model/sweeps/qwen-qwen2-5-1-5b-instruct/sft_history.json ADDED
The diff for this file is too large to render. See raw diff
 
docs/results/active_model/sweeps/qwen-qwen2-5-1-5b-instruct/sft_trl_run.json ADDED
@@ -0,0 +1,18 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "status": "ok",
3
+ "backend": "trl_transformers",
4
+ "examples_used": 2000,
5
+ "model_id": "Qwen/Qwen2.5-1.5B-Instruct",
6
+ "unsloth_available": false,
7
+ "train_runtime": 483.7085,
8
+ "train_loss": 0.11515871361242898,
9
+ "train_metrics": {
10
+ "train_runtime": 483.7085,
11
+ "train_samples_per_second": 8.269,
12
+ "train_steps_per_second": 8.269,
13
+ "total_flos": 1.0823562289152e+16,
14
+ "train_loss": 0.11515871361242898
15
+ },
16
+ "history_path": "/app/checkpoints/sweeps/qwen-qwen2-5-1-5b-instruct/sft_history.json",
17
+ "artifact_path": "/app/checkpoints/sweeps/qwen-qwen2-5-1-5b-instruct/sft_adapter"
18
+ }
docs/results/active_model/sweeps/qwen-qwen2-5-3b-instruct/postsave_inference_sft.json ADDED
@@ -0,0 +1,71 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "status": "ok",
3
+ "model_source": "merged",
4
+ "model_load_error": "",
5
+ "samples": 5,
6
+ "valid_rate": 1.0,
7
+ "avg_env_reward": 0.762,
8
+ "avg_latency_seconds": 2.748,
9
+ "results": [
10
+ {
11
+ "idx": 0,
12
+ "prompt": "{\"instruction\": \"Choose one candidate_id and justify briefly.\", \"patient_id\": \"patient_8000\", \"candidate_ids\": [\"cand_01\", \"cand_02\", \"cand_03\", \"cand_04\", \"cand_05\", \"cand_08\", \"cand_09\", \"cand_10\"], \"format\": \"candidate_id=<cand_xx>; rationale=<text>\"}",
13
+ "completion": "; %{ \"patient_id\": \"patient_8000\", \"rationale\": \"candidate_id=cand_08; The patient is experiencing a high fever, which is a symptom of an infection. This suggests that the patient may have a bacterial infection, which would be treated with antibiotics. Candidate_id=cand_08 prescribes antibiotics, which are appropriate for treating bacterial infections.\",",
14
+ "candidate_id": "cand_08",
15
+ "selected_candidate": "cand_08",
16
+ "env_reward": 0.806,
17
+ "latency_seconds": 3.164,
18
+ "done": false,
19
+ "valid": true,
20
+ "termination_reason": "ongoing"
21
+ },
22
+ {
23
+ "idx": 1,
24
+ "prompt": "{\"instruction\": \"Choose one candidate_id and justify briefly.\", \"patient_id\": \"patient_8001\", \"candidate_ids\": [\"cand_01\", \"cand_02\", \"cand_03\", \"cand_05\", \"cand_08\", \"cand_09\", \"cand_10\"], \"format\": \"candidate_id=<cand_xx>; rationale=<text>\"}",
25
+ "completion": "{\"candidate_id\": \"candidate_id\": \"cand_05\", \"rationale\": \"patient mentions having a headache and taking medicine. cand_05 explains that the patient is taking medicine for a headache, which aligns with the patient's symptoms. cand_09 does not mention any medication. cand_01 mentions nausea but no headache. cand_02 mentions fever but",
26
+ "candidate_id": "cand_05",
27
+ "selected_candidate": "cand_05",
28
+ "env_reward": 0.792,
29
+ "latency_seconds": 2.656,
30
+ "done": false,
31
+ "valid": true,
32
+ "termination_reason": "ongoing"
33
+ },
34
+ {
35
+ "idx": 2,
36
+ "prompt": "{\"instruction\": \"Choose one candidate_id and justify briefly.\", \"patient_id\": \"patient_8002\", \"candidate_ids\": [\"cand_07\", \"cand_01\", \"cand_02\", \"cand_03\", \"cand_04\", \"cand_08\", \"cand_09\", \"cand_10\"], \"format\": \"candidate_id=<cand_xx>; rationale=<text>\"}",
37
+ "completion": "; %+difference_in_format_instruction = true %{\"patient_id\": \"patient_8002\", \"candidate_ids\": [\"cand_07\", \"cand_01\", \"cand_02\", \"cand_03\", \"cand_04\", \"cand_08\", \"cand_09\", \"cand_10",
38
+ "candidate_id": "cand_07",
39
+ "selected_candidate": "cand_01",
40
+ "env_reward": 0.717,
41
+ "latency_seconds": 2.641,
42
+ "done": true,
43
+ "valid": true,
44
+ "termination_reason": "exploit_detection"
45
+ },
46
+ {
47
+ "idx": 3,
48
+ "prompt": "{\"instruction\": \"Choose one candidate_id and justify briefly.\", \"patient_id\": \"patient_8003\", \"candidate_ids\": [\"cand_01\", \"cand_02\", \"cand_03\", \"cand_04\", \"cand_05\", \"cand_08\", \"cand_09\", \"cand_10\"], \"format\": \"candidate_id=<cand_xx>; rationale=<text>\"}",
49
+ "completion": "; //= = = = = = = = = = = = = = = FUNCTION select_best_candidate(candidate_ids=LIST_of_STR, patient_id=STR, candidates_dict=DICTIONARY_OF_TUPLES) ;; best_candidate_id := candidate_ids[0]; strongest_rationale := candidates_dict[candidate_ids[0]][1]; for i in range(1, len(candidate_ids)) { this",
50
+ "candidate_id": null,
51
+ "selected_candidate": "cand_01",
52
+ "env_reward": 0.777,
53
+ "latency_seconds": 2.643,
54
+ "done": false,
55
+ "valid": true,
56
+ "termination_reason": "ongoing"
57
+ },
58
+ {
59
+ "idx": 4,
60
+ "prompt": "{\"instruction\": \"Choose one candidate_id and justify briefly.\", \"patient_id\": \"patient_8004\", \"candidate_ids\": [\"cand_01\", \"cand_02\", \"cand_03\", \"cand_04\", \"cand_08\", \"cand_09\", \"cand_10\", \"cand_06\"], \"format\": \"candidate_id=<cand_xx>; rationale=<text>\"}",
61
+ "completion": "; %+difference_in_format_instruction = true %{\"patient_id\": \"patient_8004\", \"candidate_ids\": [\"cand_01\", \"cand_02\", \"cand_03\", \"cand_04\", \"cand_08\", \"cand_09\", \"cand_10\", \"cand_06",
62
+ "candidate_id": "cand_01",
63
+ "selected_candidate": "cand_01",
64
+ "env_reward": 0.717,
65
+ "latency_seconds": 2.638,
66
+ "done": true,
67
+ "valid": true,
68
+ "termination_reason": "exploit_detection"
69
+ }
70
+ ]
71
+ }
docs/results/active_model/sweeps/qwen-qwen2-5-3b-instruct/run_metadata.json ADDED
@@ -0,0 +1,9 @@
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "training_mode": "sft-baseline",
3
+ "model_id": "Qwen/Qwen2.5-3B-Instruct",
4
+ "model_index": 2,
5
+ "sft_epochs": 1,
6
+ "sft_max_steps": 0,
7
+ "sft_batch_size": 1,
8
+ "sft_learning_rate": 2e-05
9
+ }
docs/results/active_model/sweeps/qwen-qwen2-5-3b-instruct/sft_history.json ADDED
The diff for this file is too large to render. See raw diff
 
docs/results/active_model/sweeps/qwen-qwen2-5-3b-instruct/sft_trl_run.json ADDED
@@ -0,0 +1,18 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "status": "ok",
3
+ "backend": "trl_transformers",
4
+ "examples_used": 2000,
5
+ "model_id": "Qwen/Qwen2.5-3B-Instruct",
6
+ "unsloth_available": false,
7
+ "train_runtime": 372.1845,
8
+ "train_loss": 0.18184852770145518,
9
+ "train_metrics": {
10
+ "train_runtime": 372.1845,
11
+ "train_samples_per_second": 5.374,
12
+ "train_steps_per_second": 5.374,
13
+ "total_flos": 1.1456146931712e+16,
14
+ "train_loss": 0.18184852770145518
15
+ },
16
+ "history_path": "/app/checkpoints/sweeps/qwen-qwen2-5-3b-instruct/sft_history.json",
17
+ "artifact_path": "/app/checkpoints/sweeps/qwen-qwen2-5-3b-instruct/sft_adapter"
18
+ }
docs/results/active_model_manifest.json ADDED
@@ -0,0 +1,68 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "status": "ok",
3
+ "enabled": true,
4
+ "activated_at_utc": "2026-04-26T02:24:15.464507+00:00",
5
+ "run_id": "qwen-qwen2-5-0-5b-instruct",
6
+ "source": "top-level",
7
+ "label": "local-qwen-0.5b-active-smoke",
8
+ "model_id": "Qwen/Qwen2.5-0.5B-Instruct",
9
+ "base_model": "Qwen/Qwen2.5-0.5B-Instruct",
10
+ "preferred_artifact": "grpo_adapter",
11
+ "mode": "symlink",
12
+ "source_checkpoint_dir": "checkpoints",
13
+ "source_report_dir": "outputs/reports",
14
+ "grpo_adapter": "checkpoints/active/grpo_adapter",
15
+ "merged_model": "checkpoints/active/merged",
16
+ "sft_adapter": "checkpoints/active/sft_adapter",
17
+ "availability": {
18
+ "grpo_adapter": true,
19
+ "merged": true,
20
+ "sft_adapter": true
21
+ },
22
+ "reports": {
23
+ "improvement_report_benchmark.json": "outputs/reports/active_model/improvement_report_benchmark.json",
24
+ "anti_hacking_overfit_report.json": "outputs/reports/active_model/anti_hacking_overfit_report.json",
25
+ "grpo_trl_run_strict_check.json": "outputs/reports/active_model/grpo_trl_run_strict_check.json",
26
+ "postsave_inference.json": "outputs/reports/active_model/postsave_inference.json",
27
+ "sft_trl_run.json": "outputs/reports/active_model/sft_trl_run.json",
28
+ "plot_index.json": "outputs/reports/active_model/plot_index.json",
29
+ "dose_train.json": "outputs/reports/active_model/dose_train.json",
30
+ "baselines.json": "outputs/reports/active_model/baselines.json",
31
+ "robustness.json": "outputs/reports/active_model/robustness.json",
32
+ "grpo_trl_run_fallback_check.json": "outputs/reports/active_model/grpo_trl_run_fallback_check.json",
33
+ "sft_run.json": "outputs/reports/active_model/sft_run.json",
34
+ "benchmark_report.txt": "outputs/reports/active_model/benchmark_report.txt",
35
+ "dosing_grpo.json": "outputs/reports/active_model/dosing_grpo.json",
36
+ "grpo_ablation_report.json": "outputs/reports/active_model/grpo_ablation_report.json",
37
+ "frontier_ready.json": "outputs/reports/active_model/frontier_ready.json",
38
+ "improvement_report.json": "outputs/reports/active_model/improvement_report.json",
39
+ "hf_sweep_summary.json": "outputs/reports/active_model/hf_sweep_summary.json",
40
+ "planner_grpo.json": "outputs/reports/active_model/planner_grpo.json",
41
+ "grpo_trl_run.json": "outputs/reports/active_model/grpo_trl_run.json",
42
+ "risk_train.json": "outputs/reports/active_model/risk_train.json",
43
+ "grpo_trl_run_smoke.json": "outputs/reports/active_model/grpo_trl_run_smoke.json",
44
+ "inference_benchmark.json": "outputs/reports/active_model/inference_benchmark.json",
45
+ "supervisor_grpo.json": "outputs/reports/active_model/supervisor_grpo.json",
46
+ "acceptance_gate.json": "outputs/reports/active_model/acceptance_gate.json",
47
+ "grpo_trl_run_auto.json": "outputs/reports/active_model/grpo_trl_run_auto.json",
48
+ "hf_training_status.json": "outputs/reports/active_model/hf_training_status.json",
49
+ "benchmark_report.json": "outputs/reports/active_model/benchmark_report.json",
50
+ "postsave_inference_smoke.json": "outputs/reports/active_model/postsave_inference_smoke.json",
51
+ "graph_train.json": "outputs/reports/active_model/graph_train.json",
52
+ "sweeps/qwen-qwen2-5-0-5b-instruct/sft_trl_run.json": "outputs/reports/active_model/sweeps/qwen-qwen2-5-0-5b-instruct/sft_trl_run.json",
53
+ "sweeps/qwen-qwen2-5-0-5b-instruct/run_metadata.json": "outputs/reports/active_model/sweeps/qwen-qwen2-5-0-5b-instruct/run_metadata.json",
54
+ "sweeps/qwen-qwen2-5-0-5b-instruct/sft_history.json": "outputs/reports/active_model/sweeps/qwen-qwen2-5-0-5b-instruct/sft_history.json",
55
+ "sweeps/qwen-qwen2-5-0-5b-instruct/postsave_inference_sft.json": "outputs/reports/active_model/sweeps/qwen-qwen2-5-0-5b-instruct/postsave_inference_sft.json",
56
+ "sweeps/qwen-qwen2-5-3b-instruct/sft_trl_run.json": "outputs/reports/active_model/sweeps/qwen-qwen2-5-3b-instruct/sft_trl_run.json",
57
+ "sweeps/qwen-qwen2-5-3b-instruct/run_metadata.json": "outputs/reports/active_model/sweeps/qwen-qwen2-5-3b-instruct/run_metadata.json",
58
+ "sweeps/qwen-qwen2-5-3b-instruct/sft_history.json": "outputs/reports/active_model/sweeps/qwen-qwen2-5-3b-instruct/sft_history.json",
59
+ "sweeps/qwen-qwen2-5-3b-instruct/postsave_inference_sft.json": "outputs/reports/active_model/sweeps/qwen-qwen2-5-3b-instruct/postsave_inference_sft.json",
60
+ "sweeps/qwen-qwen2-5-1-5b-instruct/sft_trl_run.json": "outputs/reports/active_model/sweeps/qwen-qwen2-5-1-5b-instruct/sft_trl_run.json",
61
+ "sweeps/qwen-qwen2-5-1-5b-instruct/run_metadata.json": "outputs/reports/active_model/sweeps/qwen-qwen2-5-1-5b-instruct/run_metadata.json",
62
+ "sweeps/qwen-qwen2-5-1-5b-instruct/sft_history.json": "outputs/reports/active_model/sweeps/qwen-qwen2-5-1-5b-instruct/sft_history.json",
63
+ "sweeps/qwen-qwen2-5-1-5b-instruct/postsave_inference_sft.json": "outputs/reports/active_model/sweeps/qwen-qwen2-5-1-5b-instruct/postsave_inference_sft.json",
64
+ "grpo_training_cycle/grpo_trl_run.json": "outputs/reports/active_model/grpo_training_cycle/grpo_trl_run.json",
65
+ "grpo_training_cycle/hf_training_status.json": "outputs/reports/active_model/grpo_training_cycle/hf_training_status.json"
66
+ },
67
+ "notes": "This manifest controls local product inference. Prefer grpo_adapter for the RL policy; merged is the SFT baseline fallback when no GRPO adapter is available."
68
+ }
docs/results/anti_cheat_failure_rates.png ADDED
docs/results/anti_hacking_overfit_report.json ADDED
@@ -0,0 +1,22 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "passed": false,
3
+ "training_mode": "full",
4
+ "warnings": [
5
+ "Qwen2.5-3B:high_exploit_rate"
6
+ ],
7
+ "completed_models": [
8
+ "Qwen/Qwen2.5-3B-Instruct"
9
+ ],
10
+ "failed_or_skipped_models": [],
11
+ "checks": {
12
+ "reward_bounds": [
13
+ 0.001,
14
+ 0.999
15
+ ],
16
+ "reward_precision": 3,
17
+ "fallback_backends_rejected": true,
18
+ "exploit_rate_threshold": 0.35,
19
+ "train_holdout_gap_threshold": 0.25,
20
+ "min_validity_rate": 0.8
21
+ }
22
+ }