TheJackBright committed on
Commit
63acc4b
·
verified ·
1 Parent(s): f313e87

Upload PolyGuard usable model bundle: local-qwen-0-5b-active-smoke

Browse files
This view is limited to 50 files because it contains too many changes.   See raw diff
Files changed (50) hide show
  1. .gitattributes +3 -0
  2. usable_model_bundles/local-qwen-0-5b-active-smoke/README.md +42 -0
  3. usable_model_bundles/local-qwen-0-5b-active-smoke/bundle_manifest.json +65 -0
  4. usable_model_bundles/local-qwen-0-5b-active-smoke/checkpoints/grpo_adapter/README.md +209 -0
  5. usable_model_bundles/local-qwen-0-5b-active-smoke/checkpoints/grpo_adapter/adapter_config.json +43 -0
  6. usable_model_bundles/local-qwen-0-5b-active-smoke/checkpoints/grpo_adapter/adapter_model.safetensors +3 -0
  7. usable_model_bundles/local-qwen-0-5b-active-smoke/checkpoints/grpo_adapter/added_tokens.json +24 -0
  8. usable_model_bundles/local-qwen-0-5b-active-smoke/checkpoints/grpo_adapter/chat_template.jinja +54 -0
  9. usable_model_bundles/local-qwen-0-5b-active-smoke/checkpoints/grpo_adapter/merges.txt +0 -0
  10. usable_model_bundles/local-qwen-0-5b-active-smoke/checkpoints/grpo_adapter/special_tokens_map.json +31 -0
  11. usable_model_bundles/local-qwen-0-5b-active-smoke/checkpoints/grpo_adapter/tokenizer.json +3 -0
  12. usable_model_bundles/local-qwen-0-5b-active-smoke/checkpoints/grpo_adapter/tokenizer_config.json +207 -0
  13. usable_model_bundles/local-qwen-0-5b-active-smoke/checkpoints/grpo_adapter/training_args.bin +3 -0
  14. usable_model_bundles/local-qwen-0-5b-active-smoke/checkpoints/grpo_adapter/vocab.json +0 -0
  15. usable_model_bundles/local-qwen-0-5b-active-smoke/checkpoints/merged/added_tokens.json +24 -0
  16. usable_model_bundles/local-qwen-0-5b-active-smoke/checkpoints/merged/chat_template.jinja +54 -0
  17. usable_model_bundles/local-qwen-0-5b-active-smoke/checkpoints/merged/config.json +54 -0
  18. usable_model_bundles/local-qwen-0-5b-active-smoke/checkpoints/merged/generation_config.json +14 -0
  19. usable_model_bundles/local-qwen-0-5b-active-smoke/checkpoints/merged/merge_report.json +11 -0
  20. usable_model_bundles/local-qwen-0-5b-active-smoke/checkpoints/merged/merges.txt +0 -0
  21. usable_model_bundles/local-qwen-0-5b-active-smoke/checkpoints/merged/model.safetensors +3 -0
  22. usable_model_bundles/local-qwen-0-5b-active-smoke/checkpoints/merged/special_tokens_map.json +31 -0
  23. usable_model_bundles/local-qwen-0-5b-active-smoke/checkpoints/merged/tokenizer.json +3 -0
  24. usable_model_bundles/local-qwen-0-5b-active-smoke/checkpoints/merged/tokenizer_config.json +207 -0
  25. usable_model_bundles/local-qwen-0-5b-active-smoke/checkpoints/merged/vocab.json +0 -0
  26. usable_model_bundles/local-qwen-0-5b-active-smoke/checkpoints/sft_adapter/README.md +209 -0
  27. usable_model_bundles/local-qwen-0-5b-active-smoke/checkpoints/sft_adapter/adapter_config.json +43 -0
  28. usable_model_bundles/local-qwen-0-5b-active-smoke/checkpoints/sft_adapter/adapter_model.safetensors +3 -0
  29. usable_model_bundles/local-qwen-0-5b-active-smoke/checkpoints/sft_adapter/added_tokens.json +24 -0
  30. usable_model_bundles/local-qwen-0-5b-active-smoke/checkpoints/sft_adapter/chat_template.jinja +54 -0
  31. usable_model_bundles/local-qwen-0-5b-active-smoke/checkpoints/sft_adapter/merges.txt +0 -0
  32. usable_model_bundles/local-qwen-0-5b-active-smoke/checkpoints/sft_adapter/special_tokens_map.json +31 -0
  33. usable_model_bundles/local-qwen-0-5b-active-smoke/checkpoints/sft_adapter/tokenizer.json +3 -0
  34. usable_model_bundles/local-qwen-0-5b-active-smoke/checkpoints/sft_adapter/tokenizer_config.json +207 -0
  35. usable_model_bundles/local-qwen-0-5b-active-smoke/checkpoints/sft_adapter/training_args.bin +3 -0
  36. usable_model_bundles/local-qwen-0-5b-active-smoke/checkpoints/sft_adapter/vocab.json +0 -0
  37. usable_model_bundles/local-qwen-0-5b-active-smoke/manifests/active_model_manifest.json +68 -0
  38. usable_model_bundles/local-qwen-0-5b-active-smoke/manifests/active_model_report_manifest.json +68 -0
  39. usable_model_bundles/local-qwen-0-5b-active-smoke/manifests/submission_evidence_manifest.json +237 -0
  40. usable_model_bundles/local-qwen-0-5b-active-smoke/reports/acceptance_gate.json +11 -0
  41. usable_model_bundles/local-qwen-0-5b-active-smoke/reports/active_model_manifest.json +68 -0
  42. usable_model_bundles/local-qwen-0-5b-active-smoke/reports/anti_hacking_overfit_report.json +22 -0
  43. usable_model_bundles/local-qwen-0-5b-active-smoke/reports/baselines.json +119 -0
  44. usable_model_bundles/local-qwen-0-5b-active-smoke/reports/benchmark_report.json +52 -0
  45. usable_model_bundles/local-qwen-0-5b-active-smoke/reports/benchmark_report.txt +52 -0
  46. usable_model_bundles/local-qwen-0-5b-active-smoke/reports/dose_train.json +6 -0
  47. usable_model_bundles/local-qwen-0-5b-active-smoke/reports/dosing_grpo.json +28 -0
  48. usable_model_bundles/local-qwen-0-5b-active-smoke/reports/frontier_ready.json +8 -0
  49. usable_model_bundles/local-qwen-0-5b-active-smoke/reports/graph_train.json +5 -0
  50. usable_model_bundles/local-qwen-0-5b-active-smoke/reports/grpo_ablation_report.json +149 -0
.gitattributes CHANGED
@@ -33,3 +33,6 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
33
  *.zip filter=lfs diff=lfs merge=lfs -text
34
  *.zst filter=lfs diff=lfs merge=lfs -text
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
 
 
 
 
33
  *.zip filter=lfs diff=lfs merge=lfs -text
34
  *.zst filter=lfs diff=lfs merge=lfs -text
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
36
+ usable_model_bundles/local-qwen-0-5b-active-smoke/checkpoints/grpo_adapter/tokenizer.json filter=lfs diff=lfs merge=lfs -text
37
+ usable_model_bundles/local-qwen-0-5b-active-smoke/checkpoints/merged/tokenizer.json filter=lfs diff=lfs merge=lfs -text
38
+ usable_model_bundles/local-qwen-0-5b-active-smoke/checkpoints/sft_adapter/tokenizer.json filter=lfs diff=lfs merge=lfs -text
usable_model_bundles/local-qwen-0-5b-active-smoke/README.md ADDED
@@ -0,0 +1,42 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # PolyGuard Active Model Artifact Bundle
2
+
3
+ Bundle: `local-qwen-0-5b-active-smoke`
4
+ Model: `Qwen/Qwen2.5-0.5B-Instruct`
5
+ Base model: `Qwen/Qwen2.5-0.5B-Instruct`
6
+ Preferred artifact: `grpo_adapter`
7
+
8
+ This bundle is meant for implementation/testing while the full per-model remote sweep artifacts are still uploading.
9
+
10
+ ## Contents
11
+
12
+ - `checkpoints/grpo_adapter/`
13
+ - `checkpoints/sft_adapter/`
14
+ - `checkpoints/merged/` (when included)
15
+ - `manifests/active_model_manifest.json`
16
+ - `reports/`
17
+
18
+ ## Restore Locally
19
+
20
+ ```bash
21
+ cd /path/to/polyguard-rl  # root of your local polyguard-rl checkout
22
+ cp -R submission_bundle/model_artifacts/local-qwen-0-5b-active-smoke/checkpoints/grpo_adapter checkpoints/grpo_adapter
23
+ cp -R submission_bundle/model_artifacts/local-qwen-0-5b-active-smoke/checkpoints/sft_adapter checkpoints/sft_adapter
24
+ cp -R submission_bundle/model_artifacts/local-qwen-0-5b-active-smoke/checkpoints/merged checkpoints/merged
25
+ mkdir -p checkpoints/active
26
+ cp submission_bundle/model_artifacts/local-qwen-0-5b-active-smoke/manifests/active_model_manifest.json checkpoints/active/active_model_manifest.json
27
+ curl http://127.0.0.1:8200/policy/model_status
28
+ ```
29
+
30
+ ## Hugging Face Download
31
+
32
+ After upload, download with:
33
+
34
+ ```bash
35
+ export HF_TOKEN="$(cat ~/.cache/huggingface/token)"
36
+ huggingface-cli download TheJackBright/polyguard-openenv-training-full-artifacts \
37
+ --repo-type model \
38
+ --include 'usable_model_bundles/local-qwen-0-5b-active-smoke/**' \
39
+ --local-dir ./hf_artifacts
40
+ ```
41
+
42
+ Note: this is the current local active Qwen 0.5B implementation bundle. It is not the final full remote Qwen 0.5B/1.5B sweep checkpoint; those artifacts will be available separately once they appear in the HF artifact repo.
usable_model_bundles/local-qwen-0-5b-active-smoke/bundle_manifest.json ADDED
@@ -0,0 +1,65 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "status": "ok",
3
+ "bundle_name": "local-qwen-0-5b-active-smoke",
4
+ "created_at_utc": "2026-04-26T04:54:50.382018+00:00",
5
+ "source": "local_active_model",
6
+ "run_id": "qwen-qwen2-5-0-5b-instruct",
7
+ "label": "local-qwen-0.5b-active-smoke",
8
+ "model_id": "Qwen/Qwen2.5-0.5B-Instruct",
9
+ "base_model": "Qwen/Qwen2.5-0.5B-Instruct",
10
+ "preferred_artifact": "grpo_adapter",
11
+ "availability": {
12
+ "grpo_adapter": true,
13
+ "merged": true,
14
+ "sft_adapter": true
15
+ },
16
+ "remote_full_sweep_note": "The full Qwen 0.5B/1.5B remote sweep artifacts are still pending upload in the HF artifact repo. This bundle packages the currently active local trained/smoke artifacts for product integration.",
17
+ "copies": {
18
+ "grpo_adapter": {
19
+ "source": "/Users/daver/Desktop/Meta_Pytorch_OpenEnv_Scaler/polyguard-rl/checkpoints/grpo_adapter",
20
+ "target": "/Users/daver/Desktop/Meta_Pytorch_OpenEnv_Scaler/polyguard-rl/submission_bundle/model_artifacts/local-qwen-0-5b-active-smoke/checkpoints/grpo_adapter",
21
+ "exists": true,
22
+ "file_count": 11,
23
+ "bytes": 20229991
24
+ },
25
+ "sft_adapter": {
26
+ "source": "/Users/daver/Desktop/Meta_Pytorch_OpenEnv_Scaler/polyguard-rl/checkpoints/sft_adapter",
27
+ "target": "/Users/daver/Desktop/Meta_Pytorch_OpenEnv_Scaler/polyguard-rl/submission_bundle/model_artifacts/local-qwen-0-5b-active-smoke/checkpoints/sft_adapter",
28
+ "exists": true,
29
+ "file_count": 11,
30
+ "bytes": 30655905
31
+ },
32
+ "active_manifest": {
33
+ "source": "/Users/daver/Desktop/Meta_Pytorch_OpenEnv_Scaler/polyguard-rl/checkpoints/active/active_model_manifest.json",
34
+ "target": "/Users/daver/Desktop/Meta_Pytorch_OpenEnv_Scaler/polyguard-rl/submission_bundle/model_artifacts/local-qwen-0-5b-active-smoke/manifests/active_model_manifest.json",
35
+ "exists": true,
36
+ "bytes": 5274
37
+ },
38
+ "active_report_manifest": {
39
+ "source": "/Users/daver/Desktop/Meta_Pytorch_OpenEnv_Scaler/polyguard-rl/outputs/reports/active_model/active_model_manifest.json",
40
+ "target": "/Users/daver/Desktop/Meta_Pytorch_OpenEnv_Scaler/polyguard-rl/submission_bundle/model_artifacts/local-qwen-0-5b-active-smoke/manifests/active_model_report_manifest.json",
41
+ "exists": true,
42
+ "bytes": 5274
43
+ },
44
+ "submission_evidence_manifest": {
45
+ "source": "/Users/daver/Desktop/Meta_Pytorch_OpenEnv_Scaler/polyguard-rl/outputs/reports/submission_evidence/qwen_0_5b_1_5b/manifest.json",
46
+ "target": "/Users/daver/Desktop/Meta_Pytorch_OpenEnv_Scaler/polyguard-rl/submission_bundle/model_artifacts/local-qwen-0-5b-active-smoke/manifests/submission_evidence_manifest.json",
47
+ "exists": true,
48
+ "bytes": 12708
49
+ },
50
+ "reports": {
51
+ "source": "/Users/daver/Desktop/Meta_Pytorch_OpenEnv_Scaler/polyguard-rl/outputs/reports/active_model",
52
+ "target": "/Users/daver/Desktop/Meta_Pytorch_OpenEnv_Scaler/polyguard-rl/submission_bundle/model_artifacts/local-qwen-0-5b-active-smoke/reports",
53
+ "exists": true,
54
+ "file_count": 44,
55
+ "bytes": 1768421
56
+ },
57
+ "merged": {
58
+ "source": "/Users/daver/Desktop/Meta_Pytorch_OpenEnv_Scaler/polyguard-rl/checkpoints/merged",
59
+ "target": "/Users/daver/Desktop/Meta_Pytorch_OpenEnv_Scaler/polyguard-rl/submission_bundle/model_artifacts/local-qwen-0-5b-active-smoke/checkpoints/merged",
60
+ "exists": true,
61
+ "file_count": 11,
62
+ "bytes": 1003978294
63
+ }
64
+ }
65
+ }
usable_model_bundles/local-qwen-0-5b-active-smoke/checkpoints/grpo_adapter/README.md ADDED
@@ -0,0 +1,209 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ ---
2
+ base_model: Qwen/Qwen2.5-0.5B-Instruct
3
+ library_name: peft
4
+ pipeline_tag: text-generation
5
+ tags:
6
+ - base_model:adapter:Qwen/Qwen2.5-0.5B-Instruct
7
+ - grpo
8
+ - lora
9
+ - transformers
10
+ - trl
11
+ ---
12
+
13
+ # Model Card for the Qwen2.5-0.5B-Instruct GRPO LoRA Adapter
14
+
15
+ <!-- Provide a quick summary of what the model is/does. -->
16
+
17
+
18
+
19
+ ## Model Details
20
+
21
+ ### Model Description
22
+
23
+ <!-- Provide a longer summary of what this model is. -->
24
+
25
+
26
+
27
+ - **Developed by:** [More Information Needed]
28
+ - **Funded by [optional]:** [More Information Needed]
29
+ - **Shared by [optional]:** [More Information Needed]
30
+ - **Model type:** [More Information Needed]
31
+ - **Language(s) (NLP):** [More Information Needed]
32
+ - **License:** [More Information Needed]
33
+ - **Finetuned from model [optional]:** [More Information Needed]
34
+
35
+ ### Model Sources [optional]
36
+
37
+ <!-- Provide the basic links for the model. -->
38
+
39
+ - **Repository:** [More Information Needed]
40
+ - **Paper [optional]:** [More Information Needed]
41
+ - **Demo [optional]:** [More Information Needed]
42
+
43
+ ## Uses
44
+
45
+ <!-- Address questions around how the model is intended to be used, including the foreseeable users of the model and those affected by the model. -->
46
+
47
+ ### Direct Use
48
+
49
+ <!-- This section is for the model use without fine-tuning or plugging into a larger ecosystem/app. -->
50
+
51
+ [More Information Needed]
52
+
53
+ ### Downstream Use [optional]
54
+
55
+ <!-- This section is for the model use when fine-tuned for a task, or when plugged into a larger ecosystem/app -->
56
+
57
+ [More Information Needed]
58
+
59
+ ### Out-of-Scope Use
60
+
61
+ <!-- This section addresses misuse, malicious use, and uses that the model will not work well for. -->
62
+
63
+ [More Information Needed]
64
+
65
+ ## Bias, Risks, and Limitations
66
+
67
+ <!-- This section is meant to convey both technical and sociotechnical limitations. -->
68
+
69
+ [More Information Needed]
70
+
71
+ ### Recommendations
72
+
73
+ <!-- This section is meant to convey recommendations with respect to the bias, risk, and technical limitations. -->
74
+
75
+ Users (both direct and downstream) should be made aware of the risks, biases and limitations of the model. More information needed for further recommendations.
76
+
77
+ ## How to Get Started with the Model
78
+
79
+ Use the code below to get started with the model.
80
+
81
+ [More Information Needed]
82
+
83
+ ## Training Details
84
+
85
+ ### Training Data
86
+
87
+ <!-- This should link to a Dataset Card, perhaps with a short stub of information on what the training data is all about as well as documentation related to data pre-processing or additional filtering. -->
88
+
89
+ [More Information Needed]
90
+
91
+ ### Training Procedure
92
+
93
+ <!-- This relates heavily to the Technical Specifications. Content here should link to that section when it is relevant to the training procedure. -->
94
+
95
+ #### Preprocessing [optional]
96
+
97
+ [More Information Needed]
98
+
99
+
100
+ #### Training Hyperparameters
101
+
102
+ - **Training regime:** [More Information Needed] <!--fp32, fp16 mixed precision, bf16 mixed precision, bf16 non-mixed precision, fp16 non-mixed precision, fp8 mixed precision -->
103
+
104
+ #### Speeds, Sizes, Times [optional]
105
+
106
+ <!-- This section provides information about throughput, start/end time, checkpoint size if relevant, etc. -->
107
+
108
+ [More Information Needed]
109
+
110
+ ## Evaluation
111
+
112
+ <!-- This section describes the evaluation protocols and provides the results. -->
113
+
114
+ ### Testing Data, Factors & Metrics
115
+
116
+ #### Testing Data
117
+
118
+ <!-- This should link to a Dataset Card if possible. -->
119
+
120
+ [More Information Needed]
121
+
122
+ #### Factors
123
+
124
+ <!-- These are the things the evaluation is disaggregating by, e.g., subpopulations or domains. -->
125
+
126
+ [More Information Needed]
127
+
128
+ #### Metrics
129
+
130
+ <!-- These are the evaluation metrics being used, ideally with a description of why. -->
131
+
132
+ [More Information Needed]
133
+
134
+ ### Results
135
+
136
+ [More Information Needed]
137
+
138
+ #### Summary
139
+
140
+
141
+
142
+ ## Model Examination [optional]
143
+
144
+ <!-- Relevant interpretability work for the model goes here -->
145
+
146
+ [More Information Needed]
147
+
148
+ ## Environmental Impact
149
+
150
+ <!-- Total emissions (in grams of CO2eq) and additional considerations, such as electricity usage, go here. Edit the suggested text below accordingly -->
151
+
152
+ Carbon emissions can be estimated using the [Machine Learning Impact calculator](https://mlco2.github.io/impact#compute) presented in [Lacoste et al. (2019)](https://arxiv.org/abs/1910.09700).
153
+
154
+ - **Hardware Type:** [More Information Needed]
155
+ - **Hours used:** [More Information Needed]
156
+ - **Cloud Provider:** [More Information Needed]
157
+ - **Compute Region:** [More Information Needed]
158
+ - **Carbon Emitted:** [More Information Needed]
159
+
160
+ ## Technical Specifications [optional]
161
+
162
+ ### Model Architecture and Objective
163
+
164
+ [More Information Needed]
165
+
166
+ ### Compute Infrastructure
167
+
168
+ [More Information Needed]
169
+
170
+ #### Hardware
171
+
172
+ [More Information Needed]
173
+
174
+ #### Software
175
+
176
+ [More Information Needed]
177
+
178
+ ## Citation [optional]
179
+
180
+ <!-- If there is a paper or blog post introducing the model, the APA and Bibtex information for that should go in this section. -->
181
+
182
+ **BibTeX:**
183
+
184
+ [More Information Needed]
185
+
186
+ **APA:**
187
+
188
+ [More Information Needed]
189
+
190
+ ## Glossary [optional]
191
+
192
+ <!-- If relevant, include terms and calculations in this section that can help readers understand the model or model card. -->
193
+
194
+ [More Information Needed]
195
+
196
+ ## More Information [optional]
197
+
198
+ [More Information Needed]
199
+
200
+ ## Model Card Authors [optional]
201
+
202
+ [More Information Needed]
203
+
204
+ ## Model Card Contact
205
+
206
+ [More Information Needed]
207
+ ### Framework versions
208
+
209
+ - PEFT 0.19.1
usable_model_bundles/local-qwen-0-5b-active-smoke/checkpoints/grpo_adapter/adapter_config.json ADDED
@@ -0,0 +1,43 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "alora_invocation_tokens": null,
3
+ "alpha_pattern": {},
4
+ "arrow_config": null,
5
+ "auto_mapping": null,
6
+ "base_model_name_or_path": "Qwen/Qwen2.5-0.5B-Instruct",
7
+ "bias": "none",
8
+ "corda_config": null,
9
+ "ensure_weight_tying": false,
10
+ "eva_config": null,
11
+ "exclude_modules": null,
12
+ "fan_in_fan_out": false,
13
+ "inference_mode": true,
14
+ "init_lora_weights": true,
15
+ "layer_replication": null,
16
+ "layers_pattern": null,
17
+ "layers_to_transform": null,
18
+ "loftq_config": {},
19
+ "lora_alpha": 32,
20
+ "lora_bias": false,
21
+ "lora_dropout": 0.05,
22
+ "lora_ga_config": null,
23
+ "megatron_config": null,
24
+ "megatron_core": "megatron.core",
25
+ "modules_to_save": null,
26
+ "peft_type": "LORA",
27
+ "peft_version": "0.19.1",
28
+ "qalora_group_size": 16,
29
+ "r": 16,
30
+ "rank_pattern": {},
31
+ "revision": null,
32
+ "target_modules": [
33
+ "q_proj",
34
+ "v_proj"
35
+ ],
36
+ "target_parameters": null,
37
+ "task_type": "CAUSAL_LM",
38
+ "trainable_token_indices": null,
39
+ "use_bdlora": null,
40
+ "use_dora": false,
41
+ "use_qalora": false,
42
+ "use_rslora": false
43
+ }
usable_model_bundles/local-qwen-0-5b-active-smoke/checkpoints/grpo_adapter/adapter_model.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:0d3c7721b362814dd6ca4b05efef788d3ec93a26a16848227f4575236f699ec8
3
+ size 4338000
usable_model_bundles/local-qwen-0-5b-active-smoke/checkpoints/grpo_adapter/added_tokens.json ADDED
@@ -0,0 +1,24 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "</tool_call>": 151658,
3
+ "<tool_call>": 151657,
4
+ "<|box_end|>": 151649,
5
+ "<|box_start|>": 151648,
6
+ "<|endoftext|>": 151643,
7
+ "<|file_sep|>": 151664,
8
+ "<|fim_middle|>": 151660,
9
+ "<|fim_pad|>": 151662,
10
+ "<|fim_prefix|>": 151659,
11
+ "<|fim_suffix|>": 151661,
12
+ "<|im_end|>": 151645,
13
+ "<|im_start|>": 151644,
14
+ "<|image_pad|>": 151655,
15
+ "<|object_ref_end|>": 151647,
16
+ "<|object_ref_start|>": 151646,
17
+ "<|quad_end|>": 151651,
18
+ "<|quad_start|>": 151650,
19
+ "<|repo_name|>": 151663,
20
+ "<|video_pad|>": 151656,
21
+ "<|vision_end|>": 151653,
22
+ "<|vision_pad|>": 151654,
23
+ "<|vision_start|>": 151652
24
+ }
usable_model_bundles/local-qwen-0-5b-active-smoke/checkpoints/grpo_adapter/chat_template.jinja ADDED
@@ -0,0 +1,54 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {%- if tools %}
2
+ {{- '<|im_start|>system\n' }}
3
+ {%- if messages[0]['role'] == 'system' %}
4
+ {{- messages[0]['content'] }}
5
+ {%- else %}
6
+ {{- 'You are Qwen, created by Alibaba Cloud. You are a helpful assistant.' }}
7
+ {%- endif %}
8
+ {{- "\n\n# Tools\n\nYou may call one or more functions to assist with the user query.\n\nYou are provided with function signatures within <tools></tools> XML tags:\n<tools>" }}
9
+ {%- for tool in tools %}
10
+ {{- "\n" }}
11
+ {{- tool | tojson }}
12
+ {%- endfor %}
13
+ {{- "\n</tools>\n\nFor each function call, return a json object with function name and arguments within <tool_call></tool_call> XML tags:\n<tool_call>\n{\"name\": <function-name>, \"arguments\": <args-json-object>}\n</tool_call><|im_end|>\n" }}
14
+ {%- else %}
15
+ {%- if messages[0]['role'] == 'system' %}
16
+ {{- '<|im_start|>system\n' + messages[0]['content'] + '<|im_end|>\n' }}
17
+ {%- else %}
18
+ {{- '<|im_start|>system\nYou are Qwen, created by Alibaba Cloud. You are a helpful assistant.<|im_end|>\n' }}
19
+ {%- endif %}
20
+ {%- endif %}
21
+ {%- for message in messages %}
22
+ {%- if (message.role == "user") or (message.role == "system" and not loop.first) or (message.role == "assistant" and not message.tool_calls) %}
23
+ {{- '<|im_start|>' + message.role + '\n' + message.content + '<|im_end|>' + '\n' }}
24
+ {%- elif message.role == "assistant" %}
25
+ {{- '<|im_start|>' + message.role }}
26
+ {%- if message.content %}
27
+ {{- '\n' + message.content }}
28
+ {%- endif %}
29
+ {%- for tool_call in message.tool_calls %}
30
+ {%- if tool_call.function is defined %}
31
+ {%- set tool_call = tool_call.function %}
32
+ {%- endif %}
33
+ {{- '\n<tool_call>\n{"name": "' }}
34
+ {{- tool_call.name }}
35
+ {{- '", "arguments": ' }}
36
+ {{- tool_call.arguments | tojson }}
37
+ {{- '}\n</tool_call>' }}
38
+ {%- endfor %}
39
+ {{- '<|im_end|>\n' }}
40
+ {%- elif message.role == "tool" %}
41
+ {%- if (loop.index0 == 0) or (messages[loop.index0 - 1].role != "tool") %}
42
+ {{- '<|im_start|>user' }}
43
+ {%- endif %}
44
+ {{- '\n<tool_response>\n' }}
45
+ {{- message.content }}
46
+ {{- '\n</tool_response>' }}
47
+ {%- if loop.last or (messages[loop.index0 + 1].role != "tool") %}
48
+ {{- '<|im_end|>\n' }}
49
+ {%- endif %}
50
+ {%- endif %}
51
+ {%- endfor %}
52
+ {%- if add_generation_prompt %}
53
+ {{- '<|im_start|>assistant\n' }}
54
+ {%- endif %}
usable_model_bundles/local-qwen-0-5b-active-smoke/checkpoints/grpo_adapter/merges.txt ADDED
The diff for this file is too large to render. See raw diff
 
usable_model_bundles/local-qwen-0-5b-active-smoke/checkpoints/grpo_adapter/special_tokens_map.json ADDED
@@ -0,0 +1,31 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "additional_special_tokens": [
3
+ "<|im_start|>",
4
+ "<|im_end|>",
5
+ "<|object_ref_start|>",
6
+ "<|object_ref_end|>",
7
+ "<|box_start|>",
8
+ "<|box_end|>",
9
+ "<|quad_start|>",
10
+ "<|quad_end|>",
11
+ "<|vision_start|>",
12
+ "<|vision_end|>",
13
+ "<|vision_pad|>",
14
+ "<|image_pad|>",
15
+ "<|video_pad|>"
16
+ ],
17
+ "eos_token": {
18
+ "content": "<|im_end|>",
19
+ "lstrip": false,
20
+ "normalized": false,
21
+ "rstrip": false,
22
+ "single_word": false
23
+ },
24
+ "pad_token": {
25
+ "content": "<|endoftext|>",
26
+ "lstrip": false,
27
+ "normalized": false,
28
+ "rstrip": false,
29
+ "single_word": false
30
+ }
31
+ }
usable_model_bundles/local-qwen-0-5b-active-smoke/checkpoints/grpo_adapter/tokenizer.json ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:5eee858c5123a4279c3e1f7b81247343f356ac767940b2692a928ad929543214
3
+ size 11422063
usable_model_bundles/local-qwen-0-5b-active-smoke/checkpoints/grpo_adapter/tokenizer_config.json ADDED
@@ -0,0 +1,207 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "add_bos_token": false,
3
+ "add_prefix_space": false,
4
+ "added_tokens_decoder": {
5
+ "151643": {
6
+ "content": "<|endoftext|>",
7
+ "lstrip": false,
8
+ "normalized": false,
9
+ "rstrip": false,
10
+ "single_word": false,
11
+ "special": true
12
+ },
13
+ "151644": {
14
+ "content": "<|im_start|>",
15
+ "lstrip": false,
16
+ "normalized": false,
17
+ "rstrip": false,
18
+ "single_word": false,
19
+ "special": true
20
+ },
21
+ "151645": {
22
+ "content": "<|im_end|>",
23
+ "lstrip": false,
24
+ "normalized": false,
25
+ "rstrip": false,
26
+ "single_word": false,
27
+ "special": true
28
+ },
29
+ "151646": {
30
+ "content": "<|object_ref_start|>",
31
+ "lstrip": false,
32
+ "normalized": false,
33
+ "rstrip": false,
34
+ "single_word": false,
35
+ "special": true
36
+ },
37
+ "151647": {
38
+ "content": "<|object_ref_end|>",
39
+ "lstrip": false,
40
+ "normalized": false,
41
+ "rstrip": false,
42
+ "single_word": false,
43
+ "special": true
44
+ },
45
+ "151648": {
46
+ "content": "<|box_start|>",
47
+ "lstrip": false,
48
+ "normalized": false,
49
+ "rstrip": false,
50
+ "single_word": false,
51
+ "special": true
52
+ },
53
+ "151649": {
54
+ "content": "<|box_end|>",
55
+ "lstrip": false,
56
+ "normalized": false,
57
+ "rstrip": false,
58
+ "single_word": false,
59
+ "special": true
60
+ },
61
+ "151650": {
62
+ "content": "<|quad_start|>",
63
+ "lstrip": false,
64
+ "normalized": false,
65
+ "rstrip": false,
66
+ "single_word": false,
67
+ "special": true
68
+ },
69
+ "151651": {
70
+ "content": "<|quad_end|>",
71
+ "lstrip": false,
72
+ "normalized": false,
73
+ "rstrip": false,
74
+ "single_word": false,
75
+ "special": true
76
+ },
77
+ "151652": {
78
+ "content": "<|vision_start|>",
79
+ "lstrip": false,
80
+ "normalized": false,
81
+ "rstrip": false,
82
+ "single_word": false,
83
+ "special": true
84
+ },
85
+ "151653": {
86
+ "content": "<|vision_end|>",
87
+ "lstrip": false,
88
+ "normalized": false,
89
+ "rstrip": false,
90
+ "single_word": false,
91
+ "special": true
92
+ },
93
+ "151654": {
94
+ "content": "<|vision_pad|>",
95
+ "lstrip": false,
96
+ "normalized": false,
97
+ "rstrip": false,
98
+ "single_word": false,
99
+ "special": true
100
+ },
101
+ "151655": {
102
+ "content": "<|image_pad|>",
103
+ "lstrip": false,
104
+ "normalized": false,
105
+ "rstrip": false,
106
+ "single_word": false,
107
+ "special": true
108
+ },
109
+ "151656": {
110
+ "content": "<|video_pad|>",
111
+ "lstrip": false,
112
+ "normalized": false,
113
+ "rstrip": false,
114
+ "single_word": false,
115
+ "special": true
116
+ },
117
+ "151657": {
118
+ "content": "<tool_call>",
119
+ "lstrip": false,
120
+ "normalized": false,
121
+ "rstrip": false,
122
+ "single_word": false,
123
+ "special": false
124
+ },
125
+ "151658": {
126
+ "content": "</tool_call>",
127
+ "lstrip": false,
128
+ "normalized": false,
129
+ "rstrip": false,
130
+ "single_word": false,
131
+ "special": false
132
+ },
133
+ "151659": {
134
+ "content": "<|fim_prefix|>",
135
+ "lstrip": false,
136
+ "normalized": false,
137
+ "rstrip": false,
138
+ "single_word": false,
139
+ "special": false
140
+ },
141
+ "151660": {
142
+ "content": "<|fim_middle|>",
143
+ "lstrip": false,
144
+ "normalized": false,
145
+ "rstrip": false,
146
+ "single_word": false,
147
+ "special": false
148
+ },
149
+ "151661": {
150
+ "content": "<|fim_suffix|>",
151
+ "lstrip": false,
152
+ "normalized": false,
153
+ "rstrip": false,
154
+ "single_word": false,
155
+ "special": false
156
+ },
157
+ "151662": {
158
+ "content": "<|fim_pad|>",
159
+ "lstrip": false,
160
+ "normalized": false,
161
+ "rstrip": false,
162
+ "single_word": false,
163
+ "special": false
164
+ },
165
+ "151663": {
166
+ "content": "<|repo_name|>",
167
+ "lstrip": false,
168
+ "normalized": false,
169
+ "rstrip": false,
170
+ "single_word": false,
171
+ "special": false
172
+ },
173
+ "151664": {
174
+ "content": "<|file_sep|>",
175
+ "lstrip": false,
176
+ "normalized": false,
177
+ "rstrip": false,
178
+ "single_word": false,
179
+ "special": false
180
+ }
181
+ },
182
+ "additional_special_tokens": [
183
+ "<|im_start|>",
184
+ "<|im_end|>",
185
+ "<|object_ref_start|>",
186
+ "<|object_ref_end|>",
187
+ "<|box_start|>",
188
+ "<|box_end|>",
189
+ "<|quad_start|>",
190
+ "<|quad_end|>",
191
+ "<|vision_start|>",
192
+ "<|vision_end|>",
193
+ "<|vision_pad|>",
194
+ "<|image_pad|>",
195
+ "<|video_pad|>"
196
+ ],
197
+ "bos_token": null,
198
+ "clean_up_tokenization_spaces": false,
199
+ "eos_token": "<|im_end|>",
200
+ "errors": "replace",
201
+ "extra_special_tokens": {},
202
+ "model_max_length": 131072,
203
+ "pad_token": "<|endoftext|>",
204
+ "split_special_tokens": false,
205
+ "tokenizer_class": "Qwen2Tokenizer",
206
+ "unk_token": null
207
+ }
usable_model_bundles/local-qwen-0-5b-active-smoke/checkpoints/grpo_adapter/training_args.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:246b70bea7ca6b468c441c9b4feb273716381f71026cd6cdc63f9984cf06f2ef
3
+ size 6584
usable_model_bundles/local-qwen-0-5b-active-smoke/checkpoints/grpo_adapter/vocab.json ADDED
The diff for this file is too large to render. See raw diff
 
usable_model_bundles/local-qwen-0-5b-active-smoke/checkpoints/merged/added_tokens.json ADDED
@@ -0,0 +1,24 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "</tool_call>": 151658,
3
+ "<tool_call>": 151657,
4
+ "<|box_end|>": 151649,
5
+ "<|box_start|>": 151648,
6
+ "<|endoftext|>": 151643,
7
+ "<|file_sep|>": 151664,
8
+ "<|fim_middle|>": 151660,
9
+ "<|fim_pad|>": 151662,
10
+ "<|fim_prefix|>": 151659,
11
+ "<|fim_suffix|>": 151661,
12
+ "<|im_end|>": 151645,
13
+ "<|im_start|>": 151644,
14
+ "<|image_pad|>": 151655,
15
+ "<|object_ref_end|>": 151647,
16
+ "<|object_ref_start|>": 151646,
17
+ "<|quad_end|>": 151651,
18
+ "<|quad_start|>": 151650,
19
+ "<|repo_name|>": 151663,
20
+ "<|video_pad|>": 151656,
21
+ "<|vision_end|>": 151653,
22
+ "<|vision_pad|>": 151654,
23
+ "<|vision_start|>": 151652
24
+ }
usable_model_bundles/local-qwen-0-5b-active-smoke/checkpoints/merged/chat_template.jinja ADDED
@@ -0,0 +1,54 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {%- if tools %}
2
+ {{- '<|im_start|>system\n' }}
3
+ {%- if messages[0]['role'] == 'system' %}
4
+ {{- messages[0]['content'] }}
5
+ {%- else %}
6
+ {{- 'You are Qwen, created by Alibaba Cloud. You are a helpful assistant.' }}
7
+ {%- endif %}
8
+ {{- "\n\n# Tools\n\nYou may call one or more functions to assist with the user query.\n\nYou are provided with function signatures within <tools></tools> XML tags:\n<tools>" }}
9
+ {%- for tool in tools %}
10
+ {{- "\n" }}
11
+ {{- tool | tojson }}
12
+ {%- endfor %}
13
+ {{- "\n</tools>\n\nFor each function call, return a json object with function name and arguments within <tool_call></tool_call> XML tags:\n<tool_call>\n{\"name\": <function-name>, \"arguments\": <args-json-object>}\n</tool_call><|im_end|>\n" }}
14
+ {%- else %}
15
+ {%- if messages[0]['role'] == 'system' %}
16
+ {{- '<|im_start|>system\n' + messages[0]['content'] + '<|im_end|>\n' }}
17
+ {%- else %}
18
+ {{- '<|im_start|>system\nYou are Qwen, created by Alibaba Cloud. You are a helpful assistant.<|im_end|>\n' }}
19
+ {%- endif %}
20
+ {%- endif %}
21
+ {%- for message in messages %}
22
+ {%- if (message.role == "user") or (message.role == "system" and not loop.first) or (message.role == "assistant" and not message.tool_calls) %}
23
+ {{- '<|im_start|>' + message.role + '\n' + message.content + '<|im_end|>' + '\n' }}
24
+ {%- elif message.role == "assistant" %}
25
+ {{- '<|im_start|>' + message.role }}
26
+ {%- if message.content %}
27
+ {{- '\n' + message.content }}
28
+ {%- endif %}
29
+ {%- for tool_call in message.tool_calls %}
30
+ {%- if tool_call.function is defined %}
31
+ {%- set tool_call = tool_call.function %}
32
+ {%- endif %}
33
+ {{- '\n<tool_call>\n{"name": "' }}
34
+ {{- tool_call.name }}
35
+ {{- '", "arguments": ' }}
36
+ {{- tool_call.arguments | tojson }}
37
+ {{- '}\n</tool_call>' }}
38
+ {%- endfor %}
39
+ {{- '<|im_end|>\n' }}
40
+ {%- elif message.role == "tool" %}
41
+ {%- if (loop.index0 == 0) or (messages[loop.index0 - 1].role != "tool") %}
42
+ {{- '<|im_start|>user' }}
43
+ {%- endif %}
44
+ {{- '\n<tool_response>\n' }}
45
+ {{- message.content }}
46
+ {{- '\n</tool_response>' }}
47
+ {%- if loop.last or (messages[loop.index0 + 1].role != "tool") %}
48
+ {{- '<|im_end|>\n' }}
49
+ {%- endif %}
50
+ {%- endif %}
51
+ {%- endfor %}
52
+ {%- if add_generation_prompt %}
53
+ {{- '<|im_start|>assistant\n' }}
54
+ {%- endif %}
usable_model_bundles/local-qwen-0-5b-active-smoke/checkpoints/merged/config.json ADDED
@@ -0,0 +1,54 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "architectures": [
3
+ "Qwen2ForCausalLM"
4
+ ],
5
+ "attention_dropout": 0.0,
6
+ "bos_token_id": 151643,
7
+ "dtype": "float16",
8
+ "eos_token_id": 151645,
9
+ "hidden_act": "silu",
10
+ "hidden_size": 896,
11
+ "initializer_range": 0.02,
12
+ "intermediate_size": 4864,
13
+ "layer_types": [
14
+ "full_attention",
15
+ "full_attention",
16
+ "full_attention",
17
+ "full_attention",
18
+ "full_attention",
19
+ "full_attention",
20
+ "full_attention",
21
+ "full_attention",
22
+ "full_attention",
23
+ "full_attention",
24
+ "full_attention",
25
+ "full_attention",
26
+ "full_attention",
27
+ "full_attention",
28
+ "full_attention",
29
+ "full_attention",
30
+ "full_attention",
31
+ "full_attention",
32
+ "full_attention",
33
+ "full_attention",
34
+ "full_attention",
35
+ "full_attention",
36
+ "full_attention",
37
+ "full_attention"
38
+ ],
39
+ "max_position_embeddings": 32768,
40
+ "max_window_layers": 21,
41
+ "model_type": "qwen2",
42
+ "num_attention_heads": 14,
43
+ "num_hidden_layers": 24,
44
+ "num_key_value_heads": 2,
45
+ "rms_norm_eps": 1e-06,
46
+ "rope_scaling": null,
47
+ "rope_theta": 1000000.0,
48
+ "sliding_window": null,
49
+ "tie_word_embeddings": true,
50
+ "transformers_version": "4.57.6",
51
+ "use_cache": true,
52
+ "use_sliding_window": false,
53
+ "vocab_size": 151936
54
+ }
usable_model_bundles/local-qwen-0-5b-active-smoke/checkpoints/merged/generation_config.json ADDED
@@ -0,0 +1,14 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "bos_token_id": 151643,
3
+ "do_sample": true,
4
+ "eos_token_id": [
5
+ 151645,
6
+ 151643
7
+ ],
8
+ "pad_token_id": 151643,
9
+ "repetition_penalty": 1.1,
10
+ "temperature": 0.7,
11
+ "top_k": 20,
12
+ "top_p": 0.8,
13
+ "transformers_version": "4.57.6"
14
+ }
usable_model_bundles/local-qwen-0-5b-active-smoke/checkpoints/merged/merge_report.json ADDED
@@ -0,0 +1,11 @@
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "status": "ok",
3
+ "adapter_dir": "/app/checkpoints/sft_adapter",
4
+ "output_dir": "/app/checkpoints/merged",
5
+ "base_model": "Qwen/Qwen2.5-0.5B-Instruct",
6
+ "merge_dtype": "float16",
7
+ "load_in_4bit": false,
8
+ "unsafe_override": false,
9
+ "parameters": 494032768,
10
+ "precision_warning": "none"
11
+ }
usable_model_bundles/local-qwen-0-5b-active-smoke/checkpoints/merged/merges.txt ADDED
The diff for this file is too large to render. See raw diff
 
usable_model_bundles/local-qwen-0-5b-active-smoke/checkpoints/merged/model.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a02d64a18e37e82e8791290532c22ee8f771007808557e7181058dffeada070a
3
+ size 988097536
usable_model_bundles/local-qwen-0-5b-active-smoke/checkpoints/merged/special_tokens_map.json ADDED
@@ -0,0 +1,31 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "additional_special_tokens": [
3
+ "<|im_start|>",
4
+ "<|im_end|>",
5
+ "<|object_ref_start|>",
6
+ "<|object_ref_end|>",
7
+ "<|box_start|>",
8
+ "<|box_end|>",
9
+ "<|quad_start|>",
10
+ "<|quad_end|>",
11
+ "<|vision_start|>",
12
+ "<|vision_end|>",
13
+ "<|vision_pad|>",
14
+ "<|image_pad|>",
15
+ "<|video_pad|>"
16
+ ],
17
+ "eos_token": {
18
+ "content": "<|im_end|>",
19
+ "lstrip": false,
20
+ "normalized": false,
21
+ "rstrip": false,
22
+ "single_word": false
23
+ },
24
+ "pad_token": {
25
+ "content": "<|endoftext|>",
26
+ "lstrip": false,
27
+ "normalized": false,
28
+ "rstrip": false,
29
+ "single_word": false
30
+ }
31
+ }
usable_model_bundles/local-qwen-0-5b-active-smoke/checkpoints/merged/tokenizer.json ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:9c5ae00e602b8860cbd784ba82a8aa14e8feecec692e7076590d014d7b7fdafa
3
+ size 11421896
usable_model_bundles/local-qwen-0-5b-active-smoke/checkpoints/merged/tokenizer_config.json ADDED
@@ -0,0 +1,207 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "add_bos_token": false,
3
+ "add_prefix_space": false,
4
+ "added_tokens_decoder": {
5
+ "151643": {
6
+ "content": "<|endoftext|>",
7
+ "lstrip": false,
8
+ "normalized": false,
9
+ "rstrip": false,
10
+ "single_word": false,
11
+ "special": true
12
+ },
13
+ "151644": {
14
+ "content": "<|im_start|>",
15
+ "lstrip": false,
16
+ "normalized": false,
17
+ "rstrip": false,
18
+ "single_word": false,
19
+ "special": true
20
+ },
21
+ "151645": {
22
+ "content": "<|im_end|>",
23
+ "lstrip": false,
24
+ "normalized": false,
25
+ "rstrip": false,
26
+ "single_word": false,
27
+ "special": true
28
+ },
29
+ "151646": {
30
+ "content": "<|object_ref_start|>",
31
+ "lstrip": false,
32
+ "normalized": false,
33
+ "rstrip": false,
34
+ "single_word": false,
35
+ "special": true
36
+ },
37
+ "151647": {
38
+ "content": "<|object_ref_end|>",
39
+ "lstrip": false,
40
+ "normalized": false,
41
+ "rstrip": false,
42
+ "single_word": false,
43
+ "special": true
44
+ },
45
+ "151648": {
46
+ "content": "<|box_start|>",
47
+ "lstrip": false,
48
+ "normalized": false,
49
+ "rstrip": false,
50
+ "single_word": false,
51
+ "special": true
52
+ },
53
+ "151649": {
54
+ "content": "<|box_end|>",
55
+ "lstrip": false,
56
+ "normalized": false,
57
+ "rstrip": false,
58
+ "single_word": false,
59
+ "special": true
60
+ },
61
+ "151650": {
62
+ "content": "<|quad_start|>",
63
+ "lstrip": false,
64
+ "normalized": false,
65
+ "rstrip": false,
66
+ "single_word": false,
67
+ "special": true
68
+ },
69
+ "151651": {
70
+ "content": "<|quad_end|>",
71
+ "lstrip": false,
72
+ "normalized": false,
73
+ "rstrip": false,
74
+ "single_word": false,
75
+ "special": true
76
+ },
77
+ "151652": {
78
+ "content": "<|vision_start|>",
79
+ "lstrip": false,
80
+ "normalized": false,
81
+ "rstrip": false,
82
+ "single_word": false,
83
+ "special": true
84
+ },
85
+ "151653": {
86
+ "content": "<|vision_end|>",
87
+ "lstrip": false,
88
+ "normalized": false,
89
+ "rstrip": false,
90
+ "single_word": false,
91
+ "special": true
92
+ },
93
+ "151654": {
94
+ "content": "<|vision_pad|>",
95
+ "lstrip": false,
96
+ "normalized": false,
97
+ "rstrip": false,
98
+ "single_word": false,
99
+ "special": true
100
+ },
101
+ "151655": {
102
+ "content": "<|image_pad|>",
103
+ "lstrip": false,
104
+ "normalized": false,
105
+ "rstrip": false,
106
+ "single_word": false,
107
+ "special": true
108
+ },
109
+ "151656": {
110
+ "content": "<|video_pad|>",
111
+ "lstrip": false,
112
+ "normalized": false,
113
+ "rstrip": false,
114
+ "single_word": false,
115
+ "special": true
116
+ },
117
+ "151657": {
118
+ "content": "<tool_call>",
119
+ "lstrip": false,
120
+ "normalized": false,
121
+ "rstrip": false,
122
+ "single_word": false,
123
+ "special": false
124
+ },
125
+ "151658": {
126
+ "content": "</tool_call>",
127
+ "lstrip": false,
128
+ "normalized": false,
129
+ "rstrip": false,
130
+ "single_word": false,
131
+ "special": false
132
+ },
133
+ "151659": {
134
+ "content": "<|fim_prefix|>",
135
+ "lstrip": false,
136
+ "normalized": false,
137
+ "rstrip": false,
138
+ "single_word": false,
139
+ "special": false
140
+ },
141
+ "151660": {
142
+ "content": "<|fim_middle|>",
143
+ "lstrip": false,
144
+ "normalized": false,
145
+ "rstrip": false,
146
+ "single_word": false,
147
+ "special": false
148
+ },
149
+ "151661": {
150
+ "content": "<|fim_suffix|>",
151
+ "lstrip": false,
152
+ "normalized": false,
153
+ "rstrip": false,
154
+ "single_word": false,
155
+ "special": false
156
+ },
157
+ "151662": {
158
+ "content": "<|fim_pad|>",
159
+ "lstrip": false,
160
+ "normalized": false,
161
+ "rstrip": false,
162
+ "single_word": false,
163
+ "special": false
164
+ },
165
+ "151663": {
166
+ "content": "<|repo_name|>",
167
+ "lstrip": false,
168
+ "normalized": false,
169
+ "rstrip": false,
170
+ "single_word": false,
171
+ "special": false
172
+ },
173
+ "151664": {
174
+ "content": "<|file_sep|>",
175
+ "lstrip": false,
176
+ "normalized": false,
177
+ "rstrip": false,
178
+ "single_word": false,
179
+ "special": false
180
+ }
181
+ },
182
+ "additional_special_tokens": [
183
+ "<|im_start|>",
184
+ "<|im_end|>",
185
+ "<|object_ref_start|>",
186
+ "<|object_ref_end|>",
187
+ "<|box_start|>",
188
+ "<|box_end|>",
189
+ "<|quad_start|>",
190
+ "<|quad_end|>",
191
+ "<|vision_start|>",
192
+ "<|vision_end|>",
193
+ "<|vision_pad|>",
194
+ "<|image_pad|>",
195
+ "<|video_pad|>"
196
+ ],
197
+ "bos_token": null,
198
+ "clean_up_tokenization_spaces": false,
199
+ "eos_token": "<|im_end|>",
200
+ "errors": "replace",
201
+ "extra_special_tokens": {},
202
+ "model_max_length": 131072,
203
+ "pad_token": "<|endoftext|>",
204
+ "split_special_tokens": false,
205
+ "tokenizer_class": "Qwen2Tokenizer",
206
+ "unk_token": null
207
+ }
usable_model_bundles/local-qwen-0-5b-active-smoke/checkpoints/merged/vocab.json ADDED
The diff for this file is too large to render. See raw diff
 
usable_model_bundles/local-qwen-0-5b-active-smoke/checkpoints/sft_adapter/README.md ADDED
@@ -0,0 +1,209 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ ---
2
+ base_model: Qwen/Qwen2.5-3B-Instruct
3
+ library_name: peft
4
+ pipeline_tag: text-generation
5
+ tags:
6
+ - base_model:adapter:Qwen/Qwen2.5-3B-Instruct
7
+ - lora
8
+ - sft
9
+ - transformers
10
+ - trl
11
+ ---
12
+
13
+ # Model Card for Model ID
14
+
15
+ <!-- Provide a quick summary of what the model is/does. -->
16
+
17
+
18
+
19
+ ## Model Details
20
+
21
+ ### Model Description
22
+
23
+ <!-- Provide a longer summary of what this model is. -->
24
+
25
+
26
+
27
+ - **Developed by:** [More Information Needed]
28
+ - **Funded by [optional]:** [More Information Needed]
29
+ - **Shared by [optional]:** [More Information Needed]
30
+ - **Model type:** [More Information Needed]
31
+ - **Language(s) (NLP):** [More Information Needed]
32
+ - **License:** [More Information Needed]
33
+ - **Finetuned from model [optional]:** [More Information Needed]
34
+
35
+ ### Model Sources [optional]
36
+
37
+ <!-- Provide the basic links for the model. -->
38
+
39
+ - **Repository:** [More Information Needed]
40
+ - **Paper [optional]:** [More Information Needed]
41
+ - **Demo [optional]:** [More Information Needed]
42
+
43
+ ## Uses
44
+
45
+ <!-- Address questions around how the model is intended to be used, including the foreseeable users of the model and those affected by the model. -->
46
+
47
+ ### Direct Use
48
+
49
+ <!-- This section is for the model use without fine-tuning or plugging into a larger ecosystem/app. -->
50
+
51
+ [More Information Needed]
52
+
53
+ ### Downstream Use [optional]
54
+
55
+ <!-- This section is for the model use when fine-tuned for a task, or when plugged into a larger ecosystem/app -->
56
+
57
+ [More Information Needed]
58
+
59
+ ### Out-of-Scope Use
60
+
61
+ <!-- This section addresses misuse, malicious use, and uses that the model will not work well for. -->
62
+
63
+ [More Information Needed]
64
+
65
+ ## Bias, Risks, and Limitations
66
+
67
+ <!-- This section is meant to convey both technical and sociotechnical limitations. -->
68
+
69
+ [More Information Needed]
70
+
71
+ ### Recommendations
72
+
73
+ <!-- This section is meant to convey recommendations with respect to the bias, risk, and technical limitations. -->
74
+
75
+ Users (both direct and downstream) should be made aware of the risks, biases and limitations of the model. More information needed for further recommendations.
76
+
77
+ ## How to Get Started with the Model
78
+
79
+ Use the code below to get started with the model.
80
+
81
+ [More Information Needed]
82
+
83
+ ## Training Details
84
+
85
+ ### Training Data
86
+
87
+ <!-- This should link to a Dataset Card, perhaps with a short stub of information on what the training data is all about as well as documentation related to data pre-processing or additional filtering. -->
88
+
89
+ [More Information Needed]
90
+
91
+ ### Training Procedure
92
+
93
+ <!-- This relates heavily to the Technical Specifications. Content here should link to that section when it is relevant to the training procedure. -->
94
+
95
+ #### Preprocessing [optional]
96
+
97
+ [More Information Needed]
98
+
99
+
100
+ #### Training Hyperparameters
101
+
102
+ - **Training regime:** [More Information Needed] <!--fp32, fp16 mixed precision, bf16 mixed precision, bf16 non-mixed precision, fp16 non-mixed precision, fp8 mixed precision -->
103
+
104
+ #### Speeds, Sizes, Times [optional]
105
+
106
+ <!-- This section provides information about throughput, start/end time, checkpoint size if relevant, etc. -->
107
+
108
+ [More Information Needed]
109
+
110
+ ## Evaluation
111
+
112
+ <!-- This section describes the evaluation protocols and provides the results. -->
113
+
114
+ ### Testing Data, Factors & Metrics
115
+
116
+ #### Testing Data
117
+
118
+ <!-- This should link to a Dataset Card if possible. -->
119
+
120
+ [More Information Needed]
121
+
122
+ #### Factors
123
+
124
+ <!-- These are the things the evaluation is disaggregating by, e.g., subpopulations or domains. -->
125
+
126
+ [More Information Needed]
127
+
128
+ #### Metrics
129
+
130
+ <!-- These are the evaluation metrics being used, ideally with a description of why. -->
131
+
132
+ [More Information Needed]
133
+
134
+ ### Results
135
+
136
+ [More Information Needed]
137
+
138
+ #### Summary
139
+
140
+
141
+
142
+ ## Model Examination [optional]
143
+
144
+ <!-- Relevant interpretability work for the model goes here -->
145
+
146
+ [More Information Needed]
147
+
148
+ ## Environmental Impact
149
+
150
+ <!-- Total emissions (in grams of CO2eq) and additional considerations, such as electricity usage, go here. Edit the suggested text below accordingly -->
151
+
152
+ Carbon emissions can be estimated using the [Machine Learning Impact calculator](https://mlco2.github.io/impact#compute) presented in [Lacoste et al. (2019)](https://arxiv.org/abs/1910.09700).
153
+
154
+ - **Hardware Type:** [More Information Needed]
155
+ - **Hours used:** [More Information Needed]
156
+ - **Cloud Provider:** [More Information Needed]
157
+ - **Compute Region:** [More Information Needed]
158
+ - **Carbon Emitted:** [More Information Needed]
159
+
160
+ ## Technical Specifications [optional]
161
+
162
+ ### Model Architecture and Objective
163
+
164
+ [More Information Needed]
165
+
166
+ ### Compute Infrastructure
167
+
168
+ [More Information Needed]
169
+
170
+ #### Hardware
171
+
172
+ [More Information Needed]
173
+
174
+ #### Software
175
+
176
+ [More Information Needed]
177
+
178
+ ## Citation [optional]
179
+
180
+ <!-- If there is a paper or blog post introducing the model, the APA and Bibtex information for that should go in this section. -->
181
+
182
+ **BibTeX:**
183
+
184
+ [More Information Needed]
185
+
186
+ **APA:**
187
+
188
+ [More Information Needed]
189
+
190
+ ## Glossary [optional]
191
+
192
+ <!-- If relevant, include terms and calculations in this section that can help readers understand the model or model card. -->
193
+
194
+ [More Information Needed]
195
+
196
+ ## More Information [optional]
197
+
198
+ [More Information Needed]
199
+
200
+ ## Model Card Authors [optional]
201
+
202
+ [More Information Needed]
203
+
204
+ ## Model Card Contact
205
+
206
+ [More Information Needed]
207
+ ### Framework versions
208
+
209
+ - PEFT 0.19.1
usable_model_bundles/local-qwen-0-5b-active-smoke/checkpoints/sft_adapter/adapter_config.json ADDED
@@ -0,0 +1,43 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "alora_invocation_tokens": null,
3
+ "alpha_pattern": {},
4
+ "arrow_config": null,
5
+ "auto_mapping": null,
6
+ "base_model_name_or_path": "Qwen/Qwen2.5-3B-Instruct",
7
+ "bias": "none",
8
+ "corda_config": null,
9
+ "ensure_weight_tying": false,
10
+ "eva_config": null,
11
+ "exclude_modules": null,
12
+ "fan_in_fan_out": false,
13
+ "inference_mode": true,
14
+ "init_lora_weights": true,
15
+ "layer_replication": null,
16
+ "layers_pattern": null,
17
+ "layers_to_transform": null,
18
+ "loftq_config": {},
19
+ "lora_alpha": 32,
20
+ "lora_bias": false,
21
+ "lora_dropout": 0.05,
22
+ "lora_ga_config": null,
23
+ "megatron_config": null,
24
+ "megatron_core": "megatron.core",
25
+ "modules_to_save": null,
26
+ "peft_type": "LORA",
27
+ "peft_version": "0.19.1",
28
+ "qalora_group_size": 16,
29
+ "r": 16,
30
+ "rank_pattern": {},
31
+ "revision": null,
32
+ "target_modules": [
33
+ "q_proj",
34
+ "v_proj"
35
+ ],
36
+ "target_parameters": null,
37
+ "task_type": "CAUSAL_LM",
38
+ "trainable_token_indices": null,
39
+ "use_bdlora": null,
40
+ "use_dora": false,
41
+ "use_qalora": false,
42
+ "use_rslora": false
43
+ }
usable_model_bundles/local-qwen-0-5b-active-smoke/checkpoints/sft_adapter/adapter_model.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:494fce09fcbe83dc7ddc66b8fc681f040e43c1ac68693ce0db73ba3607802679
3
+ size 14764792
usable_model_bundles/local-qwen-0-5b-active-smoke/checkpoints/sft_adapter/added_tokens.json ADDED
@@ -0,0 +1,24 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "</tool_call>": 151658,
3
+ "<tool_call>": 151657,
4
+ "<|box_end|>": 151649,
5
+ "<|box_start|>": 151648,
6
+ "<|endoftext|>": 151643,
7
+ "<|file_sep|>": 151664,
8
+ "<|fim_middle|>": 151660,
9
+ "<|fim_pad|>": 151662,
10
+ "<|fim_prefix|>": 151659,
11
+ "<|fim_suffix|>": 151661,
12
+ "<|im_end|>": 151645,
13
+ "<|im_start|>": 151644,
14
+ "<|image_pad|>": 151655,
15
+ "<|object_ref_end|>": 151647,
16
+ "<|object_ref_start|>": 151646,
17
+ "<|quad_end|>": 151651,
18
+ "<|quad_start|>": 151650,
19
+ "<|repo_name|>": 151663,
20
+ "<|video_pad|>": 151656,
21
+ "<|vision_end|>": 151653,
22
+ "<|vision_pad|>": 151654,
23
+ "<|vision_start|>": 151652
24
+ }
usable_model_bundles/local-qwen-0-5b-active-smoke/checkpoints/sft_adapter/chat_template.jinja ADDED
@@ -0,0 +1,54 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {%- if tools %}
2
+ {{- '<|im_start|>system\n' }}
3
+ {%- if messages[0]['role'] == 'system' %}
4
+ {{- messages[0]['content'] }}
5
+ {%- else %}
6
+ {{- 'You are Qwen, created by Alibaba Cloud. You are a helpful assistant.' }}
7
+ {%- endif %}
8
+ {{- "\n\n# Tools\n\nYou may call one or more functions to assist with the user query.\n\nYou are provided with function signatures within <tools></tools> XML tags:\n<tools>" }}
9
+ {%- for tool in tools %}
10
+ {{- "\n" }}
11
+ {{- tool | tojson }}
12
+ {%- endfor %}
13
+ {{- "\n</tools>\n\nFor each function call, return a json object with function name and arguments within <tool_call></tool_call> XML tags:\n<tool_call>\n{\"name\": <function-name>, \"arguments\": <args-json-object>}\n</tool_call><|im_end|>\n" }}
14
+ {%- else %}
15
+ {%- if messages[0]['role'] == 'system' %}
16
+ {{- '<|im_start|>system\n' + messages[0]['content'] + '<|im_end|>\n' }}
17
+ {%- else %}
18
+ {{- '<|im_start|>system\nYou are Qwen, created by Alibaba Cloud. You are a helpful assistant.<|im_end|>\n' }}
19
+ {%- endif %}
20
+ {%- endif %}
21
+ {%- for message in messages %}
22
+ {%- if (message.role == "user") or (message.role == "system" and not loop.first) or (message.role == "assistant" and not message.tool_calls) %}
23
+ {{- '<|im_start|>' + message.role + '\n' + message.content + '<|im_end|>' + '\n' }}
24
+ {%- elif message.role == "assistant" %}
25
+ {{- '<|im_start|>' + message.role }}
26
+ {%- if message.content %}
27
+ {{- '\n' + message.content }}
28
+ {%- endif %}
29
+ {%- for tool_call in message.tool_calls %}
30
+ {%- if tool_call.function is defined %}
31
+ {%- set tool_call = tool_call.function %}
32
+ {%- endif %}
33
+ {{- '\n<tool_call>\n{"name": "' }}
34
+ {{- tool_call.name }}
35
+ {{- '", "arguments": ' }}
36
+ {{- tool_call.arguments | tojson }}
37
+ {{- '}\n</tool_call>' }}
38
+ {%- endfor %}
39
+ {{- '<|im_end|>\n' }}
40
+ {%- elif message.role == "tool" %}
41
+ {%- if (loop.index0 == 0) or (messages[loop.index0 - 1].role != "tool") %}
42
+ {{- '<|im_start|>user' }}
43
+ {%- endif %}
44
+ {{- '\n<tool_response>\n' }}
45
+ {{- message.content }}
46
+ {{- '\n</tool_response>' }}
47
+ {%- if loop.last or (messages[loop.index0 + 1].role != "tool") %}
48
+ {{- '<|im_end|>\n' }}
49
+ {%- endif %}
50
+ {%- endif %}
51
+ {%- endfor %}
52
+ {%- if add_generation_prompt %}
53
+ {{- '<|im_start|>assistant\n' }}
54
+ {%- endif %}
usable_model_bundles/local-qwen-0-5b-active-smoke/checkpoints/sft_adapter/merges.txt ADDED
The diff for this file is too large to render. See raw diff
 
usable_model_bundles/local-qwen-0-5b-active-smoke/checkpoints/sft_adapter/special_tokens_map.json ADDED
@@ -0,0 +1,31 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "additional_special_tokens": [
3
+ "<|im_start|>",
4
+ "<|im_end|>",
5
+ "<|object_ref_start|>",
6
+ "<|object_ref_end|>",
7
+ "<|box_start|>",
8
+ "<|box_end|>",
9
+ "<|quad_start|>",
10
+ "<|quad_end|>",
11
+ "<|vision_start|>",
12
+ "<|vision_end|>",
13
+ "<|vision_pad|>",
14
+ "<|image_pad|>",
15
+ "<|video_pad|>"
16
+ ],
17
+ "eos_token": {
18
+ "content": "<|im_end|>",
19
+ "lstrip": false,
20
+ "normalized": false,
21
+ "rstrip": false,
22
+ "single_word": false
23
+ },
24
+ "pad_token": {
25
+ "content": "<|endoftext|>",
26
+ "lstrip": false,
27
+ "normalized": false,
28
+ "rstrip": false,
29
+ "single_word": false
30
+ }
31
+ }
usable_model_bundles/local-qwen-0-5b-active-smoke/checkpoints/sft_adapter/tokenizer.json ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:9c5ae00e602b8860cbd784ba82a8aa14e8feecec692e7076590d014d7b7fdafa
3
+ size 11421896
usable_model_bundles/local-qwen-0-5b-active-smoke/checkpoints/sft_adapter/tokenizer_config.json ADDED
@@ -0,0 +1,207 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "add_bos_token": false,
3
+ "add_prefix_space": false,
4
+ "added_tokens_decoder": {
5
+ "151643": {
6
+ "content": "<|endoftext|>",
7
+ "lstrip": false,
8
+ "normalized": false,
9
+ "rstrip": false,
10
+ "single_word": false,
11
+ "special": true
12
+ },
13
+ "151644": {
14
+ "content": "<|im_start|>",
15
+ "lstrip": false,
16
+ "normalized": false,
17
+ "rstrip": false,
18
+ "single_word": false,
19
+ "special": true
20
+ },
21
+ "151645": {
22
+ "content": "<|im_end|>",
23
+ "lstrip": false,
24
+ "normalized": false,
25
+ "rstrip": false,
26
+ "single_word": false,
27
+ "special": true
28
+ },
29
+ "151646": {
30
+ "content": "<|object_ref_start|>",
31
+ "lstrip": false,
32
+ "normalized": false,
33
+ "rstrip": false,
34
+ "single_word": false,
35
+ "special": true
36
+ },
37
+ "151647": {
38
+ "content": "<|object_ref_end|>",
39
+ "lstrip": false,
40
+ "normalized": false,
41
+ "rstrip": false,
42
+ "single_word": false,
43
+ "special": true
44
+ },
45
+ "151648": {
46
+ "content": "<|box_start|>",
47
+ "lstrip": false,
48
+ "normalized": false,
49
+ "rstrip": false,
50
+ "single_word": false,
51
+ "special": true
52
+ },
53
+ "151649": {
54
+ "content": "<|box_end|>",
55
+ "lstrip": false,
56
+ "normalized": false,
57
+ "rstrip": false,
58
+ "single_word": false,
59
+ "special": true
60
+ },
61
+ "151650": {
62
+ "content": "<|quad_start|>",
63
+ "lstrip": false,
64
+ "normalized": false,
65
+ "rstrip": false,
66
+ "single_word": false,
67
+ "special": true
68
+ },
69
+ "151651": {
70
+ "content": "<|quad_end|>",
71
+ "lstrip": false,
72
+ "normalized": false,
73
+ "rstrip": false,
74
+ "single_word": false,
75
+ "special": true
76
+ },
77
+ "151652": {
78
+ "content": "<|vision_start|>",
79
+ "lstrip": false,
80
+ "normalized": false,
81
+ "rstrip": false,
82
+ "single_word": false,
83
+ "special": true
84
+ },
85
+ "151653": {
86
+ "content": "<|vision_end|>",
87
+ "lstrip": false,
88
+ "normalized": false,
89
+ "rstrip": false,
90
+ "single_word": false,
91
+ "special": true
92
+ },
93
+ "151654": {
94
+ "content": "<|vision_pad|>",
95
+ "lstrip": false,
96
+ "normalized": false,
97
+ "rstrip": false,
98
+ "single_word": false,
99
+ "special": true
100
+ },
101
+ "151655": {
102
+ "content": "<|image_pad|>",
103
+ "lstrip": false,
104
+ "normalized": false,
105
+ "rstrip": false,
106
+ "single_word": false,
107
+ "special": true
108
+ },
109
+ "151656": {
110
+ "content": "<|video_pad|>",
111
+ "lstrip": false,
112
+ "normalized": false,
113
+ "rstrip": false,
114
+ "single_word": false,
115
+ "special": true
116
+ },
117
+ "151657": {
118
+ "content": "<tool_call>",
119
+ "lstrip": false,
120
+ "normalized": false,
121
+ "rstrip": false,
122
+ "single_word": false,
123
+ "special": false
124
+ },
125
+ "151658": {
126
+ "content": "</tool_call>",
127
+ "lstrip": false,
128
+ "normalized": false,
129
+ "rstrip": false,
130
+ "single_word": false,
131
+ "special": false
132
+ },
133
+ "151659": {
134
+ "content": "<|fim_prefix|>",
135
+ "lstrip": false,
136
+ "normalized": false,
137
+ "rstrip": false,
138
+ "single_word": false,
139
+ "special": false
140
+ },
141
+ "151660": {
142
+ "content": "<|fim_middle|>",
143
+ "lstrip": false,
144
+ "normalized": false,
145
+ "rstrip": false,
146
+ "single_word": false,
147
+ "special": false
148
+ },
149
+ "151661": {
150
+ "content": "<|fim_suffix|>",
151
+ "lstrip": false,
152
+ "normalized": false,
153
+ "rstrip": false,
154
+ "single_word": false,
155
+ "special": false
156
+ },
157
+ "151662": {
158
+ "content": "<|fim_pad|>",
159
+ "lstrip": false,
160
+ "normalized": false,
161
+ "rstrip": false,
162
+ "single_word": false,
163
+ "special": false
164
+ },
165
+ "151663": {
166
+ "content": "<|repo_name|>",
167
+ "lstrip": false,
168
+ "normalized": false,
169
+ "rstrip": false,
170
+ "single_word": false,
171
+ "special": false
172
+ },
173
+ "151664": {
174
+ "content": "<|file_sep|>",
175
+ "lstrip": false,
176
+ "normalized": false,
177
+ "rstrip": false,
178
+ "single_word": false,
179
+ "special": false
180
+ }
181
+ },
182
+ "additional_special_tokens": [
183
+ "<|im_start|>",
184
+ "<|im_end|>",
185
+ "<|object_ref_start|>",
186
+ "<|object_ref_end|>",
187
+ "<|box_start|>",
188
+ "<|box_end|>",
189
+ "<|quad_start|>",
190
+ "<|quad_end|>",
191
+ "<|vision_start|>",
192
+ "<|vision_end|>",
193
+ "<|vision_pad|>",
194
+ "<|image_pad|>",
195
+ "<|video_pad|>"
196
+ ],
197
+ "bos_token": null,
198
+ "clean_up_tokenization_spaces": false,
199
+ "eos_token": "<|im_end|>",
200
+ "errors": "replace",
201
+ "extra_special_tokens": {},
202
+ "model_max_length": 131072,
203
+ "pad_token": "<|endoftext|>",
204
+ "split_special_tokens": false,
205
+ "tokenizer_class": "Qwen2Tokenizer",
206
+ "unk_token": null
207
+ }
usable_model_bundles/local-qwen-0-5b-active-smoke/checkpoints/sft_adapter/training_args.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:706b7816b266d97637317c189c5087868f9e27dd4669d62da81ca9914762e5ea
3
+ size 5880
usable_model_bundles/local-qwen-0-5b-active-smoke/checkpoints/sft_adapter/vocab.json ADDED
The diff for this file is too large to render. See raw diff
 
usable_model_bundles/local-qwen-0-5b-active-smoke/manifests/active_model_manifest.json ADDED
@@ -0,0 +1,68 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "status": "ok",
3
+ "enabled": true,
4
+ "activated_at_utc": "2026-04-26T02:24:15.464507+00:00",
5
+ "run_id": "qwen-qwen2-5-0-5b-instruct",
6
+ "source": "top-level",
7
+ "label": "local-qwen-0.5b-active-smoke",
8
+ "model_id": "Qwen/Qwen2.5-0.5B-Instruct",
9
+ "base_model": "Qwen/Qwen2.5-0.5B-Instruct",
10
+ "preferred_artifact": "grpo_adapter",
11
+ "mode": "symlink",
12
+ "source_checkpoint_dir": "checkpoints",
13
+ "source_report_dir": "outputs/reports",
14
+ "grpo_adapter": "checkpoints/active/grpo_adapter",
15
+ "merged_model": "checkpoints/active/merged",
16
+ "sft_adapter": "checkpoints/active/sft_adapter",
17
+ "availability": {
18
+ "grpo_adapter": true,
19
+ "merged": true,
20
+ "sft_adapter": true
21
+ },
22
+ "reports": {
23
+ "improvement_report_benchmark.json": "outputs/reports/active_model/improvement_report_benchmark.json",
24
+ "anti_hacking_overfit_report.json": "outputs/reports/active_model/anti_hacking_overfit_report.json",
25
+ "grpo_trl_run_strict_check.json": "outputs/reports/active_model/grpo_trl_run_strict_check.json",
26
+ "postsave_inference.json": "outputs/reports/active_model/postsave_inference.json",
27
+ "sft_trl_run.json": "outputs/reports/active_model/sft_trl_run.json",
28
+ "plot_index.json": "outputs/reports/active_model/plot_index.json",
29
+ "dose_train.json": "outputs/reports/active_model/dose_train.json",
30
+ "baselines.json": "outputs/reports/active_model/baselines.json",
31
+ "robustness.json": "outputs/reports/active_model/robustness.json",
32
+ "grpo_trl_run_fallback_check.json": "outputs/reports/active_model/grpo_trl_run_fallback_check.json",
33
+ "sft_run.json": "outputs/reports/active_model/sft_run.json",
34
+ "benchmark_report.txt": "outputs/reports/active_model/benchmark_report.txt",
35
+ "dosing_grpo.json": "outputs/reports/active_model/dosing_grpo.json",
36
+ "grpo_ablation_report.json": "outputs/reports/active_model/grpo_ablation_report.json",
37
+ "frontier_ready.json": "outputs/reports/active_model/frontier_ready.json",
38
+ "improvement_report.json": "outputs/reports/active_model/improvement_report.json",
39
+ "hf_sweep_summary.json": "outputs/reports/active_model/hf_sweep_summary.json",
40
+ "planner_grpo.json": "outputs/reports/active_model/planner_grpo.json",
41
+ "grpo_trl_run.json": "outputs/reports/active_model/grpo_trl_run.json",
42
+ "risk_train.json": "outputs/reports/active_model/risk_train.json",
43
+ "grpo_trl_run_smoke.json": "outputs/reports/active_model/grpo_trl_run_smoke.json",
44
+ "inference_benchmark.json": "outputs/reports/active_model/inference_benchmark.json",
45
+ "supervisor_grpo.json": "outputs/reports/active_model/supervisor_grpo.json",
46
+ "acceptance_gate.json": "outputs/reports/active_model/acceptance_gate.json",
47
+ "grpo_trl_run_auto.json": "outputs/reports/active_model/grpo_trl_run_auto.json",
48
+ "hf_training_status.json": "outputs/reports/active_model/hf_training_status.json",
49
+ "benchmark_report.json": "outputs/reports/active_model/benchmark_report.json",
50
+ "postsave_inference_smoke.json": "outputs/reports/active_model/postsave_inference_smoke.json",
51
+ "graph_train.json": "outputs/reports/active_model/graph_train.json",
52
+ "sweeps/qwen-qwen2-5-0-5b-instruct/sft_trl_run.json": "outputs/reports/active_model/sweeps/qwen-qwen2-5-0-5b-instruct/sft_trl_run.json",
53
+ "sweeps/qwen-qwen2-5-0-5b-instruct/run_metadata.json": "outputs/reports/active_model/sweeps/qwen-qwen2-5-0-5b-instruct/run_metadata.json",
54
+ "sweeps/qwen-qwen2-5-0-5b-instruct/sft_history.json": "outputs/reports/active_model/sweeps/qwen-qwen2-5-0-5b-instruct/sft_history.json",
55
+ "sweeps/qwen-qwen2-5-0-5b-instruct/postsave_inference_sft.json": "outputs/reports/active_model/sweeps/qwen-qwen2-5-0-5b-instruct/postsave_inference_sft.json",
56
+ "sweeps/qwen-qwen2-5-3b-instruct/sft_trl_run.json": "outputs/reports/active_model/sweeps/qwen-qwen2-5-3b-instruct/sft_trl_run.json",
57
+ "sweeps/qwen-qwen2-5-3b-instruct/run_metadata.json": "outputs/reports/active_model/sweeps/qwen-qwen2-5-3b-instruct/run_metadata.json",
58
+ "sweeps/qwen-qwen2-5-3b-instruct/sft_history.json": "outputs/reports/active_model/sweeps/qwen-qwen2-5-3b-instruct/sft_history.json",
59
+ "sweeps/qwen-qwen2-5-3b-instruct/postsave_inference_sft.json": "outputs/reports/active_model/sweeps/qwen-qwen2-5-3b-instruct/postsave_inference_sft.json",
60
+ "sweeps/qwen-qwen2-5-1-5b-instruct/sft_trl_run.json": "outputs/reports/active_model/sweeps/qwen-qwen2-5-1-5b-instruct/sft_trl_run.json",
61
+ "sweeps/qwen-qwen2-5-1-5b-instruct/run_metadata.json": "outputs/reports/active_model/sweeps/qwen-qwen2-5-1-5b-instruct/run_metadata.json",
62
+ "sweeps/qwen-qwen2-5-1-5b-instruct/sft_history.json": "outputs/reports/active_model/sweeps/qwen-qwen2-5-1-5b-instruct/sft_history.json",
63
+ "sweeps/qwen-qwen2-5-1-5b-instruct/postsave_inference_sft.json": "outputs/reports/active_model/sweeps/qwen-qwen2-5-1-5b-instruct/postsave_inference_sft.json",
64
+ "grpo_training_cycle/grpo_trl_run.json": "outputs/reports/active_model/grpo_training_cycle/grpo_trl_run.json",
65
+ "grpo_training_cycle/hf_training_status.json": "outputs/reports/active_model/grpo_training_cycle/hf_training_status.json"
66
+ },
67
+ "notes": "This manifest controls local product inference. Prefer grpo_adapter for the RL policy; merged is the SFT baseline fallback when no GRPO adapter is available."
68
+ }
usable_model_bundles/local-qwen-0-5b-active-smoke/manifests/active_model_report_manifest.json ADDED
@@ -0,0 +1,68 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "status": "ok",
3
+ "enabled": true,
4
+ "activated_at_utc": "2026-04-26T02:24:15.464507+00:00",
5
+ "run_id": "qwen-qwen2-5-0-5b-instruct",
6
+ "source": "top-level",
7
+ "label": "local-qwen-0.5b-active-smoke",
8
+ "model_id": "Qwen/Qwen2.5-0.5B-Instruct",
9
+ "base_model": "Qwen/Qwen2.5-0.5B-Instruct",
10
+ "preferred_artifact": "grpo_adapter",
11
+ "mode": "symlink",
12
+ "source_checkpoint_dir": "checkpoints",
13
+ "source_report_dir": "outputs/reports",
14
+ "grpo_adapter": "checkpoints/active/grpo_adapter",
15
+ "merged_model": "checkpoints/active/merged",
16
+ "sft_adapter": "checkpoints/active/sft_adapter",
17
+ "availability": {
18
+ "grpo_adapter": true,
19
+ "merged": true,
20
+ "sft_adapter": true
21
+ },
22
+ "reports": {
23
+ "improvement_report_benchmark.json": "outputs/reports/active_model/improvement_report_benchmark.json",
24
+ "anti_hacking_overfit_report.json": "outputs/reports/active_model/anti_hacking_overfit_report.json",
25
+ "grpo_trl_run_strict_check.json": "outputs/reports/active_model/grpo_trl_run_strict_check.json",
26
+ "postsave_inference.json": "outputs/reports/active_model/postsave_inference.json",
27
+ "sft_trl_run.json": "outputs/reports/active_model/sft_trl_run.json",
28
+ "plot_index.json": "outputs/reports/active_model/plot_index.json",
29
+ "dose_train.json": "outputs/reports/active_model/dose_train.json",
30
+ "baselines.json": "outputs/reports/active_model/baselines.json",
31
+ "robustness.json": "outputs/reports/active_model/robustness.json",
32
+ "grpo_trl_run_fallback_check.json": "outputs/reports/active_model/grpo_trl_run_fallback_check.json",
33
+ "sft_run.json": "outputs/reports/active_model/sft_run.json",
34
+ "benchmark_report.txt": "outputs/reports/active_model/benchmark_report.txt",
35
+ "dosing_grpo.json": "outputs/reports/active_model/dosing_grpo.json",
36
+ "grpo_ablation_report.json": "outputs/reports/active_model/grpo_ablation_report.json",
37
+ "frontier_ready.json": "outputs/reports/active_model/frontier_ready.json",
38
+ "improvement_report.json": "outputs/reports/active_model/improvement_report.json",
39
+ "hf_sweep_summary.json": "outputs/reports/active_model/hf_sweep_summary.json",
40
+ "planner_grpo.json": "outputs/reports/active_model/planner_grpo.json",
41
+ "grpo_trl_run.json": "outputs/reports/active_model/grpo_trl_run.json",
42
+ "risk_train.json": "outputs/reports/active_model/risk_train.json",
43
+ "grpo_trl_run_smoke.json": "outputs/reports/active_model/grpo_trl_run_smoke.json",
44
+ "inference_benchmark.json": "outputs/reports/active_model/inference_benchmark.json",
45
+ "supervisor_grpo.json": "outputs/reports/active_model/supervisor_grpo.json",
46
+ "acceptance_gate.json": "outputs/reports/active_model/acceptance_gate.json",
47
+ "grpo_trl_run_auto.json": "outputs/reports/active_model/grpo_trl_run_auto.json",
48
+ "hf_training_status.json": "outputs/reports/active_model/hf_training_status.json",
49
+ "benchmark_report.json": "outputs/reports/active_model/benchmark_report.json",
50
+ "postsave_inference_smoke.json": "outputs/reports/active_model/postsave_inference_smoke.json",
51
+ "graph_train.json": "outputs/reports/active_model/graph_train.json",
52
+ "sweeps/qwen-qwen2-5-0-5b-instruct/sft_trl_run.json": "outputs/reports/active_model/sweeps/qwen-qwen2-5-0-5b-instruct/sft_trl_run.json",
53
+ "sweeps/qwen-qwen2-5-0-5b-instruct/run_metadata.json": "outputs/reports/active_model/sweeps/qwen-qwen2-5-0-5b-instruct/run_metadata.json",
54
+ "sweeps/qwen-qwen2-5-0-5b-instruct/sft_history.json": "outputs/reports/active_model/sweeps/qwen-qwen2-5-0-5b-instruct/sft_history.json",
55
+ "sweeps/qwen-qwen2-5-0-5b-instruct/postsave_inference_sft.json": "outputs/reports/active_model/sweeps/qwen-qwen2-5-0-5b-instruct/postsave_inference_sft.json",
56
+ "sweeps/qwen-qwen2-5-3b-instruct/sft_trl_run.json": "outputs/reports/active_model/sweeps/qwen-qwen2-5-3b-instruct/sft_trl_run.json",
57
+ "sweeps/qwen-qwen2-5-3b-instruct/run_metadata.json": "outputs/reports/active_model/sweeps/qwen-qwen2-5-3b-instruct/run_metadata.json",
58
+ "sweeps/qwen-qwen2-5-3b-instruct/sft_history.json": "outputs/reports/active_model/sweeps/qwen-qwen2-5-3b-instruct/sft_history.json",
59
+ "sweeps/qwen-qwen2-5-3b-instruct/postsave_inference_sft.json": "outputs/reports/active_model/sweeps/qwen-qwen2-5-3b-instruct/postsave_inference_sft.json",
60
+ "sweeps/qwen-qwen2-5-1-5b-instruct/sft_trl_run.json": "outputs/reports/active_model/sweeps/qwen-qwen2-5-1-5b-instruct/sft_trl_run.json",
61
+ "sweeps/qwen-qwen2-5-1-5b-instruct/run_metadata.json": "outputs/reports/active_model/sweeps/qwen-qwen2-5-1-5b-instruct/run_metadata.json",
62
+ "sweeps/qwen-qwen2-5-1-5b-instruct/sft_history.json": "outputs/reports/active_model/sweeps/qwen-qwen2-5-1-5b-instruct/sft_history.json",
63
+ "sweeps/qwen-qwen2-5-1-5b-instruct/postsave_inference_sft.json": "outputs/reports/active_model/sweeps/qwen-qwen2-5-1-5b-instruct/postsave_inference_sft.json",
64
+ "grpo_training_cycle/grpo_trl_run.json": "outputs/reports/active_model/grpo_training_cycle/grpo_trl_run.json",
65
+ "grpo_training_cycle/hf_training_status.json": "outputs/reports/active_model/grpo_training_cycle/hf_training_status.json"
66
+ },
67
+ "notes": "This manifest controls local product inference. Prefer grpo_adapter for the RL policy; merged is the SFT baseline fallback when no GRPO adapter is available."
68
+ }
usable_model_bundles/local-qwen-0-5b-active-smoke/manifests/submission_evidence_manifest.json ADDED
@@ -0,0 +1,237 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "status": "ok",
3
+ "generated_at_unix": 1777179035.763374,
4
+ "models": [
5
+ {
6
+ "run_id": "qwen-qwen2-5-0-5b-instruct",
7
+ "model_id": "Qwen/Qwen2.5-0.5B-Instruct",
8
+ "label": "Qwen 0.5B",
9
+ "statuses": {
10
+ "sft_training": "artifact_available",
11
+ "sft_postsave_inference": "artifact_available",
12
+ "grpo_training": "remote_completed_pending_artifact_upload",
13
+ "grpo_postsave_inference": "remote_completed_pending_artifact_upload",
14
+ "policy_ablation": "remote_completed_pending_artifact_upload"
15
+ },
16
+ "metrics": {
17
+ "sft_train_loss": 0.19233327957964502,
18
+ "sft_train_runtime": 234.6302,
19
+ "sft_examples_used": 2000,
20
+ "sft_history_steps": 2001,
21
+ "sft_first_loss": 3.0856,
22
+ "sft_last_loss": 0.0626,
23
+ "sft_best_loss": 0.0057,
24
+ "sft_last_token_accuracy": 0.9717137813568115,
25
+ "sft_valid_rate": 1.0,
26
+ "sft_avg_env_reward": 0.726,
27
+ "sft_avg_latency_seconds": 1.839,
28
+ "grpo_avg_reward": null,
29
+ "grpo_history_steps": 0,
30
+ "grpo_valid_rate": null,
31
+ "grpo_avg_env_reward": null,
32
+ "grpo_avg_latency_seconds": null
33
+ },
34
+ "files": {
35
+ "run_metadata.json": "/Users/daver/Desktop/Meta_Pytorch_OpenEnv_Scaler/polyguard-rl/outputs/reports/submission_evidence/qwen_0_5b_1_5b/runs/qwen-qwen2-5-0-5b-instruct/run_metadata.json",
36
+ "sft_trl_run.json": "/Users/daver/Desktop/Meta_Pytorch_OpenEnv_Scaler/polyguard-rl/outputs/reports/submission_evidence/qwen_0_5b_1_5b/runs/qwen-qwen2-5-0-5b-instruct/sft_trl_run.json",
37
+ "sft_history.json": "/Users/daver/Desktop/Meta_Pytorch_OpenEnv_Scaler/polyguard-rl/outputs/reports/submission_evidence/qwen_0_5b_1_5b/runs/qwen-qwen2-5-0-5b-instruct/sft_history.json",
38
+ "postsave_inference_sft.json": "/Users/daver/Desktop/Meta_Pytorch_OpenEnv_Scaler/polyguard-rl/outputs/reports/submission_evidence/qwen_0_5b_1_5b/runs/qwen-qwen2-5-0-5b-instruct/postsave_inference_sft.json",
39
+ "grpo_trl_run.json": "",
40
+ "grpo_history.json": "",
41
+ "grpo_reward_components.jsonl": "",
42
+ "postsave_inference_grpo.json": "",
43
+ "grpo_ablation_report.json": "",
44
+ "error.json": ""
45
+ }
46
+ },
47
+ {
48
+ "run_id": "qwen-qwen2-5-1-5b-instruct",
49
+ "model_id": "Qwen/Qwen2.5-1.5B-Instruct",
50
+ "label": "Qwen 1.5B",
51
+ "statuses": {
52
+ "sft_training": "artifact_available",
53
+ "sft_postsave_inference": "artifact_available",
54
+ "grpo_training": "remote_completed_pending_artifact_upload",
55
+ "grpo_postsave_inference": "remote_completed_pending_artifact_upload",
56
+ "policy_ablation": "remote_completed_pending_artifact_upload"
57
+ },
58
+ "metrics": {
59
+ "sft_train_loss": 0.11515871361242898,
60
+ "sft_train_runtime": 483.7085,
61
+ "sft_examples_used": 2000,
62
+ "sft_history_steps": 4001,
63
+ "sft_first_loss": 2.9686,
64
+ "sft_last_loss": 0.0681,
65
+ "sft_best_loss": 0.0009,
66
+ "sft_last_token_accuracy": 0.9726027250289917,
67
+ "sft_valid_rate": 1.0,
68
+ "sft_avg_env_reward": 0.726,
69
+ "sft_avg_latency_seconds": 2.158,
70
+ "grpo_avg_reward": null,
71
+ "grpo_history_steps": 0,
72
+ "grpo_valid_rate": null,
73
+ "grpo_avg_env_reward": null,
74
+ "grpo_avg_latency_seconds": null
75
+ },
76
+ "files": {
77
+ "run_metadata.json": "/Users/daver/Desktop/Meta_Pytorch_OpenEnv_Scaler/polyguard-rl/outputs/reports/submission_evidence/qwen_0_5b_1_5b/runs/qwen-qwen2-5-1-5b-instruct/run_metadata.json",
78
+ "sft_trl_run.json": "/Users/daver/Desktop/Meta_Pytorch_OpenEnv_Scaler/polyguard-rl/outputs/reports/submission_evidence/qwen_0_5b_1_5b/runs/qwen-qwen2-5-1-5b-instruct/sft_trl_run.json",
79
+ "sft_history.json": "/Users/daver/Desktop/Meta_Pytorch_OpenEnv_Scaler/polyguard-rl/outputs/reports/submission_evidence/qwen_0_5b_1_5b/runs/qwen-qwen2-5-1-5b-instruct/sft_history.json",
80
+ "postsave_inference_sft.json": "/Users/daver/Desktop/Meta_Pytorch_OpenEnv_Scaler/polyguard-rl/outputs/reports/submission_evidence/qwen_0_5b_1_5b/runs/qwen-qwen2-5-1-5b-instruct/postsave_inference_sft.json",
81
+ "grpo_trl_run.json": "",
82
+ "grpo_history.json": "",
83
+ "grpo_reward_components.jsonl": "",
84
+ "postsave_inference_grpo.json": "",
85
+ "grpo_ablation_report.json": "",
86
+ "error.json": ""
87
+ }
88
+ }
89
+ ],
90
+ "artifact_repo": {
91
+ "repo_id": "TheJackBright/polyguard-openenv-training-full-artifacts",
92
+ "status": "pending_artifact_upload",
93
+ "files": [
94
+ ".gitattributes"
95
+ ],
96
+ "meaningful_file_count": 0,
97
+ "error": ""
98
+ },
99
+ "remote_snapshot_used": "/Users/daver/.cache/huggingface/hub/models--TheJackBright--polyguard-openenv-training-full-artifacts/snapshots/f313e87ad0df089dbe586b469c8f0a34e05bc5cd",
100
+ "training_space_status": {
101
+ "status": "running",
102
+ "source": "https://thejackbright-polyguard-openenv-training-full.hf.space",
103
+ "completed_run_ids": []
104
+ },
105
+ "stage_records": [
106
+ {
107
+ "run_id": "qwen-qwen2-5-0-5b-instruct",
108
+ "model_id": "Qwen/Qwen2.5-0.5B-Instruct",
109
+ "label": "Qwen 0.5B",
110
+ "stage": "sft_training",
111
+ "returncode": 0,
112
+ "elapsed_seconds": 257.387,
113
+ "completed": true
114
+ },
115
+ {
116
+ "run_id": "qwen-qwen2-5-0-5b-instruct",
117
+ "model_id": "Qwen/Qwen2.5-0.5B-Instruct",
118
+ "label": "Qwen 0.5B",
119
+ "stage": "grpo_training",
120
+ "returncode": 0,
121
+ "elapsed_seconds": 4230.645,
122
+ "completed": true
123
+ },
124
+ {
125
+ "run_id": "qwen-qwen2-5-0-5b-instruct",
126
+ "model_id": "Qwen/Qwen2.5-0.5B-Instruct",
127
+ "label": "Qwen 0.5B",
128
+ "stage": "sft_postsave_inference",
129
+ "returncode": 0,
130
+ "elapsed_seconds": 15.201,
131
+ "completed": true
132
+ },
133
+ {
134
+ "run_id": "qwen-qwen2-5-0-5b-instruct",
135
+ "model_id": "Qwen/Qwen2.5-0.5B-Instruct",
136
+ "label": "Qwen 0.5B",
137
+ "stage": "grpo_postsave_inference",
138
+ "returncode": 0,
139
+ "elapsed_seconds": 18.461,
140
+ "completed": true
141
+ },
142
+ {
143
+ "run_id": "qwen-qwen2-5-0-5b-instruct",
144
+ "model_id": "Qwen/Qwen2.5-0.5B-Instruct",
145
+ "label": "Qwen 0.5B",
146
+ "stage": "policy_ablation",
147
+ "returncode": 0,
148
+ "elapsed_seconds": 3.989,
149
+ "completed": true
150
+ },
151
+ {
152
+ "run_id": "qwen-qwen2-5-1-5b-instruct",
153
+ "model_id": "Qwen/Qwen2.5-1.5B-Instruct",
154
+ "label": "Qwen 1.5B",
155
+ "stage": "sft_training",
156
+ "returncode": 0,
157
+ "elapsed_seconds": 454.278,
158
+ "completed": true
159
+ },
160
+ {
161
+ "run_id": "qwen-qwen2-5-1-5b-instruct",
162
+ "model_id": "Qwen/Qwen2.5-1.5B-Instruct",
163
+ "label": "Qwen 1.5B",
164
+ "stage": "grpo_training",
165
+ "returncode": 0,
166
+ "elapsed_seconds": 5118.654,
167
+ "completed": true
168
+ },
169
+ {
170
+ "run_id": "qwen-qwen2-5-1-5b-instruct",
171
+ "model_id": "Qwen/Qwen2.5-1.5B-Instruct",
172
+ "label": "Qwen 1.5B",
173
+ "stage": "sft_postsave_inference",
174
+ "returncode": 0,
175
+ "elapsed_seconds": 17.128,
176
+ "completed": true
177
+ },
178
+ {
179
+ "run_id": "qwen-qwen2-5-1-5b-instruct",
180
+ "model_id": "Qwen/Qwen2.5-1.5B-Instruct",
181
+ "label": "Qwen 1.5B",
182
+ "stage": "grpo_postsave_inference",
183
+ "returncode": 0,
184
+ "elapsed_seconds": 21.528,
185
+ "completed": true
186
+ },
187
+ {
188
+ "run_id": "qwen-qwen2-5-1-5b-instruct",
189
+ "model_id": "Qwen/Qwen2.5-1.5B-Instruct",
190
+ "label": "Qwen 1.5B",
191
+ "stage": "policy_ablation",
192
+ "returncode": 0,
193
+ "elapsed_seconds": 4.001,
194
+ "completed": true
195
+ }
196
+ ],
197
+ "charts": {
198
+ "qwen_0_5b_sft_training_loss": "/Users/daver/Desktop/Meta_Pytorch_OpenEnv_Scaler/polyguard-rl/outputs/plots/submission_evidence/qwen_0_5b_1_5b/qwen_0_5b_sft_training_loss.png",
199
+ "qwen_0_5b_sft_token_accuracy": "/Users/daver/Desktop/Meta_Pytorch_OpenEnv_Scaler/polyguard-rl/outputs/plots/submission_evidence/qwen_0_5b_1_5b/qwen_0_5b_sft_token_accuracy.png",
200
+ "qwen_0_5b_sft_learning_rate": "/Users/daver/Desktop/Meta_Pytorch_OpenEnv_Scaler/polyguard-rl/outputs/plots/submission_evidence/qwen_0_5b_1_5b/qwen_0_5b_sft_learning_rate.png",
201
+ "qwen_1_5b_sft_training_loss": "/Users/daver/Desktop/Meta_Pytorch_OpenEnv_Scaler/polyguard-rl/outputs/plots/submission_evidence/qwen_0_5b_1_5b/qwen_1_5b_sft_training_loss.png",
202
+ "qwen_1_5b_sft_token_accuracy": "/Users/daver/Desktop/Meta_Pytorch_OpenEnv_Scaler/polyguard-rl/outputs/plots/submission_evidence/qwen_0_5b_1_5b/qwen_1_5b_sft_token_accuracy.png",
203
+ "qwen_1_5b_sft_learning_rate": "/Users/daver/Desktop/Meta_Pytorch_OpenEnv_Scaler/polyguard-rl/outputs/plots/submission_evidence/qwen_0_5b_1_5b/qwen_1_5b_sft_learning_rate.png",
204
+ "qwen_0_5b_vs_1_5b_sft_loss_comparison": "/Users/daver/Desktop/Meta_Pytorch_OpenEnv_Scaler/polyguard-rl/outputs/plots/submission_evidence/qwen_0_5b_1_5b/qwen_0_5b_vs_1_5b_sft_loss_comparison.png",
205
+ "qwen_0_5b_vs_1_5b_sft_token_accuracy_comparison": "/Users/daver/Desktop/Meta_Pytorch_OpenEnv_Scaler/polyguard-rl/outputs/plots/submission_evidence/qwen_0_5b_1_5b/qwen_0_5b_vs_1_5b_sft_token_accuracy_comparison.png",
206
+ "qwen_0_5b_1_5b_final_sft_train_loss": "/Users/daver/Desktop/Meta_Pytorch_OpenEnv_Scaler/polyguard-rl/outputs/plots/submission_evidence/qwen_0_5b_1_5b/qwen_0_5b_1_5b_final_sft_train_loss.png",
207
+ "qwen_0_5b_1_5b_postsave_reward": "/Users/daver/Desktop/Meta_Pytorch_OpenEnv_Scaler/polyguard-rl/outputs/plots/submission_evidence/qwen_0_5b_1_5b/qwen_0_5b_1_5b_postsave_reward.png",
208
+ "qwen_0_5b_1_5b_postsave_latency": "/Users/daver/Desktop/Meta_Pytorch_OpenEnv_Scaler/polyguard-rl/outputs/plots/submission_evidence/qwen_0_5b_1_5b/qwen_0_5b_1_5b_postsave_latency.png",
209
+ "qwen_0_5b_1_5b_sft_runtime": "/Users/daver/Desktop/Meta_Pytorch_OpenEnv_Scaler/polyguard-rl/outputs/plots/submission_evidence/qwen_0_5b_1_5b/qwen_0_5b_1_5b_sft_runtime.png",
210
+ "qwen_0_5b_1_5b_remote_completed_stage_durations": "/Users/daver/Desktop/Meta_Pytorch_OpenEnv_Scaler/polyguard-rl/outputs/plots/submission_evidence/qwen_0_5b_1_5b/qwen_0_5b_1_5b_remote_completed_stage_durations.png",
211
+ "policy_ablation_avg_reward": "/Users/daver/Desktop/Meta_Pytorch_OpenEnv_Scaler/polyguard-rl/outputs/plots/submission_evidence/qwen_0_5b_1_5b/policy_ablation_avg_reward.png",
212
+ "policy_ablation_legality": "/Users/daver/Desktop/Meta_Pytorch_OpenEnv_Scaler/polyguard-rl/outputs/plots/submission_evidence/qwen_0_5b_1_5b/policy_ablation_legality.png",
213
+ "policy_ablation_exploit_detection": "/Users/daver/Desktop/Meta_Pytorch_OpenEnv_Scaler/polyguard-rl/outputs/plots/submission_evidence/qwen_0_5b_1_5b/policy_ablation_exploit_detection.png",
214
+ "reward_component_bars": "/Users/daver/Desktop/Meta_Pytorch_OpenEnv_Scaler/polyguard-rl/outputs/plots/submission_evidence/qwen_0_5b_1_5b/reward_component_bars.png",
215
+ "primary_reward_channel_bars": "/Users/daver/Desktop/Meta_Pytorch_OpenEnv_Scaler/polyguard-rl/outputs/plots/submission_evidence/qwen_0_5b_1_5b/primary_reward_channel_bars.png",
216
+ "basic_llm_vs_full_pipeline_reward": "/Users/daver/Desktop/Meta_Pytorch_OpenEnv_Scaler/polyguard-rl/outputs/plots/submission_evidence/qwen_0_5b_1_5b/basic_llm_vs_full_pipeline_reward.png",
217
+ "basic_llm_vs_full_pipeline_legality": "/Users/daver/Desktop/Meta_Pytorch_OpenEnv_Scaler/polyguard-rl/outputs/plots/submission_evidence/qwen_0_5b_1_5b/basic_llm_vs_full_pipeline_legality.png",
218
+ "basic_llm_vs_full_pipeline_latency": "/Users/daver/Desktop/Meta_Pytorch_OpenEnv_Scaler/polyguard-rl/outputs/plots/submission_evidence/qwen_0_5b_1_5b/basic_llm_vs_full_pipeline_latency.png",
219
+ "basic_llm_vs_full_pipeline_reward_delta_by_seed": "/Users/daver/Desktop/Meta_Pytorch_OpenEnv_Scaler/polyguard-rl/outputs/plots/submission_evidence/qwen_0_5b_1_5b/basic_llm_vs_full_pipeline_reward_delta_by_seed.png"
220
+ },
221
+ "pending_artifacts": [
222
+ "Qwen 0.5B grpo_history.json: pending_artifact_upload",
223
+ "Qwen 0.5B grpo_postsave_inference: remote_completed_pending_artifact_upload",
224
+ "Qwen 0.5B grpo_training: remote_completed_pending_artifact_upload",
225
+ "Qwen 0.5B policy_ablation: remote_completed_pending_artifact_upload",
226
+ "Qwen 0.5B postsave_inference_grpo.json: pending_artifact_upload",
227
+ "Qwen 1.5B grpo_history.json: pending_artifact_upload",
228
+ "Qwen 1.5B grpo_postsave_inference: remote_completed_pending_artifact_upload",
229
+ "Qwen 1.5B grpo_training: remote_completed_pending_artifact_upload",
230
+ "Qwen 1.5B policy_ablation: remote_completed_pending_artifact_upload",
231
+ "Qwen 1.5B postsave_inference_grpo.json: pending_artifact_upload"
232
+ ],
233
+ "reward_validation_errors": [],
234
+ "primary_judge": "PolyGuard verifier/reward system",
235
+ "bundle_zip": "/Users/daver/Desktop/Meta_Pytorch_OpenEnv_Scaler/polyguard-rl/submission_bundle/qwen_0_5b_1_5b_evidence.zip",
236
+ "mirrored_file_count": 56
237
+ }
usable_model_bundles/local-qwen-0-5b-active-smoke/reports/acceptance_gate.json ADDED
@@ -0,0 +1,11 @@
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "missing_files": [],
3
+ "missing_artifacts": [],
4
+ "missing_readme_markers": [],
5
+ "missing_readme_links": [],
6
+ "strict_submission_links": false,
7
+ "missing_submission_env": [],
8
+ "strict_submission_failures": [],
9
+ "submission_ready": false,
10
+ "status": "ok"
11
+ }
usable_model_bundles/local-qwen-0-5b-active-smoke/reports/active_model_manifest.json ADDED
@@ -0,0 +1,68 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "status": "ok",
3
+ "enabled": true,
4
+ "activated_at_utc": "2026-04-26T02:24:15.464507+00:00",
5
+ "run_id": "qwen-qwen2-5-0-5b-instruct",
6
+ "source": "top-level",
7
+ "label": "local-qwen-0.5b-active-smoke",
8
+ "model_id": "Qwen/Qwen2.5-0.5B-Instruct",
9
+ "base_model": "Qwen/Qwen2.5-0.5B-Instruct",
10
+ "preferred_artifact": "grpo_adapter",
11
+ "mode": "symlink",
12
+ "source_checkpoint_dir": "checkpoints",
13
+ "source_report_dir": "outputs/reports",
14
+ "grpo_adapter": "checkpoints/active/grpo_adapter",
15
+ "merged_model": "checkpoints/active/merged",
16
+ "sft_adapter": "checkpoints/active/sft_adapter",
17
+ "availability": {
18
+ "grpo_adapter": true,
19
+ "merged": true,
20
+ "sft_adapter": true
21
+ },
22
+ "reports": {
23
+ "improvement_report_benchmark.json": "outputs/reports/active_model/improvement_report_benchmark.json",
24
+ "anti_hacking_overfit_report.json": "outputs/reports/active_model/anti_hacking_overfit_report.json",
25
+ "grpo_trl_run_strict_check.json": "outputs/reports/active_model/grpo_trl_run_strict_check.json",
26
+ "postsave_inference.json": "outputs/reports/active_model/postsave_inference.json",
27
+ "sft_trl_run.json": "outputs/reports/active_model/sft_trl_run.json",
28
+ "plot_index.json": "outputs/reports/active_model/plot_index.json",
29
+ "dose_train.json": "outputs/reports/active_model/dose_train.json",
30
+ "baselines.json": "outputs/reports/active_model/baselines.json",
31
+ "robustness.json": "outputs/reports/active_model/robustness.json",
32
+ "grpo_trl_run_fallback_check.json": "outputs/reports/active_model/grpo_trl_run_fallback_check.json",
33
+ "sft_run.json": "outputs/reports/active_model/sft_run.json",
34
+ "benchmark_report.txt": "outputs/reports/active_model/benchmark_report.txt",
35
+ "dosing_grpo.json": "outputs/reports/active_model/dosing_grpo.json",
36
+ "grpo_ablation_report.json": "outputs/reports/active_model/grpo_ablation_report.json",
37
+ "frontier_ready.json": "outputs/reports/active_model/frontier_ready.json",
38
+ "improvement_report.json": "outputs/reports/active_model/improvement_report.json",
39
+ "hf_sweep_summary.json": "outputs/reports/active_model/hf_sweep_summary.json",
40
+ "planner_grpo.json": "outputs/reports/active_model/planner_grpo.json",
41
+ "grpo_trl_run.json": "outputs/reports/active_model/grpo_trl_run.json",
42
+ "risk_train.json": "outputs/reports/active_model/risk_train.json",
43
+ "grpo_trl_run_smoke.json": "outputs/reports/active_model/grpo_trl_run_smoke.json",
44
+ "inference_benchmark.json": "outputs/reports/active_model/inference_benchmark.json",
45
+ "supervisor_grpo.json": "outputs/reports/active_model/supervisor_grpo.json",
46
+ "acceptance_gate.json": "outputs/reports/active_model/acceptance_gate.json",
47
+ "grpo_trl_run_auto.json": "outputs/reports/active_model/grpo_trl_run_auto.json",
48
+ "hf_training_status.json": "outputs/reports/active_model/hf_training_status.json",
49
+ "benchmark_report.json": "outputs/reports/active_model/benchmark_report.json",
50
+ "postsave_inference_smoke.json": "outputs/reports/active_model/postsave_inference_smoke.json",
51
+ "graph_train.json": "outputs/reports/active_model/graph_train.json",
52
+ "sweeps/qwen-qwen2-5-0-5b-instruct/sft_trl_run.json": "outputs/reports/active_model/sweeps/qwen-qwen2-5-0-5b-instruct/sft_trl_run.json",
53
+ "sweeps/qwen-qwen2-5-0-5b-instruct/run_metadata.json": "outputs/reports/active_model/sweeps/qwen-qwen2-5-0-5b-instruct/run_metadata.json",
54
+ "sweeps/qwen-qwen2-5-0-5b-instruct/sft_history.json": "outputs/reports/active_model/sweeps/qwen-qwen2-5-0-5b-instruct/sft_history.json",
55
+ "sweeps/qwen-qwen2-5-0-5b-instruct/postsave_inference_sft.json": "outputs/reports/active_model/sweeps/qwen-qwen2-5-0-5b-instruct/postsave_inference_sft.json",
56
+ "sweeps/qwen-qwen2-5-3b-instruct/sft_trl_run.json": "outputs/reports/active_model/sweeps/qwen-qwen2-5-3b-instruct/sft_trl_run.json",
57
+ "sweeps/qwen-qwen2-5-3b-instruct/run_metadata.json": "outputs/reports/active_model/sweeps/qwen-qwen2-5-3b-instruct/run_metadata.json",
58
+ "sweeps/qwen-qwen2-5-3b-instruct/sft_history.json": "outputs/reports/active_model/sweeps/qwen-qwen2-5-3b-instruct/sft_history.json",
59
+ "sweeps/qwen-qwen2-5-3b-instruct/postsave_inference_sft.json": "outputs/reports/active_model/sweeps/qwen-qwen2-5-3b-instruct/postsave_inference_sft.json",
60
+ "sweeps/qwen-qwen2-5-1-5b-instruct/sft_trl_run.json": "outputs/reports/active_model/sweeps/qwen-qwen2-5-1-5b-instruct/sft_trl_run.json",
61
+ "sweeps/qwen-qwen2-5-1-5b-instruct/run_metadata.json": "outputs/reports/active_model/sweeps/qwen-qwen2-5-1-5b-instruct/run_metadata.json",
62
+ "sweeps/qwen-qwen2-5-1-5b-instruct/sft_history.json": "outputs/reports/active_model/sweeps/qwen-qwen2-5-1-5b-instruct/sft_history.json",
63
+ "sweeps/qwen-qwen2-5-1-5b-instruct/postsave_inference_sft.json": "outputs/reports/active_model/sweeps/qwen-qwen2-5-1-5b-instruct/postsave_inference_sft.json",
64
+ "grpo_training_cycle/grpo_trl_run.json": "outputs/reports/active_model/grpo_training_cycle/grpo_trl_run.json",
65
+ "grpo_training_cycle/hf_training_status.json": "outputs/reports/active_model/grpo_training_cycle/hf_training_status.json"
66
+ },
67
+ "notes": "This manifest controls local product inference. Prefer grpo_adapter for the RL policy; merged is the SFT baseline fallback when no GRPO adapter is available."
68
+ }
usable_model_bundles/local-qwen-0-5b-active-smoke/reports/anti_hacking_overfit_report.json ADDED
@@ -0,0 +1,22 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "passed": true,
3
+ "training_mode": "sft-baseline",
4
+ "warnings": [],
5
+ "completed_models": [
6
+ "Qwen/Qwen2.5-0.5B-Instruct",
7
+ "Qwen/Qwen2.5-1.5B-Instruct",
8
+ "Qwen/Qwen2.5-3B-Instruct"
9
+ ],
10
+ "failed_or_skipped_models": [],
11
+ "checks": {
12
+ "reward_bounds": [
13
+ 0.001,
14
+ 0.999
15
+ ],
16
+ "reward_precision": 3,
17
+ "fallback_backends_rejected": true,
18
+ "exploit_rate_threshold": 0.35,
19
+ "train_holdout_gap_threshold": 0.25,
20
+ "min_validity_rate": 0.8
21
+ }
22
+ }
usable_model_bundles/local-qwen-0-5b-active-smoke/reports/baselines.json ADDED
@@ -0,0 +1,119 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "no_change": {
3
+ "mode": "REGIMEN_OPT",
4
+ "action_type": "KEEP_REGIMEN",
5
+ "target_drug": null,
6
+ "replacement_drug": null,
7
+ "dose_bucket": "NA",
8
+ "taper_days": null,
9
+ "monitoring_plan": null,
10
+ "evidence_query": null,
11
+ "new_drug_name": null,
12
+ "candidate_components": [],
13
+ "candidate_id": "cand_01",
14
+ "confidence": 0.8,
15
+ "rationale_brief": "Baseline no-change policy."
16
+ },
17
+ "rules_only": {
18
+ "mode": "REGIMEN_OPT",
19
+ "action_type": "SUBSTITUTE_WITHIN_CLASS",
20
+ "target_drug": "opioid_like",
21
+ "replacement_drug": "non_opioid_analgesic",
22
+ "dose_bucket": "NA",
23
+ "taper_days": null,
24
+ "monitoring_plan": null,
25
+ "evidence_query": null,
26
+ "new_drug_name": null,
27
+ "candidate_components": [],
28
+ "candidate_id": "cand_04",
29
+ "confidence": 0.75,
30
+ "rationale_brief": "Rules-only selected top legal candidate."
31
+ },
32
+ "greedy": {
33
+ "mode": "REGIMEN_OPT",
34
+ "action_type": "SUBSTITUTE_WITHIN_CLASS",
35
+ "target_drug": "opioid_like",
36
+ "replacement_drug": "non_opioid_analgesic",
37
+ "dose_bucket": "NA",
38
+ "taper_days": null,
39
+ "monitoring_plan": null,
40
+ "evidence_query": null,
41
+ "new_drug_name": null,
42
+ "candidate_components": [],
43
+ "candidate_id": "cand_04",
44
+ "confidence": 0.72,
45
+ "rationale_brief": "Greedy safety/burden improvement baseline."
46
+ },
47
+ "contextual_bandit": {
48
+ "mode": "REGIMEN_OPT",
49
+ "action_type": "SUBSTITUTE_WITHIN_CLASS",
50
+ "target_drug": "opioid_like",
51
+ "replacement_drug": "non_opioid_analgesic",
52
+ "dose_bucket": "NA",
53
+ "taper_days": null,
54
+ "monitoring_plan": null,
55
+ "evidence_query": null,
56
+ "new_drug_name": null,
57
+ "candidate_components": [],
58
+ "candidate_id": "cand_04",
59
+ "confidence": 0.68,
60
+ "rationale_brief": "Contextual bandit selected candidate."
61
+ },
62
+ "contextual_bandit_topk": [
63
+ {
64
+ "candidate_id": "cand_09",
65
+ "score": 1.1532307878304324,
66
+ "exploration_bonus": 1.1532307878304324,
67
+ "algorithm": "linucb"
68
+ },
69
+ {
70
+ "candidate_id": "cand_10",
71
+ "score": 1.1489735636645433,
72
+ "exploration_bonus": 1.1489735636645433,
73
+ "algorithm": "linucb"
74
+ },
75
+ {
76
+ "candidate_id": "cand_08",
77
+ "score": 1.1447401451857973,
78
+ "exploration_bonus": 1.1447401451857973,
79
+ "algorithm": "linucb"
80
+ }
81
+ ],
82
+ "beam_search": {
83
+ "mode": "REGIMEN_OPT",
84
+ "action_type": "SUBSTITUTE_WITHIN_CLASS",
85
+ "target_drug": "opioid_like",
86
+ "replacement_drug": "non_opioid_analgesic",
87
+ "dose_bucket": "NA",
88
+ "taper_days": null,
89
+ "monitoring_plan": null,
90
+ "evidence_query": null,
91
+ "new_drug_name": null,
92
+ "candidate_components": [],
93
+ "candidate_id": "cand_04",
94
+ "confidence": 0.74,
95
+ "rationale_brief": "Beam-search(3) top candidate."
96
+ },
97
+ "baseline_policy": "no_change_candidate",
98
+ "episodes": 8,
99
+ "avg_reward": 0.747,
100
+ "legality_rate": 1.0,
101
+ "success_rate": 0.0,
102
+ "policy_stack_ablations": {
103
+ "bandit-only": {
104
+ "avg_reward": 0.7616666666666667,
105
+ "legality_rate": 1.0,
106
+ "steps": 3.0
107
+ },
108
+ "llm-only": {
109
+ "avg_reward": 0.7753333333333333,
110
+ "legality_rate": 1.0,
111
+ "steps": 3.0
112
+ },
113
+ "llm+bandit": {
114
+ "avg_reward": 0.7753333333333333,
115
+ "legality_rate": 1.0,
116
+ "steps": 3.0
117
+ }
118
+ }
119
+ }
usable_model_bundles/local-qwen-0-5b-active-smoke/reports/benchmark_report.json ADDED
@@ -0,0 +1,52 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "offline_policy_eval": {
3
+ "avg_reward": 0.772833,
4
+ "legal_rate": 1.0,
5
+ "success_rate": 0.0
6
+ },
7
+ "safety_eval": {
8
+ "severe_violation_rate": 0.0,
9
+ "illegal_step_rate": 0.0
10
+ },
11
+ "dosing_eval": {
12
+ "target_attainment": 0.75,
13
+ "toxicity_avoidance": 1.0
14
+ },
15
+ "robustness_eval": {
16
+ "missing_labs_safety_rate": 0.666667,
17
+ "noisy_dose_info_safety_rate": 1.0,
18
+ "conflicting_meds_safety_rate": 1.0,
19
+ "alias_noise_safety_rate": 1.0,
20
+ "hidden_duplicate_detection_rate": 1.0,
21
+ "wrong_candidate_id_resilience": 1.0,
22
+ "stale_evidence_safety_rate": 1.0,
23
+ "delayed_ade_manifestation_safety_rate": 1.0
24
+ },
25
+ "calibration_eval": {
26
+ "ece_proxy": 0.08625
27
+ },
28
+ "abstention_eval": {
29
+ "appropriate_abstention_rate": 0.0
30
+ },
31
+ "process_eval": {
32
+ "process_fidelity": 0.92,
33
+ "avg_invalid_actions": 0.333333
34
+ },
35
+ "subgroup_eval": {
36
+ "renal_compromise": {
37
+ "avg_reward": 0.774,
38
+ "legal_rate": 1.0
39
+ },
40
+ "hepatic_compromise": {
41
+ "avg_reward": 0.779333,
42
+ "legal_rate": 1.0
43
+ },
44
+ "frail": {
45
+ "avg_reward": 0.781667,
46
+ "legal_rate": 1.0
47
+ }
48
+ },
49
+ "explainability_eval": {
50
+ "grounding_rate": 0.8
51
+ }
52
+ }
usable_model_bundles/local-qwen-0-5b-active-smoke/reports/benchmark_report.txt ADDED
@@ -0,0 +1,52 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "offline_policy_eval": {
3
+ "avg_reward": 0.772833,
4
+ "legal_rate": 1.0,
5
+ "success_rate": 0.0
6
+ },
7
+ "safety_eval": {
8
+ "severe_violation_rate": 0.0,
9
+ "illegal_step_rate": 0.0
10
+ },
11
+ "dosing_eval": {
12
+ "target_attainment": 0.75,
13
+ "toxicity_avoidance": 1.0
14
+ },
15
+ "robustness_eval": {
16
+ "missing_labs_safety_rate": 0.666667,
17
+ "noisy_dose_info_safety_rate": 1.0,
18
+ "conflicting_meds_safety_rate": 1.0,
19
+ "alias_noise_safety_rate": 1.0,
20
+ "hidden_duplicate_detection_rate": 1.0,
21
+ "wrong_candidate_id_resilience": 1.0,
22
+ "stale_evidence_safety_rate": 1.0,
23
+ "delayed_ade_manifestation_safety_rate": 1.0
24
+ },
25
+ "calibration_eval": {
26
+ "ece_proxy": 0.08625
27
+ },
28
+ "abstention_eval": {
29
+ "appropriate_abstention_rate": 0.0
30
+ },
31
+ "process_eval": {
32
+ "process_fidelity": 0.92,
33
+ "avg_invalid_actions": 0.333333
34
+ },
35
+ "subgroup_eval": {
36
+ "renal_compromise": {
37
+ "avg_reward": 0.774,
38
+ "legal_rate": 1.0
39
+ },
40
+ "hepatic_compromise": {
41
+ "avg_reward": 0.779333,
42
+ "legal_rate": 1.0
43
+ },
44
+ "frail": {
45
+ "avg_reward": 0.781667,
46
+ "legal_rate": 1.0
47
+ }
48
+ },
49
+ "explainability_eval": {
50
+ "grounding_rate": 0.8
51
+ }
52
+ }
usable_model_bundles/local-qwen-0-5b-active-smoke/reports/dose_train.json ADDED
@@ -0,0 +1,6 @@
 
 
 
 
 
 
 
1
+ {
2
+ "dataset_size": 120.0,
3
+ "status": "trained",
4
+ "train_mae": 0.0025,
5
+ "model_path": "outputs/models/dose_model.pkl"
6
+ }
usable_model_bundles/local-qwen-0-5b-active-smoke/reports/dosing_grpo.json ADDED
@@ -0,0 +1,28 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "avg_reward": 0.7785555555555557,
3
+ "legality_rate": 1.0,
4
+ "severe_violation_rate": 0.0,
5
+ "abstention_rate": 0.0,
6
+ "avg_episode_length": 2.0,
7
+ "success_rate": 0.0,
8
+ "avg_burden_delta": 0.0,
9
+ "avg_safety_delta": 0.5,
10
+ "avg_dosing_quality": 0.75,
11
+ "avg_process_fidelity": 0.9200000000000002,
12
+ "exploit_detection_count": 3.0,
13
+ "reward_columns": {
14
+ "format_compliance_score": 0.999,
15
+ "candidate_alignment_score": 0.999,
16
+ "legality_score": 0.999,
17
+ "safety_delta_score": 0.5,
18
+ "burden_improvement_score": 0.5,
19
+ "disease_stability_score": 0.9000000000000001,
20
+ "dosing_quality_score": 0.75,
21
+ "abstention_quality_score": 0.56,
22
+ "efficiency_score": 0.77,
23
+ "process_fidelity_score": 0.9200000000000002,
24
+ "explanation_grounding_score": 0.7999999999999999,
25
+ "anti_cheat_score": 0.6663333333333333,
26
+ "uncertainty_calibration_score": 0.87
27
+ }
28
+ }
usable_model_bundles/local-qwen-0-5b-active-smoke/reports/frontier_ready.json ADDED
@@ -0,0 +1,8 @@
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "frontier_models": [
3
+ "qwen2.5:7b-instruct",
4
+ "qwen2.5:14b-instruct"
5
+ ],
6
+ "deployment_mode": "hf_or_vllm_ready",
7
+ "notes": "Baseline complete; ready for larger model sweep."
8
+ }
usable_model_bundles/local-qwen-0-5b-active-smoke/reports/graph_train.json ADDED
@@ -0,0 +1,5 @@
 
 
 
 
 
 
1
+ {
2
+ "num_samples": 180,
3
+ "status": "trained",
4
+ "model_path": "/Users/daver/Desktop/Meta_Pytorch_OpenEnv_Scaler/polyguard-rl/outputs/models/graph_model.pkl"
5
+ }
usable_model_bundles/local-qwen-0-5b-active-smoke/reports/grpo_ablation_report.json ADDED
@@ -0,0 +1,149 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "status": "ok",
3
+ "ablations": {
4
+ "bandit_only": {
5
+ "avg_reward": 0.779625,
6
+ "legality_rate": 1.0,
7
+ "severe_violation_rate": 0.0,
8
+ "abstention_rate": 0.0,
9
+ "avg_episode_length": 2.8125,
10
+ "success_rate": 0.0,
11
+ "avg_burden_delta": 0.0,
12
+ "avg_safety_delta": 0.483125,
13
+ "avg_dosing_quality": 0.75,
14
+ "avg_process_fidelity": 0.9056250000000008,
15
+ "exploit_detection_count": 2.0,
16
+ "timeout_rate": 0.0,
17
+ "failure_visible_rate": 0.0625,
18
+ "avg_invalid_actions": 0.0625,
19
+ "reward_columns": {
20
+ "format_compliance_score": 0.9989999999999996,
21
+ "candidate_alignment_score": 0.9989999999999996,
22
+ "legality_score": 0.9989999999999996,
23
+ "safety_delta_score": 0.483125,
24
+ "burden_improvement_score": 0.5,
25
+ "disease_stability_score": 0.8999999999999995,
26
+ "dosing_quality_score": 0.75,
27
+ "abstention_quality_score": 0.5600000000000002,
28
+ "efficiency_score": 0.5855625,
29
+ "process_fidelity_score": 0.9056250000000008,
30
+ "explanation_grounding_score": 0.8000000000000004,
31
+ "anti_cheat_score": 0.9366249999999997,
32
+ "uncertainty_calibration_score": 0.8531250000000004
33
+ },
34
+ "primary_reward_channels": {
35
+ "safety_legality": 0.9469062499999998,
36
+ "clinical_improvement": 0.6273749999999997,
37
+ "dosing_quality": 0.6550000000000001,
38
+ "process_integrity": 0.8225937500000001
39
+ },
40
+ "policy_stack": "bandit-only",
41
+ "failure_mining": {
42
+ "total_rows": 32,
43
+ "failure_rows": 2,
44
+ "top_failure_reasons": [
45
+ {
46
+ "reason": "repeated_action_loop",
47
+ "count": 2
48
+ }
49
+ ]
50
+ }
51
+ },
52
+ "llm_only": {
53
+ "avg_reward": 0.7723913043478261,
54
+ "legality_rate": 1.0,
55
+ "severe_violation_rate": 0.0,
56
+ "abstention_rate": 0.0,
57
+ "avg_episode_length": 1.9565217391304348,
58
+ "success_rate": 0.0,
59
+ "avg_burden_delta": 0.0,
60
+ "avg_safety_delta": 0.4882608695652174,
61
+ "avg_dosing_quality": 0.75,
62
+ "avg_process_fidelity": 0.9000000000000005,
63
+ "exploit_detection_count": 7.0,
64
+ "timeout_rate": 0.0,
65
+ "failure_visible_rate": 0.30434782608695654,
66
+ "avg_invalid_actions": 0.30434782608695654,
67
+ "reward_columns": {
68
+ "format_compliance_score": 0.9989999999999999,
69
+ "candidate_alignment_score": 0.9989999999999999,
70
+ "legality_score": 0.9989999999999999,
71
+ "safety_delta_score": 0.4882608695652174,
72
+ "burden_improvement_score": 0.5,
73
+ "disease_stability_score": 0.8999999999999998,
74
+ "dosing_quality_score": 0.75,
75
+ "abstention_quality_score": 0.5600000000000004,
76
+ "efficiency_score": 0.7027826086956522,
77
+ "process_fidelity_score": 0.9000000000000005,
78
+ "explanation_grounding_score": 0.8000000000000003,
79
+ "anti_cheat_score": 0.6952608695652175,
80
+ "uncertainty_calibration_score": 0.8482608695652176
81
+ },
82
+ "primary_reward_channels": {
83
+ "safety_legality": 0.8853478260869562,
84
+ "clinical_improvement": 0.6290869565217388,
85
+ "dosing_quality": 0.6549999999999998,
86
+ "process_integrity": 0.8504782608695656
87
+ },
88
+ "policy_stack": "llm-only",
89
+ "failure_mining": {
90
+ "total_rows": 23,
91
+ "failure_rows": 7,
92
+ "top_failure_reasons": [
93
+ {
94
+ "reason": "repeated_action_loop",
95
+ "count": 7
96
+ }
97
+ ]
98
+ }
99
+ },
100
+ "llm_bandit": {
101
+ "avg_reward": 0.7647391304347826,
102
+ "legality_rate": 1.0,
103
+ "severe_violation_rate": 0.0,
104
+ "abstention_rate": 0.0,
105
+ "avg_episode_length": 1.9565217391304348,
106
+ "success_rate": 0.0,
107
+ "avg_burden_delta": 0.0,
108
+ "avg_safety_delta": 0.48982608695652174,
109
+ "avg_dosing_quality": 0.717391304347826,
110
+ "avg_process_fidelity": 0.9000000000000005,
111
+ "exploit_detection_count": 7.0,
112
+ "timeout_rate": 0.0,
113
+ "failure_visible_rate": 0.30434782608695654,
114
+ "avg_invalid_actions": 0.30434782608695654,
115
+ "reward_columns": {
116
+ "format_compliance_score": 0.9989999999999999,
117
+ "candidate_alignment_score": 0.9989999999999999,
118
+ "legality_score": 0.9989999999999999,
119
+ "safety_delta_score": 0.48982608695652174,
120
+ "burden_improvement_score": 0.5043478260869565,
121
+ "disease_stability_score": 0.8582608695652173,
122
+ "dosing_quality_score": 0.717391304347826,
123
+ "abstention_quality_score": 0.5600000000000004,
124
+ "efficiency_score": 0.7027826086956522,
125
+ "process_fidelity_score": 0.9000000000000005,
126
+ "explanation_grounding_score": 0.8000000000000003,
127
+ "anti_cheat_score": 0.6952608695652175,
128
+ "uncertainty_calibration_score": 0.8126086956521739
129
+ },
130
+ "primary_reward_channels": {
131
+ "safety_legality": 0.8765217391304347,
132
+ "clinical_improvement": 0.6171739130434781,
133
+ "dosing_quality": 0.6386956521739129,
134
+ "process_integrity": 0.8504782608695656
135
+ },
136
+ "policy_stack": "llm+bandit",
137
+ "failure_mining": {
138
+ "total_rows": 23,
139
+ "failure_rows": 7,
140
+ "top_failure_reasons": [
141
+ {
142
+ "reason": "repeated_action_loop",
143
+ "count": 7
144
+ }
145
+ ]
146
+ }
147
+ }
148
+ }
149
+ }