adithya9903 committed on
Commit
0dd16d3
·
verified ·
1 Parent(s): a76f54c

Upload PolyGuard artifact: promoted qwen-qwen2-5-3b-instruct

Browse files
Files changed (1) hide show
  1. outputs/reports/grpo_trl_run.json +43 -0
outputs/reports/grpo_trl_run.json ADDED
@@ -0,0 +1,43 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "status": "ok",
3
+ "backend": "trl_transformers",
4
+ "model_id": "Qwen/Qwen2.5-3B-Instruct",
5
+ "records": 2000,
6
+ "prompts_path": "/app/data/processed/training_corpus_grpo_prompts.jsonl",
7
+ "reward_summary": {
8
+ "count": 4000,
9
+ "avg_reward": 0.767,
10
+ "avg_reward_components": {
11
+ "format_compliance_score": 0.999,
12
+ "candidate_alignment_score": 0.999,
13
+ "legality_score": 0.929,
14
+ "safety_delta_score": 0.497,
15
+ "burden_improvement_score": 0.469,
16
+ "disease_stability_score": 0.861,
17
+ "dosing_quality_score": 0.526,
18
+ "abstention_quality_score": 0.56,
19
+ "efficiency_score": 0.849,
20
+ "process_fidelity_score": 0.856,
21
+ "explanation_grounding_score": 0.795,
22
+ "anti_cheat_score": 0.589,
23
+ "uncertainty_calibration_score": 0.747
24
+ },
25
+ "avg_primary_reward_channels": {
26
+ "safety_legality": 0.816,
27
+ "clinical_improvement": 0.609,
28
+ "dosing_quality": 0.543,
29
+ "process_integrity": 0.875
30
+ }
31
+ },
32
+ "reward_log": "/app/checkpoints/sweeps/qwen-qwen2-5-3b-instruct/grpo_reward_components.jsonl",
33
+ "train_metrics": {
34
+ "train_runtime": 6873.9375,
35
+ "train_samples_per_second": 0.291,
36
+ "train_steps_per_second": 0.291,
37
+ "total_flos": 0.0,
38
+ "train_loss": 2.665005830824185e-06
39
+ },
40
+ "history_path": "/app/checkpoints/sweeps/qwen-qwen2-5-3b-instruct/grpo_history.json",
41
+ "artifact_path": "/app/checkpoints/sweeps/qwen-qwen2-5-3b-instruct/grpo_adapter",
42
+ "unsloth_available": false
43
+ }