File size: 1,292 Bytes
b5cc210
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
{
  "status": "fallback",
  "backend": "env_reward_fallback",
  "model_id": "Qwen/Qwen2.5-1.5B-Instruct",
  "records": 1,
  "prompts_path": "/Users/daver/Desktop/Meta_Pytorch_OpenEnv_Scaler/polyguard-rl/data/processed/training_corpus_grpo_prompts.jsonl",
  "reward_summary": {
    "count": 1,
    "avg_reward": 0.764,
    "avg_reward_components": {
      "format_compliance_score": 0.999,
      "candidate_alignment_score": 0.999,
      "legality_score": 0.999,
      "safety_delta_score": 0.5,
      "burden_improvement_score": 0.5,
      "disease_stability_score": 0.9,
      "dosing_quality_score": 0.5,
      "abstention_quality_score": 0.56,
      "efficiency_score": 0.857,
      "process_fidelity_score": 0.92,
      "explanation_grounding_score": 0.8,
      "anti_cheat_score": 0.001,
      "uncertainty_calibration_score": 0.7
    },
    "avg_primary_reward_channels": {
      "safety_legality": 0.675,
      "clinical_improvement": 0.633,
      "dosing_quality": 0.53,
      "process_integrity": 0.894
    }
  },
  "reward_log": "/Users/daver/Desktop/Meta_Pytorch_OpenEnv_Scaler/polyguard-rl/checkpoints/grpo_reward_components.jsonl",
  "train_metrics": {
    "steps_executed": 1.0
  },
  "artifact_path": "",
  "unsloth_available": false,
  "trl_runtime_error": "forced_fallback"
}