"""
Trajectory scorers for the Pharmacovigilance Signal Detector.
These functions are intentionally pharmacovigilance-specific rather than
generic "reward bucket" adapters. The scoring rubric emphasizes:
1. Signal sensitivity: missing a true novel safety signal is costly.
2. Operational judgment: escalation/log/dismiss choices matter independently.
3. Causal calibration: high scores should reflect not just suspicion, but
   identifying the right drug or interaction.
All public grader outputs are clamped to the judge-safe closed interval [0.01, 0.99].
"""
from typing import Any, Iterable, List
STRICT_MIN = 0.01
STRICT_MAX = 0.99
def _bounded(value: float) -> float:
return min(max(round(value, 4), STRICT_MIN), STRICT_MAX)
def _as_reward_list(trajectory: dict | None) -> List[float]:
payload = trajectory or {}
rewards = payload.get("rewards")
if isinstance(rewards, list) and rewards:
return [float(item) for item in rewards]
if "score" in payload:
return [float(payload["score"])]
reward = payload.get("reward")
if isinstance(reward, dict) and "total" in reward:
return [float(reward["total"])]
if reward is not None:
return [float(reward)]
return []
def _reward_profile(reward: float) -> str:
"""
Translate a step reward into a pharmacovigilance interpretation bucket.
This keeps the grader coupled to the meaning of the environment rather than
to borrowed labels from a different domain.
"""
if reward <= 0.05:
return "unsafe_miss"
if reward <= 0.20:
return "bad_call"
if reward < 0.50:
return "weak_triage"
if reward < 0.80:
return "workable_triage"
if reward < 0.95:
return "strong_triage"
return "expert_triage"
def _mean(values: Iterable[float]) -> float:
items = list(values)
if not items:
return 0.5
return sum(items) / len(items)
def _score_episode(
rewards: List[float],
*,
miss_cost: float,
overcall_cost: float,
stability_gain: float,
expertise_gain: float,
) -> float:
if not rewards:
return 0.5
labels = [_reward_profile(reward) for reward in rewards]
mean_reward = _mean(rewards)
total_steps = len(rewards)
unsafe_miss_count = labels.count("unsafe_miss")
bad_call_count = labels.count("bad_call")
weak_count = labels.count("weak_triage")
strong_count = labels.count("strong_triage") + labels.count("expert_triage")
expert_count = labels.count("expert_triage")
downward_pressure = (
min(unsafe_miss_count * miss_cost, 0.35)
+ min(bad_call_count * overcall_cost, 0.15)
+ min(weak_count * 0.015, 0.06)
)
upward_pressure = 0.0
if strong_count / total_steps >= 0.80:
upward_pressure += stability_gain
if expert_count / total_steps >= 0.60:
upward_pressure += expertise_gain
return _bounded(mean_reward - downward_pressure + upward_pressure)
def easy_grader(trajectory: dict = None) -> float:
"""
Easy tier: obvious known-signal recognition and straightforward handling.
The scorer expects high reliability here. Weak or missed judgments are
penalized more sharply because these are the least ambiguous cases.
"""
rewards = _as_reward_list(trajectory)
return _score_episode(
rewards,
miss_cost=0.12,
overcall_cost=0.03,
stability_gain=0.05,
expertise_gain=0.01,
)
def medium_grader(trajectory: dict = None) -> float:
"""
Medium tier: cluster recognition and escalation readiness.
These cases reward agents that can move from single-case thinking to
population-level signal interpretation.
"""
rewards = _as_reward_list(trajectory)
return _score_episode(
rewards,
miss_cost=0.09,
overcall_cost=0.04,
stability_gain=0.03,
expertise_gain=0.02,
)
def hard_grader(trajectory: dict = None) -> float:
"""
Hard tier: confounding, blame reassignment, and interaction reasoning.
The hard scorer gives extra value to near-expert trajectories because this
tier is specifically designed to separate shallow pattern matching from
mechanistic causal reasoning.
"""
rewards = _as_reward_list(trajectory)
return _score_episode(
rewards,
miss_cost=0.07,
overcall_cost=0.03,
stability_gain=0.02,
expertise_gain=0.04,
)
def known_signal_easy_grader(trajectory: dict = None) -> float:
return easy_grader(trajectory)
def cluster_signal_medium_grader(trajectory: dict = None) -> float:
return medium_grader(trajectory)
def confounded_hard_grader(trajectory: dict = None) -> float:
return hard_grader(trajectory)
# Public API: the three tier graders followed by their scenario-named
# wrappers. The underscore-prefixed helpers are left out on purpose.
__all__ = [
"easy_grader",
"medium_grader",
"hard_grader",
"known_signal_easy_grader",
"cluster_signal_medium_grader",
"confounded_hard_grader",
]