Spaces:

adithya9903
/

polypharmacy

Sleeping

App Files Files Community

polypharmacy / backend /src /polypharmacy_env /rewards.py

adithya9903

Flatten project to root for OpenEnv submission readiness.

fa51dd9 about 1 month ago

raw

history blame contribute delete

2.56 kB

	"""Reward shaping and regimen-risk computation."""

	from __future__ import annotations

	from itertools import combinations
	from typing import Dict, List, Optional, Tuple

	from .config import (
	INTERVENTION_COST,
	INVALID_ACTION_PENALTY,
	QUERY_COST,
	SEVERE_DDI_DISCOVERY_BONUS,
	TIMEOUT_PENALTY,
	)
	from .data_loader import BeersCriterion, DDIRule, DrugMeta


	def compute_regimen_risk(
	    current_drug_ids: List[str],
	    patient_conditions: List[str],
	    ddi_rules: Dict[Tuple[str, str], DDIRule],
	    beers_criteria: List[BeersCriterion],
	    drug_metadata: Dict[str, DrugMeta],
	) -> float:
	    """Score the overall risk of a medication regimen.

	    Three contributions are accumulated, in this order:

	    1. ``base_risk_score`` of every rule found in ``ddi_rules`` for an
	       unordered pair of distinct drugs, looked up as ``(smaller, larger)``.
	    2. A weight per Beers criterion whose drug is in the regimen and whose
	       condition is either ``None`` or one of ``patient_conditions``
	       (0.05 when the criterion type has no dedicated weight).
	    3. A flat 0.05 for each drug whose metadata has a truthy
	       ``is_high_risk_elderly``.

	    The total is divided by the number of distinct drugs and then clamped.

	    Args:
	        current_drug_ids: Drug ids in the regimen; duplicates count once.
	        patient_conditions: Conditions matched against Beers criteria.
	        ddi_rules: Interaction rules keyed by a pair of drug ids.
	        beers_criteria: Criteria to check against the regimen.
	        drug_metadata: Per-drug metadata keyed by drug id; missing ids
	            contribute nothing.

	    Returns:
	        The normalised risk, limited to the range [0.0, 1.0]. An empty
	        regimen scores 0.0.
	    """
	    if not current_drug_ids:
	        return 0.0

	    fallback_weight = 0.05
	    weight_by_criterion = {
	        "avoid": 0.25,
	        "caution": 0.10,
	        "dose_adjust": 0.08,
	        "avoid_in_condition": 0.20,
	    }
	    unique_drugs = set(current_drug_ids)
	    total = 0.0

	    # Interaction term: combinations() over a sorted sequence of distinct ids
	    # already yields each pair as (smaller, larger), which is the lookup key.
	    for pair in combinations(sorted(unique_drugs), 2):
	        matched_rule = ddi_rules.get(pair)
	        if matched_rule is None:
	            continue
	        total += matched_rule.base_risk_score

	    # Beers term: unconditional criteria always apply; conditional ones only
	    # when the patient actually has the condition.
	    for criterion in beers_criteria:
	        applies = criterion.drug_id in unique_drugs and (
	            criterion.condition is None
	            or criterion.condition in patient_conditions
	        )
	        if applies:
	            total += weight_by_criterion.get(
	                criterion.criterion_type, fallback_weight
	            )

	    # Flat surcharge for drugs flagged as high-risk in the elderly.
	    for drug_id in unique_drugs:
	        meta = drug_metadata.get(drug_id)
	        if not meta:
	            continue
	        if meta.is_high_risk_elderly:
	            total += 0.05

	    # Divide by regimen size so scores stay comparable across regimens of
	    # different lengths, then clamp into [0.0, 1.0].
	    total /= max(len(unique_drugs), 1)
	    lower_bounded = max(total, 0.0)
	    return min(lower_bounded, 1.0)


	def compute_shaped_reward(
	    previous_risk: float,
	    new_risk: float,
	    action_type: str,
	    *,
	    is_invalid: bool = False,
	    is_timeout: bool = False,
	    discovered_severe: bool = False,
	) -> float:
	    """Return the shaped reward for a single environment step.

	    Args:
	        previous_risk: Regimen risk before the action.
	        new_risk: Regimen risk after the action.
	        action_type: Name of the action taken. Only ``"query_ddi"`` and
	            ``"propose_intervention"`` produce a non-zero reward here.
	        is_invalid: If true, the result is ``-INVALID_ACTION_PENALTY`` and
	            every other argument is ignored.
	        is_timeout: If true (and the action is not invalid), the result is
	            ``-TIMEOUT_PENALTY``.
	        discovered_severe: For ``"query_ddi"`` only, adds
	            ``SEVERE_DDI_DISCOVERY_BONUS`` on top of the query cost.

	    Returns:
	        The step reward. Any other action type yields 0.0.
	    """
	    # Penalties take precedence over everything else; invalid beats timeout.
	    if is_invalid:
	        return -INVALID_ACTION_PENALTY
	    if is_timeout:
	        return -TIMEOUT_PENALTY

	    if action_type == "query_ddi":
	        # Every query costs something; a severe finding adds a bonus.
	        query_reward = 0.0 - QUERY_COST
	        if discovered_severe:
	            query_reward += SEVERE_DDI_DISCOVERY_BONUS
	        return query_reward

	    if action_type == "propose_intervention":
	        # Reward the achieved risk reduction, minus the intervention cost.
	        return 0.0 + (previous_risk - new_risk) - INTERVENTION_COST

	    # finish_review terminal bonus is added by the caller after grading
	    return 0.0