| """Per-step reward calculator [0, 1].""" |
|
|
|
|
| def compute_step_reward( |
| action_type: str, |
| daily_fraud_spend: float, |
| total_budget: float, |
| publisher_cfg: dict | None = None, |
| flag_correct: bool | None = None, |
| flag_type_correct: bool | None = None, |
| day: int = 1, |
| episode_days: int = 14, |
| ) -> float: |
| """Return a reward in [0.0, 1.0] for a single step. |
| |
| Centered at 0.5 (neutral). Rewards scale with timing and precision. |
| """ |
| if action_type == "monitor": |
| if daily_fraud_spend > 0: |
| |
| progress = day / episode_days |
| penalty = 0.10 + 0.20 * progress |
| return max(0.05, 0.50 - penalty) |
| return 0.50 |
|
|
| if action_type == "investigate_publisher": |
| if publisher_cfg is not None and publisher_cfg.get("is_fraudulent"): |
| |
| early_bonus = max(0.0, (episode_days - day) / episode_days) * 0.10 |
| return min(1.0, 0.55 + early_bonus) |
| |
| return 0.35 |
|
|
| if action_type == "flag_fraud": |
| if flag_correct is True and flag_type_correct is True: |
| |
| early_bonus = max(0.0, (episode_days - day) / episode_days) * 0.05 |
| return min(1.0, 0.95 + early_bonus) |
| if flag_correct is True: |
| |
| return 0.70 |
| |
| return 0.05 |
|
|
| if action_type == "submit_report": |
| return 0.50 |
|
|
| |
| return 0.05 |
|
|