# SmartPayEnv / models.py
# Pratap-K's picture
# Implement stateful temporal dynamics, partial observability, and Human-in-the-Loop (HITL) review logic.
# f953d1e
# Copyright (c) Meta Platforms, Inc. and affiliates.
# All rights reserved.
#
# This source code is licensed under the BSD-style license found in the
# LICENSE file in the root directory of this source tree.
"""
Data models for the Smartpayenv Environment.
Rich, production-inspired payment transaction observation and action types.
"""
from pydantic import BaseModel, Field
from openenv.core.env_server.types import Action, Observation
class SmartpayenvAction(Action):
    """
    Agent action for one payment transaction step.

    Every field is an integer code; the codes listed here mirror the
    per-field descriptions below.

    gateway: Which payment gateway to attempt
        (0=GatewayA cheap, 1=GatewayB balanced, 2=GatewayC premium)
    retry_strategy: 0=no retry on failure, 1=failover to next gateway
    fraud_decision: 0=allow transaction, 1=block transaction (ends episode),
        2=challenge (3DS), 3=manual review (delayed)

    NOTE(review): no range constraint is declared on these fields in this
    class, so out-of-range codes are not rejected here -- presumably the
    environment handles them; confirm against the step logic.
    """

    gateway: int = Field(
        default=0,
        description="0=GatewayA (cheap), 1=GatewayB (balanced), 2=GatewayC (premium)",
    )
    retry_strategy: int = Field(
        default=0,
        description="0=No Retry, 1=Failover to next gateway on failure",
    )
    # Four-way decision: codes 2 and 3 (challenge / manual review) are part of
    # the action space in addition to plain allow / block.
    fraud_decision: int = Field(
        default=0,
        description="0=Allow, 1=Block, 2=Challenge (3DS), 3=Manual Review (Delayed)",
    )
class SmartpayenvObservation(Observation):
    """
    Observation describing one incoming payment transaction.

    Fields are grouped as: transaction context, user/device signals,
    time of day, gateway health, a noisy fraud-risk estimate, episode
    tracking, step outputs (reward, done flag, chargeback penalty, resolved
    manual reviews) and per-task scores. Every field has a default, so an
    instance can be built without arguments.
    """

    # --- Transaction context ---
    amount: float = Field(
        default=0.0,
        description="Transaction amount in USD",
    )
    merchant_category: int = Field(
        default=0,
        description="Merchant category: 0=grocery, 1=travel, 2=electronics, 3=dining, 4=gaming, 5=other",
    )
    is_international: bool = Field(
        default=False,
        description="Cross-border transaction flag",
    )
    card_present: bool = Field(
        default=True,
        description="Card physically present (lowers fraud risk)",
    )

    # --- User / device signals ---
    user_type: int = Field(
        default=0,
        description="Derived risk tier: 0=Normal, 1=Risky, 2=Fraud",
    )
    user_segment: int = Field(
        default=1,
        description="Cohort: 0=New/Guest, 1=Existing, 2=Premium/VIP",
    )
    user_history_score: float = Field(
        default=1.0,
        description="Normalized user reliability score [0,1]",
    )
    device_type: int = Field(
        default=0,
        description="0=mobile, 1=desktop, 2=tablet",
    )
    bin_category: int = Field(
        default=0,
        description="Bank Identification Number category (0-9)",
    )
    transaction_velocity: float = Field(
        default=0.0,
        description="Normalized count of transactions in the last 5 steps [0,1]",
    )

    # --- Temporal ---
    time_of_day: int = Field(
        default=0,
        description="Hour of day 0–23",
    )

    # --- Gateway health ---
    # Both lists default to empty via default_factory, so instances never
    # share a list object.
    gateway_success_rates: list[float] = Field(
        default_factory=list,
        description="Current success-rate estimates for [GatewayA, GatewayB, GatewayC]",
    )
    gateway_states: list[str] = Field(
        default_factory=list,
        description="Health state for each gateway: 'normal' | 'degraded' | 'recovering'",
    )

    # --- Risk scores ---
    observed_fraud_risk: float = Field(
        default=0.0,
        description="Noisy multi-factor fraud risk estimate [0,1] (true risk is hidden)",
    )

    # --- Episode tracking ---
    previous_failures: int = Field(
        default=0,
        description="Consecutive failed transactions in this episode",
    )
    difficulty: int = Field(
        default=0,
        description="Episode difficulty tier: 0=easy, 1=medium, 2=hard",
    )

    # --- Step outputs ---
    reward: float = Field(
        default=0.0,
        description="Combined step reward [0,1]",
    )
    done: bool = Field(
        default=False,
        description="Episode done flag",
    )
    chargeback_penalty_applied: float = Field(
        default=0.0,
        description="Penalty deducted this step from a past transaction chargeback",
    )
    review_resolutions: list[dict] = Field(
        default_factory=list,
        description="List of resolved manual reviews this step: [{ 'amount': float, 'is_fraud': bool, 'outcome': 'accepted'|'rejected' }]",
    )

    # --- Per-task scores ---
    # Declared as first-class fields so the openenv framework serializes them.
    task_routing_score: float = Field(
        default=0.0,
        description="Routing efficacy score [0,1]",
    )
    task_fraud_mcc_score: float = Field(
        default=0.0,
        description="Fraud detection MCC score [0,1]",
    )
    task_retention_score: float = Field(
        default=1.0,
        description="User retention score [0,1]",
    )

    # --- Metadata ---
    # Free-form dict kept for backward compatibility / agent introspection.
    metadata: dict = Field(
        default_factory=dict,
        description="Per-task score breakdown",
    )