# SmartPayEnv / server / app.py
# Repository page header (not part of the program):
#   author: Pratap-K, commit message: "Update training", commit: c620fb9
# Copyright (c) Meta Platforms, Inc. and affiliates.
# All rights reserved.
#
# This source code is licensed under the BSD-style license found in the
# LICENSE file in the root directory of this source tree.
"""
FastAPI application for the Smartpayenv Environment.

This module creates an HTTP server that exposes the SmartpayenvEnvironment
over HTTP and WebSocket endpoints, compatible with EnvClient.

Endpoints:
    - POST /reset: Reset the environment
    - POST /step: Execute an action
    - GET /state: Get current environment state
    - GET /schema: Get action/observation schemas
    - WS /ws: WebSocket endpoint for persistent sessions
    - POST /simulate: Evaluate an action via the environment's simulate()
    - POST /configure_adversary: Set the fraud-agent policy parameters
    - POST /reset_seeded: Reset with an explicit difficulty and seed

Usage:
    # Development (with auto-reload):
    uvicorn server.app:app --reload --host 0.0.0.0 --port 7860

    # Production:
    uvicorn server.app:app --host 0.0.0.0 --port 7860 --workers 4

    # Or run directly:
    python -m server.app
"""
try:
from openenv.core.env_server.http_server import create_app
except Exception as e: # pragma: no cover
raise ImportError(
"openenv is required for the web interface. Install dependencies with '\n uv sync\n'"
) from e
try:
# Try package-style relative import
from ..models import SmartpayenvAction, SmartpayenvObservation
from .SmartPayEnv_environment import SmartpayenvEnvironment
except (ImportError, ValueError):
# Fallback to local import (for uvicorn server.app:app)
from models import SmartpayenvAction, SmartpayenvObservation
from server.SmartPayEnv_environment import SmartpayenvEnvironment
# ── Singleton env so custom endpoints share state with openenv ─────────
# Different openenv versions store the env in different places
# (app.env, app.state.env, per-request factory, etc.). Rather than
# guessing, we use a singleton subclass: no matter how many times
# openenv instantiates the env class, it always gets the same object,
# and we can always reach it via _SHARED_ENV.
#
# _SHARED_ENV is a module-level global, so it is shared within one Python
# process only. It starts as None and is assigned the first time
# SharedSmartpayenvEnvironment is constructed.
_SHARED_ENV: SmartpayenvEnvironment | None = None
class SharedSmartpayenvEnvironment(SmartpayenvEnvironment):
    """Singleton flavour of ``SmartpayenvEnvironment``.

    Every construction yields the single instance held in the module-level
    ``_SHARED_ENV``. The parent initialiser runs only for the first
    construction, so building the class again never resets the environment.
    """

    def __new__(cls, *args, **kwargs):
        """Return the shared instance, creating and initialising it on first use."""
        global _SHARED_ENV
        if _SHARED_ENV is not None:
            return _SHARED_ENV

        instance = super().__new__(cls)
        # Initialise through the parent class directly: our own __init__ is a
        # no-op once the marker below is set, and Python will call it right
        # after __new__ returns.
        super(SharedSmartpayenvEnvironment, instance).__init__(*args, **kwargs)
        instance._singleton_initialized = True  # type: ignore[attr-defined]
        _SHARED_ENV = instance
        return instance

    def __init__(self, *args, **kwargs):  # noqa: D401
        """Initialise the parent once; do nothing on repeat constructions."""
        # __new__ normally performs the initialisation and sets the marker.
        # This branch only runs for an instance that has not been marked yet.
        if not getattr(self, "_singleton_initialized", False):
            super().__init__(*args, **kwargs)
            self._singleton_initialized = True
def _get_env() -> SmartpayenvEnvironment:
    """Fetch the shared environment, constructing it first if nobody has yet.

    Returns:
        The single environment instance stored in ``_SHARED_ENV``.
    """
    if _SHARED_ENV is None:
        # Constructing the singleton class assigns _SHARED_ENV as a side effect.
        SharedSmartpayenvEnvironment()
    shared = _SHARED_ENV
    assert shared is not None
    return shared
# Create the app with web interface and README integration.
# The singleton subclass is passed as the environment class so that any
# instance openenv builds is the same object the custom endpoints in this
# module reach through _get_env().
# NOTE(review): max_concurrent_envs=1 presumably limits openenv to a single
# live environment/session, which matches the singleton design — confirm
# against the installed openenv version.
app = create_app(
    SharedSmartpayenvEnvironment,
    SmartpayenvAction,
    SmartpayenvObservation,
    env_name="SmartPayEnv",
    max_concurrent_envs=1,
)
@app.post("/simulate", response_model=SmartpayenvObservation)
async def simulate(action: SmartpayenvAction):
    """Simulate an action without advancing the true environment state.

    Args:
        action: The action to evaluate, parsed from the request body.

    Returns:
        Whatever the shared environment's ``simulate`` method returns for
        ``action``; serialised as a ``SmartpayenvObservation``.
    """
    env = _get_env()
    preview = env.simulate(action)
    return preview
# ── Theme-4 co-evolution endpoints ────────────────────────────────────
from typing import Optional
from pydantic import BaseModel
class AdversaryConfig(BaseModel):
    """Parametric fraud-agent policy. Any field may be omitted.

    Request body for ``POST /configure_adversary``. Every field defaults to
    ``None`` and is forwarded unchanged to the environment's
    ``configure_adversary`` method; how a ``None`` value is interpreted is
    decided there, not in this model.
    """
    # NOTE(review): the value ranges and exact meaning of the three numeric
    # knobs are defined by the environment implementation — confirm there.
    intensity: Optional[float] = None
    noise_boost: Optional[float] = None
    pattern_rate: Optional[float] = None
    strategy: Optional[str] = None # "mixed" | "fraud_surge" | "stealth_fraud" | "velocity_attack"
class SeededReset(BaseModel):
    """Request body for ``POST /reset_seeded``.

    Both fields are optional in the request: ``difficulty`` falls back to 0
    and ``seed`` to ``None``.
    """
    # Passed to the environment's reset() as `difficulty` after an int() cast.
    difficulty: int = 0
    # Passed to the environment's reset() as `seed`; None when not supplied.
    seed: Optional[int] = None
@app.post("/configure_adversary")
async def configure_adversary(cfg: AdversaryConfig):
    """Set the learnable fraud agent's behaviour. Returns the active config.

    Args:
        cfg: Adversary parameters from the request body; omitted fields
            arrive as ``None`` and are passed through as ``None``.

    Returns:
        The value returned by the shared environment's
        ``configure_adversary`` method.
    """
    env = _get_env()
    settings = {
        "intensity": cfg.intensity,
        "noise_boost": cfg.noise_boost,
        "pattern_rate": cfg.pattern_rate,
        "strategy": cfg.strategy,
    }
    return env.configure_adversary(**settings)
@app.post("/reset_seeded", response_model=SmartpayenvObservation)
async def reset_seeded(req: SeededReset):
    """Deterministic reset: same `seed` => same starting trajectory.

    Useful for GRPO so all completions in a group share the same state.

    Args:
        req: Requested difficulty and optional seed.

    Returns:
        The result of calling ``reset`` on the shared environment;
        serialised as a ``SmartpayenvObservation``.
    """
    env = _get_env()
    level = int(req.difficulty)
    return env.reset(difficulty=level, seed=req.seed)
def main(host: str | None = None, port: int | None = None):
    """
    Entry point for direct execution via uv run or python -m.

    This function enables running the server without Docker:
        uv run --project . server
        uv run --project . server --port 7860
        python -m SmartPayEnv.server.app

    Explicit ``host`` / ``port`` arguments take precedence. When an argument
    is left as ``None`` the value comes from the ``--host`` / ``--port``
    command-line options, which in turn default to "0.0.0.0" and 7860.

    Args:
        host: Host address to bind to (default: "0.0.0.0")
        port: Port number to listen on (default: 7860)

    For production deployments, consider using uvicorn directly with
    multiple workers:
        uvicorn SmartPayEnv.server.app:app --workers 4

    Note that the shared environment lives in a module-level global, so each
    worker process holds its own separate environment instance.
    """
    import argparse

    import uvicorn

    # The console-script entry point calls main() with no arguments, so the
    # documented `--port` option has to be read from the command line here.
    parser = argparse.ArgumentParser(
        description="Run the SmartPayEnv HTTP server.",
        allow_abbrev=False,
    )
    parser.add_argument("--host", default="0.0.0.0", help="Host address to bind to")
    parser.add_argument("--port", type=int, default=7860, help="Port to listen on")
    # parse_known_args tolerates unrelated argv entries (e.g. when main() is
    # invoked from inside another tool) instead of exiting with an error.
    cli_args, _unknown = parser.parse_known_args()

    uvicorn.run(
        app,
        host=cli_args.host if host is None else host,
        port=cli_args.port if port is None else port,
    )
# Start the server only when this file is executed as a script
# (e.g. `python -m server.app`); importing the module, as uvicorn does for
# `server.app:app`, does not call main().
if __name__ == "__main__":
    main()