Spaces:

Pratap-K
/

SmartPayEnv

Sleeping

App Files Files Community

SmartPayEnv / server /app.py

Pratap-K

Update training

c620fb9 12 days ago

raw

history blame contribute delete

5.54 kB

	# Copyright (c) Meta Platforms, Inc. and affiliates.
	# All rights reserved.
	#
	# This source code is licensed under the BSD-style license found in the
	# LICENSE file in the root directory of this source tree.

	"""
	FastAPI application for the Smartpayenv Environment.

	This module creates an HTTP server that exposes the SmartpayenvEnvironment
	over HTTP and WebSocket endpoints, compatible with EnvClient.

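	Endpoints:
	- POST /reset: Reset the environment
	- POST /step: Execute an action
	- GET /state: Get current environment state
	- GET /schema: Get action/observation schemas
	- WS /ws: WebSocket endpoint for persistent sessions
	- POST /simulate: Simulate an action without advancing the true environment state
	- POST /configure_adversary: Set the fraud agent's behaviour
	- POST /reset_seeded: Deterministic reset from a given seed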

	Usage:
	# Development (with auto-reload):
	uvicorn server.app:app --reload --host 0.0.0.0 --port 7860

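	# Production (the shared environment lives in module state, so each worker
	# process keeps its own independent environment):
	uvicorn server.app:app --host 0.0.0.0 --port 7860 --workers 4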

	# Or run directly:
	python -m server.app
	"""

	try:
	from openenv.core.env_server.http_server import create_app
	except Exception as e: # pragma: no cover
	raise ImportError(
	"openenv is required for the web interface. Install dependencies with '\n uv sync\n'"
	) from e

	try:
	# Try package-style relative import
	from ..models import SmartpayenvAction, SmartpayenvObservation
	from .SmartPayEnv_environment import SmartpayenvEnvironment
	except (ImportError, ValueError):
	# Fallback to local import (for uvicorn server.app:app)
	from models import SmartpayenvAction, SmartpayenvObservation
	from server.SmartPayEnv_environment import SmartpayenvEnvironment


	# ── Singleton env so custom endpoints share state with openenv ─────────
	# Different openenv versions store the env in different places
	# (app.env, app.state.env, per-request factory, etc.). Rather than
	# guessing, we use a singleton subclass: no matter how many times
	# openenv instantiates the env class, it always gets the same object,
	# and we can always reach it via _SHARED_ENV.
	# Holds the one environment instance for this process; stays None until the
	# first construction of the singleton subclass.
	_SHARED_ENV: SmartpayenvEnvironment | None = None


	class SharedSmartpayenvEnvironment(SmartpayenvEnvironment):
	    """Singleton subclass — always returns the same env instance.

	    The first construction creates and initialises the environment and
	    stores it in the module-level ``_SHARED_ENV``. Every later construction
	    returns that same object without re-running initialisation, so the
	    environment is not reset; arguments passed to later constructions are
	    ignored. No locking is performed around the first construction.
	    """

	    def __new__(cls, *args, **kwargs):
	        """Return the shared instance, building it on the first call.

	        Args:
	            *args: Positional arguments forwarded to the parent ``__init__``
	                on the first construction only.
	            **kwargs: Keyword arguments forwarded likewise.
	        """
	        global _SHARED_ENV
	        if _SHARED_ENV is None:
	            inst = super().__new__(cls)
	            # Run the parent initialiser here so the instance is fully set up
	            # before it is published in _SHARED_ENV.
	            super(SharedSmartpayenvEnvironment, inst).__init__(*args, **kwargs)
	            # Flag read by __init__ so that the automatic __init__ call that
	            # follows __new__ does not initialise a second time.
	            inst._singleton_initialized = True  # type: ignore[attr-defined]
	            _SHARED_ENV = inst
	        return _SHARED_ENV

	    def __init__(self, *args, **kwargs):  # noqa: D401
	        """Initialise the parent once; later calls are no-ops."""
	        # Already initialised by __new__ on first construction; subsequent
	        # constructions are no-ops so we don't reset the env.
	        if getattr(self, "_singleton_initialized", False):
	            return
	        super().__init__(*args, **kwargs)
	        self._singleton_initialized = True


	def _get_env() -> SmartpayenvEnvironment:
	    """Fetch the process-wide environment, building it on first use.

	    Returns:
	        The instance held in the module-level ``_SHARED_ENV``.
	    """
	    if _SHARED_ENV is None:
	        # Constructed purely for its side effect: the singleton's constructor
	        # stores the new instance in _SHARED_ENV.
	        SharedSmartpayenvEnvironment()
	    shared = _SHARED_ENV
	    assert shared is not None
	    return shared


	# Create the app with web interface and README integration.
	# The singleton subclass is passed as the environment class so that the
	# custom endpoints below (which go through _get_env()) operate on the same
	# instance the app itself instantiates.
	app = create_app(
	    SharedSmartpayenvEnvironment,
	    SmartpayenvAction,
	    SmartpayenvObservation,
	    env_name="SmartPayEnv",
	    # NOTE(review): presumably caps the server at one live environment,
	    # matching the singleton above — confirm against openenv.
	    max_concurrent_envs=1,
	)


	@app.post("/simulate", response_model=SmartpayenvObservation)
	async def simulate(action: SmartpayenvAction):
	    """Simulates an action without advancing the true environment state."""
	    # Hand the action to the shared environment's simulate() and return
	    # whatever it produces as the response body.
	    env = _get_env()
	    observation = env.simulate(action)
	    return observation


	# ── Theme-4 co-evolution endpoints ────────────────────────────────────
	from typing import Optional
	from pydantic import BaseModel


	class AdversaryConfig(BaseModel):
	    """Parametric fraud-agent policy. Any field may be omitted."""
	    # Request body for POST /configure_adversary. Every field defaults to
	    # None and is forwarded unchanged to the environment's
	    # configure_adversary().
	    # NOTE(review): how the environment treats a None value (e.g. keeping
	    # the current setting) is not visible in this file — confirm.
	    intensity: Optional[float] = None
	    noise_boost: Optional[float] = None
	    pattern_rate: Optional[float] = None
	    strategy: Optional[str] = None  # "mixed" | "fraud_surge" | "stealth_fraud" | "velocity_attack"


	# Request body for POST /reset_seeded.
	class SeededReset(BaseModel):
	    # Forwarded as `difficulty` to the environment's reset(); 0 when omitted.
	    difficulty: int = 0
	    # Forwarded as `seed` to the environment's reset(); None when omitted.
	    seed: Optional[int] = None


	@app.post("/configure_adversary")
	async def configure_adversary(cfg: AdversaryConfig):
	    """Set the learnable fraud agent's behaviour. Returns the active config."""
	    configure = _get_env().configure_adversary
	    # Forward every field as-is; fields the client omitted arrive as None.
	    overrides = {
	        "intensity": cfg.intensity,
	        "noise_boost": cfg.noise_boost,
	        "pattern_rate": cfg.pattern_rate,
	        "strategy": cfg.strategy,
	    }
	    return configure(**overrides)


	@app.post("/reset_seeded", response_model=SmartpayenvObservation)
	async def reset_seeded(req: SeededReset):
	    """Deterministic reset: same `seed` => same starting trajectory.
	    Useful for GRPO so all completions in a group share the same state."""
	    # Reset the shared environment with the requested difficulty and seed and
	    # return the result of reset() as the response body.
	    reset = _get_env().reset
	    level = int(req.difficulty)
	    return reset(difficulty=level, seed=req.seed)


	def main(host: str = "0.0.0.0", port: int = 7860) -> None:
	    """
	    Entry point for direct execution via uv run or python -m.

	    This function enables running the server without Docker:
	        uv run --project . server
	        python -m SmartPayEnv.server.app

	    Command-line flags are not parsed here. To bind to a different address,
	    call ``main(host=..., port=...)`` or invoke uvicorn directly.

	    Args:
	        host: Host address to bind to (default: "0.0.0.0")
	        port: Port number to listen on (default: 7860)

	    For production deployments, consider using uvicorn directly with
	    multiple workers:
	        uvicorn SmartPayEnv.server.app:app --workers 4
	    """
	    # Imported inside the function, so uvicorn is only loaded when the server
	    # is actually started through this entry point.
	    import uvicorn

	    uvicorn.run(app, host=host, port=port)


	# Start the server only when this file is executed as a script
	# (e.g. `python -m server.app`), not when it is imported.
	if __name__ == "__main__":
	    main()