Spaces:

Lomesh7777
/

openenv-multi-agent-RL

Sleeping

openenv-multi-agent-RL / salespath_env /server /app.py

Lomesh2000

FIX: grop update new , env changes

e6a02dd 12 days ago

3.36 kB

	# salespath_env/server/app.py
	"""
	Custom stateful FastAPI server for SalesPath.

	Why not create_fastapi_app?
	OpenEnv's built-in HTTP /reset and /step endpoints are STATELESS —
	they create a new Environment instance per request and destroy it.
	State is preserved only over WebSocket sessions.

	For our training loop (HTTP polling), we need a persistent environment
	that survives across /reset + multiple /step calls. This file provides
	that by keeping a single global SalesPathEnvironment instance.

	The response envelope matches OpenEnv exactly:
	{ "observation": {...}, "reward": float, "done": bool }
	so all existing clients work without changes.
	"""

	from typing import Any, Dict, Optional

	from fastapi import FastAPI
	from pydantic import BaseModel

	from ..models import SalesPathAction
	from .salespath_environment import SalesPathEnvironment


	# ---------------------------------------------------------------------------
	# Single persistent environment instance
	# ---------------------------------------------------------------------------

	# Created once when this module is imported and reused by the /reset, /step
	# and /state handlers below, so the episode started by /reset is the same
	# one that later /step calls advance.
	_env: SalesPathEnvironment = SalesPathEnvironment()


	# ---------------------------------------------------------------------------
	# Request models
	# ---------------------------------------------------------------------------

	class ResetRequest(BaseModel):
	    # Body of POST /reset. Every field has a default, so an empty JSON
	    # object validates. The handler forwards all three values unchanged
	    # to the environment's reset() call.
	    difficulty: int = 1
	    seed: Optional[int] = None
	    episode_id: Optional[str] = None


	class ActionPayload(BaseModel):
	    # Wire representation of a single action. Only action_type is
	    # required; the remaining fields fall back to the defaults below.
	    action_type: str
	    content: str = ""
	    target: str = ""
	    format_ok: bool = True


	class StepRequest(BaseModel):
	    # Body of POST /step: the action is nested under the "action" key
	    # rather than sent as top-level fields.
	    action: ActionPayload


	# ---------------------------------------------------------------------------
	# FastAPI app
	# ---------------------------------------------------------------------------

	# Application object on which the route decorators below register their
	# handlers. title / description / version are descriptive metadata only.
	app = FastAPI(
	title="SalesPath Environment",
	description="OpenEnv-compatible RL environment for B2B sales agent training.",
	version="0.1.0",
	)


	@app.post("/reset")
	def reset(req: ResetRequest = ResetRequest()):
	    """Start a new episode."""
	    # NOTE(review): the default ResetRequest() instance presumably makes the
	    # request body optional for clients -- confirm against FastAPI's
	    # request-body rules. The instance is only read here, never mutated.
	    first_obs = _env.reset(
	        seed=req.seed,
	        episode_id=req.episode_id,
	        difficulty=req.difficulty,
	    )
	    # Envelope shape: {"observation": ..., "reward": ..., "done": ...};
	    # reward and done are copied from the observation object itself.
	    envelope = {"observation": first_obs.model_dump()}
	    envelope["reward"] = first_obs.reward
	    envelope["done"] = first_obs.done
	    return envelope


	@app.post("/step")
	def step(req: StepRequest):
	    """Take one action in the current episode."""
	    payload = req.action
	    # Copy the validated request fields one by one into the environment's
	    # own action type before handing it to the shared environment.
	    env_action = SalesPathAction(
	        action_type=payload.action_type,
	        content=payload.content,
	        target=payload.target,
	        format_ok=payload.format_ok,
	    )
	    next_obs = _env.step(env_action)
	    # Same three-key envelope that /reset returns.
	    return dict(
	        observation=next_obs.model_dump(),
	        reward=next_obs.reward,
	        done=next_obs.done,
	    )


	@app.get("/health")
	def health():
	    # Constant payload; does not touch the environment instance.
	    return dict(status="healthy")


	@app.get("/state")
	def state():
	    """Expose internal state (for debugging). Hidden state excluded."""
	    # Only the attributes named here are copied into the response; the
	    # tuple order is also the key order of the returned dict.
	    exposed_fields = (
	        "episode_id",
	        "turn_number",
	        "workflow_stage",
	        "steps_completed",
	        "constraints_violated",
	        "objections_handled",
	        "difficulty",
	        "done",
	        "prospect_profile",
	    )
	    # Read _env.state once so every field comes from the same state object.
	    snapshot = _env.state
	    return {field: getattr(snapshot, field) for field in exposed_fields}