# salespath_env/server/app.py
"""
Custom stateful FastAPI server for SalesPath.

Why not create_fastapi_app?
  OpenEnv's built-in HTTP /reset and /step endpoints are STATELESS —
  they create a new Environment instance per request and destroy it.
  State is preserved only over WebSocket sessions.

  For our training loop (HTTP polling), we need a persistent environment
  that survives across /reset + multiple /step calls. This file provides
  that by keeping a single global SalesPathEnvironment instance.

  The response envelope matches OpenEnv exactly:
    { "observation": {...}, "reward": float, "done": bool }
  so all existing clients work without changes.
"""

from typing import Any, Dict, Optional

from fastapi import FastAPI
from pydantic import BaseModel

from ..models import SalesPathAction
from .salespath_environment import SalesPathEnvironment


# ---------------------------------------------------------------------------
# Single persistent environment instance
# ---------------------------------------------------------------------------

# Module-level singleton: constructed once at import time and shared by the
# /reset, /step and /state handlers, so episode state carries over between
# separate HTTP requests.
# NOTE(review): the handlers are plain `def` functions, which FastAPI executes
# in a worker threadpool — concurrent clients could interleave calls on this
# one instance. Confirm that a single training loop is the only caller.
_env: SalesPathEnvironment = SalesPathEnvironment()


# ---------------------------------------------------------------------------
# Request models
# ---------------------------------------------------------------------------

class ResetRequest(BaseModel):
    """Optional JSON body accepted by ``POST /reset``.

    Every field has a default, and each one is forwarded unchanged as a
    keyword argument to ``SalesPathEnvironment.reset``.
    """

    # Difficulty level handed to the environment; defaults to 1.
    # NOTE(review): the valid range is not visible in this file — confirm
    # against SalesPathEnvironment.reset.
    difficulty: int = 1
    # Forwarded as ``seed``; None when the client does not supply one.
    # Presumably seeds the environment's randomness — verify in the environment.
    seed: Optional[int] = None
    # Forwarded as ``episode_id``; None when the client does not supply one.
    episode_id: Optional[str] = None


class ActionPayload(BaseModel):
    """Wire-level description of one agent action.

    The ``/step`` handler copies these four fields one-to-one into a
    ``SalesPathAction`` before passing it to the environment.
    """

    # Required: the only field without a default.
    # NOTE(review): the set of accepted values is defined by SalesPathAction /
    # the environment, not here — this model accepts any string.
    action_type: str
    # Free-form text accompanying the action; empty string when omitted.
    content: str = ""
    # Target of the action; empty string when omitted.
    target: str = ""
    # Defaults to True. Presumably set by the client to flag whether the
    # model output parsed cleanly — TODO confirm against the training loop.
    format_ok: bool = True


class StepRequest(BaseModel):
    """JSON body for ``POST /step``: ``{"action": {...}}``."""

    # The action to apply, nested under the "action" key of the request body.
    action: ActionPayload


# ---------------------------------------------------------------------------
# FastAPI app
# ---------------------------------------------------------------------------

# ASGI application object. The route decorators in this module register their
# handlers on it; title/description/version are metadata passed to FastAPI
# (used for the generated API documentation).
app = FastAPI(
    title="SalesPath Environment",
    description="OpenEnv-compatible RL environment for B2B sales agent training.",
    version="0.1.0",
)


@app.post("/reset")
def reset(req: ResetRequest = ResetRequest()):
    """Start a new episode on the shared environment.

    The request body is optional: the parameter defaults to
    ``ResetRequest()``, so a bare POST uses difficulty 1 with no seed and
    no episode id.

    Responds with the ``observation`` / ``reward`` / ``done`` envelope built
    from the observation returned by the environment's ``reset``.
    """
    first_obs = _env.reset(
        difficulty=req.difficulty,
        episode_id=req.episode_id,
        seed=req.seed,
    )

    # Assemble the envelope key by key; insertion order fixes the JSON key
    # order (observation, reward, done).
    envelope = {"observation": first_obs.model_dump()}
    envelope["reward"] = first_obs.reward
    envelope["done"] = first_obs.done
    return envelope


@app.post("/step")
def step(req: StepRequest):
    """Take one action in the current episode.

    The ``action`` object from the request body is copied field by field
    into a ``SalesPathAction`` and passed to the shared environment's
    ``step``. The reply is an ``observation`` / ``reward`` / ``done``
    envelope built from the observation that call returns.
    """
    payload = req.action

    # Translate the wire-level payload into the environment's own action
    # model; exactly these four fields are carried across.
    action_fields = {
        "action_type": payload.action_type,
        "content": payload.content,
        "target": payload.target,
        "format_ok": payload.format_ok,
    }
    next_obs = _env.step(SalesPathAction(**action_fields))

    observation = next_obs.model_dump()
    return {
        "observation": observation,
        "reward": next_obs.reward,
        "done": next_obs.done,
    }


@app.get("/health")
def health():
    """Report that the server process is up.

    Returns a fixed payload and never touches the environment.
    """
    status_report = {"status": "healthy"}
    return status_report


@app.get("/state")
def state():
    """Return a debugging snapshot of the environment's current state.

    Only the attributes named below are copied into the response; anything
    else held on the state object is left out.
    """
    # Order here is the key order of the JSON response.
    exposed_fields = (
        "episode_id",
        "turn_number",
        "workflow_stage",
        "steps_completed",
        "constraints_violated",
        "objections_handled",
        "difficulty",
        "done",
        "prospect_profile",
    )
    # Read the state object once so every field comes from the same object.
    snapshot = _env.state
    return {name: getattr(snapshot, name) for name in exposed_fields}