Lomesh2000
FIX: GRPO update, env changes
e6a02dd
# salespath_env/models.py
from __future__ import annotations
import uuid
from typing import Dict, List
from pydantic import Field
from openenv.core import Action, Observation, State
# Closed vocabulary of action-type strings. Membership is exact
# (case-sensitive); ``SalesPathAction.is_valid`` tests against this set.
VALID_ACTIONS = {
    "PROSPECT",
    "QUALIFY",
    "PRESENT",
    "HANDLE_OBJECTION",
    "OFFER_DEMO",
    "NEGOTIATE",
    "CLOSE",
    "FOLLOW_UP",
    "DISQUALIFY",
}
class SalesPathAction(Action):
    """
    A single move submitted by the agent to the environment.

    Attributes
    ----------
    action_type : str
        Expected to be a member of `VALID_ACTIONS`; nothing is enforced
        at construction time, call :meth:`is_valid` to check.
    content : str
        Natural-language message that accompanies the action.
    target : str
        Optional target hint (unused by the deterministic simulator).
        Defaults to the empty string.
    format_ok : bool
        The agent's output parser sets this to ``False`` when the raw
        model completion did NOT match the expected ``ACTION:/CONTENT:``
        block. The environment relies on the flag to penalise
        format-hacking, i.e. a malformed completion that was silently
        coerced into a valid action_type. It defaults to ``True`` so
        direct callers (tests, scripted demos) are unaffected.
    """

    action_type: str
    content: str
    target: str = ""
    format_ok: bool = True

    def is_valid(self) -> bool:
        """Return ``True`` only if ``action_type`` is in ``VALID_ACTIONS``."""
        allowed = VALID_ACTIONS
        return self.action_type in allowed
class SalesPathObservation(Observation):
    """
    Agent-visible observation model.

    Only information the agent is allowed to see belongs here; nothing
    from the environment's hidden state may ever be added to this model.
    Every field has a default, and container fields use a
    ``default_factory`` so each instance gets its own list/dict.
    """

    # Conversation / workflow view (stage defaults to "START").
    prospect_response: str = ""
    workflow_stage: str = "START"
    constraints_violated: List[str] = Field(default_factory=list)
    steps_completed: List[str] = Field(default_factory=list)
    turn_number: int = 0

    # Reward signal; ``reward_components`` is an untyped dict
    # (presumably a per-component breakdown of ``reward`` -- confirm
    # against the environment code).
    reward: float = 0.0
    reward_components: Dict = Field(default_factory=dict)

    # Termination flag and free-form extra information.
    done: bool = False
    info: Dict = Field(default_factory=dict)
class SalesPathState(State):
    """
    Full internal state kept by the environment.

    Unlike the observation model, this includes ``hidden_state``, which
    is not exposed to the agent. Every field has a default; container
    fields use a ``default_factory`` so instances never share a
    list/dict.
    """

    # Fresh random UUID4 string generated per instance.
    episode_id: str = Field(default_factory=lambda: str(uuid.uuid4()))

    # Prospect description and dialogue log (untyped dicts; schema is
    # defined by whoever populates them).
    prospect_profile: Dict = Field(default_factory=dict)
    conversation_history: List[Dict] = Field(default_factory=list)

    # Workflow tracking (stage defaults to "START").
    workflow_stage: str = "START"
    required_workflow: List[str] = Field(default_factory=list)
    steps_completed: List[str] = Field(default_factory=list)
    constraints_violated: List[str] = Field(default_factory=list)

    # Counters and episode settings.
    objections_handled: int = 0
    turn_number: int = 0
    difficulty: int = 1
    done: bool = False

    # Hidden state -- must NEVER be copied into an Observation.
    hidden_state: Dict = Field(default_factory=dict)