Spaces:

Lomesh7777
/

openenv-multi-agent-RL

Sleeping

openenv-multi-agent-RL / salespath_env /models.py

Lomesh2000

FIX: grop update new , env changes

e6a02dd 12 days ago

2.64 kB

	# salespath_env/models.py

	from __future__ import annotations

	import uuid
	from typing import Dict, List
	from pydantic import Field

	from openenv.core import Action, Observation, State


	# Closed vocabulary of action-type names. SalesPathAction.is_valid() tests
	# membership in this set with an exact (case-sensitive) string match.
	VALID_ACTIONS = {
	    "PROSPECT", "QUALIFY", "PRESENT",
	    "HANDLE_OBJECTION", "OFFER_DEMO", "NEGOTIATE",
	    "CLOSE", "FOLLOW_UP", "DISQUALIFY",
	}


	class SalesPathAction(Action):
	    """
	    A single move submitted by the agent to the environment.

	    Attributes
	    ----------
	    action_type : str
	        Name of the move; expected to be a member of `VALID_ACTIONS`
	        (see :meth:`is_valid`).
	    content : str
	        Natural-language message that accompanies the move.
	    target : str
	        Optional target hint; the deterministic simulator does not use it.
	    format_ok : bool
	        ``False`` when the agent's output parser found that the raw model
	        completion did NOT follow the expected ``ACTION:/CONTENT:`` block.
	        The environment reads this flag to penalise format-hacking, i.e.
	        malformed completions that were silently coerced to a valid
	        action_type. Defaults to ``True`` so that direct callers (tests,
	        scripted demos) are unaffected.
	    """

	    # Required fields: no default, so callers must always supply them.
	    action_type: str
	    content: str

	    # Optional fields with defaults.
	    target: str = ""
	    format_ok: bool = True

	    def is_valid(self) -> bool:
	        """Return ``True`` only if ``action_type`` is in ``VALID_ACTIONS``.

	        The check is a plain set-membership test, so the match is exact
	        and case-sensitive.
	        """
	        requested = self.action_type
	        return requested in VALID_ACTIONS


	class SalesPathObservation(Observation):
	    """
	    Agent-visible view of the environment.

	    Only information the agent is allowed to see belongs on this model;
	    hidden state must NEVER be added here.
	    """

	    # --- Conversation / workflow position ---------------------------------
	    # Presumably the prospect's reply to the latest action (confirm against
	    # the environment's step logic); empty string by default.
	    prospect_response: str = ""
	    # Stage label; a freshly built observation reports "START".
	    workflow_stage: str = "START"

	    # --- Progress bookkeeping ---------------------------------------------
	    # Each instance gets its own new list (default_factory), never a shared one.
	    constraints_violated: List[str] = Field(default_factory=list)
	    steps_completed: List[str] = Field(default_factory=list)

	    turn_number: int = 0

	    # --- Reward signal ----------------------------------------------------
	    reward: float = 0.0
	    # Key/value schema is not defined in this file; looks like a per-component
	    # breakdown of `reward` (TODO confirm with the reward code).
	    reward_components: Dict = Field(default_factory=dict)

	    # --- Episode status ---------------------------------------------------
	    done: bool = False
	    # Free-form extra data; contents are not constrained by this model.
	    info: Dict = Field(default_factory=dict)


	class SalesPathState(State):
	    """
	    Full internal state of the environment.

	    Unlike the observation model, this carries hidden state that is not
	    exposed to the agent.
	    """

	    # A new random UUID4 string is generated for every instance that is not
	    # given an explicit episode_id.
	    episode_id: str = Field(default_factory=lambda: str(uuid.uuid4()))

	    # --- Prospect and dialogue --------------------------------------------
	    # Schemas of these containers are not defined in this file.
	    prospect_profile: Dict = Field(default_factory=dict)
	    conversation_history: List[Dict] = Field(default_factory=list)

	    # --- Workflow tracking ------------------------------------------------
	    workflow_stage: str = "START"
	    # Presumably the ordered stages the episode expects (TODO confirm).
	    required_workflow: List[str] = Field(default_factory=list)

	    steps_completed: List[str] = Field(default_factory=list)
	    constraints_violated: List[str] = Field(default_factory=list)

	    # --- Counters and settings --------------------------------------------
	    objections_handled: int = 0
	    turn_number: int = 0
	    difficulty: int = 1

	    done: bool = False

	    # Hidden state — NEVER exposed in Observation
	    hidden_state: Dict = Field(default_factory=dict)