Spaces:

Lomesh7777
/

openenv-multi-agent-RL

Sleeping

App Files Files Community

Imsachin010 commited on 13 days ago

Commit

fbf5bf6

verified ·

1 Parent(s): 414b500

Deploy SalesPath Environment

Browse files

Files changed (17) hide show

Dockerfile +27 -0
README.md +42 -5
requirements.txt +4 -0
salespath_env/README.md +0 -0
salespath_env/__init__.py +0 -0
salespath_env/client.py +0 -0
salespath_env/models.py +86 -0
salespath_env/openenv.yaml +13 -0
salespath_env/pyproject.toml +0 -0
salespath_env/server/__init__.py +0 -0
salespath_env/server/app.py +18 -0
salespath_env/server/prospect_simulator.py +162 -0
salespath_env/server/requirements.txt +0 -0
salespath_env/server/reward.py +138 -0
salespath_env/server/rules.py +222 -0
salespath_env/server/salespath_environment.py +294 -0
salespath_env/server/task_bank.py +199 -0

Dockerfile ADDED Viewed

	@@ -0,0 +1,27 @@

+FROM python:3.11-slim
+# HuggingFace Spaces runs on port 7860 by default
+ENV PORT=7860
+ENV PYTHONUNBUFFERED=1
+ENV PYTHONDONTWRITEBYTECODE=1
+WORKDIR /app
+# Install system dependencies (curl is used by the HEALTHCHECK below)
+RUN apt-get update && apt-get install -y --no-install-recommends \
+    curl \
+    && rm -rf /var/lib/apt/lists/*
+# Install Python dependencies before copying the package, so this layer
+# is only rebuilt when requirements.txt changes
+COPY requirements.txt .
+RUN pip install --no-cache-dir -r requirements.txt
+# Copy the salespath_env package
+COPY salespath_env/ ./salespath_env/
+# Health check: probes the /health endpoint on the configured port
+HEALTHCHECK --interval=30s --timeout=5s --start-period=15s --retries=3 \
+    CMD curl -f http://localhost:${PORT}/health || exit 1
+# Start the FastAPI server on HF Spaces port.
+# Wrapped in `sh -c` so ${PORT} is expanded by the shell at container start.
+CMD ["sh", "-c", "uvicorn salespath_env.server.app:app --host 0.0.0.0 --port ${PORT}"]

README.md CHANGED Viewed

@@ -1,10 +1,47 @@
 ---
-title: Salespath Env
-emoji: 📊
-colorFrom: pink
-colorTo: pink
 sdk: docker
 pinned: false
 ---
-Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference

 ---
+title: SalesPath Environment
+emoji: 🤝
+colorFrom: blue
+colorTo: indigo
 sdk: docker
+app_port: 7860
 pinned: false
+license: mit
+short_description: RL gym environment for sales agent training
 ---
+# SalesPath Environment
+An [OpenEnv](https://github.com/openenv)-compatible reinforcement learning (RL) gym environment for training sales agents via LLM fine-tuning.
+## API Endpoints
+| Method | Endpoint | Description |
+|--------|----------|-------------|
+| `POST` | `/reset` | Reset the environment, returns initial observation |
+| `POST` | `/step` | Take an action, returns next observation + reward |
+| `GET`  | `/health` | Health check |
+## Quick Start
+### Reset
+```bash
+curl -X POST https://imsachin010-salespath-env.hf.space/reset \
+  -H "Content-Type: application/json" \
+  -d '{"difficulty": 1}'
+```
+### Step
+```bash
+curl -X POST https://imsachin010-salespath-env.hf.space/step \
+  -H "Content-Type: application/json" \
+  -d '{"action": {"action_type": "PROSPECT", "content": "Hello, tell me about your workflow challenges."}}'
+```
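+## Action Types
+- `PROSPECT` — Initial outreach and discovery (must be the first action)
+- `QUALIFY` — Qualify the lead
+- `PRESENT` — Deliver the sales pitch
+- `HANDLE_OBJECTION` — Handle prospect objections
+- `OFFER_DEMO` — Offer a product demo
+- `NEGOTIATE` — Negotiate terms
+- `CLOSE` — Attempt to close the deal (ends the episode)
+- `FOLLOW_UP` — Follow up after the prospect goes silent
+- `DISQUALIFY` — Walk away from a prospect that cannot be closed (ends the episode)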

requirements.txt ADDED Viewed

	@@ -0,0 +1,4 @@

+fastapi>=0.110.0
+uvicorn[standard]>=0.29.0
+pydantic>=2.0
+openenv

salespath_env/README.md ADDED Viewed

File without changes

salespath_env/__init__.py ADDED Viewed

File without changes

salespath_env/client.py ADDED Viewed

File without changes

salespath_env/models.py ADDED Viewed

	@@ -0,0 +1,86 @@

+# salespath_env/models.py
+from __future__ import annotations
+import uuid
+from typing import Dict, List
+from pydantic import Field
+from openenv.core import Action, Observation, State
+# Closed set of action_type strings the environment accepts.
+# SalesPathAction.is_valid() is a membership test against this set.
+VALID_ACTIONS = {
+    "PROSPECT",
+    "QUALIFY",
+    "PRESENT",
+    "HANDLE_OBJECTION",
+    "OFFER_DEMO",
+    "NEGOTIATE",
+    "CLOSE",
+    "FOLLOW_UP",
+    "DISQUALIFY",
+}
+class SalesPathAction(Action):
+    """
+    Action sent by the agent to the environment.
+    """
+    action_type: str
+    content: str
+    target: str = ""
+    def is_valid(self) -> bool:
+        """
+        Strict validation of allowed action types.
+        """
+        return self.action_type in VALID_ACTIONS
+class SalesPathObservation(Observation):
+    """
+    What the agent is allowed to observe.
+    Hidden state must NEVER be exposed here.
+    """
+    # Text of the prospect's reply (the intro message right after reset).
+    prospect_response: str = ""
+    # Action type of the most recent valid action; "START" before any.
+    workflow_stage: str = "START"
+    # Rule IDs (e.g. "R01") violated so far in the episode.
+    constraints_violated: List[str] = Field(default_factory=list)
+    # Distinct action types taken so far, in first-seen order.
+    steps_completed: List[str] = Field(default_factory=list)
+    # Number of step() calls made in this episode.
+    turn_number: int = 0
+    # Scalar reward for this step.
+    reward: float = 0.0
+    # Per-component breakdown of the reward, keyed by component name.
+    reward_components: Dict = Field(default_factory=dict)
+    # True once the episode has terminated.
+    done: bool = False
+    # Extra diagnostics such as episode_id, response_token or an error text.
+    info: Dict = Field(default_factory=dict)
+class SalesPathState(State):
+    """
+    Internal environment state.
+    Includes hidden state not exposed to the agent.
+    """
+    # Unique identifier of the episode (random UUID4 string by default).
+    episode_id: str = Field(default_factory=lambda: str(uuid.uuid4()))
+    # Publicly visible prospect attributes (company, budget_signal, ...).
+    prospect_profile: Dict = Field(default_factory=dict)
+    # Alternating agent / prospect entries, one dict per utterance.
+    conversation_history: List[Dict] = Field(default_factory=list)
+    # Action type of the most recent valid action; "START" before any.
+    workflow_stage: str = "START"
+    # Ordered action types the current difficulty expects.
+    required_workflow: List[str] = Field(default_factory=list)
+    # Distinct action types taken so far, in first-seen order.
+    steps_completed: List[str] = Field(default_factory=list)
+    # Rule IDs violated so far; accumulates over the whole episode.
+    constraints_violated: List[str] = Field(default_factory=list)
+    # Count of HANDLE_OBJECTION actions processed by the simulator.
+    objections_handled: int = 0
+    # Number of step() calls made in this episode.
+    turn_number: int = 0
+    # Difficulty level the episode was reset with.
+    difficulty: int = 1
+    # True once the episode has terminated.
+    done: bool = False
+    # Hidden state — NEVER exposed in Observation
+    hidden_state: Dict = Field(default_factory=dict)

salespath_env/openenv.yaml ADDED Viewed

	@@ -0,0 +1,13 @@

+[project]
+name = "salespath_env"
+version = "0.1.0"
+dependencies = [
+    "openenv",
+    "fastapi",
+    "uvicorn",
+    "pydantic>=2.0",
+    "trl>=0.8.0",
+    "unsloth",
+    "torch",
+    "transformers",
+]

salespath_env/pyproject.toml ADDED Viewed

File without changes

salespath_env/server/__init__.py ADDED Viewed

File without changes

salespath_env/server/app.py ADDED Viewed

	@@ -0,0 +1,18 @@

+# salespath_env/server/app.py
+from openenv.core.env_server import create_fastapi_app
+from ..models import (
+    SalesPathAction,
+    SalesPathObservation,
+)
+from .salespath_environment import (
+    SalesPathEnvironment,
+)
+# ASGI application object; the Dockerfile's uvicorn command targets
+# `salespath_env.server.app:app`.
+app = create_fastapi_app(
+    SalesPathEnvironment,
+    SalesPathAction,
+    SalesPathObservation,
+)

salespath_env/server/prospect_simulator.py ADDED Viewed

	@@ -0,0 +1,162 @@

+# salespath_env/server/prospect_simulator.py
+from ..models import SalesPathAction, SalesPathState
+# Maps each response token produced by ProspectSimulator._get_token to the
+# text shown to the agent. Every token the simulator can return must have
+# an entry here, because respond() indexes this dict directly.
+# NOTE(review): "silence" is not returned by any branch of _get_token.
+RESPONSE_TEXT = {
+    "open:positive_signal": "That sounds interesting. Tell me more about how this works.",
+    "open:neutral_signal": "I see. We're evaluating a few options at the moment.",
+    "objection:price": "The pricing seems higher than what we budgeted for.",
+    "objection:timing": "The timing isn't ideal — we're in the middle of a quarter close.",
+    "objection:premature_pitch": (
+        "I'm not sure we're ready to discuss solutions yet. "
+        "What do you know about our current situation?"
+    ),
+    "deflect:budget_not_discussed": (
+        "We haven't really talked about what we're looking for yet."
+    ),
+    "deflect:stall": (
+        "Let me get back to you on this. A lot is happening on our end."
+    ),
+    "accept:demo_scheduled": (
+        "Yes, let's set up a demo. What time works next week?"
+    ),
+    "accept:close_success": (
+        "Alright, I think we can move forward with this. "
+        "Send over the paperwork."
+    ),
+    "reject:close_failed": (
+        "I don't think we're ready to commit at this point."
+    ),
+    "silence": "",
+    "exit:disqualified": (
+        "I think we're done here. This isn't the right fit."
+    ),
+}
+class ProspectSimulator:
+    """
+    Pure rule-based simulator.
+    No LLM. No transformers. Deterministic behavior.
+    """
+    def respond(
+        self,
+        action: SalesPathAction,
+        state: SalesPathState,
+    ) -> tuple[str, str]:
+        """
+        Returns:
+            (response_token, response_text)
+        """
+        token = self._get_token(action, state)
+        text = RESPONSE_TEXT[token]
+        return token, text
+    def _get_token(
+        self,
+        action: SalesPathAction,
+        state: SalesPathState,
+    ) -> str:
+        atype = action.action_type
+        difficulty = state.difficulty
+        turn = state.turn_number
+        profile = state.prospect_profile
+        hidden = state.hidden_state
+        objections = state.objections_handled
+        # -----------------------------
+        # Rule-triggered responses first
+        # -----------------------------
+        if state.constraints_violated:
+            latest = state.constraints_violated[-1]
+            if latest == "R01":
+                return "objection:premature_pitch"
+            if latest == "R03":
+                return "deflect:budget_not_discussed"
+        # -----------------------------
+        # Action-based responses
+        # -----------------------------
+        if atype == "PROSPECT":
+            return "open:positive_signal"
+        if atype == "QUALIFY":
+            # Reveal budget if hidden
+            if profile.get("budget_signal") == "unknown":
+                state.prospect_profile["budget_signal"] = hidden.get(
+                    "revealed_budget",
+                    "medium",
+                )
+            return "open:neutral_signal"
+        if atype == "PRESENT":
+            if difficulty >= 2:
+                if objections == 0:
+                    return "objection:price"
+            return "open:positive_signal"
+        if atype == "HANDLE_OBJECTION":
+            state.objections_handled += 1
+            required_objections = hidden.get("num_objections", 1)
+            if state.objections_handled >= required_objections:
+                return "open:positive_signal"
+            if objections == 0:
+                return "objection:timing"
+            return "open:positive_signal"
+        if atype == "OFFER_DEMO":
+            return "accept:demo_scheduled"
+        if atype == "NEGOTIATE":
+            return "open:neutral_signal"
+        if atype == "CLOSE":
+            true_budget = hidden.get("true_budget", 0.7)
+            close_threshold = hidden.get("close_threshold", 0.5)
+            decision_maker = profile.get("decision_maker", True)
+            if (
+                true_budget >= close_threshold
+                and decision_maker
+            ):
+                return "accept:close_success"
+            return "reject:close_failed"
+        if atype == "FOLLOW_UP":
+            return "open:neutral_signal"
+        if atype == "DISQUALIFY":
+            return "exit:disqualified"
+        # -----------------------------
+        # Difficulty 3+ mode shift
+        # -----------------------------
+        if difficulty >= 3 and turn >= 10:
+            import random
+            if random.random() < hidden.get("stall_probability", 0.0):
+                return "deflect:stall"
+        return "open:neutral_signal"

salespath_env/server/requirements.txt ADDED Viewed

File without changes

salespath_env/server/reward.py ADDED Viewed

	@@ -0,0 +1,138 @@

+# salespath_env/server/reward.py
+from ..models import SalesPathAction, SalesPathState
+# Turn budget per difficulty level; turns beyond this count are penalised
+# by the efficiency component of compute_reward.
+DIFFICULTY_OPTIMAL_TURNS = {
+    1: 5,
+    2: 8,
+    3: 12,
+    4: 14,
+}
+def compute_reward(
+    state: SalesPathState,
+    action: SalesPathAction,
+    response_token: str,
+    new_violations: list[str],
+    episode_done: bool,
+) -> tuple[float, dict]:
+    """
+    Returns:
+        (total_reward, reward_components)
+    """
+    components = {}
+    # --------------------------------------------------
+    # 1. Outcome Reward (terminal only)
+    # --------------------------------------------------
+    r_outcome = 0.0
+    if episode_done:
+        if response_token == "accept:close_success":
+            r_outcome = 1.0
+        elif action.action_type == "DISQUALIFY":
+            if "R08" not in new_violations:
+                r_outcome = 0.5
+            else:
+                r_outcome = -0.5
+        elif state.turn_number >= 20:
+            r_outcome = -0.3
+        elif len(state.constraints_violated) >= 3:
+            r_outcome = -0.5
+        else:
+            r_outcome = -0.5
+    components["r_outcome"] = r_outcome
+    # --------------------------------------------------
+    # 2. Compliance Reward
+    # --------------------------------------------------
+    r_compliance = max(
+        -1.0,
+        -0.2 * len(new_violations),
+    )
+    components["r_compliance"] = r_compliance
+    # --------------------------------------------------
+    # 3. Ordering Reward
+    # --------------------------------------------------
+    required = state.required_workflow
+    completed = state.steps_completed
+    if len(required) > 0 and len(completed) > 0:
+        correct = sum(
+            1
+            for i in range(min(len(required), len(completed)))
+            if required[i] == completed[i]
+        )
+        r_ordering = correct / len(required)
+    else:
+        r_ordering = 1.0
+    components["r_ordering"] = r_ordering
+    # --------------------------------------------------
+    # 4. Efficiency Reward
+    # --------------------------------------------------
+    if episode_done:
+        optimal = DIFFICULTY_OPTIMAL_TURNS.get(
+            state.difficulty,
+            10,
+        )
+        extra_turns = max(
+            0,
+            state.turn_number - optimal,
+        )
+        r_efficiency = max(
+            -0.3,
+            -0.05 * extra_turns,
+        )
+    else:
+        r_efficiency = 0.0
+    components["r_efficiency"] = r_efficiency
+    # --------------------------------------------------
+    # 5. Format Reward
+    # --------------------------------------------------
+    r_format = 1.0 if action.is_valid() else -0.1
+    components["r_format"] = r_format
+    # --------------------------------------------------
+    # Final Weighted Reward
+    # --------------------------------------------------
+    weights = {
+        "r_outcome": 0.40,
+        "r_compliance": 0.30,
+        "r_ordering": 0.15,
+        "r_efficiency": 0.10,
+        "r_format": 0.05,
+    }
+    total_reward = sum(
+        weights[key] * components[key]
+        for key in weights
+    )
+    components["total"] = total_reward
+    return total_reward, components

salespath_env/server/rules.py ADDED Viewed

	@@ -0,0 +1,222 @@

+# salespath_env/server/rules.py
+from dataclasses import dataclass
+from typing import Callable
+from ..models import SalesPathAction, SalesPathState
+@dataclass
+class BusinessRule:
+    """
+    One business rule of the sales workflow.
+    The ``check`` callable returns True when the rule is VIOLATED.
+    """
+    # Short identifier such as "R01"; recorded in constraints_violated.
+    rule_id: str
+    # Machine-friendly rule name.
+    name: str
+    # Human-readable statement of the rule.
+    description: str
+    # Predicate (state, action) -> True if the action violates the rule.
+    check: Callable[[SalesPathState, SalesPathAction], bool]
+def _qualify_before_present(
+    state: SalesPathState,
+    action: SalesPathAction,
+) -> bool:
+    """
+    R01:
+    PRESENT before QUALIFY is invalid.
+    """
+    if action.action_type == "PRESENT":
+        return "QUALIFY" not in state.steps_completed
+    return False
+def _demo_before_negotiate(
+    state: SalesPathState,
+    action: SalesPathAction,
+) -> bool:
+    """
+    R02:
+    NEGOTIATE before OFFER_DEMO is invalid.
+    """
+    if action.action_type == "NEGOTIATE":
+        return "OFFER_DEMO" not in state.steps_completed
+    return False
+def _budget_known_to_negotiate(
+    state: SalesPathState,
+    action: SalesPathAction,
+) -> bool:
+    """
+    R03:
+    Cannot NEGOTIATE while budget is unknown.
+    """
+    if action.action_type == "NEGOTIATE":
+        return state.prospect_profile.get("budget_signal") == "unknown"
+    return False
+def _discount_after_objections(
+    state: SalesPathState,
+    action: SalesPathAction,
+) -> bool:
+    """
+    R04:
+    Discount only after 2 objections handled.
+    """
+    if action.action_type == "NEGOTIATE":
+        if "discount" in action.content.lower():
+            return state.objections_handled < 2
+    return False
+def _no_repeat_action(
+    state: SalesPathState,
+    action: SalesPathAction,
+) -> bool:
+    """
+    R05:
+    Same action twice in a row is invalid.
+    """
+    if state.conversation_history:
+        last_action = state.conversation_history[-1].get("action_type", "")
+        return last_action == action.action_type
+    return False
+def _prospect_first(
+    state: SalesPathState,
+    action: SalesPathAction,
+) -> bool:
+    """
+    R06:
+    First action must be PROSPECT.
+    """
+    if state.turn_number == 1:
+        return action.action_type != "PROSPECT"
+    return False
+def _followup_timing(
+    state: SalesPathState,
+    action: SalesPathAction,
+) -> bool:
+    """
+    R07:
+    FOLLOW_UP only valid after silence.
+    If prospect just responded last turn, violation.
+    """
+    if action.action_type == "FOLLOW_UP":
+        if state.conversation_history:
+            last_speaker = state.conversation_history[-1].get("speaker", "agent")
+            return last_speaker == "prospect"
+    return False
+def _disqualify_logic(
+    state: SalesPathState,
+    action: SalesPathAction,
+) -> bool:
+    """
+    R08:
+    DISQUALIFY only when prospect is genuinely not closeable.
+    Violation if prospect is actually closeable.
+    """
+    if action.action_type == "DISQUALIFY":
+        true_budget = state.hidden_state.get("true_budget", 0.5)
+        close_threshold = state.hidden_state.get("close_threshold", 0.5)
+        decision_maker = state.prospect_profile.get("decision_maker", True)
+        return (true_budget >= close_threshold) and decision_maker
+    return False
+def _close_requires_demo(
+    state: SalesPathState,
+    action: SalesPathAction,
+) -> bool:
+    """
+    R09:
+    Difficulty 2+ requires OFFER_DEMO before CLOSE.
+    """
+    if action.action_type == "CLOSE":
+        if state.difficulty >= 2:
+            return "OFFER_DEMO" not in state.steps_completed
+    return False
+BUSINESS_RULES = [
+    BusinessRule(
+        "R01",
+        "qualify_before_present",
+        "Must QUALIFY before PRESENT",
+        _qualify_before_present,
+    ),
+    BusinessRule(
+        "R02",
+        "demo_before_negotiate",
+        "Must OFFER_DEMO before NEGOTIATE",
+        _demo_before_negotiate,
+    ),
+    BusinessRule(
+        "R03",
+        "budget_known_to_negotiate",
+        "Budget must be known before NEGOTIATE",
+        _budget_known_to_negotiate,
+    ),
+    BusinessRule(
+        "R04",
+        "discount_after_objections",
+        "Discount only after 2 objections handled",
+        _discount_after_objections,
+    ),
+    BusinessRule(
+        "R05",
+        "no_repeat_action",
+        "Cannot repeat same action consecutively",
+        _no_repeat_action,
+    ),
+    BusinessRule(
+        "R06",
+        "prospect_first",
+        "First action must be PROSPECT",
+        _prospect_first,
+    ),
+    BusinessRule(
+        "R07",
+        "followup_timing",
+        "FOLLOW_UP only after prospect silence",
+        _followup_timing,
+    ),
+    BusinessRule(
+        "R08",
+        "disqualify_logic",
+        "DISQUALIFY only when prospect is genuinely unqualified",
+        _disqualify_logic,
+    ),
+    BusinessRule(
+        "R09",
+        "close_requires_demo",
+        "Must OFFER_DEMO before CLOSE (difficulty 2+)",
+        _close_requires_demo,
+    ),
+]
+def check_rules(
+    state: SalesPathState,
+    action: SalesPathAction,
+) -> list[str]:
+    """
+    Returns list of violated rule IDs.
+    """
+    violated = []
+    for rule in BUSINESS_RULES:
+        if rule.check(state, action):
+            violated.append(rule.rule_id)
+    return violated

salespath_env/server/salespath_environment.py ADDED Viewed

	@@ -0,0 +1,294 @@

+# salespath_env/server/salespath_environment.py
+import uuid
+from openenv.core.env_server import Environment
+from ..models import (
+    SalesPathAction,
+    SalesPathObservation,
+    SalesPathState,
+)
+from .task_bank import sample_profile
+from .rules import check_rules
+from .reward import compute_reward
+from .prospect_simulator import ProspectSimulator
+# Expected ordered action sequence per difficulty level. reset() stores
+# the selected sequence in the episode state as required_workflow.
+DIFFICULTY_WORKFLOW = {
+    1: [
+        "QUALIFY",
+        "PRESENT",
+        "CLOSE",
+    ],
+    2: [
+        "QUALIFY",
+        "PRESENT",
+        "HANDLE_OBJECTION",
+        "OFFER_DEMO",
+        "CLOSE",
+    ],
+    3: [
+        "QUALIFY",
+        "PRESENT",
+        "HANDLE_OBJECTION",
+        "OFFER_DEMO",
+        "HANDLE_OBJECTION",
+        "NEGOTIATE",
+        "CLOSE",
+    ],
+    4: [],  # Agent must determine; DISQUALIFY may be correct
+}
+# The episode ends once this many rule violations have accumulated.
+MAX_VIOLATIONS_BEFORE_TERMINATE = 3
+# The episode ends once this many turns have been taken.
+MAX_TURNS = 20
+class SalesPathEnvironment(Environment):
+    """
+    Core OpenEnv environment.
+    All business logic routes through:
+    - rules.py
+    - reward.py
+    - prospect_simulator.py
+    """
+    def __init__(self):
+        super().__init__()
+        self._state = SalesPathState()
+        self._simulator = ProspectSimulator()
+    def reset(self, difficulty: int = 1) -> SalesPathObservation:
+        """
+        Start a new episode.
+        """
+        profile = sample_profile(difficulty)
+        hidden_state = {
+            "true_budget": profile.true_budget,
+            "close_threshold": profile.close_threshold,
+            "stall_probability": profile.stall_probability,
+            "num_objections": {
+                1: 0,
+                2: 1,
+                3: 2,
+                4: 2,
+            }[difficulty],
+            "revealed_budget": (
+                "high"
+                if profile.true_budget >= 0.7
+                else "medium"
+                if profile.true_budget >= 0.4
+                else "low"
+            ),
+        }
+        public_profile = {
+            "company_name": profile.company_name,
+            "company_size": profile.company_size,
+            "industry": profile.industry,
+            "budget_signal": profile.budget_signal,
+            "pain_points": profile.pain_points,
+            "decision_maker": profile.decision_maker,
+        }
+        self._state = SalesPathState(
+            episode_id=str(uuid.uuid4()),
+            prospect_profile=public_profile,
+            conversation_history=[],
+            workflow_stage="START",
+            required_workflow=DIFFICULTY_WORKFLOW[difficulty],
+            steps_completed=[],
+            constraints_violated=[],
+            objections_handled=0,
+            turn_number=0,
+            difficulty=difficulty,
+            done=False,
+            hidden_state=hidden_state,
+        )
+        intro_message = (
+            f"You are engaging {profile.company_name}, "
+            f"a {profile.company_size} {profile.industry} company. "
+            f"Pain points: {', '.join(profile.pain_points)}. "
+            f"Begin the sales conversation."
+        )
+        return SalesPathObservation(
+            prospect_response=intro_message,
+            workflow_stage="START",
+            constraints_violated=[],
+            steps_completed=[],
+            turn_number=0,
+            reward=0.0,
+            reward_components={},
+            done=False,
+            info={
+                "difficulty": difficulty,
+                "episode_id": self._state.episode_id,
+            },
+        )
+    def step(
+        self,
+        action: SalesPathAction,
+    ) -> SalesPathObservation:
+        """
+        One environment transition.
+        """
+        state = self._state
+        # -----------------------------------
+        # Advance turn
+        # -----------------------------------
+        state.turn_number += 1
+        # -----------------------------------
+        # Strict action validation
+        # Must return observation, never crash
+        # -----------------------------------
+        if not action.is_valid():
+            return SalesPathObservation(
+                prospect_response="Invalid action type.",
+                workflow_stage=state.workflow_stage,
+                constraints_violated=list(state.constraints_violated),
+                steps_completed=list(state.steps_completed),
+                turn_number=state.turn_number,
+                reward=-0.2,
+                reward_components={
+                    "r_format": -0.1,
+                },
+                done=False,
+                info={
+                    "error": (
+                        f"Invalid action_type: "
+                        f"{action.action_type}"
+                    )
+                },
+            )
+        # -----------------------------------
+        # Rule checks
+        # -----------------------------------
+        new_violations = check_rules(
+            state,
+            action,
+        )
+        state.constraints_violated.extend(
+            new_violations
+        )
+        # -----------------------------------
+        # Record agent action
+        # -----------------------------------
+        state.conversation_history.append(
+            {
+                "turn": state.turn_number,
+                "speaker": "agent",
+                "action_type": action.action_type,
+                "content": action.content,
+            }
+        )
+        # -----------------------------------
+        # Update workflow state
+        # -----------------------------------
+        if action.action_type not in state.steps_completed:
+            state.steps_completed.append(
+                action.action_type
+            )
+        state.workflow_stage = action.action_type
+        # -----------------------------------
+        # Prospect response
+        # -----------------------------------
+        response_token, response_text = (
+            self._simulator.respond(
+                action,
+                state,
+            )
+        )
+        state.conversation_history.append(
+            {
+                "turn": state.turn_number,
+                "speaker": "prospect",
+                "response_token": response_token,
+                "text": response_text,
+            }
+        )
+        # -----------------------------------
+        # Episode termination
+        # -----------------------------------
+        terminal_actions = {
+            "CLOSE",
+            "DISQUALIFY",
+        }
+        too_many_violations = (
+            len(state.constraints_violated)
+            >= MAX_VIOLATIONS_BEFORE_TERMINATE
+        )
+        turn_limit_reached = (
+            state.turn_number >= MAX_TURNS
+        )
+        done = (
+            action.action_type in terminal_actions
+            or too_many_violations
+            or turn_limit_reached
+        )
+        state.done = done
+        # -----------------------------------
+        # Reward
+        # -----------------------------------
+        total_reward, components = (
+            compute_reward(
+                state=state,
+                action=action,
+                response_token=response_token,
+                new_violations=new_violations,
+                episode_done=done,
+            )
+        )
+        return SalesPathObservation(
+            prospect_response=response_text,
+            workflow_stage=state.workflow_stage,
+            constraints_violated=list(
+                state.constraints_violated
+            ),
+            steps_completed=list(
+                state.steps_completed
+            ),
+            turn_number=state.turn_number,
+            reward=total_reward,
+            reward_components=components,
+            done=done,
+            info={
+                "response_token": response_token,
+                "new_violations": new_violations,
+                "episode_id": state.episode_id,
+            },
+        )
+    @property
+    def state(self) -> SalesPathState:
+        return self._state

salespath_env/server/task_bank.py ADDED Viewed

	@@ -0,0 +1,199 @@

+# salespath_env/server/task_bank.py
+import random
+from dataclasses import dataclass
+@dataclass
+class ProspectProfile:
+    """
+    Template for one simulated prospect.
+    The first six fields describe the public profile; the last three are
+    hidden values the agent must never see directly.
+    """
+    company_name: str
+    company_size: str          # small / medium / enterprise
+    industry: str
+    budget_signal: str         # high / medium / low / unknown
+    pain_points: list[str]
+    decision_maker: bool
+    # Hidden values — never exposed directly to agent
+    true_budget: float         # 0.0 → 1.0
+    close_threshold: float
+    stall_probability: float
+# -------------------------
+# LEVEL 1 — Easy
+# budget known
+# decision maker present
+# close is usually possible
+# -------------------------
+PROFILES_L1 = [
+    ProspectProfile(
+        company_name="Meridian Retail",
+        company_size="medium",
+        industry="retail",
+        budget_signal="high",
+        pain_points=[
+            "manual inventory tracking",
+            "slow reporting",
+        ],
+        decision_maker=True,
+        true_budget=0.8,
+        close_threshold=0.5,
+        stall_probability=0.0,
+    ),
+    ProspectProfile(
+        company_name="Northline Foods",
+        company_size="small",
+        industry="food distribution",
+        budget_signal="medium",
+        pain_points=[
+            "supplier delays",
+            "inventory mismatch",
+        ],
+        decision_maker=True,
+        true_budget=0.6,
+        close_threshold=0.5,
+        stall_probability=0.0,
+    ),
+]
+# -------------------------
+# LEVEL 2 — Medium
+# budget hidden initially
+# one objection expected
+# -------------------------
+PROFILES_L2 = [
+    ProspectProfile(
+        company_name="Apex Logistics",
+        company_size="enterprise",
+        industry="logistics",
+        budget_signal="unknown",
+        pain_points=[
+            "route optimization",
+            "driver coordination",
+            "fuel tracking",
+        ],
+        decision_maker=True,
+        true_budget=0.7,
+        close_threshold=0.5,
+        stall_probability=0.0,
+    ),
+    ProspectProfile(
+        company_name="Vertex Supply",
+        company_size="medium",
+        industry="manufacturing",
+        budget_signal="unknown",
+        pain_points=[
+            "vendor visibility",
+            "purchase delays",
+        ],
+        decision_maker=True,
+        true_budget=0.55,
+        close_threshold=0.5,
+        stall_probability=0.0,
+    ),
+]
+# -------------------------
+# LEVEL 3 — Hard
+# budget hidden
+# 2 objections
+# possible stalling
+# decision maker may be absent
+# -------------------------
+PROFILES_L3 = [
+    ProspectProfile(
+        company_name="Nova Financial",
+        company_size="enterprise",
+        industry="finance",
+        budget_signal="unknown",
+        pain_points=[
+            "compliance reporting",
+            "audit trails",
+            "data silos",
+        ],
+        decision_maker=False,
+        true_budget=0.6,
+        close_threshold=0.55,
+        stall_probability=0.3,
+    ),
+    ProspectProfile(
+        company_name="Atlas Health",
+        company_size="enterprise",
+        industry="healthcare",
+        budget_signal="unknown",
+        pain_points=[
+            "patient workflow delays",
+            "reporting compliance",
+        ],
+        decision_maker=False,
+        true_budget=0.65,
+        close_threshold=0.55,
+        stall_probability=0.25,
+    ),
+]
+# -------------------------
+# LEVEL 4 — Trap cases
+# misleading signals
+# correct action may be DISQUALIFY
+# -------------------------
+PROFILES_L4 = [
+    ProspectProfile(
+        company_name="Cipher Tech",
+        company_size="small",
+        industry="technology",
+        budget_signal="high",   # misleading
+        pain_points=[
+            "security",
+            "compliance",
+        ],
+        decision_maker=True,
+        true_budget=0.2,
+        close_threshold=0.5,
+        stall_probability=0.5,
+    ),
+    ProspectProfile(
+        company_name="BluePeak Studio",
+        company_size="small",
+        industry="creative agency",
+        budget_signal="high",   # misleading
+        pain_points=[
+            "project visibility",
+            "client reporting",
+        ],
+        decision_maker=True,
+        true_budget=0.25,
+        close_threshold=0.5,
+        stall_probability=0.4,
+    ),
+]
+# Difficulty level -> list of candidate profiles; sample_profile draws
+# one entry at random from the selected list.
+ALL_PROFILES = {
+    1: PROFILES_L1,
+    2: PROFILES_L2,
+    3: PROFILES_L3,
+    4: PROFILES_L4,
+}
+def sample_profile(difficulty: int) -> ProspectProfile:
+    """
+    Returns one sampled profile for the selected difficulty.
+    """
+    if difficulty not in ALL_PROFILES:
+        difficulty = 1
+    return random.choice(ALL_PROFILES[difficulty])