Imsachin010 commited on
Commit
fbf5bf6
·
verified ·
1 Parent(s): 414b500

Deploy SalesPath Environment

Browse files
Dockerfile ADDED
@@ -0,0 +1,27 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
FROM python:3.11-slim

# Hugging Face Spaces routes traffic to port 7860 by default.
ENV PORT=7860 \
    PYTHONUNBUFFERED=1 \
    PYTHONDONTWRITEBYTECODE=1

WORKDIR /app

# curl is required only by the HEALTHCHECK below.
RUN apt-get update && apt-get install -y --no-install-recommends \
        curl \
    && rm -rf /var/lib/apt/lists/*

# Install Python dependencies first so this layer is cached across code changes.
COPY requirements.txt .
RUN pip install --no-cache-dir -r requirements.txt

# Spaces run the container as UID 1000; create a matching unprivileged user
# instead of serving the API as root.
RUN useradd --create-home --uid 1000 user

# Copy the salespath_env package.
COPY --chown=user salespath_env/ ./salespath_env/

USER user

EXPOSE 7860

# Health check against the server's /health endpoint.
HEALTHCHECK --interval=30s --timeout=5s --start-period=15s --retries=3 \
    CMD curl -f http://localhost:${PORT}/health || exit 1

# Start the FastAPI server on the Spaces port. `exec` replaces the shell so
# uvicorn runs as PID 1 and receives SIGTERM directly on shutdown.
CMD ["sh", "-c", "exec uvicorn salespath_env.server.app:app --host 0.0.0.0 --port ${PORT}"]
README.md CHANGED
@@ -1,10 +1,47 @@
1
  ---
2
- title: Salespath Env
3
- emoji: 📊
4
- colorFrom: pink
5
- colorTo: pink
6
  sdk: docker
 
7
  pinned: false
 
 
8
  ---
9
 
10
- Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
  ---
2
+ title: SalesPath Environment
3
+ emoji: 🤝
4
+ colorFrom: blue
5
+ colorTo: indigo
6
  sdk: docker
7
+ app_port: 7860
8
  pinned: false
9
+ license: mit
10
+ short_description: RL gym environment for sales agent training
11
  ---
12
 
13
+ # SalesPath Environment
14
+
15
+ An [OpenEnv](https://github.com/openenv)-compatible Reinforcement Learning gym environment for training sales agents via LLM fine-tuning.
16
+
17
+ ## API Endpoints
18
+
19
+ | Method | Endpoint | Description |
20
+ |--------|----------|-------------|
21
+ | `POST` | `/reset` | Reset the environment, returns initial observation |
22
+ | `POST` | `/step` | Take an action, returns next observation + reward |
23
+ | `GET` | `/health` | Health check |
24
+
25
+ ## Quick Start
26
+
27
+ ### Reset
28
+ ```bash
29
+ curl -X POST https://imsachin010-salespath-env.hf.space/reset \
30
+ -H "Content-Type: application/json" \
31
+ -d '{"difficulty": 1}'
32
+ ```
33
+
34
+ ### Step
35
+ ```bash
36
+ curl -X POST https://imsachin010-salespath-env.hf.space/step \
37
+ -H "Content-Type: application/json" \
38
+ -d '{"action": {"action_type": "PROSPECT", "content": "Hello, tell me about your workflow challenges."}}'
39
+ ```
40
+
41
+ ## Action Types
42
+
43
+ - `PROSPECT` — Initial outreach and discovery
44
+ - `QUALIFY` — Qualify the lead
45
+ - `PRESENT` — Deliver the sales pitch
46
+ - `HANDLE_OBJECTION` — Handle prospect objections
47
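+ - `CLOSE` — Attempt to close the deal
+ - `OFFER_DEMO`, `NEGOTIATE`, `FOLLOW_UP`, `DISQUALIFY` — additional action types accepted by the environment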
requirements.txt ADDED
@@ -0,0 +1,4 @@
 
 
 
 
 
1
+ fastapi>=0.110.0
2
+ uvicorn[standard]>=0.29.0
3
+ pydantic>=2.0
4
+ openenv
salespath_env/README.md ADDED
File without changes
salespath_env/__init__.py ADDED
File without changes
salespath_env/client.py ADDED
File without changes
salespath_env/models.py ADDED
@@ -0,0 +1,86 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # salespath_env/models.py
2
+
3
+ from __future__ import annotations
4
+
5
+ import uuid
6
+ from typing import Dict, List
7
+ from pydantic import Field
8
+
9
+ from openenv.core import Action, Observation, State
10
+
11
+
12
# Closed vocabulary of action types the environment accepts.
VALID_ACTIONS = {
    "PROSPECT", "QUALIFY", "PRESENT",
    "HANDLE_OBJECTION", "OFFER_DEMO", "NEGOTIATE",
    "CLOSE", "FOLLOW_UP", "DISQUALIFY",
}
23
+
24
+
25
class SalesPathAction(Action):
    """A single move submitted by the agent to the environment."""

    # Which sales step is being attempted; checked against VALID_ACTIONS.
    action_type: str
    # Text payload accompanying the action.
    content: str
    # Optional; defaults to an empty string.
    target: str = ""

    def is_valid(self) -> bool:
        """Return True when ``action_type`` is a member of ``VALID_ACTIONS``."""
        allowed = VALID_ACTIONS
        return self.action_type in allowed
39
+
40
+
41
class SalesPathObservation(Observation):
    """Agent-visible view of the episode after a reset or a step.

    Hidden state must NEVER be exposed through this model.
    """

    # --- conversation -------------------------------------------------
    prospect_response: str = ""
    workflow_stage: str = "START"

    # --- progress tracking --------------------------------------------
    constraints_violated: List[str] = Field(default_factory=list)
    steps_completed: List[str] = Field(default_factory=list)

    turn_number: int = 0

    # --- reward signal ------------------------------------------------
    reward: float = 0.0
    reward_components: Dict = Field(default_factory=dict)

    # --- episode bookkeeping ------------------------------------------
    done: bool = False
    info: Dict = Field(default_factory=dict)
60
+
61
+
62
class SalesPathState(State):
    """Full server-side episode state.

    Unlike the observation, this model also carries ``hidden_state``,
    which the agent must not see.
    """

    # Fresh UUID4 string per instance unless one is supplied.
    episode_id: str = Field(default_factory=lambda: str(uuid.uuid4()))

    # Prospect description and the running transcript.
    prospect_profile: Dict = Field(default_factory=dict)
    conversation_history: List[Dict] = Field(default_factory=list)

    # Current stage and the workflow expected for this episode.
    workflow_stage: str = "START"
    required_workflow: List[str] = Field(default_factory=list)

    # Progress and rule-violation logs.
    steps_completed: List[str] = Field(default_factory=list)
    constraints_violated: List[str] = Field(default_factory=list)

    # Counters.
    objections_handled: int = 0
    turn_number: int = 0
    difficulty: int = 1

    done: bool = False

    # Hidden state — NEVER exposed in Observation
    hidden_state: Dict = Field(default_factory=dict)
salespath_env/openenv.yaml ADDED
@@ -0,0 +1,13 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ [project]
2
+ name = "salespath_env"
3
+ version = "0.1.0"
4
+ dependencies = [
5
+ "openenv",
6
+ "fastapi",
7
+ "uvicorn",
8
+ "pydantic>=2.0",
9
+ "trl>=0.8.0",
10
+ "unsloth",
11
+ "torch",
12
+ "transformers",
13
+ ]
salespath_env/pyproject.toml ADDED
File without changes
salespath_env/server/__init__.py ADDED
File without changes
salespath_env/server/app.py ADDED
@@ -0,0 +1,18 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # salespath_env/server/app.py
2
+
3
+ from openenv.core.env_server import create_fastapi_app
4
+
5
+ from ..models import (
6
+ SalesPathAction,
7
+ SalesPathObservation,
8
+ )
9
+ from .salespath_environment import (
10
+ SalesPathEnvironment,
11
+ )
12
+
13
+
14
# ASGI application object, built by OpenEnv's factory from the environment
# class plus its action and observation models.
_APP_COMPONENTS = (SalesPathEnvironment, SalesPathAction, SalesPathObservation)
app = create_fastapi_app(*_APP_COMPONENTS)
salespath_env/server/prospect_simulator.py ADDED
@@ -0,0 +1,162 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # salespath_env/server/prospect_simulator.py
2
+
3
+ from ..models import SalesPathAction, SalesPathState
4
+
5
+
6
# Canned prospect utterances keyed by response token.
RESPONSE_TEXT = {
    "open:positive_signal": "That sounds interesting. Tell me more about how this works.",
    "open:neutral_signal": "I see. We're evaluating a few options at the moment.",

    "objection:price": "The pricing seems higher than what we budgeted for.",
    "objection:timing": "The timing isn't ideal — we're in the middle of a quarter close.",
    "objection:premature_pitch": (
        "I'm not sure we're ready to discuss solutions yet. "
        "What do you know about our current situation?"
    ),

    "deflect:budget_not_discussed": (
        "We haven't really talked about what we're looking for yet."
    ),
    "deflect:stall": (
        "Let me get back to you on this. A lot is happening on our end."
    ),

    "accept:demo_scheduled": (
        "Yes, let's set up a demo. What time works next week?"
    ),
    "accept:close_success": (
        "Alright, I think we can move forward with this. "
        "Send over the paperwork."
    ),

    "reject:close_failed": (
        "I don't think we're ready to commit at this point."
    ),

    "silence": "",

    "exit:disqualified": (
        "I think we're done here. This isn't the right fit."
    ),
}


class ProspectSimulator:
    """
    Pure rule-based prospect simulator. No LLM, no transformers.

    Replies are deterministic except for the late-episode stall at
    difficulty 3 and above, which is drawn from ``random`` using the
    profile's ``stall_probability``.
    """

    # Actions that end the episode; their outcome is never replaced by a stall.
    _TERMINAL_ACTIONS = frozenset({"CLOSE", "DISQUALIFY"})
    _STALL_MIN_DIFFICULTY = 3
    _STALL_MIN_TURN = 10

    def respond(
        self,
        action: SalesPathAction,
        state: SalesPathState,
    ) -> tuple[str, str]:
        """
        Produce the prospect's reply to ``action``.

        May mutate ``state`` (budget reveal on QUALIFY, objection counter on
        HANDLE_OBJECTION).

        Returns:
            (response_token, response_text)
        """
        token = self._get_token(action, state)
        return token, RESPONSE_TEXT[token]

    def _stalls(self, atype: str, state: SalesPathState) -> bool:
        """Return True when the prospect stalls instead of answering."""
        if atype in self._TERMINAL_ACTIONS:
            return False
        if state.difficulty < self._STALL_MIN_DIFFICULTY:
            return False
        if state.turn_number < self._STALL_MIN_TURN:
            return False

        import random

        return random.random() < state.hidden_state.get("stall_probability", 0.0)

    def _get_token(
        self,
        action: SalesPathAction,
        state: SalesPathState,
    ) -> str:
        """Select the response token for ``action`` given the episode state."""
        atype = action.action_type
        profile = state.prospect_profile
        hidden = state.hidden_state
        # Snapshot taken before HANDLE_OBJECTION increments the counter.
        objections_before = state.objections_handled

        # -----------------------------
        # Rule-triggered responses first
        # -----------------------------
        # constraints_violated is cumulative, so its last entry may stem from
        # an earlier turn. Re-check the rule's own condition against the
        # current action so a stale violation cannot hijack later replies.
        violations = state.constraints_violated
        latest = violations[-1] if violations else None

        if (
            latest == "R01"
            and atype == "PRESENT"
            and "QUALIFY" not in state.steps_completed
        ):
            return "objection:premature_pitch"

        if (
            latest == "R03"
            and atype == "NEGOTIATE"
            and profile.get("budget_signal") == "unknown"
        ):
            return "deflect:budget_not_discussed"

        # -----------------------------
        # Difficulty 3+ mode shift
        # -----------------------------
        # Evaluated before the action dispatch: every valid action returns
        # from its own branch below, so a check placed after them is dead
        # code. A stalled turn makes no progress (no budget reveal, no
        # objection counted).
        if self._stalls(atype, state):
            return "deflect:stall"

        # -----------------------------
        # Action-based responses
        # -----------------------------
        if atype == "PROSPECT":
            return "open:positive_signal"

        if atype == "QUALIFY":
            # Reveal budget if hidden
            if profile.get("budget_signal") == "unknown":
                state.prospect_profile["budget_signal"] = hidden.get(
                    "revealed_budget",
                    "medium",
                )
            return "open:neutral_signal"

        if atype == "PRESENT":
            if state.difficulty >= 2 and objections_before == 0:
                return "objection:price"
            return "open:positive_signal"

        if atype == "HANDLE_OBJECTION":
            state.objections_handled += 1
            required_objections = hidden.get("num_objections", 1)

            if state.objections_handled >= required_objections:
                return "open:positive_signal"
            if objections_before == 0:
                return "objection:timing"
            return "open:positive_signal"

        if atype == "OFFER_DEMO":
            return "accept:demo_scheduled"

        if atype == "NEGOTIATE":
            return "open:neutral_signal"

        if atype == "CLOSE":
            true_budget = hidden.get("true_budget", 0.7)
            close_threshold = hidden.get("close_threshold", 0.5)
            decision_maker = profile.get("decision_maker", True)

            if true_budget >= close_threshold and decision_maker:
                return "accept:close_success"
            return "reject:close_failed"

        if atype == "FOLLOW_UP":
            return "open:neutral_signal"

        if atype == "DISQUALIFY":
            return "exit:disqualified"

        # Unknown action types fall back to a neutral reply.
        return "open:neutral_signal"
salespath_env/server/requirements.txt ADDED
File without changes
salespath_env/server/reward.py ADDED
@@ -0,0 +1,138 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # salespath_env/server/reward.py
2
+
3
+ from ..models import SalesPathAction, SalesPathState
4
+
5
+
6
# Turn count per difficulty beyond which the efficiency penalty applies.
DIFFICULTY_OPTIMAL_TURNS = {
    1: 5,
    2: 8,
    3: 12,
    4: 14,
}


def _ordering_score(required: list[str], completed: list[str]) -> float:
    """Return the fraction of the required workflow completed in order.

    Steps in ``completed`` that are not the next expected workflow step
    (for example the opening PROSPECT) are skipped instead of shifting
    every later position out of alignment. Repeated entries in ``required``
    are collapsed because ``completed`` records each action type once.
    Returns 1.0 when either list is empty.
    """
    if not required or not completed:
        return 1.0

    targets = list(dict.fromkeys(required))  # de-duplicate, keep order
    matched = 0
    for step in completed:
        if matched < len(targets) and step == targets[matched]:
            matched += 1

    return matched / len(targets)


def compute_reward(
    state: SalesPathState,
    action: SalesPathAction,
    response_token: str,
    new_violations: list[str],
    episode_done: bool,
) -> tuple[float, dict]:
    """
    Score one environment transition.

    Args:
        state: Episode state after the action has been applied.
        action: The action the agent just took.
        response_token: Token returned by the prospect simulator.
        new_violations: Rule IDs violated by this action only.
        episode_done: Whether this transition ends the episode.

    Returns:
        (total_reward, reward_components) — the weighted sum and the
        per-component values, with the sum also stored under "total".
    """

    components = {}

    # --------------------------------------------------
    # 1. Outcome Reward (terminal only)
    # --------------------------------------------------

    r_outcome = 0.0

    if episode_done:
        if response_token == "accept:close_success":
            r_outcome = 1.0
        elif action.action_type == "DISQUALIFY":
            # A DISQUALIFY that did not trip R08 is rewarded.
            r_outcome = 0.5 if "R08" not in new_violations else -0.5
        elif state.turn_number >= 20:
            r_outcome = -0.3
        else:
            # Too many violations or a failed close.
            r_outcome = -0.5

    components["r_outcome"] = r_outcome

    # --------------------------------------------------
    # 2. Compliance Reward
    # --------------------------------------------------

    components["r_compliance"] = max(
        -1.0,
        -0.2 * len(new_violations),
    )

    # --------------------------------------------------
    # 3. Ordering Reward
    # --------------------------------------------------

    components["r_ordering"] = _ordering_score(
        state.required_workflow,
        state.steps_completed,
    )

    # --------------------------------------------------
    # 4. Efficiency Reward (terminal only)
    # --------------------------------------------------

    if episode_done:
        optimal = DIFFICULTY_OPTIMAL_TURNS.get(state.difficulty, 10)
        extra_turns = max(0, state.turn_number - optimal)
        r_efficiency = max(-0.3, -0.05 * extra_turns)
    else:
        r_efficiency = 0.0

    components["r_efficiency"] = r_efficiency

    # --------------------------------------------------
    # 5. Format Reward
    # --------------------------------------------------

    components["r_format"] = 1.0 if action.is_valid() else -0.1

    # --------------------------------------------------
    # Final Weighted Reward
    # --------------------------------------------------

    weights = {
        "r_outcome": 0.40,
        "r_compliance": 0.30,
        "r_ordering": 0.15,
        "r_efficiency": 0.10,
        "r_format": 0.05,
    }

    total_reward = sum(
        weights[key] * components[key]
        for key in weights
    )

    components["total"] = total_reward

    return total_reward, components
salespath_env/server/rules.py ADDED
@@ -0,0 +1,222 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # salespath_env/server/rules.py
2
+
3
+ from dataclasses import dataclass
4
+ from typing import Callable
5
+
6
+ from ..models import SalesPathAction, SalesPathState
7
+
8
+
9
@dataclass
class BusinessRule:
    """
    One sales-process constraint.

    ``check(state, action)`` returns True when the rule is VIOLATED.
    """

    rule_id: str  # short identifier, e.g. "R01"
    name: str  # snake_case label
    description: str  # human-readable statement of the rule
    check: Callable[[SalesPathState, SalesPathAction], bool]
19
+
20
+
21
+ def _qualify_before_present(
22
+ state: SalesPathState,
23
+ action: SalesPathAction,
24
+ ) -> bool:
25
+ """
26
+ R01:
27
+ PRESENT before QUALIFY is invalid.
28
+ """
29
+ if action.action_type == "PRESENT":
30
+ return "QUALIFY" not in state.steps_completed
31
+ return False
32
+
33
+
34
+ def _demo_before_negotiate(
35
+ state: SalesPathState,
36
+ action: SalesPathAction,
37
+ ) -> bool:
38
+ """
39
+ R02:
40
+ NEGOTIATE before OFFER_DEMO is invalid.
41
+ """
42
+ if action.action_type == "NEGOTIATE":
43
+ return "OFFER_DEMO" not in state.steps_completed
44
+ return False
45
+
46
+
47
+ def _budget_known_to_negotiate(
48
+ state: SalesPathState,
49
+ action: SalesPathAction,
50
+ ) -> bool:
51
+ """
52
+ R03:
53
+ Cannot NEGOTIATE while budget is unknown.
54
+ """
55
+ if action.action_type == "NEGOTIATE":
56
+ return state.prospect_profile.get("budget_signal") == "unknown"
57
+ return False
58
+
59
+
60
+ def _discount_after_objections(
61
+ state: SalesPathState,
62
+ action: SalesPathAction,
63
+ ) -> bool:
64
+ """
65
+ R04:
66
+ Discount only after 2 objections handled.
67
+ """
68
+ if action.action_type == "NEGOTIATE":
69
+ if "discount" in action.content.lower():
70
+ return state.objections_handled < 2
71
+ return False
72
+
73
+
74
+ def _no_repeat_action(
75
+ state: SalesPathState,
76
+ action: SalesPathAction,
77
+ ) -> bool:
78
+ """
79
+ R05:
80
+ Same action twice in a row is invalid.
81
+ """
82
+ if state.conversation_history:
83
+ last_action = state.conversation_history[-1].get("action_type", "")
84
+ return last_action == action.action_type
85
+ return False
86
+
87
+
88
+ def _prospect_first(
89
+ state: SalesPathState,
90
+ action: SalesPathAction,
91
+ ) -> bool:
92
+ """
93
+ R06:
94
+ First action must be PROSPECT.
95
+ """
96
+ if state.turn_number == 1:
97
+ return action.action_type != "PROSPECT"
98
+ return False
99
+
100
+
101
+ def _followup_timing(
102
+ state: SalesPathState,
103
+ action: SalesPathAction,
104
+ ) -> bool:
105
+ """
106
+ R07:
107
+ FOLLOW_UP only valid after silence.
108
+ If prospect just responded last turn, violation.
109
+ """
110
+ if action.action_type == "FOLLOW_UP":
111
+ if state.conversation_history:
112
+ last_speaker = state.conversation_history[-1].get("speaker", "agent")
113
+ return last_speaker == "prospect"
114
+ return False
115
+
116
+
117
+ def _disqualify_logic(
118
+ state: SalesPathState,
119
+ action: SalesPathAction,
120
+ ) -> bool:
121
+ """
122
+ R08:
123
+ DISQUALIFY only when prospect is genuinely not closeable.
124
+ Violation if prospect is actually closeable.
125
+ """
126
+ if action.action_type == "DISQUALIFY":
127
+ true_budget = state.hidden_state.get("true_budget", 0.5)
128
+ close_threshold = state.hidden_state.get("close_threshold", 0.5)
129
+ decision_maker = state.prospect_profile.get("decision_maker", True)
130
+
131
+ return (true_budget >= close_threshold) and decision_maker
132
+
133
+ return False
134
+
135
+
136
+ def _close_requires_demo(
137
+ state: SalesPathState,
138
+ action: SalesPathAction,
139
+ ) -> bool:
140
+ """
141
+ R09:
142
+ Difficulty 2+ requires OFFER_DEMO before CLOSE.
143
+ """
144
+ if action.action_type == "CLOSE":
145
+ if state.difficulty >= 2:
146
+ return "OFFER_DEMO" not in state.steps_completed
147
+ return False
148
+
149
+
150
# Registry of every rule, in the order check_rules() evaluates them.
BUSINESS_RULES = [
    BusinessRule(
        rule_id="R01",
        name="qualify_before_present",
        description="Must QUALIFY before PRESENT",
        check=_qualify_before_present,
    ),
    BusinessRule(
        rule_id="R02",
        name="demo_before_negotiate",
        description="Must OFFER_DEMO before NEGOTIATE",
        check=_demo_before_negotiate,
    ),
    BusinessRule(
        rule_id="R03",
        name="budget_known_to_negotiate",
        description="Budget must be known before NEGOTIATE",
        check=_budget_known_to_negotiate,
    ),
    BusinessRule(
        rule_id="R04",
        name="discount_after_objections",
        description="Discount only after 2 objections handled",
        check=_discount_after_objections,
    ),
    BusinessRule(
        rule_id="R05",
        name="no_repeat_action",
        description="Cannot repeat same action consecutively",
        check=_no_repeat_action,
    ),
    BusinessRule(
        rule_id="R06",
        name="prospect_first",
        description="First action must be PROSPECT",
        check=_prospect_first,
    ),
    BusinessRule(
        rule_id="R07",
        name="followup_timing",
        description="FOLLOW_UP only after prospect silence",
        check=_followup_timing,
    ),
    BusinessRule(
        rule_id="R08",
        name="disqualify_logic",
        description="DISQUALIFY only when prospect is genuinely unqualified",
        check=_disqualify_logic,
    ),
    BusinessRule(
        rule_id="R09",
        name="close_requires_demo",
        description="Must OFFER_DEMO before CLOSE (difficulty 2+)",
        check=_close_requires_demo,
    ),
]
206
+
207
+
208
def check_rules(
    state: SalesPathState,
    action: SalesPathAction,
) -> list[str]:
    """
    Evaluate every entry of BUSINESS_RULES against (state, action).

    Returns the IDs of the violated rules, in registry order.
    """
    return [
        rule.rule_id
        for rule in BUSINESS_RULES
        if rule.check(state, action)
    ]
salespath_env/server/salespath_environment.py ADDED
@@ -0,0 +1,294 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # salespath_env/server/salespath_environment.py
2
+
3
+ import uuid
4
+
5
+ from openenv.core.env_server import Environment
6
+
7
+ from ..models import (
8
+ SalesPathAction,
9
+ SalesPathObservation,
10
+ SalesPathState,
11
+ )
12
+ from .task_bank import sample_profile
13
+ from .rules import check_rules
14
+ from .reward import compute_reward
15
+ from .prospect_simulator import ProspectSimulator
16
+
17
+
18
# Ordered steps the agent is expected to complete at each difficulty.
DIFFICULTY_WORKFLOW = {
    1: [
        "QUALIFY",
        "PRESENT",
        "CLOSE",
    ],
    2: [
        "QUALIFY",
        "PRESENT",
        "HANDLE_OBJECTION",
        "OFFER_DEMO",
        "CLOSE",
    ],
    3: [
        "QUALIFY",
        "PRESENT",
        "HANDLE_OBJECTION",
        "OFFER_DEMO",
        "HANDLE_OBJECTION",
        "NEGOTIATE",
        "CLOSE",
    ],
    4: [],  # Agent must determine; DISQUALIFY may be correct
}

# Value stored as hidden_state["num_objections"] for each difficulty.
_OBJECTIONS_BY_DIFFICULTY = {
    1: 0,
    2: 1,
    3: 2,
    4: 2,
}

MAX_VIOLATIONS_BEFORE_TERMINATE = 3
MAX_TURNS = 20


class SalesPathEnvironment(Environment):
    """
    Core OpenEnv environment.

    All business logic routes through:
    - rules.py
    - reward.py
    - prospect_simulator.py
    """

    def __init__(self):
        super().__init__()
        self._state = SalesPathState()
        self._simulator = ProspectSimulator()

    def reset(self, difficulty: int = 1) -> SalesPathObservation:
        """
        Start a new episode.

        Args:
            difficulty: Level 1-4. Any other value falls back to level 1,
                the same fallback ``sample_profile`` applies, so an
                unexpected value cannot raise ``KeyError`` here.

        Returns:
            The initial observation containing the briefing message.
        """

        if difficulty not in DIFFICULTY_WORKFLOW:
            difficulty = 1

        profile = sample_profile(difficulty)

        # Coarse label the simulator reveals once the agent qualifies.
        if profile.true_budget >= 0.7:
            revealed_budget = "high"
        elif profile.true_budget >= 0.4:
            revealed_budget = "medium"
        else:
            revealed_budget = "low"

        hidden_state = {
            "true_budget": profile.true_budget,
            "close_threshold": profile.close_threshold,
            "stall_probability": profile.stall_probability,
            "num_objections": _OBJECTIONS_BY_DIFFICULTY[difficulty],
            "revealed_budget": revealed_budget,
        }

        public_profile = {
            "company_name": profile.company_name,
            "company_size": profile.company_size,
            "industry": profile.industry,
            "budget_signal": profile.budget_signal,
            # Copy so episode state never aliases the shared task-bank list.
            "pain_points": list(profile.pain_points),
            "decision_maker": profile.decision_maker,
        }

        self._state = SalesPathState(
            episode_id=str(uuid.uuid4()),
            prospect_profile=public_profile,
            conversation_history=[],
            workflow_stage="START",
            # Copy so episode state never aliases the module-level table.
            required_workflow=list(DIFFICULTY_WORKFLOW[difficulty]),
            steps_completed=[],
            constraints_violated=[],
            objections_handled=0,
            turn_number=0,
            difficulty=difficulty,
            done=False,
            hidden_state=hidden_state,
        )

        intro_message = (
            f"You are engaging {profile.company_name}, "
            f"a {profile.company_size} {profile.industry} company. "
            f"Pain points: {', '.join(profile.pain_points)}. "
            f"Begin the sales conversation."
        )

        return SalesPathObservation(
            prospect_response=intro_message,
            workflow_stage="START",
            constraints_violated=[],
            steps_completed=[],
            turn_number=0,
            reward=0.0,
            reward_components={},
            done=False,
            info={
                "difficulty": difficulty,
                "episode_id": self._state.episode_id,
            },
        )

    def step(
        self,
        action: SalesPathAction,
    ) -> SalesPathObservation:
        """
        One environment transition.

        Validates the action, checks the business rules, records the
        exchange, asks the simulator for the prospect's reply, decides
        whether the episode is over and computes the reward.
        """

        state = self._state

        # -----------------------------------
        # Advance turn
        # -----------------------------------

        state.turn_number += 1

        # -----------------------------------
        # Strict action validation
        # Must return observation, never crash
        # -----------------------------------

        if not action.is_valid():
            # Invalid actions still consume a turn, so they must respect the
            # turn limit; otherwise a stream of them would never terminate.
            turn_limit_reached = state.turn_number >= MAX_TURNS
            state.done = state.done or turn_limit_reached

            return SalesPathObservation(
                prospect_response="Invalid action type.",
                workflow_stage=state.workflow_stage,
                constraints_violated=list(state.constraints_violated),
                steps_completed=list(state.steps_completed),
                turn_number=state.turn_number,
                reward=-0.2,
                reward_components={
                    "r_format": -0.1,
                },
                done=turn_limit_reached,
                info={
                    "error": (
                        f"Invalid action_type: "
                        f"{action.action_type}"
                    )
                },
            )

        # -----------------------------------
        # Rule checks (before the action is recorded)
        # -----------------------------------

        new_violations = check_rules(
            state,
            action,
        )

        state.constraints_violated.extend(new_violations)

        # -----------------------------------
        # Record agent action
        # -----------------------------------

        state.conversation_history.append(
            {
                "turn": state.turn_number,
                "speaker": "agent",
                "action_type": action.action_type,
                "content": action.content,
            }
        )

        # -----------------------------------
        # Update workflow state
        # -----------------------------------

        # Each action type is recorded once, in order of first use.
        if action.action_type not in state.steps_completed:
            state.steps_completed.append(action.action_type)

        state.workflow_stage = action.action_type

        # -----------------------------------
        # Prospect response
        # -----------------------------------

        response_token, response_text = self._simulator.respond(
            action,
            state,
        )

        state.conversation_history.append(
            {
                "turn": state.turn_number,
                "speaker": "prospect",
                "response_token": response_token,
                "text": response_text,
            }
        )

        # -----------------------------------
        # Episode termination
        # -----------------------------------

        terminal_actions = {
            "CLOSE",
            "DISQUALIFY",
        }

        too_many_violations = (
            len(state.constraints_violated)
            >= MAX_VIOLATIONS_BEFORE_TERMINATE
        )

        turn_limit_reached = state.turn_number >= MAX_TURNS

        done = (
            action.action_type in terminal_actions
            or too_many_violations
            or turn_limit_reached
        )

        state.done = done

        # -----------------------------------
        # Reward
        # -----------------------------------

        total_reward, components = compute_reward(
            state=state,
            action=action,
            response_token=response_token,
            new_violations=new_violations,
            episode_done=done,
        )

        return SalesPathObservation(
            prospect_response=response_text,
            workflow_stage=state.workflow_stage,
            constraints_violated=list(state.constraints_violated),
            steps_completed=list(state.steps_completed),
            turn_number=state.turn_number,
            reward=total_reward,
            reward_components=components,
            done=done,
            info={
                "response_token": response_token,
                "new_violations": new_violations,
                "episode_id": state.episode_id,
            },
        )

    @property
    def state(self) -> SalesPathState:
        """Current internal episode state, hidden fields included."""
        return self._state
salespath_env/server/task_bank.py ADDED
@@ -0,0 +1,199 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # salespath_env/server/task_bank.py
2
+
3
+ import random
4
+ from dataclasses import dataclass
5
+
6
+
7
@dataclass
class ProspectProfile:
    """Template describing one simulated prospect."""

    # --- visible attributes -------------------------------------------
    company_name: str
    company_size: str  # small / medium / enterprise
    industry: str
    budget_signal: str  # high / medium / low / unknown
    pain_points: list[str]
    decision_maker: bool

    # --- hidden values — never exposed directly to agent ---------------
    true_budget: float  # 0.0 → 1.0
    close_threshold: float
    stall_probability: float
20
+
21
+
22
+ # -------------------------
23
+ # LEVEL 1 — Easy
24
+ # budget known
25
+ # decision maker present
26
+ # close is usually possible
27
+ # -------------------------
28
+
29
PROFILES_L1 = [
    ProspectProfile(
        company_name="Meridian Retail",
        company_size="medium",
        industry="retail",
        budget_signal="high",
        pain_points=["manual inventory tracking", "slow reporting"],
        decision_maker=True,
        true_budget=0.8,
        close_threshold=0.5,
        stall_probability=0.0,
    ),
    ProspectProfile(
        company_name="Northline Foods",
        company_size="small",
        industry="food distribution",
        budget_signal="medium",
        pain_points=["supplier delays", "inventory mismatch"],
        decision_maker=True,
        true_budget=0.6,
        close_threshold=0.5,
        stall_probability=0.0,
    ),
]


# -------------------------
# LEVEL 2 — Medium
# budget hidden initially
# one objection expected
# -------------------------

PROFILES_L2 = [
    ProspectProfile(
        company_name="Apex Logistics",
        company_size="enterprise",
        industry="logistics",
        budget_signal="unknown",
        pain_points=["route optimization", "driver coordination", "fuel tracking"],
        decision_maker=True,
        true_budget=0.7,
        close_threshold=0.5,
        stall_probability=0.0,
    ),
    ProspectProfile(
        company_name="Vertex Supply",
        company_size="medium",
        industry="manufacturing",
        budget_signal="unknown",
        pain_points=["vendor visibility", "purchase delays"],
        decision_maker=True,
        true_budget=0.55,
        close_threshold=0.5,
        stall_probability=0.0,
    ),
]


# -------------------------
# LEVEL 3 — Hard
# budget hidden
# 2 objections
# possible stalling
# decision maker may be absent
# -------------------------

PROFILES_L3 = [
    ProspectProfile(
        company_name="Nova Financial",
        company_size="enterprise",
        industry="finance",
        budget_signal="unknown",
        pain_points=["compliance reporting", "audit trails", "data silos"],
        decision_maker=False,
        true_budget=0.6,
        close_threshold=0.55,
        stall_probability=0.3,
    ),
    ProspectProfile(
        company_name="Atlas Health",
        company_size="enterprise",
        industry="healthcare",
        budget_signal="unknown",
        pain_points=["patient workflow delays", "reporting compliance"],
        decision_maker=False,
        true_budget=0.65,
        close_threshold=0.55,
        stall_probability=0.25,
    ),
]


# -------------------------
# LEVEL 4 — Trap cases
# misleading signals
# correct action may be DISQUALIFY
# -------------------------

PROFILES_L4 = [
    ProspectProfile(
        company_name="Cipher Tech",
        company_size="small",
        industry="technology",
        budget_signal="high",  # misleading
        pain_points=["security", "compliance"],
        decision_maker=True,
        true_budget=0.2,
        close_threshold=0.5,
        stall_probability=0.5,
    ),
    ProspectProfile(
        company_name="BluePeak Studio",
        company_size="small",
        industry="creative agency",
        budget_signal="high",  # misleading
        pain_points=["project visibility", "client reporting"],
        decision_maker=True,
        true_budget=0.25,
        close_threshold=0.5,
        stall_probability=0.4,
    ),
]


# Difficulty level -> candidate profiles for that level.
ALL_PROFILES = {
    1: PROFILES_L1,
    2: PROFILES_L2,
    3: PROFILES_L3,
    4: PROFILES_L4,
}
189
+
190
+
191
def sample_profile(difficulty: int) -> ProspectProfile:
    """
    Pick one profile at random from the pool for ``difficulty``.

    A difficulty that is not a key of ALL_PROFILES uses the level-1 pool.
    """
    level = difficulty if difficulty in ALL_PROFILES else 1
    candidates = ALL_PROFILES[level]
    return random.choice(candidates)