# salespath_env/server/prospect_simulator.py
import hashlib
import random
from ..models import SalesPathAction, SalesPathState
# Canned prospect utterances, keyed by response token.
# Tokens follow a "<family>:<detail>" convention, except the bare "silence".
RESPONSE_TEXT = {
    # --- open: the prospect keeps the conversation going ---
    "open:positive_signal": "That sounds interesting. Tell me more about how this works.",
    "open:neutral_signal": "I see. We're evaluating a few options at the moment.",
    # --- objection: the prospect pushes back ---
    "objection:price": "The pricing seems higher than what we budgeted for.",
    "objection:timing": "The timing isn't ideal — we're in the middle of a quarter close.",
    "objection:premature_pitch": (
        "I'm not sure we're ready to discuss solutions yet. "
        "What do you know about our current situation?"
    ),
    # --- deflect: the prospect sidesteps ---
    "deflect:budget_not_discussed": "We haven't really talked about what we're looking for yet.",
    "deflect:stall": "Let me get back to you on this. A lot is happening on our end.",
    # --- accept / reject: outcomes of a demo offer or a close attempt ---
    "accept:demo_scheduled": "Yes, let's set up a demo. What time works next week?",
    "accept:close_success": (
        "Alright, I think we can move forward with this. "
        "Send over the paperwork."
    ),
    "reject:close_failed": "I don't think we're ready to commit at this point.",
    # --- no reply / conversation over ---
    "silence": "",
    "exit:disqualified": "I think we're done here. This isn't the right fit.",
}
def _seeded_random(state: SalesPathState, action: SalesPathAction) -> random.Random:
    """
    Return a private RNG seeded from (episode_id, turn_number, action_type).

    The same triple always produces the same seed, so any draw made from the
    returned generator is reproducible. That matters for GRPO training, where
    environment state is restored from snapshots and actions are re-applied
    in another process / thread: an unseeded `random.random()` call could
    yield a different prospect response (and therefore a different reward)
    at gradient-update time than at rollout time.

    Args:
        state: supplies `episode_id` and `turn_number`.
        action: supplies `action_type`.

    Returns:
        A fresh `random.Random`; the module-level RNG is never touched.
    """
    seed_material = f"{state.episode_id}|{state.turn_number}|{action.action_type}"
    digest = hashlib.sha1(seed_material.encode("utf-8")).digest()
    # The leading 6 digest bytes (48 bits), read big-endian, are the same
    # number as the first 12 hex characters of the hex digest.
    return random.Random(int.from_bytes(digest[:6], "big"))
# Sentence placed in front of the QUALIFY response text so the prospect's
# budget level is disclosed through wording alone; prospect_profile itself
# is never written to (the prospect state is treated as immutable).
# Keys are budget levels; every value ends with a space so it can be
# concatenated directly with the base response.
BUDGET_REVEAL_TEXT = {
    "high": "We do have solid budget allocated for this initiative. ",
    "medium": "We have some budget set aside, though flexibility is limited. ",
    "low": "Our budget is quite constrained right now. ",
}
class ProspectSimulator:
    """
    Rule-based prospect: maps (action, state) to a response token and text.

    No LLM and no model inference is involved. The only randomness is the
    stall injection, which draws from an RNG seeded by
    (episode_id, turn_number, action_type) and is therefore reproducible.

    State handling:
        * state.prospect_profile is only read, never written; a budget
          reveal is expressed purely through the response text.
        * The single write performed here is the increment of
          state.objections_handled when a HANDLE_OBJECTION action is
          processed (see _get_token).
    """

    def respond(
        self,
        action: SalesPathAction,
        state: SalesPathState,
    ) -> tuple[str, str]:
        """
        Produce the prospect's reaction to `action`.

        Returns:
            (response_token, response_text)
        """
        response_token = self._get_token(action, state)
        return response_token, self._build_text(response_token, action, state)

    # ------------------------------------------------------------------
    # Private helpers
    # ------------------------------------------------------------------

    def _build_text(
        self,
        token: str,
        action: SalesPathAction,
        state: SalesPathState,
    ) -> str:
        """
        Turn a response token into the sentence the prospect says.

        For a QUALIFY action whose profile still reports budget_signal as
        "unknown" (or has no such key), a budget-reveal sentence is placed
        in front of the base text. The level comes from
        hidden_state["revealed_budget"] (default "medium"); a level with no
        entry in BUDGET_REVEAL_TEXT adds nothing.

        Raises:
            KeyError: if `token` is not a key of RESPONSE_TEXT.
        """
        sentence = RESPONSE_TEXT[token]

        if action.action_type != "QUALIFY":
            return sentence
        if state.prospect_profile.get("budget_signal", "unknown") != "unknown":
            # Budget is already known on the profile: nothing to reveal.
            return sentence

        # The level is taken from hidden_state; prospect_profile is only
        # inspected above and never modified.
        level = state.hidden_state.get("revealed_budget", "medium")
        return BUDGET_REVEAL_TEXT.get(level, "") + sentence

    def _get_token(
        self,
        action: SalesPathAction,
        state: SalesPathState,
    ) -> str:
        """
        Choose the response token for `action` in the given `state`.

        Checks run in priority order:
            1. the most recent rule violation (R01 / R03),
            2. seeded stall injection (difficulty >= 3 and turn >= 5),
            3. the per-action rules.

        Side effect:
            A HANDLE_OBJECTION action that reaches step 3 increments
            state.objections_handled by one.
        """
        kind = action.action_type
        level = state.difficulty
        turn_no = state.turn_number
        prospect = state.prospect_profile
        secrets = state.hidden_state
        # Snapshot taken *before* any increment performed below.
        handled_before = state.objections_handled

        # --------------------------------------------------
        # 1. Rule-violation responses (highest priority)
        #    Only the latest recorded violation is considered;
        #    codes other than R01 / R03 fall through.
        # --------------------------------------------------
        violations = state.constraints_violated
        if violations:
            most_recent = violations[-1]
            if most_recent == "R01":
                return "objection:premature_pitch"
            if most_recent == "R03":
                return "deflect:budget_not_discussed"

        # --------------------------------------------------
        # 2. Stall injection for difficulty 3+
        #    The draw comes from a state-seeded RNG, so the
        #    outcome is fixed for a given
        #    (episode_id, turn, action) - required for GRPO
        #    state-snapshot consistency.
        # --------------------------------------------------
        if level >= 3 and turn_no >= 5:
            stall_chance = secrets.get("stall_probability", 0.0)
            # The RNG is built only when stalling is possible at all.
            if stall_chance > 0.0 and _seeded_random(state, action).random() < stall_chance:
                return "deflect:stall"

        # --------------------------------------------------
        # 3. Action-based deterministic responses
        # --------------------------------------------------
        if kind == "PROSPECT":
            return "open:positive_signal"

        if kind == "PRESENT":
            # From difficulty 2 upward, presenting before any objection
            # has been handled draws a price objection.
            price_pushback = level >= 2 and handled_before == 0
            return "objection:price" if price_pushback else "open:positive_signal"

        if kind == "HANDLE_OBJECTION":
            state.objections_handled += 1  # the only state write in this class
            needed = secrets.get("num_objections", 1)
            if state.objections_handled >= needed or handled_before != 0:
                return "open:positive_signal"
            # First objection handled, but more are still required.
            return "objection:timing"

        if kind == "OFFER_DEMO":
            return "accept:demo_scheduled"

        if kind == "CLOSE":
            budget = secrets.get("true_budget", 0.7)
            threshold = secrets.get("close_threshold", 0.5)
            has_authority = prospect.get("decision_maker", True)
            if budget >= threshold and has_authority:
                return "accept:close_success"
            return "reject:close_failed"

        if kind == "DISQUALIFY":
            return "exit:disqualified"

        # QUALIFY, NEGOTIATE, FOLLOW_UP and any unrecognised action type
        # all receive the neutral signal.
        return "open:neutral_signal"