# salespath_env/server/prospect_simulator.py
| import hashlib | |
| import random | |
| from ..models import SalesPathAction, SalesPathState | |
# Canned prospect utterances, keyed by the response token that
# ProspectSimulator._get_token returns. Tokens have the form
# "<category>:<detail>", plus the bare "silence" token whose text is empty.
RESPONSE_TEXT = {
    "open:positive_signal": (
        "That sounds interesting. Tell me more about how this works."
    ),
    "open:neutral_signal": (
        "I see. We're evaluating a few options at the moment."
    ),
    "objection:price": (
        "The pricing seems higher than what we budgeted for."
    ),
    "objection:timing": (
        "The timing isn't ideal — we're in the middle of a quarter close."
    ),
    "objection:premature_pitch": (
        "I'm not sure we're ready to discuss solutions yet. "
        "What do you know about our current situation?"
    ),
    "deflect:budget_not_discussed": (
        "We haven't really talked about what we're looking for yet."
    ),
    "deflect:stall": (
        "Let me get back to you on this. A lot is happening on our end."
    ),
    "accept:demo_scheduled": (
        "Yes, let's set up a demo. What time works next week?"
    ),
    "accept:close_success": (
        "Alright, I think we can move forward with this. "
        "Send over the paperwork."
    ),
    "reject:close_failed": (
        "I don't think we're ready to commit at this point."
    ),
    "silence": "",
    "exit:disqualified": (
        "I think we're done here. This isn't the right fit."
    ),
}
def _seeded_random(state: SalesPathState, action: SalesPathAction) -> random.Random:
    """
    Build a deterministic RNG keyed on (episode_id, turn_number, action_type).

    Why: GRPO training restores environment state from snapshots and re-applies
    actions in a separate process / thread. If the prospect's response depends
    on an unseeded `random.random()` call, the reward computed during gradient
    update can disagree with the rollout-time reward, breaking the snapshot
    trick and silently corrupting the gradient.

    Args:
        state: Supplies `episode_id` and `turn_number` for the seed key.
        action: Supplies `action_type` for the seed key.

    Returns:
        A fresh `random.Random` whose seed depends only on those three values,
        so equal inputs always produce the same sequence of draws.
    """
    key = f"{state.episode_id}|{state.turn_number}|{action.action_type}"
    # hashlib rather than the built-in hash(): str hashes are salted per
    # interpreter process, which would defeat cross-process reproducibility.
    # The first 12 hex digits (48 bits) of the digest are used as the seed.
    digest = hashlib.sha1(key.encode("utf-8")).hexdigest()
    seed = int(digest[:12], 16)
    return random.Random(seed)
# Prefix injected into QUALIFY response to reveal budget signal
# without mutating prospect_profile (immutable prospect state).
# Keyed by the budget level; every value ends with a trailing space so it
# can be concatenated directly in front of the base response text.
BUDGET_REVEAL_TEXT = {
    "high": "We do have solid budget allocated for this initiative. ",
    "medium": "We have some budget set aside, though flexibility is limited. ",
    "low": "Our budget is quite constrained right now. ",
}
class ProspectSimulator:
    """
    Pure rule-based prospect simulator. No LLM. No transformers.

    Reproducible: the response token depends only on the action type and the
    supplied state. The single probabilistic rule (stall injection) draws from
    an RNG seeded on (episode_id, turn_number, action_type), so the same state
    and action always produce the same draw.

    Immutability guarantee:
        This class NEVER mutates state.prospect_profile.
        Budget reveal is surfaced via the response *text* only.
        The environment (salespath_environment.py) owns all other state
        writes.

    Known exception:
        A HANDLE_OBJECTION action that reaches the action rules increments
        state.objections_handled in place (see _get_token), so replaying the
        same action requires restoring the state first.
    """

    def respond(
        self,
        action: SalesPathAction,
        state: SalesPathState,
    ) -> tuple[str, str]:
        """
        Produce the prospect's reply to a single agent action.

        Args:
            action: The agent action; only `action_type` is read.
            state: Current episode state. `objections_handled` may be
                incremented as a side effect (HANDLE_OBJECTION only).

        Returns:
            (response_token, response_text)
        """
        token = self._get_token(action, state)
        text = self._build_text(token, action, state)
        return token, text

    # ------------------------------------------------------------------
    # Private helpers
    # ------------------------------------------------------------------

    def _build_text(
        self,
        token: str,
        action: SalesPathAction,
        state: SalesPathState,
    ) -> str:
        """
        Turn a response token into the text shown to the agent.

        Args:
            token: A key of RESPONSE_TEXT.
            action: The agent action; only `action_type` is read.
            state: Read-only here; `prospect_profile` and `hidden_state`
                are consulted for the budget reveal.

        Returns:
            The canned text for `token`, prefixed with a budget-reveal
            sentence when the action is QUALIFY and the profile still lists
            the budget signal as "unknown".

        Raises:
            KeyError: If `token` is not a key of RESPONSE_TEXT.
        """
        base = RESPONSE_TEXT[token]

        # Inject budget reveal into QUALIFY response text.
        # prospect_profile is only *read* to check whether the budget is
        # still unknown; the level to reveal comes from hidden_state, so no
        # mutation of the profile is needed.
        # NOTE(review): the prefix is added for any token, including
        # deflect/objection replies produced by the rules in _get_token --
        # confirm that is intended.
        if action.action_type == "QUALIFY":
            budget_signal = state.prospect_profile.get("budget_signal", "unknown")
            if budget_signal == "unknown":
                revealed = state.hidden_state.get("revealed_budget", "medium")
                # An unrecognised level yields an empty prefix.
                prefix = BUDGET_REVEAL_TEXT.get(revealed, "")
                return prefix + base

        return base

    def _get_token(
        self,
        action: SalesPathAction,
        state: SalesPathState,
    ) -> str:
        """
        Choose the response token for `action` given `state`.

        Rules are evaluated in priority order: rule-violation responses,
        then seeded stall injection, then the per-action-type table.

        Args:
            action: The agent action; only `action_type` is read.
            state: Current episode state. For HANDLE_OBJECTION,
                `objections_handled` is incremented in place.

        Returns:
            A response token; every value returned here is a key of
            RESPONSE_TEXT.
        """
        atype = action.action_type
        difficulty = state.difficulty
        turn = state.turn_number
        profile = state.prospect_profile
        hidden = state.hidden_state
        # Snapshot taken BEFORE any increment below; the HANDLE_OBJECTION
        # branch relies on this being the pre-action count.
        objections = state.objections_handled

        # --------------------------------------------------
        # 1. Rule-violation responses (highest priority)
        # --------------------------------------------------
        # Only the most recent entry is inspected, and only R01 / R03 have
        # dedicated responses; any other code falls through to the rules
        # below.
        # NOTE(review): presumably the environment appends the current
        # turn's violation before calling in -- verify against caller.
        if state.constraints_violated:
            latest = state.constraints_violated[-1]
            if latest == "R01":
                return "objection:premature_pitch"
            if latest == "R03":
                return "deflect:budget_not_discussed"

        # --------------------------------------------------
        # 2. Stall injection for difficulty 3+
        #    Uses a state-seeded RNG so the response is
        #    deterministic given (episode_id, turn, action).
        #    Required for GRPO state-snapshot consistency.
        # --------------------------------------------------
        if difficulty >= 3 and turn >= 5:
            stall_prob = hidden.get("stall_probability", 0.0)
            if stall_prob > 0.0:
                rng = _seeded_random(state, action)
                if rng.random() < stall_prob:
                    return "deflect:stall"

        # --------------------------------------------------
        # 3. Action-based deterministic responses
        # --------------------------------------------------
        if atype == "PROSPECT":
            return "open:positive_signal"

        if atype == "QUALIFY":
            return "open:neutral_signal"

        if atype == "PRESENT":
            # From difficulty 2 up, presenting before any objection has been
            # handled draws a price objection.
            if difficulty >= 2 and objections == 0:
                return "objection:price"
            return "open:positive_signal"

        if atype == "HANDLE_OBJECTION":
            state.objections_handled += 1  # only non-profile mutation
            required = hidden.get("num_objections", 1)
            if state.objections_handled >= required:
                return "open:positive_signal"
            # More objections are still required: if this was the first one
            # handled (pre-action count was 0), raise the timing objection.
            if objections == 0:
                return "objection:timing"
            return "open:positive_signal"

        if atype == "OFFER_DEMO":
            return "accept:demo_scheduled"

        if atype == "NEGOTIATE":
            return "open:neutral_signal"

        if atype == "CLOSE":
            true_budget = hidden.get("true_budget", 0.7)
            close_threshold = hidden.get("close_threshold", 0.5)
            decision_maker = profile.get("decision_maker", True)
            # Closing succeeds only when the hidden budget meets the
            # threshold AND the contact is a decision maker.
            if true_budget >= close_threshold and decision_maker:
                return "accept:close_success"
            return "reject:close_failed"

        if atype == "FOLLOW_UP":
            return "open:neutral_signal"

        if atype == "DISQUALIFY":
            return "exit:disqualified"

        # Unrecognised action types get a neutral reply.
        return "open:neutral_signal"