Spaces:

Lomesh7777
/

openenv-multi-agent-RL

Sleeping

openenv-multi-agent-RL / salespath_env /server /task_bank.py

Lomesh2000

FIX: grop update new , env changes

e6a02dd 13 days ago

17.5 kB

	# salespath_env/server/task_bank.py
	"""
	Prospect profiles, organised by difficulty.

	Per arXiv:2408.10215 §3 ("Reward shaping cannot fix data scarcity"),
	the training distribution must be wide enough that the policy cannot
	overfit to a handful of memorised episodes. Each level has 19 profiles:
	the first 15 are the training split and the last 4 of each level are
	reserved as a held-out eval set.

	Public API
	----------
	sample_profile(difficulty, split="train", rng=None)
	Sample a profile for online training/eval.

	iter_eval_profiles(difficulty)
	Iterate over the held-out eval profiles.

	iter_train_profiles(difficulty)
	Iterate over the training profiles.
	"""

	from __future__ import annotations

	import random
	from dataclasses import dataclass
	from typing import Iterator, List, Optional


	@dataclass
	class ProspectProfile:
	    """One simulated sales prospect.

	    The first six fields describe the prospect; the last three are hidden
	    simulation values. Profiles in this module are constructed with
	    positional arguments, so the field order below must stay as it is.
	    """

	    # --- descriptive fields -------------------------------------------------
	    company_name: str
	    company_size: str  # one of: small / medium / enterprise
	    industry: str
	    budget_signal: str  # one of: high / medium / low / unknown
	    pain_points: List[str]
	    decision_maker: bool

	    # --- hidden values: never exposed directly to the agent -----------------
	    true_budget: float  # ranges over 0.0 → 1.0
	    close_threshold: float
	    stall_probability: float


	# ---------------------------------------------------------------------------
	# LEVEL 1 — Easy
	# Budget known, decision maker present, close should succeed.
	# ---------------------------------------------------------------------------

	# Column order of each row follows the ProspectProfile field order:
	# company_name, company_size, industry, budget_signal, pain_points,
	# decision_maker, true_budget, close_threshold, stall_probability.
	# Every row here has decision_maker=True, close_threshold=0.50,
	# stall_probability=0.0 and a true_budget above the threshold.
	# Row order matters: the last 4 rows are sliced off as the eval split,
	# so new training rows must be inserted above the eval marker.
	PROFILES_L1: List[ProspectProfile] = [
	ProspectProfile("Meridian Retail", "medium", "retail", "high", ["manual inventory tracking", "slow reporting"], True, 0.80, 0.50, 0.0),
	ProspectProfile("Northline Foods", "small", "food distribution", "medium", ["supplier delays", "inventory mismatch"], True, 0.60, 0.50, 0.0),
	ProspectProfile("Crestline Auto", "medium", "automotive parts", "high", ["parts forecasting", "warehouse turnover"], True, 0.75, 0.50, 0.0),
	ProspectProfile("HarborGoods", "small", "consumer goods", "high", ["channel reporting", "stockout alerts"], True, 0.72, 0.50, 0.0),
	ProspectProfile("Ironclad Tools", "medium", "industrial supply", "high", ["catalog updates", "B2B quoting"], True, 0.78, 0.50, 0.0),
	ProspectProfile("Greenway Grocer", "medium", "grocery", "medium", ["expiration tracking", "cold-chain visibility"], True, 0.62, 0.50, 0.0),
	ProspectProfile("BlueRiver Pharma", "medium", "pharmacy retail", "high", ["compliance forms", "expiry alerts"], True, 0.70, 0.50, 0.0),
	ProspectProfile("Stride Apparel", "small", "apparel", "medium", ["sizing variants", "returns workflow"], True, 0.58, 0.50, 0.0),
	ProspectProfile("Summit Hardware", "medium", "hardware retail", "high", ["SKU bloat", "POS integration"], True, 0.74, 0.50, 0.0),
	ProspectProfile("Pinecrest Books", "small", "books", "medium", ["seasonal demand", "inventory shrinkage"], True, 0.55, 0.50, 0.0),
	ProspectProfile("Lakeside Resort", "medium", "hospitality", "high", ["guest preference data", "F&B inventory"], True, 0.68, 0.50, 0.0),
	ProspectProfile("Granite Coffee", "small", "F&B chain", "medium", ["multi-location SKU sync", "shrinkage"], True, 0.60, 0.50, 0.0),
	ProspectProfile("Horizon Outdoor", "medium", "sporting goods", "high", ["seasonal kitting", "regional demand"], True, 0.71, 0.50, 0.0),
	ProspectProfile("Cobalt Components","medium", "electronics dist.", "high", ["BOM management", "lead-time variance"], True, 0.77, 0.50, 0.0),
	ProspectProfile("Verdant Garden", "small", "garden centre", "medium", ["seasonal stock", "weather-driven demand"], True, 0.56, 0.50, 0.0),
	# ---- eval split (last 4) -----------------------------------------------
	ProspectProfile("Falcon Sports", "medium", "sporting goods", "high", ["return rate spikes", "regional sizing"], True, 0.69, 0.50, 0.0),
	ProspectProfile("Maple & Co", "small", "specialty grocery", "medium", ["organic inventory", "seasonal sourcing"], True, 0.57, 0.50, 0.0),
	ProspectProfile("Skyline Pet", "medium", "pet supplies", "high", ["food expiration", "subscription kits"], True, 0.73, 0.50, 0.0),
	ProspectProfile("Helix Beauty", "small", "beauty retail", "medium", ["palette variants", "promo windows"], True, 0.61, 0.50, 0.0),
	]


	# ---------------------------------------------------------------------------
	# LEVEL 2 — Medium
	# Budget hidden initially, one objection expected, demo required for close.
	# ---------------------------------------------------------------------------

	# Column order of each row follows the ProspectProfile field order:
	# company_name, company_size, industry, budget_signal, pain_points,
	# decision_maker, true_budget, close_threshold, stall_probability.
	# Every row here has budget_signal="unknown", decision_maker=True,
	# close_threshold=0.50, stall_probability=0.0 and a true_budget above
	# the threshold.
	# Row order matters: the last 4 rows are sliced off as the eval split,
	# so new training rows must be inserted above the eval marker.
	PROFILES_L2: List[ProspectProfile] = [
	ProspectProfile("Apex Logistics", "enterprise", "logistics", "unknown", ["route optimization", "driver coordination", "fuel tracking"], True, 0.70, 0.50, 0.0),
	ProspectProfile("Vertex Supply", "medium", "manufacturing", "unknown", ["vendor visibility", "purchase delays"], True, 0.55, 0.50, 0.0),
	ProspectProfile("Polaris Freight", "enterprise", "freight", "unknown", ["dispatch SLA", "fleet maintenance"], True, 0.66, 0.50, 0.0),
	ProspectProfile("Cobra Builders", "medium", "construction", "unknown", ["project costing", "subcontractor coordination"], True, 0.60, 0.50, 0.0),
	ProspectProfile("Aegis Energy", "enterprise", "utilities", "unknown", ["asset uptime", "grid analytics"], True, 0.71, 0.50, 0.0),
	ProspectProfile("Crystal Foods", "medium", "food processing", "unknown", ["batch traceability", "regulatory reporting"], True, 0.58, 0.50, 0.0),
	ProspectProfile("Atlas Steel", "enterprise", "metals", "unknown", ["yield optimization", "downtime reduction"], True, 0.65, 0.50, 0.0),
	ProspectProfile("Quartz Mobility", "medium", "mobility tech", "unknown", ["fleet utilization", "telematics ingest"], True, 0.59, 0.50, 0.0),
	ProspectProfile("Beacon Insure", "enterprise", "insurance", "unknown", ["claims triage", "fraud signals"], True, 0.72, 0.50, 0.0),
	ProspectProfile("Tesseract Bio", "medium", "biotech", "unknown", ["lab inventory", "experiment tracking"], True, 0.62, 0.50, 0.0),
	ProspectProfile("Pivot Media", "enterprise", "media", "unknown", ["content rights", "campaign attribution"], True, 0.69, 0.50, 0.0),
	ProspectProfile("Solstice Travel", "medium", "travel", "unknown", ["booking variance", "supplier API churn"], True, 0.57, 0.50, 0.0),
	ProspectProfile("Anvil Robotics", "enterprise", "robotics", "unknown", ["fleet calibration", "OTA updates"], True, 0.74, 0.50, 0.0),
	ProspectProfile("Pacific Marine", "medium", "shipping", "unknown", ["port turnaround", "container visibility"], True, 0.61, 0.50, 0.0),
	ProspectProfile("Lumen Telecom", "enterprise", "telecom", "unknown", ["service incidents", "field tech routing"], True, 0.68, 0.50, 0.0),
	# ---- eval split --------------------------------------------------------
	ProspectProfile("Onyx Logistics", "enterprise", "logistics", "unknown", ["last-mile delays", "warehouse handoffs"], True, 0.67, 0.50, 0.0),
	ProspectProfile("Sigma Industrial", "medium", "industrial", "unknown", ["MRO inventory", "supplier OTIF"], True, 0.56, 0.50, 0.0),
	ProspectProfile("Kepler Insurance", "enterprise", "insurance", "unknown", ["renewal forecasting", "policy ops"], True, 0.70, 0.50, 0.0),
	ProspectProfile("Mosaic Energy", "enterprise", "energy", "unknown", ["asset health", "predictive maintenance"], True, 0.66, 0.50, 0.0),
	]


	# ---------------------------------------------------------------------------
	# LEVEL 3 — Hard
	# Budget hidden, two objections, possible stalling, decision maker may be absent.
	# ---------------------------------------------------------------------------

	# Column order of each row follows the ProspectProfile field order:
	# company_name, company_size, industry, budget_signal, pain_points,
	# decision_maker, true_budget, close_threshold, stall_probability.
	# Every row here is an "enterprise" prospect with budget_signal="unknown",
	# decision_maker=False, close_threshold=0.55 and a stall_probability of
	# 0.25 or 0.30.
	# NOTE(review): "Vanguard Edu" has true_budget == close_threshold (0.55);
	# confirm whether the environment's close check is inclusive.
	# Row order matters: the last 4 rows are sliced off as the eval split,
	# so new training rows must be inserted above the eval marker.
	PROFILES_L3: List[ProspectProfile] = [
	ProspectProfile("Nova Financial", "enterprise", "finance", "unknown", ["compliance reporting", "audit trails", "data silos"], False, 0.60, 0.55, 0.30),
	ProspectProfile("Atlas Health", "enterprise", "healthcare", "unknown", ["patient workflow delays", "reporting compliance"], False, 0.65, 0.55, 0.25),
	ProspectProfile("Citadel Bank", "enterprise", "banking", "unknown", ["KYC automation", "fraud detection lag"], False, 0.62, 0.55, 0.30),
	ProspectProfile("Helios Hospitals", "enterprise", "healthcare", "unknown", ["EHR fragmentation", "billing reconciliation"], False, 0.58, 0.55, 0.30),
	ProspectProfile("Orion Asset Mgmt", "enterprise", "asset mgmt", "unknown", ["risk reporting", "ESG data ingestion"], False, 0.66, 0.55, 0.25),
	ProspectProfile("Sable Pharma", "enterprise", "pharma", "unknown", ["GxP traceability", "trial data integrity"], False, 0.61, 0.55, 0.30),
	ProspectProfile("Magellan Travel", "enterprise", "travel ops", "unknown", ["disruption response", "loyalty data"], False, 0.59, 0.55, 0.30),
	ProspectProfile("Crucible Defense", "enterprise", "defense", "unknown", ["clearance workflow", "supply chain audit"], False, 0.63, 0.55, 0.25),
	ProspectProfile("Seraphim Care", "enterprise", "elder care", "unknown", ["caregiver scheduling", "regulatory reporting"], False, 0.57, 0.55, 0.30),
	ProspectProfile("Polaris Reinsure", "enterprise", "reinsurance", "unknown", ["catastrophe modeling", "loss aggregation"], False, 0.64, 0.55, 0.30),
	ProspectProfile("Vanguard Edu", "enterprise", "education", "unknown", ["enrollment ops", "compliance audits"], False, 0.55, 0.55, 0.25),
	ProspectProfile("Aurora Telecom", "enterprise", "telecom", "unknown", ["spectrum analytics", "tower asset mgmt"], False, 0.60, 0.55, 0.30),
	ProspectProfile("Trident Marine", "enterprise", "marine", "unknown", ["fleet compliance", "fuel arbitrage"], False, 0.58, 0.55, 0.30),
	ProspectProfile("Granite Mining", "enterprise", "mining", "unknown", ["asset uptime", "ESG reporting"], False, 0.62, 0.55, 0.30),
	ProspectProfile("Echelon Health", "enterprise", "health-ins", "unknown", ["claims adjudication", "provider network"], False, 0.59, 0.55, 0.30),
	# ---- eval split --------------------------------------------------------
	ProspectProfile("Castle Securities", "enterprise", "securities", "unknown", ["trade surveillance", "settlement breaks"], False, 0.61, 0.55, 0.30),
	ProspectProfile("Lighthouse Care", "enterprise", "elder care", "unknown", ["staffing variance", "incident reporting"], False, 0.56, 0.55, 0.25),
	ProspectProfile("Crown Reinsurance", "enterprise", "reinsurance", "unknown", ["catastrophe modeling", "treaty management"], False, 0.63, 0.55, 0.30),
	ProspectProfile("Apex Pharma", "enterprise", "pharma", "unknown", ["clinical-trial reporting", "supply chain audit"], False, 0.60, 0.55, 0.30),
	]


	# ---------------------------------------------------------------------------
	# LEVEL 4 — Adversarial
	# Misleading "high" budget signal but actual budget < threshold,
	# OR decision maker absent. Correct action is DISQUALIFY.
	# ---------------------------------------------------------------------------

	# Column order of each row follows the ProspectProfile field order:
	# company_name, company_size, industry, budget_signal, pain_points,
	# decision_maker, true_budget, close_threshold, stall_probability.
	# Every row here is a "small" prospect that advertises budget_signal="high"
	# while its true_budget (0.16-0.25) is far below close_threshold (0.50);
	# decision_maker is False and stall_probability is 0.40-0.50 throughout.
	# Row order matters: the last 4 rows are sliced off as the eval split,
	# so new training rows must be inserted above the eval marker.
	PROFILES_L4: List[ProspectProfile] = [
	ProspectProfile("Cipher Tech", "small", "technology", "high", ["security", "compliance"], False, 0.20, 0.50, 0.50),
	ProspectProfile("BluePeak Studio", "small", "creative agency", "high", ["project visibility", "client reporting"], False, 0.25, 0.50, 0.40),
	ProspectProfile("Nimbus Labs", "small", "research", "high", ["grant reporting", "experiment tracking"], False, 0.18, 0.50, 0.45),
	ProspectProfile("Halo Consulting", "small", "consulting", "high", ["billable utilization", "client deliverables"], False, 0.22, 0.50, 0.45),
	ProspectProfile("Spire Architects", "small", "architecture", "high", ["drawing revisions", "permit tracking"], False, 0.24, 0.50, 0.40),
	ProspectProfile("Quill Publishing", "small", "publishing", "high", ["royalty tracking", "rights management"], False, 0.17, 0.50, 0.50),
	ProspectProfile("Onyx Boutique", "small", "fashion boutique", "high", ["trend forecasting", "supplier mix"], False, 0.21, 0.50, 0.45),
	ProspectProfile("Topaz Cinema", "small", "indie film", "high", ["distribution rights", "festival logistics"], False, 0.19, 0.50, 0.50),
	ProspectProfile("Mariner Charter", "small", "yacht charter", "high", ["seasonal demand", "crew scheduling"], False, 0.23, 0.50, 0.45),
	ProspectProfile("Velvet Catering", "small", "catering", "high", ["event variance", "ingredient costing"], False, 0.16, 0.50, 0.50),
	ProspectProfile("Echo Photography", "small", "studio", "high", ["project pipelines", "asset licensing"], False, 0.20, 0.50, 0.45),
	ProspectProfile("Stellar Wellness", "small", "wellness", "high", ["membership churn", "class scheduling"], False, 0.22, 0.50, 0.45),
	ProspectProfile("Drift Digital", "small", "agency", "high", ["campaign attribution", "creative asset library"], False, 0.19, 0.50, 0.50),
	ProspectProfile("Ember Theater", "small", "performing arts", "high", ["production budgeting", "ticket allocation"], False, 0.18, 0.50, 0.45),
	ProspectProfile("Halcyon Crafts", "small", "artisan retail", "high", ["maker payouts", "fulfilment SLA"], False, 0.21, 0.50, 0.50),
	# ---- eval split --------------------------------------------------------
	ProspectProfile("Onyx Tech", "small", "technology", "high", ["zero-trust rollout", "compliance"], False, 0.19, 0.50, 0.50),
	ProspectProfile("Haven Studio", "small", "creative agency", "high", ["client-asset versioning", "billing transparency"], False, 0.23, 0.50, 0.40),
	ProspectProfile("Beacon Indie", "small", "publishing", "high", ["distribution rights", "royalty splits"], False, 0.17, 0.50, 0.50),
	ProspectProfile("Kindled Catering", "small", "catering", "high", ["event variance", "menu engineering"], False, 0.22, 0.50, 0.45),
	]


	# ---------------------------------------------------------------------------
	# Splits
	# ---------------------------------------------------------------------------

	# The trailing `_EVAL_SIZE` entries of each level list form the held-out
	# eval split; everything before them is the training split.
	_EVAL_SIZE = 4


	def _split(
	    profiles: List[ProspectProfile],
	    eval_size: Optional[int] = None,
	) -> tuple[list, list]:
	    """Partition *profiles* into ``(train, eval)`` by position.

	    Parameters
	    ----------
	    profiles : list of profiles, ordered with the eval entries last
	    eval_size : number of trailing entries to hold out; ``None`` (the
	        default) uses the module-level ``_EVAL_SIZE``

	    Returns
	    -------
	    (train, eval) : two new lists; ``eval`` holds the last ``eval_size``
	        entries and ``train`` everything before them. When ``profiles`` has
	        no more than ``eval_size`` entries, ``train`` is empty.

	    Raises
	    ------
	    ValueError
	        If the effective eval size is negative.
	    """
	    size = _EVAL_SIZE if eval_size is None else eval_size
	    if size < 0:
	        raise ValueError(f"eval_size must be non-negative, got {size}")

	    # Slice on an explicit, non-negative cut index. The negative-index form
	    # (`profiles[:-size]`, `profiles[-size:]`) inverts the split when size
	    # is 0, because `[:-0]` is empty and `[-0:]` is the whole list.
	    cut = max(len(profiles) - size, 0)
	    return profiles[:cut], profiles[cut:]


	# Difficulty level → complete profile list (train and eval rows together).
	ALL_PROFILES = dict(
	    enumerate((PROFILES_L1, PROFILES_L2, PROFILES_L3, PROFILES_L4), start=1)
	)

	# Per-level (train, eval) partitions, also kept under their own names.
	_TRAIN_L1, _EVAL_L1 = _split(ALL_PROFILES[1])
	_TRAIN_L2, _EVAL_L2 = _split(ALL_PROFILES[2])
	_TRAIN_L3, _EVAL_L3 = _split(ALL_PROFILES[3])
	_TRAIN_L4, _EVAL_L4 = _split(ALL_PROFILES[4])

	# Difficulty level → training rows / held-out eval rows.
	TRAIN_PROFILES = dict(
	    enumerate((_TRAIN_L1, _TRAIN_L2, _TRAIN_L3, _TRAIN_L4), start=1)
	)
	EVAL_PROFILES = dict(
	    enumerate((_EVAL_L1, _EVAL_L2, _EVAL_L3, _EVAL_L4), start=1)
	)


	# ---------------------------------------------------------------------------
	# Public API
	# ---------------------------------------------------------------------------

	def sample_profile(
	    difficulty: int,
	    split: str = "train",
	    rng: Optional[random.Random] = None,
	) -> ProspectProfile:
	    """
	    Sample one profile from the requested split.

	    Parameters
	    ----------
	    difficulty : int (1..4)
	        Level to sample from. A value that is not a key of
	        ``TRAIN_PROFILES`` falls back to level 1.
	    split : "train" | "eval" | "all"
	        Pool to draw from. Any other value still draws from the training
	        pool (as before) but now emits a ``UserWarning``, so that a
	        misspelt split name cannot silently turn an evaluation run into
	        sampling from training data.
	    rng : optional pre-seeded RNG for reproducibility
	        When ``None``, the module-level ``random`` generator is used.

	    Returns
	    -------
	    ProspectProfile
	        The selected entry of the pool itself, not a copy.
	    """
	    if difficulty not in TRAIN_PROFILES:
	        difficulty = 1

	    pool: List[ProspectProfile]
	    if split == "eval":
	        pool = EVAL_PROFILES[difficulty]
	    elif split == "all":
	        pool = ALL_PROFILES[difficulty]
	    else:
	        if split != "train":
	            # Local import: only needed on this rare path.
	            import warnings

	            warnings.warn(
	                f"sample_profile: unknown split {split!r}; "
	                "falling back to the training split",
	                stacklevel=2,
	            )
	        pool = TRAIN_PROFILES[difficulty]

	    # Explicit None check rather than `rng or random`, so the choice of
	    # generator never depends on the truthiness of the object passed in.
	    chooser = random if rng is None else rng
	    return chooser.choice(pool)


	def iter_train_profiles(difficulty: int) -> Iterator[ProspectProfile]:
	    """Yield the training-split profiles of *difficulty*, in list order.

	    This is a generator: a difficulty that is not a key of
	    ``TRAIN_PROFILES`` raises ``KeyError`` when iteration starts, not when
	    the function is called.
	    """
	    for profile in TRAIN_PROFILES[difficulty]:
	        yield profile


	def iter_eval_profiles(difficulty: int) -> Iterator[ProspectProfile]:
	    """Yield the held-out eval profiles of *difficulty*, in list order.

	    This is a generator: a difficulty that is not a key of
	    ``EVAL_PROFILES`` raises ``KeyError`` when iteration starts, not when
	    the function is called.
	    """
	    for profile in EVAL_PROFILES[difficulty]:
	        yield profile