Spaces:

Lomesh7777
/

openenv-multi-agent-RL

Sleeping

File size: 17,450 Bytes

e6a02dd

# salespath_env/server/task_bank.py
"""
Prospect profiles, organised by difficulty.

Per arXiv:2408.10215 §3 ("Reward shaping cannot fix data scarcity"),
the training distribution must be wide enough that the policy cannot
overfit to a handful of memorised episodes.  Each level holds 19 profiles:
the first 15 are used for training and the last 4 of each level are
reserved as a held-out eval set.

Public API
----------
    sample_profile(difficulty, split="train", rng=None)
        Sample a profile for online training/eval.

    iter_eval_profiles(difficulty)
        Iterate over the held-out eval profiles.

    iter_train_profiles(difficulty)
        Iterate over the training profiles.
"""

from __future__ import annotations

import random
from dataclasses import dataclass
from typing import Iterator, List, Optional


@dataclass
class ProspectProfile:
    """A single prospect record used to seed an episode.

    Fields are declared in the order the profile tables in this module pass
    them positionally, so the order must not change without updating every
    ``ProspectProfile(...)`` call.
    """

    # --- Descriptive attributes -------------------------------------------
    company_name: str
    company_size: str           # small / medium / enterprise
    industry: str
    budget_signal: str          # high / medium / low / unknown
    pain_points: List[str]      # free-text pain points, one string each
    decision_maker: bool        # presumably: is the contact the decision maker — confirm against the environment

    # Hidden values — never exposed directly to the agent.
    true_budget: float          # 0.0 → 1.0
    close_threshold: float      # presumably the true_budget level a close requires — TODO confirm
    stall_probability: float    # presumably the chance the prospect stalls — TODO confirm


# ---------------------------------------------------------------------------
# LEVEL 1 — Easy
# Budget known, decision maker present, close should succeed.
# ---------------------------------------------------------------------------

PROFILES_L1: List[ProspectProfile] = [
    # Positional columns follow the ProspectProfile field order:
    #   company_name, company_size, industry, budget_signal, pain_points,
    #   decision_maker, true_budget, close_threshold, stall_probability
    # Every Level 1 row has decision_maker=True, a true_budget above its
    # close_threshold (0.50) and stall_probability 0.0.
    ProspectProfile("Meridian Retail",  "medium",     "retail",            "high",   ["manual inventory tracking", "slow reporting"],            True, 0.80, 0.50, 0.0),
    ProspectProfile("Northline Foods",  "small",      "food distribution", "medium", ["supplier delays", "inventory mismatch"],                  True, 0.60, 0.50, 0.0),
    ProspectProfile("Crestline Auto",   "medium",     "automotive parts",  "high",   ["parts forecasting", "warehouse turnover"],                True, 0.75, 0.50, 0.0),
    ProspectProfile("HarborGoods",      "small",      "consumer goods",    "high",   ["channel reporting", "stockout alerts"],                   True, 0.72, 0.50, 0.0),
    ProspectProfile("Ironclad Tools",   "medium",     "industrial supply", "high",   ["catalog updates", "B2B quoting"],                         True, 0.78, 0.50, 0.0),
    ProspectProfile("Greenway Grocer",  "medium",     "grocery",           "medium", ["expiration tracking", "cold-chain visibility"],           True, 0.62, 0.50, 0.0),
    ProspectProfile("BlueRiver Pharma", "medium",     "pharmacy retail",   "high",   ["compliance forms", "expiry alerts"],                      True, 0.70, 0.50, 0.0),
    ProspectProfile("Stride Apparel",   "small",      "apparel",           "medium", ["sizing variants", "returns workflow"],                    True, 0.58, 0.50, 0.0),
    ProspectProfile("Summit Hardware",  "medium",     "hardware retail",   "high",   ["SKU bloat", "POS integration"],                           True, 0.74, 0.50, 0.0),
    ProspectProfile("Pinecrest Books",  "small",      "books",             "medium", ["seasonal demand", "inventory shrinkage"],                 True, 0.55, 0.50, 0.0),
    ProspectProfile("Lakeside Resort",  "medium",     "hospitality",       "high",   ["guest preference data", "F&B inventory"],                 True, 0.68, 0.50, 0.0),
    ProspectProfile("Granite Coffee",   "small",      "F&B chain",         "medium", ["multi-location SKU sync", "shrinkage"],                   True, 0.60, 0.50, 0.0),
    ProspectProfile("Horizon Outdoor",  "medium",     "sporting goods",    "high",   ["seasonal kitting", "regional demand"],                    True, 0.71, 0.50, 0.0),
    ProspectProfile("Cobalt Components","medium",     "electronics dist.", "high",   ["BOM management", "lead-time variance"],                   True, 0.77, 0.50, 0.0),
    ProspectProfile("Verdant Garden",   "small",      "garden centre",     "medium", ["seasonal stock", "weather-driven demand"],                True, 0.56, 0.50, 0.0),
    # ---- eval split (last 4) -----------------------------------------------
    # Held-out rows are selected by POSITION (the last _EVAL_SIZE entries), so
    # new training profiles must be inserted above this marker.
    ProspectProfile("Falcon Sports",    "medium",     "sporting goods",    "high",   ["return rate spikes", "regional sizing"],                  True, 0.69, 0.50, 0.0),
    ProspectProfile("Maple & Co",       "small",      "specialty grocery", "medium", ["organic inventory", "seasonal sourcing"],                 True, 0.57, 0.50, 0.0),
    ProspectProfile("Skyline Pet",      "medium",     "pet supplies",      "high",   ["food expiration", "subscription kits"],                   True, 0.73, 0.50, 0.0),
    ProspectProfile("Helix Beauty",     "small",      "beauty retail",     "medium", ["palette variants", "promo windows"],                      True, 0.61, 0.50, 0.0),
]


# ---------------------------------------------------------------------------
# LEVEL 2 — Medium
# Budget hidden initially, one objection expected, demo required for close.
# ---------------------------------------------------------------------------

PROFILES_L2: List[ProspectProfile] = [
    # Positional columns follow the ProspectProfile field order:
    #   company_name, company_size, industry, budget_signal, pain_points,
    #   decision_maker, true_budget, close_threshold, stall_probability
    # Every Level 2 row has budget_signal="unknown", decision_maker=True, a
    # true_budget above its close_threshold (0.50) and stall_probability 0.0.
    ProspectProfile("Apex Logistics",   "enterprise", "logistics",         "unknown", ["route optimization", "driver coordination", "fuel tracking"], True, 0.70, 0.50, 0.0),
    ProspectProfile("Vertex Supply",    "medium",     "manufacturing",     "unknown", ["vendor visibility", "purchase delays"],                       True, 0.55, 0.50, 0.0),
    ProspectProfile("Polaris Freight",  "enterprise", "freight",           "unknown", ["dispatch SLA", "fleet maintenance"],                          True, 0.66, 0.50, 0.0),
    ProspectProfile("Cobra Builders",   "medium",     "construction",      "unknown", ["project costing", "subcontractor coordination"],              True, 0.60, 0.50, 0.0),
    ProspectProfile("Aegis Energy",     "enterprise", "utilities",         "unknown", ["asset uptime", "grid analytics"],                             True, 0.71, 0.50, 0.0),
    ProspectProfile("Crystal Foods",    "medium",     "food processing",   "unknown", ["batch traceability", "regulatory reporting"],                 True, 0.58, 0.50, 0.0),
    ProspectProfile("Atlas Steel",      "enterprise", "metals",            "unknown", ["yield optimization", "downtime reduction"],                   True, 0.65, 0.50, 0.0),
    ProspectProfile("Quartz Mobility",  "medium",     "mobility tech",     "unknown", ["fleet utilization", "telematics ingest"],                     True, 0.59, 0.50, 0.0),
    ProspectProfile("Beacon Insure",    "enterprise", "insurance",         "unknown", ["claims triage", "fraud signals"],                             True, 0.72, 0.50, 0.0),
    ProspectProfile("Tesseract Bio",    "medium",     "biotech",           "unknown", ["lab inventory", "experiment tracking"],                       True, 0.62, 0.50, 0.0),
    ProspectProfile("Pivot Media",      "enterprise", "media",             "unknown", ["content rights", "campaign attribution"],                     True, 0.69, 0.50, 0.0),
    ProspectProfile("Solstice Travel",  "medium",     "travel",            "unknown", ["booking variance", "supplier API churn"],                     True, 0.57, 0.50, 0.0),
    ProspectProfile("Anvil Robotics",   "enterprise", "robotics",          "unknown", ["fleet calibration", "OTA updates"],                           True, 0.74, 0.50, 0.0),
    ProspectProfile("Pacific Marine",   "medium",     "shipping",          "unknown", ["port turnaround", "container visibility"],                    True, 0.61, 0.50, 0.0),
    ProspectProfile("Lumen Telecom",    "enterprise", "telecom",           "unknown", ["service incidents", "field tech routing"],                    True, 0.68, 0.50, 0.0),
    # ---- eval split --------------------------------------------------------
    # Held-out rows are selected by POSITION (the last _EVAL_SIZE entries), so
    # new training profiles must be inserted above this marker.
    ProspectProfile("Onyx Logistics",   "enterprise", "logistics",         "unknown", ["last-mile delays", "warehouse handoffs"],                     True, 0.67, 0.50, 0.0),
    ProspectProfile("Sigma Industrial", "medium",     "industrial",        "unknown", ["MRO inventory", "supplier OTIF"],                             True, 0.56, 0.50, 0.0),
    ProspectProfile("Kepler Insurance", "enterprise", "insurance",         "unknown", ["renewal forecasting", "policy ops"],                          True, 0.70, 0.50, 0.0),
    ProspectProfile("Mosaic Energy",    "enterprise", "energy",            "unknown", ["asset health", "predictive maintenance"],                     True, 0.66, 0.50, 0.0),
]


# ---------------------------------------------------------------------------
# LEVEL 3 — Hard
# Budget hidden, two objections, possible stalling, decision maker may be absent.
# ---------------------------------------------------------------------------

PROFILES_L3: List[ProspectProfile] = [
    # Positional columns follow the ProspectProfile field order:
    #   company_name, company_size, industry, budget_signal, pain_points,
    #   decision_maker, true_budget, close_threshold, stall_probability
    # Every Level 3 row has budget_signal="unknown", decision_maker=False,
    # close_threshold 0.55, a true_budget at or above that threshold, and a
    # stall_probability of 0.25 or 0.30.
    ProspectProfile("Nova Financial",    "enterprise", "finance",       "unknown", ["compliance reporting", "audit trails", "data silos"],      False, 0.60, 0.55, 0.30),
    ProspectProfile("Atlas Health",      "enterprise", "healthcare",    "unknown", ["patient workflow delays", "reporting compliance"],         False, 0.65, 0.55, 0.25),
    ProspectProfile("Citadel Bank",      "enterprise", "banking",       "unknown", ["KYC automation", "fraud detection lag"],                   False, 0.62, 0.55, 0.30),
    ProspectProfile("Helios Hospitals",  "enterprise", "healthcare",    "unknown", ["EHR fragmentation", "billing reconciliation"],             False, 0.58, 0.55, 0.30),
    ProspectProfile("Orion Asset Mgmt",  "enterprise", "asset mgmt",    "unknown", ["risk reporting", "ESG data ingestion"],                    False, 0.66, 0.55, 0.25),
    ProspectProfile("Sable Pharma",      "enterprise", "pharma",        "unknown", ["GxP traceability", "trial data integrity"],                False, 0.61, 0.55, 0.30),
    ProspectProfile("Magellan Travel",   "enterprise", "travel ops",    "unknown", ["disruption response", "loyalty data"],                     False, 0.59, 0.55, 0.30),
    ProspectProfile("Crucible Defense",  "enterprise", "defense",       "unknown", ["clearance workflow", "supply chain audit"],                False, 0.63, 0.55, 0.25),
    ProspectProfile("Seraphim Care",     "enterprise", "elder care",    "unknown", ["caregiver scheduling", "regulatory reporting"],            False, 0.57, 0.55, 0.30),
    ProspectProfile("Polaris Reinsure",  "enterprise", "reinsurance",   "unknown", ["catastrophe modeling", "loss aggregation"],                False, 0.64, 0.55, 0.30),
    ProspectProfile("Vanguard Edu",      "enterprise", "education",     "unknown", ["enrollment ops", "compliance audits"],                     False, 0.55, 0.55, 0.25),
    ProspectProfile("Aurora Telecom",    "enterprise", "telecom",       "unknown", ["spectrum analytics", "tower asset mgmt"],                  False, 0.60, 0.55, 0.30),
    ProspectProfile("Trident Marine",    "enterprise", "marine",        "unknown", ["fleet compliance", "fuel arbitrage"],                      False, 0.58, 0.55, 0.30),
    ProspectProfile("Granite Mining",    "enterprise", "mining",        "unknown", ["asset uptime", "ESG reporting"],                           False, 0.62, 0.55, 0.30),
    ProspectProfile("Echelon Health",    "enterprise", "health-ins",    "unknown", ["claims adjudication", "provider network"],                 False, 0.59, 0.55, 0.30),
    # ---- eval split --------------------------------------------------------
    # Held-out rows are selected by POSITION (the last _EVAL_SIZE entries), so
    # new training profiles must be inserted above this marker.
    ProspectProfile("Castle Securities", "enterprise", "securities",    "unknown", ["trade surveillance", "settlement breaks"],                 False, 0.61, 0.55, 0.30),
    ProspectProfile("Lighthouse Care",   "enterprise", "elder care",    "unknown", ["staffing variance", "incident reporting"],                 False, 0.56, 0.55, 0.25),
    ProspectProfile("Crown Reinsurance", "enterprise", "reinsurance",   "unknown", ["catastrophe modeling", "treaty management"],               False, 0.63, 0.55, 0.30),
    ProspectProfile("Apex Pharma",       "enterprise", "pharma",        "unknown", ["clinical-trial reporting", "supply chain audit"],          False, 0.60, 0.55, 0.30),
]


# ---------------------------------------------------------------------------
# LEVEL 4 — Adversarial
# Misleading "high" budget signal: true_budget is below close_threshold
# and/or the decision maker is absent (in the current bank both hold for
# every profile).  Correct action is DISQUALIFY.
# ---------------------------------------------------------------------------

PROFILES_L4: List[ProspectProfile] = [
    # Positional columns follow the ProspectProfile field order:
    #   company_name, company_size, industry, budget_signal, pain_points,
    #   decision_maker, true_budget, close_threshold, stall_probability
    # Every Level 4 row advertises budget_signal="high" while its true_budget
    # (0.16-0.25) sits well below close_threshold (0.50), and has
    # decision_maker=False with stall_probability between 0.40 and 0.50.
    ProspectProfile("Cipher Tech",        "small", "technology",        "high", ["security", "compliance"],                              False, 0.20, 0.50, 0.50),
    ProspectProfile("BluePeak Studio",    "small", "creative agency",   "high", ["project visibility", "client reporting"],              False, 0.25, 0.50, 0.40),
    ProspectProfile("Nimbus Labs",        "small", "research",          "high", ["grant reporting", "experiment tracking"],              False, 0.18, 0.50, 0.45),
    ProspectProfile("Halo Consulting",    "small", "consulting",        "high", ["billable utilization", "client deliverables"],         False, 0.22, 0.50, 0.45),
    ProspectProfile("Spire Architects",   "small", "architecture",      "high", ["drawing revisions", "permit tracking"],                False, 0.24, 0.50, 0.40),
    ProspectProfile("Quill Publishing",   "small", "publishing",        "high", ["royalty tracking", "rights management"],               False, 0.17, 0.50, 0.50),
    ProspectProfile("Onyx Boutique",      "small", "fashion boutique",  "high", ["trend forecasting", "supplier mix"],                   False, 0.21, 0.50, 0.45),
    ProspectProfile("Topaz Cinema",       "small", "indie film",        "high", ["distribution rights", "festival logistics"],           False, 0.19, 0.50, 0.50),
    ProspectProfile("Mariner Charter",    "small", "yacht charter",     "high", ["seasonal demand", "crew scheduling"],                  False, 0.23, 0.50, 0.45),
    ProspectProfile("Velvet Catering",    "small", "catering",          "high", ["event variance", "ingredient costing"],                False, 0.16, 0.50, 0.50),
    ProspectProfile("Echo Photography",   "small", "studio",            "high", ["project pipelines", "asset licensing"],                False, 0.20, 0.50, 0.45),
    ProspectProfile("Stellar Wellness",   "small", "wellness",          "high", ["membership churn", "class scheduling"],                False, 0.22, 0.50, 0.45),
    ProspectProfile("Drift Digital",      "small", "agency",            "high", ["campaign attribution", "creative asset library"],      False, 0.19, 0.50, 0.50),
    ProspectProfile("Ember Theater",      "small", "performing arts",   "high", ["production budgeting", "ticket allocation"],           False, 0.18, 0.50, 0.45),
    ProspectProfile("Halcyon Crafts",     "small", "artisan retail",    "high", ["maker payouts", "fulfilment SLA"],                     False, 0.21, 0.50, 0.50),
    # ---- eval split --------------------------------------------------------
    # Held-out rows are selected by POSITION (the last _EVAL_SIZE entries), so
    # new training profiles must be inserted above this marker.
    ProspectProfile("Onyx Tech",          "small", "technology",        "high", ["zero-trust rollout", "compliance"],                    False, 0.19, 0.50, 0.50),
    ProspectProfile("Haven Studio",       "small", "creative agency",   "high", ["client-asset versioning", "billing transparency"],     False, 0.23, 0.50, 0.40),
    ProspectProfile("Beacon Indie",       "small", "publishing",        "high", ["distribution rights", "royalty splits"],               False, 0.17, 0.50, 0.50),
    ProspectProfile("Kindled Catering",   "small", "catering",          "high", ["event variance", "menu engineering"],                  False, 0.22, 0.50, 0.45),
]


# ---------------------------------------------------------------------------
# Splits
# ---------------------------------------------------------------------------

# Last `_EVAL_SIZE` of each list is the held-out eval split.
_EVAL_SIZE = 4


def _split(
    profiles: List[ProspectProfile],
    eval_size: Optional[int] = None,
) -> tuple[list, list]:
    """
    Partition a profile list into ``(train, eval)`` by position.

    The final ``eval_size`` entries form the eval part; everything before
    them is the train part.  Both parts are new lists (slices), so the input
    list is left untouched.

    Parameters
    ----------
    profiles  : ordered profile list; the held-out entries must be last
    eval_size : number of trailing entries to hold out; defaults to the
                module-level ``_EVAL_SIZE`` when omitted

    Returns
    -------
    (train, eval) : two lists whose concatenation equals ``profiles``

    Raises
    ------
    ValueError : if ``eval_size`` is negative
    """
    if eval_size is None:
        eval_size = _EVAL_SIZE
    if eval_size < 0:
        raise ValueError(f"eval_size must be >= 0, got {eval_size}")

    # Use an explicit cut index rather than negative slicing: with a size of
    # 0, `profiles[:-0]` is empty and `profiles[-0:]` is the whole list, which
    # would silently swap the two parts.  Clamping at 0 keeps the behaviour
    # for lists shorter than `eval_size` (everything goes to eval).
    cut = max(len(profiles) - eval_size, 0)
    return profiles[:cut], profiles[cut:]


# Partition every level's bank once, at import time, into (train, eval).
(
    (_TRAIN_L1, _EVAL_L1),
    (_TRAIN_L2, _EVAL_L2),
    (_TRAIN_L3, _EVAL_L3),
    (_TRAIN_L4, _EVAL_L4),
) = map(_split, (PROFILES_L1, PROFILES_L2, PROFILES_L3, PROFILES_L4))


# Difficulty level (1..4) -> profile pool, one mapping per split.
TRAIN_PROFILES = dict(
    enumerate((_TRAIN_L1, _TRAIN_L2, _TRAIN_L3, _TRAIN_L4), start=1)
)
EVAL_PROFILES = dict(
    enumerate((_EVAL_L1, _EVAL_L2, _EVAL_L3, _EVAL_L4), start=1)
)
ALL_PROFILES = dict(
    enumerate((PROFILES_L1, PROFILES_L2, PROFILES_L3, PROFILES_L4), start=1)
)


# ---------------------------------------------------------------------------
# Public API
# ---------------------------------------------------------------------------

def sample_profile(
    difficulty: int,
    split: str = "train",
    rng: Optional[random.Random] = None,
) -> ProspectProfile:
    """
    Sample one profile from the requested split.

    Parameters
    ----------
    difficulty : int (1..4); any value that is not a key of
                 ``TRAIN_PROFILES`` falls back to level 1
    split      : "train" | "eval" | "all"; any other value still samples
                 from the training pool, but now emits a ``RuntimeWarning``
                 so a mistyped split cannot go unnoticed
    rng        : optional pre-seeded RNG for reproducibility; the global
                 ``random`` module is used when omitted

    Returns
    -------
    ProspectProfile : one entry of the selected pool (the shared object
                      stored in the bank, not a copy)
    """
    # Local import keeps this function self-contained; `warnings` is stdlib.
    import warnings

    # Deliberate best-effort: unknown levels are served from level 1.
    if difficulty not in TRAIN_PROFILES:
        difficulty = 1

    pool: List[ProspectProfile]
    if split == "eval":
        pool = EVAL_PROFILES[difficulty]
    elif split == "all":
        pool = ALL_PROFILES[difficulty]
    else:
        if split != "train":
            # A typo such as "test" or "evaluation" would otherwise sample
            # training profiles silently and contaminate held-out metrics.
            warnings.warn(
                f"sample_profile: unknown split {split!r}; "
                "falling back to 'train'",
                RuntimeWarning,
                stacklevel=2,
            )
        pool = TRAIN_PROFILES[difficulty]

    return (rng or random).choice(pool)


def iter_train_profiles(difficulty: int) -> Iterator[ProspectProfile]:
    """
    Yield the training profiles of one difficulty level, in bank order.

    The lookup is lazy: a ``difficulty`` that is not a key of
    ``TRAIN_PROFILES`` raises ``KeyError`` on the first iteration step, not
    when the generator is created.
    """
    for profile in TRAIN_PROFILES[difficulty]:
        yield profile


def iter_eval_profiles(difficulty: int) -> Iterator[ProspectProfile]:
    """
    Yield the held-out eval profiles of one difficulty level, in bank order.

    The lookup is lazy: a ``difficulty`` that is not a key of
    ``EVAL_PROFILES`` raises ``KeyError`` on the first iteration step, not
    when the generator is created.
    """
    for profile in EVAL_PROFILES[difficulty]:
        yield profile