---
# Environment specification for CustomerSupportEnv: identifying metadata
# followed by the catalogue of tasks the environment exposes.

# Environment metadata.
name: CustomerSupportEnv
# Quoted so the version is always read as a string.
version: "1.0.0"
description: >
  A real-world customer support reinforcement learning environment where an AI
  agent handles inbound support tickets. The agent must search a knowledge
  base, empathise with customers, offer concrete solutions, and resolve
  tickets efficiently. Models the genuine complexity of Tier-1 customer
  support: multi-turn conversation, retrieval-augmented reasoning, and
  satisfaction optimisation.
author: OpenEnv Submission
domain: customer-support
tags: [openenv, customer-support, nlp, retrieval, multi-turn, real-world]

# Task catalogue. Each entry names a ticket, a difficulty label, and the
# maximum number of turns for that task.
tasks:
  - id: task_1
    name: "Resolve a Standard Auth Ticket"
    difficulty: easy
    ticket: TKT-001
    max_turns: 8
    description: >
      Handle a frustrated customer locked out of their account.
      Optimal policy: search_kb → empathize → offer_solution → resolve.

  - id: task_2
    name: "Handle a Multi-Step Billing Dispute"
    difficulty: medium
    ticket: TKT-003
    max_turns: 10
    description: >
      Resolve a billing discrepancy. Requires clarification before diagnosis.
      Generic solutions are penalised; agent must cite a specific credit
      amount.

  - id: task_3
    name: "Triage a Critical Time-Sensitive Bug"
    difficulty: hard
    ticket: TKT-006
    max_turns: 8
    description: >
      Enterprise customer with a compliance deadline. Data export stuck for
      6 hours. Two-part solution required (priority queue + partial export).
      Escalation is penalised. Tests urgency awareness and multi-step
      planning.

# Structure of the observation object: one entry per field, giving its type
# and, where declared, its allowed values and nullability.
observation_space:
  type: object
  fields:
    ticket_id:
      type: string
      nullable: true
    task_id:
      type: string
    status:
      type: string
      enum: [idle, open, resolved, escalated, timeout]
    sentiment:
      type: string
      enum: [positive, neutral, frustrated, angry]
      nullable: true
    priority:
      type: string
      enum: [low, medium, high, urgent]
      nullable: true
    category:
      type: string
      enum: [auth, billing, fulfillment, bug, sales, general]
      nullable: true
    turn:
      type: integer
      minimum: 0
    max_turns:
      type: integer
    history:
      type: array
      # NOTE(review): items are given as name -> type pairs here, unlike
      # kb_results below, which uses a `type:` key; confirm the consumer
      # expects this shape before normalising.
      items:
        role: string
        text: string
        turn: integer
    kb_results:
      type: array
      items:
        type: string
    kb_searched:
      type: boolean
    empathized:
      type: boolean
    clarified:
      type: boolean
    solution_offered:
      type: boolean
    escalated:
      type: boolean
    cumulative_reward:
      type: number
    done:
      type: boolean

# Structure of the action object: an action type plus an optional text payload.
action_space:
  type: object
  fields:
    action_type:
      type: string
      enum:
        - search_kb
        - empathize
        - ask_clarify
        - offer_solution
        - escalate
        - resolve
        - send_message
    payload:
      type: string
      nullable: true
      description: >
        Required for offer_solution (solution text), ask_clarify (question),
        and send_message (message body). Optional for others.

# Per-action reward shaping. Component values are descriptive strings, not
# numbers; they are quoted because they contain `+`, `;`, and parentheses.
reward_function:
  type: shaped
  components:
    search_kb: "+2.0 (first call only; -1.0 duplicate)"
    empathize: "+1.0 (first call only)"
    ask_clarify: "+1.0 (first call only)"
    offer_solution: "+3.0 × quality_score (0–1); -1.0 if KB not searched first"
    escalate: "-1.0"
    resolve_good: "+5.0 + csat × 2.0 (when solution offered)"
    resolve_bad: "-3.0 (when no solution offered)"
    timeout: "-2.0"
  # The three CSAT weights sum to 1.0.
  csat_components:
    empathized: 0.30
    kb_searched: 0.30
    solution_offered: 0.40

# Grader configuration. The weights listed for each task sum to 1.0.
graders:
  # Quoted so this number-looking value is always read as a string.
  scoring: "0.0_to_1.0"
  deterministic: true
  task_1_weights:
    kb_searched: 0.30
    empathized: 0.25
    solution_quality: 0.25
    resolved: 0.20
  task_2_weights:
    ask_clarify: 0.20
    kb_searched: 0.20
    solution_quality: 0.30
    empathized: 0.15
    resolved: 0.15
  task_3_weights:
    kb_searched: 0.20
    empathized: 0.15
    solution_quality: 0.35
    no_escalation: 0.15
    resolved: 0.15

# HTTP endpoints, written as "<METHOD> <path>".
endpoints:
  reset: "POST /reset"
  step: "POST /step"
  state: "GET /state"
  tasks: "GET /tasks"
  grade: "POST /grade"
  health: "GET /health"
  spec: "GET /openenv.yaml"

# Reference scores for the named model; `average` equals the arithmetic mean
# of the three task scores.
baseline_scores:
  task_1: 0.85
  task_2: 0.78
  task_3: 0.65
  average: 0.76
  model: gpt-4o-mini

# Hugging Face deployment settings.
huggingface:
  space_sdk: docker
  port: 7860