# HelpDesk / openenv.yaml
# Last commit: "updating openenv.yaml" (f90756c), shown under Freakdivi's account
# OpenEnv manifest for the HelpDesk environment: identity, description, tags,
# and the settings used to launch the server.
spec_version: 1
name: helpdesk_env
version: "0.1.0"
# Folded scalar: the indented lines below are joined into a single paragraph.
description: >
  An OpenEnv RL environment simulating UPI banking customer support workflows.
  An AI agent classifies issues, retrieves the correct FAQ or escalation path,
  and completes a safe multi-turn support flow across three graded tasks of
  increasing difficulty.
author: Freakdivi
tags:
  - openenv
  - banking
  - upi
  - customer-support
  - rl-environment
type: space
runtime: fastapi
# Written as module:attribute; presumably the ASGI app import path (confirm).
app: server.app:app
port: 8000
# Matches the `medium` entry under `tasks`.
default_task: medium
# Graded tasks: easy, medium, hard. Each entry names its ticket dataset, a
# step limit, the reward bounds, and the grader configuration.
# All three graders share the same reward_source and score_field; only the
# listed grading functions differ.
tasks:
  - id: easy
    difficulty: easy
    description: Classify the customer's issue into the correct support category
    dataset: data/tickets/easy.json
    max_steps: 1
    reward_range: [0.0, 1.0]
    grader:
      type: python
      # module:attribute notation
      reward_source: server.helpdesk_environment:HelpdeskEnv.step
      score_field: reward.value
      functions:
        - graders.category_grader:grade_classification
        - graders.resolution_grader:grade_resolution
        - graders.score_utils:ensure_open_unit_interval

  - id: medium
    difficulty: medium
    description: Select the correct FAQ or escalate cases that require manual handling
    dataset: data/tickets/medium.json
    max_steps: 3
    reward_range: [0.0, 1.0]
    grader:
      type: python
      reward_source: server.helpdesk_environment:HelpdeskEnv.step
      score_field: reward.value
      functions:
        - graders.faq_grader:grade_faq_retrieval
        - graders.faq_grader:grade_escalation
        - graders.faq_grader:grade_operation_choice
        - graders.score_utils:ensure_open_unit_interval

  - id: hard
    difficulty: hard
    description: Run a multi-turn support conversation with clarification, guidance, and safe closure
    dataset: data/tickets/hard.json
    max_steps: 8
    reward_range: [0.0, 1.0]
    grader:
      type: python
      reward_source: server.helpdesk_environment:HelpdeskEnv.step
      score_field: reward.value
      functions:
        - graders.category_grader:grade_information_collection
        - graders.faq_grader:grade_faq_retrieval
        - graders.resolution_grader:grade_case_closure
        - graders.resolution_grader:grade_resolution
        - graders.score_utils:ensure_open_unit_interval
# Shape of an observation: an object whose fields map name -> type.
observation_space:
  type: object
  fields:
    case_id: string
    track: string
    customer_message: string
    conversation_history: array
    known_facts: object
    required_slots: array
    available_actions: array
    turn_number: integer
# Shape of an action: an object whose fields map name -> type.
# Fields tagged "(optional)" are marked that way in the type text itself.
action_space:
  type: object
  fields:
    # Presumably the allowed values, separated by "|" (confirm with the server).
    action_type: "classify | lookup_faq | ask_clarification | reply | escalate | resolve_ticket"
    category: string (optional)
    faq_id: string (optional)
    message: string (optional)
    fields_requested: array (optional)
    target: string (optional)
    operation: string (optional)
# Reward signal emitted by the environment.
reward:
  type: float
  # NOTE(review): the bounds here are the closed range [0.0, 1.0], while the
  # description below says score outputs stay inside the open interval (0, 1).
  # Confirm which one consumers should rely on.
  range: [0.0, 1.0]
  # Folded scalar: the indented lines below are joined into a single paragraph.
  description: >
    Partial reward is produced at each step and normalized by the environment.
    The final reward combines correctness, safety, resolution, efficiency, and
    penalties, with score outputs constrained to the open interval (0, 1) for
    submission compatibility.
# HTTP routes, each written as "<METHOD> <path>".
endpoints:
  reset: POST /reset
  step: POST /step
  state: GET /state
  health: GET /health
# Runtime settings. `framework` and `port` repeat the top-level `runtime` and
# `port` values; presumably they must be kept in sync (confirm).
runtime_config:
  framework: fastapi
  # Quoted on purpose: an unquoted 3.10 would be read as the float 3.1.
  python: "3.10"
  port: 8000