# vegarl / openenv.yaml
# ronitraj's picture
# Deploy Space without oversized raw dataset
# 4fbc241
# Top-level metadata describing this environment manifest.
name: InferenceGym
# Quoted so the version is kept as a string rather than type-inferred.
version: "1.0.0"
# Folded block scalar: the indented lines are joined with single spaces into
# one paragraph. The body must be indented deeper than the key to be part of
# the scalar.
description: >
  OpenEnv-compliant RL environment for LLM inference serving optimization.
  Teaches agents to make real-time serving configuration decisions for LLM
  infrastructure using trace-driven simulation grounded in Orca, vLLM, and
  Decima.
author: team-llmserve
# Tags attached to this environment.
tags:
  - openenv
  - rl
  - llm
  - inference
  - serving
# Route path for each operation, keyed by operation name.
# These entries must be nested under `endpoints`: written at column 0 they
# become top-level keys, `endpoints` itself is null, and the `tasks` route
# collides with the top-level `tasks` list.
endpoints:
  reset: /reset
  step: /step
  state: /state
  tasks: /tasks
  grader: /grader
  baseline: /baseline
  health: /health
# Task catalogue: one list item per task, each with an id, a display name,
# a description, a difficulty label, an episode length and SLO thresholds.
# Every field must be indented under its `- id:` item; otherwise it leaks to
# the top level and produces duplicate `name` / `description` keys.
# The p99_ttft_ms threshold tightens from 500 to 300 to 200 as the difficulty
# goes easy -> medium -> hard.
# NOTE(review): p99_ttft_ms is presumably the 99th-percentile
# time-to-first-token in milliseconds, and episode_length presumably counts
# steps -- confirm against the consumer.
tasks:
  - id: static_workload
    name: Static Uniform Workload
    # `>-` folds the lines into one string with no trailing newline.
    description: >-
      Steady 10 rps traffic with uniform prompt lengths.
      Tests basic queue pressure response.
    difficulty: easy
    episode_length: 200
    slo_thresholds:
      p99_ttft_ms: 500

  - id: bursty_workload
    name: Bursty ShareGPT Workload
    description: >-
      Alternating quiet/burst phases with real ShareGPT prompt distributions.
      Tests non-stationary traffic adaptation.
    difficulty: medium
    episode_length: 120
    slo_thresholds:
      p99_ttft_ms: 300

  - id: adversarial_multitenant
    name: Adversarial Multi-Tenant Serving
    description: >-
      Sinusoidal arrival with mega-prompt injections and multi-priority
      routing. Challenges frontier models.
    difficulty: hard
    episode_length: 200
    slo_thresholds:
      p99_ttft_ms: 200
# Observation fields, in declaration order. Each entry gives the field name
# and its declared type; numeric fields also carry min/max bounds, while the
# string-typed task_id has none.
observation_space:
  - name: queue_depth
    type: int
    min: 0
    max: 10000
  - name: active_requests
    type: int
    min: 0
    max: 512
  - name: kv_cache_occupancy
    type: float
    min: 0.0
    max: 1.0
  - name: mean_prompt_length
    type: float
    min: 0.0
    max: 10000.0
  - name: p50_ttft_ms
    type: float
    min: 0.0
    max: 10000.0
  - name: p99_ttft_ms
    type: float
    min: 0.0
    max: 10000.0
  - name: p50_itl_ms
    type: float
    min: 0.0
    max: 1000.0
  - name: throughput_tps
    type: float
    min: 0.0
    max: 1000.0
  - name: slo_compliance_rate
    type: float
    min: 0.0
    max: 1.0
  - name: gpu_memory_used_gb
    type: float
    min: 0.0
    max: 80.0
  - name: estimated_cost_per_1k
    type: float
    min: 0.0
    max: 1.0
  - name: request_arrival_rate
    type: float
    min: 0.0
    max: 500.0
  - name: spec_acceptance_rate
    type: float
    min: 0.0
    max: 1.0
  - name: eviction_events
    type: int
    min: 0
    max: 1000
  - name: step_index
    type: int
    min: 0
    max: 200
  - name: task_id
    type: string
# Action fields, in declaration order. int/float entries declare min/max
# bounds, the enum entry lists its allowed values, and bool entries carry no
# extra attributes.
action_space:
  - name: batch_cap
    type: int
    min: 1
    max: 512
  - name: kv_budget_fraction
    type: float
    min: 0.1
    max: 1.0
  - name: speculation_depth
    type: int
    min: 0
    max: 8
  - name: quantization_tier
    type: enum
    values:
      - FP16
      - INT8
      - INT4
  - name: prefill_decode_split
    type: bool
  - name: priority_routing
    type: bool
# Two-element ranges written as block sequences.
# NOTE(review): presumably ordered [lower, upper] -- confirm with the consumer.
reward_range:
  - -1.0
  - 1.0
grader_range:
  - 0.0
  - 1.0