---
# Environment manifest for InferenceGym.
# Declares the environment's metadata, endpoint paths, task catalogue,
# observation/action spaces, and the reward/grader score ranges.

name: InferenceGym
# Quoted so the version is always loaded as a string.
version: "1.0.0"
description: >
  OpenEnv-compliant RL environment for LLM inference serving optimization.
  Teaches agents to make real-time serving configuration decisions for LLM
  infrastructure using trace-driven simulation grounded in Orca, vLLM, and
  Decima.
author: team-llmserve

tags:
  - openenv
  - rl
  - llm
  - inference
  - serving

# Endpoint name -> URL path.
# NOTE(review): presumably paths on the environment's HTTP server; the host
# and HTTP methods are not specified in this file.
endpoints:
  reset: /reset
  step: /step
  state: /state
  tasks: /tasks
  grader: /grader
  baseline: /baseline
  health: /health

# Task catalogue, listed from easy to hard. The p99_ttft_ms threshold
# tightens as difficulty rises (500 -> 300 -> 200).
# NOTE(review): episode_length is presumably a step count - confirm against
# the consumer.
tasks:
  - id: static_workload
    name: Static Uniform Workload
    description: "Steady 10 rps traffic with uniform prompt lengths. Tests basic queue pressure response."
    difficulty: easy
    episode_length: 200
    slo_thresholds:
      p99_ttft_ms: 500

  - id: bursty_workload
    name: Bursty ShareGPT Workload
    description: "Alternating quiet/burst phases with real ShareGPT prompt distributions. Tests non-stationary traffic adaptation."
    difficulty: medium
    episode_length: 120
    slo_thresholds:
      p99_ttft_ms: 300

  - id: adversarial_multitenant
    name: Adversarial Multi-Tenant Serving
    description: "Sinusoidal arrival with mega-prompt injections and multi-priority routing. Challenges frontier models."
    difficulty: hard
    episode_length: 200
    slo_thresholds:
      p99_ttft_ms: 200

# Observation fields. Each entry gives the field name, its type, and (for
# numeric fields) the declared min/max bounds.
observation_space:
  - { name: queue_depth, type: int, min: 0, max: 10000 }
  - { name: active_requests, type: int, min: 0, max: 512 }
  - { name: kv_cache_occupancy, type: float, min: 0.0, max: 1.0 }
  - { name: mean_prompt_length, type: float, min: 0.0, max: 10000.0 }
  - { name: p50_ttft_ms, type: float, min: 0.0, max: 10000.0 }
  - { name: p99_ttft_ms, type: float, min: 0.0, max: 10000.0 }
  - { name: p50_itl_ms, type: float, min: 0.0, max: 1000.0 }
  - { name: throughput_tps, type: float, min: 0.0, max: 1000.0 }
  - { name: slo_compliance_rate, type: float, min: 0.0, max: 1.0 }
  - { name: gpu_memory_used_gb, type: float, min: 0.0, max: 80.0 }
  - { name: estimated_cost_per_1k, type: float, min: 0.0, max: 1.0 }
  - { name: request_arrival_rate, type: float, min: 0.0, max: 500.0 }
  - { name: spec_acceptance_rate, type: float, min: 0.0, max: 1.0 }
  - { name: eviction_events, type: int, min: 0, max: 1000 }
  # The max of 200 equals the largest episode_length declared under tasks.
  - { name: step_index, type: int, min: 0, max: 200 }
  - { name: task_id, type: string }

# Action fields the agent sets. Numeric entries carry min/max bounds, the
# enum entry lists its allowed values, and bool entries have no bounds.
action_space:
  - { name: batch_cap, type: int, min: 1, max: 512 }
  - { name: kv_budget_fraction, type: float, min: 0.1, max: 1.0 }
  - { name: speculation_depth, type: int, min: 0, max: 8 }
  - { name: quantization_tier, type: enum, values: [FP16, INT8, INT4] }
  - { name: prefill_decode_split, type: bool }
  - { name: priority_routing, type: bool }

# [min, max] pairs.
# NOTE(review): presumably inclusive bounds on the per-step reward and on the
# grader score respectively - confirm against the consumer.
reward_range: [-1.0, 1.0]
grader_range: [0.0, 1.0]