Spaces:

ronitraj
/

vegarl

Running

App Files Files Community

vegarl / openenv.yaml

ronitraj

Deploy Space without oversized raw dataset

4fbc241 29 days ago

raw

history blame contribute delete

2.74 kB

	name: InferenceGym
	version: "1.0.0"
	description: >
	OpenEnv-compliant RL environment for LLM inference serving optimization.
	Teaches agents to make real-time serving configuration decisions for LLM
	infrastructure using trace-driven simulation grounded in Orca, vLLM, and Decima.
	author: team-llmserve
	tags:
	- openenv
	- rl
	- llm
	- inference
	- serving
	endpoints:
	reset: /reset
	step: /step
	state: /state
	tasks: /tasks
	grader: /grader
	baseline: /baseline
	health: /health
	tasks:
	- id: static_workload
	name: Static Uniform Workload
	description: "Steady 10 rps traffic with uniform prompt lengths. Tests basic queue pressure response."
	difficulty: easy
	episode_length: 200
	slo_thresholds:
	p99_ttft_ms: 500
	- id: bursty_workload
	name: Bursty ShareGPT Workload
	description: "Alternating quiet/burst phases with real ShareGPT prompt distributions. Tests non-stationary traffic adaptation."
	difficulty: medium
	episode_length: 120
	slo_thresholds:
	p99_ttft_ms: 300
	- id: adversarial_multitenant
	name: Adversarial Multi-Tenant Serving
	description: "Sinusoidal arrival with mega-prompt injections and multi-priority routing. Challenges frontier models."
	difficulty: hard
	episode_length: 200
	slo_thresholds:
	p99_ttft_ms: 200
	observation_space:
	- { name: queue_depth, type: int, min: 0, max: 10000 }
	- { name: active_requests, type: int, min: 0, max: 512 }
	- { name: kv_cache_occupancy, type: float, min: 0.0, max: 1.0 }
	- { name: mean_prompt_length, type: float, min: 0.0, max: 10000.0 }
	- { name: p50_ttft_ms, type: float, min: 0.0, max: 10000.0 }
	- { name: p99_ttft_ms, type: float, min: 0.0, max: 10000.0 }
	- { name: p50_itl_ms, type: float, min: 0.0, max: 1000.0 }
	- { name: throughput_tps, type: float, min: 0.0, max: 1000.0 }
	- { name: slo_compliance_rate, type: float, min: 0.0, max: 1.0 }
	- { name: gpu_memory_used_gb, type: float, min: 0.0, max: 80.0 }
	- { name: estimated_cost_per_1k, type: float, min: 0.0, max: 1.0 }
	- { name: request_arrival_rate, type: float, min: 0.0, max: 500.0 }
	- { name: spec_acceptance_rate, type: float, min: 0.0, max: 1.0 }
	- { name: eviction_events, type: int, min: 0, max: 1000 }
	- { name: step_index, type: int, min: 0, max: 200 }
	- { name: task_id, type: string }
	action_space:
	- { name: batch_cap, type: int, min: 1, max: 512 }
	- { name: kv_budget_fraction, type: float, min: 0.1, max: 1.0 }
	- { name: speculation_depth, type: int, min: 0, max: 8 }
	- { name: quantization_tier, type: enum, values: [FP16, INT8, INT4] }
	- { name: prefill_decode_split, type: bool }
	- { name: priority_routing, type: bool }
	reward_range: [-1.0, 1.0]
	grader_range: [0.0, 1.0]