vegarl / llmserve_env /task_catalog.py
ronitraj's picture
Deploy Space without oversized raw dataset
4fbc241
from __future__ import annotations
import json
from pathlib import Path
from typing import Any
# Directory one level above the directory that contains this module.
ROOT_DIR = Path(__file__).resolve().parents[1]
# Workload configuration JSON, stored under <ROOT_DIR>/server/data/.
WORKLOAD_CONFIG_PATH = ROOT_DIR.joinpath("server", "data", "workload_configs.json")
def _load_catalog() -> list[dict[str, Any]]:
    """Read the workload config file and return its ``"tasks"`` entry.

    The file at ``WORKLOAD_CONFIG_PATH`` is read as UTF-8 and parsed as JSON
    on every call; nothing is cached between calls.

    Raises:
        OSError: If the config file cannot be opened or read.
        json.JSONDecodeError: If the file content is not valid JSON.
        KeyError: If the parsed document has no ``"tasks"`` key.
    """
    raw_text = WORKLOAD_CONFIG_PATH.read_text(encoding="utf-8")
    document = json.loads(raw_text)
    return document["tasks"]
def get_task_catalog() -> list[dict[str, Any]]:
    """Return every task entry from the workload catalog.

    This delegates to ``_load_catalog``, so the catalog is loaded anew on
    each call.
    """
    tasks = _load_catalog()
    return tasks
def get_task_config(task_id: str) -> dict[str, Any]:
    """Return the first catalog entry whose ``"id"`` equals ``task_id``.

    Args:
        task_id: Identifier compared against each task's ``"id"`` value.

    Returns:
        The matching task dictionary, as loaded from the catalog.

    Raises:
        KeyError: If no task in the catalog has the requested id.
    """
    # Lazy scan: stops at the first match, in catalog order.
    candidates = (entry for entry in _load_catalog() if entry["id"] == task_id)
    found = next(candidates, None)
    if found is None:
        raise KeyError(f"Unknown task_id: {task_id}")
    return found
def get_action_schema() -> dict[str, Any]:
    """Return a description of the action fields and their allowed values.

    The result maps each field name to a descriptor dictionary. Every
    descriptor carries a ``"type"`` tag (``"int"``, ``"float"``, ``"enum"``
    or ``"bool"``); numeric descriptors also give ``"min"`` and ``"max"``,
    and the enum descriptor lists its ``"values"``.

    A new dictionary is built on every call, so callers may mutate the
    result without affecting later calls.
    """
    schema: dict[str, Any] = {}

    # Numeric fields with explicit bounds.
    schema["batch_cap"] = {"type": "int", "min": 1, "max": 512}
    schema["kv_budget_fraction"] = {"type": "float", "min": 0.1, "max": 1.0}
    schema["speculation_depth"] = {"type": "int", "min": 0, "max": 8}

    # Field restricted to a fixed set of string values.
    schema["quantization_tier"] = {
        "type": "enum",
        "values": ["FP16", "INT8", "INT4"],
    }

    # Boolean switches.
    schema["prefill_decode_split"] = {"type": "bool"}
    schema["priority_routing"] = {"type": "bool"}

    return schema