# ROCmPort-AI / scripts / collect_benchmark_result.py
# Deploy ROCmPort AI — CUDA-to-ROCm migration scanner (commit 786f63c, verified)
from __future__ import annotations
import argparse
import json
import subprocess
from datetime import datetime, timezone
from pathlib import Path
def main() -> None:
    """CLI entry point: collect a ROCmPort AI benchmark result shell as JSON.

    Parses optional benchmark metrics from the command line, probes the local
    ROCm/vLLM environment via `_run`, and writes the assembled record to the
    path given by --output. The record is marked "verified" only when every
    metric flag was supplied.
    """
    parser = argparse.ArgumentParser(description="Collect a ROCmPort AI benchmark result shell.")
    parser.add_argument("--output", default="benchmark_result.json")
    parser.add_argument("--model", default="Qwen/Qwen3-Coder-Next-FP8")
    # All four metric flags are optional floats; absence leaves the shell unverified.
    for metric_flag in ("--throughput", "--p50-latency-ms", "--p95-latency-ms", "--peak-vram-gb"):
        parser.add_argument(metric_flag, type=float, default=None)
    args = parser.parse_args()

    metrics = (args.throughput, args.p50_latency_ms, args.p95_latency_ms, args.peak_vram_gb)
    result = {
        # "verified" means every metric was explicitly provided on the CLI.
        "verified": all(metric is not None for metric in metrics),
        "captured_at": datetime.now(timezone.utc).isoformat(),
        "hardware": _run(["rocm-smi", "--showproductname"]),
        "rocm_version": _run(["bash", "-lc", "rocminfo | head -40"]),
        "vllm_version": _run(["python", "-m", "vllm", "--version"]),
        "model": args.model,
        # Fixed benchmark configuration this shell is meant to accompany.
        "prompt_config": {
            "input_tokens": 512,
            "output_tokens": 256,
            "concurrency": 8,
            "requests": 64,
        },
        "throughput_tokens_per_second": args.throughput,
        "p50_latency_ms": args.p50_latency_ms,
        "p95_latency_ms": args.p95_latency_ms,
        "peak_vram_gb": args.peak_vram_gb,
        "log_excerpt": "Attach vLLM benchmark output here before final submission.",
    }
    Path(args.output).write_text(json.dumps(result, indent=2), encoding="utf-8")
def _run(command: list[str]) -> str:
try:
completed = subprocess.run(command, capture_output=True, text=True, check=False, timeout=20)
except (OSError, subprocess.TimeoutExpired) as exc:
return f"unavailable: {exc}"
output = (completed.stdout or completed.stderr).strip()
return output[:2000] if output else f"command exited {completed.returncode} with no output"
# Standard script entry guard: run collection only when executed directly,
# not when imported as a module.
if __name__ == "__main__":
    main()