| from __future__ import annotations |
|
|
| import json |
| import zipfile |
| from pathlib import Path |
| from typing import Any |
|
|
| from .models import CATEGORY_LABELS, MigrationBundle |
|
|
|
|
# Project root directory: parents[1] is the parent of the package containing
# this module — used below to locate bundled files such as data/benchmark_result.json.
PROJECT_ROOT = Path(__file__).resolve().parents[1]
|
|
|
|
def generate_rocm_dockerfile(repo_name: str) -> str:
    """Render a ROCm-ready Dockerfile for *repo_name*.

    The image is based on the ROCm-enabled vLLM container, installs the
    repository's ``requirements.txt`` when present, pins AMD GPU environment
    variables, and ends with a torch/ROCm smoke-check as the default command.
    The returned string ends with a trailing newline.
    """
    workdir = f"/workspace/{repo_name}"
    dockerfile_lines = [
        "FROM vllm/vllm-openai-rocm:latest",
        "",
        f"WORKDIR {workdir}",
        f"COPY . {workdir}",
        "",
        "RUN if [ -f requirements.txt ]; then pip install --no-cache-dir -r requirements.txt; fi",
        "",
        "ENV HIP_VISIBLE_DEVICES=0",
        "ENV PYTORCH_HIP_ALLOC_CONF=expandable_segments:True",
        "",
        'CMD ["python", "-c", "import torch; print(\'torch\', torch.__version__); print(\'rocm_gpu_available\', torch.cuda.is_available())"]',
        "",
    ]
    return "\n".join(dockerfile_lines)
|
|
|
|
def generate_runbook(repo_name: str) -> str:
    """Render the AMD Developer Cloud validation runbook (markdown) for *repo_name*.

    The document walks a user through creating an AMD GPU VM, building the
    ROCm container, running a smoke check, serving vLLM on ROCm, and capturing
    benchmark metadata. The doubled backslashes below emit literal shell
    line-continuation backslashes in the rendered markdown code fences.
    """
    return f"""# AMD Developer Cloud Runbook

This runbook validates `{repo_name}` on AMD Developer Cloud without executing untrusted code inside the ROCmPort AI Space.

## 1. Create an AMD GPU VM

Use an AMD Developer Cloud VM with an AMD Instinct GPU and ROCm-ready Docker support.

## 2. Build the ROCm container

```bash
docker build -f Dockerfile.rocm -t rocmport-{repo_name.lower()} .
```

## 3. Run a smoke check

```bash
docker run --rm -it \\
  --device /dev/kfd \\
  --device /dev/dri \\
  --group-add video \\
  --ipc=host \\
  --network=host \\
  --security-opt seccomp=unconfined \\
  rocmport-{repo_name.lower()}
```

## 4. Run vLLM on ROCm

```bash
docker run --rm -it \\
  --device /dev/kfd \\
  --device /dev/dri \\
  --group-add video \\
  --ipc=host \\
  --network=host \\
  --security-opt seccomp=unconfined \\
  -v "$PWD:/workspace/{repo_name}" \\
  vllm/vllm-openai-rocm:latest \\
  vllm serve Qwen/Qwen3-Coder-Next-FP8 --tensor-parallel-size 1
```

## 5. Capture benchmark metadata

```bash
rocm-smi --showproductname --showmeminfo vram --showuse
python scripts/collect_benchmark_result.py --output benchmark_result.json
```

Replace `data/benchmark_result.json` with the captured result before final submission.
"""
|
|
|
|
def load_benchmark(path: Path | None = None) -> dict[str, Any]:
    """Load the AMD benchmark result JSON.

    Args:
        path: Optional explicit path to the benchmark file. Defaults to
            ``PROJECT_ROOT / "data" / "benchmark_result.json"``, preserving
            the original no-argument behavior.

    Returns:
        The parsed benchmark payload, or a sentinel dict with
        ``{"verified": False}`` and a ``status`` of ``"missing"`` when the
        file does not exist, or ``"invalid"`` when it exists but cannot be
        read or parsed as JSON.
    """
    if path is None:
        path = PROJECT_ROOT / "data" / "benchmark_result.json"
    if not path.exists():
        return {"verified": False, "status": "missing"}
    try:
        return json.loads(path.read_text(encoding="utf-8"))
    except (OSError, json.JSONDecodeError):
        # A corrupt or unreadable benchmark file should degrade gracefully
        # (like the missing case) instead of crashing report generation.
        return {"verified": False, "status": "invalid"}
|
|
|
|
def generate_report(bundle: MigrationBundle, qwen_section: str | None = None) -> str:
    """Render the markdown migration report for *bundle*.

    Args:
        bundle: Scan/migration results — repo name, before/after scores with
            per-category breakdowns, and scanner findings.
        qwen_section: Optional pre-rendered "Qwen Agent Notes" text; any falsy
            value (None or "") is replaced by a deterministic fallback sentence.

    Returns:
        The full report as a markdown string with a trailing newline.
    """
    lines = [
        f"# ROCmPort AI Migration Report: {bundle.repo_name}",
        "",
        "## AMD Readiness Score",
        "",
        f"- Before deterministic fixes: {bundle.before_score.total}/100",
        f"- Migration package generated: {bundle.after_score.total}/100",
        "- This score means ROCm migration artifacts were generated and are ready for AMD Developer Cloud validation; it is not a production certification.",
        "",
        "| Category | Before | Migration package |",
        "| --- | ---: | ---: |",
    ]
    # One table row per scored category, before/after values side by side.
    for category, label in CATEGORY_LABELS.items():
        lines.append(
            f"| {label} | {bundle.before_score.categories[category]} | {bundle.after_score.categories[category]} |"
        )


    lines.extend(["", "## Findings", ""])
    if not bundle.findings:
        lines.append("No ROCm migration blockers were found by the MVP scanner.")
    else:
        # Findings table; fall back to the raw category key when it has no label.
        lines.extend(["| Severity | Category | Location | Finding | Suggested fix |", "| --- | --- | --- | --- | --- |"])
        for finding in bundle.findings:
            lines.append(
                f"| {finding.severity} | {CATEGORY_LABELS.get(finding.category, finding.category)} | "
                f"`{finding.path}:{finding.line}` | {finding.message} | {finding.suggested_fix} |"
            )


    # Static trailer sections: artifact list, Qwen notes, and known risks.
    lines.extend(
        [
            "",
            "## Generated Artifacts",
            "",
            "- `rocm_patch.diff` contains deterministic MVP fixes.",
            "- `Dockerfile.rocm` uses the ROCm-enabled vLLM container.",
            "- `amd_developer_cloud_runbook.md` documents the validation path.",
            "- `benchmark_result.json` records the AMD benchmark schema and status.",
            "",
            "## Qwen Agent Notes",
            "",
            qwen_section
            or "Qwen endpoint was not configured. The report uses deterministic scanner output only.",
            "",
            "## Remaining Risks",
            "",
            "- CUDA C++ kernels, custom Triton kernels, and CUDA-only binary dependencies require manual review.",
            "- Uploaded repositories are not executed inside the Space; live validation belongs on AMD Developer Cloud.",
            "- ROCm performance depends on model, batch shape, vLLM version, ROCm version, and GPU instance configuration.",
        ]
    )
    return "\n".join(lines) + "\n"
|
|
|
|
def generate_cookbook() -> str:
    """Return the static CUDA-to-ROCm migration cookbook as a markdown string."""
    return """# ROCm Migration Cookbook

## PyTorch device handling

Use a runtime device abstraction instead of hardcoding `.cuda()` or `torch.device("cuda")` everywhere.

```python
import torch

# ROCm PyTorch exposes AMD GPUs through the torch.cuda namespace.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model = model.to(device)
inputs = inputs.to(device)
```

## GPU inspection

Replace NVIDIA-only commands with ROCm equivalents:

```bash
rocm-smi --showproductname --showmeminfo vram --showuse
```

## Containers

For vLLM serving on AMD GPUs, use the ROCm-enabled vLLM image:

```bash
docker pull vllm/vllm-openai-rocm:latest
```

Run with AMD GPU device access:

```bash
docker run --rm -it --device /dev/kfd --device /dev/dri --group-add video --ipc=host --network=host --security-opt seccomp=unconfined vllm/vllm-openai-rocm:latest
```

## Manual review cases

Manual migration is still required for CUDA C++ kernels, CUDA-only binary wheels, custom Triton kernels, and libraries that ship only CUDA builds.
"""
|
|
|
|
def generate_feedback() -> str:
    """Return the static ROCm / AMD Developer Cloud feedback document (markdown)."""
    return """# ROCm / AMD Developer Cloud Feedback

## What worked well

- The ROCm-enabled vLLM container gives developers a clear serving path for AMD Instinct GPUs.
- AMD Developer Cloud is well aligned with hackathon demos because developers can avoid local GPU setup.
- Qwen3-Coder-Next on AMD Instinct is a strong story for repo-level coding agents.

## Friction points to document during the live run

- Exact VM image, ROCm version, and Docker image should be easy to capture in benchmark logs.
- Users need obvious examples for replacing NVIDIA container flags and monitoring commands.
- More migration examples for common CUDA-first PyTorch repos would reduce onboarding time.

## Suggested product improvement

Publish a small official CUDA-to-ROCm migration checklist for PyTorch, vLLM, and Hugging Face inference projects, with copyable Docker commands for AMD Developer Cloud.
"""
|
|
|
|
def write_artifacts(bundle: MigrationBundle, output_dir: Path) -> dict[str, str]:
    """Write every migration artifact from *bundle* into *output_dir*.

    Creates the directory if needed, writes each artifact file (the benchmark
    payload is serialized as indented JSON), then packages all of them into
    ``rocmport_artifacts.zip`` in the same directory.

    Returns:
        Mapping of artifact filename to its path string, including the zip.
    """
    output_dir.mkdir(parents=True, exist_ok=True)
    artifacts = (
        ("rocm_patch.diff", bundle.patch_diff),
        ("Dockerfile.rocm", bundle.dockerfile),
        ("amd_developer_cloud_runbook.md", bundle.runbook),
        ("migration_report.md", bundle.report),
        ("benchmark_result.json", json.dumps(bundle.benchmark, indent=2)),
        ("ROCM_MIGRATION_COOKBOOK.md", bundle.cookbook),
        ("ROCM_FEEDBACK.md", bundle.feedback),
    )
    written: dict[str, str] = {}
    for name, text in artifacts:
        target = output_dir / name
        target.write_text(text, encoding="utf-8")
        written[name] = str(target)

    # Bundle the individual artifacts into one downloadable archive.
    zip_path = output_dir / "rocmport_artifacts.zip"
    with zipfile.ZipFile(zip_path, "w", compression=zipfile.ZIP_DEFLATED) as archive:
        for name, file_path in written.items():
            archive.write(file_path, arcname=name)
    written["rocmport_artifacts.zip"] = str(zip_path)
    return written
|
|