tazwarrrr committed on
Commit
485813e
·
1 Parent(s): bb9523d

react add

Browse files
.gitignore CHANGED
@@ -25,6 +25,11 @@ backend/.env
25
  mock_rocprof_output.json
26
  *.db
27
 
 
 
 
 
 
28
  # OS junk
29
  .DS_Store
30
  Thumbs.db
 
25
  mock_rocprof_output.json
26
  *.db
27
 
28
+ # Node
29
+ node_modules/
30
+ frontend/dist/
31
+ frontend/.env.local
32
+
33
  # OS junk
34
  .DS_Store
35
  Thumbs.db
README.md CHANGED
@@ -259,14 +259,13 @@ This is the gap between "it compiles" and "it is correct."
259
 
260
  All demo kernels migrated, compiled, and profiled on real MI300X hardware (AMD DevCloud, ROCm 7.2, gfx942).
261
 
262
- | Kernel | Total Changes | Critical AMD Bugs Found | Status |
263
- |--------|--------------|------------------------|--------|
264
- | reduction | 9 | warp-32 final stage (silent wrong results) | Compiled |
265
- | vector_add | 7 | threadIdx%32 wavefront mismatch | Compiled |
266
- | matrix_multiply | 11 | warp-32 + LDS bank conflicts | Compiled |
267
- | convolution_2d | 13 | warp-32 + LDS padding | ✅ Compiled |
268
-
269
- `data_source: real_rocm` — verified on AMD DevCloud MI300X instance.
270
 
271
  ## License
272
 
 
259
 
260
  All demo kernels migrated, compiled, and profiled on real MI300X hardware (AMD DevCloud, ROCm 7.2, gfx942).
261
 
262
+ | Kernel | Input | Baseline HIP | Optimized HIP | Speedup |
263
+ |--------|-------|-------------|---------------|---------|
264
+ | matrix_multiply | 512x512 fp32 | 0.068ms | 0.026ms | 2.61x |
265
+ | reduction | 16M elements | | 0.019ms | PASS (correct) |
266
+ | vector_add | 32M elements | | 0.099ms | 4077.6 GB/s |
267
+
268
+ Hardware: AMD Instinct MI300X VF (gfx942), 192GB HBM3, ROCm 7.2
 
269
 
270
  ## License
271
 
backend/graph/__init__.py ADDED
File without changes
backend/graph/pipeline.py ADDED
@@ -0,0 +1,489 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # pylint: disable=broad-exception-caught
2
+ import asyncio
3
+ import json
4
+ from typing import Literal
5
+
6
+ from langgraph.graph import StateGraph, END
7
+
8
+ from backend.graph.state import MigrationState
9
+
10
+ # ─── Node implementations ──────────────────────────────────────────────────────
11
+
12
+
13
async def analyze_node(state: MigrationState) -> dict:
    """Run the analyzer agent over the submitted CUDA source.

    Emits a 'running' event, then either a 'failed' event (returning a
    None result and migration_success=False) or a 'done' event with a
    detailed summary of kernels, workload type, and difficulty.
    """
    from backend.agents import analyzer as analyzer_agent

    iteration = state.get("iteration", 0)

    def _evt(status, message, detail=None):
        # Fixed event shape consumed downstream by the SSE stream.
        return {
            "agent": "analyzer",
            "status": status,
            "message": message,
            "detail": detail,
            "iteration": iteration,
        }

    new_events = [
        _evt(
            "running",
            "Scanning CUDA code for kernels, APIs, and hardware-specific issues...",
        )
    ]

    try:
        result = await asyncio.to_thread(analyzer_agent.run, state["cuda_code"])
    except Exception as exc:
        new_events.append(_evt("failed", "Analysis failed", str(exc)))
        return {"analyzer_result": None, "events": new_events, "migration_success": False}

    detail_parts = [
        f"Found {len(result.kernels_found)} kernel(s): {', '.join(result.kernels_found)}",
        f"Workload: {result.workload_type.value}",
        f"Difficulty: {result.difficulty} - {result.difficulty_reason}",
    ]
    if result.warp_size_issue:
        detail_parts.append(f"WARP SIZE ISSUE: {result.warp_size_detail}")
    if result.sharding_detected:
        detail_parts.append(
            "Multi-GPU sharding detected; review if needed on MI300X memory capacity."
        )
    if result.prediction:
        detail_parts.append(result.prediction)

    summary = (
        f"Found {len(result.kernels_found)} kernel(s) | "
        f"{result.workload_type.value} workload | Difficulty: {result.difficulty}"
    )
    new_events.append(_evt("done", summary, "\n".join(detail_parts)))

    return {"analyzer_result": result, "events": new_events}
68
+
69
+
70
async def translate_node(state: MigrationState) -> dict:
    """Translate CUDA to HIP: hipify-clang first, then an LLM correction pass.

    Skips (with a 'failed' event) when the analyzer produced no result;
    otherwise reports the hipify/LLM change counts in the 'done' event.
    """
    from backend.agents import translator as translator_agent

    iteration = state.get("iteration", 0)

    def _evt(status, message, detail=None):
        return {
            "agent": "translator",
            "status": status,
            "message": message,
            "detail": detail,
            "iteration": iteration,
        }

    new_events = [
        _evt("running", "Running hipify-clang (pass 1) then LLM correction (pass 2)...")
    ]

    analyzer_result = state.get("analyzer_result")
    if analyzer_result is None:
        # Upstream analysis failed; nothing sensible to translate against.
        new_events.append(_evt("failed", "Translation skipped — analysis did not complete"))
        return {"translator_result": None, "events": new_events}

    try:
        result = await asyncio.to_thread(
            translator_agent.run, state["cuda_code"], analyzer_result
        )
    except Exception as exc:
        new_events.append(_evt("failed", "Translation failed", str(exc)))
        return {"translator_result": None, "events": new_events}

    summary = (
        f"{result.total_changes} changes "
        f"({result.hipify_changes} hipify + {result.llm_changes} LLM)"
    )
    detail = (
        f"Total changes: {result.total_changes} "
        f"({result.hipify_changes} hipify, {result.llm_changes} LLM)\n"
        f"Warp size corrected: {analyzer_result.warp_size_issue}\n"
        "Kernel launch syntax updated"
    )
    new_events.append(_evt("done", summary, detail))

    return {"translator_result": result, "events": new_events}
131
+
132
+
133
async def optimize_node(state: MigrationState) -> dict:
    """Apply MI300X-specific optimizations to the translated HIP code.

    Bumps the iteration counter on every invocation. When a prior tester
    result exists (retry path), the profiler feedback is forwarded to the
    optimizer agent and extra coordinator/retrying events are emitted.
    """
    from backend.agents import optimizer as optimizer_agent

    iteration = state.get("iteration", 0) + 1  # bump on each optimizer invocation
    analyzer_result = state.get("analyzer_result")
    translator_result = state.get("translator_result")
    prev_tester_result = state.get("tester_result")  # set on retry path
    is_retry = prev_tester_result is not None

    def _evt(agent, status, message, detail=None):
        return {
            "agent": agent,
            "status": status,
            "message": message,
            "detail": detail,
            "iteration": iteration,
        }

    new_events: list[dict] = []

    if is_retry:
        # Surface the retry decision plus the optimizer's new attempt.
        new_events.append(
            _evt(
                "coordinator",
                "running",
                "Performance regressed, retrying optimizer with profiler feedback...",
                f"Profiler feedback: {prev_tester_result.notes}",
            )
        )
        new_events.append(
            _evt(
                "optimizer",
                "retrying",
                f"Trying alternative optimization strategy (iteration {iteration})...",
                f"Previous strategy regressed. Feedback: {prev_tester_result.notes}",
            )
        )
    else:
        new_events.append(
            _evt(
                "optimizer",
                "running",
                f"Applying AMD MI300X-specific optimizations (iteration {iteration})...",
            )
        )

    if translator_result is None:
        new_events.append(
            _evt("optimizer", "failed", "Optimization skipped — translation did not complete")
        )
        return {"optimizer_result": None, "iteration": iteration, "events": new_events}

    previous_feedback = prev_tester_result.notes if is_retry else None

    try:
        result = await asyncio.to_thread(
            optimizer_agent.run,
            translator_result.hip_code,  # always start from translated base
            analyzer_result,
            iteration,
            previous_feedback,
        )
    except Exception as exc:
        failure_msg = "Re-optimization failed" if is_retry else "Optimization failed"
        new_events.append(_evt("optimizer", "failed", failure_msg, str(exc)))
        return {"optimizer_result": None, "iteration": iteration, "events": new_events}

    change_lines = "\n".join(f"- {c['description']}" for c in result.changes)
    new_events.append(
        _evt("optimizer", "done", f"{len(result.changes)} optimization(s) applied", change_lines)
    )

    return {"optimizer_result": result, "iteration": iteration, "events": new_events}
221
+
222
+
223
async def test_node(state: MigrationState) -> dict:
    """Compile and profile the optimized HIP code via the tester agent.

    Emits 'failed' events for skipped runs, exceptions, compile/profiling
    failures, or a performance regression (speedup < 0.95); otherwise a
    'done' event with execution/bandwidth/bottleneck details.
    """
    from backend.agents import tester as tester_agent

    iteration = state.get("iteration", 0)
    analyzer_result = state.get("analyzer_result")
    optimizer_result = state.get("optimizer_result")

    def _evt(status, message, detail=None):
        return {
            "agent": "tester",
            "status": status,
            "message": message,
            "detail": detail,
            "iteration": iteration,
        }

    new_events = [
        _evt(
            "running",
            f"Compiling with hipcc and profiling with rocprof (iteration {iteration})...",
        )
    ]

    if optimizer_result is None:
        new_events.append(_evt("failed", "Testing skipped — optimization did not complete"))
        return {
            "tester_result": None,
            "migration_success": False,
            "events": new_events,
        }

    try:
        result = await asyncio.to_thread(
            tester_agent.run,
            optimizer_result.optimized_code,
            analyzer_result,
            iteration,
            state.get("kernel_name", "custom"),
        )
    except Exception as exc:
        new_events.append(_evt("failed", "Testing failed", str(exc)))
        return {
            "tester_result": None,
            "migration_success": False,
            "events": new_events,
        }

    if not result.success:
        new_events.append(_evt("failed", "Compilation or profiling failed", result.notes))
        return {
            "tester_result": result,
            "migration_success": False,
            "events": new_events,
        }

    if result.speedup < 0.95:
        # Regression: flagged as failed so the router can send us back to
        # the optimizer (tester_result is still returned for feedback).
        new_events.append(
            _evt(
                "failed",
                f"Iteration {iteration}: {result.speedup}x vs baseline HIP (regression)",
                (
                    f"Bandwidth utilized: {result.bandwidth_utilized}%\n"
                    f"{result.notes}"
                ),
            )
        )
    else:
        new_events.append(
            _evt(
                "done",
                f"Iteration {iteration}: {result.speedup}x vs baseline HIP",
                (
                    f"Execution time: {result.execution_ms:.1f}ms\n"
                    f"Memory bandwidth: {result.bandwidth_utilized:.1f}% utilized\n"
                    f"Bottleneck type: {result.bottleneck}\n"
                    f"{result.notes}"
                ),
            )
        )

    return {"tester_result": result, "events": new_events}
326
+
327
+
328
async def coordinate_node(state: MigrationState) -> dict:
    """Build the final migration report from all upstream agent results.

    Emits a failed coordinator event (and an empty report) if any agent
    did not complete; otherwise constructs a FinalReport, optionally
    generating a simplified explanation in simple mode, and embeds the
    serialized report in the 'done' event detail for SSE consumers.

    Fixes: removed the duplicate `CostEstimate` import inside the
    fallback branch, and the FinalReport field list is now built once
    instead of being duplicated verbatim across two constructor calls.
    """
    from backend.agents.coordinator import (
        calculate_cost_estimate,
        simplify_explanation,
        _build_amd_explanation,
    )
    from backend.models import FinalReport, CostEstimate

    analyzer_result = state.get("analyzer_result")
    translator_result = state.get("translator_result")
    optimizer_result = state.get("optimizer_result")
    tester_result = state.get("tester_result")
    iteration = state.get("iteration", 0)
    simple_mode = state.get("simple_mode", False)

    new_events = [
        {
            "agent": "coordinator",
            "status": "running",
            "message": "Generating migration report...",
            "detail": None,
            "iteration": iteration,
        }
    ]

    # Hard failure path — one or more agents did not complete
    if tester_result is None or translator_result is None or optimizer_result is None:
        new_events.append(
            {
                "agent": "coordinator",
                "status": "failed",
                "message": "Pipeline did not complete successfully",
                "detail": "One or more agents failed before the report could be generated.",
                "iteration": iteration,
            }
        )
        return {
            "migration_success": False,
            "final_report": {},
            "events": new_events,
        }

    amd_explanation = _build_amd_explanation(analyzer_result, tester_result)

    try:
        cost_estimate = calculate_cost_estimate(analyzer_result)
    except Exception:
        # Best-effort fallback so a cost-model failure never blocks the report.
        cost_estimate = CostEstimate(
            manual_porting_weeks="3-6 weeks",
            rocmport_minutes="Varies by kernel",
            estimated_savings="$20,000-$50,000",
            complexity_factor="Medium",
        )

    total_changes = translator_result.total_changes + len(optimizer_result.changes)

    # Field set shared by the interim report (fed to simplify_explanation)
    # and the final report — built once so the two cannot drift apart.
    base_fields = dict(
        migration_success=tester_result.success,
        speedup=tester_result.speedup,
        bandwidth_utilized=tester_result.bandwidth_utilized,
        total_changes=total_changes,
        bottleneck=tester_result.bottleneck,
        amd_advantage_explanation=amd_explanation,
        iterations=iteration,
        hip_code=translator_result.hip_code,
        optimized_code=optimizer_result.optimized_code,
        verification=tester_result.verification,
        static_risk_report=analyzer_result.static_risk_report if analyzer_result else None,
        data_source=tester_result.data_source or "simulated",
    )

    # Interim report (without cost/simplified fields) is only needed when
    # the caller asked for a simplified explanation.
    simplified = (
        simplify_explanation(FinalReport(**base_fields))
        if simple_mode
        else amd_explanation
    )

    report = FinalReport(
        **base_fields,
        cost_estimate=cost_estimate,
        simplified_explanation=simplified,
    )

    report_dict = report.model_dump()

    new_events.append(
        {
            "agent": "coordinator",
            "status": "done",
            "message": "Migration complete",
            "detail": json.dumps(report_dict),
            "iteration": iteration,
        }
    )

    return {
        "migration_success": report.migration_success,
        "final_report": report_dict,
        "events": new_events,
    }
438
+
439
+
440
+ # ─── Conditional routing ───────────────────────────────────────────────────────
441
+
442
def should_retry_decision(state: MigrationState) -> Literal["retry", "done"]:
    """Decide the edge out of the tester node.

    Returns "retry" (route back to the optimizer) only when the run
    compiled successfully but regressed in performance and retry budget
    remains; every other outcome routes to the coordinator ("done").
    """
    result = state.get("tester_result")
    # No result at all, or a hard compile/run failure: let the
    # coordinator report it as-is.
    if result is None or not getattr(result, "success", True):
        return "done"

    measured = getattr(result, "speedup", 1.0) or 1.0
    regressed = float(measured) < 0.95
    budget_left = state.get("iteration", 0) < state.get("max_iterations", 3)
    return "retry" if regressed and budget_left else "done"
455
+
456
+
457
+ # ─── Graph builder ─────────────────────────────────────────────────────────────
458
+
459
def build_pipeline():
    """Construct and compile the LangGraph StateGraph for the migration pipeline.

    Topology: analyzer → translator → optimizer → tester, then a
    conditional edge from the tester either back to the optimizer
    ("retry") or on to the coordinator ("done"), which ends the graph.
    """
    graph = StateGraph(MigrationState)

    # Register the five agent nodes (insertion order preserved).
    node_map = {
        "analyzer": analyze_node,
        "translator": translate_node,
        "optimizer": optimize_node,
        "tester": test_node,
        "coordinator": coordinate_node,
    }
    for node_name, node_fn in node_map.items():
        graph.add_node(node_name, node_fn)

    graph.set_entry_point("analyzer")
    for src, dst in (
        ("analyzer", "translator"),
        ("translator", "optimizer"),
        ("optimizer", "tester"),
    ):
        graph.add_edge(src, dst)

    graph.add_conditional_edges(
        "tester",
        should_retry_decision,
        {
            "retry": "optimizer",  # performance regression + iterations remaining
            "done": "coordinator",  # acceptable result or hard failure
        },
    )

    graph.add_edge("coordinator", END)

    return graph.compile()


# Module-level compiled pipeline (reused across requests)
pipeline = build_pipeline()
backend/graph/state.py ADDED
@@ -0,0 +1,29 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from typing import TypedDict, Optional, List, Any, Annotated
2
+ import operator
3
+
4
+
5
class MigrationState(TypedDict):
    """LangGraph state dict threaded through every node of the migration pipeline."""

    # ── Input ──────────────────────────────────────────────────────────────────
    cuda_code: str      # raw CUDA source submitted for migration
    kernel_name: str    # kernel label forwarded to the tester agent
    simple_mode: bool   # when True the coordinator adds a simplified explanation

    # ── Intermediate results (pydantic model instances stored as Any) ──────────
    analyzer_result: Optional[Any]  # AnalyzerResult
    translator_result: Optional[Any]  # TranslatorResult
    optimizer_result: Optional[Any]  # OptimizerResult
    tester_result: Optional[Any]  # TesterResult

    # ── Control flow ───────────────────────────────────────────────────────────
    iteration: int  # incremented each time optimizer node runs
    max_iterations: int  # default 3
    # NOTE(review): should_retry is initialized by callers but routing is done
    # by should_retry_decision(); this field appears unused — confirm.
    should_retry: bool

    # ── Output ─────────────────────────────────────────────────────────────────
    migration_success: bool  # overall outcome; False on any hard failure
    final_report: dict  # FinalReport.model_dump() payload, {} on failure

    # ── SSE event accumulator — LangGraph appends each node's new events ───────
    # operator.add reducer: each node returns a *list of new events*;
    # LangGraph concatenates them into a single growing list.
    events: Annotated[List[dict], operator.add]
backend/main.py CHANGED
@@ -2,7 +2,7 @@
2
 
3
  from backend.agents.analyzer import AnalyzerResult, WorkloadType
4
  from backend.agents.tester import run as run_tester
5
- from backend.agents.coordinator import run_pipeline
6
  from backend.models import PortRequest, ColdStartRequest, AggregateMetricsRequest
7
  from fastapi.staticfiles import StaticFiles
8
  from fastapi.responses import StreamingResponse
@@ -97,29 +97,61 @@ async def benchmark_report():
97
  @app.post("/port")
98
  async def port_cuda_code(req: PortRequest):
99
  """
100
- Main endpoint. Streams SSE events as the agent pipeline runs.
101
- Each event is a JSON AgentEvent object.
102
  """
103
  if not req.cuda_code or len(req.cuda_code.strip()) < 10:
104
  raise HTTPException(status_code=400, detail="No CUDA code provided")
105
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
106
  async def event_stream():
 
107
  try:
108
- async for event in run_pipeline(req.cuda_code, req.kernel_name or "custom", req.simple_mode or False):
109
- data = json.dumps(event.model_dump())
110
- yield f"data: {data}\n\n"
111
- # Let the client breathe between events
112
- await asyncio.sleep(0.05)
113
- except Exception as e:
114
- error_event = {
115
- "agent": "coordinator",
116
- "status": "failed",
117
- "message": "Pipeline error",
118
- "detail": str(e)
119
- }
120
- yield f"data: {json.dumps(error_event)}\n\n"
121
  finally:
122
- yield "data: [DONE]\n\n"
123
 
124
  return StreamingResponse(
125
  event_stream(),
@@ -127,23 +159,44 @@ async def port_cuda_code(req: PortRequest):
127
  headers={
128
  "Cache-Control": "no-cache",
129
  "X-Accel-Buffering": "no",
130
- }
131
  )
132
 
133
 
134
  async def _collect_pipeline_events(cuda_code: str, kernel_name: str, simple_mode: bool = False) -> tuple[list[dict], dict | None]:
135
- """Collect all pipeline events and extract final report payload when present."""
136
  events: list[dict] = []
137
  final_report = None
138
 
139
- async for event in run_pipeline(cuda_code, kernel_name, simple_mode):
140
- dumped = event.model_dump()
141
- events.append(dumped)
142
- if dumped.get("agent") == "coordinator" and dumped.get("status") == "done" and dumped.get("detail"):
143
- try:
144
- final_report = json.loads(dumped["detail"])
145
- except (json.JSONDecodeError, TypeError):
146
- final_report = None
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
147
 
148
  return events, final_report
149
 
 
2
 
3
  from backend.agents.analyzer import AnalyzerResult, WorkloadType
4
  from backend.agents.tester import run as run_tester
5
+ from backend.graph.pipeline import pipeline as migration_pipeline
6
  from backend.models import PortRequest, ColdStartRequest, AggregateMetricsRequest
7
  from fastapi.staticfiles import StaticFiles
8
  from fastapi.responses import StreamingResponse
 
97
  @app.post("/port")
98
  async def port_cuda_code(req: PortRequest):
99
  """
100
+ Main endpoint. Streams SSE events as the LangGraph pipeline runs.
101
+ Each event is a JSON object matching the AgentEvent schema.
102
  """
103
  if not req.cuda_code or len(req.cuda_code.strip()) < 10:
104
  raise HTTPException(status_code=400, detail="No CUDA code provided")
105
 
106
+ queue: asyncio.Queue = asyncio.Queue()
107
+
108
+ async def _run_graph():
109
+ initial_state = {
110
+ "cuda_code": req.cuda_code,
111
+ "kernel_name": req.kernel_name or "custom",
112
+ "simple_mode": req.simple_mode or False,
113
+ "analyzer_result": None,
114
+ "translator_result": None,
115
+ "optimizer_result": None,
116
+ "tester_result": None,
117
+ "iteration": 0,
118
+ "max_iterations": 3,
119
+ "should_retry": False,
120
+ "migration_success": False,
121
+ "final_report": {},
122
+ "events": [],
123
+ }
124
+ try:
125
+ async for chunk in migration_pipeline.astream(
126
+ initial_state, stream_mode="updates"
127
+ ):
128
+ for _node_name, node_output in chunk.items():
129
+ for event in node_output.get("events", []):
130
+ await queue.put(event)
131
+ await asyncio.sleep(0.05) # let client breathe
132
+ except Exception as exc:
133
+ await queue.put(
134
+ {
135
+ "agent": "coordinator",
136
+ "status": "failed",
137
+ "message": "Pipeline error",
138
+ "detail": str(exc),
139
+ }
140
+ )
141
+ finally:
142
+ await queue.put(None) # sentinel
143
+
144
  async def event_stream():
145
+ task = asyncio.create_task(_run_graph())
146
  try:
147
+ while True:
148
+ event = await queue.get()
149
+ if event is None:
150
+ yield "data: [DONE]\n\n"
151
+ break
152
+ yield f"data: {json.dumps(event)}\n\n"
 
 
 
 
 
 
 
153
  finally:
154
+ task.cancel()
155
 
156
  return StreamingResponse(
157
  event_stream(),
 
159
  headers={
160
  "Cache-Control": "no-cache",
161
  "X-Accel-Buffering": "no",
162
+ },
163
  )
164
 
165
 
166
  async def _collect_pipeline_events(cuda_code: str, kernel_name: str, simple_mode: bool = False) -> tuple[list[dict], dict | None]:
167
+ """Collect all pipeline events via LangGraph and extract the final report."""
168
  events: list[dict] = []
169
  final_report = None
170
 
171
+ initial_state = {
172
+ "cuda_code": cuda_code,
173
+ "kernel_name": kernel_name,
174
+ "simple_mode": simple_mode,
175
+ "analyzer_result": None,
176
+ "translator_result": None,
177
+ "optimizer_result": None,
178
+ "tester_result": None,
179
+ "iteration": 0,
180
+ "max_iterations": 3,
181
+ "should_retry": False,
182
+ "migration_success": False,
183
+ "final_report": {},
184
+ "events": [],
185
+ }
186
+
187
+ async for chunk in migration_pipeline.astream(initial_state, stream_mode="updates"):
188
+ for _node_name, node_output in chunk.items():
189
+ for event in node_output.get("events", []):
190
+ events.append(event)
191
+ if (
192
+ event.get("agent") == "coordinator"
193
+ and event.get("status") == "done"
194
+ and event.get("detail")
195
+ ):
196
+ try:
197
+ final_report = json.loads(event["detail"])
198
+ except (json.JSONDecodeError, TypeError):
199
+ final_report = None
200
 
201
  return events, final_report
202
 
backend/requirements.txt CHANGED
@@ -10,3 +10,4 @@ crewai==0.55.2
10
  python-dotenv==1.0.0
11
  aiofiles==23.2.1
12
  jinja2==3.1.2
 
 
10
  python-dotenv==1.0.0
11
  aiofiles==23.2.1
12
  jinja2==3.1.2
13
+ langgraph>=0.2.0
frontend/index.html CHANGED
@@ -1,1724 +1,18 @@
1
- <!DOCTYPE html>
2
  <html lang="en">
3
-
4
- <head>
5
- <meta charset="UTF-8">
6
- <meta name="viewport" content="width=device-width, initial-scale=1.0">
7
- <title>ROCmPort AI</title>
8
- <link rel="preconnect" href="https://fonts.googleapis.com">
9
- <link
10
- href="https://fonts.googleapis.com/css2?family=JetBrains+Mono:wght@400;500&family=Space+Grotesk:wght@500;600;700&display=swap"
11
- rel="stylesheet">
12
- <style>
13
- :root {
14
- --bg: #030303;
15
- --s1: #0a0a0b;
16
- --s2: #121214;
17
- --s3: #1a1a1e;
18
- --b1: rgba(255, 255, 255, 0.08);
19
- --b2: rgba(255, 255, 255, 0.15);
20
- --red: #ff3344;
21
- --red-glow: rgba(255, 51, 68, 0.4);
22
- --green: #00ff88;
23
- --green-glow: rgba(0, 255, 136, 0.4);
24
- --yellow: #ffcc00;
25
- --cyan: #00d9ff;
26
- --muted: #88888e;
27
- --t1: #a1a1aa;
28
- --t2: #d4d4d8;
29
- --t3: #ffffff;
30
- --mono: 'JetBrains Mono', monospace;
31
- --sans: 'Space Grotesk', sans-serif;
32
- --spring: cubic-bezier(0.34, 1.56, 0.64, 1);
33
- }
34
-
35
- * {
36
- margin: 0;
37
- padding: 0;
38
- box-sizing: border-box;
39
- cursor: none !important;
40
- }
41
-
42
- .hide {
43
- display: none !important;
44
- }
45
-
46
- body {
47
- background: var(--bg);
48
- color: var(--t1);
49
- font-family: var(--sans);
50
- font-size: 14px;
51
- line-height: 1.6;
52
- overflow-x: hidden;
53
- min-height: 100vh;
54
- }
55
-
56
- /* Animated Gradient Background */
57
- body::before {
58
- content: '';
59
- position: fixed;
60
- inset: 0;
61
- background:
62
- radial-gradient(circle at 20% 30%, rgba(0, 217, 255, 0.05), transparent 40%),
63
- radial-gradient(circle at 80% 70%, rgba(255, 51, 68, 0.05), transparent 40%),
64
- radial-gradient(circle at 50% 50%, rgba(0, 255, 136, 0.03), transparent 60%);
65
- z-index: -1;
66
- animation: bgMove 20s ease-in-out infinite alternate;
67
- }
68
-
69
- @keyframes bgMove {
70
- 0% {
71
- transform: scale(1) translate(0, 0);
72
- }
73
-
74
- 50% {
75
- transform: scale(1.1) translate(20px, -20px);
76
- }
77
-
78
- 100% {
79
- transform: scale(1) translate(-20px, 20px);
80
- }
81
- }
82
-
83
- .w {
84
- max-width: 1200px;
85
- margin: 0 auto;
86
- padding: 32px 24px;
87
- position: relative;
88
- }
89
-
90
- /* Container Glow */
91
- .w::after {
92
- content: '';
93
- position: absolute;
94
- inset: 0;
95
- background: radial-gradient(circle at 50% 0%, rgba(255, 51, 68, 0.08), transparent 70%);
96
- pointer-events: none;
97
- z-index: -1;
98
- }
99
-
100
- header {
101
- padding-bottom: 24px;
102
- border-bottom: 1px solid var(--b1);
103
- display: flex;
104
- align-items: center;
105
- justify-content: space-between;
106
- margin-bottom: 24px;
107
- }
108
-
109
- .logo {
110
- font-weight: 700;
111
- font-size: 18px;
112
- color: var(--t3);
113
- letter-spacing: -0.02em;
114
- }
115
-
116
- .logo em {
117
- font-style: normal;
118
- color: var(--red);
119
- text-shadow: 0 0 15px var(--red-glow);
120
- }
121
-
122
- .hr {
123
- font-size: 12px;
124
- color: var(--muted);
125
- display: flex;
126
- align-items: center;
127
- gap: 10px;
128
- background: var(--s1);
129
- padding: 6px 12px;
130
- border-radius: 20px;
131
- border: 1px solid var(--b1);
132
- }
133
-
134
- .hd {
135
- width: 6px;
136
- height: 6px;
137
- border-radius: 50%;
138
- background: var(--green);
139
- box-shadow: 0 0 10px var(--green-glow);
140
- }
141
-
142
- .hd.on {
143
- animation: pulse 2s ease-in-out infinite;
144
- }
145
-
146
- @keyframes pulse {
147
-
148
- 0%,
149
- 100% {
150
- opacity: 1;
151
- transform: scale(1);
152
- }
153
-
154
- 50% {
155
- opacity: 0.4;
156
- transform: scale(0.8);
157
- }
158
- }
159
-
160
- .g {
161
- display: grid;
162
- grid-template-columns: 1.2fr 0.8fr;
163
- gap: 24px;
164
- padding: 0;
165
- }
166
-
167
- .fs {
168
- grid-column: 1 / -1;
169
- }
170
-
171
- @media (max-width: 900px) {
172
- .g {
173
- grid-template-columns: 1fr;
174
- }
175
- }
176
-
177
- /* Card Styling */
178
- .p {
179
- background: var(--s1);
180
- border: 1px solid var(--b1);
181
- border-radius: 12px;
182
- overflow: hidden;
183
- display: flex;
184
- flex-direction: column;
185
- box-shadow: 0 4px 20px rgba(0, 0, 0, 0.4);
186
- backdrop-filter: blur(10px);
187
- transition: transform 0.3s var(--spring), border-color 0.3s ease;
188
- }
189
-
190
- .p:hover {
191
- border-color: var(--b2);
192
- }
193
-
194
- .ph {
195
- padding: 12px 16px;
196
- border-bottom: 1px solid var(--b1);
197
- display: flex;
198
- align-items: center;
199
- justify-content: space-between;
200
- font-size: 12px;
201
- color: var(--muted);
202
- background: rgba(255, 255, 255, 0.02);
203
- }
204
-
205
- .ph b {
206
- color: var(--red);
207
- font-weight: 600;
208
- text-transform: uppercase;
209
- letter-spacing: 0.05em;
210
- }
211
-
212
- textarea.code {
213
- width: 100%;
214
- flex: 1;
215
- min-height: 300px;
216
- background: var(--bg);
217
- border: none;
218
- color: var(--t2);
219
- font-family: var(--mono);
220
- font-size: 13px;
221
- line-height: 1.7;
222
- padding: 20px;
223
- resize: vertical;
224
- outline: none;
225
- caret-color: var(--red);
226
- will-change: transform;
227
- }
228
-
229
- .db {
230
- padding: 12px 16px;
231
- border-top: 1px solid var(--b1);
232
- display: flex;
233
- align-items: center;
234
- gap: 8px;
235
- background: var(--s1);
236
- }
237
-
238
- .db .l {
239
- font-size: 11px;
240
- color: var(--muted);
241
- font-weight: 500;
242
- }
243
-
244
- .ch {
245
- font-family: var(--sans);
246
- font-size: 11px;
247
- padding: 4px 12px;
248
- background: var(--s2);
249
- border: 1px solid var(--b1);
250
- border-radius: 6px;
251
- color: var(--t1);
252
- cursor: pointer;
253
- transition: all 0.2s var(--spring);
254
- }
255
-
256
- .ch:hover {
257
- background: var(--s3);
258
- color: var(--t3);
259
- transform: translateY(-1px);
260
- border-color: var(--b2);
261
- }
262
-
263
- .ch.on {
264
- background: var(--red);
265
- border-color: var(--red);
266
- color: #fff;
267
- box-shadow: 0 0 15px var(--red-glow);
268
- }
269
-
270
- .bg {
271
- margin: 16px;
272
- padding: 14px;
273
- background: var(--red);
274
- border: none;
275
- border-radius: 8px;
276
- color: #fff;
277
- font-family: var(--sans);
278
- font-size: 14px;
279
- font-weight: 700;
280
- cursor: pointer;
281
- transition: all 0.3s var(--spring);
282
- text-transform: uppercase;
283
- letter-spacing: 0.05em;
284
- box-shadow: 0 4px 15px var(--red-glow);
285
- }
286
-
287
- .bg:hover {
288
- background: #ff4d5a;
289
- transform: translateY(-2px);
290
- box-shadow: 0 6px 20px var(--red-glow);
291
- }
292
-
293
- .bg:active {
294
- transform: translateY(0);
295
- }
296
-
297
- .bg:disabled {
298
- opacity: 0.4;
299
- cursor: not-allowed;
300
- transform: none;
301
- box-shadow: none;
302
- }
303
-
304
- /* Agent log */
305
- .al {
306
- padding: 12px;
307
- display: flex;
308
- flex-direction: column;
309
- gap: 8px;
310
- }
311
-
312
- .ar {
313
- padding: 12px 16px;
314
- border-radius: 8px;
315
- background: rgba(255, 255, 255, 0.03);
316
- border: 1px solid transparent;
317
- transition: all 0.4s var(--spring);
318
- animation: slideIn 0.5s var(--spring) forwards;
319
- opacity: 0;
320
- transform: translateX(20px);
321
- }
322
-
323
- @keyframes slideIn {
324
- to {
325
- opacity: 1;
326
- transform: translateX(0);
327
- }
328
- }
329
-
330
- .ar.run {
331
- border-color: var(--cyan);
332
- background: rgba(0, 217, 255, 0.05);
333
- }
334
-
335
- .ar.done {
336
- border-color: var(--green);
337
- background: rgba(0, 255, 136, 0.05);
338
- }
339
-
340
- .ar.fail {
341
- border-color: var(--red);
342
- background: rgba(255, 51, 68, 0.05);
343
- }
344
-
345
- .ar.retry {
346
- border-color: var(--yellow);
347
- background: rgba(255, 204, 0, 0.05);
348
- animation: pulse-border 1.5s ease-in-out infinite;
349
- }
350
-
351
- @keyframes pulse-border {
352
- 50% {
353
- border-color: rgba(255, 204, 0, 0.2);
354
- }
355
- }
356
-
357
- .at {
358
- display: flex;
359
- align-items: center;
360
- gap: 12px;
361
- }
362
-
363
- .an {
364
- font-size: 10px;
365
- font-weight: 700;
366
- color: var(--muted);
367
- min-width: 90px;
368
- text-transform: uppercase;
369
- letter-spacing: 0.1em;
370
- }
371
-
372
- .am {
373
- font-size: 13px;
374
- color: var(--t2);
375
- font-weight: 500;
376
- }
377
-
378
- .ad {
379
- font-size: 11px;
380
- color: var(--muted);
381
- margin-top: 4px;
382
- padding-left: 102px;
383
- white-space: pre-wrap;
384
- line-height: 1.6;
385
- max-height: 100px;
386
- overflow-y: auto;
387
- }
388
-
389
- .ad .w {
390
- color: var(--yellow);
391
- font-weight: 600;
392
- }
393
-
394
- .ad .g {
395
- color: var(--green);
396
- font-weight: 600;
397
- }
398
-
399
- /* Horizontal Timeline */
400
- .timeline {
401
- display: flex;
402
- justify-content: space-between;
403
- padding: 16px 20px;
404
- background: rgba(255, 255, 255, 0.02);
405
- border-bottom: 1px solid var(--b1);
406
- margin-bottom: 8px;
407
- }
408
-
409
- .node {
410
- display: flex;
411
- flex-direction: column;
412
- align-items: center;
413
- gap: 6px;
414
- position: relative;
415
- flex: 1;
416
- }
417
-
418
- .node::after {
419
- content: '';
420
- position: absolute;
421
- top: 12px;
422
- left: 50%;
423
- width: 100%;
424
- height: 2px;
425
- background: var(--b1);
426
- z-index: 0;
427
- }
428
-
429
- .node:last-child::after {
430
- display: none;
431
- }
432
-
433
- .ni {
434
- width: 24px;
435
- height: 24px;
436
- border-radius: 50%;
437
- background: var(--s3);
438
- border: 2px solid var(--b1);
439
- display: flex;
440
- align-items: center;
441
- justify-content: center;
442
- font-size: 12px;
443
- z-index: 1;
444
- transition: all 0.4s var(--spring);
445
- }
446
-
447
- .node.on .ni {
448
- background: var(--cyan);
449
- border-color: var(--cyan);
450
- color: #000;
451
- box-shadow: 0 0 15px var(--cyan);
452
- }
453
-
454
- .node.done .ni {
455
- background: var(--green);
456
- border-color: var(--green);
457
- color: #000;
458
- box-shadow: 0 0 15px var(--green);
459
- }
460
-
461
- .node.fail .ni {
462
- background: var(--red);
463
- border-color: var(--red);
464
- color: #fff;
465
- }
466
-
467
- .node.retry .ni {
468
- animation: pulse-node 1s var(--spring) infinite;
469
- background: var(--yellow);
470
- border-color: var(--yellow);
471
- }
472
-
473
- @keyframes pulse-node {
474
-
475
- 0%,
476
- 100% {
477
- transform: scale(1);
478
- }
479
-
480
- 50% {
481
- transform: scale(1.2);
482
- }
483
- }
484
-
485
- .nl {
486
- font-size: 9px;
487
- font-weight: 700;
488
- color: var(--muted);
489
- text-transform: uppercase;
490
- letter-spacing: 0.05em;
491
- }
492
-
493
- .node.on .nl,
494
- .node.done .nl {
495
- color: var(--t3);
496
- }
497
-
498
- /* Tabs */
499
- .tabs {
500
- display: flex;
501
- gap: 8px;
502
- }
503
-
504
- .tab {
505
- background: var(--s2);
506
- border: 1px solid var(--b1);
507
- padding: 6px 16px;
508
- border-radius: 8px;
509
- font-family: var(--sans);
510
- font-size: 12px;
511
- font-weight: 600;
512
- color: var(--muted);
513
- cursor: pointer;
514
- transition: all 0.2s var(--spring);
515
- }
516
-
517
- .tab:hover {
518
- color: var(--t2);
519
- background: var(--s3);
520
- }
521
-
522
- .tab.on {
523
- color: var(--t3);
524
- background: var(--red);
525
- border-color: var(--red);
526
- box-shadow: 0 0 10px var(--red-glow);
527
- }
528
-
529
- .tc {
530
- display: none;
531
- padding: 0;
532
- animation: fadeIn 0.4s ease;
533
- }
534
-
535
- .tc.on {
536
- display: block;
537
- }
538
-
539
- @keyframes fadeIn {
540
- from {
541
- opacity: 0;
542
- transform: translateY(10px);
543
- }
544
-
545
- to {
546
- opacity: 1;
547
- transform: translateY(0);
548
- }
549
- }
550
-
551
- /* Summary row */
552
- .sum-row {
553
- padding: 24px;
554
- display: flex;
555
- align-items: center;
556
- gap: 32px;
557
- flex-wrap: wrap;
558
- border-bottom: 1px solid var(--b1);
559
- background: rgba(0, 255, 136, 0.02);
560
- }
561
-
562
- .sum-big {
563
- font-size: 32px;
564
- font-weight: 800;
565
- color: var(--green);
566
- line-height: 1;
567
- letter-spacing: -0.02em;
568
- text-shadow: 0 0 20px var(--green-glow);
569
- }
570
-
571
- .sum-big .u {
572
- font-size: 13px;
573
- font-weight: 500;
574
- color: var(--muted);
575
- margin-left: 4px;
576
- display: block;
577
- margin-top: 4px;
578
- letter-spacing: 0;
579
- }
580
-
581
- .sum-big .vic {
582
- font-size: 11px;
583
- color: var(--cyan);
584
- font-weight: 600;
585
- display: block;
586
- margin-top: 8px;
587
- text-shadow: none;
588
- opacity: 0.8;
589
- }
590
-
591
- .sum-sep {
592
- width: 1px;
593
- height: 40px;
594
- background: var(--b1);
595
- }
596
-
597
- .sum-chk {
598
- display: flex;
599
- align-items: center;
600
- gap: 8px;
601
- font-size: 12px;
602
- color: var(--t2);
603
- font-weight: 500;
604
- }
605
-
606
- .sum-dot {
607
- width: 8px;
608
- height: 8px;
609
- border-radius: 50%;
610
- flex-shrink: 0;
611
- }
612
-
613
- .sum-dot.ok {
614
- background: var(--green);
615
- box-shadow: 0 0 8px var(--green-glow);
616
- }
617
-
618
- .sum-dot.no {
619
- background: var(--red);
620
- box-shadow: 0 0 8px var(--red-glow);
621
- }
622
-
623
- .sum-dot.na {
624
- background: var(--muted);
625
- box-shadow: none;
626
- }
627
-
628
- .sum-type {
629
- font-size: 11px;
630
- color: var(--cyan);
631
- text-transform: uppercase;
632
- letter-spacing: 0.1em;
633
- font-weight: 700;
634
- padding: 4px 10px;
635
- background: rgba(0, 217, 255, 0.1);
636
- border-radius: 4px;
637
- }
638
-
639
- .sum-bar {
640
- padding: 16px 24px;
641
- display: flex;
642
- align-items: center;
643
- gap: 12px;
644
- flex-wrap: wrap;
645
- border-bottom: 1px solid var(--b1);
646
- }
647
-
648
- .bs {
649
- font-family: var(--sans);
650
- font-size: 11px;
651
- font-weight: 700;
652
- padding: 8px 16px;
653
- border-radius: 8px;
654
- border: 1px solid var(--b1);
655
- background: var(--s2);
656
- color: var(--t2);
657
- cursor: pointer;
658
- transition: all 0.2s var(--spring);
659
- text-transform: uppercase;
660
- letter-spacing: 0.05em;
661
- }
662
-
663
- .bs:hover {
664
- border-color: var(--b2);
665
- transform: translateY(-1px);
666
- background: var(--s3);
667
- }
668
-
669
- .bs.r {
670
- background: var(--bg);
671
- border-color: var(--red);
672
- color: var(--red);
673
- }
674
-
675
- .bs.r:hover {
676
- background: var(--red);
677
- color: #fff;
678
- box-shadow: 0 4px 15px var(--red-glow);
679
- }
680
-
681
- .bs.gr {
682
- background: var(--green);
683
- border-color: var(--green);
684
- color: #000;
685
- }
686
-
687
- .bs.gr:hover {
688
- box-shadow: 0 4px 15px var(--green-glow);
689
- transform: translateY(-2px);
690
- }
691
-
692
- .sp {
693
- flex: 1;
694
- }
695
-
696
- /* Details tab */
697
- .dm {
698
- display: grid;
699
- grid-template-columns: repeat(5, 1fr);
700
- border-bottom: 1px solid var(--b1);
701
- }
702
-
703
- @media (max-width: 800px) {
704
- .dm {
705
- grid-template-columns: repeat(2, 1fr);
706
- }
707
- }
708
-
709
- .di {
710
- padding: 20px;
711
- border-right: 1px solid var(--b1);
712
- background: rgba(255, 255, 255, 0.01);
713
- }
714
-
715
- .di:last-child {
716
- border-right: none;
717
- }
718
-
719
- .dl {
720
- font-size: 10px;
721
- color: var(--muted);
722
- text-transform: uppercase;
723
- letter-spacing: 0.1em;
724
- margin-bottom: 8px;
725
- font-weight: 700;
726
- }
727
-
728
- .dv {
729
- font-size: 20px;
730
- font-weight: 800;
731
- line-height: 1;
732
- margin-bottom: 4px;
733
- color: var(--t3);
734
- }
735
-
736
- .dv.g {
737
- color: var(--green);
738
- }
739
-
740
- .dv.c {
741
- color: var(--cyan);
742
- }
743
-
744
- .dv.y {
745
- color: var(--yellow);
746
- }
747
-
748
- .dv.t {
749
- color: var(--t2);
750
- font-size: 13px;
751
- }
752
-
753
- .ds {
754
- font-size: 10px;
755
- color: var(--muted);
756
- line-height: 1.4;
757
- }
758
-
759
- /* Benchmark bars */
760
- .bk {
761
- padding: 24px;
762
- border-bottom: 1px solid var(--b1);
763
- }
764
-
765
- .bk-t {
766
- font-size: 11px;
767
- color: var(--muted);
768
- text-transform: uppercase;
769
- letter-spacing: 0.1em;
770
- margin-bottom: 16px;
771
- font-weight: 700;
772
- }
773
-
774
- .br {
775
- display: flex;
776
- align-items: center;
777
- gap: 16px;
778
- margin-bottom: 12px;
779
- }
780
-
781
- .br:last-child {
782
- margin-bottom: 0;
783
- }
784
-
785
- .bl {
786
- font-size: 12px;
787
- color: var(--t2);
788
- width: 140px;
789
- flex-shrink: 0;
790
- font-weight: 500;
791
- }
792
-
793
- .bt {
794
- flex: 1;
795
- height: 8px;
796
- background: var(--bg);
797
- border-radius: 4px;
798
- overflow: hidden;
799
- border: 1px solid var(--b1);
800
- }
801
-
802
- .bf {
803
- height: 100%;
804
- border-radius: 4px;
805
- transition: width 1s var(--spring);
806
- width: 0;
807
- }
808
-
809
- .bf.bad {
810
- background: linear-gradient(90deg, #ff334466, #ff3344);
811
- box-shadow: 0 0 10px rgba(255, 51, 68, 0.3);
812
- }
813
-
814
- .bf.good {
815
- background: linear-gradient(90deg, #00ff8866, #00ff88);
816
- box-shadow: 0 0 10px rgba(0, 255, 136, 0.3);
817
- }
818
-
819
- .bv {
820
- font-size: 12px;
821
- font-weight: 700;
822
- width: 40px;
823
- text-align: right;
824
- flex-shrink: 0;
825
- }
826
-
827
- .bv.bad {
828
- color: var(--red);
829
- }
830
-
831
- .bv.good {
832
- color: var(--green);
833
- }
834
-
835
- /* Simple mode note */
836
- .sn {
837
- padding: 20px;
838
- border: 1px solid var(--cyan);
839
- border-radius: 12px;
840
- background: rgba(0, 217, 255, 0.05);
841
- margin: 24px;
842
- font-size: 13px;
843
- color: var(--t2);
844
- line-height: 1.6;
845
- border-left-width: 4px;
846
- }
847
-
848
- /* Diff */
849
- .dg {
850
- display: grid;
851
- grid-template-columns: 1fr 1fr;
852
- background: var(--bg);
853
- }
854
-
855
- .dfs {
856
- min-width: 0;
857
- }
858
-
859
- @media (max-width: 780px) {
860
- .dg {
861
- grid-template-columns: 1fr;
862
- }
863
-
864
- .dfs:first-child {
865
- border-right: none !important;
866
- border-bottom: 1px solid var(--b1);
867
- }
868
- }
869
-
870
- .dfs:first-child {
871
- border-right: 1px solid var(--b1);
872
- }
873
-
874
- .dfh {
875
- padding: 10px 16px;
876
- border-bottom: 1px solid var(--b1);
877
- font-size: 11px;
878
- color: var(--muted);
879
- display: flex;
880
- align-items: center;
881
- gap: 8px;
882
- font-weight: 600;
883
- background: var(--s2);
884
- }
885
-
886
- .dft {
887
- font-size: 9px;
888
- font-weight: 800;
889
- padding: 2px 6px;
890
- border-radius: 4px;
891
- text-transform: uppercase;
892
- }
893
-
894
- .dft.cu {
895
- background: rgba(255, 51, 68, 0.2);
896
- color: var(--red);
897
- }
898
-
899
- .dft.ro {
900
- background: rgba(0, 255, 136, 0.2);
901
- color: var(--green);
902
- }
903
-
904
- .dfp {
905
- padding: 20px;
906
- font-family: var(--mono);
907
- font-size: 12px;
908
- line-height: 1.7;
909
- overflow: auto;
910
- max-height: min(70vh, 760px);
911
- white-space: pre-wrap;
912
- overflow-wrap: anywhere;
913
- word-break: break-word;
914
- tab-size: 2;
915
- color: var(--t2);
916
- }
917
-
918
- .dlo {
919
- background: rgba(255, 51, 68, 0.08);
920
- color: var(--red);
921
- display: block;
922
- border-left: 2px solid rgba(255, 51, 68, 0.45);
923
- padding-left: 8px;
924
- }
925
-
926
- .dln {
927
- background: rgba(0, 255, 136, 0.08);
928
- color: var(--green);
929
- display: block;
930
- border-left: 2px solid rgba(0, 255, 136, 0.45);
931
- padding-left: 8px;
932
- }
933
-
934
- /* Loading Skeleton */
935
- .skeleton {
936
- position: relative;
937
- overflow: hidden;
938
- background: var(--s2);
939
- border-radius: 12px;
940
- height: 200px;
941
- margin-top: 24px;
942
- }
943
-
944
- .skeleton::after {
945
- content: '';
946
- position: absolute;
947
- inset: 0;
948
- transform: translateX(-100%);
949
- background: linear-gradient(90deg, transparent, rgba(255, 255, 255, 0.05), transparent);
950
- animation: shimmer 1.5s infinite;
951
- }
952
-
953
- @keyframes shimmer {
954
- 100% {
955
- transform: translateX(100%);
956
- }
957
- }
958
-
959
- /* Custom Cursor */
960
- #cursor {
961
- position: fixed;
962
- width: 20px;
963
- height: 20px;
964
- background: rgba(255, 255, 255, 0.2);
965
- border: 1px solid rgba(255, 255, 255, 0.4);
966
- border-radius: 50%;
967
- pointer-events: none;
968
- z-index: 9999;
969
- transition: transform 0.1s ease, width 0.3s var(--spring), height 0.3s var(--spring), background 0.3s ease;
970
- mix-blend-mode: difference;
971
- }
972
-
973
- #cursor.active {
974
- transform: scale(3);
975
- background: rgba(255, 51, 68, 0.3);
976
- border-color: var(--red);
977
- }
978
-
979
- /* Modal */
980
- .mo {
981
- display: none;
982
- position: fixed;
983
- inset: 0;
984
- background: rgba(0, 0, 0, 0.85);
985
- z-index: 1000;
986
- place-items: center;
987
- backdrop-filter: blur(8px);
988
- }
989
-
990
- .mo.open {
991
- display: grid;
992
- }
993
-
994
- .mb {
995
- background: var(--s1);
996
- border: 1px solid var(--b1);
997
- border-radius: 16px;
998
- width: 90%;
999
- max-width: 800px;
1000
- max-height: 90vh;
1001
- overflow: hidden;
1002
- box-shadow: 0 20px 50px rgba(0, 0, 0, 0.6);
1003
- }
1004
-
1005
- .mt {
1006
- padding: 16px 24px;
1007
- border-bottom: 1px solid var(--b1);
1008
- display: flex;
1009
- justify-content: space-between;
1010
- align-items: center;
1011
- background: var(--s2);
1012
- }
1013
-
1014
- .mt h3 {
1015
- font-size: 16px;
1016
- color: var(--t3);
1017
- font-weight: 700;
1018
- }
1019
-
1020
- .mx {
1021
- background: none;
1022
- border: none;
1023
- color: var(--muted);
1024
- font-size: 24px;
1025
- cursor: pointer !important;
1026
- line-height: 1;
1027
- transition: color 0.2s;
1028
- }
1029
-
1030
- .mx:hover {
1031
- color: var(--t3);
1032
- }
1033
-
1034
- .mc {
1035
- padding: 24px;
1036
- }
1037
-
1038
- .mc textarea {
1039
- width: 100%;
1040
- height: 400px;
1041
- background: var(--bg);
1042
- border: 1px solid var(--b1);
1043
- border-radius: 8px;
1044
- padding: 16px;
1045
- color: var(--cyan);
1046
- font-family: var(--mono);
1047
- font-size: 12px;
1048
- line-height: 1.6;
1049
- resize: vertical;
1050
- outline: none;
1051
- }
1052
-
1053
- .mc textarea:focus {
1054
- border-color: var(--cyan);
1055
- box-shadow: 0 0 10px rgba(0, 217, 255, 0.2);
1056
- }
1057
-
1058
- .mf {
1059
- padding: 16px 24px;
1060
- border-top: 1px solid var(--b1);
1061
- display: flex;
1062
- justify-content: flex-end;
1063
- gap: 12px;
1064
- background: var(--s2);
1065
- }
1066
-
1067
- ::-webkit-scrollbar {
1068
- width: 6px;
1069
- height: 6px;
1070
- }
1071
-
1072
- ::-webkit-scrollbar-track {
1073
- background: transparent;
1074
- }
1075
-
1076
- ::-webkit-scrollbar-thumb {
1077
- background: var(--b1);
1078
- border-radius: 10px;
1079
- }
1080
-
1081
- ::-webkit-scrollbar-thumb:hover {
1082
- background: var(--b2);
1083
- }
1084
-
1085
- footer {
1086
- padding: 32px 0;
1087
- border-top: 1px solid var(--b1);
1088
- display: flex;
1089
- justify-content: space-between;
1090
- font-size: 11px;
1091
- color: var(--muted);
1092
- font-weight: 500;
1093
- }
1094
-
1095
- footer a {
1096
- color: var(--muted);
1097
- text-decoration: none;
1098
- transition: color 0.2s;
1099
- border-bottom: 1px solid transparent;
1100
- }
1101
-
1102
- footer a:hover {
1103
- color: var(--t2);
1104
- border-bottom-color: var(--muted);
1105
- }
1106
-
1107
- .idle {
1108
- flex: 1;
1109
- display: flex;
1110
- align-items: center;
1111
- justify-content: center;
1112
- color: var(--b2);
1113
- font-size: 13px;
1114
- font-weight: 500;
1115
- min-height: 100px;
1116
- }
1117
-
1118
- /* Data source badge */
1119
- .ds-badge {
1120
- display: inline-flex;
1121
- align-items: center;
1122
- gap: 6px;
1123
- font-size: 10px;
1124
- font-weight: 800;
1125
- letter-spacing: 0.08em;
1126
- text-transform: uppercase;
1127
- padding: 4px 10px;
1128
- border-radius: 4px;
1129
- margin-left: 12px;
1130
- vertical-align: middle;
1131
- }
1132
- .ds-badge.real {
1133
- background: rgba(0,255,136,0.15);
1134
- color: var(--green);
1135
- border: 1px solid rgba(0,255,136,0.3);
1136
- }
1137
- .ds-badge.demo {
1138
- background: rgba(255,204,0,0.12);
1139
- color: var(--yellow);
1140
- border: 1px solid rgba(255,204,0,0.3);
1141
- }
1142
- .ds-badge.sim {
1143
- background: rgba(255,255,255,0.06);
1144
- color: var(--muted);
1145
- border: 1px solid var(--b1);
1146
- }
1147
-
1148
- /* Risk matrix panel */
1149
- .risk-panel {
1150
- margin: 0 24px 24px;
1151
- border-radius: 10px;
1152
- overflow: hidden;
1153
- border: 1px solid var(--b1);
1154
- }
1155
- .risk-header {
1156
- background: rgba(255,255,255,0.03);
1157
- padding: 10px 16px;
1158
- font-size: 11px;
1159
- font-weight: 700;
1160
- color: var(--muted);
1161
- text-transform: uppercase;
1162
- letter-spacing: 0.08em;
1163
- border-bottom: 1px solid var(--b1);
1164
- display: flex;
1165
- align-items: center;
1166
- gap: 10px;
1167
- }
1168
- .risk-badge {
1169
- font-size: 9px;
1170
- font-weight: 800;
1171
- padding: 2px 6px;
1172
- border-radius: 3px;
1173
- text-transform: uppercase;
1174
- letter-spacing: 0.05em;
1175
- }
1176
- .risk-badge.crit { background: rgba(255,51,68,0.2); color: var(--red); }
1177
- .risk-badge.high { background: rgba(255,153,0,0.2); color: #ff9900; }
1178
- .risk-badge.med { background: rgba(255,204,0,0.2); color: var(--yellow); }
1179
- .risk-row {
1180
- padding: 12px 16px;
1181
- border-bottom: 1px solid rgba(255,255,255,0.04);
1182
- display: grid;
1183
- grid-template-columns: 70px 1fr auto;
1184
- gap: 12px;
1185
- align-items: start;
1186
- font-size: 12px;
1187
- transition: background 0.2s;
1188
- }
1189
- .risk-row:last-child { border-bottom: none; }
1190
- .risk-row:hover { background: rgba(255,255,255,0.02); }
1191
- .risk-loc {
1192
- font-family: var(--mono);
1193
- font-size: 11px;
1194
- color: var(--muted);
1195
- padding-top: 1px;
1196
- }
1197
- .risk-desc { color: var(--t2); line-height: 1.5; }
1198
- .risk-hint {
1199
- font-size: 10px;
1200
- color: var(--cyan);
1201
- margin-top: 4px;
1202
- line-height: 1.4;
1203
- }
1204
- </style>
1205
- </head>
1206
- <div id="cursor"></div>
1207
-
1208
- <div class="w">
1209
- <header>
1210
- <div class="logo">ROCmPort <em>AI</em></div>
1211
- <div class="hr">
1212
- <div class="hd on" id="hdot"></div>
1213
- <span id="hstat">Ready</span>
1214
- </div>
1215
- </header>
1216
-
1217
- <div class="g">
1218
- <div class="p">
1219
- <div class="ph">
1220
- <div><b>//</b> CUDA source</div>
1221
- <div id="lc">0 lines</div>
1222
- </div>
1223
- <textarea class="code" id="inp" spellcheck="false" placeholder="// Paste CUDA code here
1224
- // or pick a demo below
1225
-
1226
- __global__ void kernel(float* A, float* B, int N) {
1227
- int idx = blockIdx.x * blockDim.x + threadIdx.x;
1228
- ...
1229
- }"></textarea>
1230
- <div class="db">
1231
- <span class="l">Select a template:</span>
1232
- <button class="ch" onclick="lk('vector_add', this)">Vector addition</button>
1233
- <button class="ch" onclick="lk('matrix_multiply', this)">Matrix multiplication</button>
1234
- <button class="ch" onclick="lk('convolution_2d', this)">2D convolution</button>
1235
- <button class="ch" onclick="lk('reduction', this)">Parallel reduction</button>
1236
- </div>
1237
- <button class="bg" id="go" onclick="go()">Port to ROCm</button>
1238
- </div>
1239
-
1240
- <div class="p">
1241
- <div class="ph">
1242
- <div><b>//</b> Pipeline</div>
1243
- <div id="pt">0.0s</div>
1244
- </div>
1245
- <div class="timeline" id="tl">
1246
- <!-- Nodes injected by JS -->
1247
- </div>
1248
- <div class="al" id="al">
1249
- <div class="idle">Paste CUDA code to begin migration</div>
1250
- </div>
1251
- </div>
1252
-
1253
- <div class="p fs hide" id="rp">
1254
- <div class="ph">
1255
- <div style="display:flex;align-items:center;gap:12px"><b>//</b> Results</div>
1256
- <div class="tabs" id="tabs">
1257
- <button class="tab on" onclick="stab('sum',this)">Summary</button>
1258
- <button class="tab" onclick="stab('diff',this)">Visual Diff</button>
1259
- <button class="tab" onclick="stab('det',this)">Performance</button>
1260
- </div>
1261
- </div>
1262
- <div id="t-loader" class="hide">
1263
- <div class="skeleton"></div>
1264
- </div>
1265
- <div id="t-sum" class="tc on"></div>
1266
- <div id="t-diff" class="tc"></div>
1267
- <div id="t-det" class="tc">
1268
- </div>
1269
- </div>
1270
- </div>
1271
-
1272
- <footer>
1273
- <div>ROCmPort AI</div>
1274
- <div><a href="https://x.com/TazwarEnan" target="_blank">Tazwar Ahnaf Enan</a> · <a
1275
- href="https://github.com/tazwaryayyyy" target="_blank">GitHub</a></div>
1276
- </footer>
1277
- </div>
1278
-
1279
- <div class="mo" id="modal">
1280
- <div class="mb">
1281
- <div class="mt">
1282
- <h3>Edit ROCm code</h3><button class="mx" onclick="cm()">&times;</button>
1283
- </div>
1284
- <div class="mc"><textarea id="edt"></textarea></div>
1285
- <div class="mf"><button class="bs" onclick="cm()">Cancel</button><button class="bs r"
1286
- onclick="rec()">Re-test</button></div>
1287
- </div>
1288
- </div>
1289
- <script>
1290
- const API = window.location.protocol === 'file:'
1291
- ? 'http://localhost:8000'
1292
- : window.location.origin;
1293
- const S = { code: '', kn: 'custom', run: false, t0: null, iv: null, rep: null, tl: [], kernels: {} };
1294
- const AG = {
1295
- analyzer: { n: 'ANALYZER', i: '🔍' },
1296
- translator: { n: 'TRANSLATOR', i: '🔄' },
1297
- optimizer: { n: 'OPTIMIZER', i: '⚡' },
1298
- tester: { n: 'TESTER', i: '🧪' },
1299
- coordinator: { n: 'COORDINATOR', i: '📋' }
1300
- };
1301
-
1302
- // Custom Cursor Logic
1303
- const cur = document.getElementById('cursor');
1304
- document.addEventListener('mousemove', (e) => {
1305
- cur.style.left = e.clientX + 'px';
1306
- cur.style.top = e.clientY + 'px';
1307
- const target = e.target;
1308
- const isClickable = target.onclick ||
1309
- target.tagName === 'BUTTON' ||
1310
- target.tagName === 'A' ||
1311
- target.tagName === 'TEXTAREA' ||
1312
- target.classList.contains('ch') ||
1313
- target.classList.contains('tab');
1314
-
1315
- if (isClickable) {
1316
- cur.classList.add('active');
1317
- if (target.id === 'go') cur.style.background = 'rgba(255, 51, 68, 0.5)';
1318
- else cur.style.background = 'rgba(255, 255, 255, 0.3)';
1319
- } else {
1320
- cur.classList.remove('active');
1321
- cur.style.background = 'rgba(255, 255, 255, 0.2)';
1322
- }
1323
- });
1324
-
1325
- async function init() {
1326
- const ta = document.getElementById('inp');
1327
- ta.oninput = () => {
1328
- document.getElementById('lc').textContent = ta.value.split('\n').length + ' lines';
1329
- S.code = ta.value;
1330
- };
1331
- try {
1332
- const r = await fetch(API + '/demo-kernels');
1333
- S.kernels = await r.json();
1334
- } catch (e) { S.kernels = FB; }
1335
- }
1336
-
1337
- function lk(n, btn) {
1338
- document.querySelectorAll('.ch').forEach(c => c.classList.remove('on'));
1339
- btn.classList.add('on');
1340
- const code = S.kernels[n] || FB[n] || '', ta = document.getElementById('inp');
1341
- ta.value = code; S.code = code; S.kn = n;
1342
- document.getElementById('lc').textContent = code.split('\n').length + ' lines';
1343
- }
1344
-
1345
- function stab(id, btn) {
1346
- document.querySelectorAll('.tab').forEach(t => t.classList.remove('on'));
1347
- document.querySelectorAll('.tc').forEach(t => t.classList.remove('on'));
1348
- btn.classList.add('on');
1349
- document.getElementById('t-' + id).classList.add('on');
1350
- if (id === 'diff' && S.rep) rDiff(S.code, S.rep.optimized_code);
1351
- }
1352
-
1353
- async function go() {
1354
- if (S.run) return;
1355
- const code = document.getElementById('inp').value.trim();
1356
- if (!code) return;
1357
-
1358
- S.code = code; S.run = true; S.t0 = Date.now(); S.tl = [];
1359
- const btn = document.getElementById('go');
1360
- btn.disabled = true;
1361
- btn.textContent = 'Running pipeline...';
1362
-
1363
- document.getElementById('hstat').textContent = 'Pipeline running...';
1364
- document.getElementById('rp').classList.add('hide');
1365
-
1366
- bLog();
1367
- sTimer();
1368
-
1369
- try {
1370
- const simpleModeCheckbox = document.getElementById('sm');
1371
- const res = await fetch(API + '/port', {
1372
- method: 'POST',
1373
- headers: { 'Content-Type': 'application/json' },
1374
- body: JSON.stringify({
1375
- cuda_code: code,
1376
- kernel_name: S.kn,
1377
- simple_mode: simpleModeCheckbox ? simpleModeCheckbox.checked : false
1378
- })
1379
- });
1380
-
1381
- // Show results panel with loader immediately
1382
- document.getElementById('rp').classList.remove('hide');
1383
- document.getElementById('t-loader').classList.remove('hide');
1384
- document.getElementById('t-sum').classList.remove('on');
1385
- document.getElementById('t-diff').classList.remove('on');
1386
- document.getElementById('t-det').classList.remove('on');
1387
-
1388
- const rd = res.body.getReader(), dc = new TextDecoder();
1389
- let buf = '';
1390
- while (true) {
1391
- const { done, value } = await rd.read();
1392
- if (done) break;
1393
- buf += dc.decode(value, { stream: true });
1394
- const lines = buf.split('\n');
1395
- buf = lines.pop();
1396
- for (const ln of lines) {
1397
- if (!ln.startsWith('data: ')) continue;
1398
- const raw = ln.slice(6).trim();
1399
- if (raw === '[DONE]') { done_(); break; }
1400
- try { hEvt(JSON.parse(raw)); } catch (e) { console.error('Parse error:', e); }
1401
- }
1402
- }
1403
- } catch (e) {
1404
- document.getElementById('hstat').textContent = 'Pipeline error';
1405
- document.getElementById('t-loader').classList.add('hide'); // Hide loader on error
1406
- console.error(e);
1407
- } finally {
1408
- xTimer();
1409
- S.run = false;
1410
- btn.disabled = false;
1411
- btn.textContent = 'Port to ROCm';
1412
- document.getElementById('t-loader').classList.add('hide');
1413
- }
1414
- }
1415
-
1416
- function hEvt(ev) {
1417
- uLog(ev.agent, ev.status, ev.message, ev.detail);
1418
- if (ev.agent === 'tester' && (ev.status === 'done' || ev.status === 'failed')) {
1419
- const m = ev.message.match(/([\d.]+)x/);
1420
- if (m) {
1421
- const sp = parseFloat(m[1]), ok = sp >= 1, im = ev.message.match(/Iteration (\d+)/i);
1422
- S.tl.push({
1423
- label: 'Iteration ' + (im ? im[1] : S.tl.length + 1) + (ok ? ' (optimized)' : ' (baseline)'),
1424
- speedup: sp,
1425
- good: ok
1426
- });
1427
- }
1428
- }
1429
- if (ev.agent === 'coordinator' && ev.status === 'done' && ev.detail) {
1430
- try {
1431
- const r = JSON.parse(ev.detail);
1432
- S.rep = r;
1433
- rRes(r, S.tl);
1434
- } catch (e) { console.error('Coordinator detail parse error:', e); }
1435
- }
1436
- }
1437
-
1438
- function done_() {
1439
- document.getElementById('hstat').textContent = 'Pipeline complete';
1440
- document.getElementById('t-loader').classList.add('hide');
1441
- if (!S.rep) {
1442
- document.getElementById('t-sum').innerHTML = '<div class="idle">Migration finished but no report was generated. Check agent logs for details.</div>';
1443
- document.getElementById('t-sum').classList.add('on');
1444
- }
1445
- }
1446
-
1447
- function bLog() {
1448
- const el = document.getElementById('al');
1449
- const tl = document.getElementById('tl');
1450
- el.innerHTML = '';
1451
- tl.innerHTML = '';
1452
-
1453
- let i = 0;
1454
- for (const [k, obj] of Object.entries(AG)) {
1455
- // Log row
1456
- const d = document.createElement('div');
1457
- d.className = 'ar';
1458
- d.id = 'ar-' + k;
1459
- d.style.animationDelay = (i * 0.1) + 's';
1460
- d.innerHTML = `
1461
- <div class="at">
1462
- <span class="an">${obj.n}</span>
1463
- <span class="am" id="am-${k}">Waiting</span>
1464
- </div>
1465
- <div class="ad" id="ad-${k}"></div>`;
1466
- el.appendChild(d);
1467
-
1468
- // Timeline node
1469
- const n = document.createElement('div');
1470
- n.className = 'node';
1471
- n.id = 'nd-' + k;
1472
- n.title = obj.n;
1473
- n.innerHTML = `<div class="ni">${obj.i}</div><div class="nl">${obj.n.slice(0, 3)}</div>`;
1474
- tl.appendChild(n);
1475
- i++;
1476
- }
1477
- }
1478
-
1479
- function uLog(a, s, m, d) {
1480
- const row = document.getElementById('ar-' + a);
1481
- const node = document.getElementById('nd-' + a);
1482
- if (!row || !node) return;
1483
-
1484
- const statusClass = { running: 'run', done: 'done', failed: 'fail', retrying: 'retry' }[s] || '';
1485
- row.className = 'ar ' + statusClass;
1486
- node.className = 'node ' + (s === 'running' ? 'on' : s === 'retrying' ? 'retry' : s === 'done' ? 'done' : s === 'failed' ? 'fail' : '');
1487
-
1488
- const me = document.getElementById('am-' + a);
1489
- if (me) me.textContent = m;
1490
-
1491
- // Node tooltip message update
1492
- node.title = m;
1493
-
1494
- const de = document.getElementById('ad-' + a);
1495
- if (de && d) {
1496
- de.innerHTML = esc(d)
1497
- .replace(/\u26a0\ufe0f([^\n]*)/g, '<span class="w">⚠️ $1</span>')
1498
- .replace(/\u2705([^\n]*)/g, '<span class="g">✅ $1</span>');
1499
- de.scrollTop = de.scrollHeight;
1500
- }
1501
- }
1502
-
1503
- function rRes(r, tl) {
1504
- // Hide loader, show summary
1505
- document.getElementById('t-loader').classList.add('hide');
1506
- document.getElementById('t-sum').classList.add('on');
1507
-
1508
- const v = r.verification || {}, bw = r.bandwidth_utilized;
1509
- const dot = ok => `<div class="sum-dot ${ok === true ? 'ok' : ok === false ? 'no' : 'na'}"></div>`;
1510
-
1511
- // Data source badge
1512
- const ds = r.data_source || 'simulated';
1513
- const dsBadge = ds === 'real_rocm'
1514
- ? `<span class="ds-badge real">🟢 LIVE MI300X</span>`
1515
- : ds === 'demo_artifact'
1516
- ? `<span class="ds-badge demo">🟡 DEMO DATA</span>`
1517
- : `<span class="ds-badge sim">⚪ SIMULATED</span>`;
1518
-
1519
- document.getElementById('t-sum').innerHTML = `
1520
- <div class="sum-row">
1521
- <div class="sum-big">
1522
- ${r.speedup}x
1523
- ${dsBadge}
1524
- <span class="u">vs baseline hipify</span>
1525
- <span class="vic">Measured against declared baseline. ${ds === 'demo_artifact' ? 'Representative MI300X values — set ROCM_AVAILABLE=true for real numbers.' : ds === 'real_rocm' ? 'Real rocprof measurement on AMD MI300X.' : 'Set ROCM_AVAILABLE=true on AMD Cloud for real numbers.'}</span>
1526
- </div>
1527
- <div class="sum-sep"></div>
1528
- <div>
1529
- <div class="sum-chk">${dot(v.compiled_successfully)} Compiled${v.mock_mode ? ' (simulated)' : ''}</div>
1530
- <div class="sum-chk" style="margin-top:8px">${dot(v.executed_without_error)} Executed without error</div>
1531
- <div class="sum-chk" style="margin-top:8px">${dot(v.output_matches_expected)} Output matches expected</div>
1532
- </div>
1533
- <div class="sum-sep"></div>
1534
- <div class="sum-type">${(r.bottleneck || 'optimized').toLowerCase()}</div>
1535
- </div>
1536
- <div class="sum-bar">
1537
- <button class="bs r" onclick="om()">Edit code</button>
1538
- <button class="bs gr" onclick="exM()">Export PR</button>
1539
- <button class="bs" onclick="dlR()">Download report</button>
1540
- <div class="sp"></div>
1541
- </div>
1542
- <div class="sn" id="sn" style="margin: 24px; border-left-width: 4px;">
1543
- <div style="font-weight: bold; margin-bottom: 8px; color: var(--cyan);">🧠 Simple explanation</div>
1544
- ${r.simplified_explanation ? esc(r.simplified_explanation) : '<em>Simplified explanation will appear here</em>'}
1545
- </div>
1546
- ${riskMatrix(r.static_risk_report)}`;
1547
-
1548
- // Details tab
1549
- let dh = `<div class="dm">
1550
- <div class="di"><div class="dl">Speedup</div><div class="dv g">${r.speedup}x</div><div class="ds">optimized ROCm vs straight hipify output</div></div>
1551
- <div class="di"><div class="dl">Bandwidth</div><div class="dv c">${bw != null ? bw.toFixed(1) : '—'}%</div><div class="ds">of MI300X 5.3 TB/s HBM3</div></div>
1552
- <div class="di"><div class="dl">Changes</div><div class="dv y">${r.total_changes}</div><div class="ds">hipify + LLM + optimizer changes</div></div>
1553
- <div class="di"><div class="dl">Iterations</div><div class="dv c">${r.iterations || 1}</div><div class="ds">optimizer retry loop count</div></div>
1554
- <div class="di"><div class="dl">Type</div><div class="dv t">${(r.bottleneck || '—').toUpperCase()}</div><div class="ds">workload classification</div></div>
1555
- </div>`;
1556
-
1557
- if (tl.length) {
1558
- dh += '<div class="bk"><div class="bk-t">Benchmark iterations (optimized vs baseline hipify)</div>';
1559
- tl.forEach(d => {
1560
- const pct = Math.min(Math.max((d.speedup / 2) * 100, 3), 95);
1561
- dh += `<div class="br">
1562
- <div class="bl">${esc(d.label)}</div>
1563
- <div class="bt"><div class="bf ${d.good ? 'good' : 'bad'}" style="width: 0" data-w="${pct}%"></div></div>
1564
- <div class="bv ${d.good ? 'good' : 'bad'}">${d.speedup}x</div>
1565
- </div>`;
1566
- });
1567
- dh += '</div>';
1568
- }
1569
-
1570
- document.getElementById('t-det').innerHTML = dh;
1571
- tsm(); // Ensure simple note visibility matches current toggle state
1572
-
1573
- // Progress bar animation
1574
- setTimeout(() => {
1575
- document.querySelectorAll('.bf[data-w]').forEach(b => {
1576
- b.style.width = b.dataset.w;
1577
- });
1578
- }, 100);
1579
- }
1580
-
1581
- function riskMatrix(srr) {
1582
- if (!srr || !srr.items || srr.items.length === 0) return '';
1583
-
1584
- const levelClass = { CRITICAL: 'crit', HIGH: 'high', MEDIUM: 'med' };
1585
- const critical = srr.critical_count || 0;
1586
- const high = srr.high_count || 0;
1587
- const medium = srr.medium_count || 0;
1588
-
1589
- let rows = srr.items.map(item => {
1590
- const cls = levelClass[item.risk_level] || 'med';
1591
- const loc = item.line ? `line ${item.line}` : '—';
1592
- return `<div class="risk-row">
1593
- <div class="risk-loc">${esc(loc)}</div>
1594
- <div>
1595
- <div class="risk-desc">${esc(item.description)}</div>
1596
- <div class="risk-hint">Fix: ${esc(item.amd_fix_hint)}</div>
1597
- </div>
1598
- <div><span class="risk-badge ${cls}">${esc(item.risk_level)}</span></div>
1599
- </div>`;
1600
- }).join('');
1601
-
1602
- const scanMs = srr.scan_duration_ms != null ? `${srr.scan_duration_ms.toFixed(1)}ms` : '';
1603
-
1604
- return `<div class="risk-panel">
1605
- <div class="risk-header">
1606
- ⚠️ Static Risk Scan
1607
- ${critical > 0 ? `<span class="risk-badge crit">${critical} CRITICAL</span>` : ''}
1608
- ${high > 0 ? `<span class="risk-badge high">${high} HIGH</span>` : ''}
1609
- ${medium > 0 ? `<span class="risk-badge med">${medium} MEDIUM</span>` : ''}
1610
- <span style="margin-left:auto;font-size:9px;opacity:0.5">Pure-Python pre-scan · ${scanMs}</span>
1611
- </div>
1612
- ${rows}
1613
- </div>`;
1614
- }
1615
-
1616
- function rDiff(o, n) {
1617
- if (!o || !n) return;
1618
- document.getElementById('t-diff').innerHTML = `<div class="dg">
1619
- <div class="dfs"><div class="dfh"><span class="dft cu">CUDA</span> Original Source</div><pre class="dfp" id="d-o"></pre></div>
1620
- <div class="dfs"><div class="dfh"><span class="dft ro">ROCm</span> Optimized HIP</div><pre class="dfp" id="d-n"></pre></div>
1621
- </div>`;
1622
-
1623
- const oL = o.split('\n'), nL = n.split('\n'), mx = Math.max(oL.length, nL.length);
1624
- let oH = '', nH = '';
1625
- for (let i = 0; i < mx; i++) {
1626
- const a = oL[i] ?? '', b = nL[i] ?? '', c = a !== b;
1627
- oH += `<span class="${c ? 'dlo' : ''}">${esc(a)}\n</span>`;
1628
- nH += `<span class="${c ? 'dln' : ''}">${esc(b)}\n</span>`;
1629
- }
1630
- const left = document.getElementById('d-o');
1631
- const right = document.getElementById('d-n');
1632
- left.innerHTML = oH;
1633
- right.innerHTML = nH;
1634
-
1635
- // Keep both panes aligned while scrolling for easier comparison.
1636
- let syncing = false;
1637
- left.addEventListener('scroll', () => {
1638
- if (syncing) return;
1639
- syncing = true;
1640
- right.scrollTop = left.scrollTop;
1641
- syncing = false;
1642
- }, { passive: true });
1643
- right.addEventListener('scroll', () => {
1644
- if (syncing) return;
1645
- syncing = true;
1646
- left.scrollTop = right.scrollTop;
1647
- syncing = false;
1648
- }, { passive: true });
1649
- }
1650
-
1651
- function sTimer() { S.iv = setInterval(() => { document.getElementById('pt').textContent = ((Date.now() - S.t0) / 1000).toFixed(1) + 's' }, 100) }
1652
- function xTimer() { clearInterval(S.iv) }
1653
-
1654
- function dlR() {
1655
- const r = S.rep; if (!r) return;
1656
- const md = `# ROCmPort AI — Migration Report\n\n## Results\n- **Speedup**: ${r.speedup}x\n- **Bandwidth**: ${r.bandwidth_utilized ? r.bandwidth_utilized.toFixed(1) : '—'}%\n- **Changes**: ${r.total_changes}\n- **Iterations**: ${r.iterations}\n- **Type**: ${r.bottleneck}\n\n${r.amd_advantage_explanation ? '> ' + r.amd_advantage_explanation + '\n\n' : ''}${r.cost_estimate ? '## Cost Impact\n- Manual: ' + r.cost_estimate.manual_porting_weeks + '\n- ROCmPort: ' + r.cost_estimate.rocmport_minutes + '\n- Savings: ' + r.cost_estimate.estimated_savings + '\n\n' : ''}## ROCm/HIP Code\n\`\`\`cpp\n${r.optimized_code || ''}\n\`\`\`\n\n---\n*Generated by ROCmPort AI*\n`;
1657
- const a = document.createElement('a'); a.href = URL.createObjectURL(new Blob([md], { type: 'text/markdown' })); a.download = 'rocmport-migration-report.md'; a.click();
1658
- }
1659
-
1660
- function om() { if (!S.rep) return alert('No results yet!'); document.getElementById('edt').value = S.rep?.optimized_code || ''; document.getElementById('modal').classList.add('open') }
1661
- function cm() { document.getElementById('modal').classList.remove('open') }
1662
-
1663
- async function rec() {
1664
- const code = document.getElementById('edt').value.trim(); if (!code) return;
1665
- try {
1666
- const res = await fetch(API + '/recompile', { method: 'POST', headers: { 'Content-Type': 'application/json' }, body: JSON.stringify({ edited_code: code, kernel_name: S.kn }) });
1667
- const r = await res.json();
1668
- if (r.success) { cm(); if (r.result) rRes(r.result, S.tl); }
1669
- else alert('Failed: ' + (r.detail || 'Unknown'))
1670
- } catch (e) { alert('Error: ' + e.message) }
1671
- }
1672
-
1673
- async function exM() {
1674
- if (!S.rep) return;
1675
- try {
1676
- const currentInput = document.getElementById('inp')?.value || '';
1677
- const payload = {
1678
- original_cuda: S.code || currentInput,
1679
- final_rocm: S.rep.optimized_code || '',
1680
- migration_report: S.rep
1681
- };
1682
- const res = await fetch(API + '/export', {
1683
- method: 'POST',
1684
- headers: { 'Content-Type': 'application/json' },
1685
- body: JSON.stringify(payload)
1686
- });
1687
-
1688
- if (!res.ok) {
1689
- let msg = `Export failed (${res.status})`;
1690
- try {
1691
- const err = await res.json();
1692
- if (err && err.detail) msg = err.detail;
1693
- } catch (_) { }
1694
- throw new Error(msg);
1695
- }
1696
-
1697
- const a = document.createElement('a');
1698
- a.href = URL.createObjectURL(await res.blob());
1699
- a.download = 'rocmport-migration.zip';
1700
- a.click();
1701
- } catch (e) {
1702
- alert('Export error: ' + (e.message || 'Unknown error'));
1703
- }
1704
- }
1705
-
1706
- function tsm() {
1707
- const sn = document.getElementById('sn');
1708
- if (sn) sn.classList.remove('hide');
1709
- }
1710
-
1711
- function esc(s) { return String(s ?? '').replace(/&/g, '&amp;').replace(/</g, '&lt;').replace(/>/g, '&gt;') }
1712
-
1713
- const FB = {
1714
- vector_add: `#include <cuda_runtime.h>\n\n__global__ void vector_add_kernel(float* A, float* B, float* C, int N) {\n int idx = blockIdx.x * blockDim.x + threadIdx.x;\n if (idx < N) {\n C[idx] = A[idx] + B[idx];\n }\n}\n\nint main() {\n int N = 1 << 24;\n size_t size = N * sizeof(float);\n float *d_A, *d_B, *d_C;\n cudaMalloc(&d_A, size);\n cudaMalloc(&d_B, size);\n cudaMalloc(&d_C, size);\n int threads = 128;\n int blocks = (N + threads - 1) / threads;\n vector_add_kernel<<<blocks, threads>>>(d_A, d_B, d_C, N);\n cudaDeviceSynchronize();\n cudaFree(d_A); cudaFree(d_B); cudaFree(d_C);\n return 0;\n}`,
1715
- matrix_multiply: `#include <cuda_runtime.h>\n#define WARP_SIZE 32\n\n__global__ void matmul_kernel(float* A, float* B, float* C, int N) {\n int row = blockIdx.y * blockDim.y + threadIdx.y;\n int col = blockIdx.x * blockDim.x + threadIdx.x;\n float sum = 0.0f;\n if (row < N && col < N) {\n for (int k = 0; k < N; k++)\n sum += A[row * N + k] * B[k * N + col];\n C[row * N + col] = sum;\n }\n}\n\n__global__ void warp_reduce(float* data, float* result, int N) {\n int tid = threadIdx.x;\n extern __shared__ float sdata[];\n sdata[tid] = (tid < N) ? data[tid] : 0;\n __syncthreads();\n for (int s = WARP_SIZE/2; s > 0; s >>= 1) {\n if (tid < s) sdata[tid] += sdata[tid + s];\n __syncthreads();\n }\n if (tid == 0) result[blockIdx.x] = sdata[0];\n}\n\nint main() {\n int N = 1024;\n size_t size = N * N * sizeof(float);\n float *d_A, *d_B, *d_C;\n cudaMalloc(&d_A, size);\n cudaMalloc(&d_B, size);\n cudaMalloc(&d_C, size);\n dim3 block(16, 16);\n dim3 grid((N+15)/16, (N+15)/16);\n matmul_kernel<<<grid, block>>>(d_A, d_B, d_C, N);\n cudaDeviceSynchronize();\n cudaFree(d_A); cudaFree(d_B); cudaFree(d_C);\n return 0;\n}`,
1716
- convolution_2d: `#include <cuda_runtime.h>\n#define BLOCK_SIZE 16\n\n__global__ void conv2d_kernel(\n float* input, float* kernel, float* output,\n int width, int height\n) {\n int x = blockIdx.x * blockDim.x + threadIdx.x;\n int y = blockIdx.y * blockDim.y + threadIdx.y;\n if (x >= width || y >= height) return;\n float sum = 0.0f;\n for (int ky = -1; ky <= 1; ky++) {\n for (int kx = -1; kx <= 1; kx++) {\n int ix = x + kx, iy = y + ky;\n if (ix >= 0 && ix < width && iy >= 0 && iy < height)\n sum += input[iy * width + ix] * kernel[(ky+1)*3 + (kx+1)];\n }\n }\n output[y * width + x] = sum;\n}\n\nint main() {\n int W = 2048, H = 2048;\n float *d_in, *d_ker, *d_out;\n cudaMalloc(&d_in, W*H*sizeof(float));\n cudaMalloc(&d_ker, 9*sizeof(float));\n cudaMalloc(&d_out, W*H*sizeof(float));\n dim3 block(BLOCK_SIZE, BLOCK_SIZE);\n dim3 grid((W+BLOCK_SIZE-1)/BLOCK_SIZE, (H+BLOCK_SIZE-1)/BLOCK_SIZE);\n conv2d_kernel<<<grid, block>>>(d_in, d_ker, d_out, W, H);\n cudaDeviceSynchronize();\n cudaFree(d_in); cudaFree(d_ker); cudaFree(d_out);\n return 0;\n}`,
1717
- reduction: `#include <cuda_runtime.h>\n#include <stdio.h>\n#include <iostream>\n#include <vector>\n#include <numeric>\n\n// Tree-based reduction kernel\n__global__ void reduction_kernel(float* g_idata, float* g_odata, unsigned int n) {\n extern __shared__ float sdata[];\n unsigned int tid = threadIdx.x;\n unsigned int i = blockIdx.x * (blockDim.x * 2) + threadIdx.x;\n\n float mySum = (i < n) ? g_idata[i] : 0;\n if (i + blockDim.x < n) mySum += g_idata[i + blockDim.x];\n sdata[tid] = mySum;\n __syncthreads();\n\n for (unsigned int s = blockDim.x / 2; s > 32; s >>= 1) {\n if (tid < s) sdata[tid] = mySum = mySum + sdata[tid + s];\n __syncthreads();\n }\n\n // DELIBERATE WARP-SIZE BUG: Unroll to 32 instead of 64\n if (tid < 32) {\n volatile float* vsmem = sdata;\n vsmem[tid] = mySum = mySum + vsmem[tid + 32];\n vsmem[tid] = mySum = mySum + vsmem[tid + 16];\n vsmem[tid] = mySum = mySum + vsmem[tid + 8];\n vsmem[tid] = mySum = mySum + vsmem[tid + 4];\n vsmem[tid] = mySum = mySum + vsmem[tid + 2];\n vsmem[tid] = mySum = mySum + vsmem[tid + 1];\n }\n\n if (tid == 0) g_odata[blockIdx.x] = sdata[0];\n}\n\nint main() {\n const int N = 1048576;\n // ... Host code for Parallel Reduction demo\n printf("Parallel Reduction demo loaded.\\n");\n return 0;\n}`
1718
- };
1719
-
1720
- init();
1721
- </script>
1722
- </body>
1723
-
1724
  </html>
 
1
+ <!DOCTYPE html>
2
  <html lang="en">
3
+ <head>
4
+ <meta charset="UTF-8" />
5
+ <meta name="viewport" content="width=device-width, initial-scale=1.0" />
6
+ <title>ROCmPort AI</title>
7
+ <link rel="preconnect" href="https://fonts.googleapis.com" />
8
+ <link rel="preconnect" href="https://fonts.gstatic.com" crossorigin />
9
+ <link
10
+ href="https://fonts.googleapis.com/css2?family=Inter:wght@400;500;600&family=JetBrains+Mono:wght@400;500;600&display=swap"
11
+ rel="stylesheet"
12
+ />
13
+ </head>
14
+ <body>
15
+ <div id="root"></div>
16
+ <script type="module" src="/src/main.jsx"></script>
17
+ </body>
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
18
  </html>
frontend/package-lock.json ADDED
The diff for this file is too large to render. See raw diff
 
frontend/package.json ADDED
@@ -0,0 +1,21 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "name": "rocmport-ai-frontend",
3
+ "version": "1.0.0",
4
+ "type": "module",
5
+ "scripts": {
6
+ "dev": "vite",
7
+ "build": "vite build",
8
+ "preview": "vite preview"
9
+ },
10
+ "dependencies": {
11
+ "react": "^18.3.1",
12
+ "react-dom": "^18.3.1"
13
+ },
14
+ "devDependencies": {
15
+ "@vitejs/plugin-react": "^4.3.4",
16
+ "autoprefixer": "^10.4.20",
17
+ "postcss": "^8.4.47",
18
+ "tailwindcss": "^3.4.15",
19
+ "vite": "^5.4.10"
20
+ }
21
+ }
frontend/postcss.config.js ADDED
@@ -0,0 +1,6 @@
 
 
 
 
 
 
 
1
+ export default {
2
+ plugins: {
3
+ tailwindcss: {},
4
+ autoprefixer: {},
5
+ },
6
+ }
frontend/src/App.jsx ADDED
@@ -0,0 +1,872 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import { useState, useEffect, useRef } from 'react'
2
+
3
+ // ─── Template Kernels ─────────────────────────────────────────────────────────
4
+
5
+ const KERNEL_VECTOR_ADD = String.raw`
6
+ #include <cuda_runtime.h>
7
+ #include <stdio.h>
8
+
9
+ // Vector addition kernel with intentional warp size bug
10
+ __global__ void vectorAdd(const float *A, const float *B, float *C, int numElements) {
11
+ int i = blockDim.x * blockIdx.x + threadIdx.x;
12
+
13
+ if (i < numElements) {
14
+ C[i] = A[i] + B[i];
15
+
16
+ // Intentional warp size bug - assumes 32 threads per warp
17
+ // This will break on AMD wavefront (64 threads)
18
+ if (threadIdx.x % 32 == 0) {
19
+ // This synchronization only works for CUDA's 32-thread warps
20
+ printf("Thread %d in warp %d completed\n", threadIdx.x, threadIdx.x / 32);
21
+ }
22
+ }
23
+ }
24
+
25
+ int main(void) {
26
+ int numElements = 50000;
27
+ size_t size = numElements * sizeof(float);
28
+
29
+ // Allocate host memory
30
+ float *h_A = (float *)malloc(size);
31
+ float *h_B = (float *)malloc(size);
32
+ float *h_C = (float *)malloc(size);
33
+
34
+ // Initialize host vectors
35
+ for (int i = 0; i < numElements; ++i) {
36
+ h_A[i] = rand() / (float)RAND_MAX;
37
+ h_B[i] = rand() / (float)RAND_MAX;
38
+ }
39
+
40
+ // Allocate device memory
41
+ float *d_A, *d_B, *d_C;
42
+ cudaMalloc((void **)&d_A, size);
43
+ cudaMalloc((void **)&d_B, size);
44
+ cudaMalloc((void **)&d_C, size);
45
+
46
+ // Copy data from host to device
47
+ cudaMemcpy(d_A, h_A, size, cudaMemcpyHostToDevice);
48
+ cudaMemcpy(d_B, h_B, size, cudaMemcpyHostToDevice);
49
+
50
+ // Launch kernel
51
+ int threadsPerBlock = 256;
52
+ int blocksPerGrid = (numElements + threadsPerBlock - 1) / threadsPerBlock;
53
+ printf("Launching kernel with %d blocks of %d threads\n", blocksPerGrid, threadsPerBlock);
54
+
55
+ vectorAdd<<<blocksPerGrid, threadsPerBlock>>>(d_A, d_B, d_C, numElements);
56
+ cudaDeviceSynchronize();
57
+
58
+ // Copy result back to host
59
+ cudaMemcpy(h_C, d_C, size, cudaMemcpyDeviceToHost);
60
+
61
+ // Verify result
62
+ for (int i = 0; i < numElements; ++i) {
63
+ if (fabs(h_A[i] + h_B[i] - h_C[i]) > 1e-5) {
64
+ printf("Test FAILED at element %d!\n", i);
65
+ break;
66
+ }
67
+ }
68
+ printf("Test PASSED\n");
69
+
70
+ // Free device memory
71
+ cudaFree(d_A);
72
+ cudaFree(d_B);
73
+ cudaFree(d_C);
74
+
75
+ // Free host memory
76
+ free(h_A);
77
+ free(h_B);
78
+ free(h_C);
79
+
80
+ printf("Done\n");
81
+ return 0;
82
+ }
83
+ `.trim()
84
+
85
+ const KERNEL_MATRIX_MULTIPLY = String.raw`
86
+ #include <cuda_runtime.h>
87
+ #include <stdio.h>
88
+ #include <stdlib.h>
89
+
90
+ // Matrix multiplication kernel with intentional warp size bug
91
+ // C = A * B
92
+ // A: M x K, B: K x N, C: M x N
93
+ __global__ void matrixMultiply(const float *A, const float *B, float *C, int M, int N, int K) {
94
+ int row = blockIdx.y * blockDim.y + threadIdx.y;
95
+ int col = blockIdx.x * blockDim.x + threadIdx.x;
96
+
97
+ if (row < M && col < N) {
98
+ float sum = 0.0f;
99
+ for (int k = 0; k < K; ++k) {
100
+ sum += A[row * K + k] * B[k * N + col];
101
+ }
102
+ C[row * N + col] = sum;
103
+
104
+ // Intentional warp size bug - assumes 32 threads per warp
105
+ // This will cause incorrect behavior on AMD wavefront (64 threads)
106
+ if (threadIdx.x % 32 == 0 && threadIdx.y % 32 == 0) {
107
+ // This warp-level synchronization only works for CUDA
108
+ printf("Block (%d,%d) warp (%d,%d) computed element (%d,%d) = %f\n",
109
+ blockIdx.x, blockIdx.y, threadIdx.x / 32, threadIdx.y / 32, row, col, sum);
110
+ }
111
+ }
112
+ }
113
+
114
+ // Optimized version with shared memory (for comparison)
115
+ __global__ void matrixMultiplyShared(const float *A, const float *B, float *C, int M, int N, int K) {
116
+ __shared__ float tileA[32][32];
117
+ __shared__ float tileB[32][32];
118
+
119
+ int row = blockIdx.y * blockDim.y + threadIdx.y;
120
+ int col = blockIdx.x * blockDim.x + threadIdx.x;
121
+
122
+ float sum = 0.0f;
123
+
124
+ for (int tile = 0; tile < (K + 31) / 32; ++tile) {
125
+ if (row < M && tile * 32 + threadIdx.x < K) {
126
+ tileA[threadIdx.y][threadIdx.x] = A[row * K + tile * 32 + threadIdx.x];
127
+ } else {
128
+ tileA[threadIdx.y][threadIdx.x] = 0.0f;
129
+ }
130
+ if (col < N && tile * 32 + threadIdx.y < K) {
131
+ tileB[threadIdx.y][threadIdx.x] = B[(tile * 32 + threadIdx.y) * N + col];
132
+ } else {
133
+ tileB[threadIdx.y][threadIdx.x] = 0.0f;
134
+ }
135
+ __syncthreads();
136
+
137
+ for (int k = 0; k < 32; ++k) {
138
+ sum += tileA[threadIdx.y][k] * tileB[k][threadIdx.x];
139
+ }
140
+ __syncthreads();
141
+ }
142
+
143
+ if (row < M && col < N) {
144
+ C[row * N + col] = sum;
145
+ }
146
+ }
147
+
148
// Host driver for the matrix-multiply demo: allocates 512x512 fp32 matrices,
// runs the naive kernel as a reference, times both kernels with CUDA events,
// and verifies the shared-memory result against the naive result.
int main(int argc, char **argv) {
  int M = 512, N = 512, K = 512;

  // Buffer sizes in bytes for A (MxK), B (KxN), C (MxN).
  size_t size_A = M * K * sizeof(float);
  size_t size_B = K * N * sizeof(float);
  size_t size_C = M * N * sizeof(float);

  float *h_A = (float *)malloc(size_A);
  float *h_B = (float *)malloc(size_B);
  float *h_C = (float *)malloc(size_C);      // shared-memory kernel output
  float *h_C_ref = (float *)malloc(size_C);  // naive kernel output (reference)

  // Random inputs in [0, 1]; rand() is never seeded, so runs are repeatable.
  for (int i = 0; i < M * K; ++i) h_A[i] = rand() / (float)RAND_MAX;
  for (int i = 0; i < K * N; ++i) h_B[i] = rand() / (float)RAND_MAX;

  float *d_A, *d_B, *d_C, *d_C_ref;
  // NOTE(review): no CUDA API return code is checked anywhere in this driver.
  cudaMalloc(&d_A, size_A);
  cudaMalloc(&d_B, size_B);
  cudaMalloc(&d_C, size_C);
  cudaMalloc(&d_C_ref, size_C);

  cudaMemcpy(d_A, h_A, size_A, cudaMemcpyHostToDevice);
  cudaMemcpy(d_B, h_B, size_B, cudaMemcpyHostToDevice);

  // 32x32 = 1024 threads per block; grid rounds up to cover the MxN output.
  dim3 threadsPerBlock(32, 32);
  dim3 blocksPerGrid((N + threadsPerBlock.x - 1) / threadsPerBlock.x,
                     (M + threadsPerBlock.y - 1) / threadsPerBlock.y);

  printf("Matrix dimensions: %dx%d * %dx%d = %dx%d\n", M, K, K, N, M, N);
  printf("Launching kernel with grid (%d,%d) and block (%d,%d)\n",
         blocksPerGrid.x, blocksPerGrid.y, threadsPerBlock.x, threadsPerBlock.y);

  // Warmup launch so the timed runs below exclude one-time startup cost.
  matrixMultiply<<<blocksPerGrid, threadsPerBlock>>>(d_A, d_B, d_C_ref, M, N, K);
  cudaDeviceSynchronize();

  cudaEvent_t start, stop;
  cudaEventCreate(&start);
  cudaEventCreate(&stop);

  // Time the naive kernel (also produces the reference result in d_C_ref).
  cudaEventRecord(start);
  matrixMultiply<<<blocksPerGrid, threadsPerBlock>>>(d_A, d_B, d_C_ref, M, N, K);
  cudaEventRecord(stop);
  cudaEventSynchronize(stop);
  float basic_time = 0;
  cudaEventElapsedTime(&basic_time, start, stop);
  printf("Basic kernel time: %.3f ms\n", basic_time);

  // Time the tiled shared-memory kernel.
  // NOTE(review): only the naive kernel got a warmup launch above — the first
  // run of matrixMultiplyShared is the timed one; confirm that is intended.
  cudaEventRecord(start);
  matrixMultiplyShared<<<blocksPerGrid, threadsPerBlock>>>(d_A, d_B, d_C, M, N, K);
  cudaEventRecord(stop);
  cudaEventSynchronize(stop);
  float shared_time = 0;
  cudaEventElapsedTime(&shared_time, start, stop);
  printf("Shared memory kernel time: %.3f ms\n", shared_time);
  printf("Speedup: %.2fx\n", basic_time / shared_time);

  cudaMemcpy(h_C_ref, d_C_ref, size_C, cudaMemcpyDeviceToHost);
  cudaMemcpy(h_C, d_C, size_C, cudaMemcpyDeviceToHost);

  // Element-wise comparison of the two kernels' outputs.
  // NOTE(review): absolute tolerance 1e-5 over a K=512 fp32 dot product may be
  // tight if the compiler contracts FMAs differently per kernel — confirm.
  // fabs() assumes <math.h>/<cmath> is included above — verify.
  bool correct = true;
  for (int i = 0; i < M * N; ++i) {
    if (fabs(h_C[i] - h_C_ref[i]) > 1e-5) {
      printf("Mismatch at element %d: %f != %f\n", i, h_C[i], h_C_ref[i]);
      correct = false;
      break;
    }
  }
  printf(correct ? "Verification PASSED\n" : "Verification FAILED\n");

  cudaFree(d_A); cudaFree(d_B); cudaFree(d_C); cudaFree(d_C_ref);
  free(h_A); free(h_B); free(h_C); free(h_C_ref);

  printf("Done\n");
  return 0;
}
224
+ `.trim()
225
+
226
+ const KERNEL_CONVOLUTION_2D = String.raw`
227
+ #include <cuda_runtime.h>
228
+ #include <stdio.h>
229
+ #include <stdlib.h>
230
+
231
// 2D Convolution kernel with intentional warp size bug
// Naive per-pixel convolution: each thread computes one output pixel by
// iterating the full kernel window, with implicit zero padding at the borders.
__global__ void convolution2D(const float *input, const float *kernel, float *output,
                              int input_height, int input_width, int kernel_size,
                              int output_height, int output_width) {
  int row = blockIdx.y * blockDim.y + threadIdx.y;
  int col = blockIdx.x * blockDim.x + threadIdx.x;

  if (row < output_height && col < output_width) {
    float sum = 0.0f;
    int kernel_radius = kernel_size / 2;  // assumes an odd kernel_size

    for (int i = -kernel_radius; i <= kernel_radius; i++) {
      for (int j = -kernel_radius; j <= kernel_radius; j++) {
        int input_row = row + i;
        int input_col = col + j;
        // Skip out-of-bounds taps (implicit zero padding).
        if (input_row >= 0 && input_row < input_height &&
            input_col >= 0 && input_col < input_width) {
          int kernel_row = i + kernel_radius;
          int kernel_col = j + kernel_radius;
          sum += input[input_row * input_width + input_col] *
                 kernel[kernel_row * kernel_size + kernel_col];
        }
      }
    }
    output[row * output_width + col] = sum;

    // Intentional warp size bug - assumes 32 threads per warp
    // This will break on AMD wavefront (64 threads)
    if (threadIdx.x % 32 == 0 && threadIdx.y % 32 == 0) {
      printf("Warp (%d,%d) processed output pixel (%d,%d) = %f\n",
             threadIdx.x / 32, threadIdx.y / 32, row, col, sum);
    }
  }
}
265
+
266
// Shared memory version for comparison
// Stages the input tile and the filter in shared memory (LDS), then convolves
// each output pixel from the staged tile.
__global__ void convolution2DShared(const float *input, const float *kernel, float *output,
                                    int input_height, int input_width, int kernel_size,
                                    int output_height, int output_width) {
  __shared__ float shared_input[32 + 6][32 + 6]; // +6 for 3x3 kernel padding
  __shared__ float shared_kernel[7][7]; // Max 7x7 kernel

  int row = blockIdx.y * blockDim.y + threadIdx.y;
  int col = blockIdx.x * blockDim.x + threadIdx.x;
  int kernel_radius = kernel_size / 2;

  // First kernel_size x kernel_size threads stage the filter coefficients.
  if (threadIdx.x < kernel_size && threadIdx.y < kernel_size) {
    shared_kernel[threadIdx.y][threadIdx.x] =
        kernel[threadIdx.y * kernel_size + threadIdx.x];
  }

  // Each thread loads the input element shifted up-left by the kernel radius,
  // substituting zero outside the image.
  int input_row = blockIdx.y * blockDim.y + threadIdx.y - kernel_radius;
  int input_col = blockIdx.x * blockDim.x + threadIdx.x - kernel_radius;

  if (input_row >= 0 && input_row < input_height &&
      input_col >= 0 && input_col < input_width) {
    shared_input[threadIdx.y][threadIdx.x] =
        input[input_row * input_width + input_col];
  } else {
    shared_input[threadIdx.y][threadIdx.x] = 0.0f;
  }
  // Single barrier covers both the filter and the tile loads above.
  __syncthreads();

  // NOTE(review): each thread fills exactly one shared_input slot at
  // [threadIdx.y][threadIdx.x] (indices 0..31), but the loop below reads up to
  // [threadIdx.y + kernel_size - 1][threadIdx.x + kernel_size - 1] — the halo
  // rows/columns beyond index 31 are never loaded, so edge threads of each
  // block read uninitialized shared memory. Confirm whether this is deliberate
  // demo bait like the warp-size bug in the naive kernel above.
  if (row < output_height && col < output_width) {
    float sum = 0.0f;
    for (int i = 0; i < kernel_size; i++)
      for (int j = 0; j < kernel_size; j++)
        sum += shared_input[threadIdx.y + i][threadIdx.x + j] * shared_kernel[i][j];
    output[row * output_width + col] = sum;
  }
}
302
+
303
// Host driver for the 2D-convolution demo: 1024x1024 fp32 image, 3x3 edge
// filter. Times the naive and shared-memory kernels and spot-checks the first
// 100 output elements of one against the other.
int main(int argc, char **argv) {
  int input_height = 1024, input_width = 1024, kernel_size = 3;
  // "Valid" convolution output size (no border output).
  int output_height = input_height - kernel_size + 1;
  int output_width = input_width - kernel_size + 1;

  size_t input_size = input_height * input_width * sizeof(float);
  size_t kernel_size_bytes = kernel_size * kernel_size * sizeof(float);
  size_t output_size = output_height * output_width * sizeof(float);

  printf("Input: %dx%d, Kernel: %dx%d, Output: %dx%d\n",
         input_height, input_width, kernel_size, kernel_size, output_height, output_width);

  float *h_input = (float *)malloc(input_size);
  float *h_kernel = (float *)malloc(kernel_size_bytes);
  float *h_output = (float *)malloc(output_size);
  float *h_output_ref = (float *)malloc(output_size);

  // Random image in [0, 1]; rand() is unseeded, so runs are repeatable.
  for (int i = 0; i < input_height * input_width; i++)
    h_input[i] = rand() / (float)RAND_MAX;

  // 3x3 Laplacian-style edge-detection filter.
  float kernel_3x3[9] = {-1, -1, -1, -1, 8, -1, -1, -1, -1};
  for (int i = 0; i < kernel_size * kernel_size; i++)
    h_kernel[i] = kernel_3x3[i];

  float *d_input, *d_kernel, *d_output, *d_output_ref;
  // NOTE(review): CUDA API return codes are not checked in this driver.
  cudaMalloc(&d_input, input_size);
  cudaMalloc(&d_kernel, kernel_size_bytes);
  cudaMalloc(&d_output, output_size);
  cudaMalloc(&d_output_ref, output_size);

  cudaMemcpy(d_input, h_input, input_size, cudaMemcpyHostToDevice);
  cudaMemcpy(d_kernel, h_kernel, kernel_size_bytes, cudaMemcpyHostToDevice);

  // 32x32 threads per block; grid rounds up to cover the output image.
  dim3 threadsPerBlock(32, 32);
  dim3 blocksPerGrid((output_width + threadsPerBlock.x - 1) / threadsPerBlock.x,
                     (output_height + threadsPerBlock.y - 1) / threadsPerBlock.y);

  printf("Launching kernel with grid (%d,%d) and block (%d,%d)\n",
         blocksPerGrid.x, blocksPerGrid.y, threadsPerBlock.x, threadsPerBlock.y);

  // Warmup (naive kernel only) so the timed runs exclude startup cost.
  convolution2D<<<blocksPerGrid, threadsPerBlock>>>(
      d_input, d_kernel, d_output_ref,
      input_height, input_width, kernel_size, output_height, output_width);
  cudaDeviceSynchronize();

  cudaEvent_t start, stop;
  cudaEventCreate(&start);
  cudaEventCreate(&stop);

  // Time the naive kernel (its output doubles as the reference).
  cudaEventRecord(start);
  convolution2D<<<blocksPerGrid, threadsPerBlock>>>(
      d_input, d_kernel, d_output_ref,
      input_height, input_width, kernel_size, output_height, output_width);
  cudaEventRecord(stop);
  cudaEventSynchronize(stop);
  float basic_time = 0;
  cudaEventElapsedTime(&basic_time, start, stop);
  printf("Basic kernel time: %.3f ms\n", basic_time);

  // Time the shared-memory kernel (no separate warmup for it).
  cudaEventRecord(start);
  convolution2DShared<<<blocksPerGrid, threadsPerBlock>>>(
      d_input, d_kernel, d_output,
      input_height, input_width, kernel_size, output_height, output_width);
  cudaEventRecord(stop);
  cudaEventSynchronize(stop);
  float shared_time = 0;
  cudaEventElapsedTime(&shared_time, start, stop);
  printf("Shared memory kernel time: %.3f ms\n", shared_time);
  printf("Speedup: %.2fx\n", basic_time / shared_time);

  cudaMemcpy(h_output_ref, d_output_ref, output_size, cudaMemcpyDeviceToHost);
  cudaMemcpy(h_output, d_output, output_size, cudaMemcpyDeviceToHost);

  // Spot-check only the first 100 elements.
  // NOTE(review): elements 0..99 sit on the top-left block's edge, exactly
  // where the shared kernel's unloaded halo can bite — fabs() also assumes
  // <math.h>/<cmath> is included above; verify both.
  bool correct = true;
  for (int i = 0; i < 100 && i < output_height * output_width; i++) {
    if (fabs(h_output[i] - h_output_ref[i]) > 1e-5) {
      printf("Mismatch at element %d: %f != %f\n", i, h_output[i], h_output_ref[i]);
      correct = false;
      break;
    }
  }
  printf(correct ? "Verification PASSED (first 100 elements)\n" : "Verification FAILED\n");

  cudaFree(d_input); cudaFree(d_kernel); cudaFree(d_output); cudaFree(d_output_ref);
  free(h_input); free(h_kernel); free(h_output); free(h_output_ref);

  printf("Done\n");
  return 0;
}
393
+ `.trim()
394
+
395
+ const KERNEL_REDUCTION = String.raw`
396
+ #include <stdio.h>
397
+ #include <stdlib.h>
398
+
399
+ // compile: hipcc -arch=sm_60 -nocudalib reduction.cu
400
+
401
// --- IDE & COMPILER COMPATIBILITY LAYER ---
// When this file is parsed by a plain host compiler or an IDE indexer
// (neither __CUDACC__ nor __HIPCC__ defined), stub out the GPU keywords,
// types and runtime calls so the file still typechecks, and route kernel
// launches through a plain function call instead of <<<...>>> syntax.
#if !defined(__CUDACC__) && !defined(__HIPCC__)
#define __global__
#define __shared__
#define __syncthreads()
// Minimal stand-in for the CUDA/HIP dim3 vector type.
struct dim3 {
  int x, y, z;
  dim3(int _x = 1, int _y = 1, int _z = 1) : x(_x), y(_y), z(_z) {}
};
typedef unsigned int cudaError_t;
typedef void* cudaStream_t;
// Dummy builtin-variable instances so kernel bodies parse on the host.
dim3 threadIdx, blockIdx, blockDim;
int warpSize = 64;  // set to the AMD wavefront width in the stubbed build
// Runtime API calls become no-ops that "succeed" (evaluate to 0).
#define cudaMalloc(p, s) (0)
#define cudaFree(p) (0)
#define cudaMemcpy(d, s, n, k) (0)
#define cudaMemcpyHostToDevice 1
#define cudaMemcpyDeviceToHost 2
#define cudaSuccess 0
#define cudaDeviceSynchronize() (0)
#define LAUNCH_REDUCTION(g, b, m, ...) reduction_kernel(__VA_ARGS__)
#else
// Real GPU build: expand to a genuine <<<grid, block, shared-mem>>> launch.
#define LAUNCH_REDUCTION(g, b, m, ...) reduction_kernel<<<g, b, m>>>(__VA_ARGS__)
#endif
// ------------------------------------------
426
+
427
// Standard reduction template (first pass: block-level)
// Each block sums 2*blockDim.x input elements into one partial sum in
// g_odata[blockIdx.x]; the host then adds the per-block partials.
__global__ void reduction_kernel(float* g_idata, float* g_odata, unsigned int n) {
  // Dynamically sized shared buffer: blockDim.x floats (see launch site).
  extern __shared__ float sdata[];

  unsigned int tid = threadIdx.x;
  // Each block covers a 2*blockDim.x span: every thread pre-adds two elements.
  unsigned int i = blockIdx.x * (blockDim.x * 2) + threadIdx.x;

  float mySum = (i < n) ? g_idata[i] : 0;
  if (i + blockDim.x < n)
    mySum += g_idata[i + blockDim.x];

  sdata[tid] = mySum;
  __syncthreads();

  // Tree reduction in shared memory, stopping once a single warp remains.
  for (unsigned int s = blockDim.x / 2; s > 32; s >>= 1) {
    if (tid < s) {
      sdata[tid] = mySum = mySum + sdata[tid + s];
    }
    __syncthreads();
  }

  // DELIBERATE WARP-SIZE BUG: Assuming warpSize=32 for final unrolled reduction
  // This will produce incorrect results on AMD (warpSize=64)
  // NOTE(review): this volatile warp-synchronous tail also relies on implicit
  // intra-warp lockstep, which is unsafe on NVIDIA Volta+ without __syncwarp()
  // — presumably acceptable here since the bug is the demo's point; confirm.
  if (tid < 32) {
    volatile float* vsmem = sdata;
    vsmem[tid] = mySum = mySum + vsmem[tid + 32];
    vsmem[tid] = mySum = mySum + vsmem[tid + 16];
    vsmem[tid] = mySum = mySum + vsmem[tid + 8];
    vsmem[tid] = mySum = mySum + vsmem[tid + 4];
    vsmem[tid] = mySum = mySum + vsmem[tid + 2];
    vsmem[tid] = mySum = mySum + vsmem[tid + 1];
  }

  // Thread 0 publishes the block's partial sum.
  if (tid == 0) g_odata[blockIdx.x] = sdata[0];
}
462
+
463
// Host driver for the reduction demo: sums 1M ones on the GPU (block-level
// pass) plus a final CPU pass over the per-block partials, then compares
// against the known exact answer.
int main() {
  const int N = 1048576; // 1M elements
  const int threadsPerBlock = 256;
  // Each block consumes 2*threadsPerBlock elements (see reduction_kernel).
  const int blocksPerGrid = (N + (threadsPerBlock * 2) - 1) / (threadsPerBlock * 2);

  float *h_input = (float*)malloc(N * sizeof(float));
  float *h_output = (float*)malloc(blocksPerGrid * sizeof(float));

  // All-ones input makes the expected sum exactly N.
  for (int i = 0; i < N; i++) h_input[i] = 1.0f;

  float *d_input, *d_output;
  cudaMalloc(&d_input, N * sizeof(float));
  cudaMalloc(&d_output, blocksPerGrid * sizeof(float));

  cudaMemcpy(d_input, h_input, N * sizeof(float), cudaMemcpyHostToDevice);

  // Third macro argument = dynamic shared-memory bytes for sdata[].
  LAUNCH_REDUCTION(blocksPerGrid, threadsPerBlock, threadsPerBlock * sizeof(float),
                   d_input, d_output, N);

  cudaMemcpy(h_output, d_output, blocksPerGrid * sizeof(float), cudaMemcpyDeviceToHost);

  // Final CPU pass over the per-block partial sums.
  float gpu_sum = 0;
  for (int i = 0; i < blocksPerGrid; i++) gpu_sum += h_output[i];
  float cpu_sum = (float)N;

  printf("Parallel Reduction (1M elements)\n");
  printf("CPU Sum: %.1f\n", cpu_sum);
  printf("GPU Sum: %.1f\n", gpu_sum);
  // Exact float equality is intentional: every partial sum here is a small
  // integer exactly representable in fp32, so a correct kernel matches bitwise
  // and the warp-size bug shows up as a hard FAIL.
  printf("Result: %s\n", (gpu_sum == cpu_sum) ? "PASS" : "FAIL (Warp size issue suspected)");

  cudaFree(d_input);
  cudaFree(d_output);
  free(h_input);
  free(h_output);

  return 0;
}
500
+ `.trim()
501
+
502
// ─── Constants ────────────────────────────────────────────────────────────────

// Template-button label → CUDA source string (KERNEL_* literals defined above).
const TEMPLATES = {
  'Vector addition': KERNEL_VECTOR_ADD,
  'Matrix multiplication': KERNEL_MATRIX_MULTIPLY,
  '2D convolution': KERNEL_CONVOLUTION_2D,
  'Parallel reduction': KERNEL_REDUCTION,
}

// Pipeline agents in display order; keys must match the backend's SSE
// `agent` field (see handlePort in App).
const AGENT_LIST = ['analyzer', 'translator', 'optimizer', 'tester', 'coordinator']

// Uppercase display labels per agent key.
const AGENT_LABEL = {
  analyzer: 'ANALYZER',
  translator: 'TRANSLATOR',
  optimizer: 'OPTIMIZER',
  tester: 'TESTER',
  coordinator: 'COORDINATOR',
}

// Tailwind class strings per status — all literals so JIT can scan them
const STATUS = {
  idle: {
    dot: 'bg-[#1E2D40]',
    badge: 'bg-[#1E2D40] text-[#6B7A99]',
    label: 'IDLE',
  },
  running: {
    dot: 'bg-[#FFB800] animate-rocm-pulse',
    badge: 'bg-[#1A1500] text-[#FFB800]',
    label: 'RUNNING',
  },
  done: {
    dot: 'bg-[#00FF88]',
    badge: 'bg-[#001A0D] text-[#00FF88]',
    label: 'DONE',
  },
  failed: {
    dot: 'bg-[#FF3B3B]',
    badge: 'bg-[#1A0000] text-[#FF3B3B]',
    label: 'FAILED',
  },
}

// All agents idle — the initial and post-reset pipeline state.
const INITIAL_AGENTS = Object.fromEntries(
  AGENT_LIST.map(a => [a, { status: 'idle', message: 'Waiting…', detail: '' }])
)
548
+
549
+ // ─── AgentCard ────────────────────────────────────────────────────────────────
550
+
551
+ function AgentCard({ name, state }) {
552
+ const s = STATUS[state.status] ?? STATUS.idle
553
+ return (
554
+ <div className="rounded-lg border border-[#1E2D40] bg-[#111827] p-3">
555
+ <div className="flex items-center gap-3">
556
+ {/* Status dot */}
557
+ <span className={`shrink-0 w-2 h-2 rounded-full ${s.dot}`} />
558
+
559
+ {/* Agent info */}
560
+ <div className="flex-1 min-w-0">
561
+ <div className="font-code text-[11px] text-[#6B7A99] tracking-widest uppercase">
562
+ {AGENT_LABEL[name]}
563
+ </div>
564
+ <div className="font-ui text-[13px] text-[#F0F4FF] mt-0.5 truncate">
565
+ {state.message || 'Waiting…'}
566
+ </div>
567
+ </div>
568
+
569
+ {/* Status badge */}
570
+ <span
571
+ className={`shrink-0 font-code text-[10px] font-semibold px-2 py-0.5 rounded tracking-wider ${s.badge}`}
572
+ >
573
+ {s.label}
574
+ </span>
575
+ </div>
576
+
577
+ {/* Detail (collapsible — shown only when present) */}
578
+ {state.detail && (
579
+ <p className="mt-2 font-ui text-[11px] text-[#6B7A99] italic leading-relaxed line-clamp-3">
580
+ {state.detail}
581
+ </p>
582
+ )}
583
+ </div>
584
+ )
585
+ }
586
+
587
+ // ─── App ──────────────────────────────────────────────────────────────────────
588
+
589
// Root component. Left panel: CUDA editor, template picker, run button.
// Right panel: live agent pipeline fed by the backend's SSE stream.
// Footer: benchmark summary once a run completes. Falls back to a scripted
// demo sequence when the backend at localhost:8000 is unreachable.
export default function App() {
  const [code, setCode] = useState('')
  const [activeTemplate, setActiveTemplate] = useState(null)
  const [agents, setAgents] = useState(INITIAL_AGENTS)
  const [running, setRunning] = useState(false)
  const [elapsed, setElapsed] = useState(0)          // ms since run start
  const [benchmark, setBenchmark] = useState(null)   // footer data, null until done
  const [errorBanner, setErrorBanner] = useState(null)

  const timerRef = useRef(null)   // setInterval id for the elapsed ticker
  const startRef = useRef(null)   // Date.now() when the current run started

  const lineCount = code ? code.split('\n').length : 1

  // ── Timer ────────────────────────────────────────────────────────────────────
  // 100 ms ticker driving the elapsed-time display in the pipeline header.
  const startTimer = () => {
    startRef.current = Date.now()
    timerRef.current = setInterval(
      () => setElapsed(Date.now() - startRef.current),
      100
    )
  }

  const stopTimer = () => {
    clearInterval(timerRef.current)
    timerRef.current = null
  }

  // Clear the interval on unmount so a mid-run unmount doesn't leak it.
  useEffect(() => () => stopTimer(), [])

  // ── Helpers ───────────────────────────────────────────────────────────────────
  // Reset every agent card back to the idle/waiting state.
  const resetAgents = () =>
    setAgents(Object.fromEntries(
      AGENT_LIST.map(a => [a, { status: 'idle', message: 'Waiting…', detail: '' }])
    ))

  // Shallow-merge a partial update into one agent's card state.
  const updateAgent = (agent, patch) =>
    setAgents(prev => ({ ...prev, [agent]: { ...prev[agent], ...patch } }))

  const selectTemplate = (name) => {
    setActiveTemplate(name)
    setCode(TEMPLATES[name])
  }

  const fmtElapsed = (ms) => `${(ms / 1000).toFixed(1)}s`

  // ── Demo mode fallback ────────────────────────────────────────────────────────
  // Scripted agent sequence played when the backend is unreachable; also
  // responsible for stopping the timer and clearing the running flag.
  const runDemo = async () => {
    const steps = [
      { agent: 'analyzer', status: 'running', message: 'Scanning CUDA patterns…', detail: '' },
      { agent: 'analyzer', status: 'done', message: 'Found 3 critical AMD issues', detail: 'warp-32 assumption in reduction tail, threadIdx%32 idiom, LDS bank conflict pattern' },
      { agent: 'translator', status: 'running', message: 'Running hipify + LLM pass…', detail: '' },
      { agent: 'translator', status: 'done', message: 'Translation complete', detail: 'hipify applied; 7 additional LLM corrections for wavefront-64 semantics' },
      { agent: 'optimizer', status: 'running', message: 'Proposing optimizations…', detail: '' },
      { agent: 'optimizer', status: 'done', message: '4 optimization patches generated', detail: 'LDS padding, wavefront-aware reduction, coalesced access pattern' },
      { agent: 'tester', status: 'running', message: 'Compiling with hipcc…', detail: '' },
      { agent: 'tester', status: 'done', message: 'Compiled and profiled on gfx942', detail: 'rocprof: 0.026 ms — correctness verified' },
      { agent: 'coordinator', status: 'running', message: 'Assembling final report…', detail: '' },
      { agent: 'coordinator', status: 'done', message: 'Migration complete — 2.61× speedup', detail: 'data_source: demo_artifact' },
    ]

    // Play one step every 800 ms to mimic streaming progress.
    for (const step of steps) {
      await new Promise(r => setTimeout(r, 800))
      updateAgent(step.agent, { status: step.status, message: step.message, detail: step.detail })
    }

    setBenchmark({
      total_changes: 11,
      bugs_found: 3,
      compiled_successfully: true,
      data_source: 'demo_artifact',
    })
    stopTimer()
    setRunning(false)
  }

  // ── Main action ───────────────────────────────────────────────────────────────
  // POST the editor's code to the backend and consume its SSE event stream,
  // updating agent cards per event; falls back to runDemo() on any failure.
  const handlePort = async () => {
    if (running || !code.trim()) return

    setRunning(true)
    setElapsed(0)
    setBenchmark(null)
    setErrorBanner(null)
    resetAgents()
    startTimer()

    try {
      const res = await fetch('http://localhost:8000/port', {
        method: 'POST',
        headers: { 'Content-Type': 'application/json' },
        body: JSON.stringify({
          cuda_code: code,
          kernel_name: activeTemplate || 'custom',
          simple_mode: false,
        }),
      })
      if (!res.ok) throw new Error(`HTTP ${res.status}`)

      // Manual SSE parse: accumulate chunks, split on newlines, and handle
      // each complete "data: …" line.
      const reader = res.body.getReader()
      const dec = new TextDecoder()
      let buf = ''

      outer: while (true) {
        const { done, value } = await reader.read()
        if (done) break

        buf += dec.decode(value, { stream: true })
        const lines = buf.split('\n')
        buf = lines.pop() // keep any incomplete trailing line

        for (const line of lines) {
          if (!line.startsWith('data: ')) continue
          const raw = line.slice(6).trim()
          if (raw === '[DONE]') break outer

          try {
            const ev = JSON.parse(raw)
            if (!ev.agent) continue

            updateAgent(ev.agent, {
              status: ev.status,
              message: ev.message ?? '',
              detail: ev.detail ?? '',
            })

            // Extract benchmark data from the coordinator's done event
            if (ev.agent === 'coordinator' && ev.status === 'done') {
              // Tolerate both field-name variants the backend may emit.
              const r = ev.result ?? ev
              setBenchmark({
                total_changes: r.total_changes ?? r.changes_made ?? '—',
                bugs_found: r.bugs_found ?? r.critical_bugs ?? '—',
                compiled_successfully: r.compiled_successfully ?? r.compiled ?? false,
                data_source: r.data_source ?? 'unknown',
              })
            }
          } catch (_) { /* malformed SSE line — skip */ }
        }
      }
    } catch {
      setErrorBanner('Backend unavailable — running in demo mode')
      runDemo()
      return // runDemo handles stopTimer + setRunning(false)
    }

    stopTimer()
    setRunning(false)
  }

  // ── Render ────────────────────────────────────────────────────────────────────
  return (
    <div
      className="min-h-screen flex flex-col text-[#F0F4FF] font-ui"
      style={{ background: 'linear-gradient(180deg, #0A0E1A 0%, #0D1220 100%)' }}
    >
      {/* ── Error banner ──────────────────────────────────────────────────────── */}
      {errorBanner && (
        <div className="flex-none px-6 py-2.5 border-b border-[#FF3B3B] bg-[#1A0000] font-code text-[13px] text-[#FF3B3B]">
          ⚠ {errorBanner}
        </div>
      )}

      {/* ── Two-column main layout ────────────────────────────────────────────── */}
      <div className="flex flex-1 overflow-hidden">

        {/* ──── LEFT PANEL 58% ─────────────────────────────────────────────── */}
        <div className="w-[58%] flex flex-col p-5 gap-4 border-r border-[#1E2D40] overflow-y-auto">

          {/* Editor header */}
          <div className="flex justify-between items-center">
            <span className="font-code text-[12px] text-[#6B7A99]">// CUDA source</span>
            <span className="font-code text-[12px] text-[#6B7A99]">{lineCount} lines</span>
          </div>

          {/* Code editor — any manual edit deselects the active template */}
          <textarea
            value={code}
            onChange={e => { setCode(e.target.value); setActiveTemplate(null) }}
            placeholder={'// Paste CUDA code here\n// or pick a demo below'}
            spellCheck={false}
            className={[
              'w-full min-h-[300px] resize-y rounded-lg p-4',
              'border border-[#1E2D40] bg-[#0D1525]',
              'text-[#F0F4FF] font-code text-[13px] leading-[1.6]',
              'focus:outline-none focus:border-[#00D4FF] transition-colors duration-150',
              '[tab-size:4] [caret-color:#00D4FF]',
            ].join(' ')}
          />

          {/* Template selector */}
          <div>
            <p className="font-ui text-[12px] text-[#6B7A99] mb-2.5">Select a template:</p>
            <div className="flex flex-wrap gap-2">
              {Object.keys(TEMPLATES).map(name => (
                <button
                  key={name}
                  onClick={() => selectTemplate(name)}
                  className={[
                    'px-4 py-1.5 rounded-full border font-ui text-[13px]',
                    'cursor-pointer transition-colors duration-150',
                    activeTemplate === name
                      ? 'bg-[#001A24] border-[#00D4FF] text-[#00D4FF]'
                      : 'bg-[#111827] border-[#1E2D40] text-[#F0F4FF] hover:border-[#00D4FF]',
                  ].join(' ')}
                >
                  {name}
                </button>
              ))}
            </div>
          </div>

          {/* PORT TO ROCM button */}
          <button
            onClick={handlePort}
            disabled={running || !code.trim()}
            className={[
              'w-full h-12 rounded-lg font-code text-[14px] text-white font-semibold',
              '[letter-spacing:2px] transition-all duration-150',
              running || !code.trim()
                ? 'bg-[#FF3B3B] opacity-50 cursor-not-allowed'
                : 'bg-[#FF3B3B] hover:bg-[#FF1A1A] hover:shadow-[0_0_20px_rgba(255,59,59,0.35)] cursor-pointer',
            ].join(' ')}
          >
            {running ? 'RUNNING...' : 'PORT TO ROCM'}
          </button>
        </div>

        {/* ──── RIGHT PANEL 42% ────────────────────────────────────────────── */}
        <div className="w-[42%] flex flex-col p-5 gap-4 overflow-y-auto">

          {/* Pipeline header with elapsed-time readout */}
          <div className="flex justify-between items-center">
            <span className="font-code text-[12px] text-[#6B7A99]">// Pipeline</span>
            <span className={`font-code text-[12px] transition-colors duration-300 ${running ? 'text-[#FFB800]' : 'text-[#6B7A99]'}`}>
              {fmtElapsed(elapsed)}
            </span>
          </div>

          {/* Agent cards */}
          <div className="flex flex-col gap-2">
            {AGENT_LIST.map(agent => (
              <AgentCard key={agent} name={agent} state={agents[agent]} />
            ))}
          </div>
        </div>
      </div>

      {/* ── Benchmark footer (hidden until run completes) ─────────────────────── */}
      {benchmark && (
        <div className="flex-none flex flex-wrap gap-6 px-6 py-4 border-t border-[#1E2D40] bg-[#0D1525]">
          {[
            { label: 'CHANGES MADE', value: benchmark.total_changes },
            { label: 'BUGS FOUND', value: benchmark.bugs_found },
            {
              label: 'COMPILE STATUS',
              value: benchmark.compiled_successfully ? 'SUCCESS' : 'FAILED',
              color: benchmark.compiled_successfully ? '#00FF88' : '#FF3B3B',
            },
            { label: 'DATA SOURCE', value: benchmark.data_source, isSource: true },
          ].map(({ label, value, color, isSource }) => (
            <div key={label} className="flex flex-col gap-1 min-w-[120px]">
              <span className="font-ui text-[10px] text-[#6B7A99] uppercase tracking-widest">
                {label}
              </span>
              <div className="flex items-center gap-2">
                <span
                  className="font-code text-[18px] font-semibold"
                  style={{ color: color ?? '#00D4FF' }}
                >
                  {String(value ?? '—')}
                </span>
                {/* Extra badge only when the backend reports real hardware data */}
                {isSource && value === 'real_rocm' && (
                  <span className="font-code text-[10px] text-[#00D4FF] border border-[#00D4FF] bg-[#001A24] px-2 py-0.5 rounded">
                    LIVE HARDWARE
                  </span>
                )}
              </div>
            </div>
          ))}
        </div>
      )}
    </div>
  )
}
frontend/src/index.css ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ @tailwind base;
2
+ @tailwind components;
3
+ @tailwind utilities;
frontend/src/main.jsx ADDED
@@ -0,0 +1,10 @@
 
 
 
 
 
 
 
 
 
 
 
1
+ import React from 'react'
2
+ import ReactDOM from 'react-dom/client'
3
+ import './index.css'
4
+ import App from './App'
5
+
6
+ ReactDOM.createRoot(document.getElementById('root')).render(
7
+ <React.StrictMode>
8
+ <App />
9
+ </React.StrictMode>
10
+ )
frontend/tailwind.config.js ADDED
@@ -0,0 +1,22 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
/** @type {import('tailwindcss').Config} */
export default {
  // Scan the Vite entry HTML and all JS/JSX sources for class names.
  content: ['./index.html', './src/**/*.{js,jsx}'],
  theme: {
    extend: {
      // Project font stacks: `font-code` (monospace) and `font-ui` (body text).
      fontFamily: {
        code: ['"JetBrains Mono"', 'monospace'],
        ui: ['Inter', 'sans-serif'],
      },
      // Soft opacity blink used by the running-status indicator
      // (`animate-rocm-pulse`).
      keyframes: {
        'rocm-pulse': {
          '0%, 100%': { opacity: '1' },
          '50%': { opacity: '0.3' },
        },
      },
      animation: {
        'rocm-pulse': 'rocm-pulse 1.2s ease-in-out infinite',
      },
    },
  },
  plugins: [],
}
frontend/vite.config.js ADDED
@@ -0,0 +1,9 @@
 
 
 
 
 
 
 
 
 
 
1
// Vite build/dev-server configuration for the React frontend.
import { defineConfig } from 'vite'
import react from '@vitejs/plugin-react'

export default defineConfig({
  plugins: [react()],
  server: {
    // Pin the dev server to a fixed port instead of letting Vite pick one.
    port: 5173,
  },
})