Pranoy Mukherjee committed
Commit 9237011 · 1 Parent(s): a495074

Update SwarmAudit Space demo

Files changed (44):
  1. .env.example +16 -1
  2. .gitignore +1 -0
  3. AMD_VLLM_RUNBOOK.md +190 -0
  4. HF_SPACES_DEPLOY.md +80 -51
  5. README.md +219 -99
  6. app/agents/config_agent.py +114 -0
  7. app/agents/cuda_migration_agent.py +106 -0
  8. app/agents/dependency_agent.py +347 -0
  9. app/agents/docs_agent.py +14 -2
  10. app/agents/error_handling_agent.py +183 -0
  11. app/agents/graph.py +121 -47
  12. app/agents/llm_enrichment.py +85 -0
  13. app/agents/observability_agent.py +155 -0
  14. app/agents/performance_agent.py +39 -11
  15. app/agents/quality_agent.py +14 -2
  16. app/agents/security_agent.py +7 -5
  17. app/agents/synthesizer_agent.py +181 -2
  18. app/config.py +6 -1
  19. app/main.py +7 -1
  20. app/schemas.py +32 -0
  21. app/services/benchmark.py +67 -0
  22. app/services/json_parser.py +42 -0
  23. app/services/llm_client.py +90 -4
  24. app/services/repo_crawler.py +13 -2
  25. app/services/report_formatter.py +288 -0
  26. app/ui/gradio_app.py +1427 -35
  27. tests/test_agent_llm_enrichment.py +104 -0
  28. tests/test_api.py +12 -0
  29. tests/test_benchmark.py +29 -0
  30. tests/test_config_agent.py +56 -0
  31. tests/test_cuda_migration_agent.py +54 -0
  32. tests/test_dependency_agent.py +80 -0
  33. tests/test_docs_agent.py +6 -4
  34. tests/test_error_handling_agent.py +82 -0
  35. tests/test_gradio_app.py +298 -1
  36. tests/test_graph_progress.py +50 -1
  37. tests/test_json_parser.py +44 -0
  38. tests/test_llm_client.py +59 -0
  39. tests/test_observability_agent.py +85 -0
  40. tests/test_repo_crawler.py +11 -0
  41. tests/test_report_exports.py +130 -0
  42. tests/test_security_report.py +67 -1
  43. tests/test_synthesizer_agent.py +94 -3
  44. tests/test_v2_schemas.py +93 -0
.env.example CHANGED
@@ -1,10 +1,25 @@
 APP_NAME=SwarmAudit
 LLM_PROVIDER=mock
-LLM_BASE_URL=http://localhost:8000/v1
+LLM_BASE_URL=http://localhost:9000/v1
 LLM_API_KEY=not-needed-for-mock
 LLM_MODEL=Qwen/Qwen2.5-Coder-32B-Instruct
+ENABLE_LLM_ENRICHMENT=false
+ENABLE_DEPENDENCY_CVE_LOOKUP=false
+DEPENDENCY_OSV_TIMEOUT_SECONDS=20
+MAX_LLM_CHUNKS=5
+LLM_TIMEOUT_SECONDS=120
 MAX_FILES=200
 MAX_FILE_SIZE_KB=250
 MAX_CHARS_PER_CHUNK=12000
 CLONE_TIMEOUT_SECONDS=60
 CLONE_BASE_DIR=.swarm_audit_tmp
+
+# Credit-safe AMD/vLLM first test overrides:
+# LLM_PROVIDER=vllm
+# LLM_BASE_URL=http://YOUR_VLLM_ENDPOINT/v1
+# LLM_API_KEY=swarm-audit-demo-key
+# ENABLE_LLM_ENRICHMENT=true
+# MAX_FILES=100
+# MAX_FILE_SIZE_KB=150
+# MAX_CHARS_PER_CHUNK=8000
+# MAX_LLM_CHUNKS=2
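
These are plain environment variables, so the credit-safe profile can be sanity-checked before an AMD session. A minimal standalone sketch (hypothetical helper, not part of this commit; names and values are taken from the overrides above):

```python
# preflight_env.py - hypothetical helper, not part of this commit.
# Compares the current environment against the credit-safe AMD
# overrides documented in .env.example above.
import os

CREDIT_SAFE = {
    "LLM_PROVIDER": "vllm",
    "ENABLE_LLM_ENRICHMENT": "true",
    "MAX_FILES": "100",
    "MAX_FILE_SIZE_KB": "150",
    "MAX_CHARS_PER_CHUNK": "8000",
    "MAX_LLM_CHUNKS": "2",
}

for key, expected in CREDIT_SAFE.items():
    actual = os.getenv(key, "<unset>")
    status = "ok" if actual == expected else "DIFFERS"
    print(f"{status:8} {key}={actual} (credit-safe: {expected})")
```
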
.gitignore CHANGED
@@ -208,6 +208,7 @@ tempCodeRunnerFile.py
 
 # SwarmAudit local test artifacts
 .tmp_pytest*/
+.tmp_test_exports/
 pytest-cache-files-*
 .swarm_audit_tmp/
 
AMD_VLLM_RUNBOOK.md ADDED
@@ -0,0 +1,190 @@
+# AMD vLLM Runbook
+
+SwarmAudit is AMD-ready through an HTTP-only vLLM integration. The app does not install vLLM. It calls an OpenAI-compatible endpoint that can be hosted on AMD Developer Cloud with ROCm.
+
+## What Was Validated
+
+During development, SwarmAudit was tested against:
+
+- AMD Developer Cloud GPU instance
+- ROCm visible through `rocm-smi`
+- Docker-based vLLM environment
+- `Qwen/Qwen2.5-Coder-32B-Instruct`
+- OpenAI-compatible routes:
+  - `/v1/models`
+  - `/v1/chat/completions`
+- SwarmAudit Diagnostics tab
+- SwarmAudit Benchmark tab
+- real audit run with `ENABLE_LLM_ENRICHMENT=true` and `MAX_LLM_CHUNKS=2`
+
+The AMD instance was destroyed afterward to avoid credit burn.
+
+## Safe Default
+
+Use this locally and on Hugging Face Spaces when AMD is not running:
+
+```text
+LLM_PROVIDER=mock
+ENABLE_LLM_ENRICHMENT=false
+ENABLE_DEPENDENCY_CVE_LOOKUP=false
+```
+
+## Credit-Safe AMD Settings
+
+Use these for the first AMD session:
+
+```text
+LLM_PROVIDER=vllm
+LLM_BASE_URL=http://YOUR_VLLM_ENDPOINT/v1
+LLM_API_KEY=not-needed-if-open
+LLM_MODEL=Qwen/Qwen2.5-Coder-32B-Instruct
+ENABLE_LLM_ENRICHMENT=false
+ENABLE_DEPENDENCY_CVE_LOOKUP=false
+MAX_FILES=100
+MAX_FILE_SIZE_KB=150
+MAX_CHARS_PER_CHUNK=8000
+MAX_LLM_CHUNKS=2
+```
+
+Only switch this after Diagnostics passes:
+
+```text
+ENABLE_LLM_ENRICHMENT=true
+```
+
+## AMD Session Flow
+
+1. Create/start the AMD GPU instance.
+2. SSH into the instance.
+3. Confirm GPU visibility:
+
+```bash
+rocm-smi
+```
+
+4. If the image provides a vLLM container, enter it:
+
+```bash
+docker exec -it rocm /bin/bash
+```
+
+5. Start vLLM:
+
+```bash
+vllm serve Qwen/Qwen2.5-Coder-32B-Instruct \
+  --host 0.0.0.0 \
+  --port 8000 \
+  --dtype float16 \
+  --max-model-len 8192 \
+  --gpu-memory-utilization 0.90
+```
+
+If the provided AMD image recommends different flags, use the provided image guidance first. The important part is that `/v1/models` and `/v1/chat/completions` are reachable.
+
+## Endpoint Checks
+
+From a machine that can reach the endpoint:
+
+```bash
+curl http://YOUR_VLLM_ENDPOINT/v1/models
+```
+
+Then:
+
+```bash
+curl http://YOUR_VLLM_ENDPOINT/v1/chat/completions \
+  -H "Content-Type: application/json" \
+  -d '{
+    "model": "Qwen/Qwen2.5-Coder-32B-Instruct",
+    "messages": [
+      {"role": "user", "content": "Reply with exactly: SwarmAudit LLM OK"}
+    ],
+    "max_tokens": 16,
+    "temperature": 0
+  }'
+```
+
+Expected completion:
+
+```text
+SwarmAudit LLM OK
+```
+
+## Connect SwarmAudit
+
+Set local `.env` or Space secrets:
+
+```text
+LLM_PROVIDER=vllm
+LLM_BASE_URL=http://YOUR_VLLM_ENDPOINT/v1
+LLM_API_KEY=not-needed-if-open
+LLM_MODEL=Qwen/Qwen2.5-Coder-32B-Instruct
+ENABLE_LLM_ENRICHMENT=false
+MAX_LLM_CHUNKS=2
+```
+
+Run:
+
+```bash
+python app.py
+```
+
+Open the Diagnostics tab and confirm:
+
+- provider is `vllm`
+- model is `Qwen/Qwen2.5-Coder-32B-Instruct`
+- `/v1/models` succeeds
+- chat completion succeeds
+
+Then enable:
+
+```text
+ENABLE_LLM_ENRICHMENT=true
+```
+
+Restart the app after changing env vars.
+
+## Credit-Safe Demo Order
+
+1. Local mock test.
+2. HF Space mock test.
+3. Start AMD GPU.
+4. Start vLLM.
+5. Run Diagnostics once.
+6. Run Benchmark once.
+7. Enable enrichment with `MAX_LLM_CHUNKS=2`.
+8. Audit:
+
+```text
+https://github.com/pallets/itsdangerous
+```
+
+9. If good, audit:
+
+```text
+https://github.com/psf/requests
+```
+
+10. Capture screenshots:
+    - `rocm-smi`
+    - vLLM startup/model logs
+    - Diagnostics OK
+    - Benchmark result
+    - SwarmAudit report
+11. Destroy the GPU instance when done.
+
+## Important Billing Note
+
+For AMD GPU droplets, powering off may still reserve billable resources. Destroy the instance when finished unless the provider explicitly says billing stops.
+
+## Fallback
+
+If anything fails, use:
+
+```text
+LLM_PROVIDER=mock
+ENABLE_LLM_ENRICHMENT=false
+```
+
+SwarmAudit still runs the static multi-agent audit and remains demo-ready.
+
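
The two curl probes in "Endpoint Checks" can also be scripted. A minimal sketch, assuming `httpx` (already used by the new dependency agent) and the same placeholder endpoint; it mirrors the documented curl checks rather than the app's own Diagnostics code:

```python
# check_vllm.py - hypothetical script mirroring the curl checks above.
import httpx

BASE_URL = "http://YOUR_VLLM_ENDPOINT/v1"  # placeholder, as in the runbook

# 1. /v1/models should list the served model.
models = httpx.get(f"{BASE_URL}/models", timeout=10).json()
print([entry["id"] for entry in models.get("data", [])])

# 2. A temperature-0 chat completion should echo the probe string.
payload = {
    "model": "Qwen/Qwen2.5-Coder-32B-Instruct",
    "messages": [{"role": "user", "content": "Reply with exactly: SwarmAudit LLM OK"}],
    "max_tokens": 16,
    "temperature": 0,
}
reply = httpx.post(f"{BASE_URL}/chat/completions", json=payload, timeout=60).json()
print(reply["choices"][0]["message"]["content"])  # expected: SwarmAudit LLM OK
```
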
HF_SPACES_DEPLOY.md CHANGED
@@ -1,46 +1,31 @@
-# Hugging Face Spaces Deployment Checklist
+# Hugging Face Spaces Deployment
 
-## Local Preflight
+Use this checklist when updating the SwarmAudit Space.
 
-Run these from the repo root:
+## Recommended Public Demo Mode
 
-```bash
-pip install -r requirements.txt
-python -m pytest
-python app.py
-```
-
-Open:
+Keep the public Space reliable unless a stable AMD/vLLM endpoint will remain online for judging.
 
 ```text
-http://127.0.0.1:7860
-```
-
-Test a small repo first:
-
-```text
-https://github.com/pallets/itsdangerous
+LLM_PROVIDER=mock
+ENABLE_LLM_ENRICHMENT=false
+ENABLE_DEPENDENCY_CVE_LOOKUP=false
 ```
 
-## Create The Space
-
-1. Go to Hugging Face Spaces.
-2. Create a new Space.
-3. Choose SDK: `Gradio`.
-4. Choose hardware: CPU basic for the mock MVP.
-5. Use the AMD hackathon organization if the event requires it.
+This still runs the static multi-agent audit and produces exportable reports.
 
 ## Required Files
 
-These must be at the repo root:
+These files must be at the Space repo root:
 
 ```text
 app.py
 requirements.txt
 README.md
+app/
 ```
 
-The README includes the Space metadata:
+The README front matter tells Spaces how to start the app:
 
 ```yaml
 sdk: gradio
@@ -48,50 +33,94 @@ sdk_version: 6.14.0
 app_file: app.py
 ```
 
-## Environment Variables
+## Local Preflight
 
-For the public mock demo:
+From the repo root:
+
+```bash
+pip install -r requirements.txt
+python -m compileall -q app tests app.py
+python -m pytest --basetemp=.tmp_pytest -p no:cacheprovider
+python app.py
+```
+
+Open the local URL printed by Gradio.
+
+Test:
 
 ```text
-LLM_PROVIDER=mock
+https://github.com/pallets/itsdangerous
 ```
 
-For a later AMD/vLLM deployment:
+Then:
 
 ```text
-LLM_PROVIDER=vllm
-LLM_BASE_URL=http://YOUR_VLLM_ENDPOINT/v1
-LLM_API_KEY=not-needed-if-your-endpoint-does-not-require-one
-LLM_MODEL=Qwen/Qwen2.5-Coder-32B-Instruct
+https://github.com/psf/requests
 ```
 
-## First Hosted Smoke Test
+Confirm:
+
+- agent progress appears
+- findings render
+- severity filters work
+- finding detail panel updates when clicking rows
+- Markdown download works
+- JSON download works
+- Diagnostics tab shows `Provider: mock` and `Status: OK`
+- Benchmark tab works in mock mode
 
-In the deployed Space, test:
+## Space Settings
+
+- SDK: Gradio
+- Hardware: CPU basic for public mock mode
+- App file: `app.py`
+- License: MIT
+- Suggested short description:
 
 ```text
-https://github.com/pallets/itsdangerous
+Multi-agent production-readiness scanner for AI-generated code
 ```
 
-Then test:
+## Deploy / Update
+
+Push the same project code to the hackathon organization Space repo.
+
+After the build starts:
+
+1. Open the Space logs.
+2. Wait for the Gradio startup message.
+3. Open the app.
+4. Run the small repo smoke test.
+5. Keep a screenshot of the working report for submission material.
+
+## Optional AMD/vLLM Mode
+
+Only use this if the endpoint is stable:
 
 ```text
-https://github.com/psf/requests
+LLM_PROVIDER=vllm
+LLM_BASE_URL=http://YOUR_VLLM_ENDPOINT/v1
+LLM_API_KEY=stored-as-space-secret
+LLM_MODEL=Qwen/Qwen2.5-Coder-32B-Instruct
+ENABLE_LLM_ENRICHMENT=false
+MAX_LLM_CHUNKS=2
 ```
 
-Expected behavior:
+Run the Diagnostics tab before enabling enrichment.
+
+After Diagnostics passes:
+
+```text
+ENABLE_LLM_ENRICHMENT=true
+```
 
-- Crawler maps files.
-- Chunker creates chunks.
-- Security, Performance, Quality, and Docs agents run.
-- Synthesizer returns a report.
-- Report shows a prioritized subset while preserving total finding counts.
+If the endpoint is temporary, switch back to mock mode after recording demo proof.
 
-## If The Space Fails
+## Common Issues
 
-Check the Space logs first. Common issues:
-
-- Dependency install failure: verify `requirements.txt`.
-- App import failure: verify root `app.py`.
-- GitHub clone failure: verify Space has outbound internet access.
-- Large repo timeout: test `pallets/itsdangerous` before larger repos.
+- **Build error**: check `requirements.txt` and root `app.py`.
+- **No logs**: verify the code is pushed to the actual Space remote, not only GitHub.
+- **Clone error**: test a smaller public repo first.
+- **Port issue locally**: `python app.py` tries `7860` first and falls back locally when no explicit port env var is set.
+- **Secrets**: never put real API keys in README, screenshots, or `.env.example`.
 
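For the "Deploy / Update" step above, pushing to the Space git remote is the documented path. As an alternative sketch, the `huggingface_hub` client can upload the same tree; that library is not referenced in this commit, and the Space id below is a placeholder:

```python
# deploy_space.py - hypothetical alternative to a git push.
from huggingface_hub import HfApi

api = HfApi()  # expects a write token, e.g. via the HF_TOKEN env var
api.upload_folder(
    folder_path=".",            # repo root: app.py, requirements.txt, README.md, app/
    repo_id="ORG/SwarmAudit",   # placeholder Space id
    repo_type="space",
    ignore_patterns=[".git/*", ".swarm_audit_tmp/*", ".tmp_pytest/*"],
)
```
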
README.md CHANGED
@@ -1,4 +1,4 @@
----
+---
 title: SwarmAudit
 sdk: gradio
 sdk_version: 6.14.0
@@ -9,58 +9,159 @@ license: mit
 
 # SwarmAudit
 
-Paste any public GitHub URL. Get a structured multi-agent code audit in minutes.
+SwarmAudit is a multi-agent production-readiness scanner for AI-generated code.
 
-SwarmAudit is an AI-agent code review system for the AMD Developer Hackathon. It clones a public repository, filters and chunks source files, runs specialized review agents, and returns a severity-ranked report with file references and suggested fixes.
+Paste a public GitHub repository URL and SwarmAudit clones the repo, maps source files, runs specialized static and optional LLM-enriched agents, then returns a prioritized audit report with severity filters, file references, remediation guidance, scores, and Markdown/JSON exports.
 
-The local MVP runs in mock-first mode, so the demo works without waiting for ROCm, vLLM, or MI300X infrastructure. The inference layer is designed to switch to a vLLM-compatible Qwen2.5-Coder endpoint later.
+The project was built for the AMD Developer Hackathon Track 1: AI Agents & Agentic Workflows. It is designed to run reliably in mock/static mode for public demos and switch to AMD Developer Cloud + ROCm + vLLM + Qwen2.5-Coder when GPU credits are available.
 
-## MVP
+## Why It Exists
 
-SwarmAudit currently runs with a mock-first LLM interface so the demo is not blocked by ROCm, vLLM, or AMD MI300X setup. The current graph is:
+AI coding tools are fast, but they often miss production concerns: broken security assumptions, unsafe configuration, missing timeouts, swallowed exceptions, weak observability, dependency risk, and GPU portability issues. SwarmAudit turns those review concerns into a coordinated agent workflow.
+
+The goal is not to replace linters. The goal is to give teams a fast second-pass review for code that might be functionally correct but not production-ready.
+
+## Current Status
+
+Working now:
+
+- Gradio dashboard with agent progress, activity log, summary cards, clickable severity filters, finding inspector, and report downloads.
+- FastAPI backend with `/health`, `/llm/health`, and `/audit`.
+- GitHub repo cloning with file limits and Windows-safe temp paths.
+- Static multi-agent audit path that works without GPU access.
+- Optional vLLM/Qwen enrichment behind config.
+- LLM Diagnostics tab for `/v1/models` and chat-completion checks.
+- Benchmark tab for latency checks against mock or vLLM backends.
+- Markdown and JSON report export.
+- Hugging Face Spaces entrypoint through root `app.py`.
+- AMD/vLLM runbook for credit-safe MI300X testing.
+
+Validated during development:
+
+- Hugging Face Space running in mock/static mode.
+- AMD Developer Cloud GPU instance with ROCm visible through `rocm-smi`.
+- vLLM serving `Qwen/Qwen2.5-Coder-32B-Instruct` through an OpenAI-compatible `/v1` API.
+- SwarmAudit Diagnostics and Benchmark tabs connected successfully to the AMD-hosted vLLM endpoint.
+
+## Agent Workflow
 
 ```text
-GitHub URL -> Crawler -> Chunker -> [Security Agent + Performance Agent + Quality Agent + Docs Agent] -> Synthesizer -> Report
+GitHub URL
+  -> Crawler Agent
+  -> Chunker
+  -> Parallel Analysis Agents
+       Security
+       Performance
+       Quality
+       Docs
+       Config
+       Dependency
+       Error Handling
+       Observability
+       CUDA-to-ROCm
+  -> Synthesizer
+  -> Scores + Roadmap + Report
 ```
 
-## Demo Status
-
-Working locally:
-
-- Gradio UI with live agent progress
-- FastAPI `/health` and `/audit` endpoints
-- GitHub clone and repo scan on public repos
-- Four analysis agents plus synthesizer
-- Prioritized report display with full raw finding totals preserved
-- Hugging Face Spaces-style `app.py` entrypoint
-
-Smoke-tested repos:
-
-- `https://github.com/psf/requests`
-- `https://github.com/pallets/itsdangerous`
-
-Example output is available in [`examples/requests_report_excerpt.md`](examples/requests_report_excerpt.md).
-
-## Architecture
-
-```mermaid
-flowchart LR
-    U[User enters GitHub URL] --> API[FastAPI / Gradio]
-    API --> C[Crawler Agent]
-    C --> F[File Filter]
-    F --> K[Chunker]
-    K --> S[Security Agent]
-    K --> P[Performance Agent]
-    K --> Q[Quality Agent]
-    K --> D[Docs Agent]
-    S --> Y[Synthesizer Agent]
-    P --> Y
-    Q --> Y
-    D --> Y
-    Y --> R[Structured Audit Report]
-```
-
-The graph is intentionally modular: each agent returns strict Pydantic findings, and the synthesizer merges, deduplicates, prioritizes, and formats the final report.
+## Agents
+
+- **Security Agent**: hardcoded secrets, disabled TLS verification, dynamic execution, insecure dependency version ranges.
+- **Performance Agent**: missing HTTP timeouts, blocking work in async paths, nested loops, repeated file reads, synchronous hot-path operations.
+- **Quality Agent**: long functions, high branch density, very short identifiers, TODO/FIXME/HACK comments, maintainability signals.
+- **Docs Agent**: README gaps, missing install/run/test guidance, public Python symbols without docstrings.
+- **Config Agent**: production-dangerous defaults such as debug mode, open CORS, disabled TLS checks, weak secrets, unsafe config patterns.
+- **Dependency Agent**: parses manifests and optionally queries OSV.dev for CVE data when enabled.
+- **Error Handling Agent**: swallowed exceptions, missing timeouts, missing retry/fallback behavior, resilience gaps.
+- **Observability Agent**: `print` logging, sensitive data in logs, missing health checks, missing metrics/tracing signals.
+- **CUDA-to-ROCm Agent**: flags CUDA/NVIDIA-specific assumptions such as `torch.cuda`, `.cuda()`, `pynvml`, `nvidia-smi`, `cudaMalloc`, and `cudaMemcpy`, then suggests ROCm/generic alternatives.
+- **Synthesizer Agent**: deduplicates findings, ranks by severity, computes scores, groups categories, and builds the remediation roadmap.
+
+## Report Output
+
+Each audit report includes:
+
+- Repository URL
+- scanned/skipped file counts
+- severity summary
+- total/displayed/hidden finding counts
+- agent finding counts
+- category summary
+- security score
+- production readiness score
+- remediation roadmap:
+  - This Week
+  - Next Sprint
+  - Backlog
+- structured findings with:
+  - title
+  - severity
+  - file path and line range
+  - explanation
+  - why it matters
+  - suggested fix
+  - agent source
+  - category
+  - confidence when available
+- Markdown export
+- JSON export
+
+The UI displays a prioritized subset for readability while preserving full totals in the structured report.
+
+## AMD + Qwen Integration
+
+SwarmAudit uses Qwen through an OpenAI-compatible vLLM endpoint. The app does not install or run vLLM directly; it calls vLLM over HTTP.
+
+The AMD path improves the project by allowing the same agent workflow to use a stronger code model on AMD GPU infrastructure:
+
+- AMD Developer Cloud provides the GPU runtime.
+- ROCm exposes AMD GPU acceleration.
+- vLLM serves Qwen2.5-Coder as an OpenAI-compatible API.
+- SwarmAudit uses that endpoint for Diagnostics, Benchmark, and optional LLM enrichment.
+- Static agents remain the reliable fallback if the endpoint is unavailable.
+
+Default public/demo mode stays cheap and reliable:
+
+```text
+LLM_PROVIDER=mock
+ENABLE_LLM_ENRICHMENT=false
+```
+
+Credit-safe AMD test mode:
+
+```text
+LLM_PROVIDER=vllm
+LLM_BASE_URL=http://YOUR_VLLM_ENDPOINT/v1
+LLM_API_KEY=swarm-audit-demo-key
+LLM_MODEL=Qwen/Qwen2.5-Coder-32B-Instruct
+ENABLE_LLM_ENRICHMENT=true
+MAX_FILES=100
+MAX_FILE_SIZE_KB=150
+MAX_CHARS_PER_CHUNK=8000
+MAX_LLM_CHUNKS=2
+```
+
+See [`AMD_VLLM_RUNBOOK.md`](AMD_VLLM_RUNBOOK.md) for the exact AMD setup and shutdown checklist.
+
+## Hugging Face Spaces
+
+SwarmAudit is deployable as a Gradio Space using the root `app.py`.
+
+Recommended public Space settings:
+
+- SDK: Gradio
+- Hardware: CPU basic
+- App file: `app.py`
+- Environment:
+
+```text
+LLM_PROVIDER=mock
+ENABLE_LLM_ENRICHMENT=false
+ENABLE_DEPENDENCY_CVE_LOOKUP=false
+```
+
+Keep the public Space in mock/static mode unless a stable vLLM endpoint is available for the full judging window. Do not expose private endpoint keys in the README or UI.
+
+See [`HF_SPACES_DEPLOY.md`](HF_SPACES_DEPLOY.md) for the deployment checklist.
 
 ## Quick Start
 
@@ -70,115 +171,134 @@ python -m venv .venv
 pip install -r requirements.txt
 ```
 
+Run the Gradio app:
+
+```bash
+python app.py
+```
+
+Open the URL printed by Gradio. The app tries port `7860` first and falls back to another local Gradio port if `7860` is busy.
+
 Run the FastAPI backend:
 
 ```bash
 uvicorn app.main:app --reload
 ```
 
-If port 8000 is busy on Windows, use:
+If port `8000` is busy:
 
 ```bash
 uvicorn app.main:app --reload --port 8001
 ```
 
-Health check:
+Health checks:
 
 ```bash
 curl http://127.0.0.1:8000/health
+curl http://127.0.0.1:8000/llm/health
 ```
 
-Audit endpoint:
+Audit API:
 
 ```bash
 curl -X POST http://127.0.0.1:8000/audit \
   -H "Content-Type: application/json" \
-  -d '{"repo_url":"https://github.com/psf/requests"}'
-```
-
-Run the Gradio demo:
-
-```bash
-python -m app.ui.gradio_app
+  -d '{"repo_url":"https://github.com/pallets/itsdangerous"}'
 ```
 
-For Hugging Face Spaces-style startup:
+Recommended first test repos:
 
-```bash
-python app.py
+```text
+https://github.com/pallets/itsdangerous
+https://github.com/psf/requests
 ```
 
-The Gradio app includes example repos, a live agent progress panel, and a structured markdown report panel.
-The launcher binds to `0.0.0.0` and uses `PORT` when provided, which matches hosted Gradio deployment expectations.
-
 ## Configuration
 
-Copy `.env.example` to `.env` for local overrides. Default inference mode is:
-
-```text
-LLM_PROVIDER=mock
-```
-
-Later, set `LLM_PROVIDER=vllm` and point `LLM_BASE_URL` at an OpenAI-compatible vLLM endpoint running Qwen2.5-Coder.
+Copy `.env.example` to `.env` for local overrides.
 
-Key safety limits:
+Important settings:
 
 ```text
+LLM_PROVIDER=mock
+LLM_BASE_URL=http://localhost:9000/v1
+LLM_API_KEY=not-needed-for-mock
+LLM_MODEL=Qwen/Qwen2.5-Coder-32B-Instruct
+ENABLE_LLM_ENRICHMENT=false
+ENABLE_DEPENDENCY_CVE_LOOKUP=false
+MAX_LLM_CHUNKS=5
+LLM_TIMEOUT_SECONDS=120
 MAX_FILES=200
 MAX_FILE_SIZE_KB=250
 MAX_CHARS_PER_CHUNK=12000
+CLONE_TIMEOUT_SECONDS=60
 CLONE_BASE_DIR=.swarm_audit_tmp
 ```
 
-## Report Schema
-
-Each finding includes:
-
-- title
-- severity: CRITICAL, HIGH, MEDIUM, LOW
-- file path and line range
-- description
-- why it matters
-- suggested fix
-- agent source
-
-Reports preserve full finding totals while displaying a prioritized subset for readability. High-severity findings are shown first, repeated low-severity findings are summarized, and warnings explain when lower-priority findings are hidden from the demo report.
-
-## Current Agents
-
-- Security Agent: flags hardcoded secrets, disabled TLS verification, and dynamic code execution.
-- Performance Agent: flags HTTP calls without timeouts, blocking sleep inside async functions, nested loops, file reads in loops, and synchronous Node.js filesystem calls.
-- Quality Agent: flags long functions, high branch density, large source sections, unresolved TODO/FIXME/HACK comments, and very short symbol names.
-- Docs Agent: flags incomplete README guidance and public Python symbols missing docstrings.
-- Synthesizer Agent: deduplicates findings, sorts by severity, and builds the final report.
+Dependency CVE lookup is off by default so demos do not depend on network calls beyond cloning the target repo:
 
-## Hugging Face Spaces
+```text
+ENABLE_DEPENDENCY_CVE_LOOKUP=false
+```
 
-SwarmAudit is ready to launch as a Gradio Space with the root `app.py` entrypoint. Keep `LLM_PROVIDER=mock` for a reliable public demo, then switch to `LLM_PROVIDER=vllm` when an AMD MI300X-hosted Qwen2.5-Coder endpoint is available.
+Enable it only when you want OSV.dev CVE checks:
 
-See [`HF_SPACES_DEPLOY.md`](HF_SPACES_DEPLOY.md) for the deployment checklist.
+```text
+ENABLE_DEPENDENCY_CVE_LOOKUP=true
+```
 
-Recommended Space settings:
+## Tests
 
-- SDK: Gradio
-- App file: `app.py`
-- Python: 3.11 or newer
-- Default env: `LLM_PROVIDER=mock`
+```bash
+python -m compileall -q app tests app.py
+python -m pytest --basetemp=.tmp_pytest -p no:cacheprovider
+```
 
-## AMD MI300X Roadmap
+Current local suite:
 
-The current code path is intentionally mock-first. The next inference phase is:
+```text
+104 tests
+```
 
-1. Start a Qwen2.5-Coder vLLM server on AMD Developer Cloud.
-2. Expose an OpenAI-compatible `/v1/chat/completions` endpoint.
-3. Set `LLM_PROVIDER=vllm`, `LLM_BASE_URL`, and `LLM_MODEL`.
-4. Add LLM enrichment to agent findings while keeping static rules as deterministic guardrails.
-5. Add a benchmark tab with MI300X latency and throughput numbers.
+## Project Structure
 
-## Tests
+```text
+app.py                      # Hugging Face/Gradio entrypoint
+app/
+  main.py                   # FastAPI API
+  config.py                 # environment settings
+  schemas.py                # Pydantic models
+  agents/
+    graph.py                # orchestration
+    security_agent.py
+    performance_agent.py
+    quality_agent.py
+    docs_agent.py
+    config_agent.py
+    dependency_agent.py
+    error_handling_agent.py
+    observability_agent.py
+    cuda_migration_agent.py
+    synthesizer_agent.py
+    llm_enrichment.py
+  services/
+    llm_client.py
+    benchmark.py
+    report_formatter.py
+  ui/
+    gradio_app.py
+tests/
+examples/
+```
 
-```bash
-python -m pytest
-```
+## Submission Notes
+
+For the hackathon submission, highlight:
+
+- agentic workflow with multiple specialized agents
+- Qwen2.5-Coder integration through vLLM
+- AMD Developer Cloud + ROCm validation
+- Hugging Face Space deployment
+- practical business value: production readiness for AI-generated code
+- originality: combining security, operations, dependency, and CUDA-to-ROCm portability checks in one audit workflow
 
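The "Agent Workflow" block in the new README describes a fan-out/fan-in graph. A minimal illustrative sketch of that shape (the real wiring lives in `app/agents/graph.py`, which this commit modifies but does not show; agent names are taken from the README):

```python
# workflow_sketch.py - illustrative fan-out/fan-in, not the actual graph.py.
import asyncio

AGENTS = [
    "Security", "Performance", "Quality", "Docs", "Config",
    "Dependency", "Error Handling", "Observability", "CUDA-to-ROCm",
]

async def run_agent(name: str, chunks: list[str]) -> dict:
    # Stand-in for SecurityAgent.analyze(chunks) and friends.
    return {"agent": name, "findings": []}

async def audit(chunks: list[str]) -> dict:
    # Fan out: every agent sees the same chunks concurrently.
    outputs = await asyncio.gather(*(run_agent(name, chunks) for name in AGENTS))
    # Fan in: a synthesizer step merges, deduplicates, and scores.
    return {"agents": [o["agent"] for o in outputs], "report": "..."}

print(asyncio.run(audit(["chunk-1", "chunk-2"])))
```
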
app/agents/config_agent.py ADDED
@@ -0,0 +1,114 @@
+import re
+
+from app.schemas import AgentOutput, CodeChunk, Finding, Severity
+
+
+CONFIG_PATTERNS = [
+    (
+        re.compile(r"(?i)\bdebug\s*=\s*true\b"),
+        "Debug mode enabled",
+        Severity.high,
+        "Debug mode can expose stack traces, environment details, and interactive debugger behavior.",
+        "Disable debug mode in production and load it from an environment-specific setting.",
+        0.9,
+    ),
+    (
+        re.compile(r"(?i)(allow_origins|cors_allowed_origins)\s*=\s*\[[^\]]*['\"]\*['\"]"),
+        "Wildcard CORS origin",
+        Severity.medium,
+        "A wildcard CORS policy can allow untrusted origins to interact with browser-protected resources.",
+        "Replace '*' with an explicit allowlist of trusted production origins.",
+        0.86,
+    ),
+    (
+        re.compile(r"(?i)access-control-allow-origin['\"]?\s*[:=]\s*['\"]\*['\"]"),
+        "Wildcard Access-Control-Allow-Origin",
+        Severity.medium,
+        "A wildcard Access-Control-Allow-Origin header weakens browser origin protections.",
+        "Set Access-Control-Allow-Origin to specific trusted domains.",
+        0.86,
+    ),
+    (
+        re.compile(r"(?i)verify\s*=\s*false\b"),
+        "TLS verification disabled in configuration",
+        Severity.high,
+        "Disabling TLS verification lets attackers intercept traffic that should be protected.",
+        "Remove verify=False and configure a trusted CA bundle if custom certificates are required.",
+        0.91,
+    ),
+    (
+        re.compile(r"(?i)node_tls_reject_unauthorized\s*=\s*['\"]?0['\"]?"),
+        "Node TLS certificate checks disabled",
+        Severity.high,
+        "Disabling Node.js TLS verification makes HTTPS connections vulnerable to interception.",
+        "Remove NODE_TLS_REJECT_UNAUTHORIZED=0 and fix certificate trust at the environment level.",
+        0.92,
+    ),
+    (
+        re.compile(r"(?i)(secret_key|jwt_secret|session_secret)\s*=\s*['\"](secret|changeme|change-me|password|django-insecure[^'\"]*)['\"]"),
+        "Weak default secret configured",
+        Severity.high,
+        "Default secrets are easy to guess and can compromise sessions, JWTs, or signed cookies.",
+        "Generate a strong secret and load it from a secret manager or environment variable.",
+        0.9,
+    ),
+]
+
+
+class ConfigAgent:
+    name = "Config Agent"
+
+    async def analyze(self, chunks: list[CodeChunk]) -> AgentOutput:
+        findings: list[Finding] = []
+        for chunk in chunks:
+            findings.extend(self._scan_chunk(chunk))
+
+        return AgentOutput(
+            agent_name=self.name,
+            findings=findings,
+            metadata={"chunks_scanned": len(chunks), "mode": "static-rules"},
+        )
+
+    def _scan_chunk(self, chunk: CodeChunk) -> list[Finding]:
+        findings: list[Finding] = []
+        for offset, line in enumerate(chunk.content.splitlines()):
+            actual_line = chunk.line_start + offset
+            for pattern, title, severity, description, fix, confidence in CONFIG_PATTERNS:
+                if pattern.search(line):
+                    findings.append(
+                        self._finding(
+                            title=title,
+                            severity=severity,
+                            chunk=chunk,
+                            line_number=actual_line,
+                            description=description,
+                            suggested_fix=fix,
+                            confidence=confidence,
+                        )
+                    )
+
+        return findings
+
+    def _finding(
+        self,
+        title: str,
+        severity: Severity,
+        chunk: CodeChunk,
+        line_number: int,
+        description: str,
+        suggested_fix: str,
+        confidence: float,
+    ) -> Finding:
+        return Finding(
+            title=title,
+            severity=severity,
+            file_path=chunk.file_path,
+            line_start=line_number,
+            line_end=line_number,
+            description=description,
+            why_it_matters="Development-safe configuration often becomes production risk when copied into deployed environments.",
+            suggested_fix=suggested_fix,
+            agent_source=self.name,
+            category="config",
+            confidence=confidence,
+        )
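
A quick smoke-test sketch for the new agent. It assumes `CodeChunk` can be built from just the fields the scanner reads (`file_path`, `content`, `line_start`); the settings-file content is invented for the example:

```python
# Hypothetical ConfigAgent smoke test; CodeChunk fields are assumed.
import asyncio

from app.agents.config_agent import ConfigAgent
from app.schemas import CodeChunk

chunk = CodeChunk(
    file_path="settings.py",
    content="DEBUG = True\nverify = False\n",
    line_start=1,
)
output = asyncio.run(ConfigAgent().analyze([chunk]))
for finding in output.findings:
    # Expect "Debug mode enabled" and "TLS verification disabled in configuration".
    print(finding.severity, finding.title, finding.line_start)
```
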
app/agents/cuda_migration_agent.py ADDED
@@ -0,0 +1,106 @@
+import re
+
+from app.schemas import AgentOutput, CodeChunk, Finding, Severity
+
+
+CUDA_PATTERNS = [
+    (
+        re.compile(r"\btorch\.cuda\b|\.cuda\s*\("),
+        "PyTorch CUDA-specific API usage",
+        "Use device-agnostic PyTorch code such as torch.device('cuda' if torch.cuda.is_available() else 'cpu') only when portability is intended, and validate the same path under ROCm where PyTorch maps CUDA APIs to HIP.",
+        0.82,
+    ),
+    (
+        re.compile(r"\bpynvml\b|\bnvidia-smi\b"),
+        "NVIDIA-specific GPU monitoring",
+        "Replace NVIDIA-specific monitoring with ROCm tools such as rocm-smi or a metrics adapter that supports AMD GPUs.",
+        0.9,
+    ),
+    (
+        re.compile(r"\bcuda(Malloc|Free|Memcpy|Memset|DeviceSynchronize|GetDevice|SetDevice)\b"),
+        "CUDA runtime API call",
+        "Map CUDA runtime calls to HIP/ROCm equivalents and validate memory transfer semantics on AMD hardware.",
+        0.88,
+    ),
+    (
+        re.compile(r"\b(cublas|cudnn|cufft|curand)\w*\b", re.IGNORECASE),
+        "CUDA library dependency",
+        "Review ROCm equivalents such as rocBLAS, MIOpen, rocFFT, or rocRAND before running on AMD GPUs.",
+        0.86,
+    ),
+    (
+        re.compile(r"\bnccl\w*\b", re.IGNORECASE),
+        "NCCL-specific distributed GPU dependency",
+        "Use RCCL or a framework abstraction that supports AMD GPU collectives.",
+        0.84,
+    ),
+]
+
+
+class CudaMigrationAgent:
+    name = "CUDA-to-ROCm Agent"
+
+    async def analyze(self, chunks: list[CodeChunk]) -> AgentOutput:
+        findings: list[Finding] = []
+        for chunk in chunks:
+            findings.extend(self._scan_chunk(chunk))
+
+        return AgentOutput(
+            agent_name=self.name,
+            findings=findings,
+            metadata={"chunks_scanned": len(chunks), "mode": "static-rules"},
+        )
+
+    def _scan_chunk(self, chunk: CodeChunk) -> list[Finding]:
+        findings: list[Finding] = []
+        seen_titles: set[str] = set()
+
+        for offset, line in enumerate(chunk.content.splitlines()):
+            actual_line = chunk.line_start + offset
+            for pattern, title, fix, confidence in CUDA_PATTERNS:
+                if title in seen_titles:
+                    continue
+                if pattern.search(line):
+                    seen_titles.add(title)
+                    findings.append(
+                        self._finding(
+                            title=title,
+                            chunk=chunk,
+                            line_number=actual_line,
+                            matched_line=line,
+                            suggested_fix=fix,
+                            confidence=confidence,
+                        )
+                    )
+
+        return findings
+
+    def _finding(
+        self,
+        title: str,
+        chunk: CodeChunk,
+        line_number: int,
+        matched_line: str,
+        suggested_fix: str,
+        confidence: float,
+    ) -> Finding:
+        snippet = self._snippet(matched_line)
+        return Finding(
+            title=title,
+            severity=Severity.medium,
+            file_path=chunk.file_path,
+            line_start=line_number,
+            line_end=line_number,
+            description=f"`{snippet}` references a CUDA/NVIDIA-specific API that needs review before AMD ROCm deployment.",
+            why_it_matters="This exact GPU assumption can fail or reduce portability when the app moves from NVIDIA CUDA environments to AMD MI300X/ROCm.",
+            suggested_fix=suggested_fix,
+            agent_source=self.name,
+            category="cuda_migration",
+            confidence=confidence,
+        )
+
+    def _snippet(self, line: str, max_length: int = 96) -> str:
+        normalized = " ".join(line.strip().split())
+        if len(normalized) <= max_length:
+            return normalized
+        return f"{normalized[: max_length - 3]}..."
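
Note the `seen_titles` set: each CUDA pattern fires at most once per chunk, so repeated hits collapse into one finding. A sketch of that behavior, under the same `CodeChunk` field assumptions as the ConfigAgent example above:

```python
# Hypothetical check: two PyTorch CUDA hits in one chunk yield one finding.
import asyncio

from app.agents.cuda_migration_agent import CudaMigrationAgent
from app.schemas import CodeChunk

chunk = CodeChunk(
    file_path="train.py",
    content="model.cuda()\nx = torch.cuda.current_device()\n",
    line_start=10,
)
output = asyncio.run(CudaMigrationAgent().analyze([chunk]))
print(len(output.findings))  # expected: 1, deduplicated by title within the chunk
```
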
app/agents/dependency_agent.py ADDED
@@ -0,0 +1,347 @@
+import json
+import re
+import tomllib
+from dataclasses import dataclass
+from typing import Any
+
+import httpx
+
+from app.config import Settings
+from app.schemas import AgentOutput, CodeChunk, Finding, Severity
+
+
+@dataclass(frozen=True)
+class Dependency:
+    name: str
+    version: str | None
+    ecosystem: str
+    manifest_path: str
+    line_number: int
+    source: str
+
+
+class DependencyAgent:
+    name = "Dependency Agent"
+
+    def __init__(self, settings: Settings):
+        self.settings = settings
+
+    async def analyze(self, chunks: list[CodeChunk]) -> AgentOutput:
+        dependencies = self._parse_dependencies(chunks)
+        findings: list[Finding] = []
+        cves: list[dict[str, Any]] = []
+        warnings: list[str] = []
+
+        if self.settings.enable_dependency_cve_lookup and dependencies:
+            cves, warnings = await self._lookup_cves(dependencies)
+            findings.extend(self._cve_findings(cves))
+
+        return AgentOutput(
+            agent_name=self.name,
+            findings=findings,
+            metadata={
+                "mode": "manifest-parse+optional-osv",
+                "dependency_count": len(dependencies),
+                "manifests": sorted({dependency.manifest_path for dependency in dependencies}),
+                "dependency_cves": cves,
+                "warnings": warnings,
+            },
+        )
+
+    def _parse_dependencies(self, chunks: list[CodeChunk]) -> list[Dependency]:
+        dependencies: list[Dependency] = []
+        seen: set[tuple[str, str, str, str | None]] = set()
+
+        for chunk in chunks:
+            parsed = self._parse_chunk(chunk)
+            for dependency in parsed:
+                key = (
+                    dependency.ecosystem,
+                    dependency.name.lower(),
+                    dependency.manifest_path,
+                    dependency.version,
+                )
+                if key in seen:
+                    continue
+                seen.add(key)
+                dependencies.append(dependency)
+
+        return dependencies
+
+    def _parse_chunk(self, chunk: CodeChunk) -> list[Dependency]:
+        path = chunk.file_path.lower()
+        if path.endswith("requirements.txt"):
+            return self._parse_requirements(chunk)
+        if path.endswith("package.json"):
+            return self._parse_package_json(chunk)
+        if path.endswith("pyproject.toml"):
+            return self._parse_pyproject(chunk)
+        if path.endswith("go.mod"):
+            return self._parse_go_mod(chunk)
+        if path.endswith("cargo.toml"):
+            return self._parse_cargo_toml(chunk)
+        return []
+
+    def _parse_requirements(self, chunk: CodeChunk) -> list[Dependency]:
+        dependencies: list[Dependency] = []
+        for offset, raw_line in enumerate(chunk.content.splitlines()):
+            line = raw_line.split("#", 1)[0].strip()
+            if not line or line.startswith(("-", "git+", "http://", "https://")):
+                continue
+            match = re.match(r"([A-Za-z0-9_.-]+)\s*(?:\[.*?\])?\s*(==|~=|>=|<=|>|<)?\s*([A-Za-z0-9_.*!+-][A-Za-z0-9_.*!+-]*)?", line)
+            if not match:
+                continue
+            name = match.group(1)
+            version = self._clean_version(match.group(3))
+            dependencies.append(
+                Dependency(
+                    name=name,
+                    version=version,
+                    ecosystem="PyPI",
+                    manifest_path=chunk.file_path,
+                    line_number=chunk.line_start + offset,
+                    source=line,
+                )
+            )
+        return dependencies
+
+    def _parse_package_json(self, chunk: CodeChunk) -> list[Dependency]:
+        try:
+            data = json.loads(chunk.content)
+        except json.JSONDecodeError:
+            return []
+
+        dependencies: list[Dependency] = []
+        for section in ("dependencies", "devDependencies", "optionalDependencies"):
+            section_dependencies = data.get(section, {})
+            if not isinstance(section_dependencies, dict):
+                continue
+            for name, raw_version in section_dependencies.items():
+                dependencies.append(
+                    Dependency(
+                        name=name,
+                        version=self._clean_version(str(raw_version)),
+                        ecosystem="npm",
+                        manifest_path=chunk.file_path,
+                        line_number=self._line_for_text(chunk, f'"{name}"'),
+                        source=section,
+                    )
+                )
+        return dependencies
+
+    def _parse_pyproject(self, chunk: CodeChunk) -> list[Dependency]:
+        try:
+            data = tomllib.loads(chunk.content)
+        except tomllib.TOMLDecodeError:
+            return []
+
+        dependencies: list[Dependency] = []
+        project_dependencies = data.get("project", {}).get("dependencies", [])
+        if isinstance(project_dependencies, list):
+            for value in project_dependencies:
+                dependency = self._python_dependency_from_string(str(value), chunk)
+                if dependency:
+                    dependencies.append(dependency)
+
+        poetry_dependencies = data.get("tool", {}).get("poetry", {}).get("dependencies", {})
+        if isinstance(poetry_dependencies, dict):
+            for name, value in poetry_dependencies.items():
+                if name.lower() == "python":
+                    continue
+                dependencies.append(
+                    Dependency(
+                        name=name,
+                        version=self._clean_version(str(value)),
+                        ecosystem="PyPI",
+                        manifest_path=chunk.file_path,
+                        line_number=self._line_for_text(chunk, name),
+                        source="tool.poetry.dependencies",
+                    )
+                )
+        return dependencies
+
+    def _parse_go_mod(self, chunk: CodeChunk) -> list[Dependency]:
+        dependencies: list[Dependency] = []
+        in_require_block = False
+        for offset, raw_line in enumerate(chunk.content.splitlines()):
+            line = raw_line.strip()
+            if line.startswith("require ("):
+                in_require_block = True
+                continue
+            if in_require_block and line == ")":
+                in_require_block = False
+                continue
+            if line.startswith("require "):
+                line = line.removeprefix("require ").strip()
+            elif not in_require_block:
+                continue
+            parts = line.split()
+            if len(parts) < 2:
+                continue
+            dependencies.append(
+                Dependency(
+                    name=parts[0],
+                    version=self._clean_version(parts[1]),
+                    ecosystem="Go",
+                    manifest_path=chunk.file_path,
+                    line_number=chunk.line_start + offset,
+                    source=line,
+                )
+            )
+        return dependencies
+
+    def _parse_cargo_toml(self, chunk: CodeChunk) -> list[Dependency]:
+        try:
+            data = tomllib.loads(chunk.content)
+        except tomllib.TOMLDecodeError:
+            return []
+
+        dependencies: list[Dependency] = []
+        for section in ("dependencies", "dev-dependencies", "build-dependencies"):
+            section_dependencies = data.get(section, {})
+            if not isinstance(section_dependencies, dict):
+                continue
+            for name, value in section_dependencies.items():
+                version = value.get("version") if isinstance(value, dict) else str(value)
+                dependencies.append(
+                    Dependency(
+                        name=name,
+                        version=self._clean_version(str(version)),
+                        ecosystem="crates.io",
+                        manifest_path=chunk.file_path,
+                        line_number=self._line_for_text(chunk, name),
+                        source=section,
+                    )
+                )
+        return dependencies
+
+    def _python_dependency_from_string(self, value: str, chunk: CodeChunk) -> Dependency | None:
+        match = re.match(r"([A-Za-z0-9_.-]+)\s*(?:\[.*?\])?\s*(?:==|~=|>=|<=|>|<)?\s*([A-Za-z0-9_.*!+-]+)?", value)
+        if not match:
+            return None
+        return Dependency(
+            name=match.group(1),
+            version=self._clean_version(match.group(2)),
+            ecosystem="PyPI",
+            manifest_path=chunk.file_path,
+            line_number=self._line_for_text(chunk, match.group(1)),
+            source="project.dependencies",
+        )
+
+    async def _lookup_cves(self, dependencies: list[Dependency]) -> tuple[list[dict[str, Any]], list[str]]:
+        query_dependencies = [dependency for dependency in dependencies if dependency.version]
+        if not query_dependencies:
+            return [], []
+
+        queries = [
+            {
+                "package": {"name": dependency.name, "ecosystem": dependency.ecosystem},
+                "version": dependency.version,
+            }
+            for dependency in query_dependencies
+        ]
+        try:
+            async with httpx.AsyncClient(timeout=self.settings.dependency_osv_timeout_seconds) as client:
+                response = await client.post("https://api.osv.dev/v1/querybatch", json={"queries": queries})
+                response.raise_for_status()
+                payload = response.json()
+        except Exception as exc:
+            return [], [f"Dependency CVE lookup failed gracefully: {exc}"]
+
+        cves: list[dict[str, Any]] = []
+        results = payload.get("results", [])
+        for dependency, result in zip(query_dependencies, results, strict=False):
+            for vuln in result.get("vulns", []):
+                cves.append(self._cve_record(dependency, vuln))
+        return cves, []
+
+    def _cve_record(self, dependency: Dependency, vuln: dict[str, Any]) -> dict[str, Any]:
+        severity = self._severity_from_vuln(vuln)
+        return {
+            "id": vuln.get("id", "UNKNOWN"),
+            "package": dependency.name,
+            "version": dependency.version,
+            "ecosystem": dependency.ecosystem,
+            "severity": severity.value,
+            "summary": vuln.get("summary") or vuln.get("details", "Known vulnerability reported by OSV.dev."),
+            "manifest_path": dependency.manifest_path,
+            "line_number": dependency.line_number,
+            "fixed_version": self._fixed_version(vuln),
+        }
+
+    def _cve_findings(self, cves: list[dict[str, Any]]) -> list[Finding]:
+        findings: list[Finding] = []
+        for cve in cves:
+            package = cve["package"]
+            version = cve.get("version") or "unknown"
+            cve_id = cve["id"]
+            fixed_version = cve.get("fixed_version") or "a non-vulnerable version"
+            findings.append(
+                Finding(
+                    title=f"Vulnerable dependency: {package}",
+                    severity=Severity(cve["severity"]),
+                    file_path=cve["manifest_path"],
+                    line_start=cve["line_number"],
+                    line_end=cve["line_number"],
+                    description=f"{package}@{version} is associated with {cve_id}: {cve['summary']}",
+                    why_it_matters="Known vulnerable dependencies can expose the application to publicly documented exploits.",
+                    suggested_fix=f"Upgrade {package} to {fixed_version} after checking compatibility and lockfile updates.",
+                    agent_source=self.name,
+                    category="dependency",
+                    confidence=0.95,
+                )
+            )
+        return findings
+
+    def _severity_from_vuln(self, vuln: dict[str, Any]) -> Severity:
+        database_severity = str(vuln.get("database_specific", {}).get("severity", "")).upper()
+        if database_severity in Severity._value2member_map_:
+            return Severity(database_severity)
+
+        scores = []
+        for severity in vuln.get("severity", []):
+            score = self._cvss_score(str(severity.get("score", "")))
+            if score is not None:
+                scores.append(score)
+        max_score = max(scores, default=0.0)
+        if max_score >= 9:
+            return Severity.critical
+        if max_score >= 7:
+            return Severity.high
+        if max_score >= 4:
+            return Severity.medium
+        return Severity.low
+
+    def _cvss_score(self, score: str) -> float | None:
+        match = re.search(r"/AV:|CVSS:", score)
+        if match:
+            return None
+        try:
+            return float(score)
+        except ValueError:
+            return None
+
+    def _fixed_version(self, vuln: dict[str, Any]) -> str | None:
+        for affected in vuln.get("affected", []):
+            for range_data in affected.get("ranges", []):
+                for event in range_data.get("events", []):
+                    fixed = event.get("fixed")
+                    if fixed:
+                        return fixed
+        return None
+
+    def _clean_version(self, value: str | None) -> str | None:
+        if not value:
+            return None
+        version = value.strip().strip('"').strip("'")
+        version = re.sub(r"^[\^~<>=!\s]+", "", version)
+        version = version.split(",", 1)[0].strip()
+        if not version or version == "*" or any(char in version for char in "{}"):
+            return None
+        return version
+
+    def _line_for_text(self, chunk: CodeChunk, text: str) -> int:
+        for offset, line in enumerate(chunk.content.splitlines()):
+            if text in line:
+                return chunk.line_start + offset
+        return chunk.line_start
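
The optional CVE lookup posts to OSV.dev's public batch endpoint, exactly as `_lookup_cves()` does above. A standalone sketch of the same request shape for one pinned PyPI package (querybatch returns compact records; full details require a follow-up `/v1/vulns/{id}` call, which is likely why the agent falls back to a generic summary):

```python
# Standalone OSV.dev query matching the shape _lookup_cves() builds.
import httpx

query = {
    "queries": [
        {"package": {"name": "requests", "ecosystem": "PyPI"}, "version": "2.19.0"}
    ]
}
response = httpx.post("https://api.osv.dev/v1/querybatch", json=query, timeout=20)
response.raise_for_status()
for result in response.json().get("results", []):
    for vuln in result.get("vulns", []):
        print(vuln.get("id"))  # advisory ids affecting requests 2.19.0, if any
```
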
app/agents/docs_agent.py CHANGED
@@ -1,6 +1,9 @@
 import re
 
+from app.agents.llm_enrichment import LLMEnrichmentMixin
+from app.config import Settings
 from app.schemas import AgentOutput, CodeChunk, Finding, Severity
+from app.services.llm_client import LLMClient
 
 
 PYTHON_PUBLIC_DEF = re.compile(r"^(\s*)(async\s+def|def|class)\s+([A-Za-z][A-Za-z0-9_]*)")
@@ -9,9 +12,12 @@ README_TEST_TERMS = ("test", "pytest", "unittest")
 README_CONFIG_TERMS = ("config", "environment", ".env", "settings")
 
 
-class DocsAgent:
+class DocsAgent(LLMEnrichmentMixin):
     name = "Docs Agent"
 
+    def __init__(self, llm_client: LLMClient | None = None):
+        self.llm_client = llm_client or LLMClient(Settings())
+
     async def analyze(self, chunks: list[CodeChunk]) -> AgentOutput:
         findings: list[Finding] = []
         readme_seen = False
@@ -37,10 +43,16 @@ class DocsAgent:
             )
         )
 
+        llm_output = await self._run_llm_enrichment(
+            chunks,
+            "Review these code and README chunks for high-confidence documentation gaps, unclear setup instructions, missing usage guidance, or missing public API documentation.",
+        )
+        findings.extend(llm_output.findings)
+
         return AgentOutput(
            agent_name=self.name,
            findings=findings,
-           metadata={"chunks_scanned": len(chunks), "mode": "static-rules"},
+           metadata=self._llm_metadata(chunks, llm_output),
        )
 
     def _scan_readme(self, chunk: CodeChunk) -> list[Finding]:
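
The unchanged `PYTHON_PUBLIC_DEF` pattern is what the Docs Agent's docstring check appears to key on; names must start with a letter, so underscore-prefixed (private) symbols never match. A tiny self-contained sketch of what it captures:

```python
# What PYTHON_PUBLIC_DEF (shown above) captures per line.
import re

PYTHON_PUBLIC_DEF = re.compile(r"^(\s*)(async\s+def|def|class)\s+([A-Za-z][A-Za-z0-9_]*)")

samples = ["def fetch(url):", "    async def run(self):", "class Report:", "def _helper():"]
for line in samples:
    match = PYTHON_PUBLIC_DEF.match(line)
    print(f"{line!r} -> {match.groups() if match else None}")
```
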
app/agents/error_handling_agent.py ADDED
@@ -0,0 +1,183 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
+ import re
+
+ from app.schemas import AgentOutput, CodeChunk, Finding, Severity
+
+
+ EXCEPT_LINE = re.compile(r"^\s*except(?:\s+([\w.]+))?.*:")
+ REQUEST_WITHOUT_TIMEOUT = re.compile(r"\brequests\.(get|post|put|patch|delete)\s*\((?!.*\btimeout\s*=)")
+ JS_FETCH_WITHOUT_ABORT = re.compile(r"\bfetch\s*\([^,\n)]+\)")
+ LOGGING_SIGNALS = ("logging.", "logger.", ".exception(", ".error(", ".warning(", "console.error", "console.warn")
+
+
+ class ErrorHandlingAgent:
+     name = "Error Handling Agent"
+
+     async def analyze(self, chunks: list[CodeChunk]) -> AgentOutput:
+         findings: list[Finding] = []
+         for chunk in chunks:
+             findings.extend(self._scan_chunk(chunk))
+
+         return AgentOutput(
+             agent_name=self.name,
+             findings=findings,
+             metadata={"chunks_scanned": len(chunks), "mode": "static-rules"},
+         )
+
+     def _scan_chunk(self, chunk: CodeChunk) -> list[Finding]:
+         findings: list[Finding] = []
+         lines = chunk.content.splitlines()
+
+         for index, line in enumerate(lines):
+             actual_line = chunk.line_start + index
+             stripped = line.strip()
+
+             except_match = EXCEPT_LINE.match(line)
+             if except_match:
+                 findings.extend(self._scan_except_block(chunk, lines, index, actual_line, except_match.group(1)))
+
+             if REQUEST_WITHOUT_TIMEOUT.search(line):
+                 call_snippet = self._snippet(line)
+                 findings.append(
+                     self._finding(
+                         "External HTTP call without timeout",
+                         Severity.medium,
+                         chunk,
+                         actual_line,
+                         f"`{call_snippet}` makes an external request without an explicit timeout.",
+                         f"Add `timeout=` to `{call_snippet}` and handle timeout exceptions with logging or retry policy.",
+                         0.84,
+                         why_it_matters=(
+                             "This exact call can hold the worker until the operating system or remote service gives up, "
+                             "which makes downstream outages spread into the app."
+                         ),
+                     )
+                 )
+
+             if JS_FETCH_WITHOUT_ABORT.search(line) and "AbortController" not in chunk.content:
+                 call_snippet = self._snippet(line)
+                 findings.append(
+                     self._finding(
+                         "Fetch call has no cancellation timeout",
+                         Severity.low,
+                         chunk,
+                         actual_line,
+                         f"`{call_snippet}` uses fetch without an AbortController or deadline in this scanned chunk.",
+                         "Wrap this fetch in an AbortController timeout or a shared HTTP client that enforces request deadlines.",
+                         0.76,
+                         why_it_matters="A stuck fetch can leave the user action or server-side request waiting with no bounded failure path.",
+                     )
+                 )
+
+         return findings
+
+     def _scan_except_block(
+         self,
+         chunk: CodeChunk,
+         lines: list[str],
+         except_index: int,
+         actual_line: int,
+         exception_name: str | None,
+     ) -> list[Finding]:
+         block_lines = self._collect_block(lines, except_index)
+         normalized = "\n".join(line.strip() for line in block_lines)
+         findings: list[Finding] = []
+
+         if exception_name in (None, "Exception", "BaseException"):
+             exception_label = exception_name or "bare except"
+             findings.append(
+                 self._finding(
+                     "Broad exception handler",
+                     Severity.medium,
+                     chunk,
+                     actual_line,
+                     f"The handler catches `{exception_label}`, which can group unrelated failures into the same recovery path.",
+                     f"Replace `{exception_label}` with the narrow exception type expected here, and let unexpected failures surface with context.",
+                     0.82,
+                     why_it_matters="Broad handlers make different failure modes look identical during incident triage.",
+                 )
+             )
+
+         if not block_lines:
+             return findings
+
+         has_logging = any(signal in normalized for signal in LOGGING_SIGNALS)
+         reraises = re.search(r"(^|\n)raise(\s|$)", normalized) is not None
+         silent_body = normalized in {"pass", "..."} or normalized.startswith("return None")
+
+         if silent_body:
+             body_preview = self._snippet(normalized.splitlines()[0] if normalized else "empty handler")
+             findings.append(
+                 self._finding(
+                     "Exception swallowed without recovery",
+                     Severity.high,
+                     chunk,
+                     actual_line,
+                     f"The except block uses `{body_preview}` and suppresses the failure without logging, retrying, or returning a meaningful fallback.",
+                     "Log the exception with local context, re-raise when the caller must handle it, or return a deliberate typed fallback.",
+                     0.9,
+                     why_it_matters="This handler erases the original failure at the exact point where debugging context is still available.",
+                 )
+             )
+         elif not has_logging and not reraises:
+             first_action = self._snippet(normalized.splitlines()[0] if normalized else "handler body")
+             findings.append(
+                 self._finding(
+                     "Exception handled without logging or re-raise",
+                     Severity.medium,
+                     chunk,
+                     actual_line,
+                     f"The except block continues with `{first_action}` but does not log or re-raise the exception.",
+                     "Add structured logging before this recovery path, or re-raise after adding recovery-specific context.",
+                     0.82,
+                     why_it_matters="The recovery branch may keep execution going while hiding why the branch was needed.",
+                 )
+             )
+
+         return findings
+
+     def _collect_block(self, lines: list[str], except_index: int) -> list[str]:
+         except_line = lines[except_index]
+         except_indent = len(except_line) - len(except_line.lstrip(" "))
+         block: list[str] = []
+
+         for line in lines[except_index + 1 :]:
+             if not line.strip():
+                 continue
+             indent = len(line) - len(line.lstrip(" "))
+             if indent <= except_indent:
+                 break
+             block.append(line)
+
+         return block
+
+     def _finding(
+         self,
+         title: str,
+         severity: Severity,
+         chunk: CodeChunk,
+         line_number: int,
+         description: str,
+         suggested_fix: str,
+         confidence: float,
+         why_it_matters: str | None = None,
+     ) -> Finding:
+         return Finding(
+             title=title,
+             severity=severity,
+             file_path=chunk.file_path,
+             line_start=line_number,
+             line_end=line_number,
+             description=description,
+             why_it_matters=why_it_matters
+             or "Weak error handling turns small downstream failures into outages that are hard to diagnose and recover from.",
+             suggested_fix=suggested_fix,
+             agent_source=self.name,
+             category="error_handling",
+             confidence=confidence,
+         )
+
+     def _snippet(self, line: str, max_length: int = 96) -> str:
+         normalized = " ".join(line.strip().split())
+         if len(normalized) <= max_length:
+             return normalized
+         return f"{normalized[: max_length - 3]}..."
app/agents/graph.py CHANGED
@@ -1,10 +1,16 @@
  from collections.abc import AsyncIterator
+ from dataclasses import dataclass
  from operator import add
- from typing import Annotated, TypedDict
+ from typing import Annotated, Protocol, TypedDict

  from langgraph.graph import END, StateGraph

+ from app.agents.config_agent import ConfigAgent
+ from app.agents.cuda_migration_agent import CudaMigrationAgent
+ from app.agents.dependency_agent import DependencyAgent
  from app.agents.docs_agent import DocsAgent
+ from app.agents.error_handling_agent import ErrorHandlingAgent
+ from app.agents.observability_agent import ObservabilityAgent
  from app.agents.performance_agent import PerformanceAgent
  from app.agents.quality_agent import QualityAgent
  from app.agents.security_agent import SecurityAgent
@@ -16,6 +22,22 @@ from app.services.llm_client import LLMClient
  from app.services.repo_crawler import RepoCrawler


+ class AnalysisAgent(Protocol):
+     name: str
+
+     async def analyze(self, chunks: list[CodeChunk]) -> AgentOutput:
+         ...
+
+
+ @dataclass(frozen=True)
+ class AnalysisAgentSpec:
+     node_name: str
+     state_key: str
+     progress_label: str
+     start_message: str
+     agent: AnalysisAgent
+
+
  class AuditState(TypedDict, total=False):
      repo_url: str
      repo: RepoScanResult
@@ -24,6 +46,11 @@ class AuditState(TypedDict, total=False):
      performance_output: AgentOutput
      quality_output: AgentOutput
      docs_output: AgentOutput
+     config_output: AgentOutput
+     dependency_output: AgentOutput
+     error_handling_output: AgentOutput
+     observability_output: AgentOutput
+     cuda_migration_output: AgentOutput
      report: AuditReport
      progress: Annotated[list[str], add]
@@ -34,32 +61,103 @@ class AuditGraph:
          self.crawler = RepoCrawler(self.settings)
          self.chunker = Chunker(self.settings)
          self.llm_client = LLMClient(self.settings)
-         self.security_agent = SecurityAgent(self.llm_client)
-         self.performance_agent = PerformanceAgent()
-         self.quality_agent = QualityAgent()
-         self.docs_agent = DocsAgent()
+         self.analysis_agents = self._build_agent_registry()
          self.synthesizer = SynthesizerAgent()
          self.graph = self._build_graph()

+     def _build_agent_registry(self) -> list[AnalysisAgentSpec]:
+         return [
+             AnalysisAgentSpec(
+                 node_name="security",
+                 state_key="security_output",
+                 progress_label="Security Agent",
+                 start_message="Security Agent: scanning for risky patterns...",
+                 agent=SecurityAgent(self.llm_client),
+             ),
+             AnalysisAgentSpec(
+                 node_name="performance",
+                 state_key="performance_output",
+                 progress_label="Performance Agent",
+                 start_message="Performance Agent: scanning for slow-path patterns...",
+                 agent=PerformanceAgent(self.llm_client),
+             ),
+             AnalysisAgentSpec(
+                 node_name="quality",
+                 state_key="quality_output",
+                 progress_label="Quality Agent",
+                 start_message="Quality Agent: scanning maintainability signals...",
+                 agent=QualityAgent(self.llm_client),
+             ),
+             AnalysisAgentSpec(
+                 node_name="docs",
+                 state_key="docs_output",
+                 progress_label="Docs Agent",
+                 start_message="Docs Agent: scanning README and public documentation...",
+                 agent=DocsAgent(self.llm_client),
+             ),
+             AnalysisAgentSpec(
+                 node_name="config",
+                 state_key="config_output",
+                 progress_label="Config Agent",
+                 start_message="Config Agent: scanning production configuration risk...",
+                 agent=ConfigAgent(),
+             ),
+             AnalysisAgentSpec(
+                 node_name="dependency",
+                 state_key="dependency_output",
+                 progress_label="Dependency Agent",
+                 start_message="Dependency Agent: parsing manifests and optional CVE data...",
+                 agent=DependencyAgent(self.settings),
+             ),
+             AnalysisAgentSpec(
+                 node_name="error_handling",
+                 state_key="error_handling_output",
+                 progress_label="Error Handling Agent",
+                 start_message="Error Handling Agent: scanning resilience and failure paths...",
+                 agent=ErrorHandlingAgent(),
+             ),
+             AnalysisAgentSpec(
+                 node_name="observability",
+                 state_key="observability_output",
+                 progress_label="Observability Agent",
+                 start_message="Observability Agent: scanning logs, health checks, and telemetry gaps...",
+                 agent=ObservabilityAgent(),
+             ),
+             AnalysisAgentSpec(
+                 node_name="cuda_migration",
+                 state_key="cuda_migration_output",
+                 progress_label="CUDA-to-ROCm Agent",
+                 start_message="CUDA-to-ROCm Agent: scanning NVIDIA-specific GPU assumptions...",
+                 agent=CudaMigrationAgent(),
+             ),
+         ]
+
      def _build_graph(self):
          graph = StateGraph(AuditState)
          graph.add_node("crawl", self._crawl)
          graph.add_node("chunk", self._chunk)
-         graph.add_node("security", self._security)
-         graph.add_node("performance", self._performance)
-         graph.add_node("quality", self._quality)
-         graph.add_node("docs", self._docs)
+         for spec in self.analysis_agents:
+             graph.add_node(spec.node_name, self._make_agent_node(spec))
          graph.add_node("synthesize", self._synthesize)
          graph.set_entry_point("crawl")
          graph.add_edge("crawl", "chunk")
-         graph.add_edge("chunk", "security")
-         graph.add_edge("chunk", "performance")
-         graph.add_edge("chunk", "quality")
-         graph.add_edge("chunk", "docs")
-         graph.add_edge(["security", "performance", "quality", "docs"], "synthesize")
+         agent_node_names = [spec.node_name for spec in self.analysis_agents]
+         for node_name in agent_node_names:
+             graph.add_edge("chunk", node_name)
+         graph.add_edge(agent_node_names, "synthesize")
          graph.add_edge("synthesize", END)
          return graph.compile()

+     def _make_agent_node(self, spec: AnalysisAgentSpec):
+         async def run_agent(state: AuditState) -> AuditState:
+             output = await spec.agent.analyze(state["chunks"])
+             return {
+                 spec.state_key: output,
+                 "progress": [f"{spec.progress_label}: found {len(output.findings)} findings."],
+             }
+
+         return run_agent
+
      async def run(self, repo_url: str) -> AuditReport:
          result = await self.graph.ainvoke({"repo_url": repo_url, "progress": []})
          return result["report"]
@@ -75,26 +173,17 @@ class AuditGraph:
          chunks = self.chunker.chunk_files(repo.files)
          yield f"Chunker: created {len(chunks)} code chunks."

-         yield "Security Agent: scanning for risky patterns..."
-         security_output = await self.security_agent.analyze(chunks)
-         yield f"Security Agent: found {len(security_output.findings)} findings."
-
-         yield "Performance Agent: scanning for slow-path patterns..."
-         performance_output = await self.performance_agent.analyze(chunks)
-         yield f"Performance Agent: found {len(performance_output.findings)} findings."
-
-         yield "Quality Agent: scanning maintainability signals..."
-         quality_output = await self.quality_agent.analyze(chunks)
-         yield f"Quality Agent: found {len(quality_output.findings)} findings."
-
-         yield "Docs Agent: scanning README and public documentation..."
-         docs_output = await self.docs_agent.analyze(chunks)
-         yield f"Docs Agent: found {len(docs_output.findings)} findings."
+         outputs: list[AgentOutput] = []
+         for spec in self.analysis_agents:
+             yield spec.start_message
+             output = await spec.agent.analyze(chunks)
+             outputs.append(output)
+             yield f"{spec.progress_label}: found {len(output.findings)} findings."

          yield "Synthesizer Agent: ranking findings and formatting report..."
          report = await self.synthesizer.synthesize(
              repo,
-             [security_output, performance_output, quality_output, docs_output],
+             outputs,
          )
          yield "Synthesizer Agent: final report generated."
          yield report
@@ -109,26 +198,11 @@ class AuditGraph:
          chunks = self.chunker.chunk_files(state["repo"].files)
          return {"chunks": chunks, "progress": [f"Chunker: created {len(chunks)} code chunks."]}

-     async def _security(self, state: AuditState) -> AuditState:
-         output = await self.security_agent.analyze(state["chunks"])
-         return {"security_output": output, "progress": [f"Security Agent: found {len(output.findings)} findings."]}
-
-     async def _performance(self, state: AuditState) -> AuditState:
-         output = await self.performance_agent.analyze(state["chunks"])
-         return {"performance_output": output, "progress": [f"Performance Agent: found {len(output.findings)} findings."]}
-
-     async def _quality(self, state: AuditState) -> AuditState:
-         output = await self.quality_agent.analyze(state["chunks"])
-         return {"quality_output": output, "progress": [f"Quality Agent: found {len(output.findings)} findings."]}
-
-     async def _docs(self, state: AuditState) -> AuditState:
-         output = await self.docs_agent.analyze(state["chunks"])
-         return {"docs_output": output, "progress": [f"Docs Agent: found {len(output.findings)} findings."]}
-
      async def _synthesize(self, state: AuditState) -> AuditState:
+         outputs = [state[spec.state_key] for spec in self.analysis_agents]
          report = await self.synthesizer.synthesize(
              state["repo"],
-             [state["security_output"], state["performance_output"], state["quality_output"], state["docs_output"]],
+             outputs,
          )
          self.crawler.cleanup(state["repo"])
          return {"report": report, "progress": ["Synthesizer Agent: final report generated."]}
app/agents/llm_enrichment.py ADDED
@@ -0,0 +1,85 @@
+ from app.schemas import AgentOutput, CodeChunk
+ from app.services.json_parser import parse_agent_output
+ from app.services.llm_client import LLMClient
+
+
+ FINDING_SCHEMA_INSTRUCTIONS = (
+     "Return JSON matching this schema exactly:\n"
+     "{\n"
+     '  "findings": [\n'
+     "    {\n"
+     '      "title": "short title",\n'
+     '      "severity": "CRITICAL|HIGH|MEDIUM|LOW",\n'
+     '      "file_path": "path from input",\n'
+     '      "line_start": 1,\n'
+     '      "line_end": 1,\n'
+     '      "description": "what is wrong",\n'
+     '      "why_it_matters": "impact",\n'
+     '      "suggested_fix": "specific fix",\n'
+     '      "agent_source": "agent name"\n'
+     "    }\n"
+     "  ]\n"
+     "}\n"
+ )
+
+ CONTEXTUAL_REVIEW_INSTRUCTIONS = (
+     "Make each finding specific to the exact code shown. "
+     "Reference the concrete function, call, config value, exception handler, or line pattern when visible. "
+     "Do not reuse generic boilerplate language across findings. "
+     "Do not report duplicates of the same issue in the same file unless the risk or fix is meaningfully different. "
+     "Descriptions should explain what this exact code does wrong; suggested_fix should name the specific API, guard, timeout, logger, or config change to use."
+ )
+
+
+ class LLMEnrichmentMixin:
+     name: str
+     llm_client: LLMClient
+
+     async def _run_llm_enrichment(self, chunks: list[CodeChunk], review_instruction: str) -> AgentOutput:
+         if not self.llm_client.settings.enable_llm_enrichment:
+             return AgentOutput(agent_name=self.name)
+
+         selected_chunks = chunks[: self.llm_client.settings.max_llm_chunks]
+         if not selected_chunks:
+             return AgentOutput(agent_name=self.name)
+
+         try:
+             raw_output = await self.llm_client.complete_json(
+                 f"You are a senior {self.name.lower()}. Return only JSON.",
+                 self._build_llm_prompt(selected_chunks, review_instruction),
+             )
+             return parse_agent_output(raw_output, self.name)
+         except Exception as exc:
+             return AgentOutput(
+                 agent_name=self.name,
+                 metadata={"llm_error": str(exc)},
+             )
+
+     def _llm_metadata(self, chunks: list[CodeChunk], llm_output: AgentOutput) -> dict[str, object]:
+         return {
+             "chunks_scanned": len(chunks),
+             "mode": "static-rules-plus-optional-llm",
+             "llm_enrichment_enabled": self.llm_client.settings.enable_llm_enrichment,
+             "llm_findings": len(llm_output.findings),
+             **llm_output.metadata,
+         }
+
+     def _build_llm_prompt(self, chunks: list[CodeChunk], review_instruction: str) -> str:
+         chunk_text = "\n\n".join(
+             [
+                 f"File: {chunk.file_path}\n"
+                 f"Lines: {chunk.line_start}-{chunk.line_end}\n"
+                 "```code\n"
+                 f"{chunk.content[:4000]}\n"
+                 "```"
+                 for chunk in chunks
+             ]
+         )
+         return (
+             f"{review_instruction}\n"
+             f"{CONTEXTUAL_REVIEW_INSTRUCTIONS}\n"
+             f"{FINDING_SCHEMA_INSTRUCTIONS}\n"
+             f'Every finding must set "agent_source" to "{self.name}". '
+             "Only include findings that are specific, actionable, and tied to the provided files.\n\n"
+             f"{chunk_text}"
+         )
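Any agent that sets `self.name` and `self.llm_client` can opt into this mixin; with the default mock provider and `enable_llm_enrichment=False`, `_run_llm_enrichment` short-circuits to an empty `AgentOutput`, so agents degrade cleanly to their static rules. A minimal sketch (the `CodeChunk` field names are inferred from usage in this diff):

```python
import asyncio

from app.agents.llm_enrichment import LLMEnrichmentMixin
from app.config import Settings
from app.schemas import AgentOutput, CodeChunk
from app.services.llm_client import LLMClient


class TinyAgent(LLMEnrichmentMixin):
    name = "Tiny Agent"

    def __init__(self) -> None:
        # Default Settings: provider=mock, enable_llm_enrichment=False.
        self.llm_client = LLMClient(Settings())

    async def analyze(self, chunks: list[CodeChunk]) -> AgentOutput:
        llm_output = await self._run_llm_enrichment(chunks, "Review for anything risky.")
        return AgentOutput(
            agent_name=self.name,
            findings=list(llm_output.findings),
            metadata=self._llm_metadata(chunks, llm_output),
        )


chunk = CodeChunk(file_path="a.py", content="print('hi')\n", line_start=1, line_end=1)
report = asyncio.run(TinyAgent().analyze([chunk]))
print(report.metadata["llm_enrichment_enabled"])  # False: the LLM pass was skipped
```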
app/agents/observability_agent.py ADDED
@@ -0,0 +1,155 @@
+ import re
+ from collections import Counter
+
+ from app.schemas import AgentOutput, CodeChunk, Finding, Severity
+
+
+ PRINT_CALL = re.compile(r"\bprint\s*\(")
+ LOGGER_CALL = re.compile(r"\b(logging|logger|log)\.(debug|info|warning|error|exception|critical)\s*\(")
+ ROUTE_DECLARATION = re.compile(r"@\w*(app|router)\.(get|post|put|patch|delete|route)\s*\(\s*['\"]([^'\"]+)['\"]")
+ JS_ROUTE_DECLARATION = re.compile(r"\b(app|router)\.(get|post|put|patch|delete)\s*\(\s*['\"]([^'\"]+)['\"]")
+ SENSITIVE_LOG_LINE = re.compile(r"(?i)(print|logging|logger|console)\S*\s*\(.*(password|passwd|secret|token|api[_-]?key)")
+ HEALTH_PATHS = {"/health", "/healthz", "/ready", "/readiness", "/live", "/liveness", "/ping"}
+
+
+ class ObservabilityAgent:
+     name = "Observability Agent"
+
+     async def analyze(self, chunks: list[CodeChunk]) -> AgentOutput:
+         findings: list[Finding] = []
+         route_paths: set[str] = set()
+         print_counts: Counter[str] = Counter()
+         logger_seen = False
+
+         for chunk in chunks:
+             chunk_findings, chunk_routes, chunk_prints, chunk_has_logger = self._scan_chunk(chunk)
+             findings.extend(chunk_findings)
+             route_paths.update(chunk_routes)
+             print_counts[chunk.file_path] += chunk_prints
+             logger_seen = logger_seen or chunk_has_logger
+
+         findings.extend(self._print_overuse_findings(chunks, print_counts, logger_seen))
+         if route_paths and not any(path in HEALTH_PATHS for path in route_paths):
+             findings.append(self._missing_health_finding(chunks[0]))
+
+         return AgentOutput(
+             agent_name=self.name,
+             findings=findings,
+             metadata={
+                 "chunks_scanned": len(chunks),
+                 "mode": "static-rules",
+                 "routes_seen": len(route_paths),
+                 "logging_seen": logger_seen,
+             },
+         )
+
+     def _scan_chunk(self, chunk: CodeChunk) -> tuple[list[Finding], set[str], int, bool]:
+         findings: list[Finding] = []
+         routes: set[str] = set()
+         print_count = 0
+         has_logger = False
+
+         for offset, line in enumerate(chunk.content.splitlines()):
+             actual_line = chunk.line_start + offset
+             if PRINT_CALL.search(line):
+                 print_count += 1
+             if LOGGER_CALL.search(line):
+                 has_logger = True
+             if SENSITIVE_LOG_LINE.search(line):
+                 sensitive_term = self._sensitive_term(line)
+                 log_snippet = self._snippet(line)
+                 findings.append(
+                     self._finding(
+                         "Sensitive value may be written to logs",
+                         Severity.high,
+                         chunk,
+                         actual_line,
+                         f"`{log_snippet}` appears to log credential-like data containing `{sensitive_term}`.",
+                         f"Remove `{sensitive_term}` from this log statement and log a masked value or stable identifier instead.",
+                         0.86,
+                         why_it_matters="This exact log statement can put sensitive data into terminal output, CI logs, or hosted application logs.",
+                     )
+                 )
+
+             routes.update(match.group(3) for match in ROUTE_DECLARATION.finditer(line))
+             routes.update(match.group(3) for match in JS_ROUTE_DECLARATION.finditer(line))
+
+         return findings, routes, print_count, has_logger
+
+     def _print_overuse_findings(
+         self,
+         chunks: list[CodeChunk],
+         print_counts: Counter[str],
+         logger_seen: bool,
+     ) -> list[Finding]:
+         if logger_seen:
+             return []
+
+         findings: list[Finding] = []
+         first_chunk_by_path = {chunk.file_path: chunk for chunk in chunks}
+         for file_path, count in print_counts.items():
+             if count < 3:
+                 continue
+             chunk = first_chunk_by_path[file_path]
+             findings.append(
+                 self._finding(
+                     "Print statements used instead of structured logging",
+                     Severity.low,
+                     chunk,
+                     chunk.line_start,
+                     f"This file has {count} print statements and no structured logging was detected in the scanned repo.",
+                     "Use a logger with levels and structured context such as request_id, route, and operation.",
+                     0.72,
+                     why_it_matters=f"`{file_path}` will be harder to filter and correlate in production logs because print output has no severity or structured context.",
+                 )
+             )
+
+         return findings
+
+     def _missing_health_finding(self, chunk: CodeChunk) -> Finding:
+         return self._finding(
+             "Web service has routes but no health endpoint detected",
+             Severity.medium,
+             chunk,
+             chunk.line_start,
+             "The scanned code defines web routes but no /health, /ready, /live, or /ping endpoint was detected.",
+             "Add a lightweight health endpoint that returns process readiness and dependency status appropriate for your deployment.",
+             0.74,
+             why_it_matters="Deployments and uptime checks need a predictable endpoint to tell whether this service process is alive and ready.",
+         )
+
+     def _finding(
+         self,
+         title: str,
+         severity: Severity,
+         chunk: CodeChunk,
+         line_number: int,
+         description: str,
+         suggested_fix: str,
+         confidence: float,
+         why_it_matters: str | None = None,
+     ) -> Finding:
+         return Finding(
+             title=title,
+             severity=severity,
+             file_path=chunk.file_path,
+             line_start=line_number,
+             line_end=line_number,
+             description=description,
+             why_it_matters=why_it_matters
+             or "Without basic observability, production failures are harder to detect, triage, and explain during incidents.",
+             suggested_fix=suggested_fix,
+             agent_source=self.name,
+             category="observability",
+             confidence=confidence,
+         )
+
+     def _sensitive_term(self, line: str) -> str:
+         match = re.search(r"(?i)(password|passwd|secret|token|api[_-]?key)", line)
+         return match.group(1) if match else "secret"
+
+     def _snippet(self, line: str, max_length: int = 96) -> str:
+         normalized = " ".join(line.strip().split())
+         if len(normalized) <= max_length:
+             return normalized
+         return f"{normalized[: max_length - 3]}..."
app/agents/performance_agent.py CHANGED
@@ -1,6 +1,9 @@
  import re

+ from app.agents.llm_enrichment import LLMEnrichmentMixin
+ from app.config import Settings
  from app.schemas import AgentOutput, CodeChunk, Finding, Severity
+ from app.services.llm_client import LLMClient


  REQUEST_WITHOUT_TIMEOUT = re.compile(r"\brequests\.(get|post|put|patch|delete)\s*\((?!.*\btimeout\s*=)")
@@ -9,18 +12,27 @@ PYTHON_LOOP = re.compile(r"^(\s*)(for|while)\b")
  PYTHON_FILE_READ = re.compile(r"\b(open\s*\(|Path\s*\([^)]*\)\.read_(text|bytes)\s*\()")


- class PerformanceAgent:
+ class PerformanceAgent(LLMEnrichmentMixin):
      name = "Performance Agent"

+     def __init__(self, llm_client: LLMClient | None = None):
+         self.llm_client = llm_client or LLMClient(Settings())
+
      async def analyze(self, chunks: list[CodeChunk]) -> AgentOutput:
          findings: list[Finding] = []
          for chunk in chunks:
              findings.extend(self._scan_chunk(chunk))

+         llm_output = await self._run_llm_enrichment(
+             chunks,
+             "Review these code chunks for high-confidence performance issues such as algorithmic bottlenecks, blocking I/O, inefficient repeated work, or expensive hot paths.",
+         )
+         findings.extend(llm_output.findings)
+
          return AgentOutput(
              agent_name=self.name,
              findings=findings,
-             metadata={"chunks_scanned": len(chunks), "mode": "static-rules"},
+             metadata=self._llm_metadata(chunks, llm_output),
          )

      def _scan_chunk(self, chunk: CodeChunk) -> list[Finding]:
@@ -56,50 +68,58 @@ class PerformanceAgent:
                  loop_stack.append(len(loop_match.group(1)))

              if REQUEST_WITHOUT_TIMEOUT.search(line):
+                 call_snippet = self._snippet(line)
                  findings.append(
                      self._finding(
                          "HTTP request without timeout",
                          Severity.medium,
                          chunk,
                          actual_line,
-                         "Network calls without timeouts can hang workers and make the app appear frozen under bad network conditions.",
-                         "Pass an explicit timeout, for example requests.get(url, timeout=10).",
+                         f"`{call_snippet}` does not pass `timeout=`, so this request can wait indefinitely.",
+                         f"Add a bounded timeout to this call, for example `{call_snippet.rstrip(')')}, timeout=10)` if the arguments fit that shape.",
+                         why_it_matters="This specific network call can tie up a worker or thread when the remote service stalls.",
                      )
                  )

              if async_indent_stack and "time.sleep(" in line:
+                 sleep_snippet = self._snippet(line)
                  findings.append(
                      self._finding(
                          "Blocking sleep inside async function",
                          Severity.medium,
                          chunk,
                          actual_line,
-                         "time.sleep blocks the event loop, delaying unrelated async work.",
-                         "Use await asyncio.sleep(...) inside async functions.",
+                         f"`{sleep_snippet}` runs inside an async scope and blocks the event loop.",
+                         "Replace this call with `await asyncio.sleep(...)` or move blocking work out of the async path.",
+                         why_it_matters="Blocking the event loop here delays unrelated coroutines that should be able to keep running.",
                      )
                  )

              if loop_stack and PYTHON_FILE_READ.search(line):
+                 read_snippet = self._snippet(line)
                  findings.append(
                      self._finding(
                          "File read inside loop",
                          Severity.low,
                          chunk,
                          actual_line,
-                         "Repeated disk reads inside loops can dominate runtime and slow audits on larger inputs.",
-                         "Read once before the loop, cache results, or stream data deliberately.",
+                         f"`{read_snippet}` appears inside a loop, so the same path may hit disk repeatedly.",
+                         "Read once before the loop, cache by file path, or stream deliberately if every iteration needs fresh data.",
+                         why_it_matters="Repeated disk I/O in this loop can dominate runtime as the input size grows.",
                      )
                  )

              if SYNC_FS_JS.search(line):
+                 fs_snippet = self._snippet(line)
                  findings.append(
                      self._finding(
                          "Synchronous filesystem call",
                          Severity.low,
                          chunk,
                          actual_line,
-                         "Synchronous filesystem APIs block the Node.js event loop and can hurt request latency.",
-                         "Use async fs.promises APIs or move blocking work outside latency-sensitive paths.",
+                         f"`{fs_snippet}` uses a synchronous filesystem API.",
+                         "Use `fs.promises` or move this filesystem work outside latency-sensitive request paths.",
+                         why_it_matters="This call blocks the Node.js event loop while disk I/O completes.",
                      )
                  )

@@ -113,6 +133,7 @@ class PerformanceAgent:
          line_number: int,
          description: str,
          suggested_fix: str,
+         why_it_matters: str | None = None,
      ) -> Finding:
          return Finding(
              title=title,
@@ -121,7 +142,14 @@ class PerformanceAgent:
              line_start=line_number,
              line_end=line_number,
              description=description,
-             why_it_matters="Performance issues in hot paths can increase latency, resource usage, and demo analysis time.",
+             why_it_matters=why_it_matters
+             or "Performance issues in hot paths can increase latency, resource usage, and demo analysis time.",
              suggested_fix=suggested_fix,
              agent_source=self.name,
          )
+
+     def _snippet(self, line: str, max_length: int = 96) -> str:
+         normalized = " ".join(line.strip().split())
+         if len(normalized) <= max_length:
+             return normalized
+         return f"{normalized[: max_length - 3]}..."
app/agents/quality_agent.py CHANGED
@@ -1,6 +1,9 @@
  import re

+ from app.agents.llm_enrichment import LLMEnrichmentMixin
+ from app.config import Settings
  from app.schemas import AgentOutput, CodeChunk, Finding, Severity
+ from app.services.llm_client import LLMClient


  PYTHON_DEF = re.compile(r"^\s*(async\s+def|def|class)\s+([A-Za-z_][A-Za-z0-9_]*)")
@@ -16,18 +19,27 @@ MAX_BRANCHES_PER_CHUNK = 25
  MIN_MEANINGFUL_NAME_LENGTH = 3


- class QualityAgent:
+ class QualityAgent(LLMEnrichmentMixin):
      name = "Quality Agent"

+     def __init__(self, llm_client: LLMClient | None = None):
+         self.llm_client = llm_client or LLMClient(Settings())
+
      async def analyze(self, chunks: list[CodeChunk]) -> AgentOutput:
          findings: list[Finding] = []
          for chunk in chunks:
              findings.extend(self._scan_chunk(chunk))

+         llm_output = await self._run_llm_enrichment(
+             chunks,
+             "Review these code chunks for high-confidence code quality issues such as overly complex structure, risky abstractions, poor naming, or maintainability problems.",
+         )
+         findings.extend(llm_output.findings)
+
          return AgentOutput(
              agent_name=self.name,
              findings=findings,
-             metadata={"chunks_scanned": len(chunks), "mode": "static-rules"},
+             metadata=self._llm_metadata(chunks, llm_output),
          )

      def _scan_chunk(self, chunk: CodeChunk) -> list[Finding]:
app/agents/security_agent.py CHANGED
@@ -1,6 +1,7 @@
  import re

  from app.schemas import AgentOutput, CodeChunk, Finding, Severity
+ from app.agents.llm_enrichment import LLMEnrichmentMixin
  from app.services.llm_client import LLMClient


@@ -29,7 +30,7 @@ SECURITY_PATTERNS = [
  ]


- class SecurityAgent:
+ class SecurityAgent(LLMEnrichmentMixin):
      name = "Security Agent"

      def __init__(self, llm_client: LLMClient):
@@ -41,15 +42,16 @@
          for chunk in chunks:
              findings.extend(self._scan_chunk(chunk))

-         await self.llm_client.complete_json(
-             "You are a security code review agent. Return JSON findings only.",
-             f"Review {len(chunks)} chunks for security issues.",
+         llm_output = await self._run_llm_enrichment(
+             chunks,
+             "Review these code chunks for high-confidence security issues.",
          )
+         findings.extend(llm_output.findings)

          return AgentOutput(
              agent_name=self.name,
              findings=findings,
-             metadata={"chunks_scanned": len(chunks), "mode": "static-rules-plus-llm-interface"},
+             metadata=self._llm_metadata(chunks, llm_output),
          )

      def _scan_chunk(self, chunk: CodeChunk) -> list[Finding]:
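With the old discarded-result `complete_json` call gone, the LLM pass now contributes findings only when enrichment is switched on; under default settings the agent records that fact in its metadata instead of failing. A small smoke test, sketched under the default mock `Settings` and the inferred `CodeChunk` fields:

```python
import asyncio

from app.agents.security_agent import SecurityAgent
from app.config import Settings
from app.schemas import CodeChunk
from app.services.llm_client import LLMClient

async def main() -> None:
    agent = SecurityAgent(LLMClient(Settings()))
    chunk = CodeChunk(file_path="app.py", content="eval(user_input)\n", line_start=1, line_end=1)
    output = await agent.analyze([chunk])
    # Static rules still run; the metadata flags show the LLM pass was skipped.
    print(output.metadata["llm_enrichment_enabled"], output.metadata["llm_findings"])

asyncio.run(main())
```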
app/agents/synthesizer_agent.py CHANGED
@@ -13,9 +13,59 @@ MAX_DISPLAY_FINDINGS_BY_AGENT = {
      "Security Agent": 20,
      "Performance Agent": 12,
      "Quality Agent": 10,
-     "Docs Agent": 8,
+     "Docs Agent": 12,
+ }
+ MAX_DISPLAY_BY_SEVERITY = {
+     Severity.critical: None,
+     Severity.high: 30,
+     Severity.medium: 18,
+     Severity.low: 12,
+ }
+
+ SECURITY_CATEGORIES = {
+     "security",
+     "config",
+     "dependency",
+     "cuda_migration",
+ }
+
+ PRODUCTION_CATEGORIES = {
+     "performance",
+     "quality",
+     "docs",
+     "error_handling",
+     "observability",
+ }
+
+ AGENT_CATEGORY_DEFAULTS = {
+     "Security Agent": "security",
+     "Config Agent": "config",
+     "Dependency Agent": "dependency",
+     "CUDA-to-ROCm Agent": "cuda_migration",
+     "Performance Agent": "performance",
+     "Quality Agent": "quality",
+     "Docs Agent": "docs",
+     "Error Handling Agent": "error_handling",
+     "Observability Agent": "observability",
  }

+ SECURITY_WEIGHTS = {
+     Severity.critical: 24,
+     Severity.high: 12,
+     Severity.medium: 5,
+     Severity.low: 1,
+ }
+
+ PRODUCTION_WEIGHTS = {
+     Severity.critical: 16,
+     Severity.high: 9,
+     Severity.medium: 4,
+     Severity.low: 1,
+ }
+
+ MAX_SECURITY_CATEGORY_PENALTY = 35
+ MAX_PRODUCTION_CATEGORY_PENALTY = 28
+

  class SynthesizerAgent:
      name = "Synthesizer Agent"
@@ -30,6 +80,21 @@ class SynthesizerAgent:

          agent_counts = {output.agent_name: len(output.findings) for output in outputs}
          display_findings, hidden_count, warnings = self._select_display_findings(all_findings, agent_counts)
+         category_summary = self._category_summary(all_findings)
+         security_score, production_score = self._compute_scores(all_findings)
+         roadmap = self._build_roadmap(all_findings)
+         dependency_cves = [
+             cve
+             for output in outputs
+             for cve in output.metadata.get("dependency_cves", [])
+             if isinstance(cve, dict)
+         ]
+         dependency_warnings = [
+             warning
+             for output in outputs
+             for warning in output.metadata.get("warnings", [])
+             if isinstance(warning, str)
+         ]

          return AuditReport(
              repo_url=repo.repo_url,
@@ -41,8 +106,13 @@
              displayed_findings_count=len(display_findings),
              hidden_findings_count=hidden_count,
              agent_finding_counts=agent_counts,
+             category_summary=category_summary,
+             security_score=security_score,
+             production_score=production_score,
+             remediation_roadmap=roadmap,
+             dependency_cves=dependency_cves,
              agents_run=[output.agent_name for output in outputs] + [self.name],
-             warnings=repo.warnings + warnings,
+             warnings=repo.warnings + dependency_warnings + warnings,
          )

      def _dedupe(self, findings: list[Finding]) -> list[Finding]:
@@ -63,16 +133,47 @@
      ) -> tuple[list[Finding], int, list[str]]:
          selected: list[Finding] = []
          selected_by_agent = {agent_name: 0 for agent_name in agent_counts}
+         selected_by_severity = {severity: 0 for severity in Severity}

          for finding in findings:
              agent_limit = MAX_DISPLAY_FINDINGS_BY_AGENT.get(finding.agent_source, MAX_DISPLAY_FINDINGS)
+             severity_limit = MAX_DISPLAY_BY_SEVERITY[finding.severity]
+             if severity_limit is not None and selected_by_severity[finding.severity] >= severity_limit:
+                 continue
              if selected_by_agent.get(finding.agent_source, 0) >= agent_limit:
                  continue
              if len(selected) >= MAX_DISPLAY_FINDINGS:
                  break
              selected.append(finding)
              selected_by_agent[finding.agent_source] = selected_by_agent.get(finding.agent_source, 0) + 1
+             selected_by_severity[finding.severity] += 1
+
+         if not any(finding.severity == Severity.low for finding in selected):
+             low_findings = [finding for finding in findings if finding.severity == Severity.low]
+             low_slots = MAX_DISPLAY_BY_SEVERITY[Severity.low] or 0
+             for finding in low_findings[:low_slots]:
+                 if finding in selected:
+                     continue
+                 if len(selected) >= MAX_DISPLAY_FINDINGS:
+                     replace_index = self._replaceable_display_index(selected)
+                     if replace_index is None:
+                         break
+                     replaced = selected[replace_index]
+                     selected_by_agent[replaced.agent_source] = max(
+                         0,
+                         selected_by_agent.get(replaced.agent_source, 0) - 1,
+                     )
+                     selected_by_severity[replaced.severity] = max(
+                         0,
+                         selected_by_severity[replaced.severity] - 1,
+                     )
+                     selected[replace_index] = finding
+                 else:
+                     selected.append(finding)
+                     selected_by_agent[finding.agent_source] = selected_by_agent.get(finding.agent_source, 0) + 1
+                     selected_by_severity[finding.severity] += 1

+         selected.sort(key=self._sort_key)
          hidden_count = max(0, len(findings) - len(selected))
          warnings: list[str] = []
          if hidden_count:
@@ -89,6 +190,13 @@

          return selected, hidden_count, warnings

+     def _replaceable_display_index(self, selected: list[Finding]) -> int | None:
+         for severity in (Severity.low, Severity.medium):
+             for index in range(len(selected) - 1, -1, -1):
+                 if selected[index].severity == severity:
+                     return index
+         return None
+
      def _sort_key(self, finding: Finding) -> tuple[int, int, str, int]:
          test_file_penalty = 1 if self._is_test_file(finding.file_path) and finding.severity != Severity.critical else 0
          return (SEVERITY_ORDER[finding.severity], test_file_penalty, finding.file_path, finding.line_start)
@@ -96,3 +204,74 @@
      def _is_test_file(self, file_path: str) -> bool:
          normalized = file_path.lower().replace("\\", "/")
          return "/test" in normalized or normalized.startswith("test") or "_test." in normalized
+
+     def _category_for(self, finding: Finding) -> str:
+         if finding.category:
+             return finding.category
+         return AGENT_CATEGORY_DEFAULTS.get(finding.agent_source, finding.agent_source.replace(" Agent", "").lower())
+
+     def _category_summary(self, findings: list[Finding]) -> dict[str, int]:
+         summary: dict[str, int] = {}
+         for finding in findings:
+             category = self._category_for(finding)
+             summary[category] = summary.get(category, 0) + 1
+         return dict(sorted(summary.items(), key=lambda item: (-item[1], item[0])))
+
+     def _compute_scores(self, findings: list[Finding]) -> tuple[int, int]:
+         security_penalties: dict[str, int] = {}
+         production_penalties: dict[str, int] = {}
+
+         for finding in findings:
+             category = self._category_for(finding)
+             if category in SECURITY_CATEGORIES or finding.agent_source in {
+                 "Security Agent",
+                 "Config Agent",
+                 "Dependency Agent",
+                 "CUDA-to-ROCm Agent",
+             }:
+                 security_penalties[category] = security_penalties.get(category, 0) + SECURITY_WEIGHTS[finding.severity]
+             if category in PRODUCTION_CATEGORIES or finding.agent_source in {
+                 "Performance Agent",
+                 "Quality Agent",
+                 "Docs Agent",
+                 "Error Handling Agent",
+                 "Observability Agent",
+             }:
+                 production_penalties[category] = (
+                     production_penalties.get(category, 0) + PRODUCTION_WEIGHTS[finding.severity]
+                 )

+         security_penalty = sum(min(value, MAX_SECURITY_CATEGORY_PENALTY) for value in security_penalties.values())
+         production_penalty = sum(min(value, MAX_PRODUCTION_CATEGORY_PENALTY) for value in production_penalties.values())
+         return self._score_from_penalty(security_penalty), self._score_from_penalty(production_penalty)
+
+     def _score_from_penalty(self, penalty: int) -> int:
+         if penalty <= 0:
+             return 100
+         return max(1, round(10000 / (100 + penalty)))
+
+     def _build_roadmap(self, findings: list[Finding]) -> dict[str, list[dict[str, str]]]:
+         critical = [finding for finding in findings if finding.severity == Severity.critical]
+         high = [finding for finding in findings if finding.severity == Severity.high]
+         medium = [finding for finding in findings if finding.severity == Severity.medium]
+         low = [finding for finding in findings if finding.severity == Severity.low]
+
+         this_week = critical + high[:5]
+         next_sprint = high[5:] + medium[:10]
+         backlog = medium[10:] + low
+
+         return {
+             "this_week": [self._roadmap_item(finding) for finding in this_week],
+             "next_sprint": [self._roadmap_item(finding) for finding in next_sprint],
+             "backlog": [self._roadmap_item(finding) for finding in backlog],
+         }
+
+     def _roadmap_item(self, finding: Finding) -> dict[str, str]:
+         return {
+             "title": finding.title,
+             "severity": finding.severity.value,
+             "category": self._category_for(finding),
+             "file_path": finding.file_path,
+             "line_start": str(finding.line_start),
+             "agent_source": finding.agent_source,
+         }
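The scoring curve is hyperbolic rather than linear: `score = max(1, round(10000 / (100 + penalty)))`, with per-category caps so a single noisy category cannot drag the score to zero. Two worked examples under the constants above:

```python
# One critical security finding: penalty 24, so 10000 / 124 = 80.6 -> 81.
print(max(1, round(10000 / (100 + 24))))  # 81

# Forty low config findings would accrue a raw penalty of 40, but
# MAX_SECURITY_CATEGORY_PENALTY = 35 caps the category: 10000 / 135 -> 74.
print(max(1, round(10000 / (100 + 35))))  # 74
```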
app/config.py CHANGED
@@ -6,9 +6,14 @@ from pydantic_settings import BaseSettings, SettingsConfigDict
  class Settings(BaseSettings):
      app_name: str = "SwarmAudit"
      llm_provider: str = "mock"
-     llm_base_url: str = "http://localhost:8000/v1"
+     llm_base_url: str = "http://localhost:9000/v1"
      llm_api_key: str = "not-needed-for-mock"
      llm_model: str = "Qwen/Qwen2.5-Coder-32B-Instruct"
+     enable_llm_enrichment: bool = False
+     enable_dependency_cve_lookup: bool = False
+     dependency_osv_timeout_seconds: int = 20
+     max_llm_chunks: int = 5
+     llm_timeout_seconds: int = 120
      max_files: int = 200
      max_file_size_kb: int = 250
      max_chars_per_chunk: int = 12000
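Because `Settings` is a pydantic `BaseSettings` model, each new field is overridable from the environment (or the `.env` file), which is how the credit-safe overrides in `.env.example` reach the agents. A minimal sketch, assuming the standard case-insensitive env parsing of pydantic-settings:

```python
import os

from app.config import Settings

os.environ["ENABLE_LLM_ENRICHMENT"] = "true"
os.environ["MAX_LLM_CHUNKS"] = "2"

settings = Settings()
print(settings.enable_llm_enrichment)  # True, parsed from the env string
print(settings.max_llm_chunks)         # 2
```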
app/main.py CHANGED
@@ -2,7 +2,8 @@ from fastapi import FastAPI, HTTPException

  from app.agents.graph import AuditGraph
  from app.config import get_settings
- from app.schemas import AuditReport, AuditRequest
+ from app.schemas import AuditReport, AuditRequest, LLMHealth
+ from app.services.llm_client import LLMClient

  app = FastAPI(title="SwarmAudit", version="0.1.0")

@@ -12,6 +13,11 @@ async def health() -> dict[str, str]:
      return {"status": "ok", "app": get_settings().app_name}


+ @app.get("/llm/health", response_model=LLMHealth)
+ async def llm_health() -> LLMHealth:
+     return await LLMClient(get_settings()).health_check()
+
+
  @app.post("/audit", response_model=AuditReport)
  async def audit(request: AuditRequest) -> AuditReport:
      try:
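The new endpoint is side-effect free under the mock provider, so it can be exercised directly with FastAPI's test client and no network access:

```python
from fastapi.testclient import TestClient

from app.main import app

client = TestClient(app)
payload = client.get("/llm/health").json()
# With LLM_PROVIDER=mock, health_check() answers ok=True locally.
print(payload["provider"], payload["ok"], payload["completion_preview"])
```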
app/schemas.py CHANGED
@@ -41,6 +41,8 @@ class Finding(BaseModel):
      why_it_matters: str
      suggested_fix: str
      agent_source: str
+     category: str | None = None
+     confidence: float | None = Field(default=None, ge=0, le=1)


  class AgentOutput(BaseModel):
@@ -67,6 +69,11 @@ class AuditReport(BaseModel):
      displayed_findings_count: int = 0
      hidden_findings_count: int = 0
      agent_finding_counts: dict[str, int] = Field(default_factory=dict)
+     category_summary: dict[str, int] = Field(default_factory=dict)
+     security_score: int | None = Field(default=None, ge=0, le=100)
+     production_score: int | None = Field(default=None, ge=0, le=100)
+     remediation_roadmap: dict[str, Any] = Field(default_factory=dict)
+     dependency_cves: list[dict[str, Any]] = Field(default_factory=list)
      generated_at: datetime = Field(default_factory=lambda: datetime.now(timezone.utc))
      agents_run: list[str]
      warnings: list[str] = Field(default_factory=list)
@@ -75,3 +82,28 @@
  class AuditProgress(BaseModel):
      message: str
      stage: str
+
+
+ class LLMHealth(BaseModel):
+     provider: str
+     model: str
+     base_url: str
+     ok: bool
+     latency_ms: float | None = None
+     models: list[str] = Field(default_factory=list)
+     completion_preview: str | None = None
+     error: str | None = None
+
+
+ class BenchmarkResult(BaseModel):
+     provider: str
+     model: str
+     backend: str
+     hardware: str
+     ok: bool
+     latency_ms: float | None = None
+     prompt_chars: int
+     completion_chars: int = 0
+     chars_per_second: float | None = None
+     completion_preview: str | None = None
+     error: str | None = None
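The `ge`/`le` bounds mean a malformed confidence is rejected at construction time rather than surfacing later in the report. A sketch (the required field list is taken from the `Finding` hunks in this diff):

```python
from pydantic import ValidationError

from app.schemas import Finding, Severity

data = dict(
    title="HTTP request without timeout",
    severity=Severity.medium,
    file_path="app.py",
    line_start=10,
    line_end=10,
    description="requests.get(url) has no timeout.",
    why_it_matters="A stalled remote can hang the worker.",
    suggested_fix="Pass timeout=10.",
    agent_source="Performance Agent",
    category="performance",
    confidence=0.84,
)
Finding(**data)  # valid

try:
    Finding(**{**data, "confidence": 1.5})
except ValidationError:
    print("confidence must stay within 0..1")
```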
app/services/benchmark.py ADDED
@@ -0,0 +1,67 @@
+ import time
+
+ from app.config import Settings
+ from app.schemas import BenchmarkResult
+ from app.services.llm_client import LLMClient
+
+
+ BENCHMARK_PROMPT = (
+     "Review this Python snippet for one security issue and answer in one concise sentence:\n"
+     "user_code = input('code: ')\n"
+     "eval(user_code)\n"
+ )
+
+
+ class BenchmarkService:
+     def __init__(self, settings: Settings):
+         self.settings = settings
+         self.llm_client = LLMClient(settings)
+
+     async def run_llm_benchmark(self) -> BenchmarkResult:
+         start = time.perf_counter()
+         try:
+             completion = await self.llm_client.test_completion()
+             latency_ms = round((time.perf_counter() - start) * 1000, 2)
+             completion_chars = len(completion)
+             chars_per_second = self._chars_per_second(completion_chars, latency_ms)
+             return BenchmarkResult(
+                 provider=self.settings.llm_provider,
+                 model=self.settings.llm_model,
+                 backend=self._backend_name(),
+                 hardware=self._hardware_label(),
+                 ok=True,
+                 latency_ms=latency_ms,
+                 prompt_chars=len(BENCHMARK_PROMPT),
+                 completion_chars=completion_chars,
+                 chars_per_second=chars_per_second,
+                 completion_preview=completion,
+             )
+         except Exception as exc:
+             latency_ms = round((time.perf_counter() - start) * 1000, 2)
+             return BenchmarkResult(
+                 provider=self.settings.llm_provider,
+                 model=self.settings.llm_model,
+                 backend=self._backend_name(),
+                 hardware=self._hardware_label(),
+                 ok=False,
+                 latency_ms=latency_ms,
+                 prompt_chars=len(BENCHMARK_PROMPT),
+                 error=str(exc),
+             )
+
+     def _backend_name(self) -> str:
+         if self.settings.llm_provider == "mock":
+             return "Mock local backend"
+         if self.settings.llm_provider == "vllm":
+             return "vLLM OpenAI-compatible endpoint"
+         return self.settings.llm_provider
+
+     def _hardware_label(self) -> str:
+         if self.settings.llm_provider == "vllm":
+             return "AMD MI300X target"
+         return "Local/mock"
+
+     def _chars_per_second(self, completion_chars: int, latency_ms: float) -> float | None:
+         if latency_ms <= 0:
+             return None
+         return round(completion_chars / (latency_ms / 1000), 2)
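The throughput figure is plain unit arithmetic over the measured latency, so it can be checked by hand:

```python
# A 420-character completion in 1234.56 ms:
# 420 / (1234.56 / 1000) = 340.202... -> 340.2 after rounding to 2 places.
latency_ms = 1234.56
completion_chars = 420
print(round(completion_chars / (latency_ms / 1000), 2))  # 340.2
```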
app/services/json_parser.py ADDED
@@ -0,0 +1,42 @@
+ import json
+ import re
+ from typing import Any
+
+ from pydantic import ValidationError
+
+ from app.schemas import AgentOutput
+
+
+ FENCED_JSON_PATTERN = re.compile(r"```(?:json)?\s*(.*?)```", re.DOTALL | re.IGNORECASE)
+
+
+ def parse_json_object(raw: str | dict[str, Any]) -> dict[str, Any]:
+     if isinstance(raw, dict):
+         return raw
+
+     text = raw.strip()
+     fence_match = FENCED_JSON_PATTERN.search(text)
+     if fence_match:
+         text = fence_match.group(1).strip()
+
+     try:
+         return json.loads(text)
+     except json.JSONDecodeError:
+         start = text.find("{")
+         end = text.rfind("}")
+         if start == -1 or end == -1 or end <= start:
+             raise
+         return json.loads(text[start : end + 1])
+
+
+ def parse_agent_output(raw: str | dict[str, Any], agent_name: str) -> AgentOutput:
+     try:
+         data = parse_json_object(raw)
+         data.setdefault("agent_name", agent_name)
+         return AgentOutput.model_validate(data)
+     except (json.JSONDecodeError, ValidationError, TypeError, ValueError):
+         return AgentOutput(
+             agent_name=agent_name,
+             findings=[],
+             metadata={"parse_error": True},
+         )
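The parser tries three progressively looser strategies: pass-through for dicts, unwrapping a fenced block, then slicing from the first `{` to the last `}`; anything still unparseable becomes an empty `AgentOutput` tagged with `parse_error`. A quick demonstration (the fence string is built at runtime so this example does not collide with its own code block):

```python
from app.services.json_parser import parse_agent_output, parse_json_object

fence = "`" * 3  # a literal triple backtick would terminate this example's fence
raw = f'{fence}json\n{{"findings": [], "metadata": {{"note": "clean"}}}}\n{fence}'

print(parse_json_object(raw))  # fenced wrapper stripped before json.loads

# Garbage degrades to a safe empty output instead of raising:
print(parse_agent_output("not json at all", "Security Agent").metadata)
```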
app/services/llm_client.py CHANGED
@@ -1,9 +1,11 @@
  import json
+ import time
  from typing import Any

  import httpx

  from app.config import Settings
+ from app.schemas import LLMHealth


  class LLMClient:
@@ -29,13 +31,97 @@ class LLMClient:
              "temperature": 0.1,
              "response_format": {"type": "json_object"},
          }
-         headers = {"Authorization": f"Bearer {self.settings.llm_api_key}"}
-         async with httpx.AsyncClient(timeout=120) as client:
-             response = await client.post(
-                 f"{self.settings.llm_base_url.rstrip('/')}/chat/completions",
-                 json=payload,
-                 headers=headers,
-             )
-             response.raise_for_status()
-             content = response.json()["choices"][0]["message"]["content"]
-             return json.loads(content)
+         response = await self._client_post("/chat/completions", payload)
+         content = response.json()["choices"][0]["message"]["content"]
+         return json.loads(content)
+
+     async def health_check(self) -> LLMHealth:
+         if self.settings.llm_provider == "mock":
+             return LLMHealth(
+                 provider=self.settings.llm_provider,
+                 model=self.settings.llm_model,
+                 base_url=self.settings.llm_base_url,
+                 ok=True,
+                 latency_ms=0,
+                 models=[self.settings.llm_model],
+                 completion_preview="Mock LLM is active.",
+             )
+
+         if self.settings.llm_provider != "vllm":
+             return LLMHealth(
+                 provider=self.settings.llm_provider,
+                 model=self.settings.llm_model,
+                 base_url=self.settings.llm_base_url,
+                 ok=False,
+                 error=f"Unsupported LLM_PROVIDER={self.settings.llm_provider}",
+             )
+
+         start = time.perf_counter()
+         try:
+             models = await self.list_models()
+             preview = await self.test_completion()
+             latency_ms = round((time.perf_counter() - start) * 1000, 2)
+             return LLMHealth(
+                 provider=self.settings.llm_provider,
+                 model=self.settings.llm_model,
+                 base_url=self.settings.llm_base_url,
+                 ok=True,
+                 latency_ms=latency_ms,
+                 models=models,
+                 completion_preview=preview,
+             )
+         except Exception as exc:
+             latency_ms = round((time.perf_counter() - start) * 1000, 2)
+             return LLMHealth(
+                 provider=self.settings.llm_provider,
+                 model=self.settings.llm_model,
+                 base_url=self.settings.llm_base_url,
+                 ok=False,
+                 latency_ms=latency_ms,
+                 error=str(exc),
+             )
+
+     async def list_models(self) -> list[str]:
+         if self.settings.llm_provider == "mock":
+             return [self.settings.llm_model]
+
+         response = await self._client_get("/models")
+         data = response.json()
+         return [model.get("id", "unknown") for model in data.get("data", [])]
+
+     async def test_completion(self) -> str:
+         if self.settings.llm_provider == "mock":
+             return "Mock LLM is active."
+
+         payload = {
+             "model": self.settings.llm_model,
+             "messages": [
+                 {"role": "system", "content": "You are a concise diagnostics assistant."},
+                 {"role": "user", "content": "Reply with exactly: SwarmAudit LLM OK"},
+             ],
+             "temperature": 0,
+             "max_tokens": 16,
+         }
+         response = await self._client_post("/chat/completions", payload)
+         return response.json()["choices"][0]["message"]["content"].strip()
+
+     async def _client_get(self, path: str) -> httpx.Response:
+         headers = {"Authorization": f"Bearer {self.settings.llm_api_key}"}
+         async with httpx.AsyncClient(timeout=30) as client:
+             response = await client.get(
+                 f"{self.settings.llm_base_url.rstrip('/')}{path}",
+                 headers=headers,
+             )
+             response.raise_for_status()
+             return response
+
+     async def _client_post(self, path: str, payload: dict[str, Any]) -> httpx.Response:
+         headers = {"Authorization": f"Bearer {self.settings.llm_api_key}"}
+         async with httpx.AsyncClient(timeout=self.settings.llm_timeout_seconds) as client:
+             response = await client.post(
+                 f"{self.settings.llm_base_url.rstrip('/')}{path}",
123
  json=payload,
124
  headers=headers,
125
  )
126
  response.raise_for_status()
127
+ return response
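
A quick smoke-test sketch for the health check above: with LLM_PROVIDER=mock it short-circuits instantly, while the vllm path exercises /models plus a 16-token completion (endpoint and key come from your .env).

import asyncio

from app.config import get_settings
from app.services.llm_client import LLMClient

async def main() -> None:
    # Mock provider returns ok=True with latency_ms=0; vllm hits the real endpoint.
    health = await LLMClient(get_settings()).health_check()
    print(health.ok, health.latency_ms, health.models, health.error)

asyncio.run(main())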
 
app/services/repo_crawler.py CHANGED
@@ -40,6 +40,13 @@ SUPPORTED_EXTENSIONS = {
40
  }
41
 
42
  README_FILENAMES = {"readme", "readme.md", "readme.rst", "readme.txt"}
43
 
44
 
45
  def validate_github_url(repo_url: str) -> str:
@@ -87,7 +94,8 @@ class RepoCrawler:
87
  skipped += 1
88
  continue
89
  readme_language = self._readme_language(rel_path)
90
- if path.suffix.lower() not in SUPPORTED_EXTENSIONS and readme_language is None:
 
91
  skipped += 1
92
  continue
93
  size = path.stat().st_size
@@ -99,7 +107,7 @@ class RepoCrawler:
99
  skipped += 1
100
  continue
101
 
102
- language = readme_language or SUPPORTED_EXTENSIONS[path.suffix.lower()]
103
  files.append(
104
  SourceFile(
105
  path=str(rel_path).replace("\\", "/"),
@@ -125,6 +133,9 @@ class RepoCrawler:
125
  return None
126
  return "Markdown" if rel_path.suffix.lower() == ".md" else "Documentation"
127

128
  def cleanup(self, scan_result: RepoScanResult | None) -> None:
129
  if scan_result is None:
130
  return
 
40
  }
41
 
42
  README_FILENAMES = {"readme", "readme.md", "readme.rst", "readme.txt"}
43
+ DEPENDENCY_MANIFESTS = {
44
+ "requirements.txt": "Python Requirements",
45
+ "pyproject.toml": "Python Project",
46
+ "package.json": "Node Package",
47
+ "go.mod": "Go Module",
48
+ "cargo.toml": "Rust Package",
49
+ }
50
 
51
 
52
  def validate_github_url(repo_url: str) -> str:
 
94
  skipped += 1
95
  continue
96
  readme_language = self._readme_language(rel_path)
97
+ manifest_language = self._manifest_language(rel_path)
98
+ if path.suffix.lower() not in SUPPORTED_EXTENSIONS and readme_language is None and manifest_language is None:
99
  skipped += 1
100
  continue
101
  size = path.stat().st_size
 
107
  skipped += 1
108
  continue
109
 
110
+ language = readme_language or manifest_language or SUPPORTED_EXTENSIONS[path.suffix.lower()]
111
  files.append(
112
  SourceFile(
113
  path=str(rel_path).replace("\\", "/"),
 
133
  return None
134
  return "Markdown" if rel_path.suffix.lower() == ".md" else "Documentation"
135
 
136
+ def _manifest_language(self, rel_path: Path) -> str | None:
137
+ return DEPENDENCY_MANIFESTS.get(rel_path.name.lower())
138
+
139
  def cleanup(self, scan_result: RepoScanResult | None) -> None:
140
  if scan_result is None:
141
  return
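
The manifest check above matches on filename only, lower-cased, so nested manifests like backend/Cargo.toml still count; a small sketch (paths are hypothetical):

from pathlib import Path

from app.services.repo_crawler import DEPENDENCY_MANIFESTS

for candidate in ("requirements.txt", "backend/Cargo.toml", "docs/notes.rst"):
    label = DEPENDENCY_MANIFESTS.get(Path(candidate).name.lower())
    print(candidate, "->", label)  # docs/notes.rst -> None (not a manifest)
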
app/services/report_formatter.py CHANGED
@@ -1,3 +1,8 @@
1
  from app.schemas import AuditReport, Severity
2
 
3
 
@@ -10,6 +15,11 @@ def format_report_markdown(report: AuditReport) -> str:
10
  f"Files skipped: `{report.skipped_file_count}`",
11
  f"Findings shown: `{report.displayed_findings_count}` of `{report.total_findings_count}`",
12
  "",
 
 
 
 
 
13
  "## Severity Summary",
14
  "",
15
  ]
@@ -22,6 +32,40 @@ def format_report_markdown(report: AuditReport) -> str:
22
  for agent_name, count in report.agent_finding_counts.items():
23
  lines.append(f"- **{agent_name}**: {count}")
24

25
  if report.warnings:
26
  lines.extend(["", "## Warnings", ""])
27
  lines.extend(f"- {warning}" for warning in report.warnings)
@@ -53,3 +97,247 @@ def format_report_markdown(report: AuditReport) -> str:
53
  )
54
 
55
  return "\n".join(lines)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import tempfile
2
+ from html import escape
3
+ from pathlib import Path
4
+ from urllib.parse import quote
5
+
6
  from app.schemas import AuditReport, Severity
7
 
8
 
 
15
  f"Files skipped: `{report.skipped_file_count}`",
16
  f"Findings shown: `{report.displayed_findings_count}` of `{report.total_findings_count}`",
17
  "",
18
+ "## Readiness Scores",
19
+ "",
20
+ f"- **Security Score**: `{_score_label(report.security_score)}`",
21
+ f"- **Production Readiness Score**: `{_score_label(report.production_score)}`",
22
+ "",
23
  "## Severity Summary",
24
  "",
25
  ]
 
32
  for agent_name, count in report.agent_finding_counts.items():
33
  lines.append(f"- **{agent_name}**: {count}")
34
 
35
+ if report.category_summary:
36
+ lines.extend(["", "## Category Summary", ""])
37
+ for category, count in report.category_summary.items():
38
+ lines.append(f"- **{_label(category)}**: {count}")
39
+
40
+ if report.remediation_roadmap:
41
+ lines.extend(["", "## Remediation Roadmap", ""])
42
+ for key, label in [
43
+ ("this_week", "This Week"),
44
+ ("next_sprint", "Next Sprint"),
45
+ ("backlog", "Backlog"),
46
+ ]:
47
+ items = report.remediation_roadmap.get(key, [])
48
+ lines.extend(["", f"### {label}", ""])
49
+ if not items:
50
+ lines.append("No items in this lane.")
51
+ continue
52
+ for item in items:
53
+ lines.append(
54
+ f"- **[{item.get('severity', 'LOW')}] {item.get('title', 'Finding')}** "
55
+ f"({_label(item.get('category', 'general'))}) - "
56
+ f"`{item.get('file_path', 'unknown')}:{item.get('line_start', '?')}`"
57
+ )
58
+
59
+ if report.dependency_cves:
60
+ lines.extend(["", "## Dependency CVEs", ""])
61
+ for cve in report.dependency_cves:
62
+ fixed_version = cve.get("fixed_version") or "a patched version"
63
+ lines.append(
64
+ f"- **[{cve.get('severity', 'LOW')}] {cve.get('id', 'UNKNOWN')}** "
65
+ f"`{cve.get('package', 'package')}@{cve.get('version', 'unknown')}` "
66
+ f"({cve.get('ecosystem', 'unknown')}) - upgrade to {fixed_version}"
67
+ )
68
+
69
  if report.warnings:
70
  lines.extend(["", "## Warnings", ""])
71
  lines.extend(f"- {warning}" for warning in report.warnings)
 
97
  )
98
 
99
  return "\n".join(lines)
100
+
101
+
102
+ def format_report_html(report: AuditReport) -> str:
103
+ findings = report.findings
104
+ critical = report.severity_summary.get(Severity.critical, 0)
105
+ high = report.severity_summary.get(Severity.high, 0)
106
+ medium = report.severity_summary.get(Severity.medium, 0)
107
+ low = report.severity_summary.get(Severity.low, 0)
108
+
109
+ if not findings:
110
+ return """
111
+ <section class="audit-console">
112
+ <div class="audit-console-header">
113
+ <div class="audit-console-title">Audit report</div>
114
+ <div class="audit-filter-row"><span class="filter-pill">All 0</span></div>
115
+ </div>
116
+ <div class="audit-empty">
117
+ <h3>No findings detected</h3>
118
+ <p>The current agent set did not raise findings for the displayed report.</p>
119
+ </div>
120
+ </section>
121
+ """
122
+
123
+ selected = findings[0]
124
+ list_items = "\n".join(_finding_list_item(finding, index + 1) for index, finding in enumerate(findings[:12]))
125
+
126
+ filter_items = _severity_filter_items(
127
+ {
128
+ Severity.critical: critical,
129
+ Severity.high: high,
130
+ Severity.medium: medium,
131
+ Severity.low: low,
132
+ }
133
+ )
134
+
135
+ return f"""
136
+ <section class="audit-console">
137
+ <div class="audit-console-header">
138
+ <div class="audit-console-title">Audit report</div>
139
+ <div class="audit-filter-row">
140
+ <span class="filter-pill active">All {report.displayed_findings_count}</span>
141
+ {filter_items}
142
+ </div>
143
+ </div>
144
+ <div class="audit-console-body">
145
+ <div class="finding-list">
146
+ {list_items}
147
+ </div>
148
+ <div class="finding-detail">
149
+ {_finding_detail(selected, 1)}
150
+ </div>
151
+ </div>
152
+ </section>
153
+ """
154
+
155
+
156
+ def format_empty_report_html() -> str:
157
+ return """
158
+ <section class="audit-console">
159
+ <div class="audit-console-header">
160
+ <div class="audit-console-title">Audit report</div>
161
+ <div class="audit-filter-row"><span class="filter-pill active">All 0</span></div>
162
+ </div>
163
+ <div class="audit-empty">
164
+ <h3>Run an audit to populate findings</h3>
165
+ <p>The report panel will show ranked findings with file references and suggested fixes.</p>
166
+ </div>
167
+ </section>
168
+ """
169
+
170
+
171
+ def format_report_overview_html(report: AuditReport | None) -> str:
172
+ if report is None:
173
+ return """
174
+ <section class="report-overview">
175
+ <div class="overview-column">
176
+ <span>Security Score</span>
177
+ <strong>-</strong>
178
+ </div>
179
+ <div class="overview-column">
180
+ <span>Production Readiness</span>
181
+ <strong>-</strong>
182
+ </div>
183
+ </section>
184
+ """
185
+
186
+ categories = "".join(
187
+ f"<span>{escape(_label(category))}: {count}</span>"
188
+ for category, count in list(report.category_summary.items())[:6]
189
+ )
190
+ roadmap = report.remediation_roadmap or {}
191
+ return f"""
192
+ <section class="report-overview">
193
+ <div class="overview-column">
194
+ <span>Security Score</span>
195
+ <strong>{_score_label(report.security_score)}</strong>
196
+ </div>
197
+ <div class="overview-column">
198
+ <span>Production Readiness</span>
199
+ <strong>{_score_label(report.production_score)}</strong>
200
+ </div>
201
+ <div class="overview-column overview-wide">
202
+ <span>Category Summary</span>
203
+ <div class="overview-tags">{categories or "<span>No categories raised</span>"}</div>
204
+ </div>
205
+ <div class="overview-column overview-wide">
206
+ <span>Roadmap</span>
207
+ <div class="overview-tags">
208
+ <span>This Week: {len(roadmap.get("this_week", []))}</span>
209
+ <span>Next Sprint: {len(roadmap.get("next_sprint", []))}</span>
210
+ <span>Backlog: {len(roadmap.get("backlog", []))}</span>
211
+ </div>
212
+ </div>
213
+ </section>
214
+ """
215
+
216
+
217
+ def format_finding_detail_html(report: AuditReport | None, index: int = 0) -> str:
218
+ if report is None or not report.findings:
219
+ return format_empty_finding_detail_html()
220
+
221
+ safe_index = min(max(index, 0), len(report.findings) - 1)
222
+ return f"""
223
+ <section class="finding-detail-panel">
224
+ {_finding_detail(report.findings[safe_index], safe_index + 1, report.repo_url)}
225
+ </section>
226
+ """
227
+
228
+
229
+ def format_empty_finding_detail_html() -> str:
230
+ return """
231
+ <section class="finding-detail-panel empty-detail">
232
+ <div class="audit-empty">
233
+ <h3>Select a finding</h3>
234
+ <p>Run an audit, then click any row in the findings list to inspect its explanation and suggested fix.</p>
235
+ </div>
236
+ </section>
237
+ """
238
+
239
+
240
+ def _finding_list_item(finding, index: int) -> str:
241
+ severity = finding.severity.value
242
+ severity_class = severity.lower()
243
+ reference = f"{finding.file_path}:{finding.line_start}"
244
+ return f"""
245
+ <article class="finding-row severity-{severity_class}">
246
+ <div class="finding-row-meta">
247
+ <span class="severity-badge">{escape(severity)}</span>
248
+ <span>F-{index:03d}</span>
249
+ </div>
250
+ <div class="finding-row-title">{escape(finding.title)}</div>
251
+ <div class="finding-row-path">{escape(reference)}</div>
252
+ </article>
253
+ """
254
+
255
+
256
+ def _severity_filter_items(counts: dict[Severity, int]) -> str:
257
+ items: list[str] = []
258
+ for severity, css_class, label in [
259
+ (Severity.critical, "dot-critical", "Critical"),
260
+ (Severity.high, "dot-high", "High"),
261
+ (Severity.medium, "dot-medium", "Medium"),
262
+ (Severity.low, "dot-low", "Low"),
263
+ ]:
264
+ count = counts.get(severity, 0)
265
+ if count <= 0:
266
+ continue
267
+ items.append(f'<span class="filter-dot {css_class}"></span><span>{label} {count}</span>')
268
+ return "\n".join(items)
269
+
270
+
271
+ def _finding_detail(finding, index: int, repo_url: str | None = None) -> str:
272
+ severity = finding.severity.value
273
+ severity_class = severity.lower()
274
+ reference = f"{finding.file_path}:{finding.line_start}-{finding.line_end}"
275
+ category = finding.category or finding.agent_source.replace(" Agent", "").lower()
276
+ file_url = _github_file_url(repo_url, finding.file_path, finding.line_start)
277
+ open_html = (
278
+ f'<a href="{escape(file_url)}" target="_blank" rel="noopener noreferrer">open -></a>'
279
+ if file_url
280
+ else "<span>open -></span>"
281
+ )
282
+ return f"""
283
+ <div class="finding-detail-meta">
284
+ <span>F-{index:03d}</span>
285
+ <span>></span>
286
+ <span>{escape(category.upper())}</span>
287
+ <span>></span>
288
+ <span>{escape(reference)}</span>
289
+ </div>
290
+ <div class="finding-detail-title">
291
+ <span class="severity-badge severity-{severity_class}">{escape(severity)}</span>
292
+ <h3>{escape(finding.title)}</h3>
293
+ </div>
294
+ <div class="detail-section">
295
+ <span>Explanation</span>
296
+ <p>{escape(finding.description)}</p>
297
+ </div>
298
+ <div class="detail-section">
299
+ <span>Why it matters</span>
300
+ <p>{escape(finding.why_it_matters)}</p>
301
+ </div>
302
+ <div class="detail-section">
303
+ <span>Suggested fix</span>
304
+ <pre>{escape(finding.suggested_fix)}</pre>
305
+ </div>
306
+ <div class="reference-card">
307
+ <code>{escape(reference)}</code>
308
+ {open_html}
309
+ </div>
310
+ """
311
+
312
+
313
+ def write_report_exports(report: AuditReport, output_dir: Path | None = None) -> tuple[str, str]:
314
+ export_dir = output_dir or Path(tempfile.mkdtemp(prefix="swarm_audit_export_"))
315
+ export_dir.mkdir(parents=True, exist_ok=True)
316
+
317
+ markdown_path = export_dir / "swarm_audit_report.md"
318
+ json_path = export_dir / "swarm_audit_report.json"
319
+
320
+ markdown_path.write_text(format_report_markdown(report), encoding="utf-8")
321
+ json_path.write_text(report.model_dump_json(indent=2), encoding="utf-8")
322
+
323
+ return str(markdown_path), str(json_path)
324
+
325
+
326
+ def _score_label(score: int | None) -> str:
327
+ if score is None:
328
+ return "-"
329
+ return f"{score}/100"
330
+
331
+
332
+ def _label(value: str | None) -> str:
333
+ if not value:
334
+ return "General"
335
+ return value.replace("_", " ").replace("-", " ").title()
336
+
337
+
338
+ def _github_file_url(repo_url: str | None, file_path: str, line_start: int) -> str | None:
339
+ if not repo_url or "github.com/" not in repo_url:
340
+ return None
341
+ normalized_repo = repo_url.removesuffix(".git").rstrip("/")
342
+ quoted_path = quote(file_path.replace("\\", "/"))
343
+ return f"{normalized_repo}/blob/HEAD/{quoted_path}#L{line_start}"
app/ui/gradio_app.py CHANGED
@@ -1,10 +1,19 @@
1
  import os
 
2
 
3
  import gradio as gr
4
 
5
  from app.agents.graph import AuditGraph
6
- from app.schemas import AuditReport
7
- from app.services.report_formatter import format_report_markdown
8
 
9
 
10
  EXAMPLE_REPOS = {
@@ -13,69 +22,1452 @@ EXAMPLE_REPOS = {
13
  "Flask": "https://github.com/pallets/flask",
14
  }
15

16
 
17
  async def analyze_repo(repo_url: str):
18
  if not repo_url.strip():
19
- yield "Paste a public GitHub repository URL to start.", ""
 
 
 
 
 
 
 
 
 
 
 
 
20
  return
21
 
22
  progress: list[str] = []
23
- report_markdown = ""
24
  try:
25
  async for event in AuditGraph().run_with_progress(repo_url.strip()):
26
  if isinstance(event, AuditReport):
27
- report_markdown = format_report_markdown(event)
28
  else:
29
  progress.append(event)
30
- yield "\n".join(progress), report_markdown
 
 
 
 
 
 
 
 
 
 
 
 
 
31
  except Exception as exc:
32
  progress.append(f"Audit failed: {exc}")
33
- yield "\n".join(progress), report_markdown
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
34
 
35
 
36
  def choose_example(example_name: str) -> str:
37
  return EXAMPLE_REPOS.get(example_name, "")
38
 
39

40
  def build_app() -> gr.Blocks:
41
- with gr.Blocks(title="SwarmAudit") as demo:
42
- gr.Markdown(
43
- "# SwarmAudit\n"
44
- "Paste a public GitHub URL and get a structured multi-agent audit report."
 
 
 
 
 
 
 
 
 
45
  )
 
 
46
 
47
- with gr.Row():
48
- repo_url = gr.Textbox(
49
- label="GitHub Repository URL",
50
- placeholder="https://github.com/owner/repo",
51
- scale=4,
52
- )
53
- analyze = gr.Button("Analyze", variant="primary", scale=1)
54
 
55
- example = gr.Dropdown(
56
- label="Example repos",
57
- choices=list(EXAMPLE_REPOS.keys()),
58
- value=None,
59
- interactive=True,
60
- )
61
- example.change(choose_example, inputs=example, outputs=repo_url)
62
 
63
- with gr.Row():
64
- progress_output = gr.Textbox(
65
- label="Agent Progress",
66
- lines=10,
67
- interactive=False,
68
- )
69
- report_output = gr.Markdown(label="Audit Report")
70
 
71
- analyze.click(analyze_repo, inputs=repo_url, outputs=[progress_output, report_output])
72
  return demo
73
 
74
 
75
  def launch_app() -> None:
76
  server_name = os.getenv("GRADIO_SERVER_NAME", "0.0.0.0")
77
- server_port = int(os.getenv("PORT", os.getenv("GRADIO_SERVER_PORT", "7860")))
78
- build_app().queue().launch(server_name=server_name, server_port=server_port)
 
 
 
 
 
 
79
 
80
 
81
  if __name__ == "__main__":
 
1
  import os
2
+ import warnings
3
 
4
  import gradio as gr
5
 
6
  from app.agents.graph import AuditGraph
7
+ from app.config import get_settings
8
+ from app.schemas import AuditReport, Severity
9
+ from app.services.llm_client import LLMClient
10
+ from app.services.benchmark import BenchmarkService
11
+ from app.services.report_formatter import (
12
+ format_empty_finding_detail_html,
13
+ format_finding_detail_html,
14
+ format_report_overview_html,
15
+ write_report_exports,
16
+ )
17
 
18
 
19
  EXAMPLE_REPOS = {
 
22
  "Flask": "https://github.com/pallets/flask",
23
  }
24
 
25
+ AGENT_SWARM = [
26
+ ("Crawler", "Fetch repository tree", "Crawler Agent", "mapped"),
27
+ ("Chunker", "Tokenize and segment files", "Chunker", "created"),
28
+ ("Security", "CVE and secret scanning", "Security Agent", "found"),
29
+ ("Performance", "Hot-path and complexity", "Performance Agent", "found"),
30
+ ("Quality", "Lint, types, smells", "Quality Agent", "found"),
31
+ ("Docs", "Coverage and accuracy", "Docs Agent", "found"),
32
+ ("Config", "Production config risk", "Config Agent", "found"),
33
+ ("Dependency", "Manifest and CVE checks", "Dependency Agent", "found"),
34
+ ("Errors", "Resilience paths", "Error Handling Agent", "found"),
35
+ ("Observability", "Logs and health checks", "Observability Agent", "found"),
36
+ ("ROCm", "CUDA portability", "CUDA-to-ROCm Agent", "found"),
37
+ ("Synthesizer", "Merge findings into report", "Synthesizer Agent", "final report"),
38
+ ]
39
+
40
+
41
+ APP_CSS = """
42
+ @import url('https://fonts.googleapis.com/css2?family=Inter:wght@400;500;600;700&family=JetBrains+Mono:wght@400;500&display=swap');
43
+
44
+ :root {
45
+ --sa-bg: #080d14;
46
+ --sa-surface: #0d141d;
47
+ --sa-panel: #101923;
48
+ --sa-panel-high: #162233;
49
+ --sa-panel-higher: #1b293a;
50
+ --sa-border: #26364a;
51
+ --sa-border-strong: #33465e;
52
+ --sa-text: #e6f0ff;
53
+ --sa-muted: #8aa0b8;
54
+ --sa-primary: #60a5fa;
55
+ --sa-primary-soft: rgba(96, 165, 250, 0.14);
56
+ --sa-blue: #06b6d4;
57
+ --sa-orange: #f97316;
58
+ --sa-yellow: #eab308;
59
+ --sa-red: #ef4444;
60
+ --sa-green: #22c55e;
61
+ --sa-info: #64748b;
62
+ --sa-card-shadow: 0 18px 60px rgba(0, 0, 0, 0.24);
63
+ }
64
+
65
+ * {
66
+ scrollbar-width: thin;
67
+ scrollbar-color: #475569 #0f172a;
68
+ }
69
+
70
+ *::-webkit-scrollbar {
71
+ width: 8px;
72
+ height: 8px;
73
+ }
74
+
75
+ *::-webkit-scrollbar-track {
76
+ background: #0f172a;
77
+ }
78
+
79
+ *::-webkit-scrollbar-thumb {
80
+ background: #475569;
81
+ border-radius: 999px;
82
+ border: 2px solid #0f172a;
83
+ }
84
+
85
+ *::-webkit-scrollbar-thumb:hover {
86
+ background: #64748b;
87
+ }
88
+
89
+ .gradio-container {
90
+ background:
91
+ radial-gradient(circle at 18% -10%, rgba(96, 165, 250, 0.08), transparent 30%),
92
+ linear-gradient(180deg, #0a1018 0%, var(--sa-bg) 38%, #070b11 100%) !important;
93
+ color: var(--sa-text) !important;
94
+ font-family: Inter, system-ui, sans-serif !important;
95
+ }
96
+
97
+ #swarm-shell {
98
+ max-width: 1440px;
99
+ margin: 0 auto;
100
+ }
101
+
102
+ .swarm-topbar {
103
+ border: 1px solid rgba(96, 165, 250, 0.18);
104
+ background:
105
+ linear-gradient(135deg, rgba(16, 25, 35, 0.94), rgba(13, 20, 29, 0.86)),
106
+ rgba(16, 25, 35, 0.86);
107
+ border-radius: 10px;
108
+ padding: 14px 16px 13px;
109
+ margin-bottom: 12px;
110
+ box-shadow: 0 18px 70px rgba(0, 0, 0, 0.18), inset 0 1px 0 rgba(230, 240, 255, 0.04);
111
+ backdrop-filter: blur(10px);
112
+ }
113
+
114
+ .swarm-brand-row {
115
+ display: flex;
116
+ align-items: center;
117
+ justify-content: space-between;
118
+ gap: 16px;
119
+ margin-bottom: 10px;
120
+ }
121
+
122
+ .swarm-brand {
123
+ font-size: 19px;
124
+ line-height: 24px;
125
+ font-weight: 700;
126
+ letter-spacing: 0;
127
+ }
128
+
129
+ .swarm-tagline {
130
+ color: var(--sa-muted);
131
+ font-size: 12px;
132
+ line-height: 18px;
133
+ }
134
+
135
+ .swarm-status {
136
+ color: var(--sa-muted);
137
+ font: 600 11px/16px JetBrains Mono, monospace;
138
+ text-transform: uppercase;
139
+ }
140
+
141
+ .swarm-progressbar {
142
+ height: 3px;
143
+ border-radius: 999px;
144
+ background: rgba(38, 54, 74, 0.7);
145
+ overflow: hidden;
146
+ }
147
+
148
+ .swarm-progressbar span {
149
+ display: block;
150
+ width: 100%;
151
+ height: 100%;
152
+ background: linear-gradient(90deg, var(--sa-primary), #22c55e);
153
+ box-shadow: 0 0 18px rgba(96, 165, 250, 0.24);
154
+ }
155
+
156
+ .swarm-summary-grid {
157
+ display: grid;
158
+ grid-template-columns: repeat(auto-fit, minmax(150px, 1fr));
159
+ gap: 10px;
160
+ margin: 12px 0;
161
+ }
162
+
163
+ .swarm-metric {
164
+ border: 1px solid rgba(38, 54, 74, 0.95);
165
+ background: linear-gradient(180deg, rgba(22, 34, 51, 0.86), rgba(16, 25, 35, 0.9));
166
+ border-radius: 8px;
167
+ padding: 12px;
168
+ box-shadow: inset 0 1px 0 rgba(230, 240, 255, 0.035);
169
+ transition: border-color 160ms ease, transform 160ms ease, background 160ms ease;
170
+ }
171
+
172
+ .swarm-metric:hover {
173
+ border-color: rgba(96, 165, 250, 0.34);
174
+ transform: translateY(-1px);
175
+ }
176
+
177
+ .swarm-metric span {
178
+ display: block;
179
+ color: var(--sa-muted);
180
+ font: 600 11px/16px JetBrains Mono, monospace;
181
+ text-transform: uppercase;
182
+ letter-spacing: 0;
183
+ }
184
+
185
+ .swarm-metric strong {
186
+ display: block;
187
+ color: var(--sa-text);
188
+ font-size: 22px;
189
+ line-height: 28px;
190
+ margin-top: 2px;
191
+ }
192
+
193
+ .metric-critical strong,
194
+ .metric-critical span {
195
+ color: var(--sa-red);
196
+ }
197
+
198
+ .metric-high strong,
199
+ .metric-high span {
200
+ color: var(--sa-orange);
201
+ }
202
+
203
+ .metric-medium strong,
204
+ .metric-medium span {
205
+ color: var(--sa-yellow);
206
+ }
207
+
208
+ .metric-low strong,
209
+ .metric-low span {
210
+ color: var(--sa-blue);
211
+ }
212
+
213
+ .swarm-card,
214
+ .swarm-panel,
215
+ .swarm-export {
216
+ border: 1px solid var(--sa-border) !important;
217
+ background: rgba(16, 25, 35, 0.92) !important;
218
+ border-radius: 8px !important;
219
+ box-shadow: inset 0 1px 0 rgba(230, 240, 255, 0.03);
220
+ }
221
+
222
+ .agent-card {
223
+ border: 1px solid rgba(38, 54, 74, 0.95);
224
+ background: linear-gradient(180deg, rgba(16, 25, 35, 0.95), rgba(13, 20, 29, 0.96));
225
+ border-radius: 9px;
226
+ overflow: hidden;
227
+ margin-bottom: 12px;
228
+ box-shadow: inset 0 1px 0 rgba(230, 240, 255, 0.035);
229
+ }
230
+
231
+ .agent-card-header,
232
+ .audit-console-header {
233
+ min-height: 42px;
234
+ border-bottom: 1px solid var(--sa-border);
235
+ display: flex;
236
+ align-items: center;
237
+ justify-content: space-between;
238
+ padding: 0 14px;
239
+ }
240
+
241
+ .agent-card-title,
242
+ .audit-console-title {
243
+ color: var(--sa-text);
244
+ font-size: 13px;
245
+ line-height: 18px;
246
+ font-weight: 700;
247
+ }
248
+
249
+ .agent-card-count,
250
+ .audit-filter-row {
251
+ color: var(--sa-muted);
252
+ font: 500 11px/16px JetBrains Mono, monospace;
253
+ }
254
+
255
+ .agent-list {
256
+ padding: 12px 14px 14px;
257
+ }
258
+
259
+ .agent-item {
260
+ display: grid;
261
+ grid-template-columns: 28px 1fr auto;
262
+ gap: 10px;
263
+ align-items: center;
264
+ padding: 8px 0;
265
+ border-radius: 7px;
266
+ transition: background 150ms ease, border-color 150ms ease;
267
+ }
268
+
269
+ .agent-icon {
270
+ width: 28px;
271
+ height: 28px;
272
+ border-radius: 6px;
273
+ border: 1px solid var(--sa-border);
274
+ background: rgba(27, 41, 58, 0.88);
275
+ display: flex;
276
+ align-items: center;
277
+ justify-content: center;
278
+ color: var(--sa-muted);
279
+ font: 700 11px/16px JetBrains Mono, monospace;
280
+ }
281
+
282
+ .agent-name {
283
+ color: var(--sa-text);
284
+ font-size: 13px;
285
+ line-height: 18px;
286
+ font-weight: 700;
287
+ }
288
+
289
+ .agent-desc {
290
+ color: var(--sa-muted);
291
+ font-size: 11px;
292
+ line-height: 16px;
293
+ }
294
+
295
+ .agent-status {
296
+ font: 600 11px/16px JetBrains Mono, monospace;
297
+ }
298
+
299
+ .agent-status.done {
300
+ color: var(--sa-green);
301
+ }
302
+
303
+ .agent-status.running {
304
+ color: var(--sa-primary);
305
+ }
306
+
307
+ .agent-status.idle {
308
+ color: var(--sa-muted);
309
+ }
310
+
311
+ .agent-item.running {
312
+ background: rgba(34, 197, 94, 0.08);
313
+ border: 1px solid rgba(34, 197, 94, 0.22);
314
+ border-radius: 7px;
315
+ margin: 2px -6px;
316
+ padding: 8px 6px;
317
+ }
318
+
319
+ .swarm-card textarea,
320
+ .swarm-card input,
321
+ .swarm-card select {
322
+ font-family: JetBrains Mono, monospace !important;
323
+ }
324
+
325
+ .swarm-progress textarea {
326
+ min-height: 285px !important;
327
+ font-family: JetBrains Mono, monospace !important;
328
+ font-size: 12px !important;
329
+ line-height: 20px !important;
330
+ color: #d8e3ef !important;
331
+ background: #0b1118 !important;
332
+ }
333
+
334
+ .swarm-report {
335
+ min-height: 560px;
336
+ }
337
+
338
+ .swarm-report h1,
339
+ .swarm-report h2,
340
+ .swarm-report h3 {
341
+ color: var(--sa-text) !important;
342
+ }
343
+
344
+ .swarm-report code,
345
+ .swarm-report pre {
346
+ font-family: JetBrains Mono, monospace !important;
347
+ }
348
+
349
+ .swarm-export {
350
+ padding: 12px !important;
351
+ }
352
+
353
+ .audit-actionbar {
354
+ border: 1px solid rgba(38, 54, 74, 0.95) !important;
355
+ background: linear-gradient(180deg, rgba(16, 25, 35, 0.92), rgba(13, 20, 29, 0.94)) !important;
356
+ border-radius: 10px !important;
357
+ padding: 7px 8px !important;
358
+ margin-bottom: 12px !important;
359
+ box-shadow: inset 0 1px 0 rgba(230, 240, 255, 0.035);
360
+ }
361
+
362
+ .audit-actionbar .form,
363
+ .audit-actionbar .block {
364
+ min-height: 0 !important;
365
+ }
366
+
367
+ .audit-actionbar .gradio-row,
368
+ .audit-actionbar .row {
369
+ align-items: center !important;
370
+ gap: 8px !important;
371
+ }
372
+
373
+ .repo-input {
374
+ min-width: min(560px, 100%) !important;
375
+ }
376
+
377
+ .audit-actionbar label {
378
+ color: var(--sa-muted) !important;
379
+ font: 600 11px/16px JetBrains Mono, monospace !important;
380
+ text-transform: lowercase !important;
381
+ }
382
+
383
+ .audit-actionbar input {
384
+ background: #111a25 !important;
385
+ border: 1px solid var(--sa-border) !important;
386
+ border-radius: 7px !important;
387
+ color: var(--sa-text) !important;
388
+ font-family: JetBrains Mono, monospace !important;
389
+ min-height: 34px !important;
390
+ height: 34px !important;
391
+ padding: 6px 10px !important;
392
+ transition: border-color 150ms ease, box-shadow 150ms ease, background 150ms ease;
393
+ }
394
+
395
+ .audit-actionbar input:focus {
396
+ border-color: rgba(96, 165, 250, 0.7) !important;
397
+ box-shadow: 0 0 0 3px rgba(96, 165, 250, 0.11) !important;
398
+ background: #132033 !important;
399
+ }
400
+
401
+ .example-label {
402
+ display: flex;
403
+ align-items: center;
404
+ color: var(--sa-muted);
405
+ font: 600 11px/16px JetBrains Mono, monospace;
406
+ letter-spacing: 0.02em;
407
+ text-transform: uppercase;
408
+ height: 34px;
409
+ padding: 0 8px 0 12px;
410
+ }
411
+
412
+ .example-chip button {
413
+ background: rgba(74, 91, 113, 0.82) !important;
414
+ border: 1px solid rgba(100, 116, 139, 0.42) !important;
415
+ border-radius: 12px !important;
416
+ color: var(--sa-text) !important;
417
+ font: 700 13px/18px Inter, system-ui, sans-serif !important;
418
+ min-width: 0 !important;
419
+ height: 40px !important;
420
+ min-height: 40px !important;
421
+ padding: 0 20px !important;
422
+ margin: 0 4px !important;
423
+ transition: border-color 150ms ease, background 150ms ease, color 150ms ease, transform 150ms ease;
424
+ }
425
+
426
+ .example-chip button:hover {
427
+ background: rgba(87, 108, 135, 0.92) !important;
428
+ border-color: rgba(96, 165, 250, 0.36) !important;
429
+ color: var(--sa-text) !important;
430
+ transform: translateY(-1px);
431
+ }
432
+
433
+ button.primary,
434
+ .gradio-button.primary {
435
+ background: linear-gradient(180deg, #7bb8ff, var(--sa-primary)) !important;
436
+ color: #08111d !important;
437
+ border: 1px solid rgba(147, 197, 253, 0.48) !important;
438
+ border-radius: 8px !important;
439
+ font-weight: 700 !important;
440
+ box-shadow: 0 0 0 1px rgba(96, 165, 250, 0.08), 0 10px 26px rgba(96, 165, 250, 0.14);
441
+ min-height: 34px !important;
442
+ height: 34px !important;
443
+ padding: 0 14px !important;
444
+ transition: filter 150ms ease, transform 150ms ease, box-shadow 150ms ease;
445
+ }
446
+
447
+ button.primary:hover,
448
+ .gradio-button.primary:hover {
449
+ filter: brightness(1.04);
450
+ transform: translateY(-1px);
451
+ box-shadow: 0 0 0 1px rgba(96, 165, 250, 0.1), 0 14px 30px rgba(96, 165, 250, 0.18);
452
+ }
453
+
454
+ .tabs {
455
+ border: 1px solid var(--sa-border) !important;
456
+ border-radius: 10px !important;
457
+ background: rgba(8, 13, 20, 0.74) !important;
458
+ padding: 8px !important;
459
+ box-shadow: inset 0 1px 0 rgba(230, 240, 255, 0.03);
460
+ }
461
+
462
+ .tab-nav button {
463
+ border-radius: 7px !important;
464
+ font-weight: 600 !important;
465
+ }
466
+
467
+ .tab-nav button.selected,
468
+ .tab-nav button[aria-selected="true"] {
469
+ color: var(--sa-primary) !important;
470
+ box-shadow: inset 0 -1px 0 var(--sa-primary), 0 10px 24px rgba(96, 165, 250, 0.08);
471
+ }
472
+
473
+ .swarm-note {
474
+ color: var(--sa-muted);
475
+ font-size: 13px;
476
+ line-height: 20px;
477
+ margin: 0 0 10px;
478
+ }
479
+
480
+ .swarm-report a {
481
+ color: var(--sa-primary) !important;
482
+ }
483
+
484
+ .swarm-report blockquote {
485
+ border-left: 3px solid var(--sa-border-strong) !important;
486
+ color: var(--sa-muted) !important;
487
+ }
488
+
489
+ .audit-console {
490
+ border: 1px solid var(--sa-border);
491
+ background: rgba(16, 25, 35, 0.92);
492
+ border-radius: 9px;
493
+ overflow: hidden;
494
+ min-height: 700px;
495
+ }
496
+
497
+ .findings-list-radio,
498
+ .finding-detail-panel {
499
+ border: 1px solid rgba(38, 54, 74, 0.95);
500
+ background: rgba(16, 25, 35, 0.94);
501
+ border-radius: 0;
502
+ overflow: hidden;
503
+ }
504
+
505
+ .findings-list-radio {
506
+ height: 690px;
507
+ max-height: 690px;
508
+ overflow-y: auto !important;
509
+ border-right: 0;
510
+ border-radius: 0 0 0 8px;
511
+ scrollbar-gutter: auto;
512
+ }
513
+
514
+ .report-toolbar {
515
+ min-height: 41px;
516
+ border: 1px solid rgba(38, 54, 74, 0.95);
517
+ border-bottom: 0;
518
+ background: linear-gradient(180deg, rgba(16, 25, 35, 0.98), rgba(13, 20, 29, 0.96));
519
+ border-radius: 9px 9px 0 0;
520
+ display: flex;
521
+ align-items: center;
522
+ justify-content: space-between;
523
+ gap: 12px;
524
+ padding: 0 13px;
525
+ }
526
+
527
+ .report-header-row {
528
+ border: 1px solid rgba(38, 54, 74, 0.95) !important;
529
+ border-bottom: 0 !important;
530
+ background: linear-gradient(180deg, rgba(16, 25, 35, 0.98), rgba(13, 20, 29, 0.96)) !important;
531
+ border-radius: 9px 9px 0 0 !important;
532
+ align-items: center !important;
533
+ gap: 8px !important;
534
+ padding: 7px 10px !important;
535
+ }
536
+
537
+ .report-header-row .report-toolbar {
538
+ border: 0 !important;
539
+ background: transparent !important;
540
+ min-height: 28px !important;
541
+ padding: 0 !important;
542
+ }
543
+
544
+ .severity-filter-radio {
545
+ min-width: 360px !important;
546
+ }
547
+
548
+ .severity-filter-radio .wrap,
549
+ .severity-filter-radio .block,
550
+ .severity-filter-radio fieldset {
551
+ background: transparent !important;
552
+ border: 0 !important;
553
+ padding: 0 !important;
554
+ }
555
+
556
+ .severity-filter-radio label {
557
+ border: 1px solid transparent !important;
558
+ border-radius: 6px !important;
559
+ background: transparent !important;
560
+ padding: 5px 7px !important;
561
+ margin: 0 1px !important;
562
+ color: var(--sa-muted) !important;
563
+ transition: background 140ms ease, border-color 140ms ease, color 140ms ease;
564
+ }
565
+
566
+ .severity-filter-radio label:hover,
567
+ .severity-filter-radio label:has(input:checked) {
568
+ background: rgba(22, 34, 51, 0.92) !important;
569
+ border-color: rgba(96, 165, 250, 0.18) !important;
570
+ color: var(--sa-text) !important;
571
+ }
572
+
573
+ .severity-filter-radio label:has(input[value^="Critical"]) span { color: var(--sa-red) !important; }
574
+ .severity-filter-radio label:has(input[value^="High"]) span { color: var(--sa-orange) !important; }
575
+ .severity-filter-radio label:has(input[value^="Medium"]) span { color: var(--sa-yellow) !important; }
576
+ .severity-filter-radio label:has(input[value^="Low"]) span { color: var(--sa-blue) !important; }
577
+
578
+ .severity-filter-radio span {
579
+ font: 700 10px/14px JetBrains Mono, monospace !important;
580
+ white-space: nowrap !important;
581
+ }
582
+
583
+ .severity-filter-radio input {
584
+ display: none !important;
585
+ }
586
+
587
+ .report-download button {
588
+ height: 30px !important;
589
+ min-height: 30px !important;
590
+ border-radius: 7px !important;
591
+ border: 1px solid var(--sa-border) !important;
592
+ background: rgba(22, 34, 51, 0.82) !important;
593
+ color: var(--sa-text) !important;
594
+ font: 700 11px/16px Inter, system-ui, sans-serif !important;
595
+ padding: 0 10px !important;
596
+ }
597
+
598
+ .report-download button:hover {
599
+ border-color: rgba(96, 165, 250, 0.34) !important;
600
+ background: rgba(27, 41, 58, 0.96) !important;
601
+ }
602
+
603
+ .report-overview {
604
+ border: 1px solid rgba(38, 54, 74, 0.95);
605
+ border-top: 0;
606
+ background: rgba(16, 25, 35, 0.88);
607
+ display: grid;
608
+ grid-template-columns: repeat(2, minmax(120px, 0.7fr)) repeat(2, minmax(170px, 1fr));
609
+ gap: 0;
610
+ }
611
+
612
+ .overview-column {
613
+ border-right: 1px solid var(--sa-border);
614
+ padding: 10px 12px;
615
+ }
616
+
617
+ .overview-column:last-child {
618
+ border-right: 0;
619
+ }
620
+
621
+ .overview-column span {
622
+ color: var(--sa-muted);
623
+ font: 600 10px/15px JetBrains Mono, monospace;
624
+ text-transform: uppercase;
625
+ }
626
+
627
+ .overview-column strong {
628
+ display: block;
629
+ color: var(--sa-text);
630
+ font-size: 18px;
631
+ line-height: 24px;
632
+ margin-top: 2px;
633
+ }
634
+
635
+ .overview-tags {
636
+ display: flex;
637
+ flex-wrap: wrap;
638
+ gap: 6px;
639
+ margin-top: 4px;
640
+ }
641
+
642
+ .overview-tags span {
643
+ border: 1px solid var(--sa-border);
644
+ border-radius: 6px;
645
+ background: rgba(22, 34, 51, 0.82);
646
+ color: #cbd5e1;
647
+ padding: 3px 6px;
648
+ text-transform: none;
649
+ }
650
+
651
+ .report-body {
652
+ border: 1px solid var(--sa-border) !important;
653
+ border-top: 0 !important;
654
+ background: rgba(16, 25, 35, 0.94) !important;
655
+ border-radius: 0 0 9px 9px !important;
656
+ overflow: hidden !important;
657
+ }
658
+
659
+ .report-body > .form {
660
+ gap: 0 !important;
661
+ }
662
+
663
+ .report-title {
664
+ color: var(--sa-text);
665
+ font-size: 13px;
666
+ line-height: 18px;
667
+ font-weight: 700;
668
+ }
669
+
670
+ .report-title span {
671
+ color: var(--sa-muted);
672
+ font: 600 11px/16px JetBrains Mono, monospace;
673
+ margin-right: 6px;
674
+ }
675
+
676
+ .report-subnote {
677
+ color: var(--sa-muted);
678
+ font: 500 10px/14px JetBrains Mono, monospace;
679
+ margin-top: 1px;
680
+ opacity: 0.74;
681
+ }
682
+
683
+ .findings-list-radio .wrap,
684
+ .findings-list-radio .block,
685
+ .findings-list-radio fieldset {
686
+ background: transparent !important;
687
+ border: 0 !important;
688
+ padding: 0 !important;
689
+ }
690
+
691
+ .findings-list-radio label {
692
+ border-bottom: 1px solid rgba(38, 54, 74, 0.72) !important;
693
+ background: rgba(16, 25, 35, 0.5) !important;
694
+ padding: 12px 13px !important;
695
+ margin: 0 !important;
696
+ align-items: flex-start !important;
697
+ cursor: pointer !important;
698
+ transition: background 140ms ease, box-shadow 140ms ease, border-color 140ms ease;
699
+ }
700
+
701
+ .findings-list-radio label:hover {
702
+ background: rgba(22, 34, 51, 0.86) !important;
703
+ }
704
+
705
+ .findings-list-radio input:checked + span,
706
+ .findings-list-radio label:has(input:checked) {
707
+ background: linear-gradient(90deg, rgba(96, 165, 250, 0.14), rgba(22, 34, 51, 0.86)) !important;
708
+ box-shadow: inset 2px 0 0 var(--sa-primary);
709
+ }
710
+
711
+ .findings-list-radio span {
712
+ color: #dce4ee !important;
713
+ font: 600 12px/18px Inter, system-ui, sans-serif !important;
714
+ white-space: pre-wrap !important;
715
+ }
716
+
717
+ .findings-list-radio label:has(input[value^="CRIT"]) span { color: var(--sa-red) !important; }
718
+ .findings-list-radio label:has(input[value^="HIGH"]) span { color: var(--sa-orange) !important; }
719
+ .findings-list-radio label:has(input[value^="MED"]) span { color: var(--sa-yellow) !important; }
720
+ .findings-list-radio label:has(input[value^="LOW"]) span { color: var(--sa-blue) !important; }
721
+
722
+ .findings-list-radio label:has(input[value^="LOW"]) {
723
+ background: rgba(6, 182, 212, 0.055) !important;
724
+ }
725
+
726
+ .findings-list-radio input {
727
+ margin-top: 4px !important;
728
+ accent-color: var(--sa-primary) !important;
729
+ }
730
+
731
+ .finding-detail-panel {
732
+ height: 690px;
733
+ max-height: 690px;
734
+ overflow-y: auto;
735
+ border-radius: 0 0 8px 0;
736
+ scrollbar-gutter: auto;
737
+ }
738
+
739
+ .swarm-report .finding-detail-panel {
740
+ border: 0;
741
+ background: transparent;
742
+ border-radius: 0;
743
+ }
744
+
745
+ .audit-filter-row {
746
+ display: flex;
747
+ align-items: center;
748
+ gap: 10px;
749
+ white-space: nowrap;
750
+ }
751
+
752
+ .filter-pill {
753
+ background: rgba(32, 42, 54, 0.9);
754
+ border-radius: 6px;
755
+ padding: 5px 10px;
756
+ color: var(--sa-muted);
757
+ }
758
+
759
+ .filter-pill.active {
760
+ color: var(--sa-text);
761
+ }
762
+
763
+ .filter-dot {
764
+ width: 6px;
765
+ height: 6px;
766
+ border-radius: 999px;
767
+ display: inline-block;
768
+ }
769
+
770
+ .dot-critical { background: var(--sa-red); }
771
+ .dot-high { background: var(--sa-orange); }
772
+ .dot-medium { background: var(--sa-yellow); }
773
+ .dot-low { background: var(--sa-blue); }
774
+
775
+ .audit-console-body {
776
+ display: grid;
777
+ grid-template-columns: minmax(280px, 42%) 1fr;
778
+ min-height: 657px;
779
+ }
780
+
781
+ .finding-list {
782
+ border-right: 1px solid var(--sa-border);
783
+ background: #121922;
784
+ }
785
+
786
+ .finding-row {
787
+ padding: 14px 16px;
788
+ border-bottom: 1px solid var(--sa-border);
789
+ background: #121922;
790
+ }
791
+
792
+ .finding-row:first-child {
793
+ background: #1b232d;
794
+ }
795
+
796
+ .finding-row-meta {
797
+ display: flex;
798
+ align-items: center;
799
+ gap: 8px;
800
+ color: var(--sa-muted);
801
+ font: 500 11px/16px JetBrains Mono, monospace;
802
+ margin-bottom: 7px;
803
+ }
804
+
805
+ .severity-badge {
806
+ border: 1px solid currentColor;
807
+ border-radius: 5px;
808
+ padding: 2px 7px;
809
+ font: 700 10px/14px JetBrains Mono, monospace;
810
+ color: var(--sa-muted);
811
+ letter-spacing: 0.01em;
812
+ }
813
+
814
+ .severity-critical .severity-badge,
815
+ .severity-badge.severity-critical {
816
+ color: #fecaca;
817
+ background: rgba(239, 68, 68, 0.13);
818
+ border-color: rgba(239, 68, 68, 0.55);
819
+ }
820
+ .severity-high .severity-badge,
821
+ .severity-badge.severity-high {
822
+ color: #fed7aa;
823
+ background: rgba(249, 115, 22, 0.13);
824
+ border-color: rgba(249, 115, 22, 0.55);
825
+ }
826
+ .severity-medium .severity-badge,
827
+ .severity-badge.severity-medium {
828
+ color: #fde68a;
829
+ background: rgba(234, 179, 8, 0.13);
830
+ border-color: rgba(234, 179, 8, 0.55);
831
+ }
832
+ .severity-low .severity-badge,
833
+ .severity-badge.severity-low {
834
+ color: #a5f3fc;
835
+ background: rgba(6, 182, 212, 0.13);
836
+ border-color: rgba(6, 182, 212, 0.55);
837
+ }
838
+ .severity-info .severity-badge,
839
+ .severity-badge.severity-info {
840
+ color: #cbd5e1;
841
+ background: rgba(100, 116, 139, 0.16);
842
+ border-color: rgba(100, 116, 139, 0.55);
843
+ }
844
+
845
+ .finding-row-title {
846
+ color: var(--sa-text);
847
+ font-size: 13px;
848
+ line-height: 19px;
849
+ font-weight: 700;
850
+ }
851
+
852
+ .finding-row-path {
853
+ color: var(--sa-muted);
854
+ font: 500 11px/16px JetBrains Mono, monospace;
855
+ margin-top: 3px;
856
+ }
857
+
858
+ .finding-detail {
859
+ padding: 22px 22px 26px;
860
+ background: transparent;
861
+ }
862
+
863
+ .finding-detail-meta {
864
+ display: flex;
865
+ gap: 8px;
866
+ color: var(--sa-muted);
867
+ font: 500 11px/16px JetBrains Mono, monospace;
868
+ margin-bottom: 12px;
869
+ }
870
+
871
+ .finding-detail-title {
872
+ display: flex;
873
+ align-items: center;
874
+ gap: 10px;
875
+ margin-bottom: 22px;
876
+ }
877
+
878
+ .finding-detail-title h3 {
879
+ margin: 0;
880
+ color: var(--sa-text);
881
+ font-size: 18px;
882
+ line-height: 26px;
883
+ }
884
+
885
+ .detail-section {
886
+ margin-bottom: 20px;
887
+ }
888
+
889
+ .detail-section span {
890
+ display: block;
891
+ color: var(--sa-muted);
892
+ font: 600 11px/16px JetBrains Mono, monospace;
893
+ text-transform: uppercase;
894
+ margin-bottom: 8px;
895
+ }
896
+
897
+ .detail-section p {
898
+ color: #dbeafe;
899
+ font-size: 13px;
900
+ line-height: 21px;
901
+ margin: 0;
902
+ }
903
+
904
+ .detail-section pre,
905
+ .reference-card {
906
+ border: 0;
907
+ background: rgba(22, 34, 51, 0.48);
908
+ border-radius: 7px;
909
+ }
910
+
911
+ .detail-section pre {
912
+ color: #f1f5f9;
913
+ white-space: pre-wrap;
914
+ font: 500 12px/20px JetBrains Mono, monospace;
915
+ padding: 14px;
916
+ box-shadow: inset 0 1px 0 rgba(230, 240, 255, 0.03);
917
+ }
918
+
919
+ .reference-card {
920
+ display: flex;
921
+ align-items: center;
922
+ justify-content: space-between;
923
+ padding: 12px 14px;
924
+ color: var(--sa-muted);
925
+ transition: border-color 150ms ease, background 150ms ease;
926
+ }
927
+
928
+ .reference-card:hover {
929
+ background: rgba(27, 41, 58, 0.9);
930
+ border-color: rgba(96, 165, 250, 0.34);
931
+ }
932
+
933
+ .reference-card code {
934
+ color: #dce4ee;
935
+ font: 600 12px/18px JetBrains Mono, monospace;
936
+ }
937
+
938
+ .reference-card a {
939
+ color: var(--sa-text) !important;
940
+ text-decoration: none !important;
941
+ font: 700 12px/18px Inter, system-ui, sans-serif;
942
+ }
943
+
944
+ .audit-empty {
945
+ padding: 72px 24px;
946
+ text-align: center;
947
+ color: var(--sa-muted);
948
+ }
949
+
950
+ .audit-empty h3 {
951
+ color: var(--sa-text);
952
+ margin: 0 0 8px;
953
+ }
954
+
955
+ @media (max-width: 900px) {
956
+ .swarm-summary-grid {
957
+ grid-template-columns: repeat(2, minmax(0, 1fr));
958
+ }
959
+ .audit-console-body {
960
+ grid-template-columns: 1fr;
961
+ }
962
+ .finding-list {
963
+ border-right: 0;
964
+ }
965
+ .report-overview {
966
+ grid-template-columns: 1fr 1fr;
967
+ }
968
+ }
969
+ """
970
+
971
+
972
+ def render_workspace_header() -> str:
973
+ return """
974
+ <section class="swarm-topbar">
975
+ <div class="swarm-brand-row">
976
+ <div>
977
+ <div class="swarm-brand">SwarmAudit</div>
978
+ <div class="swarm-tagline">AI-generated code production-readiness scanner</div>
979
+ </div>
980
+ <div class="swarm-status">mock-first / vLLM-ready</div>
981
+ </div>
982
+ <div class="swarm-progressbar"><span></span></div>
983
+ </section>
984
+ """
985
+
986
+
987
+ def render_agent_swarm(progress: list[str] | None = None) -> str:
988
+ progress = progress or []
989
+ done_count = sum(1 for _, _, token, done_token in AGENT_SWARM if _agent_status(progress, token, done_token) == "done")
990
+ items = "\n".join(
991
+ f"""
992
+ <div class="agent-item {status}">
993
+ <div class="agent-icon">{name[:2].upper()}</div>
994
+ <div>
995
+ <div class="agent-name">{name}</div>
996
+ <div class="agent-desc">{desc}</div>
997
+ </div>
998
+ <div class="agent-status {status}">{status}</div>
999
+ </div>
1000
+ """
1001
+ for name, desc, token, done_token in AGENT_SWARM
1002
+ for status in [_agent_status(progress, token, done_token)]
1003
+ )
1004
+ return f"""
1005
+ <section class="agent-card">
1006
+ <div class="agent-card-header">
1007
+ <div class="agent-card-title">Agent swarm</div>
1008
+ <div class="agent-card-count">{done_count}/{len(AGENT_SWARM)} done</div>
1009
+ </div>
1010
+ <div class="agent-list">{items}</div>
1011
+ </section>
1012
+ """
1013
+
1014
+
1015
+ def _agent_status(progress: list[str], token: str, done_token: str) -> str:
1016
+ matching_events = [event for event in progress if token in event]
1017
+ if any(done_token in event for event in matching_events):
1018
+ return "done"
1019
+ if matching_events:
1020
+ return "running"
1021
+ return "idle"
1022
+
1023
+
1024
+ def render_empty_summary() -> str:
1025
+ return render_summary_cards(
1026
+ files_scanned="-",
1027
+ total_findings="-",
1028
+ severity_counts={},
1029
+ )
1030
+
1031
+
1032
+ def render_report_summary(report: AuditReport) -> str:
1033
+ return render_summary_cards(
1034
+ files_scanned=str(report.scanned_file_count),
1035
+ total_findings=str(report.total_findings_count),
1036
+ severity_counts={
1037
+ Severity.critical: report.severity_summary.get(Severity.critical, 0),
1038
+ Severity.high: report.severity_summary.get(Severity.high, 0),
1039
+ Severity.medium: report.severity_summary.get(Severity.medium, 0),
1040
+ Severity.low: report.severity_summary.get(Severity.low, 0),
1041
+ },
1042
+ )
1043
+
1044
+
1045
+ def render_report_toolbar(report: AuditReport | None) -> str:
1046
+ return f"""
1047
+ <section class="report-toolbar">
1048
+ <div>
1049
+ <div class="report-title"><span>DOC</span>Audit report</div>
1050
+ <div class="report-subnote">Visible rows prioritize important findings; downloads keep full report data.</div>
1051
+ </div>
1052
+ </section>
1053
+ """
1054
+
1055
+
1056
+ def build_severity_filter_choices(report: AuditReport | None) -> list[str]:
1057
+ if report is None:
1058
+ return ["All 0"]
1059
+
1060
+ displayed_counts = {severity: 0 for severity in Severity}
1061
+ for finding in report.findings:
1062
+ displayed_counts[finding.severity] += 1
1063
+
1064
+ choices = [f"All {len(report.findings)}"]
1065
+ for severity, label in [
1066
+ (Severity.critical, "Critical"),
1067
+ (Severity.high, "High"),
1068
+ (Severity.medium, "Medium"),
1069
+ (Severity.low, "Low"),
1070
+ ]:
1071
+ count = displayed_counts.get(severity, 0)
1072
+ if count > 0:
1073
+ choices.append(f"{label} {count}")
1074
+ return choices
1075
+
1076
+
1077
+ def render_summary_cards(
1078
+ files_scanned: str,
1079
+ total_findings: str,
1080
+ severity_counts: dict[Severity, int],
1081
+ ) -> str:
1082
+ severity_cards = []
1083
+ for severity, css_class in [
1084
+ (Severity.critical, "metric-critical"),
1085
+ (Severity.high, "metric-high"),
1086
+ (Severity.medium, "metric-medium"),
1087
+ (Severity.low, "metric-low"),
1088
+ ]:
1089
+ count = severity_counts.get(severity, 0)
1090
+ if count <= 0:
1091
+ continue
1092
+ severity_cards.append(
1093
+ f'<div class="swarm-metric {css_class}"><span>{severity.value.title()}</span><strong>{count}</strong></div>'
1094
+ )
1095
+
1096
+ severity_html = "\n".join(severity_cards)
1097
+ return f"""
1098
+ <section class="swarm-summary-grid">
1099
+ <div class="swarm-metric"><span>Files scanned</span><strong>{files_scanned}</strong></div>
1100
+ <div class="swarm-metric"><span>Findings</span><strong>{total_findings}</strong></div>
1101
+ {severity_html}
1102
+ </section>
1103
+ """
1104
+
1105
 
1106
  async def analyze_repo(repo_url: str):
1107
  if not repo_url.strip():
1108
+ yield (
1109
+ "Paste a public GitHub repository URL to start.",
1110
+ render_agent_swarm(),
1111
+ render_empty_summary(),
1112
+ render_report_toolbar(None),
1113
+ gr.update(choices=["All 0"], value="All 0"),
1114
+ format_report_overview_html(None),
1115
+ gr.update(choices=[], value=None),
1116
+ format_empty_finding_detail_html(),
1117
+ None,
1118
+ None,
1119
+ None,
1120
+ )
1121
  return
1122
 
1123
  progress: list[str] = []
1124
+ agent_html = render_agent_swarm(progress)
1125
+ summary_html = render_empty_summary()
1126
+ report_toolbar_html = render_report_toolbar(None)
1127
+ severity_filter_update = gr.update(choices=["All 0"], value="All 0")
1128
+ report_overview_html = format_report_overview_html(None)
1129
+ finding_choice_update = gr.update(choices=[], value=None)
1130
+ finding_detail_html = format_empty_finding_detail_html()
1131
+ markdown_export = None
1132
+ json_export = None
1133
+ report_state = None
1134
  try:
1135
  async for event in AuditGraph().run_with_progress(repo_url.strip()):
1136
  if isinstance(event, AuditReport):
1137
+ report_state = event
1138
+ filter_choices = build_severity_filter_choices(event)
1139
+ selected_filter = filter_choices[0]
1140
+ severity_filter_update = gr.update(choices=filter_choices, value=selected_filter)
1141
+ finding_choices = build_finding_choices(event, selected_filter)
1142
+ finding_choice_update = gr.update(
1143
+ choices=finding_choices,
1144
+ value=finding_choices[0] if finding_choices else None,
1145
+ )
1146
+ finding_detail_html = format_finding_detail_html(event, 0)
1147
+ summary_html = render_report_summary(event)
1148
+ report_toolbar_html = render_report_toolbar(event)
1149
+ report_overview_html = format_report_overview_html(event)
1150
+ markdown_export, json_export = write_report_exports(event)
1151
  else:
1152
  progress.append(event)
1153
+ agent_html = render_agent_swarm(progress)
1154
+ yield (
1155
+ "\n".join(progress),
1156
+ agent_html,
1157
+ summary_html,
1158
+ report_toolbar_html,
1159
+ severity_filter_update,
1160
+ report_overview_html,
1161
+ finding_choice_update,
1162
+ finding_detail_html,
1163
+ markdown_export,
1164
+ json_export,
1165
+ report_state,
1166
+ )
1167
  except Exception as exc:
1168
  progress.append(f"Audit failed: {exc}")
1169
+ yield (
1170
+ "\n".join(progress),
1171
+ render_agent_swarm(progress),
1172
+ render_empty_summary(),
1173
+ render_report_toolbar(None),
1174
+ gr.update(choices=["All 0"], value="All 0"),
1175
+ format_report_overview_html(None),
1176
+ gr.update(choices=[], value=None),
1177
+ format_empty_finding_detail_html(),
1178
+ None,
1179
+ None,
1180
+ None,
1181
+ )
1182
+
1183
+
1184
+ def build_finding_rows(report: AuditReport | None) -> list[list[str]]:
1185
+ if report is None:
1186
+ return []
1187
+
1188
+ rows: list[list[str]] = []
1189
+ for index, finding in enumerate(report.findings, start=1):
1190
+ rows.append(
1191
+ [
1192
+ f"F-{index:03d}",
1193
+ finding.severity.value,
1194
+ finding.title,
1195
+ f"{finding.file_path}:{finding.line_start}",
1196
+ finding.agent_source,
1197
+ ]
1198
+ )
1199
+ return rows
1200
+
1201
+
1202
+ def _severity_from_filter(filter_label: str | None) -> Severity | None:
1203
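+ # Filter labels look like "High 2" or "All 7"; prefix-matching maps them back to a Severity, and "All" maps to None.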
+ if not filter_label:
1204
+ return None
1205
+ normalized = filter_label.lower()
1206
+ for severity in Severity:
1207
+ if normalized.startswith(severity.value.lower()):
1208
+ return severity
1209
+ return None
1210
+
1211
+
1212
+ def _severity_marker(severity: Severity) -> str:
1213
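+ # Short fixed-width badges keep finding titles aligned in the radio list.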
+ return {
1214
+ Severity.critical: "CRIT",
1215
+ Severity.high: "HIGH",
1216
+ Severity.medium: "MED",
1217
+ Severity.low: "LOW",
1218
+ }.get(severity, "INFO")
1219
+
1220
+
1221
+ def build_finding_choices(report: AuditReport | None, severity_filter: str | None = None) -> list[str]:
1222
+ if report is None:
1223
+ return []
1224
+
1225
+ selected_severity = _severity_from_filter(severity_filter)
1226
+ choices: list[str] = []
1227
+ for index, finding in enumerate(report.findings, start=1):
1228
+ if selected_severity is not None and finding.severity != selected_severity:
1229
+ continue
1230
+ marker = _severity_marker(finding.severity)
1231
+ choices.append(
1232
+ f"{marker:<4} {finding.title}\n"
1233
+ f"{finding.file_path}:{finding.line_start} | {finding.agent_source}"
1234
+ )
1235
+ return choices
1236
+
1237
+
1238
+ def filter_findings(severity_filter: str | None, report: AuditReport | None):
1239
+ choices = build_finding_choices(report, severity_filter)
1240
+ selected = choices[0] if choices else None
1241
+ detail_html = select_finding(selected, report) if selected else format_empty_finding_detail_html()
1242
+ return gr.update(choices=choices, value=selected), detail_html
1243
+
1244
+
1245
+ def select_finding(choice: str | None, report: AuditReport | None) -> str:
1246
+ if report is None or not report.findings:
1247
+ return format_empty_finding_detail_html()
1248
+
1249
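+ # The unfiltered choice list preserves report.findings order, so the matched index maps directly onto the findings list.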
+ row_index = 0
1250
+ if choice:
1251
+ choices = build_finding_choices(report)
1252
+ if choice in choices:
1253
+ row_index = choices.index(choice)
1254
+
1255
+ return format_finding_detail_html(report, row_index)
1256
 
1257
 
1258
  def choose_example(example_name: str) -> str:
1259
  return EXAMPLE_REPOS.get(example_name, "")
1260
 
1261
 
1262
+ async def run_llm_diagnostics() -> str:
1263
+ health = await LLMClient(get_settings()).health_check()
1264
+ lines = [
1265
+ "# LLM Diagnostics",
1266
+ "",
1267
+ f"- Provider: `{health.provider}`",
1268
+ f"- Model: `{health.model}`",
1269
+ f"- Base URL: `{health.base_url}`",
1270
+ f"- Status: `{'OK' if health.ok else 'FAILED'}`",
1271
+ ]
1272
+
1273
+ if health.latency_ms is not None:
1274
+ lines.append(f"- Latency: `{health.latency_ms} ms`")
1275
+ if health.models:
1276
+ lines.extend(["", "## Models", ""])
1277
+ lines.extend(f"- `{model}`" for model in health.models)
1278
+ if health.completion_preview:
1279
+ lines.extend(["", "## Completion Preview", "", health.completion_preview])
1280
+ if health.error:
1281
+ lines.extend(["", "## Error", "", f"```text\n{health.error}\n```"])
1282
+
1283
+ return "\n".join(lines)
1284
+
1285
+
1286
+ async def run_benchmark() -> str:
1287
+ result = await BenchmarkService(get_settings()).run_llm_benchmark()
1288
+ lines = [
1289
+ "# LLM Benchmark",
1290
+ "",
1291
+ f"- Provider: `{result.provider}`",
1292
+ f"- Backend: `{result.backend}`",
1293
+ f"- Model: `{result.model}`",
1294
+ f"- Hardware: `{result.hardware}`",
1295
+ f"- Status: `{'OK' if result.ok else 'FAILED'}`",
1296
+ f"- Prompt chars: `{result.prompt_chars}`",
1297
+ f"- Completion chars: `{result.completion_chars}`",
1298
+ ]
1299
+
1300
+ if result.latency_ms is not None:
1301
+ lines.append(f"- Latency: `{result.latency_ms} ms`")
1302
+ if result.chars_per_second is not None:
1303
+ lines.append(f"- Approx chars/sec: `{result.chars_per_second}`")
1304
+ if result.completion_preview:
1305
+ lines.extend(["", "## Completion Preview", "", result.completion_preview])
1306
+ if result.error:
1307
+ lines.extend(["", "## Error", "", f"```text\n{result.error}\n```"])
1308
+
1309
+ lines.extend(
1310
+ [
1311
+ "",
1312
+ "## Notes",
1313
+ "",
1314
+ "This scaffold uses character counts until a real vLLM endpoint exposes token usage. "
1315
+ "When running on AMD MI300X, record latency/tokens-per-second here for the final demo.",
1316
+ ]
1317
+ )
1318
+ return "\n".join(lines)
1319
+
1320
+
1321
  def build_app() -> gr.Blocks:
1322
+ theme = gr.themes.Base(
1323
+ primary_hue="blue",
1324
+ secondary_hue="cyan",
1325
+ neutral_hue="slate",
1326
+ font=[gr.themes.GoogleFont("Inter"), "system-ui", "sans-serif"],
1327
+ font_mono=[gr.themes.GoogleFont("JetBrains Mono"), "monospace"],
1328
+ )
1329
+
1330
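+ # Suppress only the known Gradio notice about Blocks-constructor parameters moving to launch() while the UI is assembled.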
+ with warnings.catch_warnings():
1331
+ warnings.filterwarnings(
1332
+ "ignore",
1333
+ message="The parameters have been moved from the Blocks constructor to the launch.*",
1334
+ category=UserWarning,
1335
  )
1336
+ with gr.Blocks(title="SwarmAudit", theme=theme, css=APP_CSS, elem_id="swarm-shell") as demo:
1337
+ gr.HTML(render_workspace_header())
1338
 
1339
+ with gr.Tab("Audit"):
1340
+ with gr.Group(elem_classes=["audit-actionbar"]):
1341
+ with gr.Row(equal_height=False):
1342
+ repo_url = gr.Textbox(
1343
+ label="",
1344
+ placeholder="repo https://github.com/owner/repo",
1345
+ scale=8,
1346
+ min_width=420,
1347
+ show_label=False,
1348
+ elem_classes=["repo-input"],
1349
+ )
1350
+ analyze = gr.Button("Analyze", variant="primary", scale=0, min_width=112)
1351
+ gr.HTML('<div class="example-label">Examples</div>', scale=0)
1352
+ for example_name, example_url in EXAMPLE_REPOS.items():
1353
+ example_button = gr.Button(
1354
+ example_name,
1355
+ scale=0,
1356
+ min_width=124,
1357
+ elem_classes=["example-chip"],
1358
+ )
1359
+ example_button.click(lambda url=example_url: url, outputs=repo_url)
1360
 
1361
+ summary_output = gr.HTML(render_empty_summary())
1362
+ report_state = gr.State(None)
1363
 
1364
+ with gr.Row():
1365
+ with gr.Column(scale=1):
1366
+ agent_output = gr.HTML(render_agent_swarm())
1367
+ progress_output = gr.Textbox(
1368
+ label="Activity log",
1369
+ lines=12,
1370
+ interactive=False,
1371
+ elem_classes=["swarm-panel", "swarm-progress"],
1372
+ )
1373
+ with gr.Column(scale=3):
1374
+ with gr.Row(elem_classes=["report-header-row"]):
1375
+ report_toolbar = gr.HTML(render_report_toolbar(None), scale=1)
1376
+ severity_filter = gr.Radio(
1377
+ choices=["All 0"],
1378
+ value="All 0",
1379
+ interactive=True,
1380
+ show_label=False,
1381
+ scale=0,
1382
+ min_width=360,
1383
+ elem_classes=["severity-filter-radio"],
1384
+ )
1385
+ markdown_export = gr.DownloadButton(
1386
+ "Markdown",
1387
+ value=None,
1388
+ size="sm",
1389
+ scale=0,
1390
+ min_width=96,
1391
+ elem_classes=["report-download"],
1392
+ )
1393
+ json_export = gr.DownloadButton(
1394
+ "JSON",
1395
+ value=None,
1396
+ size="sm",
1397
+ scale=0,
1398
+ min_width=76,
1399
+ elem_classes=["report-download"],
1400
+ )
1401
+ report_overview = gr.HTML(format_report_overview_html(None))
1402
+ with gr.Row(equal_height=True, elem_classes=["report-body"]):
1403
+ with gr.Column(scale=1):
1404
+ finding_selector = gr.Radio(
1405
+ choices=[],
1406
+ value=None,
1407
+ interactive=True,
1408
+ show_label=False,
1409
+ elem_classes=["findings-list-radio"],
1410
+ )
1411
+ with gr.Column(scale=1):
1412
+ finding_detail = gr.HTML(
1413
+ format_empty_finding_detail_html(),
1414
+ elem_classes=["swarm-panel", "swarm-report"],
1415
+ )
1416
+
1417
+ analyze.click(
1418
+ analyze_repo,
1419
+ inputs=repo_url,
1420
+ outputs=[
1421
+ progress_output,
1422
+ agent_output,
1423
+ summary_output,
1424
+ report_toolbar,
1425
+ severity_filter,
1426
+ report_overview,
1427
+ finding_selector,
1428
+ finding_detail,
1429
+ markdown_export,
1430
+ json_export,
1431
+ report_state,
1432
+ ],
1433
+ )
1434
+ severity_filter.change(
1435
+ filter_findings,
1436
+ inputs=[severity_filter, report_state],
1437
+ outputs=[finding_selector, finding_detail],
1438
+ )
1439
+ finding_selector.change(select_finding, inputs=[finding_selector, report_state], outputs=finding_detail)
1440
 
1441
+ with gr.Tab("Diagnostics"):
1442
+ gr.Markdown(
1443
+ "Verify the configured LLM backend before switching from mock mode to AMD/vLLM enrichment.",
1444
+ elem_classes=["swarm-note"],
1445
+ )
1446
+ diagnostics_button = gr.Button("Test LLM Connection", variant="primary")
1447
+ diagnostics_output = gr.Markdown(elem_classes=["swarm-panel"])
1448
+ diagnostics_button.click(run_llm_diagnostics, outputs=diagnostics_output)
1449
+
1450
+ with gr.Tab("Benchmark"):
1451
+ gr.Markdown(
1452
+ "Run a small timing probe. Mock mode validates the UI path; vLLM mode records MI300X demo numbers.",
1453
+ elem_classes=["swarm-note"],
1454
+ )
1455
+ benchmark_button = gr.Button("Run Benchmark", variant="primary")
1456
+ benchmark_output = gr.Markdown(elem_classes=["swarm-panel"])
1457
+ benchmark_button.click(run_benchmark, outputs=benchmark_output)
1458
  return demo
1459
 
1460
 
1461
  def launch_app() -> None:
1462
  server_name = os.getenv("GRADIO_SERVER_NAME", "0.0.0.0")
1463
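+ # Honor an explicitly configured port (Spaces sets PORT); otherwise try 7860 and fall back to a dynamic port when it is busy locally.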
+ configured_port = os.getenv("PORT") or os.getenv("GRADIO_SERVER_PORT")
1464
+ server_port = int(configured_port or "7860")
1465
+ try:
1466
+ build_app().queue().launch(server_name=server_name, server_port=server_port)
1467
+ except OSError:
1468
+ if configured_port:
1469
+ raise
1470
+ build_app().queue().launch(server_name=server_name, server_port=None)
1471
 
1472
 
1473
  if __name__ == "__main__":
tests/test_agent_llm_enrichment.py ADDED
@@ -0,0 +1,104 @@
1
+ import pytest
2
+
3
+ from app.agents.docs_agent import DocsAgent
4
+ from app.agents.performance_agent import PerformanceAgent
5
+ from app.agents.quality_agent import QualityAgent
6
+ from app.config import Settings
7
+ from app.schemas import CodeChunk
8
+ from app.services.llm_client import LLMClient
9
+
10
+
11
+ class FakeLLMClient(LLMClient):
12
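+ # Test double: complete_json counts calls and can simulate a backend failure, so no network is touched.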
+ def __init__(self, settings: Settings, payload=None, should_fail: bool = False):
13
+ super().__init__(settings)
14
+ self.payload = payload or {"findings": []}
15
+ self.should_fail = should_fail
16
+ self.calls = 0
17
+
18
+ async def complete_json(self, system_prompt: str, user_prompt: str):
19
+ self.calls += 1
20
+ if self.should_fail:
21
+ raise RuntimeError("vLLM unavailable")
22
+ return self.payload
23
+
24
+
25
+ def make_chunk() -> CodeChunk:
26
+ return CodeChunk(
27
+ file_path="app.py",
28
+ language="Python",
29
+ line_start=1,
30
+ line_end=2,
31
+ content="def work():\n return True",
32
+ )
33
+
34
+
35
+ def make_payload(agent_name: str):
36
+ return {
37
+ "findings": [
38
+ {
39
+ "title": f"{agent_name} LLM finding",
40
+ "severity": "LOW",
41
+ "file_path": "app.py",
42
+ "line_start": 1,
43
+ "line_end": 1,
44
+ "description": "LLM detected an issue.",
45
+ "why_it_matters": "It affects maintainability or runtime behavior.",
46
+ "suggested_fix": "Review and improve the implementation.",
47
+ "agent_source": agent_name,
48
+ }
49
+ ]
50
+ }
51
+
52
+
53
+ @pytest.mark.anyio
54
+ @pytest.mark.parametrize(
55
+ ("agent_cls", "agent_name"),
56
+ [
57
+ (PerformanceAgent, "Performance Agent"),
58
+ (QualityAgent, "Quality Agent"),
59
+ (DocsAgent, "Docs Agent"),
60
+ ],
61
+ )
62
+ async def test_agent_enrichment_disabled_does_not_call_llm(agent_cls, agent_name):
63
+ llm_client = FakeLLMClient(Settings(enable_llm_enrichment=False))
64
+ output = await agent_cls(llm_client).analyze([make_chunk()])
65
+
66
+ assert llm_client.calls == 0
67
+ assert output.metadata["llm_enrichment_enabled"] is False
68
+
69
+
70
+ @pytest.mark.anyio
71
+ @pytest.mark.parametrize(
72
+ ("agent_cls", "agent_name"),
73
+ [
74
+ (PerformanceAgent, "Performance Agent"),
75
+ (QualityAgent, "Quality Agent"),
76
+ (DocsAgent, "Docs Agent"),
77
+ ],
78
+ )
79
+ async def test_agent_enrichment_merges_valid_llm_findings(agent_cls, agent_name):
80
+ llm_client = FakeLLMClient(
81
+ Settings(enable_llm_enrichment=True, max_llm_chunks=1),
82
+ make_payload(agent_name),
83
+ )
84
+ output = await agent_cls(llm_client).analyze([make_chunk()])
85
+
86
+ assert llm_client.calls == 1
87
+ assert any(finding.title == f"{agent_name} LLM finding" for finding in output.findings)
88
+ assert output.metadata["llm_findings"] == 1
89
+
90
+
91
+ @pytest.mark.anyio
92
+ @pytest.mark.parametrize(
93
+ ("agent_cls", "agent_name"),
94
+ [
95
+ (PerformanceAgent, "Performance Agent"),
96
+ (QualityAgent, "Quality Agent"),
97
+ (DocsAgent, "Docs Agent"),
98
+ ],
99
+ )
100
+ async def test_agent_enrichment_failure_is_metadata_not_exception(agent_cls, agent_name):
101
+ llm_client = FakeLLMClient(Settings(enable_llm_enrichment=True), should_fail=True)
102
+ output = await agent_cls(llm_client).analyze([make_chunk()])
103
+
104
+ assert "vLLM unavailable" in output.metadata["llm_error"]
tests/test_api.py CHANGED
@@ -1,5 +1,7 @@
1
  from fastapi.testclient import TestClient
2
 
3
  from app.main import app
4
 
5
 
@@ -8,3 +10,13 @@ def test_health_endpoint():
8
 
9
  assert response.status_code == 200
10
  assert response.json() == {"status": "ok", "app": "SwarmAudit"}
1
  from fastapi.testclient import TestClient
2
 
3
+ import app.main as main
4
+ from app.config import Settings
5
  from app.main import app
6
 
7
 
 
10
 
11
  assert response.status_code == 200
12
  assert response.json() == {"status": "ok", "app": "SwarmAudit"}
13
+
14
+
15
+ def test_llm_health_endpoint(monkeypatch):
16
+ monkeypatch.setattr(main, "get_settings", lambda: Settings(_env_file=None, llm_provider="mock"))
17
+
18
+ response = TestClient(app).get("/llm/health")
19
+
20
+ assert response.status_code == 200
21
+ assert response.json()["ok"] is True
22
+ assert response.json()["provider"] in {"mock", "vllm"}
tests/test_benchmark.py ADDED
@@ -0,0 +1,29 @@
1
+ import pytest
2
+
3
+ from app.config import Settings
4
+ from app.services.benchmark import BenchmarkService
5
+
6
+
7
+ @pytest.mark.anyio
8
+ async def test_mock_benchmark_returns_ok_result():
9
+ result = await BenchmarkService(Settings(llm_provider="mock")).run_llm_benchmark()
10
+
11
+ assert result.ok is True
12
+ assert result.provider == "mock"
13
+ assert result.backend == "Mock local backend"
14
+ assert result.hardware == "Local/mock"
15
+ assert result.completion_chars > 0
16
+
17
+
18
+ @pytest.mark.anyio
19
+ async def test_benchmark_reports_llm_errors():
20
+ service = BenchmarkService(Settings(llm_provider="mock"))
21
+
22
+ async def fail_completion():
23
+ raise RuntimeError("benchmark failed")
24
+
25
+ service.llm_client.test_completion = fail_completion
26
+ result = await service.run_llm_benchmark()
27
+
28
+ assert result.ok is False
29
+ assert "benchmark failed" in result.error
tests/test_config_agent.py ADDED
@@ -0,0 +1,56 @@
1
+ import pytest
2
+
3
+ from app.agents.config_agent import ConfigAgent
4
+ from app.schemas import CodeChunk, Severity
5
+
6
+
7
+ def make_chunk(content: str, file_path: str = "config.py") -> CodeChunk:
8
+ return CodeChunk(
9
+ file_path=file_path,
10
+ language="Python",
11
+ line_start=1,
12
+ line_end=max(1, len(content.splitlines())),
13
+ content=content,
14
+ )
15
+
16
+
17
+ @pytest.mark.anyio
18
+ async def test_config_agent_detects_debug_mode():
19
+ output = await ConfigAgent().analyze([make_chunk("DEBUG = True")])
20
+
21
+ assert output.findings[0].title == "Debug mode enabled"
22
+ assert output.findings[0].severity == Severity.high
23
+ assert output.findings[0].category == "config"
24
+ assert output.findings[0].confidence is not None
25
+
26
+
27
+ @pytest.mark.anyio
28
+ async def test_config_agent_detects_wildcard_cors():
29
+ output = await ConfigAgent().analyze([make_chunk('allow_origins=["*"]')])
30
+
31
+ assert output.findings[0].title == "Wildcard CORS origin"
32
+ assert output.findings[0].severity == Severity.medium
33
+
34
+
35
+ @pytest.mark.anyio
36
+ async def test_config_agent_detects_disabled_tls_verification():
37
+ output = await ConfigAgent().analyze([make_chunk("session.verify = False")])
38
+
39
+ assert output.findings[0].title == "TLS verification disabled in configuration"
40
+ assert output.findings[0].severity == Severity.high
41
+
42
+
43
+ @pytest.mark.anyio
44
+ async def test_config_agent_detects_weak_default_secret():
45
+ output = await ConfigAgent().analyze([make_chunk("SECRET_KEY = 'django-insecure-demo'")])
46
+
47
+ assert output.findings[0].title == "Weak default secret configured"
48
+ assert output.findings[0].severity == Severity.high
49
+
50
+
51
+ @pytest.mark.anyio
52
+ async def test_config_agent_returns_empty_output_for_clean_config():
53
+ output = await ConfigAgent().analyze([make_chunk("DEBUG = env.bool('DEBUG', default=False)")])
54
+
55
+ assert output.findings == []
56
+ assert output.metadata["mode"] == "static-rules"
tests/test_cuda_migration_agent.py ADDED
@@ -0,0 +1,54 @@
1
+ import pytest
2
+
3
+ from app.agents.cuda_migration_agent import CudaMigrationAgent
4
+ from app.schemas import CodeChunk, Severity
5
+
6
+
7
+ def make_chunk(content: str, file_path: str = "model.py") -> CodeChunk:
8
+ return CodeChunk(
9
+ file_path=file_path,
10
+ language="Python",
11
+ line_start=1,
12
+ line_end=max(1, len(content.splitlines())),
13
+ content=content,
14
+ )
15
+
16
+
17
+ @pytest.mark.anyio
18
+ async def test_cuda_migration_agent_detects_torch_cuda():
19
+ output = await CudaMigrationAgent().analyze([make_chunk("device = torch.cuda.current_device()")])
20
+
21
+ assert output.findings[0].title == "PyTorch CUDA-specific API usage"
22
+ assert output.findings[0].severity == Severity.medium
23
+ assert output.findings[0].category == "cuda_migration"
24
+
25
+
26
+ @pytest.mark.anyio
27
+ async def test_cuda_migration_agent_detects_nvidia_monitoring():
28
+ output = await CudaMigrationAgent().analyze([make_chunk("import pynvml\nsubprocess.run(['nvidia-smi'])")])
29
+
30
+ assert output.findings[0].title == "NVIDIA-specific GPU monitoring"
31
+ assert "rocm-smi" in output.findings[0].suggested_fix
32
+
33
+
34
+ @pytest.mark.anyio
35
+ async def test_cuda_migration_agent_detects_cuda_runtime_calls():
36
+ output = await CudaMigrationAgent().analyze([make_chunk("cudaMemcpy(dst, src, size, cudaMemcpyDeviceToHost);", "kernel.cu")])
37
+
38
+ assert output.findings[0].title == "CUDA runtime API call"
39
+ assert output.findings[0].confidence is not None
40
+
41
+
42
+ @pytest.mark.anyio
43
+ async def test_cuda_migration_agent_detects_cuda_libraries():
44
+ output = await CudaMigrationAgent().analyze([make_chunk("handle = cublasCreate()", "linear_algebra.cpp")])
45
+
46
+ assert output.findings[0].title == "CUDA library dependency"
47
+ assert "rocBLAS" in output.findings[0].suggested_fix
48
+
49
+
50
+ @pytest.mark.anyio
51
+ async def test_cuda_migration_agent_returns_empty_for_cpu_code():
52
+ output = await CudaMigrationAgent().analyze([make_chunk("device = torch.device('cpu')")])
53
+
54
+ assert output.findings == []
tests/test_dependency_agent.py ADDED
@@ -0,0 +1,80 @@
1
+ import pytest
2
+
3
+ from app.agents.dependency_agent import DependencyAgent
4
+ from app.config import Settings
5
+ from app.schemas import CodeChunk, Severity
6
+
7
+
8
+ def make_chunk(file_path: str, content: str) -> CodeChunk:
9
+ return CodeChunk(
10
+ file_path=file_path,
11
+ language="Manifest",
12
+ line_start=1,
13
+ line_end=max(1, len(content.splitlines())),
14
+ content=content,
15
+ )
16
+
17
+
18
+ @pytest.mark.anyio
19
+ async def test_dependency_agent_parses_common_manifests_without_network():
20
+ chunks = [
21
+ make_chunk("requirements.txt", "requests==2.28.0\nfastapi>=0.100.0\n"),
22
+ make_chunk("package.json", '{"dependencies": {"express": "^4.18.2"}}'),
23
+ make_chunk("pyproject.toml", '[project]\ndependencies = ["pydantic==2.0.0"]\n'),
24
+ make_chunk("go.mod", "module demo\n\nrequire github.com/gin-gonic/gin v1.9.1\n"),
25
+ make_chunk("Cargo.toml", '[dependencies]\nserde = "1.0.0"\n'),
26
+ ]
27
+
28
+ output = await DependencyAgent(Settings(enable_dependency_cve_lookup=False)).analyze(chunks)
29
+
30
+ assert output.agent_name == "Dependency Agent"
31
+ assert output.findings == []
32
+ assert output.metadata["dependency_count"] == 6
33
+ assert "requirements.txt" in output.metadata["manifests"]
34
+ assert output.metadata["dependency_cves"] == []
35
+
36
+
37
+ @pytest.mark.anyio
38
+ async def test_dependency_agent_turns_cves_into_findings(monkeypatch):
39
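+ # Stand-in for DependencyAgent._lookup_cves returning the same (cve_records, warnings) shape, so no OSV network call happens.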
+ async def fake_lookup_cves(dependencies):
40
+ return (
41
+ [
42
+ {
43
+ "id": "GHSA-test",
44
+ "package": "requests",
45
+ "version": "2.28.0",
46
+ "ecosystem": "PyPI",
47
+ "severity": "HIGH",
48
+ "summary": "Demo vulnerability",
49
+ "manifest_path": "requirements.txt",
50
+ "line_number": 1,
51
+ "fixed_version": "2.32.0",
52
+ }
53
+ ],
54
+ [],
55
+ )
56
+
57
+ agent = DependencyAgent(Settings(enable_dependency_cve_lookup=True))
58
+ monkeypatch.setattr(agent, "_lookup_cves", fake_lookup_cves)
59
+
60
+ output = await agent.analyze([make_chunk("requirements.txt", "requests==2.28.0\n")])
61
+
62
+ assert output.findings[0].severity == Severity.high
63
+ assert output.findings[0].category == "dependency"
64
+ assert output.findings[0].agent_source == "Dependency Agent"
65
+ assert output.metadata["dependency_cves"][0]["id"] == "GHSA-test"
66
+
67
+
68
+ @pytest.mark.anyio
69
+ async def test_dependency_agent_fails_gracefully_when_osv_is_unavailable(monkeypatch):
70
+ async def fake_lookup_cves(dependencies):
71
+ return [], ["Dependency CVE lookup failed gracefully: network unavailable"]
72
+
73
+ agent = DependencyAgent(Settings(enable_dependency_cve_lookup=True))
74
+ monkeypatch.setattr(agent, "_lookup_cves", fake_lookup_cves)
75
+
76
+ output = await agent.analyze([make_chunk("requirements.txt", "requests==2.28.0\n")])
77
+
78
+ assert output.findings == []
79
+ assert output.metadata["dependency_cves"] == []
80
+ assert "network unavailable" in output.metadata["warnings"][0]
tests/test_docs_agent.py CHANGED
@@ -1,6 +1,8 @@
1
  import pytest
2
 
3
  from app.agents.docs_agent import DocsAgent
4
  from app.schemas import CodeChunk, Severity
5
 
6
 
@@ -14,7 +16,7 @@ async def test_docs_agent_flags_incomplete_readme():
14
  content="# Demo\nShort description only.",
15
  )
16
 
17
- output = await DocsAgent().analyze([chunk])
18
 
19
  titles = {finding.title for finding in output.findings}
20
  assert "README missing usage/setup guidance" in titles
@@ -32,7 +34,7 @@ async def test_docs_agent_accepts_useful_readme():
32
  content="# Demo\n\n## Quick Start\nInstall and run it.\n## Tests\nRun pytest.\n## Configuration\nCopy .env.example.",
33
  )
34
 
35
- output = await DocsAgent().analyze([chunk])
36
 
37
  assert output.findings == []
38
 
@@ -47,7 +49,7 @@ async def test_docs_agent_flags_public_python_symbol_without_docstring():
47
  content="def run_audit():\n return True",
48
  )
49
 
50
- output = await DocsAgent().analyze([chunk])
51
 
52
  assert output.findings[0].title == "Public Python symbols missing docstrings"
53
  assert output.findings[0].severity == Severity.low
@@ -64,7 +66,7 @@ async def test_docs_agent_summarizes_missing_docstrings_per_chunk():
64
  content="def first():\n pass\n\ndef second():\n pass",
65
  )
66
 
67
- output = await DocsAgent().analyze([chunk])
68
 
69
  docstring_findings = [
70
  finding for finding in output.findings if finding.title == "Public Python symbols missing docstrings"
 
1
  import pytest
2
 
3
  from app.agents.docs_agent import DocsAgent
4
+ from app.config import Settings
5
+ from app.services.llm_client import LLMClient
6
  from app.schemas import CodeChunk, Severity
7
 
8
 
 
16
  content="# Demo\nShort description only.",
17
  )
18
 
19
+ output = await DocsAgent(LLMClient(Settings(enable_llm_enrichment=False))).analyze([chunk])
20
 
21
  titles = {finding.title for finding in output.findings}
22
  assert "README missing usage/setup guidance" in titles
 
34
  content="# Demo\n\n## Quick Start\nInstall and run it.\n## Tests\nRun pytest.\n## Configuration\nCopy .env.example.",
35
  )
36
 
37
+ output = await DocsAgent(LLMClient(Settings(enable_llm_enrichment=False))).analyze([chunk])
38
 
39
  assert output.findings == []
40
 
 
49
  content="def run_audit():\n return True",
50
  )
51
 
52
+ output = await DocsAgent(LLMClient(Settings(enable_llm_enrichment=False))).analyze([chunk])
53
 
54
  assert output.findings[0].title == "Public Python symbols missing docstrings"
55
  assert output.findings[0].severity == Severity.low
 
66
  content="def first():\n pass\n\ndef second():\n pass",
67
  )
68
 
69
+ output = await DocsAgent(LLMClient(Settings(enable_llm_enrichment=False))).analyze([chunk])
70
 
71
  docstring_findings = [
72
  finding for finding in output.findings if finding.title == "Public Python symbols missing docstrings"
tests/test_error_handling_agent.py ADDED
@@ -0,0 +1,82 @@
1
+ import pytest
2
+
3
+ from app.agents.error_handling_agent import ErrorHandlingAgent
4
+ from app.schemas import CodeChunk, Severity
5
+
6
+
7
+ def make_chunk(content: str, file_path: str = "app.py") -> CodeChunk:
8
+ return CodeChunk(
9
+ file_path=file_path,
10
+ language="Python",
11
+ line_start=1,
12
+ line_end=max(1, len(content.splitlines())),
13
+ content=content,
14
+ )
15
+
16
+
17
+ @pytest.mark.anyio
18
+ async def test_error_handling_agent_detects_bare_except_and_swallow():
19
+ output = await ErrorHandlingAgent().analyze(
20
+ [
21
+ make_chunk(
22
+ "try:\n"
23
+ " work()\n"
24
+ "except:\n"
25
+ " pass\n"
26
+ )
27
+ ]
28
+ )
29
+
30
+ titles = {finding.title for finding in output.findings}
31
+ assert "Broad exception handler" in titles
32
+ assert "Exception swallowed without recovery" in titles
33
+ assert all(finding.category == "error_handling" for finding in output.findings)
34
+
35
+
36
+ @pytest.mark.anyio
37
+ async def test_error_handling_agent_detects_return_none_swallow():
38
+ output = await ErrorHandlingAgent().analyze(
39
+ [
40
+ make_chunk(
41
+ "try:\n"
42
+ " return load_user()\n"
43
+ "except ValueError:\n"
44
+ " return None\n"
45
+ )
46
+ ]
47
+ )
48
+
49
+ assert output.findings[0].title == "Exception swallowed without recovery"
50
+ assert output.findings[0].severity == Severity.high
51
+
52
+
53
+ @pytest.mark.anyio
54
+ async def test_error_handling_agent_does_not_flag_logged_specific_exception():
55
+ output = await ErrorHandlingAgent().analyze(
56
+ [
57
+ make_chunk(
58
+ "try:\n"
59
+ " return load_user()\n"
60
+ "except ValueError:\n"
61
+ " logger.exception('load failed')\n"
62
+ " raise\n"
63
+ )
64
+ ]
65
+ )
66
+
67
+ assert output.findings == []
68
+
69
+
70
+ @pytest.mark.anyio
71
+ async def test_error_handling_agent_detects_request_without_timeout():
72
+ output = await ErrorHandlingAgent().analyze([make_chunk("response = requests.get(url)")])
73
+
74
+ assert output.findings[0].title == "External HTTP call without timeout"
75
+ assert output.findings[0].severity == Severity.medium
76
+
77
+
78
+ @pytest.mark.anyio
79
+ async def test_error_handling_agent_ignores_request_with_timeout():
80
+ output = await ErrorHandlingAgent().analyze([make_chunk("response = requests.get(url, timeout=10)")])
81
+
82
+ assert output.findings == []
tests/test_gradio_app.py CHANGED
@@ -1,7 +1,27 @@
1
  import runpy
2
  from pathlib import Path
3
 
4
- from app.ui.gradio_app import build_app, choose_example, launch_app
5
 
6
 
7
  def test_choose_example_returns_repo_url():
@@ -18,6 +38,188 @@ def test_build_app_creates_gradio_blocks():
18
  assert demo is not None
19
 
20
 
21
  def test_root_app_py_exposes_demo_for_spaces():
22
  namespace = runpy.run_path(str(Path(__file__).parents[1] / "app.py"))
23
 
@@ -43,3 +245,98 @@ def test_launch_app_uses_spaces_friendly_defaults(monkeypatch):
43
  launch_app()
44
 
45
  assert calls == {"server_name": "0.0.0.0", "server_port": 7860}
1
  import runpy
2
  from pathlib import Path
3
 
4
+ import pytest
5
+
6
+ from app.ui.gradio_app import (
7
+ analyze_repo,
8
+ build_app,
9
+ build_finding_choices,
10
+ build_finding_rows,
11
+ build_severity_filter_choices,
12
+ choose_example,
13
+ filter_findings,
14
+ launch_app,
15
+ render_agent_swarm,
16
+ render_empty_summary,
17
+ render_report_toolbar,
18
+ render_report_summary,
19
+ render_workspace_header,
20
+ run_benchmark,
21
+ run_llm_diagnostics,
22
+ select_finding,
23
+ )
24
+ from app.schemas import AuditReport, Finding, Severity
25
 
26
 
27
  def test_choose_example_returns_repo_url():
 
38
  assert demo is not None
39
 
40
 
41
+ def test_render_workspace_header_contains_product_and_readiness_signals():
42
+ html = render_workspace_header()
43
+
44
+ assert "SwarmAudit" in html
45
+ assert "production-readiness scanner" in html
46
+ assert "vLLM" in html
47
+
48
+
49
+ def test_render_empty_summary_contains_placeholder_cards():
50
+ html = render_empty_summary()
51
+
52
+ assert "Files scanned" in html
53
+ assert "<strong>-</strong>" in html
54
+
55
+
56
+ def test_render_agent_swarm_contains_current_agent_panel():
57
+ html = render_agent_swarm()
58
+
59
+ assert "Agent swarm" in html
60
+ assert "Synthesizer" in html
61
+ assert "idle" in html
62
+
63
+
64
+ def test_render_agent_swarm_tracks_running_and_done_states():
65
+ html = render_agent_swarm(
66
+ [
67
+ "Crawler Agent: cloning and mapping repository...",
68
+ "Crawler Agent: mapped 4 files and skipped 1.",
69
+ "Chunker: filtering source files and creating chunks...",
70
+ ]
71
+ )
72
+
73
+ assert "1/12 done" in html
74
+ assert '<div class="agent-item done">' in html
75
+ assert '<div class="agent-item running">' in html
76
+
77
+
78
+ def test_render_report_summary_uses_report_counts():
79
+ report = AuditReport(
80
+ repo_url="https://github.com/example/project",
81
+ scanned_file_count=4,
82
+ skipped_file_count=1,
83
+ findings=[],
84
+ severity_summary={
85
+ Severity.critical: 1,
86
+ Severity.high: 2,
87
+ Severity.medium: 3,
88
+ Severity.low: 4,
89
+ },
90
+ total_findings_count=10,
91
+ security_score=76,
92
+ production_score=84,
93
+ category_summary={"security": 3},
94
+ remediation_roadmap={"this_week": [], "next_sprint": [], "backlog": []},
95
+ agents_run=["Synthesizer Agent"],
96
+ )
97
+
98
+ html = render_report_summary(report)
99
+
100
+ assert "Files scanned" in html
101
+ assert "<strong>4</strong>" in html
102
+ assert "<strong>10</strong>" in html
103
+ assert "metric-critical" in html
104
+
105
+
106
+ def test_render_report_toolbar_renders_report_title():
107
+ report = AuditReport(
108
+ repo_url="https://github.com/example/project",
109
+ scanned_file_count=4,
110
+ skipped_file_count=1,
111
+ findings=[],
112
+ severity_summary={
113
+ Severity.critical: 1,
114
+ Severity.high: 2,
115
+ Severity.medium: 0,
116
+ Severity.low: 0,
117
+ },
118
+ displayed_findings_count=3,
119
+ security_score=76,
120
+ production_score=84,
121
+ category_summary={"security": 3},
122
+ remediation_roadmap={"this_week": [1], "next_sprint": [], "backlog": []},
123
+ agents_run=["Synthesizer Agent"],
124
+ )
125
+
126
+ html = render_report_toolbar(report)
127
+
128
+ assert "Audit report" in html
129
+
130
+
131
+ def test_build_severity_filter_choices_uses_actual_counts():
132
+ report = AuditReport(
133
+ repo_url="https://github.com/example/project",
134
+ scanned_file_count=4,
135
+ skipped_file_count=1,
136
+ findings=[],
137
+ severity_summary={
138
+ Severity.critical: 1,
139
+ Severity.high: 2,
140
+ Severity.medium: 0,
141
+ Severity.low: 0,
142
+ },
143
+ displayed_findings_count=3,
144
+ agents_run=["Synthesizer Agent"],
145
+ )
146
+
147
+ assert build_severity_filter_choices(report) == ["All 3", "Critical 1", "High 2"]
148
+
149
+
150
+ def make_report_with_findings() -> AuditReport:
151
+ finding = Finding(
152
+ title="Missing timeout",
153
+ severity=Severity.medium,
154
+ file_path="app.py",
155
+ line_start=10,
156
+ line_end=10,
157
+ description="HTTP request has no timeout.",
158
+ why_it_matters="Requests can hang indefinitely.",
159
+ suggested_fix="Pass timeout=10.",
160
+ agent_source="Performance Agent",
161
+ category="performance",
162
+ )
163
+ return AuditReport(
164
+ repo_url="https://github.com/example/project",
165
+ scanned_file_count=1,
166
+ skipped_file_count=0,
167
+ findings=[finding],
168
+ severity_summary={
169
+ Severity.critical: 0,
170
+ Severity.high: 0,
171
+ Severity.medium: 1,
172
+ Severity.low: 0,
173
+ },
174
+ total_findings_count=1,
175
+ displayed_findings_count=1,
176
+ agents_run=["Performance Agent"],
177
+ )
178
+
179
+
180
+ def test_build_finding_rows_uses_actual_report_findings():
181
+ rows = build_finding_rows(make_report_with_findings())
182
+
183
+ assert rows == [["F-001", "MEDIUM", "Missing timeout", "app.py:10", "Performance Agent"]]
184
+
185
+
186
+ def test_build_finding_choices_uses_actual_report_findings():
187
+ choices = build_finding_choices(make_report_with_findings())
188
+
189
+ assert choices == ["MED  Missing timeout\napp.py:10 | Performance Agent"]
190
+
191
+
192
+ def test_filter_findings_returns_only_selected_severity():
193
+ high = Finding(
194
+ title="High risk",
195
+ severity=Severity.high,
196
+ file_path="app.py",
197
+ line_start=20,
198
+ line_end=20,
199
+ description="High issue.",
200
+ why_it_matters="Important.",
201
+ suggested_fix="Fix it.",
202
+ agent_source="Security Agent",
203
+ category="security",
204
+ )
205
+ report = make_report_with_findings()
206
+ report.findings.append(high)
207
+
208
+ update, html = filter_findings("High 1", report)
209
+
210
+ assert update["choices"] == ["HIGH High risk\napp.py:20 | Security Agent"]
211
+ assert "High risk" in html
212
+
213
+
214
+ def test_select_finding_renders_selected_actual_finding():
215
+ choices = build_finding_choices(make_report_with_findings())
216
+
217
+ html = select_finding(choices[0], make_report_with_findings())
218
+
219
+ assert "Missing timeout" in html
220
+ assert "Pass timeout=10." in html
221
+
222
+
223
  def test_root_app_py_exposes_demo_for_spaces():
224
  namespace = runpy.run_path(str(Path(__file__).parents[1] / "app.py"))
225
 
 
245
  launch_app()
246
 
247
  assert calls == {"server_name": "0.0.0.0", "server_port": 7860}
248
+
249
+
250
+ def test_launch_app_retries_dynamic_port_when_default_local_port_is_busy(monkeypatch):
251
+ calls = []
252
+
253
+ class FakeQueuedApp:
254
+ def launch(self, **kwargs):
255
+ calls.append(kwargs)
256
+ if len(calls) == 1:
257
+ raise OSError("Cannot find empty port in range: 7860-7860")
258
+
259
+ class FakeApp:
260
+ def queue(self):
261
+ return FakeQueuedApp()
262
+
263
+ monkeypatch.setattr("app.ui.gradio_app.build_app", lambda: FakeApp())
264
+ monkeypatch.delenv("PORT", raising=False)
265
+ monkeypatch.delenv("GRADIO_SERVER_PORT", raising=False)
266
+ monkeypatch.delenv("GRADIO_SERVER_NAME", raising=False)
267
+
268
+ launch_app()
269
+
270
+ assert calls == [
271
+ {"server_name": "0.0.0.0", "server_port": 7860},
272
+ {"server_name": "0.0.0.0", "server_port": None},
273
+ ]
274
+
275
+
276
+ @pytest.mark.anyio
277
+ async def test_run_llm_diagnostics_returns_provider_status(monkeypatch):
278
+ monkeypatch.setattr(
279
+ "app.ui.gradio_app.get_settings",
280
+ lambda: __import__("app.config").config.Settings(llm_provider="mock"),
281
+ )
282
+
283
+ markdown = await run_llm_diagnostics()
284
+
285
+ assert "LLM Diagnostics" in markdown
286
+ assert "Provider: `mock`" in markdown
287
+ assert "Status: `OK`" in markdown
288
+
289
+
290
+ @pytest.mark.anyio
291
+ async def test_run_benchmark_returns_mock_result(monkeypatch):
292
+ monkeypatch.setattr(
293
+ "app.ui.gradio_app.get_settings",
294
+ lambda: __import__("app.config").config.Settings(llm_provider="mock"),
295
+ )
296
+
297
+ markdown = await run_benchmark()
298
+
299
+ assert "LLM Benchmark" in markdown
300
+ assert "Provider: `mock`" in markdown
301
+ assert "Status: `OK`" in markdown
302
+
303
+
304
+ @pytest.mark.anyio
305
+ async def test_analyze_repo_empty_input_clears_report_exports():
306
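+ # Yielded tuple order: progress text, agent swarm, summary, toolbar, severity filter, overview, finding selector, finding detail, markdown export, JSON export, report state.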
+ result = await anext(analyze_repo(" "))
307
+
308
+ assert result[0] == "Paste a public GitHub repository URL to start."
309
+ assert "Agent swarm" in result[1]
310
+ assert "Files scanned" in result[2]
311
+ assert "Audit report" in result[3]
312
+ assert result[4]["choices"] == ["All 0"]
313
+ assert "Security Score" in result[5]
314
+ assert result[6]["choices"] == []
315
+ assert result[6]["value"] is None
316
+ assert "Select a finding" in result[7]
317
+ assert result[8:] == (None, None, None)
318
+
319
+
320
+ @pytest.mark.anyio
321
+ async def test_analyze_repo_failure_clears_report_exports(monkeypatch):
322
+ class FakeAuditGraph:
323
+ async def run_with_progress(self, repo_url: str):
324
+ yield "Crawler Agent: cloning and mapping repository..."
325
+ raise RuntimeError("clone failed")
326
+
327
+ monkeypatch.setattr("app.ui.gradio_app.AuditGraph", FakeAuditGraph)
328
+
329
+ updates = []
330
+ async for update in analyze_repo("https://github.com/example/project"):
331
+ updates.append(update)
332
+
333
+ assert updates[-1][0].endswith("Audit failed: clone failed")
334
+ assert "Agent swarm" in updates[-1][1]
335
+ assert "Files scanned" in updates[-1][2]
336
+ assert "Audit report" in updates[-1][3]
337
+ assert updates[-1][4]["choices"] == ["All 0"]
338
+ assert "Security Score" in updates[-1][5]
339
+ assert updates[-1][6]["choices"] == []
340
+ assert updates[-1][6]["value"] is None
341
+ assert "Select a finding" in updates[-1][7]
342
+ assert updates[-1][8:] == (None, None, None)
tests/test_graph_progress.py CHANGED
@@ -7,6 +7,44 @@ from app.config import Settings
7
  from app.schemas import AuditReport
8
 
9
10
  @pytest.mark.anyio
11
  async def test_run_with_progress_yields_real_stages_and_report(tmp_path: Path):
12
  source = tmp_path / "app.py"
@@ -29,9 +67,20 @@ async def test_run_with_progress_yields_real_stages_and_report(tmp_path: Path):
29
  assert any("Performance Agent" in event for event in events if isinstance(event, str))
30
  assert any("Quality Agent" in event for event in events if isinstance(event, str))
31
  assert any("Docs Agent" in event for event in events if isinstance(event, str))
32
  assert isinstance(events[-1], AuditReport)
33
- assert len(events[-1].findings) == 2
 
34
  assert "Security Agent" in events[-1].agents_run
35
  assert "Performance Agent" in events[-1].agents_run
36
  assert "Quality Agent" in events[-1].agents_run
37
  assert "Docs Agent" in events[-1].agents_run
7
  from app.schemas import AuditReport
8
 
9
 
10
+ def test_audit_graph_exposes_current_agents_through_registry():
11
+ graph = AuditGraph(Settings())
12
+
13
+ assert [spec.node_name for spec in graph.analysis_agents] == [
14
+ "security",
15
+ "performance",
16
+ "quality",
17
+ "docs",
18
+ "config",
19
+ "dependency",
20
+ "error_handling",
21
+ "observability",
22
+ "cuda_migration",
23
+ ]
24
+ assert [spec.state_key for spec in graph.analysis_agents] == [
25
+ "security_output",
26
+ "performance_output",
27
+ "quality_output",
28
+ "docs_output",
29
+ "config_output",
30
+ "dependency_output",
31
+ "error_handling_output",
32
+ "observability_output",
33
+ "cuda_migration_output",
34
+ ]
35
+ assert [spec.agent.name for spec in graph.analysis_agents] == [
36
+ "Security Agent",
37
+ "Performance Agent",
38
+ "Quality Agent",
39
+ "Docs Agent",
40
+ "Config Agent",
41
+ "Dependency Agent",
42
+ "Error Handling Agent",
43
+ "Observability Agent",
44
+ "CUDA-to-ROCm Agent",
45
+ ]
46
+
47
+
48
  @pytest.mark.anyio
49
  async def test_run_with_progress_yields_real_stages_and_report(tmp_path: Path):
50
  source = tmp_path / "app.py"
 
67
  assert any("Performance Agent" in event for event in events if isinstance(event, str))
68
  assert any("Quality Agent" in event for event in events if isinstance(event, str))
69
  assert any("Docs Agent" in event for event in events if isinstance(event, str))
70
+ assert any("Config Agent" in event for event in events if isinstance(event, str))
71
+ assert any("Dependency Agent" in event for event in events if isinstance(event, str))
72
+ assert any("Error Handling Agent" in event for event in events if isinstance(event, str))
73
+ assert any("Observability Agent" in event for event in events if isinstance(event, str))
74
+ assert any("CUDA-to-ROCm Agent" in event for event in events if isinstance(event, str))
75
  assert isinstance(events[-1], AuditReport)
76
+ assert len(events[-1].findings) >= 2
77
+ assert any(finding.agent_source == "Error Handling Agent" for finding in events[-1].findings)
78
  assert "Security Agent" in events[-1].agents_run
79
  assert "Performance Agent" in events[-1].agents_run
80
  assert "Quality Agent" in events[-1].agents_run
81
  assert "Docs Agent" in events[-1].agents_run
82
+ assert "Config Agent" in events[-1].agents_run
83
+ assert "Dependency Agent" in events[-1].agents_run
84
+ assert "Error Handling Agent" in events[-1].agents_run
85
+ assert "Observability Agent" in events[-1].agents_run
86
+ assert "CUDA-to-ROCm Agent" in events[-1].agents_run
tests/test_json_parser.py ADDED
@@ -0,0 +1,44 @@
1
+ from app.schemas import Severity
2
+ from app.services.json_parser import parse_agent_output, parse_json_object
3
+
4
+
5
+ def test_parse_json_object_accepts_fenced_json():
6
+ data = parse_json_object('```json\n{"findings": []}\n```')
7
+
8
+ assert data == {"findings": []}
9
+
10
+
11
+ def test_parse_json_object_extracts_object_from_extra_text():
12
+ data = parse_json_object('Here is JSON: {"findings": []} done.')
13
+
14
+ assert data == {"findings": []}
15
+
16
+
17
+ def test_parse_agent_output_returns_empty_output_for_invalid_json():
18
+ output = parse_agent_output("not json", "Security Agent")
19
+
20
+ assert output.findings == []
21
+ assert output.metadata["parse_error"] is True
22
+
23
+
24
+ def test_parse_agent_output_validates_findings():
25
+ output = parse_agent_output(
26
+ {
27
+ "findings": [
28
+ {
29
+ "title": "Unsafe eval",
30
+ "severity": "HIGH",
31
+ "file_path": "app.py",
32
+ "line_start": 1,
33
+ "line_end": 1,
34
+ "description": "eval is used",
35
+ "why_it_matters": "Arbitrary code execution",
36
+ "suggested_fix": "Remove eval",
37
+ "agent_source": "Security Agent",
38
+ }
39
+ ]
40
+ },
41
+ "Security Agent",
42
+ )
43
+
44
+ assert output.findings[0].severity == Severity.high
tests/test_llm_client.py ADDED
@@ -0,0 +1,59 @@
1
+ import httpx
2
+ import pytest
3
+
4
+ from app.config import Settings
5
+ from app.services.llm_client import LLMClient
6
+
7
+
8
+ @pytest.mark.anyio
9
+ async def test_mock_llm_health_check_is_ok():
10
+ health = await LLMClient(Settings(llm_provider="mock")).health_check()
11
+
12
+ assert health.ok is True
13
+ assert health.provider == "mock"
14
+ assert health.completion_preview == "Mock LLM is active."
15
+
16
+
17
+ @pytest.mark.anyio
18
+ async def test_vllm_health_check_lists_models_and_tests_completion(monkeypatch):
19
+ async def fake_get(self, url, headers):
20
+ return httpx.Response(
21
+ 200,
22
+ json={"data": [{"id": "Qwen/Qwen2.5-Coder-32B-Instruct"}]},
23
+ request=httpx.Request("GET", url),
24
+ )
25
+
26
+ async def fake_post(self, url, json, headers):
27
+ return httpx.Response(
28
+ 200,
29
+ json={"choices": [{"message": {"content": "SwarmAudit LLM OK"}}]},
30
+ request=httpx.Request("POST", url),
31
+ )
32
+
33
+ monkeypatch.setattr(httpx.AsyncClient, "get", fake_get)
34
+ monkeypatch.setattr(httpx.AsyncClient, "post", fake_post)
35
+
36
+ health = await LLMClient(
37
+ Settings(
38
+ llm_provider="vllm",
39
+ llm_base_url="http://amd.example:8000/v1",
40
+ llm_api_key="token",
41
+ )
42
+ ).health_check()
43
+
44
+ assert health.ok is True
45
+ assert health.models == ["Qwen/Qwen2.5-Coder-32B-Instruct"]
46
+ assert health.completion_preview == "SwarmAudit LLM OK"
47
+
48
+
49
+ @pytest.mark.anyio
50
+ async def test_vllm_health_check_reports_errors(monkeypatch):
51
+ async def fake_get(self, url, headers):
52
+ raise httpx.ConnectError("connection failed", request=httpx.Request("GET", url))
53
+
54
+ monkeypatch.setattr(httpx.AsyncClient, "get", fake_get)
55
+
56
+ health = await LLMClient(Settings(llm_provider="vllm")).health_check()
57
+
58
+ assert health.ok is False
59
+ assert "connection failed" in health.error
tests/test_observability_agent.py ADDED
@@ -0,0 +1,85 @@
1
+ import pytest
2
+
3
+ from app.agents.observability_agent import ObservabilityAgent
4
+ from app.schemas import CodeChunk, Severity
5
+
6
+
7
+ def make_chunk(content: str, file_path: str = "app.py") -> CodeChunk:
8
+ return CodeChunk(
9
+ file_path=file_path,
10
+ language="Python",
11
+ line_start=1,
12
+ line_end=max(1, len(content.splitlines())),
13
+ content=content,
14
+ )
15
+
16
+
17
+ @pytest.mark.anyio
18
+ async def test_observability_agent_detects_sensitive_logging():
19
+ output = await ObservabilityAgent().analyze([make_chunk("print(f'password={password}')")])
20
+
21
+ assert output.findings[0].title == "Sensitive value may be written to logs"
22
+ assert output.findings[0].severity == Severity.high
23
+ assert output.findings[0].category == "observability"
24
+
25
+
26
+ @pytest.mark.anyio
27
+ async def test_observability_agent_detects_print_overuse_without_logger():
28
+ output = await ObservabilityAgent().analyze(
29
+ [
30
+ make_chunk(
31
+ "print('start')\n"
32
+ "print('middle')\n"
33
+ "print('done')\n"
34
+ )
35
+ ]
36
+ )
37
+
38
+ assert output.findings[0].title == "Print statements used instead of structured logging"
39
+ assert output.findings[0].severity == Severity.low
40
+
41
+
42
+ @pytest.mark.anyio
43
+ async def test_observability_agent_does_not_flag_prints_when_logger_exists():
44
+ output = await ObservabilityAgent().analyze(
45
+ [
46
+ make_chunk("print('start')\nprint('middle')\nprint('done')\n"),
47
+ make_chunk("logger.info('service started')", "logging_setup.py"),
48
+ ]
49
+ )
50
+
51
+ assert output.findings == []
52
+
53
+
54
+ @pytest.mark.anyio
55
+ async def test_observability_agent_detects_missing_health_route():
56
+ output = await ObservabilityAgent().analyze(
57
+ [
58
+ make_chunk(
59
+ "@app.get('/users')\n"
60
+ "def users():\n"
61
+ " return []\n"
62
+ )
63
+ ]
64
+ )
65
+
66
+ assert output.findings[0].title == "Web service has routes but no health endpoint detected"
67
+ assert output.findings[0].severity == Severity.medium
68
+
69
+
70
+ @pytest.mark.anyio
71
+ async def test_observability_agent_accepts_existing_health_route():
72
+ output = await ObservabilityAgent().analyze(
73
+ [
74
+ make_chunk(
75
+ "@app.get('/users')\n"
76
+ "def users():\n"
77
+ " return []\n"
78
+ "@app.get('/health')\n"
79
+ "def health():\n"
80
+ " return {'ok': True}\n"
81
+ )
82
+ ]
83
+ )
84
+
85
+ assert output.findings == []
tests/test_repo_crawler.py CHANGED
@@ -40,6 +40,17 @@ def test_scan_local_repo_includes_readme_for_docs_agent(tmp_path: Path):
40
  assert result.files[0].language == "Markdown"
41
 
42
 
43
  def test_clone_and_scan_omits_gitpython_timeout_on_windows(tmp_path: Path):
44
  crawler = RepoCrawler(Settings(max_files=10, max_file_size_kb=1, clone_base_dir=str(tmp_path / "clones")))
45
 
 
40
  assert result.files[0].language == "Markdown"
41
 
42
 
43
+ def test_scan_local_repo_includes_dependency_manifests(tmp_path: Path):
44
+ (tmp_path / "requirements.txt").write_text("requests==2.28.0\n", encoding="utf-8")
45
+ (tmp_path / "package.json").write_text('{"dependencies": {"express": "4.18.2"}}', encoding="utf-8")
46
+
47
+ crawler = RepoCrawler(Settings(max_files=10, max_file_size_kb=10))
48
+ result = crawler.scan_local_repo("https://github.com/example/project", tmp_path)
49
+
50
+ assert {source_file.path for source_file in result.files} == {"requirements.txt", "package.json"}
51
+ assert {source_file.language for source_file in result.files} == {"Python Requirements", "Node Package"}
52
+
53
+
54
  def test_clone_and_scan_omits_gitpython_timeout_on_windows(tmp_path: Path):
55
  crawler = RepoCrawler(Settings(max_files=10, max_file_size_kb=1, clone_base_dir=str(tmp_path / "clones")))
56
 
tests/test_report_exports.py ADDED
@@ -0,0 +1,130 @@
1
+ import json
2
+ from pathlib import Path
3
+
4
+ from app.schemas import AuditReport, Finding, Severity
5
+ from app.services.report_formatter import (
6
+ format_empty_report_html,
7
+ format_finding_detail_html,
8
+ format_report_html,
9
+ write_report_exports,
10
+ )
11
+
12
+
13
+ def make_report() -> AuditReport:
14
+ finding = Finding(
15
+ title="Missing timeout",
16
+ severity=Severity.medium,
17
+ file_path="app.py",
18
+ line_start=10,
19
+ line_end=10,
20
+ description="HTTP request has no timeout.",
21
+ why_it_matters="Requests can hang indefinitely.",
22
+ suggested_fix="Pass a timeout value.",
23
+ agent_source="Performance Agent",
24
+ )
25
+ return AuditReport(
26
+ repo_url="https://github.com/example/project",
27
+ scanned_file_count=1,
28
+ skipped_file_count=0,
29
+ findings=[finding],
30
+ severity_summary={
31
+ Severity.critical: 0,
32
+ Severity.high: 0,
33
+ Severity.medium: 1,
34
+ Severity.low: 0,
35
+ },
36
+ total_findings_count=1,
37
+ displayed_findings_count=1,
38
+ hidden_findings_count=0,
39
+ agent_finding_counts={"Performance Agent": 1},
40
+ category_summary={"performance": 1},
41
+ security_score=100,
42
+ production_score=96,
43
+ remediation_roadmap={
44
+ "this_week": [],
45
+ "next_sprint": [
46
+ {
47
+ "title": "Missing timeout",
48
+ "severity": "MEDIUM",
49
+ "category": "performance",
50
+ "file_path": "app.py",
51
+ "line_start": "10",
52
+ "agent_source": "Performance Agent",
53
+ }
54
+ ],
55
+ "backlog": [],
56
+ },
57
+ dependency_cves=[
58
+ {
59
+ "id": "GHSA-test",
60
+ "package": "requests",
61
+ "version": "2.28.0",
62
+ "ecosystem": "PyPI",
63
+ "severity": "HIGH",
64
+ "fixed_version": "2.32.0",
65
+ }
66
+ ],
67
+ agents_run=["Performance Agent", "Synthesizer Agent"],
68
+ )
69
+
70
+
71
+ def test_write_report_exports_creates_markdown_and_json():
72
+ output_dir = Path.cwd() / ".tmp_test_exports" / "report_export"
73
+ output_dir.mkdir(parents=True, exist_ok=True)
74
+
75
+ markdown_path, json_path = write_report_exports(make_report(), output_dir)
76
+
77
+ markdown = output_dir.joinpath("swarm_audit_report.md").read_text(encoding="utf-8")
78
+ data = json.loads(output_dir.joinpath("swarm_audit_report.json").read_text(encoding="utf-8"))
79
+
80
+ assert markdown_path.endswith("swarm_audit_report.md")
81
+ assert json_path.endswith("swarm_audit_report.json")
82
+ assert "# SwarmAudit Report" in markdown
83
+ assert "Security Score" in markdown
84
+ assert "Production Readiness Score" in markdown
85
+ assert "Category Summary" in markdown
86
+ assert "Remediation Roadmap" in markdown
87
+ assert "Dependency CVEs" in markdown
88
+ assert "GHSA-test" in markdown
89
+ assert "Missing timeout" in markdown
90
+ assert data["repo_url"] == "https://github.com/example/project"
91
+ assert data["findings"][0]["severity"] == "MEDIUM"
92
+ assert data["total_findings_count"] == 1
93
+
94
+
95
+ def test_format_report_html_renders_console_and_escapes_content():
96
+ report = make_report()
97
+ report.findings[0].title = "<script>alert('x')</script>"
98
+
99
+ html = format_report_html(report)
100
+
101
+ assert "audit-console" in html
102
+ assert "finding-list" in html
103
+ assert "finding-detail" in html
104
+ assert "&lt;script&gt;" in html
105
+ assert "<script>" not in html
106
+
107
+
108
+ def test_format_report_html_hides_zero_count_severity_filters():
109
+ report = make_report()
110
+
111
+ html = format_report_html(report)
112
+
113
+ assert "Medium 1" in html
114
+ assert "Critical 0" not in html
115
+ assert "High 0" not in html
116
+ assert "Low 0" not in html
117
+
118
+
119
+ def test_format_empty_report_html_renders_placeholder():
120
+ html = format_empty_report_html()
121
+
122
+ assert "Run an audit to populate findings" in html
123
+ assert "audit-console" in html
124
+
125
+
126
+ def test_format_finding_detail_links_to_github_file_reference():
127
+ html = format_finding_detail_html(make_report(), 0)
128
+
129
+ assert 'href="https://github.com/example/project/blob/HEAD/app.py#L10"' in html
130
+ assert 'target="_blank"' in html
tests/test_security_report.py CHANGED
@@ -16,7 +16,7 @@ async def test_security_agent_and_synthesizer_return_structured_report():
16
  line_end=10,
17
  content="API_KEY = '1234567890abcdef'",
18
  )
19
- output = await SecurityAgent(LLMClient(Settings())).analyze([chunk])
20
  repo = RepoScanResult(repo_url="https://github.com/example/project", local_path=".", files=[], skipped_files=0)
21
 
22
  report = await SynthesizerAgent().synthesize(repo, [output])
@@ -29,3 +29,69 @@ async def test_security_agent_and_synthesizer_return_structured_report():
29
  assert report.total_findings_count == 1
30
  assert report.displayed_findings_count == 1
31
  assert report.hidden_findings_count == 0
16
  line_end=10,
17
  content="API_KEY = '1234567890abcdef'",
18
  )
19
+ output = await SecurityAgent(LLMClient(Settings(enable_llm_enrichment=False))).analyze([chunk])
20
  repo = RepoScanResult(repo_url="https://github.com/example/project", local_path=".", files=[], skipped_files=0)
21
 
22
  report = await SynthesizerAgent().synthesize(repo, [output])
 
29
  assert report.total_findings_count == 1
30
  assert report.displayed_findings_count == 1
31
  assert report.hidden_findings_count == 0
32
+
33
+
34
+ class FakeLLMClient(LLMClient):
35
+ def __init__(self, settings: Settings, payload):
36
+ super().__init__(settings)
37
+ self.payload = payload
38
+ self.calls = 0
39
+
40
+ async def complete_json(self, system_prompt: str, user_prompt: str):
41
+ self.calls += 1
42
+ return self.payload
43
+
44
+
45
+ @pytest.mark.anyio
46
+ async def test_security_agent_does_not_call_llm_when_enrichment_disabled():
47
+ llm_client = FakeLLMClient(Settings(enable_llm_enrichment=False), {"findings": []})
48
+ chunk = CodeChunk(file_path="app.py", language="Python", line_start=1, line_end=1, content="print('ok')")
49
+
50
+ output = await SecurityAgent(llm_client).analyze([chunk])
51
+
52
+ assert llm_client.calls == 0
53
+ assert output.metadata["llm_enrichment_enabled"] is False
54
+
55
+
56
+ @pytest.mark.anyio
57
+ async def test_security_agent_merges_valid_llm_findings_when_enabled():
58
+ llm_client = FakeLLMClient(
59
+ Settings(enable_llm_enrichment=True, max_llm_chunks=1),
60
+ {
61
+ "findings": [
62
+ {
63
+ "title": "LLM detected command injection",
64
+ "severity": "HIGH",
65
+ "file_path": "app.py",
66
+ "line_start": 2,
67
+ "line_end": 2,
68
+ "description": "User input reaches a shell command.",
69
+ "why_it_matters": "Attackers could execute arbitrary commands.",
70
+ "suggested_fix": "Avoid shell=True and pass argument lists.",
71
+ "agent_source": "Security Agent",
72
+ }
73
+ ]
74
+ },
75
+ )
76
+ chunk = CodeChunk(file_path="app.py", language="Python", line_start=1, line_end=2, content="run(user_input)")
77
+
78
+ output = await SecurityAgent(llm_client).analyze([chunk])
79
+
80
+ assert llm_client.calls == 1
81
+ assert output.findings[0].title == "LLM detected command injection"
82
+ assert output.metadata["llm_findings"] == 1
83
+
84
+
85
+ @pytest.mark.anyio
86
+ async def test_security_agent_survives_llm_failure_when_enabled():
87
+ class FailingLLMClient(FakeLLMClient):
88
+ async def complete_json(self, system_prompt: str, user_prompt: str):
89
+ raise RuntimeError("vLLM unavailable")
90
+
91
+ llm_client = FailingLLMClient(Settings(enable_llm_enrichment=True), {})
92
+ chunk = CodeChunk(file_path="app.py", language="Python", line_start=1, line_end=1, content="print('ok')")
93
+
94
+ output = await SecurityAgent(llm_client).analyze([chunk])
95
+
96
+ assert output.findings == []
97
+ assert "vLLM unavailable" in output.metadata["llm_error"]
tests/test_synthesizer_agent.py CHANGED
@@ -29,10 +29,10 @@ async def test_synthesizer_preserves_totals_when_display_is_truncated():
     report = await SynthesizerAgent().synthesize(repo, [output])
 
     assert report.total_findings_count == 20
-    assert report.displayed_findings_count == 8
-    assert report.hidden_findings_count == 12
+    assert report.displayed_findings_count == 12
+    assert report.hidden_findings_count == 8
     assert report.agent_finding_counts["Docs Agent"] == 20
-    assert any("displaying 8 of 20" in warning for warning in report.warnings)
+    assert any("displaying 12 of 20" in warning for warning in report.warnings)
 
 
 @pytest.mark.anyio
@@ -46,3 +46,94 @@ async def test_synthesizer_keeps_high_severity_before_low_findings():
     report = await SynthesizerAgent().synthesize(repo, outputs)
 
     assert report.findings[0].severity == Severity.high
+
+
+@pytest.mark.anyio
+async def test_synthesizer_keeps_low_findings_visible_when_report_is_noisy():
+    outputs = [
+        AgentOutput(
+            agent_name="Performance Agent",
+            findings=[make_finding(index, "Performance Agent", Severity.high) for index in range(45)],
+        ),
+        AgentOutput(
+            agent_name="Docs Agent",
+            findings=[make_finding(index + 100, "Docs Agent", Severity.low) for index in range(20)],
+        ),
+    ]
+    repo = RepoScanResult(repo_url="https://github.com/example/project", local_path=".", files=[], skipped_files=0)
+
+    report = await SynthesizerAgent().synthesize(repo, outputs)
+
+    assert any(finding.severity == Severity.low for finding in report.findings)
+    assert sum(1 for finding in report.findings if finding.severity == Severity.low) <= 12
+
+
+@pytest.mark.anyio
+async def test_synthesizer_populates_scores_categories_and_roadmap():
+    outputs = [
+        AgentOutput(
+            agent_name="Security Agent",
+            findings=[make_finding(1, "Security Agent", Severity.high)],
+        ),
+        AgentOutput(
+            agent_name="Performance Agent",
+            findings=[make_finding(2, "Performance Agent", Severity.medium)],
+        ),
+        AgentOutput(
+            agent_name="Error Handling Agent",
+            findings=[make_finding(3, "Error Handling Agent", Severity.low)],
+        ),
+    ]
+    repo = RepoScanResult(repo_url="https://github.com/example/project", local_path=".", files=[], skipped_files=0)
+
+    report = await SynthesizerAgent().synthesize(repo, outputs)
+
+    assert report.security_score == 89
+    assert report.production_score == 95
+    assert report.category_summary == {"error_handling": 1, "performance": 1, "security": 1}
+    assert report.remediation_roadmap["this_week"][0]["category"] == "security"
+    assert report.remediation_roadmap["next_sprint"][0]["category"] == "performance"
+    assert report.remediation_roadmap["backlog"][0]["category"] == "error_handling"
+
+
+@pytest.mark.anyio
+async def test_synthesizer_carries_dependency_cves_and_warnings():
+    outputs = [
+        AgentOutput(
+            agent_name="Dependency Agent",
+            findings=[],
+            metadata={
+                "dependency_cves": [{"id": "GHSA-test", "package": "requests", "severity": "HIGH"}],
+                "warnings": ["Dependency CVE lookup failed gracefully: timeout"],
+            },
+        )
+    ]
+    repo = RepoScanResult(repo_url="https://github.com/example/project", local_path=".", files=[], skipped_files=0)
+
+    report = await SynthesizerAgent().synthesize(repo, outputs)
+
+    assert report.dependency_cves == [{"id": "GHSA-test", "package": "requests", "severity": "HIGH"}]
+    assert "timeout" in report.warnings[0]
+
+
+@pytest.mark.anyio
+async def test_synthesizer_caps_score_penalties_for_noisy_repos():
+    outputs = [
+        AgentOutput(
+            agent_name="Performance Agent",
+            findings=[make_finding(index, "Performance Agent", Severity.medium) for index in range(120)],
+        ),
+        AgentOutput(
+            agent_name="Docs Agent",
+            findings=[make_finding(index + 200, "Docs Agent", Severity.low) for index in range(80)],
+        ),
+        AgentOutput(
+            agent_name="Error Handling Agent",
+            findings=[make_finding(index + 400, "Error Handling Agent", Severity.high) for index in range(20)],
+        ),
+    ]
+    repo = RepoScanResult(repo_url="https://github.com/example/project", local_path=".", files=[], skipped_files=0)
+
+    report = await SynthesizerAgent().synthesize(repo, outputs)
+
+    assert report.production_score == 54
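The noisy-repo test implies the scoring rule sums per-severity penalties and caps the total so a flood of findings cannot drive the score to zero. The real weights behind the pinned values (95, 89, 54) live in the synthesizer; the sketch below only illustrates the capped-penalty shape with invented constants and will not reproduce those numbers:

```python
# Illustrative capped-penalty scoring; weights and cap are invented here and
# deliberately do NOT match the synthesizer's pinned values (95, 89, 54).
SEVERITY_PENALTY = {"CRITICAL": 8, "HIGH": 5, "MEDIUM": 2, "LOW": 1}
MAX_TOTAL_PENALTY = 60  # gives a floor of 40 no matter how noisy the repo is


def capped_production_score(findings) -> int:
    # Assumes each finding carries a Severity enum with a string value.
    penalty = sum(SEVERITY_PENALTY.get(f.severity.value.upper(), 0) for f in findings)
    return 100 - min(penalty, MAX_TOTAL_PENALTY)
```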
tests/test_v2_schemas.py ADDED
@@ -0,0 +1,93 @@
+import json
+
+import pytest
+from pydantic import ValidationError
+
+from app.schemas import AuditReport, Finding, Severity
+from app.services.report_formatter import write_report_exports
+
+
+def make_finding(**overrides) -> Finding:
+    data = {
+        "title": "Finding",
+        "severity": Severity.low,
+        "file_path": "app.py",
+        "line_start": 1,
+        "line_end": 1,
+        "description": "Description",
+        "why_it_matters": "Why",
+        "suggested_fix": "Fix",
+        "agent_source": "Quality Agent",
+    }
+    data.update(overrides)
+    return Finding(**data)
+
+
+def make_report(**overrides) -> AuditReport:
+    data = {
+        "repo_url": "https://github.com/example/project",
+        "scanned_file_count": 1,
+        "skipped_file_count": 0,
+        "findings": [make_finding()],
+        "severity_summary": {
+            Severity.critical: 0,
+            Severity.high: 0,
+            Severity.medium: 0,
+            Severity.low: 1,
+        },
+        "agents_run": ["Quality Agent"],
+    }
+    data.update(overrides)
+    return AuditReport(**data)
+
+
+def test_finding_keeps_legacy_fields_optional_for_v2_metadata():
+    finding = make_finding()
+
+    assert finding.category is None
+    assert finding.confidence is None
+
+
+def test_finding_accepts_v2_category_and_confidence():
+    finding = make_finding(category="observability", confidence=0.91)
+
+    assert finding.category == "observability"
+    assert finding.confidence == 0.91
+
+
+@pytest.mark.parametrize("confidence", [-0.1, 1.1])
+def test_finding_rejects_invalid_confidence(confidence):
+    with pytest.raises(ValidationError):
+        make_finding(confidence=confidence)
+
+
+def test_audit_report_defaults_v2_fields_without_breaking_legacy_reports():
+    report = make_report()
+
+    assert report.category_summary == {}
+    assert report.security_score is None
+    assert report.production_score is None
+    assert report.remediation_roadmap == {}
+    assert report.dependency_cves == []
+
+
+def test_audit_report_exports_v2_fields_to_json(tmp_path):
+    report = make_report(
+        findings=[make_finding(category="config", confidence=0.8)],
+        category_summary={"config": 1},
+        security_score=88,
+        production_score=92,
+        remediation_roadmap={"this_week": [], "next_sprint": [], "backlog": []},
+        dependency_cves=[{"id": "GHSA-test", "package": "demo", "severity": "LOW"}],
+    )
+
+    _, json_path = write_report_exports(report, tmp_path)
+    data = json.loads(tmp_path.joinpath("swarm_audit_report.json").read_text(encoding="utf-8"))
+
+    assert json_path.endswith("swarm_audit_report.json")
+    assert data["findings"][0]["category"] == "config"
+    assert data["findings"][0]["confidence"] == 0.8
+    assert data["category_summary"] == {"config": 1}
+    assert data["security_score"] == 88
+    assert data["production_score"] == 92
+    assert data["dependency_cves"][0]["id"] == "GHSA-test"